|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994756161510225, |
|
"eval_steps": 500, |
|
"global_step": 953, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01048767697954903, |
|
"grad_norm": 12.504458138350461, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.1660214066505432, |
|
"log_odds_ratio": -0.6960338354110718, |
|
"logits/chosen": -2.542905330657959, |
|
"logits/rejected": -2.5316882133483887, |
|
"logps/chosen": -0.9998037219047546, |
|
"logps/rejected": -1.0999689102172852, |
|
"loss": 2.7433, |
|
"nll_loss": 2.6550583839416504, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04999018833041191, |
|
"rewards/margins": 0.005008256994187832, |
|
"rewards/rejected": -0.05499844625592232, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02097535395909806, |
|
"grad_norm": 3.296398746092505, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.1942831575870514, |
|
"log_odds_ratio": -0.6660380959510803, |
|
"logits/chosen": -3.148456335067749, |
|
"logits/rejected": -3.171660900115967, |
|
"logps/chosen": -0.7626909613609314, |
|
"logps/rejected": -0.8731427192687988, |
|
"loss": 0.563, |
|
"nll_loss": 0.5225270986557007, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03813454881310463, |
|
"rewards/margins": 0.00552258500829339, |
|
"rewards/rejected": -0.04365713149309158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03146303093864709, |
|
"grad_norm": 2.4400188978085695, |
|
"learning_rate": 6e-06, |
|
"log_odds_chosen": 0.2339784801006317, |
|
"log_odds_ratio": -0.6537522673606873, |
|
"logits/chosen": -2.9630327224731445, |
|
"logits/rejected": -2.9368481636047363, |
|
"logps/chosen": -0.8345462679862976, |
|
"logps/rejected": -0.9655241966247559, |
|
"loss": 0.5355, |
|
"nll_loss": 0.4940575659275055, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04172731190919876, |
|
"rewards/margins": 0.0065488978289067745, |
|
"rewards/rejected": -0.04827621206641197, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04195070791819612, |
|
"grad_norm": 2.765802378357493, |
|
"learning_rate": 8.000000000000001e-06, |
|
"log_odds_chosen": 0.15870003402233124, |
|
"log_odds_ratio": -0.6969180107116699, |
|
"logits/chosen": -2.8065195083618164, |
|
"logits/rejected": -2.7910008430480957, |
|
"logps/chosen": -0.8027766346931458, |
|
"logps/rejected": -0.9165509343147278, |
|
"loss": 0.5199, |
|
"nll_loss": 0.48035889863967896, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04013883322477341, |
|
"rewards/margins": 0.005688714794814587, |
|
"rewards/rejected": -0.04582754150032997, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05243838489774515, |
|
"grad_norm": 2.7404814506796704, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.24872338771820068, |
|
"log_odds_ratio": -0.680080771446228, |
|
"logits/chosen": -2.7704856395721436, |
|
"logits/rejected": -2.77298641204834, |
|
"logps/chosen": -0.7987793684005737, |
|
"logps/rejected": -0.9668463468551636, |
|
"loss": 0.5424, |
|
"nll_loss": 0.48421746492385864, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03993896767497063, |
|
"rewards/margins": 0.00840335339307785, |
|
"rewards/rejected": -0.048342324793338776, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06292606187729417, |
|
"grad_norm": 2.7601739927853473, |
|
"learning_rate": 1.2e-05, |
|
"log_odds_chosen": 0.21160352230072021, |
|
"log_odds_ratio": -0.6764382123947144, |
|
"logits/chosen": -3.0032615661621094, |
|
"logits/rejected": -2.9960169792175293, |
|
"logps/chosen": -0.7965995669364929, |
|
"logps/rejected": -0.917363166809082, |
|
"loss": 0.5463, |
|
"nll_loss": 0.516124427318573, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.039829984307289124, |
|
"rewards/margins": 0.006038171239197254, |
|
"rewards/rejected": -0.045868150889873505, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07341373885684321, |
|
"grad_norm": 3.2123267767300128, |
|
"learning_rate": 1.4e-05, |
|
"log_odds_chosen": 0.19886036217212677, |
|
"log_odds_ratio": -0.690485417842865, |
|
"logits/chosen": -2.978163719177246, |
|
"logits/rejected": -3.0078656673431396, |
|
"logps/chosen": -0.8206535577774048, |
|
"logps/rejected": -0.9310994148254395, |
|
"loss": 0.5403, |
|
"nll_loss": 0.530234694480896, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04103267565369606, |
|
"rewards/margins": 0.0055222949013113976, |
|
"rewards/rejected": -0.046554967761039734, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08390141583639224, |
|
"grad_norm": 3.267750524500123, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"log_odds_chosen": 0.1725669652223587, |
|
"log_odds_ratio": -0.689757764339447, |
|
"logits/chosen": -2.963442087173462, |
|
"logits/rejected": -2.953914165496826, |
|
"logps/chosen": -0.8903671503067017, |
|
"logps/rejected": -1.0184500217437744, |
|
"loss": 0.5632, |
|
"nll_loss": 0.48384732007980347, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0445183590054512, |
|
"rewards/margins": 0.006404136773198843, |
|
"rewards/rejected": -0.050922494381666183, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09438909281594127, |
|
"grad_norm": 6.338896835312273, |
|
"learning_rate": 1.8e-05, |
|
"log_odds_chosen": 0.2590278387069702, |
|
"log_odds_ratio": -0.6696828603744507, |
|
"logits/chosen": -2.7556283473968506, |
|
"logits/rejected": -2.759223461151123, |
|
"logps/chosen": -0.8806008100509644, |
|
"logps/rejected": -1.0427037477493286, |
|
"loss": 0.5599, |
|
"nll_loss": 0.49117976427078247, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.044030044227838516, |
|
"rewards/margins": 0.008105142042040825, |
|
"rewards/rejected": -0.05213518068194389, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1048767697954903, |
|
"grad_norm": 2.844482964932932, |
|
"learning_rate": 2e-05, |
|
"log_odds_chosen": 0.20001336932182312, |
|
"log_odds_ratio": -0.6672823429107666, |
|
"logits/chosen": -2.836613178253174, |
|
"logits/rejected": -2.826347827911377, |
|
"logps/chosen": -0.8816211819648743, |
|
"logps/rejected": -1.0050264596939087, |
|
"loss": 0.5675, |
|
"nll_loss": 0.5239149332046509, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.044081058353185654, |
|
"rewards/margins": 0.006170268170535564, |
|
"rewards/rejected": -0.05025132745504379, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11536444677503933, |
|
"grad_norm": 2.717573270122186, |
|
"learning_rate": 1.9069251784911845e-05, |
|
"log_odds_chosen": 0.26770642399787903, |
|
"log_odds_ratio": -0.6399692296981812, |
|
"logits/chosen": -2.8041529655456543, |
|
"logits/rejected": -2.828374147415161, |
|
"logps/chosen": -0.8482567071914673, |
|
"logps/rejected": -1.021328330039978, |
|
"loss": 0.568, |
|
"nll_loss": 0.5094035863876343, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.042412832379341125, |
|
"rewards/margins": 0.008653589524328709, |
|
"rewards/rejected": -0.05106641724705696, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12585212375458835, |
|
"grad_norm": 2.3522582585650906, |
|
"learning_rate": 1.825741858350554e-05, |
|
"log_odds_chosen": 0.2770318388938904, |
|
"log_odds_ratio": -0.6538770198822021, |
|
"logits/chosen": -2.9046432971954346, |
|
"logits/rejected": -2.921250343322754, |
|
"logps/chosen": -0.8698671460151672, |
|
"logps/rejected": -1.0593181848526, |
|
"loss": 0.6048, |
|
"nll_loss": 0.5620476007461548, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0434933602809906, |
|
"rewards/margins": 0.009472550824284554, |
|
"rewards/rejected": -0.05296590179204941, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1363398007341374, |
|
"grad_norm": 2.3512564845307704, |
|
"learning_rate": 1.7541160386140587e-05, |
|
"log_odds_chosen": 0.213302880525589, |
|
"log_odds_ratio": -0.6861675977706909, |
|
"logits/chosen": -2.926781177520752, |
|
"logits/rejected": -2.930361747741699, |
|
"logps/chosen": -0.9192083477973938, |
|
"logps/rejected": -1.06519615650177, |
|
"loss": 0.5923, |
|
"nll_loss": 0.5574383735656738, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04596042260527611, |
|
"rewards/margins": 0.007299385964870453, |
|
"rewards/rejected": -0.05325980857014656, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14682747771368643, |
|
"grad_norm": 2.2489368047485705, |
|
"learning_rate": 1.6903085094570334e-05, |
|
"log_odds_chosen": 0.24789170920848846, |
|
"log_odds_ratio": -0.655090868473053, |
|
"logits/chosen": -2.9084389209747314, |
|
"logits/rejected": -2.9173099994659424, |
|
"logps/chosen": -0.9441210031509399, |
|
"logps/rejected": -1.1045926809310913, |
|
"loss": 0.5882, |
|
"nll_loss": 0.5544429421424866, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.047206051647663116, |
|
"rewards/margins": 0.008023588918149471, |
|
"rewards/rejected": -0.05522964149713516, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15731515469323545, |
|
"grad_norm": 2.6715309670512903, |
|
"learning_rate": 1.6329931618554523e-05, |
|
"log_odds_chosen": 0.14654028415679932, |
|
"log_odds_ratio": -0.7416929006576538, |
|
"logits/chosen": -2.8286139965057373, |
|
"logits/rejected": -2.842860698699951, |
|
"logps/chosen": -0.9699670672416687, |
|
"logps/rejected": -1.0669214725494385, |
|
"loss": 0.5441, |
|
"nll_loss": 0.5359360575675964, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.048498354852199554, |
|
"rewards/margins": 0.004847715608775616, |
|
"rewards/rejected": -0.053346067667007446, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16780283167278448, |
|
"grad_norm": 2.4917874181934616, |
|
"learning_rate": 1.5811388300841898e-05, |
|
"log_odds_chosen": 0.19475655257701874, |
|
"log_odds_ratio": -0.664051353931427, |
|
"logits/chosen": -2.8252522945404053, |
|
"logits/rejected": -2.839994192123413, |
|
"logps/chosen": -0.9179447889328003, |
|
"logps/rejected": -1.0352815389633179, |
|
"loss": 0.6078, |
|
"nll_loss": 0.5540346503257751, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.045897237956523895, |
|
"rewards/margins": 0.005866840481758118, |
|
"rewards/rejected": -0.05176408216357231, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1782905086523335, |
|
"grad_norm": 2.493896039254152, |
|
"learning_rate": 1.533929977694741e-05, |
|
"log_odds_chosen": 0.25445470213890076, |
|
"log_odds_ratio": -0.6574397087097168, |
|
"logits/chosen": -2.895998477935791, |
|
"logits/rejected": -2.9125123023986816, |
|
"logps/chosen": -0.8917832374572754, |
|
"logps/rejected": -1.0586717128753662, |
|
"loss": 0.5884, |
|
"nll_loss": 0.5544494986534119, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04458915814757347, |
|
"rewards/margins": 0.008344428613781929, |
|
"rewards/rejected": -0.05293358489871025, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18877818563188253, |
|
"grad_norm": 2.368451448201635, |
|
"learning_rate": 1.49071198499986e-05, |
|
"log_odds_chosen": 0.2552924156188965, |
|
"log_odds_ratio": -0.6543556451797485, |
|
"logits/chosen": -2.8886399269104004, |
|
"logits/rejected": -2.905686378479004, |
|
"logps/chosen": -0.9206914901733398, |
|
"logps/rejected": -1.091048240661621, |
|
"loss": 0.5686, |
|
"nll_loss": 0.551173985004425, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04603457450866699, |
|
"rewards/margins": 0.008517834357917309, |
|
"rewards/rejected": -0.054552413523197174, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19926586261143156, |
|
"grad_norm": 4.734046585912702, |
|
"learning_rate": 1.4509525002200235e-05, |
|
"log_odds_chosen": 0.21173310279846191, |
|
"log_odds_ratio": -0.6579927206039429, |
|
"logits/chosen": -2.9355111122131348, |
|
"logits/rejected": -2.952430009841919, |
|
"logps/chosen": -0.9388859868049622, |
|
"logps/rejected": -1.0733187198638916, |
|
"loss": 0.5936, |
|
"nll_loss": 0.6142745018005371, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04694430157542229, |
|
"rewards/margins": 0.006721635349094868, |
|
"rewards/rejected": -0.05366594344377518, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2097535395909806, |
|
"grad_norm": 2.2391424397427073, |
|
"learning_rate": 1.4142135623730951e-05, |
|
"log_odds_chosen": 0.28418153524398804, |
|
"log_odds_ratio": -0.6668760180473328, |
|
"logits/chosen": -2.873599052429199, |
|
"logits/rejected": -2.9066414833068848, |
|
"logps/chosen": -0.9204713702201843, |
|
"logps/rejected": -1.128112554550171, |
|
"loss": 0.5689, |
|
"nll_loss": 0.5723541975021362, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.046023570001125336, |
|
"rewards/margins": 0.010382059030234814, |
|
"rewards/rejected": -0.056405626237392426, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22024121657052964, |
|
"grad_norm": 2.1684330770876152, |
|
"learning_rate": 1.3801311186847084e-05, |
|
"log_odds_chosen": 0.11919783055782318, |
|
"log_odds_ratio": -0.7173447012901306, |
|
"logits/chosen": -2.884079933166504, |
|
"logits/rejected": -2.8981668949127197, |
|
"logps/chosen": -0.8726099729537964, |
|
"logps/rejected": -0.9488958120346069, |
|
"loss": 0.5693, |
|
"nll_loss": 0.5325449109077454, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04363049939274788, |
|
"rewards/margins": 0.0038142912089824677, |
|
"rewards/rejected": -0.04744479060173035, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23072889355007867, |
|
"grad_norm": 2.510753834710904, |
|
"learning_rate": 1.3483997249264842e-05, |
|
"log_odds_chosen": 0.18100012838840485, |
|
"log_odds_ratio": -0.7047401666641235, |
|
"logits/chosen": -2.8885810375213623, |
|
"logits/rejected": -2.8980116844177246, |
|
"logps/chosen": -0.8880792856216431, |
|
"logps/rejected": -1.0071966648101807, |
|
"loss": 0.5589, |
|
"nll_loss": 0.5211626291275024, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.044403962790966034, |
|
"rewards/margins": 0.005955878179520369, |
|
"rewards/rejected": -0.05035984516143799, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2412165705296277, |
|
"grad_norm": 2.0148191421861705, |
|
"learning_rate": 1.3187609467915744e-05, |
|
"log_odds_chosen": 0.2717307209968567, |
|
"log_odds_ratio": -0.6763201951980591, |
|
"logits/chosen": -2.829516887664795, |
|
"logits/rejected": -2.842909574508667, |
|
"logps/chosen": -0.9367680549621582, |
|
"logps/rejected": -1.1125657558441162, |
|
"loss": 0.5701, |
|
"nll_loss": 0.5263533592224121, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04683841019868851, |
|
"rewards/margins": 0.008789879269897938, |
|
"rewards/rejected": -0.05562828853726387, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2517042475091767, |
|
"grad_norm": 2.286828850039024, |
|
"learning_rate": 1.2909944487358057e-05, |
|
"log_odds_chosen": 0.2564060091972351, |
|
"log_odds_ratio": -0.651031494140625, |
|
"logits/chosen": -2.979280471801758, |
|
"logits/rejected": -3.0063037872314453, |
|
"logps/chosen": -0.9010913968086243, |
|
"logps/rejected": -1.065353512763977, |
|
"loss": 0.5799, |
|
"nll_loss": 0.5546143054962158, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04505457356572151, |
|
"rewards/margins": 0.008213100023567677, |
|
"rewards/rejected": -0.053267668932676315, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26219192448872575, |
|
"grad_norm": 3.959216899336302, |
|
"learning_rate": 1.2649110640673518e-05, |
|
"log_odds_chosen": 0.2661912143230438, |
|
"log_odds_ratio": -0.6746715307235718, |
|
"logits/chosen": -2.9726908206939697, |
|
"logits/rejected": -2.974113941192627, |
|
"logps/chosen": -0.8829942941665649, |
|
"logps/rejected": -1.0264866352081299, |
|
"loss": 0.5502, |
|
"nll_loss": 0.5201153755187988, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04414971172809601, |
|
"rewards/margins": 0.007174622267484665, |
|
"rewards/rejected": -0.05132433772087097, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2726796014682748, |
|
"grad_norm": 2.2699181039817, |
|
"learning_rate": 1.2403473458920845e-05, |
|
"log_odds_chosen": 0.2342940866947174, |
|
"log_odds_ratio": -0.6783974766731262, |
|
"logits/chosen": -2.9759726524353027, |
|
"logits/rejected": -2.9923360347747803, |
|
"logps/chosen": -0.9042210578918457, |
|
"logps/rejected": -1.0481539964675903, |
|
"loss": 0.5304, |
|
"nll_loss": 0.45657747983932495, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0452110581099987, |
|
"rewards/margins": 0.007196647580713034, |
|
"rewards/rejected": -0.052407700568437576, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2831672784478238, |
|
"grad_norm": 2.380998150273162, |
|
"learning_rate": 1.2171612389003691e-05, |
|
"log_odds_chosen": 0.17961958050727844, |
|
"log_odds_ratio": -0.6983593702316284, |
|
"logits/chosen": -2.938765525817871, |
|
"logits/rejected": -2.965757369995117, |
|
"logps/chosen": -0.9548166990280151, |
|
"logps/rejected": -1.0895111560821533, |
|
"loss": 0.5673, |
|
"nll_loss": 0.5430372357368469, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04774082824587822, |
|
"rewards/margins": 0.006734730210155249, |
|
"rewards/rejected": -0.0544755645096302, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29365495542737285, |
|
"grad_norm": 2.0870887262121323, |
|
"learning_rate": 1.1952286093343936e-05, |
|
"log_odds_chosen": 0.2291949987411499, |
|
"log_odds_ratio": -0.6750219464302063, |
|
"logits/chosen": -2.928527355194092, |
|
"logits/rejected": -2.9543163776397705, |
|
"logps/chosen": -0.9355181455612183, |
|
"logps/rejected": -1.0729036331176758, |
|
"loss": 0.5434, |
|
"nll_loss": 0.47713321447372437, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.04677591472864151, |
|
"rewards/margins": 0.006869266740977764, |
|
"rewards/rejected": -0.05364518240094185, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30414263240692185, |
|
"grad_norm": 2.661552133228645, |
|
"learning_rate": 1.1744404390294071e-05, |
|
"log_odds_chosen": 0.36491650342941284, |
|
"log_odds_ratio": -0.620793879032135, |
|
"logits/chosen": -2.880122661590576, |
|
"logits/rejected": -2.8935391902923584, |
|
"logps/chosen": -0.836012065410614, |
|
"logps/rejected": -1.05286705493927, |
|
"loss": 0.5596, |
|
"nll_loss": 0.4885989725589752, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0418006032705307, |
|
"rewards/margins": 0.010842744261026382, |
|
"rewards/rejected": -0.05264334753155708, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3146303093864709, |
|
"grad_norm": 3.127285518362044, |
|
"learning_rate": 1.1547005383792517e-05, |
|
"log_odds_chosen": 0.255328893661499, |
|
"log_odds_ratio": -0.6939107179641724, |
|
"logits/chosen": -2.9603378772735596, |
|
"logits/rejected": -2.992128372192383, |
|
"logps/chosen": -0.8731514811515808, |
|
"logps/rejected": -1.0526010990142822, |
|
"loss": 0.5835, |
|
"nll_loss": 0.5112031102180481, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0436575748026371, |
|
"rewards/margins": 0.008972481824457645, |
|
"rewards/rejected": -0.052630048245191574, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3251179863660199, |
|
"grad_norm": 2.013637214040506, |
|
"learning_rate": 1.1359236684941297e-05, |
|
"log_odds_chosen": 0.21040907502174377, |
|
"log_odds_ratio": -0.688109278678894, |
|
"logits/chosen": -2.9860305786132812, |
|
"logits/rejected": -2.9820261001586914, |
|
"logps/chosen": -0.9089478254318237, |
|
"logps/rejected": -1.0382112264633179, |
|
"loss": 0.585, |
|
"nll_loss": 0.5399721264839172, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.045447397977113724, |
|
"rewards/margins": 0.006463165394961834, |
|
"rewards/rejected": -0.051910560578107834, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33560566334556896, |
|
"grad_norm": 2.1577553752792995, |
|
"learning_rate": 1.118033988749895e-05, |
|
"log_odds_chosen": 0.27985960245132446, |
|
"log_odds_ratio": -0.6601210832595825, |
|
"logits/chosen": -3.0387003421783447, |
|
"logits/rejected": -3.0464096069335938, |
|
"logps/chosen": -0.9086373448371887, |
|
"logps/rejected": -1.0836986303329468, |
|
"loss": 0.5243, |
|
"nll_loss": 0.4922841191291809, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.045431867241859436, |
|
"rewards/margins": 0.008753069676458836, |
|
"rewards/rejected": -0.0541849359869957, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34609334032511796, |
|
"grad_norm": 2.422690319169778, |
|
"learning_rate": 1.1009637651263608e-05, |
|
"log_odds_chosen": 0.28255337476730347, |
|
"log_odds_ratio": -0.6909259557723999, |
|
"logits/chosen": -2.950887441635132, |
|
"logits/rejected": -2.9948947429656982, |
|
"logps/chosen": -0.9054603576660156, |
|
"logps/rejected": -1.0888211727142334, |
|
"loss": 0.5544, |
|
"nll_loss": 0.5376341342926025, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04527302458882332, |
|
"rewards/margins": 0.009168041869997978, |
|
"rewards/rejected": -0.05444106459617615, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.356581017304667, |
|
"grad_norm": 2.2975046406882798, |
|
"learning_rate": 1.0846522890932809e-05, |
|
"log_odds_chosen": 0.2153971642255783, |
|
"log_odds_ratio": -0.6926898956298828, |
|
"logits/chosen": -2.9686572551727295, |
|
"logits/rejected": -3.0199432373046875, |
|
"logps/chosen": -0.8590608835220337, |
|
"logps/rejected": -1.00636887550354, |
|
"loss": 0.5708, |
|
"nll_loss": 0.5127817392349243, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.042953044176101685, |
|
"rewards/margins": 0.007365405559539795, |
|
"rewards/rejected": -0.05031844973564148, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36706869428421607, |
|
"grad_norm": 2.135727653321979, |
|
"learning_rate": 1.0690449676496977e-05, |
|
"log_odds_chosen": 0.2665565609931946, |
|
"log_odds_ratio": -0.6829238533973694, |
|
"logits/chosen": -3.044860363006592, |
|
"logits/rejected": -3.0616378784179688, |
|
"logps/chosen": -0.8791500329971313, |
|
"logps/rejected": -1.0402672290802002, |
|
"loss": 0.5495, |
|
"nll_loss": 0.5228344202041626, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04395749792456627, |
|
"rewards/margins": 0.00805586390197277, |
|
"rewards/rejected": -0.05201335996389389, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37755637126376507, |
|
"grad_norm": 3.150177435714442, |
|
"learning_rate": 1.0540925533894598e-05, |
|
"log_odds_chosen": 0.4033277928829193, |
|
"log_odds_ratio": -0.602225124835968, |
|
"logits/chosen": -2.9472672939300537, |
|
"logits/rejected": -2.975858211517334, |
|
"logps/chosen": -0.8669608235359192, |
|
"logps/rejected": -1.110353708267212, |
|
"loss": 0.5494, |
|
"nll_loss": 0.5087054371833801, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0433480478823185, |
|
"rewards/margins": 0.01216964516788721, |
|
"rewards/rejected": -0.05551769211888313, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3880440482433141, |
|
"grad_norm": 2.130197231019511, |
|
"learning_rate": 1.0397504898200728e-05, |
|
"log_odds_chosen": 0.3966829478740692, |
|
"log_odds_ratio": -0.6142522096633911, |
|
"logits/chosen": -3.0528526306152344, |
|
"logits/rejected": -3.0623490810394287, |
|
"logps/chosen": -0.8640265464782715, |
|
"logps/rejected": -1.1243717670440674, |
|
"loss": 0.5232, |
|
"nll_loss": 0.5101068615913391, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.043201327323913574, |
|
"rewards/margins": 0.013017257675528526, |
|
"rewards/rejected": -0.05621858313679695, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3985317252228631, |
|
"grad_norm": 2.415549044992692, |
|
"learning_rate": 1.0259783520851543e-05, |
|
"log_odds_chosen": 0.46208301186561584, |
|
"log_odds_ratio": -0.5873923301696777, |
|
"logits/chosen": -3.055903196334839, |
|
"logits/rejected": -3.089763879776001, |
|
"logps/chosen": -0.8685981035232544, |
|
"logps/rejected": -1.1247217655181885, |
|
"loss": 0.5376, |
|
"nll_loss": 0.5167646408081055, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0434299036860466, |
|
"rewards/margins": 0.01280617993324995, |
|
"rewards/rejected": -0.056236088275909424, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4090194022024122, |
|
"grad_norm": 2.4197618087673036, |
|
"learning_rate": 1.0127393670836667e-05, |
|
"log_odds_chosen": 0.08936772495508194, |
|
"log_odds_ratio": -0.7186132073402405, |
|
"logits/chosen": -2.998857021331787, |
|
"logits/rejected": -3.021352529525757, |
|
"logps/chosen": -0.9128287434577942, |
|
"logps/rejected": -0.9754525423049927, |
|
"loss": 0.5571, |
|
"nll_loss": 0.5319759845733643, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04564143717288971, |
|
"rewards/margins": 0.0031311833299696445, |
|
"rewards/rejected": -0.048772621899843216, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4195070791819612, |
|
"grad_norm": 2.0748995530757424, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.23965713381767273, |
|
"log_odds_ratio": -0.6899853348731995, |
|
"logits/chosen": -2.883575201034546, |
|
"logits/rejected": -2.908125400543213, |
|
"logps/chosen": -0.9490350484848022, |
|
"logps/rejected": -1.1106139421463013, |
|
"loss": 0.5725, |
|
"nll_loss": 0.5262094736099243, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04745175316929817, |
|
"rewards/margins": 0.00807894580066204, |
|
"rewards/rejected": -0.05553068965673447, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4299947561615102, |
|
"grad_norm": 2.0498490112152026, |
|
"learning_rate": 9.877295966495898e-06, |
|
"log_odds_chosen": 0.14244404435157776, |
|
"log_odds_ratio": -0.7278560996055603, |
|
"logits/chosen": -2.988100051879883, |
|
"logits/rejected": -2.9914164543151855, |
|
"logps/chosen": -0.8709594011306763, |
|
"logps/rejected": -0.9773006439208984, |
|
"loss": 0.5455, |
|
"nll_loss": 0.4832683503627777, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04354798048734665, |
|
"rewards/margins": 0.0053170593455433846, |
|
"rewards/rejected": -0.04886503517627716, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4404824331410593, |
|
"grad_norm": 1.9311064341389872, |
|
"learning_rate": 9.759000729485331e-06, |
|
"log_odds_chosen": 0.30063071846961975, |
|
"log_odds_ratio": -0.643203854560852, |
|
"logits/chosen": -2.9488558769226074, |
|
"logits/rejected": -2.9841551780700684, |
|
"logps/chosen": -0.8707404136657715, |
|
"logps/rejected": -1.0532442331314087, |
|
"loss": 0.5355, |
|
"nll_loss": 0.474843829870224, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04353701323270798, |
|
"rewards/margins": 0.009125196374952793, |
|
"rewards/rejected": -0.05266221612691879, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4509701101206083, |
|
"grad_norm": 2.119895291758326, |
|
"learning_rate": 9.644856443408244e-06, |
|
"log_odds_chosen": 0.2837393879890442, |
|
"log_odds_ratio": -0.6551750898361206, |
|
"logits/chosen": -2.9840757846832275, |
|
"logits/rejected": -2.9921929836273193, |
|
"logps/chosen": -0.8468173146247864, |
|
"logps/rejected": -1.0135347843170166, |
|
"loss": 0.5557, |
|
"nll_loss": 0.5443450212478638, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04234086349606514, |
|
"rewards/margins": 0.00833587534725666, |
|
"rewards/rejected": -0.05067674070596695, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46145778710015734, |
|
"grad_norm": 2.095435518308805, |
|
"learning_rate": 9.534625892455923e-06, |
|
"log_odds_chosen": 0.2355252504348755, |
|
"log_odds_ratio": -0.6598283648490906, |
|
"logits/chosen": -3.0252740383148193, |
|
"logits/rejected": -3.045849323272705, |
|
"logps/chosen": -0.8709392547607422, |
|
"logps/rejected": -1.0179613828659058, |
|
"loss": 0.5508, |
|
"nll_loss": 0.5189236998558044, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04354696720838547, |
|
"rewards/margins": 0.0073511130176484585, |
|
"rewards/rejected": -0.050898075103759766, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47194546407970633, |
|
"grad_norm": 1.9017756846669818, |
|
"learning_rate": 9.428090415820635e-06, |
|
"log_odds_chosen": 0.34075412154197693, |
|
"log_odds_ratio": -0.6583858728408813, |
|
"logits/chosen": -3.0218703746795654, |
|
"logits/rejected": -3.0481696128845215, |
|
"logps/chosen": -0.8293315768241882, |
|
"logps/rejected": -1.047191858291626, |
|
"loss": 0.5286, |
|
"nll_loss": 0.4964592456817627, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04146658256649971, |
|
"rewards/margins": 0.010893006809055805, |
|
"rewards/rejected": -0.05235959216952324, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4824331410592554, |
|
"grad_norm": 2.079766146123277, |
|
"learning_rate": 9.325048082403139e-06, |
|
"log_odds_chosen": 0.16855968534946442, |
|
"log_odds_ratio": -0.711928129196167, |
|
"logits/chosen": -3.0086510181427, |
|
"logits/rejected": -3.0489156246185303, |
|
"logps/chosen": -0.9442957043647766, |
|
"logps/rejected": -1.072997808456421, |
|
"loss": 0.5326, |
|
"nll_loss": 0.5338221788406372, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04721478372812271, |
|
"rewards/margins": 0.00643510278314352, |
|
"rewards/rejected": -0.05364988371729851, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4929208180388044, |
|
"grad_norm": 2.4868491558153085, |
|
"learning_rate": 9.225312080288851e-06, |
|
"log_odds_chosen": 0.23586861789226532, |
|
"log_odds_ratio": -0.6902174949645996, |
|
"logits/chosen": -2.986264705657959, |
|
"logits/rejected": -3.0127644538879395, |
|
"logps/chosen": -0.8882457613945007, |
|
"logps/rejected": -1.034985899925232, |
|
"loss": 0.5413, |
|
"nll_loss": 0.5090312361717224, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.044412292540073395, |
|
"rewards/margins": 0.007337009999901056, |
|
"rewards/rejected": -0.051749296486377716, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5034084950183534, |
|
"grad_norm": 2.0043501739666882, |
|
"learning_rate": 9.12870929175277e-06, |
|
"log_odds_chosen": 0.17604230344295502, |
|
"log_odds_ratio": -0.707550048828125, |
|
"logits/chosen": -3.088604211807251, |
|
"logits/rejected": -3.12184476852417, |
|
"logps/chosen": -0.8456010818481445, |
|
"logps/rejected": -0.9586717486381531, |
|
"loss": 0.5178, |
|
"nll_loss": 0.5126105546951294, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.042280055582523346, |
|
"rewards/margins": 0.005653535481542349, |
|
"rewards/rejected": -0.047933585941791534, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5138961719979025, |
|
"grad_norm": 1.9415978406566505, |
|
"learning_rate": 9.035079029052514e-06, |
|
"log_odds_chosen": 0.22476902604103088, |
|
"log_odds_ratio": -0.6716736555099487, |
|
"logits/chosen": -3.003417491912842, |
|
"logits/rejected": -3.0048608779907227, |
|
"logps/chosen": -0.9196673631668091, |
|
"logps/rejected": -1.0358223915100098, |
|
"loss": 0.5397, |
|
"nll_loss": 0.5024985671043396, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.045983362942934036, |
|
"rewards/margins": 0.005807754583656788, |
|
"rewards/rejected": -0.051791124045848846, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5243838489774515, |
|
"grad_norm": 2.2353701695425423, |
|
"learning_rate": 8.94427190999916e-06, |
|
"log_odds_chosen": 0.20684054493904114, |
|
"log_odds_ratio": -0.698712944984436, |
|
"logits/chosen": -3.0111751556396484, |
|
"logits/rejected": -3.0036330223083496, |
|
"logps/chosen": -0.8826943635940552, |
|
"logps/rejected": -1.0074814558029175, |
|
"loss": 0.548, |
|
"nll_loss": 0.5235316157341003, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0441347137093544, |
|
"rewards/margins": 0.006239361595362425, |
|
"rewards/rejected": -0.050374072045087814, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5348715259570005, |
|
"grad_norm": 1.742537477144132, |
|
"learning_rate": 8.856148855400955e-06, |
|
"log_odds_chosen": 0.3066679835319519, |
|
"log_odds_ratio": -0.6453306674957275, |
|
"logits/chosen": -2.9636032581329346, |
|
"logits/rejected": -2.97407865524292, |
|
"logps/chosen": -0.8404191136360168, |
|
"logps/rejected": -1.0267155170440674, |
|
"loss": 0.5264, |
|
"nll_loss": 0.5354185104370117, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04202095791697502, |
|
"rewards/margins": 0.009314822033047676, |
|
"rewards/rejected": -0.05133577436208725, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5453592029365496, |
|
"grad_norm": 1.6799388590726438, |
|
"learning_rate": 8.770580193070294e-06, |
|
"log_odds_chosen": 0.24468369781970978, |
|
"log_odds_ratio": -0.6710330247879028, |
|
"logits/chosen": -2.959213972091675, |
|
"logits/rejected": -2.966728687286377, |
|
"logps/chosen": -0.9035038948059082, |
|
"logps/rejected": -1.0690029859542847, |
|
"loss": 0.5366, |
|
"nll_loss": 0.47406935691833496, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04517520219087601, |
|
"rewards/margins": 0.008274954743683338, |
|
"rewards/rejected": -0.053450148552656174, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5558468799160986, |
|
"grad_norm": 1.8707354612150964, |
|
"learning_rate": 8.687444855261389e-06, |
|
"log_odds_chosen": 0.4215427339076996, |
|
"log_odds_ratio": -0.6489927172660828, |
|
"logits/chosen": -3.0756938457489014, |
|
"logits/rejected": -3.0923542976379395, |
|
"logps/chosen": -0.8253329992294312, |
|
"logps/rejected": -1.1108949184417725, |
|
"loss": 0.5365, |
|
"nll_loss": 0.45042163133621216, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04126664996147156, |
|
"rewards/margins": 0.014278100803494453, |
|
"rewards/rejected": -0.05554475262761116, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5663345568956476, |
|
"grad_norm": 1.922705947748225, |
|
"learning_rate": 8.606629658238705e-06, |
|
"log_odds_chosen": 0.1879667341709137, |
|
"log_odds_ratio": -0.6903280019760132, |
|
"logits/chosen": -2.975130796432495, |
|
"logits/rejected": -3.0028696060180664, |
|
"logps/chosen": -0.8695458173751831, |
|
"logps/rejected": -0.9805169105529785, |
|
"loss": 0.5535, |
|
"nll_loss": 0.5275255441665649, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04347729682922363, |
|
"rewards/margins": 0.005548550747334957, |
|
"rewards/rejected": -0.049025844782590866, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5768222338751966, |
|
"grad_norm": 1.9089385183272836, |
|
"learning_rate": 8.528028654224417e-06, |
|
"log_odds_chosen": 0.42722567915916443, |
|
"log_odds_ratio": -0.6043616533279419, |
|
"logits/chosen": -2.9973807334899902, |
|
"logits/rejected": -3.0049965381622314, |
|
"logps/chosen": -0.8592002987861633, |
|
"logps/rejected": -1.1192405223846436, |
|
"loss": 0.537, |
|
"nll_loss": 0.5372708439826965, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.042960021644830704, |
|
"rewards/margins": 0.013002010062336922, |
|
"rewards/rejected": -0.05596202611923218, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5873099108547457, |
|
"grad_norm": 1.9519454661958895, |
|
"learning_rate": 8.451542547285167e-06, |
|
"log_odds_chosen": 0.23686861991882324, |
|
"log_odds_ratio": -0.679013192653656, |
|
"logits/chosen": -3.0309016704559326, |
|
"logits/rejected": -3.0620574951171875, |
|
"logps/chosen": -0.8845365643501282, |
|
"logps/rejected": -1.0314432382583618, |
|
"loss": 0.5215, |
|
"nll_loss": 0.5018130540847778, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04422682151198387, |
|
"rewards/margins": 0.0073453388176858425, |
|
"rewards/rejected": -0.05157216265797615, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5977975878342947, |
|
"grad_norm": 1.902474576616517, |
|
"learning_rate": 8.37707816583391e-06, |
|
"log_odds_chosen": 0.157462477684021, |
|
"log_odds_ratio": -0.7165660858154297, |
|
"logits/chosen": -2.971592903137207, |
|
"logits/rejected": -2.9932913780212402, |
|
"logps/chosen": -0.8898121118545532, |
|
"logps/rejected": -0.9948716163635254, |
|
"loss": 0.5041, |
|
"nll_loss": 0.5276492834091187, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.044490598142147064, |
|
"rewards/margins": 0.005252980627119541, |
|
"rewards/rejected": -0.04974358528852463, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6082852648138437, |
|
"grad_norm": 1.9526588876095308, |
|
"learning_rate": 8.304547985373997e-06, |
|
"log_odds_chosen": 0.27767136693000793, |
|
"log_odds_ratio": -0.6578360199928284, |
|
"logits/chosen": -3.0485613346099854, |
|
"logits/rejected": -3.061281204223633, |
|
"logps/chosen": -0.8733240962028503, |
|
"logps/rejected": -1.0594861507415771, |
|
"loss": 0.5456, |
|
"nll_loss": 0.48286086320877075, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.043666206300258636, |
|
"rewards/margins": 0.009308096952736378, |
|
"rewards/rejected": -0.05297430604696274, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6187729417933928, |
|
"grad_norm": 1.963515177379308, |
|
"learning_rate": 8.233869695926184e-06, |
|
"log_odds_chosen": 0.32016056776046753, |
|
"log_odds_ratio": -0.6649240255355835, |
|
"logits/chosen": -3.0834898948669434, |
|
"logits/rejected": -3.123967409133911, |
|
"logps/chosen": -0.8281318545341492, |
|
"logps/rejected": -1.021436095237732, |
|
"loss": 0.5124, |
|
"nll_loss": 0.5498961210250854, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04140659421682358, |
|
"rewards/margins": 0.009665210731327534, |
|
"rewards/rejected": -0.05107180029153824, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6292606187729418, |
|
"grad_norm": 2.1416673571833584, |
|
"learning_rate": 8.164965809277262e-06, |
|
"log_odds_chosen": 0.3141978085041046, |
|
"log_odds_ratio": -0.6486893892288208, |
|
"logits/chosen": -3.1147074699401855, |
|
"logits/rejected": -3.11454176902771, |
|
"logps/chosen": -0.8215556144714355, |
|
"logps/rejected": -1.009476661682129, |
|
"loss": 0.5144, |
|
"nll_loss": 0.4836875796318054, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04107777774333954, |
|
"rewards/margins": 0.009396053850650787, |
|
"rewards/rejected": -0.05047383904457092, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6397482957524908, |
|
"grad_norm": 2.03894912155955, |
|
"learning_rate": 8.097763301789162e-06, |
|
"log_odds_chosen": 0.1958848237991333, |
|
"log_odds_ratio": -0.6933802366256714, |
|
"logits/chosen": -3.016098737716675, |
|
"logits/rejected": -3.046642780303955, |
|
"logps/chosen": -0.8733209371566772, |
|
"logps/rejected": -0.9883171916007996, |
|
"loss": 0.526, |
|
"nll_loss": 0.4880569875240326, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0436660535633564, |
|
"rewards/margins": 0.005749809555709362, |
|
"rewards/rejected": -0.049415864050388336, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6502359727320398, |
|
"grad_norm": 2.068974001178546, |
|
"learning_rate": 8.03219328902499e-06, |
|
"log_odds_chosen": 0.17991718649864197, |
|
"log_odds_ratio": -0.7055822610855103, |
|
"logits/chosen": -3.045403003692627, |
|
"logits/rejected": -3.0644798278808594, |
|
"logps/chosen": -0.8806620836257935, |
|
"logps/rejected": -1.0145095586776733, |
|
"loss": 0.5295, |
|
"nll_loss": 0.5151625275611877, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04403311014175415, |
|
"rewards/margins": 0.006692370865494013, |
|
"rewards/rejected": -0.05072547867894173, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6607236497115889, |
|
"grad_norm": 1.9705491215328443, |
|
"learning_rate": 7.968190728895958e-06, |
|
"log_odds_chosen": 0.23948292434215546, |
|
"log_odds_ratio": -0.6947344541549683, |
|
"logits/chosen": -3.016519546508789, |
|
"logits/rejected": -3.042133331298828, |
|
"logps/chosen": -0.8557758331298828, |
|
"logps/rejected": -1.0029237270355225, |
|
"loss": 0.5331, |
|
"nll_loss": 0.5245988368988037, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0427887924015522, |
|
"rewards/margins": 0.007357400842010975, |
|
"rewards/rejected": -0.0501461923122406, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6712113266911379, |
|
"grad_norm": 2.664256522681278, |
|
"learning_rate": 7.905694150420949e-06, |
|
"log_odds_chosen": 0.3717094659805298, |
|
"log_odds_ratio": -0.6480633020401001, |
|
"logits/chosen": -3.0543761253356934, |
|
"logits/rejected": -3.0751733779907227, |
|
"logps/chosen": -0.8645519018173218, |
|
"logps/rejected": -1.102386713027954, |
|
"loss": 0.5149, |
|
"nll_loss": 0.46133953332901, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04322759807109833, |
|
"rewards/margins": 0.011891739442944527, |
|
"rewards/rejected": -0.0551193431019783, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6816990036706869, |
|
"grad_norm": 1.878621524799117, |
|
"learning_rate": 7.844645405527363e-06, |
|
"log_odds_chosen": 0.1861819326877594, |
|
"log_odds_ratio": -0.7022497057914734, |
|
"logits/chosen": -3.0863146781921387, |
|
"logits/rejected": -3.113098621368408, |
|
"logps/chosen": -0.8403372764587402, |
|
"logps/rejected": -0.9548438191413879, |
|
"loss": 0.5336, |
|
"nll_loss": 0.5122831463813782, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04201686754822731, |
|
"rewards/margins": 0.0057253288105130196, |
|
"rewards/rejected": -0.047742195427417755, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6921866806502359, |
|
"grad_norm": 1.8977100039056058, |
|
"learning_rate": 7.78498944161523e-06, |
|
"log_odds_chosen": 0.2854728400707245, |
|
"log_odds_ratio": -0.6552462577819824, |
|
"logits/chosen": -3.052263021469116, |
|
"logits/rejected": -3.0898962020874023, |
|
"logps/chosen": -0.8826674222946167, |
|
"logps/rejected": -1.0711818933486938, |
|
"loss": 0.5304, |
|
"nll_loss": 0.4874996542930603, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.044133372604846954, |
|
"rewards/margins": 0.009425725787878036, |
|
"rewards/rejected": -0.05355909466743469, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.702674357629785, |
|
"grad_norm": 1.8195731091765575, |
|
"learning_rate": 7.726674092862559e-06, |
|
"log_odds_chosen": 0.4364054203033447, |
|
"log_odds_ratio": -0.6321254968643188, |
|
"logits/chosen": -2.9931445121765137, |
|
"logits/rejected": -3.025317907333374, |
|
"logps/chosen": -0.8416171073913574, |
|
"logps/rejected": -1.1292223930358887, |
|
"loss": 0.5237, |
|
"nll_loss": 0.46936100721359253, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04208085685968399, |
|
"rewards/margins": 0.014380265958607197, |
|
"rewards/rejected": -0.05646112561225891, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.713162034609334, |
|
"grad_norm": 2.0599075037830192, |
|
"learning_rate": 7.669649888473705e-06, |
|
"log_odds_chosen": 0.31395241618156433, |
|
"log_odds_ratio": -0.650139570236206, |
|
"logits/chosen": -2.9855525493621826, |
|
"logits/rejected": -2.9897267818450928, |
|
"logps/chosen": -0.8750125169754028, |
|
"logps/rejected": -1.0669299364089966, |
|
"loss": 0.5075, |
|
"nll_loss": 0.4943002760410309, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04375062882900238, |
|
"rewards/margins": 0.009595867246389389, |
|
"rewards/rejected": -0.05334649235010147, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.723649711588883, |
|
"grad_norm": 1.8347271674417223, |
|
"learning_rate": 7.61386987626881e-06, |
|
"log_odds_chosen": 0.18291696906089783, |
|
"log_odds_ratio": -0.7239105701446533, |
|
"logits/chosen": -2.97595477104187, |
|
"logits/rejected": -2.991725444793701, |
|
"logps/chosen": -0.8641953468322754, |
|
"logps/rejected": -0.9991108179092407, |
|
"loss": 0.5304, |
|
"nll_loss": 0.5499680638313293, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04320976510643959, |
|
"rewards/margins": 0.006745772901922464, |
|
"rewards/rejected": -0.04995553940534592, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7341373885684321, |
|
"grad_norm": 2.2852704943230915, |
|
"learning_rate": 7.559289460184545e-06, |
|
"log_odds_chosen": 0.3105728030204773, |
|
"log_odds_ratio": -0.6319602727890015, |
|
"logits/chosen": -2.985989809036255, |
|
"logits/rejected": -3.0209579467773438, |
|
"logps/chosen": -0.8320032358169556, |
|
"logps/rejected": -1.0303562879562378, |
|
"loss": 0.5296, |
|
"nll_loss": 0.5422422885894775, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04160016402602196, |
|
"rewards/margins": 0.009917653165757656, |
|
"rewards/rejected": -0.05151782184839249, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7446250655479811, |
|
"grad_norm": 1.9768197452256755, |
|
"learning_rate": 7.505866250408016e-06, |
|
"log_odds_chosen": 0.2948063015937805, |
|
"log_odds_ratio": -0.6451742649078369, |
|
"logits/chosen": -3.1170597076416016, |
|
"logits/rejected": -3.136089324951172, |
|
"logps/chosen": -0.8415013551712036, |
|
"logps/rejected": -1.0454984903335571, |
|
"loss": 0.5237, |
|
"nll_loss": 0.47949719429016113, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04207506403326988, |
|
"rewards/margins": 0.01019985694438219, |
|
"rewards/rejected": -0.0522749237716198, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7551127425275301, |
|
"grad_norm": 1.905599119477425, |
|
"learning_rate": 7.4535599249993e-06, |
|
"log_odds_chosen": 0.40306347608566284, |
|
"log_odds_ratio": -0.6352882385253906, |
|
"logits/chosen": -3.064483642578125, |
|
"logits/rejected": -3.087808847427368, |
|
"logps/chosen": -0.7972971200942993, |
|
"logps/rejected": -1.046507477760315, |
|
"loss": 0.5304, |
|
"nll_loss": 0.4636651873588562, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.039864856749773026, |
|
"rewards/margins": 0.012460513040423393, |
|
"rewards/rejected": -0.05232536792755127, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7656004195070791, |
|
"grad_norm": 2.19124615484763, |
|
"learning_rate": 7.402332101976053e-06, |
|
"log_odds_chosen": 0.12367966026067734, |
|
"log_odds_ratio": -0.7226089239120483, |
|
"logits/chosen": -3.0835583209991455, |
|
"logits/rejected": -3.0826332569122314, |
|
"logps/chosen": -0.8365408778190613, |
|
"logps/rejected": -0.9029885530471802, |
|
"loss": 0.5374, |
|
"nll_loss": 0.5031268000602722, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04182704538106918, |
|
"rewards/margins": 0.0033223754726350307, |
|
"rewards/rejected": -0.04514942690730095, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7760880964866282, |
|
"grad_norm": 2.0835998895674837, |
|
"learning_rate": 7.352146220938079e-06, |
|
"log_odds_chosen": 0.33691075444221497, |
|
"log_odds_ratio": -0.6264201402664185, |
|
"logits/chosen": -3.1278512477874756, |
|
"logits/rejected": -3.139995574951172, |
|
"logps/chosen": -0.8067742586135864, |
|
"logps/rejected": -1.0221493244171143, |
|
"loss": 0.5312, |
|
"nll_loss": 0.4790155291557312, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.040338706225156784, |
|
"rewards/margins": 0.010768752545118332, |
|
"rewards/rejected": -0.051107458770275116, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7865757734661772, |
|
"grad_norm": 1.9667031119071154, |
|
"learning_rate": 7.3029674334022146e-06, |
|
"log_odds_chosen": 0.23670358955860138, |
|
"log_odds_ratio": -0.6752098202705383, |
|
"logits/chosen": -3.1056113243103027, |
|
"logits/rejected": -3.1298460960388184, |
|
"logps/chosen": -0.8614869117736816, |
|
"logps/rejected": -0.9949930310249329, |
|
"loss": 0.5426, |
|
"nll_loss": 0.4975660443305969, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0430743470788002, |
|
"rewards/margins": 0.006675302051007748, |
|
"rewards/rejected": -0.049749650061130524, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7970634504457262, |
|
"grad_norm": 1.8638714551633075, |
|
"learning_rate": 7.254762501100117e-06, |
|
"log_odds_chosen": 0.2394195795059204, |
|
"log_odds_ratio": -0.6686865091323853, |
|
"logits/chosen": -3.092322826385498, |
|
"logits/rejected": -3.0998446941375732, |
|
"logps/chosen": -0.8189753293991089, |
|
"logps/rejected": -0.9735254049301147, |
|
"loss": 0.5115, |
|
"nll_loss": 0.4049908220767975, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0409487709403038, |
|
"rewards/margins": 0.007727508433163166, |
|
"rewards/rejected": -0.048676274716854095, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8075511274252754, |
|
"grad_norm": 2.098087236150393, |
|
"learning_rate": 7.207499701564472e-06, |
|
"log_odds_chosen": 0.21572642028331757, |
|
"log_odds_ratio": -0.7029857635498047, |
|
"logits/chosen": -3.0059127807617188, |
|
"logits/rejected": -3.0258781909942627, |
|
"logps/chosen": -0.8941653370857239, |
|
"logps/rejected": -1.0438942909240723, |
|
"loss": 0.5343, |
|
"nll_loss": 0.5011810064315796, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.044708263128995895, |
|
"rewards/margins": 0.007486448623239994, |
|
"rewards/rejected": -0.05219471454620361, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8180388044048243, |
|
"grad_norm": 1.908201970451478, |
|
"learning_rate": 7.1611487403943295e-06, |
|
"log_odds_chosen": 0.22588184475898743, |
|
"log_odds_ratio": -0.6703106164932251, |
|
"logits/chosen": -3.0057101249694824, |
|
"logits/rejected": -3.0319108963012695, |
|
"logps/chosen": -0.8802768588066101, |
|
"logps/rejected": -0.997613787651062, |
|
"loss": 0.5466, |
|
"nll_loss": 0.5490036606788635, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0440138503909111, |
|
"rewards/margins": 0.005866837687790394, |
|
"rewards/rejected": -0.04988069087266922, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8285264813843733, |
|
"grad_norm": 1.8452821315553456, |
|
"learning_rate": 7.115680669648201e-06, |
|
"log_odds_chosen": 0.32251420617103577, |
|
"log_odds_ratio": -0.6489396691322327, |
|
"logits/chosen": -2.991415500640869, |
|
"logits/rejected": -3.0075478553771973, |
|
"logps/chosen": -0.8143788576126099, |
|
"logps/rejected": -1.0171436071395874, |
|
"loss": 0.5052, |
|
"nll_loss": 0.4423222541809082, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04071894288063049, |
|
"rewards/margins": 0.010138243436813354, |
|
"rewards/rejected": -0.05085718631744385, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8390141583639223, |
|
"grad_norm": 2.099723593564682, |
|
"learning_rate": 7.0710678118654756e-06, |
|
"log_odds_chosen": 0.4498319625854492, |
|
"log_odds_ratio": -0.5986544489860535, |
|
"logits/chosen": -2.9999208450317383, |
|
"logits/rejected": -2.9963490962982178, |
|
"logps/chosen": -0.782555341720581, |
|
"logps/rejected": -1.068285584449768, |
|
"loss": 0.5173, |
|
"nll_loss": 0.4201901853084564, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03912776708602905, |
|
"rewards/margins": 0.014286505989730358, |
|
"rewards/rejected": -0.053414274007081985, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8495018353434715, |
|
"grad_norm": 1.9010573028789273, |
|
"learning_rate": 7.027283689263066e-06, |
|
"log_odds_chosen": 0.34422335028648376, |
|
"log_odds_ratio": -0.6322020292282104, |
|
"logits/chosen": -3.0011842250823975, |
|
"logits/rejected": -2.9966137409210205, |
|
"logps/chosen": -0.8086786270141602, |
|
"logps/rejected": -1.0155996084213257, |
|
"loss": 0.5132, |
|
"nll_loss": 0.4740920066833496, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.040433935821056366, |
|
"rewards/margins": 0.010346042923629284, |
|
"rewards/rejected": -0.05077998712658882, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8599895123230205, |
|
"grad_norm": 2.3144073315770353, |
|
"learning_rate": 6.984302957695783e-06, |
|
"log_odds_chosen": 0.29515784978866577, |
|
"log_odds_ratio": -0.6521409749984741, |
|
"logits/chosen": -2.943692445755005, |
|
"logits/rejected": -2.9414219856262207, |
|
"logps/chosen": -0.8414862751960754, |
|
"logps/rejected": -1.0143965482711792, |
|
"loss": 0.504, |
|
"nll_loss": 0.4271189570426941, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04207431524991989, |
|
"rewards/margins": 0.008645516820251942, |
|
"rewards/rejected": -0.05071982741355896, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8704771893025695, |
|
"grad_norm": 2.371001107698096, |
|
"learning_rate": 6.942101345006233e-06, |
|
"log_odds_chosen": 0.2455742061138153, |
|
"log_odds_ratio": -0.7013689279556274, |
|
"logits/chosen": -2.933568239212036, |
|
"logits/rejected": -2.977832794189453, |
|
"logps/chosen": -0.8553229570388794, |
|
"logps/rejected": -1.0332233905792236, |
|
"loss": 0.5251, |
|
"nll_loss": 0.46586036682128906, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04276614636182785, |
|
"rewards/margins": 0.008895025588572025, |
|
"rewards/rejected": -0.0516611710190773, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8809648662821186, |
|
"grad_norm": 1.977587507180873, |
|
"learning_rate": 6.900655593423542e-06, |
|
"log_odds_chosen": 0.19387319684028625, |
|
"log_odds_ratio": -0.6939007639884949, |
|
"logits/chosen": -2.9483094215393066, |
|
"logits/rejected": -2.966421365737915, |
|
"logps/chosen": -0.8696029782295227, |
|
"logps/rejected": -1.0034617185592651, |
|
"loss": 0.5136, |
|
"nll_loss": 0.48451894521713257, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.043480150401592255, |
|
"rewards/margins": 0.006692938506603241, |
|
"rewards/rejected": -0.050173092633485794, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8914525432616676, |
|
"grad_norm": 2.0931872980265527, |
|
"learning_rate": 6.859943405700353e-06, |
|
"log_odds_chosen": 0.27469760179519653, |
|
"log_odds_ratio": -0.6496983170509338, |
|
"logits/chosen": -2.882544994354248, |
|
"logits/rejected": -2.907102584838867, |
|
"logps/chosen": -0.8309645652770996, |
|
"logps/rejected": -0.9983605146408081, |
|
"loss": 0.5054, |
|
"nll_loss": 0.4892002046108246, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0415482297539711, |
|
"rewards/margins": 0.008369805291295052, |
|
"rewards/rejected": -0.049918033182621, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9019402202412166, |
|
"grad_norm": 1.9059523373512675, |
|
"learning_rate": 6.819943394704736e-06, |
|
"log_odds_chosen": 0.2372780740261078, |
|
"log_odds_ratio": -0.6811105012893677, |
|
"logits/chosen": -2.9579243659973145, |
|
"logits/rejected": -2.9706907272338867, |
|
"logps/chosen": -0.8282278180122375, |
|
"logps/rejected": -0.982342541217804, |
|
"loss": 0.5277, |
|
"nll_loss": 0.4725598692893982, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0414113886654377, |
|
"rewards/margins": 0.007705743424594402, |
|
"rewards/rejected": -0.049117133021354675, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9124278972207656, |
|
"grad_norm": 1.892543797666968, |
|
"learning_rate": 6.780635036208105e-06, |
|
"log_odds_chosen": 0.287548691034317, |
|
"log_odds_ratio": -0.6644268035888672, |
|
"logits/chosen": -3.0049710273742676, |
|
"logits/rejected": -3.0431902408599854, |
|
"logps/chosen": -0.8620280027389526, |
|
"logps/rejected": -1.0551369190216064, |
|
"loss": 0.4935, |
|
"nll_loss": 0.4828346371650696, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04310140386223793, |
|
"rewards/margins": 0.009655444882810116, |
|
"rewards/rejected": -0.05275684595108032, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9229155742003147, |
|
"grad_norm": 1.6128728864363475, |
|
"learning_rate": 6.741998624632421e-06, |
|
"log_odds_chosen": 0.2844703197479248, |
|
"log_odds_ratio": -0.6617631316184998, |
|
"logits/chosen": -3.044353723526001, |
|
"logits/rejected": -3.0480034351348877, |
|
"logps/chosen": -0.808245837688446, |
|
"logps/rejected": -0.990073561668396, |
|
"loss": 0.4881, |
|
"nll_loss": 0.43747878074645996, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0404122956097126, |
|
"rewards/margins": 0.009091392159461975, |
|
"rewards/rejected": -0.04950368404388428, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9334032511798637, |
|
"grad_norm": 2.329046484142618, |
|
"learning_rate": 6.70401523153991e-06, |
|
"log_odds_chosen": 0.32051050662994385, |
|
"log_odds_ratio": -0.6461818218231201, |
|
"logits/chosen": -3.0071539878845215, |
|
"logits/rejected": -3.0232186317443848, |
|
"logps/chosen": -0.8105939030647278, |
|
"logps/rejected": -0.993729293346405, |
|
"loss": 0.4935, |
|
"nll_loss": 0.46434158086776733, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04052969440817833, |
|
"rewards/margins": 0.00915677472949028, |
|
"rewards/rejected": -0.04968646913766861, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9438909281594127, |
|
"grad_norm": 2.0086740635642073, |
|
"learning_rate": 6.666666666666667e-06, |
|
"log_odds_chosen": 0.2798821032047272, |
|
"log_odds_ratio": -0.664302408695221, |
|
"logits/chosen": -2.9259209632873535, |
|
"logits/rejected": -2.9381814002990723, |
|
"logps/chosen": -0.7818757891654968, |
|
"logps/rejected": -0.9571603536605835, |
|
"loss": 0.5239, |
|
"nll_loss": 0.4661863446235657, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03909378498792648, |
|
"rewards/margins": 0.008764232508838177, |
|
"rewards/rejected": -0.04785802215337753, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9543786051389617, |
|
"grad_norm": 2.068822950454407, |
|
"learning_rate": 6.629935441317959e-06, |
|
"log_odds_chosen": 0.479647159576416, |
|
"log_odds_ratio": -0.6314842700958252, |
|
"logits/chosen": -2.974902629852295, |
|
"logits/rejected": -2.9787256717681885, |
|
"logps/chosen": -0.8285977244377136, |
|
"logps/rejected": -1.1534996032714844, |
|
"loss": 0.5142, |
|
"nll_loss": 0.46572408080101013, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04142988473176956, |
|
"rewards/margins": 0.016245096921920776, |
|
"rewards/rejected": -0.05767498165369034, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9648662821185108, |
|
"grad_norm": 1.9606527520032064, |
|
"learning_rate": 6.593804733957872e-06, |
|
"log_odds_chosen": 0.3219223618507385, |
|
"log_odds_ratio": -0.649006187915802, |
|
"logits/chosen": -2.895038604736328, |
|
"logits/rejected": -2.9138269424438477, |
|
"logps/chosen": -0.7895429134368896, |
|
"logps/rejected": -0.9961126446723938, |
|
"loss": 0.4837, |
|
"nll_loss": 0.43109196424484253, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0394771471619606, |
|
"rewards/margins": 0.010328484699130058, |
|
"rewards/rejected": -0.04980562627315521, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9753539590980598, |
|
"grad_norm": 2.2191050074705405, |
|
"learning_rate": 6.55825835783953e-06, |
|
"log_odds_chosen": 0.21952304244041443, |
|
"log_odds_ratio": -0.6805615425109863, |
|
"logits/chosen": -2.8973617553710938, |
|
"logits/rejected": -2.900251865386963, |
|
"logps/chosen": -0.8730388879776001, |
|
"logps/rejected": -1.0255097150802612, |
|
"loss": 0.5135, |
|
"nll_loss": 0.5237925052642822, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04365193843841553, |
|
"rewards/margins": 0.007623549550771713, |
|
"rewards/rejected": -0.05127548426389694, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9858416360776088, |
|
"grad_norm": 1.9816052115352747, |
|
"learning_rate": 6.523280730534423e-06, |
|
"log_odds_chosen": 0.2554723024368286, |
|
"log_odds_ratio": -0.6887288689613342, |
|
"logits/chosen": -2.93623685836792, |
|
"logits/rejected": -2.9283607006073, |
|
"logps/chosen": -0.7786284685134888, |
|
"logps/rejected": -0.9273189306259155, |
|
"loss": 0.5095, |
|
"nll_loss": 0.4773116111755371, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03893141821026802, |
|
"rewards/margins": 0.007434530649334192, |
|
"rewards/rejected": -0.046365950256586075, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9963293130571579, |
|
"grad_norm": 2.074452011854083, |
|
"learning_rate": 6.488856845230502e-06, |
|
"log_odds_chosen": 0.2605803310871124, |
|
"log_odds_ratio": -0.6914502382278442, |
|
"logits/chosen": -2.9090209007263184, |
|
"logits/rejected": -2.9163012504577637, |
|
"logps/chosen": -0.8585780262947083, |
|
"logps/rejected": -1.0175925493240356, |
|
"loss": 0.5383, |
|
"nll_loss": 0.503527045249939, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04292890429496765, |
|
"rewards/margins": 0.007950720377266407, |
|
"rewards/rejected": -0.050879620015621185, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9994756161510225, |
|
"step": 953, |
|
"total_flos": 0.0, |
|
"train_loss": 0.56347276506494, |
|
"train_runtime": 19079.6454, |
|
"train_samples_per_second": 3.197, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 953, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|