diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,7 +2,7 @@ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994756161510225, - "eval_steps": 500, + "eval_steps": 100, "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, @@ -10,1722 +10,1893 @@ "log_history": [ { "epoch": 0.01048767697954903, - "grad_norm": 12.504458138350461, + "grad_norm": 11.269791488706222, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.1660214066505432, - "log_odds_ratio": -0.6960338354110718, - "logits/chosen": -2.542905330657959, - "logits/rejected": -2.5316882133483887, - "logps/chosen": -0.9998037219047546, - "logps/rejected": -1.0999689102172852, - "loss": 2.7433, - "nll_loss": 2.6550583839416504, + "log_odds_chosen": 0.1659858673810959, + "log_odds_ratio": -0.6960253715515137, + "logits/chosen": -2.5437328815460205, + "logits/rejected": -2.532463550567627, + "logps/chosen": -0.9995189905166626, + "logps/rejected": -1.0994223356246948, + "loss": 2.7426, + "nll_loss": 2.6549222469329834, "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04999018833041191, - "rewards/margins": 0.005008256994187832, - "rewards/rejected": -0.05499844625592232, + "rewards/chosen": -0.04997594282031059, + "rewards/margins": 0.004995172377675772, + "rewards/rejected": -0.0549711212515831, "step": 10 }, { "epoch": 0.02097535395909806, - "grad_norm": 3.296398746092505, + "grad_norm": 3.2083352232231426, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.1942831575870514, - "log_odds_ratio": -0.6660380959510803, - "logits/chosen": -3.148456335067749, - "logits/rejected": -3.171660900115967, - "logps/chosen": -0.7626909613609314, - "logps/rejected": -0.8731427192687988, - "loss": 0.563, - "nll_loss": 0.5225270986557007, + "log_odds_chosen": 0.19043061137199402, + "log_odds_ratio": -0.6681476831436157, + "logits/chosen": -3.149108409881592, + "logits/rejected": -3.1720833778381348, + "logps/chosen": -0.7663742303848267, + "logps/rejected": -0.8751267194747925, + "loss": 0.5628, + "nll_loss": 0.5223474502563477, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03813454881310463, - "rewards/margins": 0.00552258500829339, - "rewards/rejected": -0.04365713149309158, + "rewards/chosen": -0.03831871226429939, + "rewards/margins": 0.005437628366053104, + "rewards/rejected": -0.04375633969902992, "step": 20 }, { "epoch": 0.03146303093864709, - "grad_norm": 2.4400188978085695, + "grad_norm": 2.5438959591852903, "learning_rate": 6e-06, - "log_odds_chosen": 0.2339784801006317, - "log_odds_ratio": -0.6537522673606873, - "logits/chosen": -2.9630327224731445, - "logits/rejected": -2.9368481636047363, - "logps/chosen": -0.8345462679862976, - "logps/rejected": -0.9655241966247559, - "loss": 0.5355, - "nll_loss": 0.4940575659275055, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04172731190919876, - "rewards/margins": 0.0065488978289067745, - "rewards/rejected": -0.04827621206641197, + "log_odds_chosen": 0.24195578694343567, + "log_odds_ratio": -0.6542765498161316, + "logits/chosen": -2.974864959716797, + "logits/rejected": -2.9495468139648438, + "logps/chosen": -0.8126222491264343, + "logps/rejected": -0.9452728033065796, + "loss": 0.5332, + "nll_loss": 0.49184679985046387, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.04063111171126366, + "rewards/margins": 0.006632520817220211, + "rewards/rejected": -0.04726364091038704, "step": 30 }, { "epoch": 0.04195070791819612, - "grad_norm": 2.765802378357493, + "grad_norm": 2.6387687995337887, "learning_rate": 8.000000000000001e-06, - "log_odds_chosen": 0.15870003402233124, - "log_odds_ratio": -0.6969180107116699, - "logits/chosen": -2.8065195083618164, - "logits/rejected": -2.7910008430480957, - "logps/chosen": -0.8027766346931458, - "logps/rejected": -0.9165509343147278, - "loss": 0.5199, - "nll_loss": 0.48035889863967896, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04013883322477341, - "rewards/margins": 0.005688714794814587, - "rewards/rejected": -0.04582754150032997, + "log_odds_chosen": 0.16362647712230682, + "log_odds_ratio": -0.6933655738830566, + "logits/chosen": -2.880462408065796, + "logits/rejected": -2.8687615394592285, + "logps/chosen": -0.804220974445343, + "logps/rejected": -0.9210459589958191, + "loss": 0.5196, + "nll_loss": 0.4802279472351074, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04021105170249939, + "rewards/margins": 0.005841248203068972, + "rewards/rejected": -0.046052299439907074, "step": 40 }, { "epoch": 0.05243838489774515, - "grad_norm": 2.7404814506796704, + "grad_norm": 2.753689850023678, "learning_rate": 1e-05, - "log_odds_chosen": 0.24872338771820068, - "log_odds_ratio": -0.680080771446228, - "logits/chosen": -2.7704856395721436, - "logits/rejected": -2.77298641204834, - "logps/chosen": -0.7987793684005737, - "logps/rejected": -0.9668463468551636, - "loss": 0.5424, - "nll_loss": 0.48421746492385864, + "log_odds_chosen": 0.285639226436615, + "log_odds_ratio": -0.6802313327789307, + "logits/chosen": -2.7953293323516846, + "logits/rejected": -2.801888942718506, + "logps/chosen": -0.786683201789856, + "logps/rejected": -0.9665401577949524, + "loss": 0.5419, + "nll_loss": 0.4841863214969635, "rewards/accuracies": 0.59375, - "rewards/chosen": -0.03993896767497063, - "rewards/margins": 0.00840335339307785, - "rewards/rejected": -0.048342324793338776, + "rewards/chosen": -0.03933415934443474, + "rewards/margins": 0.008992847986519337, + "rewards/rejected": -0.0483270101249218, "step": 50 }, { "epoch": 0.06292606187729417, - "grad_norm": 2.7601739927853473, + "grad_norm": 2.9944776685892003, "learning_rate": 1.2e-05, - "log_odds_chosen": 0.21160352230072021, - "log_odds_ratio": -0.6764382123947144, - "logits/chosen": -3.0032615661621094, - "logits/rejected": -2.9960169792175293, - "logps/chosen": -0.7965995669364929, - "logps/rejected": -0.917363166809082, - "loss": 0.5463, - "nll_loss": 0.516124427318573, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.039829984307289124, - "rewards/margins": 0.006038171239197254, - "rewards/rejected": -0.045868150889873505, + "log_odds_chosen": 0.18177883327007294, + "log_odds_ratio": -0.6903725862503052, + "logits/chosen": -2.9931223392486572, + "logits/rejected": -2.9918220043182373, + "logps/chosen": -0.8297529220581055, + "logps/rejected": -0.9411457180976868, + "loss": 0.552, + "nll_loss": 0.5221412777900696, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.041487645357847214, + "rewards/margins": 0.005569641478359699, + "rewards/rejected": -0.04705728590488434, "step": 60 }, { "epoch": 0.07341373885684321, - "grad_norm": 3.2123267767300128, + "grad_norm": 2.7695397689637704, "learning_rate": 1.4e-05, - "log_odds_chosen": 0.19886036217212677, - "log_odds_ratio": -0.690485417842865, - "logits/chosen": -2.978163719177246, - "logits/rejected": -3.0078656673431396, - "logps/chosen": -0.8206535577774048, - "logps/rejected": -0.9310994148254395, - "loss": 0.5403, - "nll_loss": 0.530234694480896, + "log_odds_chosen": 0.18929322063922882, + "log_odds_ratio": -0.6986348032951355, + "logits/chosen": -2.928518056869507, + "logits/rejected": -2.952428102493286, + "logps/chosen": -0.8219515085220337, + "logps/rejected": -0.9297820925712585, + "loss": 0.5396, + "nll_loss": 0.5304870009422302, "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04103267565369606, - "rewards/margins": 0.0055222949013113976, - "rewards/rejected": -0.046554967761039734, + "rewards/chosen": -0.041097573935985565, + "rewards/margins": 0.00539153628051281, + "rewards/rejected": -0.046489108353853226, "step": 70 }, { "epoch": 0.08390141583639224, - "grad_norm": 3.267750524500123, + "grad_norm": 19.07043575642583, "learning_rate": 1.6000000000000003e-05, - "log_odds_chosen": 0.1725669652223587, - "log_odds_ratio": -0.689757764339447, - "logits/chosen": -2.963442087173462, - "logits/rejected": -2.953914165496826, - "logps/chosen": -0.8903671503067017, - "logps/rejected": -1.0184500217437744, - "loss": 0.5632, - "nll_loss": 0.48384732007980347, - "rewards/accuracies": 0.518750011920929, - "rewards/chosen": -0.0445183590054512, - "rewards/margins": 0.006404136773198843, - "rewards/rejected": -0.050922494381666183, + "log_odds_chosen": 0.18035998940467834, + "log_odds_ratio": -0.6837159395217896, + "logits/chosen": -2.7761759757995605, + "logits/rejected": -2.7504143714904785, + "logps/chosen": -0.8980675935745239, + "logps/rejected": -1.0327494144439697, + "loss": 0.5637, + "nll_loss": 0.48639434576034546, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04490337893366814, + "rewards/margins": 0.006734092719852924, + "rewards/rejected": -0.05163746327161789, "step": 80 }, { "epoch": 0.09438909281594127, - "grad_norm": 6.338896835312273, + "grad_norm": 3.590055499786838, "learning_rate": 1.8e-05, - "log_odds_chosen": 0.2590278387069702, - "log_odds_ratio": -0.6696828603744507, - "logits/chosen": -2.7556283473968506, - "logits/rejected": -2.759223461151123, - "logps/chosen": -0.8806008100509644, - "logps/rejected": -1.0427037477493286, - "loss": 0.5599, - "nll_loss": 0.49117976427078247, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.044030044227838516, - "rewards/margins": 0.008105142042040825, - "rewards/rejected": -0.05213518068194389, + "log_odds_chosen": 0.2686706781387329, + "log_odds_ratio": -0.6697625517845154, + "logits/chosen": -2.6665635108947754, + "logits/rejected": -2.664783239364624, + "logps/chosen": -0.8778934478759766, + "logps/rejected": -1.0414215326309204, + "loss": 0.5547, + "nll_loss": 0.49069148302078247, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04389467462897301, + "rewards/margins": 0.008176402188837528, + "rewards/rejected": -0.05207107588648796, "step": 90 }, { "epoch": 0.1048767697954903, - "grad_norm": 2.844482964932932, + "grad_norm": 3.4892365652397572, "learning_rate": 2e-05, - "log_odds_chosen": 0.20001336932182312, - "log_odds_ratio": -0.6672823429107666, - "logits/chosen": -2.836613178253174, - "logits/rejected": -2.826347827911377, - "logps/chosen": -0.8816211819648743, - "logps/rejected": -1.0050264596939087, - "loss": 0.5675, - "nll_loss": 0.5239149332046509, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.044081058353185654, - "rewards/margins": 0.006170268170535564, - "rewards/rejected": -0.05025132745504379, + "log_odds_chosen": 0.20862731337547302, + "log_odds_ratio": -0.6619225144386292, + "logits/chosen": -2.6862692832946777, + "logits/rejected": -2.673692226409912, + "logps/chosen": -0.9019685983657837, + "logps/rejected": -1.0285098552703857, + "loss": 0.5707, + "nll_loss": 0.5284041166305542, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.045098431408405304, + "rewards/margins": 0.006327061913907528, + "rewards/rejected": -0.051425494253635406, + "step": 100 + }, + { + "epoch": 0.1048767697954903, + "eval_log_odds_chosen": 0.2601078152656555, + "eval_log_odds_ratio": -0.6412674188613892, + "eval_logits/chosen": -2.5810625553131104, + "eval_logits/rejected": -2.5432214736938477, + "eval_logps/chosen": -0.9045050144195557, + "eval_logps/rejected": -1.077429175376892, + "eval_loss": 1.1267567873001099, + "eval_nll_loss": 1.0893229246139526, + "eval_rewards/accuracies": 0.636904776096344, + "eval_rewards/chosen": -0.045225247740745544, + "eval_rewards/margins": 0.008646207861602306, + "eval_rewards/rejected": -0.053871456533670425, + "eval_runtime": 137.3095, + "eval_samples_per_second": 14.522, + "eval_steps_per_second": 0.459, "step": 100 }, { "epoch": 0.11536444677503933, - "grad_norm": 2.717573270122186, + "grad_norm": 3.2610517529807947, "learning_rate": 1.9069251784911845e-05, - "log_odds_chosen": 0.26770642399787903, - "log_odds_ratio": -0.6399692296981812, - "logits/chosen": -2.8041529655456543, - "logits/rejected": -2.828374147415161, - "logps/chosen": -0.8482567071914673, - "logps/rejected": -1.021328330039978, - "loss": 0.568, - "nll_loss": 0.5094035863876343, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.042412832379341125, - "rewards/margins": 0.008653589524328709, - "rewards/rejected": -0.05106641724705696, + "log_odds_chosen": 0.2603258490562439, + "log_odds_ratio": -0.6417919397354126, + "logits/chosen": -2.632387399673462, + "logits/rejected": -2.6478092670440674, + "logps/chosen": -0.8465877771377563, + "logps/rejected": -1.0118043422698975, + "loss": 0.6247, + "nll_loss": 0.5625969171524048, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.04232938587665558, + "rewards/margins": 0.008260839618742466, + "rewards/rejected": -0.05059022456407547, "step": 110 }, { "epoch": 0.12585212375458835, - "grad_norm": 2.3522582585650906, + "grad_norm": 3.1929412426319397, "learning_rate": 1.825741858350554e-05, - "log_odds_chosen": 0.2770318388938904, - "log_odds_ratio": -0.6538770198822021, - "logits/chosen": -2.9046432971954346, - "logits/rejected": -2.921250343322754, - "logps/chosen": -0.8698671460151672, - "logps/rejected": -1.0593181848526, - "loss": 0.6048, - "nll_loss": 0.5620476007461548, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.0434933602809906, - "rewards/margins": 0.009472550824284554, - "rewards/rejected": -0.05296590179204941, + "log_odds_chosen": 0.242882639169693, + "log_odds_ratio": -0.6634533405303955, + "logits/chosen": -2.5689873695373535, + "logits/rejected": -2.536681652069092, + "logps/chosen": -0.8897055387496948, + "logps/rejected": -1.0510555505752563, + "loss": 0.6122, + "nll_loss": 0.5722111463546753, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04448527842760086, + "rewards/margins": 0.008067498914897442, + "rewards/rejected": -0.05255277082324028, "step": 120 }, { "epoch": 0.1363398007341374, - "grad_norm": 2.3512564845307704, + "grad_norm": 2.381017549769141, "learning_rate": 1.7541160386140587e-05, - "log_odds_chosen": 0.213302880525589, - "log_odds_ratio": -0.6861675977706909, - "logits/chosen": -2.926781177520752, - "logits/rejected": -2.930361747741699, - "logps/chosen": -0.9192083477973938, - "logps/rejected": -1.06519615650177, - "loss": 0.5923, - "nll_loss": 0.5574383735656738, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04596042260527611, - "rewards/margins": 0.007299385964870453, - "rewards/rejected": -0.05325980857014656, + "log_odds_chosen": 0.20046833157539368, + "log_odds_ratio": -0.6848769783973694, + "logits/chosen": -2.5286340713500977, + "logits/rejected": -2.503958225250244, + "logps/chosen": -0.914216160774231, + "logps/rejected": -1.0454927682876587, + "loss": 0.5902, + "nll_loss": 0.5541085004806519, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04571080952882767, + "rewards/margins": 0.006563832517713308, + "rewards/rejected": -0.052274636924266815, "step": 130 }, { "epoch": 0.14682747771368643, - "grad_norm": 2.2489368047485705, + "grad_norm": 2.2223756230190213, "learning_rate": 1.6903085094570334e-05, - "log_odds_chosen": 0.24789170920848846, - "log_odds_ratio": -0.655090868473053, - "logits/chosen": -2.9084389209747314, - "logits/rejected": -2.9173099994659424, - "logps/chosen": -0.9441210031509399, - "logps/rejected": -1.1045926809310913, - "loss": 0.5882, - "nll_loss": 0.5544429421424866, + "log_odds_chosen": 0.231459379196167, + "log_odds_ratio": -0.659934937953949, + "logits/chosen": -2.5273799896240234, + "logits/rejected": -2.5001978874206543, + "logps/chosen": -0.971345067024231, + "logps/rejected": -1.1217668056488037, + "loss": 0.5945, + "nll_loss": 0.564177393913269, "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.047206051647663116, - "rewards/margins": 0.008023588918149471, - "rewards/rejected": -0.05522964149713516, + "rewards/chosen": -0.04856724292039871, + "rewards/margins": 0.007521096616983414, + "rewards/rejected": -0.05608834698796272, "step": 140 }, { "epoch": 0.15731515469323545, - "grad_norm": 2.6715309670512903, + "grad_norm": 3.55513500930042, "learning_rate": 1.6329931618554523e-05, - "log_odds_chosen": 0.14654028415679932, - "log_odds_ratio": -0.7416929006576538, - "logits/chosen": -2.8286139965057373, - "logits/rejected": -2.842860698699951, - "logps/chosen": -0.9699670672416687, - "logps/rejected": -1.0669214725494385, - "loss": 0.5441, - "nll_loss": 0.5359360575675964, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.048498354852199554, - "rewards/margins": 0.004847715608775616, - "rewards/rejected": -0.053346067667007446, + "log_odds_chosen": 0.18197762966156006, + "log_odds_ratio": -0.735857367515564, + "logits/chosen": -2.5072078704833984, + "logits/rejected": -2.4954299926757812, + "logps/chosen": -0.9893903732299805, + "logps/rejected": -1.1020596027374268, + "loss": 0.553, + "nll_loss": 0.5451637506484985, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04946952313184738, + "rewards/margins": 0.00563345942646265, + "rewards/rejected": -0.055102985352277756, "step": 150 }, { "epoch": 0.16780283167278448, - "grad_norm": 2.4917874181934616, + "grad_norm": 2.753579339789172, "learning_rate": 1.5811388300841898e-05, - "log_odds_chosen": 0.19475655257701874, - "log_odds_ratio": -0.664051353931427, - "logits/chosen": -2.8252522945404053, - "logits/rejected": -2.839994192123413, - "logps/chosen": -0.9179447889328003, - "logps/rejected": -1.0352815389633179, - "loss": 0.6078, - "nll_loss": 0.5540346503257751, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.045897237956523895, - "rewards/margins": 0.005866840481758118, - "rewards/rejected": -0.05176408216357231, + "log_odds_chosen": 0.2206648290157318, + "log_odds_ratio": -0.6601604223251343, + "logits/chosen": -2.54675030708313, + "logits/rejected": -2.53303861618042, + "logps/chosen": -0.9035905599594116, + "logps/rejected": -1.0334583520889282, + "loss": 0.6058, + "nll_loss": 0.5536268949508667, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04517952725291252, + "rewards/margins": 0.006493390537798405, + "rewards/rejected": -0.05167291685938835, "step": 160 }, { "epoch": 0.1782905086523335, - "grad_norm": 2.493896039254152, + "grad_norm": 2.4463207326823673, "learning_rate": 1.533929977694741e-05, - "log_odds_chosen": 0.25445470213890076, - "log_odds_ratio": -0.6574397087097168, - "logits/chosen": -2.895998477935791, - "logits/rejected": -2.9125123023986816, - "logps/chosen": -0.8917832374572754, - "logps/rejected": -1.0586717128753662, - "loss": 0.5884, - "nll_loss": 0.5544494986534119, + "log_odds_chosen": 0.3002270460128784, + "log_odds_ratio": -0.6512068510055542, + "logits/chosen": -2.55534029006958, + "logits/rejected": -2.53877592086792, + "logps/chosen": -0.8796469569206238, + "logps/rejected": -1.063819169998169, + "loss": 0.5849, + "nll_loss": 0.5501061677932739, "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04458915814757347, - "rewards/margins": 0.008344428613781929, - "rewards/rejected": -0.05293358489871025, + "rewards/chosen": -0.04398234561085701, + "rewards/margins": 0.009208607487380505, + "rewards/rejected": -0.05319095402956009, "step": 170 }, { "epoch": 0.18877818563188253, - "grad_norm": 2.368451448201635, + "grad_norm": 2.404564536005987, "learning_rate": 1.49071198499986e-05, - "log_odds_chosen": 0.2552924156188965, - "log_odds_ratio": -0.6543556451797485, - "logits/chosen": -2.8886399269104004, - "logits/rejected": -2.905686378479004, - "logps/chosen": -0.9206914901733398, - "logps/rejected": -1.091048240661621, - "loss": 0.5686, - "nll_loss": 0.551173985004425, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04603457450866699, - "rewards/margins": 0.008517834357917309, - "rewards/rejected": -0.054552413523197174, + "log_odds_chosen": 0.2884437143802643, + "log_odds_ratio": -0.6566611528396606, + "logits/chosen": -2.5615644454956055, + "logits/rejected": -2.5457139015197754, + "logps/chosen": -0.9158379435539246, + "logps/rejected": -1.0882136821746826, + "loss": 0.5658, + "nll_loss": 0.5478283166885376, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04579189792275429, + "rewards/margins": 0.008618785068392754, + "rewards/rejected": -0.05441068485379219, "step": 180 }, { "epoch": 0.19926586261143156, - "grad_norm": 4.734046585912702, + "grad_norm": 3.2974100665964885, "learning_rate": 1.4509525002200235e-05, - "log_odds_chosen": 0.21173310279846191, - "log_odds_ratio": -0.6579927206039429, - "logits/chosen": -2.9355111122131348, - "logits/rejected": -2.952430009841919, - "logps/chosen": -0.9388859868049622, - "logps/rejected": -1.0733187198638916, - "loss": 0.5936, - "nll_loss": 0.6142745018005371, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04694430157542229, - "rewards/margins": 0.006721635349094868, - "rewards/rejected": -0.05366594344377518, + "log_odds_chosen": 0.23702804744243622, + "log_odds_ratio": -0.6489595770835876, + "logits/chosen": -2.644819498062134, + "logits/rejected": -2.6255900859832764, + "logps/chosen": -0.9308468103408813, + "logps/rejected": -1.0799505710601807, + "loss": 0.5902, + "nll_loss": 0.6114972829818726, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04654233902692795, + "rewards/margins": 0.007455187849700451, + "rewards/rejected": -0.053997524082660675, "step": 190 }, { "epoch": 0.2097535395909806, - "grad_norm": 2.2391424397427073, + "grad_norm": 2.498750011275506, "learning_rate": 1.4142135623730951e-05, - "log_odds_chosen": 0.28418153524398804, - "log_odds_ratio": -0.6668760180473328, - "logits/chosen": -2.873599052429199, - "logits/rejected": -2.9066414833068848, - "logps/chosen": -0.9204713702201843, - "logps/rejected": -1.128112554550171, - "loss": 0.5689, - "nll_loss": 0.5723541975021362, + "log_odds_chosen": 0.29194706678390503, + "log_odds_ratio": -0.6627270579338074, + "logits/chosen": -2.5841925144195557, + "logits/rejected": -2.5723748207092285, + "logps/chosen": -0.917371928691864, + "logps/rejected": -1.126123070716858, + "loss": 0.5663, + "nll_loss": 0.5702028274536133, "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.046023570001125336, - "rewards/margins": 0.010382059030234814, - "rewards/rejected": -0.056405626237392426, + "rewards/chosen": -0.04586859419941902, + "rewards/margins": 0.0104375584051013, + "rewards/rejected": -0.0563061460852623, + "step": 200 + }, + { + "epoch": 0.2097535395909806, + "eval_log_odds_chosen": 0.28631675243377686, + "eval_log_odds_ratio": -0.644675076007843, + "eval_logits/chosen": -2.5596959590911865, + "eval_logits/rejected": -2.537684917449951, + "eval_logps/chosen": -0.8798824548721313, + "eval_logps/rejected": -1.0675764083862305, + "eval_loss": 0.5741076469421387, + "eval_nll_loss": 0.5351698398590088, + "eval_rewards/accuracies": 0.6269841194152832, + "eval_rewards/chosen": -0.043994128704071045, + "eval_rewards/margins": 0.009384696371853352, + "eval_rewards/rejected": -0.05337882414460182, + "eval_runtime": 137.7655, + "eval_samples_per_second": 14.474, + "eval_steps_per_second": 0.457, "step": 200 }, { "epoch": 0.22024121657052964, - "grad_norm": 2.1684330770876152, + "grad_norm": 2.310322648029005, "learning_rate": 1.3801311186847084e-05, - "log_odds_chosen": 0.11919783055782318, - "log_odds_ratio": -0.7173447012901306, - "logits/chosen": -2.884079933166504, - "logits/rejected": -2.8981668949127197, - "logps/chosen": -0.8726099729537964, - "logps/rejected": -0.9488958120346069, - "loss": 0.5693, - "nll_loss": 0.5325449109077454, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04363049939274788, - "rewards/margins": 0.0038142912089824677, - "rewards/rejected": -0.04744479060173035, + "log_odds_chosen": 0.1077527180314064, + "log_odds_ratio": -0.7207110524177551, + "logits/chosen": -2.5468177795410156, + "logits/rejected": -2.544996976852417, + "logps/chosen": -0.8708482980728149, + "logps/rejected": -0.9297773241996765, + "loss": 0.5676, + "nll_loss": 0.5340272188186646, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04354241490364075, + "rewards/margins": 0.002946457825601101, + "rewards/rejected": -0.04648887366056442, "step": 210 }, { "epoch": 0.23072889355007867, - "grad_norm": 2.510753834710904, + "grad_norm": 2.578087834768522, "learning_rate": 1.3483997249264842e-05, - "log_odds_chosen": 0.18100012838840485, - "log_odds_ratio": -0.7047401666641235, - "logits/chosen": -2.8885810375213623, - "logits/rejected": -2.8980116844177246, - "logps/chosen": -0.8880792856216431, - "logps/rejected": -1.0071966648101807, - "loss": 0.5589, - "nll_loss": 0.5211626291275024, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.044403962790966034, - "rewards/margins": 0.005955878179520369, - "rewards/rejected": -0.05035984516143799, + "log_odds_chosen": 0.1988961100578308, + "log_odds_ratio": -0.6947790384292603, + "logits/chosen": -2.582960605621338, + "logits/rejected": -2.5871338844299316, + "logps/chosen": -0.8790571093559265, + "logps/rejected": -1.0056135654449463, + "loss": 0.5604, + "nll_loss": 0.5243524312973022, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.043952859938144684, + "rewards/margins": 0.0063278162851929665, + "rewards/rejected": -0.05028067156672478, "step": 220 }, { "epoch": 0.2412165705296277, - "grad_norm": 2.0148191421861705, + "grad_norm": 2.166025203586939, "learning_rate": 1.3187609467915744e-05, - "log_odds_chosen": 0.2717307209968567, - "log_odds_ratio": -0.6763201951980591, - "logits/chosen": -2.829516887664795, - "logits/rejected": -2.842909574508667, - "logps/chosen": -0.9367680549621582, - "logps/rejected": -1.1125657558441162, - "loss": 0.5701, - "nll_loss": 0.5263533592224121, + "log_odds_chosen": 0.28293663263320923, + "log_odds_ratio": -0.6729618906974792, + "logits/chosen": -2.409632682800293, + "logits/rejected": -2.407254695892334, + "logps/chosen": -0.923631489276886, + "logps/rejected": -1.1052097082138062, + "loss": 0.5705, + "nll_loss": 0.5283125638961792, "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04683841019868851, - "rewards/margins": 0.008789879269897938, - "rewards/rejected": -0.05562828853726387, + "rewards/chosen": -0.0461815744638443, + "rewards/margins": 0.009078909642994404, + "rewards/rejected": -0.05526048690080643, "step": 230 }, { "epoch": 0.2517042475091767, - "grad_norm": 2.286828850039024, + "grad_norm": 3.8331735868635723, "learning_rate": 1.2909944487358057e-05, - "log_odds_chosen": 0.2564060091972351, - "log_odds_ratio": -0.651031494140625, - "logits/chosen": -2.979280471801758, - "logits/rejected": -3.0063037872314453, - "logps/chosen": -0.9010913968086243, - "logps/rejected": -1.065353512763977, - "loss": 0.5799, - "nll_loss": 0.5546143054962158, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04505457356572151, - "rewards/margins": 0.008213100023567677, - "rewards/rejected": -0.053267668932676315, + "log_odds_chosen": 0.23131528496742249, + "log_odds_ratio": -0.6579959988594055, + "logits/chosen": -2.456178665161133, + "logits/rejected": -2.4356391429901123, + "logps/chosen": -0.9076164960861206, + "logps/rejected": -1.0572835206985474, + "loss": 0.5795, + "nll_loss": 0.5539125800132751, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.04538082331418991, + "rewards/margins": 0.007483348250389099, + "rewards/rejected": -0.05286417528986931, "step": 240 }, { "epoch": 0.26219192448872575, - "grad_norm": 3.959216899336302, + "grad_norm": 2.3703558112076846, "learning_rate": 1.2649110640673518e-05, - "log_odds_chosen": 0.2661912143230438, - "log_odds_ratio": -0.6746715307235718, - "logits/chosen": -2.9726908206939697, - "logits/rejected": -2.974113941192627, - "logps/chosen": -0.8829942941665649, - "logps/rejected": -1.0264866352081299, - "loss": 0.5502, - "nll_loss": 0.5201153755187988, + "log_odds_chosen": 0.24735364317893982, + "log_odds_ratio": -0.6739610433578491, + "logits/chosen": -2.3664963245391846, + "logits/rejected": -2.3717617988586426, + "logps/chosen": -0.8910790681838989, + "logps/rejected": -1.0310931205749512, + "loss": 0.552, + "nll_loss": 0.521629810333252, "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04414971172809601, - "rewards/margins": 0.007174622267484665, - "rewards/rejected": -0.05132433772087097, + "rewards/chosen": -0.044553957879543304, + "rewards/margins": 0.007000704295933247, + "rewards/rejected": -0.05155465751886368, "step": 250 }, { "epoch": 0.2726796014682748, - "grad_norm": 2.2699181039817, + "grad_norm": 2.3126279019982494, "learning_rate": 1.2403473458920845e-05, - "log_odds_chosen": 0.2342940866947174, - "log_odds_ratio": -0.6783974766731262, - "logits/chosen": -2.9759726524353027, - "logits/rejected": -2.9923360347747803, - "logps/chosen": -0.9042210578918457, - "logps/rejected": -1.0481539964675903, - "loss": 0.5304, - "nll_loss": 0.45657747983932495, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.0452110581099987, - "rewards/margins": 0.007196647580713034, - "rewards/rejected": -0.052407700568437576, + "log_odds_chosen": 0.21803805232048035, + "log_odds_ratio": -0.6705144047737122, + "logits/chosen": -2.3871326446533203, + "logits/rejected": -2.3607029914855957, + "logps/chosen": -0.8851995468139648, + "logps/rejected": -1.0210189819335938, + "loss": 0.5318, + "nll_loss": 0.45665669441223145, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.044259972870349884, + "rewards/margins": 0.006790975574404001, + "rewards/rejected": -0.05105094984173775, "step": 260 }, { "epoch": 0.2831672784478238, - "grad_norm": 2.380998150273162, + "grad_norm": 3.0127014090338062, "learning_rate": 1.2171612389003691e-05, - "log_odds_chosen": 0.17961958050727844, - "log_odds_ratio": -0.6983593702316284, - "logits/chosen": -2.938765525817871, - "logits/rejected": -2.965757369995117, - "logps/chosen": -0.9548166990280151, - "logps/rejected": -1.0895111560821533, - "loss": 0.5673, - "nll_loss": 0.5430372357368469, + "log_odds_chosen": 0.19388818740844727, + "log_odds_ratio": -0.6943486928939819, + "logits/chosen": -2.4198155403137207, + "logits/rejected": -2.3934123516082764, + "logps/chosen": -0.9466629028320312, + "logps/rejected": -1.087548017501831, + "loss": 0.5675, + "nll_loss": 0.5421209335327148, "rewards/accuracies": 0.4937500059604645, - "rewards/chosen": -0.04774082824587822, - "rewards/margins": 0.006734730210155249, - "rewards/rejected": -0.0544755645096302, + "rewards/chosen": -0.04733314737677574, + "rewards/margins": 0.007044260855764151, + "rewards/rejected": -0.05437741428613663, "step": 270 }, { "epoch": 0.29365495542737285, - "grad_norm": 2.0870887262121323, + "grad_norm": 2.1321408503589745, "learning_rate": 1.1952286093343936e-05, - "log_odds_chosen": 0.2291949987411499, - "log_odds_ratio": -0.6750219464302063, - "logits/chosen": -2.928527355194092, - "logits/rejected": -2.9543163776397705, - "logps/chosen": -0.9355181455612183, - "logps/rejected": -1.0729036331176758, - "loss": 0.5434, - "nll_loss": 0.47713321447372437, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.04677591472864151, - "rewards/margins": 0.006869266740977764, - "rewards/rejected": -0.05364518240094185, + "log_odds_chosen": 0.23094406723976135, + "log_odds_ratio": -0.6691509485244751, + "logits/chosen": -2.3476357460021973, + "logits/rejected": -2.3334882259368896, + "logps/chosen": -0.9389116168022156, + "logps/rejected": -1.0817869901657104, + "loss": 0.5428, + "nll_loss": 0.4766770303249359, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.04694558307528496, + "rewards/margins": 0.007143768016248941, + "rewards/rejected": -0.05408934876322746, "step": 280 }, { "epoch": 0.30414263240692185, - "grad_norm": 2.661552133228645, + "grad_norm": 2.9832356292712654, "learning_rate": 1.1744404390294071e-05, - "log_odds_chosen": 0.36491650342941284, - "log_odds_ratio": -0.620793879032135, - "logits/chosen": -2.880122661590576, - "logits/rejected": -2.8935391902923584, - "logps/chosen": -0.836012065410614, - "logps/rejected": -1.05286705493927, - "loss": 0.5596, - "nll_loss": 0.4885989725589752, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0418006032705307, - "rewards/margins": 0.010842744261026382, - "rewards/rejected": -0.05264334753155708, + "log_odds_chosen": 0.3523382842540741, + "log_odds_ratio": -0.6227424740791321, + "logits/chosen": -2.2994518280029297, + "logits/rejected": -2.2809882164001465, + "logps/chosen": -0.8515156507492065, + "logps/rejected": -1.0561182498931885, + "loss": 0.5582, + "nll_loss": 0.49160194396972656, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04257578402757645, + "rewards/margins": 0.010230125859379768, + "rewards/rejected": -0.052805911749601364, "step": 290 }, { "epoch": 0.3146303093864709, - "grad_norm": 3.127285518362044, + "grad_norm": 2.455429454160177, "learning_rate": 1.1547005383792517e-05, - "log_odds_chosen": 0.255328893661499, - "log_odds_ratio": -0.6939107179641724, - "logits/chosen": -2.9603378772735596, - "logits/rejected": -2.992128372192383, - "logps/chosen": -0.8731514811515808, - "logps/rejected": -1.0526010990142822, - "loss": 0.5835, - "nll_loss": 0.5112031102180481, + "log_odds_chosen": 0.30407971143722534, + "log_odds_ratio": -0.6693702340126038, + "logits/chosen": -2.4093105792999268, + "logits/rejected": -2.360572099685669, + "logps/chosen": -0.8702648878097534, + "logps/rejected": -1.071603775024414, + "loss": 0.5817, + "nll_loss": 0.509266197681427, "rewards/accuracies": 0.5625, - "rewards/chosen": -0.0436575748026371, - "rewards/margins": 0.008972481824457645, - "rewards/rejected": -0.052630048245191574, + "rewards/chosen": -0.04351323843002319, + "rewards/margins": 0.010066945105791092, + "rewards/rejected": -0.053580187261104584, + "step": 300 + }, + { + "epoch": 0.3146303093864709, + "eval_log_odds_chosen": 0.2780136466026306, + "eval_log_odds_ratio": -0.650335431098938, + "eval_logits/chosen": -2.4818081855773926, + "eval_logits/rejected": -2.4498839378356934, + "eval_logps/chosen": -0.8807685971260071, + "eval_logps/rejected": -1.0628403425216675, + "eval_loss": 0.5571724772453308, + "eval_nll_loss": 0.5207235217094421, + "eval_rewards/accuracies": 0.6190476417541504, + "eval_rewards/chosen": -0.04403843358159065, + "eval_rewards/margins": 0.009103580377995968, + "eval_rewards/rejected": -0.053142011165618896, + "eval_runtime": 140.9657, + "eval_samples_per_second": 14.145, + "eval_steps_per_second": 0.447, "step": 300 }, { "epoch": 0.3251179863660199, - "grad_norm": 2.013637214040506, + "grad_norm": 2.1236305912642894, "learning_rate": 1.1359236684941297e-05, - "log_odds_chosen": 0.21040907502174377, - "log_odds_ratio": -0.688109278678894, - "logits/chosen": -2.9860305786132812, - "logits/rejected": -2.9820261001586914, - "logps/chosen": -0.9089478254318237, - "logps/rejected": -1.0382112264633179, - "loss": 0.585, - "nll_loss": 0.5399721264839172, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.045447397977113724, - "rewards/margins": 0.006463165394961834, - "rewards/rejected": -0.051910560578107834, + "log_odds_chosen": 0.2490301877260208, + "log_odds_ratio": -0.6818236112594604, + "logits/chosen": -2.438469409942627, + "logits/rejected": -2.4002931118011475, + "logps/chosen": -0.9081015586853027, + "logps/rejected": -1.0690175294876099, + "loss": 0.5876, + "nll_loss": 0.5490554571151733, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.045405078679323196, + "rewards/margins": 0.008045798167586327, + "rewards/rejected": -0.053450871258974075, "step": 310 }, { "epoch": 0.33560566334556896, - "grad_norm": 2.1577553752792995, + "grad_norm": 2.120713978353275, "learning_rate": 1.118033988749895e-05, - "log_odds_chosen": 0.27985960245132446, - "log_odds_ratio": -0.6601210832595825, - "logits/chosen": -3.0387003421783447, - "logits/rejected": -3.0464096069335938, - "logps/chosen": -0.9086373448371887, - "logps/rejected": -1.0836986303329468, - "loss": 0.5243, - "nll_loss": 0.4922841191291809, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.045431867241859436, - "rewards/margins": 0.008753069676458836, - "rewards/rejected": -0.0541849359869957, + "log_odds_chosen": 0.24050185084342957, + "log_odds_ratio": -0.6646271347999573, + "logits/chosen": -2.427072286605835, + "logits/rejected": -2.400460720062256, + "logps/chosen": -0.919741153717041, + "logps/rejected": -1.0707252025604248, + "loss": 0.5255, + "nll_loss": 0.4938685894012451, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04598705843091011, + "rewards/margins": 0.0075491988100111485, + "rewards/rejected": -0.05353625863790512, "step": 320 }, { "epoch": 0.34609334032511796, - "grad_norm": 2.422690319169778, + "grad_norm": 3.9130583978149622, "learning_rate": 1.1009637651263608e-05, - "log_odds_chosen": 0.28255337476730347, - "log_odds_ratio": -0.6909259557723999, - "logits/chosen": -2.950887441635132, - "logits/rejected": -2.9948947429656982, - "logps/chosen": -0.9054603576660156, - "logps/rejected": -1.0888211727142334, - "loss": 0.5544, - "nll_loss": 0.5376341342926025, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04527302458882332, - "rewards/margins": 0.009168041869997978, - "rewards/rejected": -0.05444106459617615, + "log_odds_chosen": 0.25334832072257996, + "log_odds_ratio": -0.6984423995018005, + "logits/chosen": -2.404737949371338, + "logits/rejected": -2.3937125205993652, + "logps/chosen": -0.9015901684761047, + "logps/rejected": -1.0603028535842896, + "loss": 0.5557, + "nll_loss": 0.5412198305130005, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04507950693368912, + "rewards/margins": 0.007935632951557636, + "rewards/rejected": -0.05301513522863388, "step": 330 }, { "epoch": 0.356581017304667, - "grad_norm": 2.2975046406882798, + "grad_norm": 2.354938087613489, "learning_rate": 1.0846522890932809e-05, - "log_odds_chosen": 0.2153971642255783, - "log_odds_ratio": -0.6926898956298828, - "logits/chosen": -2.9686572551727295, - "logits/rejected": -3.0199432373046875, - "logps/chosen": -0.8590608835220337, - "logps/rejected": -1.00636887550354, + "log_odds_chosen": 0.17314568161964417, + "log_odds_ratio": -0.6990125775337219, + "logits/chosen": -2.3741681575775146, + "logits/rejected": -2.372586727142334, + "logps/chosen": -0.8716468811035156, + "logps/rejected": -0.989061713218689, "loss": 0.5708, - "nll_loss": 0.5127817392349243, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.042953044176101685, - "rewards/margins": 0.007365405559539795, - "rewards/rejected": -0.05031844973564148, + "nll_loss": 0.5135380029678345, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.04358234256505966, + "rewards/margins": 0.005870741792023182, + "rewards/rejected": -0.04945308715105057, "step": 340 }, { "epoch": 0.36706869428421607, - "grad_norm": 2.135727653321979, + "grad_norm": 2.2044319965087382, "learning_rate": 1.0690449676496977e-05, - "log_odds_chosen": 0.2665565609931946, - "log_odds_ratio": -0.6829238533973694, - "logits/chosen": -3.044860363006592, - "logits/rejected": -3.0616378784179688, - "logps/chosen": -0.8791500329971313, - "logps/rejected": -1.0402672290802002, - "loss": 0.5495, - "nll_loss": 0.5228344202041626, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04395749792456627, - "rewards/margins": 0.00805586390197277, - "rewards/rejected": -0.05201335996389389, + "log_odds_chosen": 0.24199283123016357, + "log_odds_ratio": -0.687169075012207, + "logits/chosen": -2.426055908203125, + "logits/rejected": -2.37978196144104, + "logps/chosen": -0.8775957226753235, + "logps/rejected": -1.019217848777771, + "loss": 0.5463, + "nll_loss": 0.5177103281021118, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04387979209423065, + "rewards/margins": 0.007081110030412674, + "rewards/rejected": -0.05096089839935303, "step": 350 }, { "epoch": 0.37755637126376507, - "grad_norm": 3.150177435714442, + "grad_norm": 1.951247314132421, "learning_rate": 1.0540925533894598e-05, - "log_odds_chosen": 0.4033277928829193, - "log_odds_ratio": -0.602225124835968, - "logits/chosen": -2.9472672939300537, - "logits/rejected": -2.975858211517334, - "logps/chosen": -0.8669608235359192, - "logps/rejected": -1.110353708267212, - "loss": 0.5494, - "nll_loss": 0.5087054371833801, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.0433480478823185, - "rewards/margins": 0.01216964516788721, - "rewards/rejected": -0.05551769211888313, + "log_odds_chosen": 0.37273699045181274, + "log_odds_ratio": -0.6097368001937866, + "logits/chosen": -2.3991737365722656, + "logits/rejected": -2.3913745880126953, + "logps/chosen": -0.8743513226509094, + "logps/rejected": -1.1015660762786865, + "loss": 0.5509, + "nll_loss": 0.5144286155700684, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.04371756687760353, + "rewards/margins": 0.011360744014382362, + "rewards/rejected": -0.05507831647992134, "step": 360 }, { "epoch": 0.3880440482433141, - "grad_norm": 2.130197231019511, + "grad_norm": 2.1160835291077307, "learning_rate": 1.0397504898200728e-05, - "log_odds_chosen": 0.3966829478740692, - "log_odds_ratio": -0.6142522096633911, - "logits/chosen": -3.0528526306152344, - "logits/rejected": -3.0623490810394287, - "logps/chosen": -0.8640265464782715, - "logps/rejected": -1.1243717670440674, - "loss": 0.5232, - "nll_loss": 0.5101068615913391, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.043201327323913574, - "rewards/margins": 0.013017257675528526, - "rewards/rejected": -0.05621858313679695, + "log_odds_chosen": 0.37601083517074585, + "log_odds_ratio": -0.6155336499214172, + "logits/chosen": -2.4748804569244385, + "logits/rejected": -2.4376637935638428, + "logps/chosen": -0.8649997711181641, + "logps/rejected": -1.1139612197875977, + "loss": 0.5205, + "nll_loss": 0.502615749835968, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.043249987065792084, + "rewards/margins": 0.012448069639503956, + "rewards/rejected": -0.05569805949926376, "step": 370 }, { "epoch": 0.3985317252228631, - "grad_norm": 2.415549044992692, + "grad_norm": 2.226775744348268, "learning_rate": 1.0259783520851543e-05, - "log_odds_chosen": 0.46208301186561584, - "log_odds_ratio": -0.5873923301696777, - "logits/chosen": -3.055903196334839, - "logits/rejected": -3.089763879776001, - "logps/chosen": -0.8685981035232544, - "logps/rejected": -1.1247217655181885, - "loss": 0.5376, - "nll_loss": 0.5167646408081055, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.0434299036860466, - "rewards/margins": 0.01280617993324995, - "rewards/rejected": -0.056236088275909424, + "log_odds_chosen": 0.429561048746109, + "log_odds_ratio": -0.5968413949012756, + "logits/chosen": -2.519869089126587, + "logits/rejected": -2.494752883911133, + "logps/chosen": -0.8703508377075195, + "logps/rejected": -1.1160125732421875, + "loss": 0.5374, + "nll_loss": 0.5153257846832275, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.043517544865608215, + "rewards/margins": 0.012283083982765675, + "rewards/rejected": -0.05580062419176102, "step": 380 }, { "epoch": 0.4090194022024122, - "grad_norm": 2.4197618087673036, + "grad_norm": 2.401246607233204, "learning_rate": 1.0127393670836667e-05, - "log_odds_chosen": 0.08936772495508194, - "log_odds_ratio": -0.7186132073402405, - "logits/chosen": -2.998857021331787, - "logits/rejected": -3.021352529525757, - "logps/chosen": -0.9128287434577942, - "logps/rejected": -0.9754525423049927, - "loss": 0.5571, - "nll_loss": 0.5319759845733643, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04564143717288971, - "rewards/margins": 0.0031311833299696445, - "rewards/rejected": -0.048772621899843216, + "log_odds_chosen": 0.08164841681718826, + "log_odds_ratio": -0.730138897895813, + "logits/chosen": -2.456601619720459, + "logits/rejected": -2.4586360454559326, + "logps/chosen": -0.9149462580680847, + "logps/rejected": -0.9739354848861694, + "loss": 0.5576, + "nll_loss": 0.5350494384765625, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.045747317373752594, + "rewards/margins": 0.0029494580812752247, + "rewards/rejected": -0.048696767538785934, "step": 390 }, { "epoch": 0.4195070791819612, - "grad_norm": 2.0748995530757424, + "grad_norm": 1.9835782676616263, "learning_rate": 1e-05, - "log_odds_chosen": 0.23965713381767273, - "log_odds_ratio": -0.6899853348731995, - "logits/chosen": -2.883575201034546, - "logits/rejected": -2.908125400543213, - "logps/chosen": -0.9490350484848022, - "logps/rejected": -1.1106139421463013, - "loss": 0.5725, - "nll_loss": 0.5262094736099243, + "log_odds_chosen": 0.24804361164569855, + "log_odds_ratio": -0.6891428232192993, + "logits/chosen": -2.352999210357666, + "logits/rejected": -2.3628151416778564, + "logps/chosen": -0.9478782415390015, + "logps/rejected": -1.1192692518234253, + "loss": 0.5724, + "nll_loss": 0.5249911546707153, "rewards/accuracies": 0.53125, - "rewards/chosen": -0.04745175316929817, - "rewards/margins": 0.00807894580066204, - "rewards/rejected": -0.05553068965673447, + "rewards/chosen": -0.047393910586833954, + "rewards/margins": 0.008569559082388878, + "rewards/rejected": -0.05596347525715828, + "step": 400 + }, + { + "epoch": 0.4195070791819612, + "eval_log_odds_chosen": 0.2819042503833771, + "eval_log_odds_ratio": -0.6550887227058411, + "eval_logits/chosen": -2.4376399517059326, + "eval_logits/rejected": -2.4026126861572266, + "eval_logps/chosen": -0.8510361313819885, + "eval_logps/rejected": -1.029338002204895, + "eval_loss": 0.5415622591972351, + "eval_nll_loss": 0.5060027837753296, + "eval_rewards/accuracies": 0.625, + "eval_rewards/chosen": -0.042551808059215546, + "eval_rewards/margins": 0.008915101177990437, + "eval_rewards/rejected": -0.051466912031173706, + "eval_runtime": 135.9814, + "eval_samples_per_second": 14.664, + "eval_steps_per_second": 0.463, "step": 400 }, { "epoch": 0.4299947561615102, - "grad_norm": 2.0498490112152026, + "grad_norm": 2.0741408388417053, "learning_rate": 9.877295966495898e-06, - "log_odds_chosen": 0.14244404435157776, - "log_odds_ratio": -0.7278560996055603, - "logits/chosen": -2.988100051879883, - "logits/rejected": -2.9914164543151855, - "logps/chosen": -0.8709594011306763, - "logps/rejected": -0.9773006439208984, - "loss": 0.5455, - "nll_loss": 0.4832683503627777, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.04354798048734665, - "rewards/margins": 0.0053170593455433846, - "rewards/rejected": -0.04886503517627716, + "log_odds_chosen": 0.14674368500709534, + "log_odds_ratio": -0.7315293550491333, + "logits/chosen": -2.453657388687134, + "logits/rejected": -2.4033920764923096, + "logps/chosen": -0.8739027976989746, + "logps/rejected": -0.9881707429885864, + "loss": 0.546, + "nll_loss": 0.48288026452064514, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04369514063000679, + "rewards/margins": 0.0057133943773806095, + "rewards/rejected": -0.04940853267908096, "step": 410 }, { "epoch": 0.4404824331410593, - "grad_norm": 1.9311064341389872, + "grad_norm": 1.862967371631046, "learning_rate": 9.759000729485331e-06, - "log_odds_chosen": 0.30063071846961975, - "log_odds_ratio": -0.643203854560852, - "logits/chosen": -2.9488558769226074, - "logits/rejected": -2.9841551780700684, - "logps/chosen": -0.8707404136657715, - "logps/rejected": -1.0532442331314087, - "loss": 0.5355, - "nll_loss": 0.474843829870224, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04353701323270798, - "rewards/margins": 0.009125196374952793, - "rewards/rejected": -0.05266221612691879, + "log_odds_chosen": 0.3599195182323456, + "log_odds_ratio": -0.6281547546386719, + "logits/chosen": -2.3640646934509277, + "logits/rejected": -2.3699867725372314, + "logps/chosen": -0.8427717089653015, + "logps/rejected": -1.0523298978805542, + "loss": 0.5338, + "nll_loss": 0.475394070148468, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.042138583958148956, + "rewards/margins": 0.01047790888696909, + "rewards/rejected": -0.05261648818850517, "step": 420 }, { "epoch": 0.4509701101206083, - "grad_norm": 2.119895291758326, + "grad_norm": 2.2577027347270673, "learning_rate": 9.644856443408244e-06, - "log_odds_chosen": 0.2837393879890442, - "log_odds_ratio": -0.6551750898361206, - "logits/chosen": -2.9840757846832275, - "logits/rejected": -2.9921929836273193, - "logps/chosen": -0.8468173146247864, - "logps/rejected": -1.0135347843170166, - "loss": 0.5557, - "nll_loss": 0.5443450212478638, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04234086349606514, - "rewards/margins": 0.00833587534725666, - "rewards/rejected": -0.05067674070596695, + "log_odds_chosen": 0.2772213816642761, + "log_odds_ratio": -0.6547843217849731, + "logits/chosen": -2.463442325592041, + "logits/rejected": -2.4424116611480713, + "logps/chosen": -0.8533428311347961, + "logps/rejected": -1.0268352031707764, + "loss": 0.5561, + "nll_loss": 0.5445196628570557, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.04266713932156563, + "rewards/margins": 0.008674620650708675, + "rewards/rejected": -0.051341764628887177, "step": 430 }, { "epoch": 0.46145778710015734, - "grad_norm": 2.095435518308805, + "grad_norm": 2.148366110891132, "learning_rate": 9.534625892455923e-06, - "log_odds_chosen": 0.2355252504348755, - "log_odds_ratio": -0.6598283648490906, - "logits/chosen": -3.0252740383148193, - "logits/rejected": -3.045849323272705, - "logps/chosen": -0.8709392547607422, - "logps/rejected": -1.0179613828659058, - "loss": 0.5508, - "nll_loss": 0.5189236998558044, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04354696720838547, - "rewards/margins": 0.0073511130176484585, - "rewards/rejected": -0.050898075103759766, + "log_odds_chosen": 0.251740038394928, + "log_odds_ratio": -0.6593549847602844, + "logits/chosen": -2.433262586593628, + "logits/rejected": -2.400451183319092, + "logps/chosen": -0.869005560874939, + "logps/rejected": -1.0262689590454102, + "loss": 0.5514, + "nll_loss": 0.518151044845581, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.043450284749269485, + "rewards/margins": 0.007863158360123634, + "rewards/rejected": -0.05131344124674797, "step": 440 }, { "epoch": 0.47194546407970633, - "grad_norm": 1.9017756846669818, + "grad_norm": 1.9641531890945303, "learning_rate": 9.428090415820635e-06, - "log_odds_chosen": 0.34075412154197693, - "log_odds_ratio": -0.6583858728408813, - "logits/chosen": -3.0218703746795654, - "logits/rejected": -3.0481696128845215, - "logps/chosen": -0.8293315768241882, - "logps/rejected": -1.047191858291626, - "loss": 0.5286, - "nll_loss": 0.4964592456817627, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04146658256649971, - "rewards/margins": 0.010893006809055805, - "rewards/rejected": -0.05235959216952324, + "log_odds_chosen": 0.3584665358066559, + "log_odds_ratio": -0.6613593101501465, + "logits/chosen": -2.3730902671813965, + "logits/rejected": -2.3335137367248535, + "logps/chosen": -0.8309770822525024, + "logps/rejected": -1.0618839263916016, + "loss": 0.5284, + "nll_loss": 0.4951680600643158, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.041548848152160645, + "rewards/margins": 0.011545347049832344, + "rewards/rejected": -0.05309419706463814, "step": 450 }, { "epoch": 0.4824331410592554, - "grad_norm": 2.079766146123277, + "grad_norm": 2.1323921754635955, "learning_rate": 9.325048082403139e-06, - "log_odds_chosen": 0.16855968534946442, - "log_odds_ratio": -0.711928129196167, - "logits/chosen": -3.0086510181427, - "logits/rejected": -3.0489156246185303, - "logps/chosen": -0.9442957043647766, - "logps/rejected": -1.072997808456421, - "loss": 0.5326, - "nll_loss": 0.5338221788406372, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.04721478372812271, - "rewards/margins": 0.00643510278314352, - "rewards/rejected": -0.05364988371729851, + "log_odds_chosen": 0.18219377100467682, + "log_odds_ratio": -0.7052776217460632, + "logits/chosen": -2.417771577835083, + "logits/rejected": -2.391197681427002, + "logps/chosen": -0.9514438509941101, + "logps/rejected": -1.081947922706604, + "loss": 0.532, + "nll_loss": 0.5332220792770386, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.047572195529937744, + "rewards/margins": 0.006525200791656971, + "rewards/rejected": -0.05409740284085274, "step": 460 }, { "epoch": 0.4929208180388044, - "grad_norm": 2.4868491558153085, + "grad_norm": 2.0935836198507145, "learning_rate": 9.225312080288851e-06, - "log_odds_chosen": 0.23586861789226532, - "log_odds_ratio": -0.6902174949645996, - "logits/chosen": -2.986264705657959, - "logits/rejected": -3.0127644538879395, - "logps/chosen": -0.8882457613945007, - "logps/rejected": -1.034985899925232, - "loss": 0.5413, - "nll_loss": 0.5090312361717224, - "rewards/accuracies": 0.5562499761581421, - "rewards/chosen": -0.044412292540073395, - "rewards/margins": 0.007337009999901056, - "rewards/rejected": -0.051749296486377716, + "log_odds_chosen": 0.2585422098636627, + "log_odds_ratio": -0.681999683380127, + "logits/chosen": -2.4441823959350586, + "logits/rejected": -2.418107271194458, + "logps/chosen": -0.8849735260009766, + "logps/rejected": -1.0435359477996826, + "loss": 0.5416, + "nll_loss": 0.5094045400619507, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.04424867779016495, + "rewards/margins": 0.007928118109703064, + "rewards/rejected": -0.05217679589986801, "step": 470 }, { "epoch": 0.5034084950183534, - "grad_norm": 2.0043501739666882, + "grad_norm": 2.077184991073431, "learning_rate": 9.12870929175277e-06, - "log_odds_chosen": 0.17604230344295502, - "log_odds_ratio": -0.707550048828125, - "logits/chosen": -3.088604211807251, - "logits/rejected": -3.12184476852417, - "logps/chosen": -0.8456010818481445, - "logps/rejected": -0.9586717486381531, - "loss": 0.5178, - "nll_loss": 0.5126105546951294, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.042280055582523346, - "rewards/margins": 0.005653535481542349, - "rewards/rejected": -0.047933585941791534, + "log_odds_chosen": 0.1411927044391632, + "log_odds_ratio": -0.7211004495620728, + "logits/chosen": -2.478450298309326, + "logits/rejected": -2.4527339935302734, + "logps/chosen": -0.8615080118179321, + "logps/rejected": -0.9589959979057312, + "loss": 0.5176, + "nll_loss": 0.5134377479553223, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.04307539761066437, + "rewards/margins": 0.004874409642070532, + "rewards/rejected": -0.04794980585575104, "step": 480 }, { "epoch": 0.5138961719979025, - "grad_norm": 1.9415978406566505, + "grad_norm": 1.9047433825748046, "learning_rate": 9.035079029052514e-06, - "log_odds_chosen": 0.22476902604103088, - "log_odds_ratio": -0.6716736555099487, - "logits/chosen": -3.003417491912842, - "logits/rejected": -3.0048608779907227, - "logps/chosen": -0.9196673631668091, - "logps/rejected": -1.0358223915100098, - "loss": 0.5397, - "nll_loss": 0.5024985671043396, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.045983362942934036, - "rewards/margins": 0.005807754583656788, - "rewards/rejected": -0.051791124045848846, + "log_odds_chosen": 0.20130577683448792, + "log_odds_ratio": -0.687514066696167, + "logits/chosen": -2.404505491256714, + "logits/rejected": -2.3535995483398438, + "logps/chosen": -0.9324936866760254, + "logps/rejected": -1.036684274673462, + "loss": 0.54, + "nll_loss": 0.5031000375747681, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.04662468656897545, + "rewards/margins": 0.0052095321007072926, + "rewards/rejected": -0.05183422565460205, "step": 490 }, { "epoch": 0.5243838489774515, - "grad_norm": 2.2353701695425423, + "grad_norm": 2.2722995091864013, "learning_rate": 8.94427190999916e-06, - "log_odds_chosen": 0.20684054493904114, - "log_odds_ratio": -0.698712944984436, - "logits/chosen": -3.0111751556396484, - "logits/rejected": -3.0036330223083496, - "logps/chosen": -0.8826943635940552, - "logps/rejected": -1.0074814558029175, - "loss": 0.548, - "nll_loss": 0.5235316157341003, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.0441347137093544, - "rewards/margins": 0.006239361595362425, - "rewards/rejected": -0.050374072045087814, + "log_odds_chosen": 0.20798742771148682, + "log_odds_ratio": -0.6965998411178589, + "logits/chosen": -2.503917694091797, + "logits/rejected": -2.4567532539367676, + "logps/chosen": -0.882551372051239, + "logps/rejected": -1.009610652923584, + "loss": 0.5486, + "nll_loss": 0.5240460634231567, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.04412757605314255, + "rewards/margins": 0.00635296106338501, + "rewards/rejected": -0.05048053711652756, + "step": 500 + }, + { + "epoch": 0.5243838489774515, + "eval_log_odds_chosen": 0.319318950176239, + "eval_log_odds_ratio": -0.6438891291618347, + "eval_logits/chosen": -2.471830368041992, + "eval_logits/rejected": -2.437340021133423, + "eval_logps/chosen": -0.8492264151573181, + "eval_logps/rejected": -1.0513862371444702, + "eval_loss": 0.5343749523162842, + "eval_nll_loss": 0.49899229407310486, + "eval_rewards/accuracies": 0.6150793433189392, + "eval_rewards/chosen": -0.042461320757865906, + "eval_rewards/margins": 0.010107995010912418, + "eval_rewards/rejected": -0.05256931483745575, + "eval_runtime": 137.8752, + "eval_samples_per_second": 14.462, + "eval_steps_per_second": 0.457, "step": 500 }, { "epoch": 0.5348715259570005, - "grad_norm": 1.742537477144132, + "grad_norm": 1.7543648883606602, "learning_rate": 8.856148855400955e-06, - "log_odds_chosen": 0.3066679835319519, - "log_odds_ratio": -0.6453306674957275, - "logits/chosen": -2.9636032581329346, - "logits/rejected": -2.97407865524292, - "logps/chosen": -0.8404191136360168, - "logps/rejected": -1.0267155170440674, - "loss": 0.5264, - "nll_loss": 0.5354185104370117, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04202095791697502, - "rewards/margins": 0.009314822033047676, - "rewards/rejected": -0.05133577436208725, + "log_odds_chosen": 0.290159672498703, + "log_odds_ratio": -0.6539579629898071, + "logits/chosen": -2.5112712383270264, + "logits/rejected": -2.4847443103790283, + "logps/chosen": -0.8425674438476562, + "logps/rejected": -1.0141561031341553, + "loss": 0.5278, + "nll_loss": 0.5365942120552063, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.04212837293744087, + "rewards/margins": 0.00857943668961525, + "rewards/rejected": -0.05070780962705612, "step": 510 }, { "epoch": 0.5453592029365496, - "grad_norm": 1.6799388590726438, + "grad_norm": 1.6585367227101162, "learning_rate": 8.770580193070294e-06, - "log_odds_chosen": 0.24468369781970978, - "log_odds_ratio": -0.6710330247879028, - "logits/chosen": -2.959213972091675, - "logits/rejected": -2.966728687286377, - "logps/chosen": -0.9035038948059082, - "logps/rejected": -1.0690029859542847, - "loss": 0.5366, - "nll_loss": 0.47406935691833496, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.04517520219087601, - "rewards/margins": 0.008274954743683338, - "rewards/rejected": -0.053450148552656174, + "log_odds_chosen": 0.23928451538085938, + "log_odds_ratio": -0.6756108999252319, + "logits/chosen": -2.4492619037628174, + "logits/rejected": -2.413327693939209, + "logps/chosen": -0.9059408903121948, + "logps/rejected": -1.068650245666504, + "loss": 0.5372, + "nll_loss": 0.47487586736679077, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0452970452606678, + "rewards/margins": 0.008135473355650902, + "rewards/rejected": -0.053432513028383255, "step": 520 }, { "epoch": 0.5558468799160986, - "grad_norm": 1.8707354612150964, + "grad_norm": 2.041930709602407, "learning_rate": 8.687444855261389e-06, - "log_odds_chosen": 0.4215427339076996, - "log_odds_ratio": -0.6489927172660828, - "logits/chosen": -3.0756938457489014, - "logits/rejected": -3.0923542976379395, - "logps/chosen": -0.8253329992294312, - "logps/rejected": -1.1108949184417725, - "loss": 0.5365, - "nll_loss": 0.45042163133621216, - "rewards/accuracies": 0.53125, - "rewards/chosen": -0.04126664996147156, - "rewards/margins": 0.014278100803494453, - "rewards/rejected": -0.05554475262761116, + "log_odds_chosen": 0.4141673445701599, + "log_odds_ratio": -0.6465325355529785, + "logits/chosen": -2.50728178024292, + "logits/rejected": -2.4843533039093018, + "logps/chosen": -0.828266978263855, + "logps/rejected": -1.111327886581421, + "loss": 0.5372, + "nll_loss": 0.4500916004180908, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.04141335189342499, + "rewards/margins": 0.014153043739497662, + "rewards/rejected": -0.055566392838954926, "step": 530 }, { "epoch": 0.5663345568956476, - "grad_norm": 1.922705947748225, + "grad_norm": 1.8588001511511827, "learning_rate": 8.606629658238705e-06, - "log_odds_chosen": 0.1879667341709137, - "log_odds_ratio": -0.6903280019760132, - "logits/chosen": -2.975130796432495, - "logits/rejected": -3.0028696060180664, - "logps/chosen": -0.8695458173751831, - "logps/rejected": -0.9805169105529785, - "loss": 0.5535, - "nll_loss": 0.5275255441665649, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04347729682922363, - "rewards/margins": 0.005548550747334957, - "rewards/rejected": -0.049025844782590866, + "log_odds_chosen": 0.1719091385602951, + "log_odds_ratio": -0.697492241859436, + "logits/chosen": -2.500349760055542, + "logits/rejected": -2.4829323291778564, + "logps/chosen": -0.8647764325141907, + "logps/rejected": -0.9715200662612915, + "loss": 0.553, + "nll_loss": 0.526767373085022, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.04323882237076759, + "rewards/margins": 0.005337181035429239, + "rewards/rejected": -0.048576001077890396, "step": 540 }, { "epoch": 0.5768222338751966, - "grad_norm": 1.9089385183272836, + "grad_norm": 1.9151013438807294, "learning_rate": 8.528028654224417e-06, - "log_odds_chosen": 0.42722567915916443, - "log_odds_ratio": -0.6043616533279419, - "logits/chosen": -2.9973807334899902, - "logits/rejected": -3.0049965381622314, - "logps/chosen": -0.8592002987861633, - "logps/rejected": -1.1192405223846436, - "loss": 0.537, - "nll_loss": 0.5372708439826965, - "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.042960021644830704, - "rewards/margins": 0.013002010062336922, - "rewards/rejected": -0.05596202611923218, + "log_odds_chosen": 0.41694098711013794, + "log_odds_ratio": -0.6211504936218262, + "logits/chosen": -2.526711940765381, + "logits/rejected": -2.488142490386963, + "logps/chosen": -0.866176426410675, + "logps/rejected": -1.1338094472885132, + "loss": 0.5372, + "nll_loss": 0.5370919704437256, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04330882430076599, + "rewards/margins": 0.013381647877395153, + "rewards/rejected": -0.05669047310948372, "step": 550 }, { "epoch": 0.5873099108547457, - "grad_norm": 1.9519454661958895, + "grad_norm": 1.9475996513575733, "learning_rate": 8.451542547285167e-06, - "log_odds_chosen": 0.23686861991882324, - "log_odds_ratio": -0.679013192653656, - "logits/chosen": -3.0309016704559326, - "logits/rejected": -3.0620574951171875, - "logps/chosen": -0.8845365643501282, - "logps/rejected": -1.0314432382583618, - "loss": 0.5215, - "nll_loss": 0.5018130540847778, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04422682151198387, - "rewards/margins": 0.0073453388176858425, - "rewards/rejected": -0.05157216265797615, + "log_odds_chosen": 0.23696064949035645, + "log_odds_ratio": -0.6743646860122681, + "logits/chosen": -2.518937110900879, + "logits/rejected": -2.490901470184326, + "logps/chosen": -0.8790968060493469, + "logps/rejected": -1.026903748512268, + "loss": 0.5214, + "nll_loss": 0.5015530586242676, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.043954841792583466, + "rewards/margins": 0.007390348706394434, + "rewards/rejected": -0.05134518817067146, "step": 560 }, { "epoch": 0.5977975878342947, - "grad_norm": 1.902474576616517, + "grad_norm": 1.8968936654846589, "learning_rate": 8.37707816583391e-06, - "log_odds_chosen": 0.157462477684021, - "log_odds_ratio": -0.7165660858154297, - "logits/chosen": -2.971592903137207, - "logits/rejected": -2.9932913780212402, - "logps/chosen": -0.8898121118545532, - "logps/rejected": -0.9948716163635254, - "loss": 0.5041, - "nll_loss": 0.5276492834091187, + "log_odds_chosen": 0.1709347516298294, + "log_odds_ratio": -0.721364438533783, + "logits/chosen": -2.5332372188568115, + "logits/rejected": -2.5029869079589844, + "logps/chosen": -0.8752782940864563, + "logps/rejected": -0.9968281984329224, + "loss": 0.5044, + "nll_loss": 0.528136134147644, "rewards/accuracies": 0.5, - "rewards/chosen": -0.044490598142147064, - "rewards/margins": 0.005252980627119541, - "rewards/rejected": -0.04974358528852463, + "rewards/chosen": -0.043763916939496994, + "rewards/margins": 0.006077499594539404, + "rewards/rejected": -0.04984141141176224, "step": 570 }, { "epoch": 0.6082852648138437, - "grad_norm": 1.9526588876095308, + "grad_norm": 1.901568677283731, "learning_rate": 8.304547985373997e-06, - "log_odds_chosen": 0.27767136693000793, - "log_odds_ratio": -0.6578360199928284, - "logits/chosen": -3.0485613346099854, - "logits/rejected": -3.061281204223633, - "logps/chosen": -0.8733240962028503, - "logps/rejected": -1.0594861507415771, - "loss": 0.5456, - "nll_loss": 0.48286086320877075, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.043666206300258636, - "rewards/margins": 0.009308096952736378, - "rewards/rejected": -0.05297430604696274, + "log_odds_chosen": 0.29886722564697266, + "log_odds_ratio": -0.6539247632026672, + "logits/chosen": -2.5251448154449463, + "logits/rejected": -2.5202994346618652, + "logps/chosen": -0.8657291531562805, + "logps/rejected": -1.0712764263153076, + "loss": 0.5461, + "nll_loss": 0.48294153809547424, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04328645393252373, + "rewards/margins": 0.01027736347168684, + "rewards/rejected": -0.05356382206082344, "step": 580 }, { "epoch": 0.6187729417933928, - "grad_norm": 1.963515177379308, + "grad_norm": 1.9902242754931676, "learning_rate": 8.233869695926184e-06, - "log_odds_chosen": 0.32016056776046753, - "log_odds_ratio": -0.6649240255355835, - "logits/chosen": -3.0834898948669434, - "logits/rejected": -3.123967409133911, - "logps/chosen": -0.8281318545341492, - "logps/rejected": -1.021436095237732, - "loss": 0.5124, - "nll_loss": 0.5498961210250854, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04140659421682358, - "rewards/margins": 0.009665210731327534, - "rewards/rejected": -0.05107180029153824, + "log_odds_chosen": 0.33919957280158997, + "log_odds_ratio": -0.6699340343475342, + "logits/chosen": -2.56527042388916, + "logits/rejected": -2.572580575942993, + "logps/chosen": -0.8352983593940735, + "logps/rejected": -1.0507652759552002, + "loss": 0.5138, + "nll_loss": 0.5514861345291138, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.041764914989471436, + "rewards/margins": 0.010773347690701485, + "rewards/rejected": -0.05253826454281807, "step": 590 }, { "epoch": 0.6292606187729418, - "grad_norm": 2.1416673571833584, + "grad_norm": 1.9968521848986331, "learning_rate": 8.164965809277262e-06, - "log_odds_chosen": 0.3141978085041046, - "log_odds_ratio": -0.6486893892288208, - "logits/chosen": -3.1147074699401855, - "logits/rejected": -3.11454176902771, - "logps/chosen": -0.8215556144714355, - "logps/rejected": -1.009476661682129, - "loss": 0.5144, - "nll_loss": 0.4836875796318054, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04107777774333954, - "rewards/margins": 0.009396053850650787, - "rewards/rejected": -0.05047383904457092, + "log_odds_chosen": 0.3369660973548889, + "log_odds_ratio": -0.6556235551834106, + "logits/chosen": -2.563744306564331, + "logits/rejected": -2.5540928840637207, + "logps/chosen": -0.8338971138000488, + "logps/rejected": -1.0589314699172974, + "loss": 0.5156, + "nll_loss": 0.4856153130531311, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04169485345482826, + "rewards/margins": 0.011251723393797874, + "rewards/rejected": -0.052946578711271286, + "step": 600 + }, + { + "epoch": 0.6292606187729418, + "eval_log_odds_chosen": 0.30560463666915894, + "eval_log_odds_ratio": -0.6469583511352539, + "eval_logits/chosen": -2.581132173538208, + "eval_logits/rejected": -2.555058717727661, + "eval_logps/chosen": -0.8332868218421936, + "eval_logps/rejected": -1.0284805297851562, + "eval_loss": 0.5242142677307129, + "eval_nll_loss": 0.48822054266929626, + "eval_rewards/accuracies": 0.6150793433189392, + "eval_rewards/chosen": -0.04166434332728386, + "eval_rewards/margins": 0.009759685955941677, + "eval_rewards/rejected": -0.05142403393983841, + "eval_runtime": 136.8777, + "eval_samples_per_second": 14.568, + "eval_steps_per_second": 0.46, "step": 600 }, { "epoch": 0.6397482957524908, - "grad_norm": 2.03894912155955, + "grad_norm": 2.0046645094729914, "learning_rate": 8.097763301789162e-06, - "log_odds_chosen": 0.1958848237991333, - "log_odds_ratio": -0.6933802366256714, - "logits/chosen": -3.016098737716675, - "logits/rejected": -3.046642780303955, - "logps/chosen": -0.8733209371566772, - "logps/rejected": -0.9883171916007996, - "loss": 0.526, - "nll_loss": 0.4880569875240326, + "log_odds_chosen": 0.1975608468055725, + "log_odds_ratio": -0.692371666431427, + "logits/chosen": -2.4713737964630127, + "logits/rejected": -2.4652817249298096, + "logps/chosen": -0.8778279423713684, + "logps/rejected": -0.9979953765869141, + "loss": 0.5255, + "nll_loss": 0.4872562289237976, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0436660535633564, - "rewards/margins": 0.005749809555709362, - "rewards/rejected": -0.049415864050388336, + "rewards/chosen": -0.04389139264822006, + "rewards/margins": 0.006008377764374018, + "rewards/rejected": -0.04989977926015854, "step": 610 }, { "epoch": 0.6502359727320398, - "grad_norm": 2.068974001178546, + "grad_norm": 2.040129936950799, "learning_rate": 8.03219328902499e-06, - "log_odds_chosen": 0.17991718649864197, - "log_odds_ratio": -0.7055822610855103, - "logits/chosen": -3.045403003692627, - "logits/rejected": -3.0644798278808594, - "logps/chosen": -0.8806620836257935, - "logps/rejected": -1.0145095586776733, - "loss": 0.5295, - "nll_loss": 0.5151625275611877, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04403311014175415, - "rewards/margins": 0.006692370865494013, - "rewards/rejected": -0.05072547867894173, + "log_odds_chosen": 0.1849410980939865, + "log_odds_ratio": -0.7018038630485535, + "logits/chosen": -2.539135694503784, + "logits/rejected": -2.500748872756958, + "logps/chosen": -0.8882759213447571, + "logps/rejected": -1.0186254978179932, + "loss": 0.5299, + "nll_loss": 0.5155984163284302, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.04441379755735397, + "rewards/margins": 0.006517473608255386, + "rewards/rejected": -0.05093127489089966, "step": 620 }, { "epoch": 0.6607236497115889, - "grad_norm": 1.9705491215328443, + "grad_norm": 2.2341440675434683, "learning_rate": 7.968190728895958e-06, - "log_odds_chosen": 0.23948292434215546, - "log_odds_ratio": -0.6947344541549683, - "logits/chosen": -3.016519546508789, - "logits/rejected": -3.042133331298828, - "logps/chosen": -0.8557758331298828, - "logps/rejected": -1.0029237270355225, - "loss": 0.5331, - "nll_loss": 0.5245988368988037, - "rewards/accuracies": 0.53125, - "rewards/chosen": -0.0427887924015522, - "rewards/margins": 0.007357400842010975, - "rewards/rejected": -0.0501461923122406, + "log_odds_chosen": 0.2307681292295456, + "log_odds_ratio": -0.7022296786308289, + "logits/chosen": -2.498748779296875, + "logits/rejected": -2.490837812423706, + "logps/chosen": -0.8587957620620728, + "logps/rejected": -1.0074841976165771, + "loss": 0.5336, + "nll_loss": 0.5248268842697144, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.04293978586792946, + "rewards/margins": 0.007434426806867123, + "rewards/rejected": -0.050374217331409454, "step": 630 }, { "epoch": 0.6712113266911379, - "grad_norm": 2.664256522681278, + "grad_norm": 2.376219217041332, "learning_rate": 7.905694150420949e-06, - "log_odds_chosen": 0.3717094659805298, - "log_odds_ratio": -0.6480633020401001, - "logits/chosen": -3.0543761253356934, - "logits/rejected": -3.0751733779907227, - "logps/chosen": -0.8645519018173218, - "logps/rejected": -1.102386713027954, - "loss": 0.5149, - "nll_loss": 0.46133953332901, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.04322759807109833, - "rewards/margins": 0.011891739442944527, - "rewards/rejected": -0.0551193431019783, + "log_odds_chosen": 0.2888760268688202, + "log_odds_ratio": -0.6808607578277588, + "logits/chosen": -2.4707798957824707, + "logits/rejected": -2.437269687652588, + "logps/chosen": -0.8727089166641235, + "logps/rejected": -1.049213171005249, + "loss": 0.5154, + "nll_loss": 0.46024101972579956, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.043635450303554535, + "rewards/margins": 0.008825212717056274, + "rewards/rejected": -0.05246065929532051, "step": 640 }, { "epoch": 0.6816990036706869, - "grad_norm": 1.878621524799117, + "grad_norm": 1.8230434305650385, "learning_rate": 7.844645405527363e-06, - "log_odds_chosen": 0.1861819326877594, - "log_odds_ratio": -0.7022497057914734, - "logits/chosen": -3.0863146781921387, - "logits/rejected": -3.113098621368408, - "logps/chosen": -0.8403372764587402, - "logps/rejected": -0.9548438191413879, - "loss": 0.5336, - "nll_loss": 0.5122831463813782, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.04201686754822731, - "rewards/margins": 0.0057253288105130196, - "rewards/rejected": -0.047742195427417755, + "log_odds_chosen": 0.197784885764122, + "log_odds_ratio": -0.706741988658905, + "logits/chosen": -2.497851848602295, + "logits/rejected": -2.4905192852020264, + "logps/chosen": -0.8444819450378418, + "logps/rejected": -0.975223183631897, + "loss": 0.5333, + "nll_loss": 0.5124696493148804, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.04222410172224045, + "rewards/margins": 0.006537059787660837, + "rewards/rejected": -0.048761166632175446, "step": 650 }, { "epoch": 0.6921866806502359, - "grad_norm": 1.8977100039056058, + "grad_norm": 1.8970235810963871, "learning_rate": 7.78498944161523e-06, - "log_odds_chosen": 0.2854728400707245, - "log_odds_ratio": -0.6552462577819824, - "logits/chosen": -3.052263021469116, - "logits/rejected": -3.0898962020874023, - "logps/chosen": -0.8826674222946167, - "logps/rejected": -1.0711818933486938, - "loss": 0.5304, - "nll_loss": 0.4874996542930603, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.044133372604846954, - "rewards/margins": 0.009425725787878036, - "rewards/rejected": -0.05355909466743469, + "log_odds_chosen": 0.3309662640094757, + "log_odds_ratio": -0.6530941724777222, + "logits/chosen": -2.5231690406799316, + "logits/rejected": -2.494469404220581, + "logps/chosen": -0.8999967575073242, + "logps/rejected": -1.128688097000122, + "loss": 0.5297, + "nll_loss": 0.4878067970275879, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.04499983415007591, + "rewards/margins": 0.01143457181751728, + "rewards/rejected": -0.05643441155552864, "step": 660 }, { "epoch": 0.702674357629785, - "grad_norm": 1.8195731091765575, + "grad_norm": 1.8146696004735343, "learning_rate": 7.726674092862559e-06, - "log_odds_chosen": 0.4364054203033447, - "log_odds_ratio": -0.6321254968643188, - "logits/chosen": -2.9931445121765137, - "logits/rejected": -3.025317907333374, - "logps/chosen": -0.8416171073913574, - "logps/rejected": -1.1292223930358887, - "loss": 0.5237, - "nll_loss": 0.46936100721359253, - "rewards/accuracies": 0.643750011920929, - "rewards/chosen": -0.04208085685968399, - "rewards/margins": 0.014380265958607197, - "rewards/rejected": -0.05646112561225891, + "log_odds_chosen": 0.45232027769088745, + "log_odds_ratio": -0.6299984455108643, + "logits/chosen": -2.4613699913024902, + "logits/rejected": -2.447711944580078, + "logps/chosen": -0.8380166292190552, + "logps/rejected": -1.143587350845337, + "loss": 0.524, + "nll_loss": 0.4693591594696045, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.04190083220601082, + "rewards/margins": 0.015278531238436699, + "rewards/rejected": -0.05717936158180237, "step": 670 }, { "epoch": 0.713162034609334, - "grad_norm": 2.0599075037830192, + "grad_norm": 2.112620721807998, "learning_rate": 7.669649888473705e-06, - "log_odds_chosen": 0.31395241618156433, - "log_odds_ratio": -0.650139570236206, - "logits/chosen": -2.9855525493621826, - "logits/rejected": -2.9897267818450928, - "logps/chosen": -0.8750125169754028, - "logps/rejected": -1.0669299364089966, - "loss": 0.5075, - "nll_loss": 0.4943002760410309, + "log_odds_chosen": 0.31269508600234985, + "log_odds_ratio": -0.6543976664543152, + "logits/chosen": -2.464507818222046, + "logits/rejected": -2.418670892715454, + "logps/chosen": -0.871087908744812, + "logps/rejected": -1.055972695350647, + "loss": 0.5087, + "nll_loss": 0.4947647452354431, "rewards/accuracies": 0.6312500238418579, - "rewards/chosen": -0.04375062882900238, - "rewards/margins": 0.009595867246389389, - "rewards/rejected": -0.05334649235010147, + "rewards/chosen": -0.0435543991625309, + "rewards/margins": 0.009244237095117569, + "rewards/rejected": -0.05279862880706787, "step": 680 }, { "epoch": 0.723649711588883, - "grad_norm": 1.8347271674417223, + "grad_norm": 1.8678427871613372, "learning_rate": 7.61386987626881e-06, - "log_odds_chosen": 0.18291696906089783, - "log_odds_ratio": -0.7239105701446533, - "logits/chosen": -2.97595477104187, - "logits/rejected": -2.991725444793701, - "logps/chosen": -0.8641953468322754, - "logps/rejected": -0.9991108179092407, - "loss": 0.5304, - "nll_loss": 0.5499680638313293, + "log_odds_chosen": 0.1339389979839325, + "log_odds_ratio": -0.7375361919403076, + "logits/chosen": -2.5094785690307617, + "logits/rejected": -2.479645252227783, + "logps/chosen": -0.86939537525177, + "logps/rejected": -0.9705018997192383, + "loss": 0.5313, + "nll_loss": 0.5503351092338562, "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.04320976510643959, - "rewards/margins": 0.006745772901922464, - "rewards/rejected": -0.04995553940534592, + "rewards/chosen": -0.04346977174282074, + "rewards/margins": 0.005055318586528301, + "rewards/rejected": -0.04852508753538132, "step": 690 }, { "epoch": 0.7341373885684321, - "grad_norm": 2.2852704943230915, + "grad_norm": 2.2093003307729235, "learning_rate": 7.559289460184545e-06, - "log_odds_chosen": 0.3105728030204773, - "log_odds_ratio": -0.6319602727890015, - "logits/chosen": -2.985989809036255, - "logits/rejected": -3.0209579467773438, - "logps/chosen": -0.8320032358169556, - "logps/rejected": -1.0303562879562378, - "loss": 0.5296, - "nll_loss": 0.5422422885894775, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.04160016402602196, - "rewards/margins": 0.009917653165757656, - "rewards/rejected": -0.05151782184839249, + "log_odds_chosen": 0.31829774379730225, + "log_odds_ratio": -0.6285902261734009, + "logits/chosen": -2.4719974994659424, + "logits/rejected": -2.4300436973571777, + "logps/chosen": -0.8187413215637207, + "logps/rejected": -1.0199077129364014, + "loss": 0.5297, + "nll_loss": 0.5423263907432556, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.04093705862760544, + "rewards/margins": 0.010058322921395302, + "rewards/rejected": -0.05099538713693619, + "step": 700 + }, + { + "epoch": 0.7341373885684321, + "eval_log_odds_chosen": 0.3407081067562103, + "eval_log_odds_ratio": -0.6351403594017029, + "eval_logits/chosen": -2.4800758361816406, + "eval_logits/rejected": -2.447735548019409, + "eval_logps/chosen": -0.8215174674987793, + "eval_logps/rejected": -1.0422321557998657, + "eval_loss": 0.5191378593444824, + "eval_nll_loss": 0.48380225896835327, + "eval_rewards/accuracies": 0.6309523582458496, + "eval_rewards/chosen": -0.04107587784528732, + "eval_rewards/margins": 0.011035734787583351, + "eval_rewards/rejected": -0.052111607044935226, + "eval_runtime": 136.6803, + "eval_samples_per_second": 14.589, + "eval_steps_per_second": 0.461, "step": 700 }, { "epoch": 0.7446250655479811, - "grad_norm": 1.9768197452256755, + "grad_norm": 1.9677237822159654, "learning_rate": 7.505866250408016e-06, - "log_odds_chosen": 0.2948063015937805, - "log_odds_ratio": -0.6451742649078369, - "logits/chosen": -3.1170597076416016, - "logits/rejected": -3.136089324951172, - "logps/chosen": -0.8415013551712036, - "logps/rejected": -1.0454984903335571, - "loss": 0.5237, - "nll_loss": 0.47949719429016113, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04207506403326988, - "rewards/margins": 0.01019985694438219, - "rewards/rejected": -0.0522749237716198, + "log_odds_chosen": 0.25284355878829956, + "log_odds_ratio": -0.6641759872436523, + "logits/chosen": -2.5103344917297363, + "logits/rejected": -2.492450714111328, + "logps/chosen": -0.8418480753898621, + "logps/rejected": -1.0181466341018677, + "loss": 0.5238, + "nll_loss": 0.4800626337528229, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.04209240525960922, + "rewards/margins": 0.008814921602606773, + "rewards/rejected": -0.050907332450151443, "step": 710 }, { "epoch": 0.7551127425275301, - "grad_norm": 1.905599119477425, + "grad_norm": 1.836122210517328, "learning_rate": 7.4535599249993e-06, - "log_odds_chosen": 0.40306347608566284, - "log_odds_ratio": -0.6352882385253906, - "logits/chosen": -3.064483642578125, - "logits/rejected": -3.087808847427368, - "logps/chosen": -0.7972971200942993, - "logps/rejected": -1.046507477760315, - "loss": 0.5304, - "nll_loss": 0.4636651873588562, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.039864856749773026, - "rewards/margins": 0.012460513040423393, - "rewards/rejected": -0.05232536792755127, + "log_odds_chosen": 0.35044384002685547, + "log_odds_ratio": -0.6476293802261353, + "logits/chosen": -2.4395956993103027, + "logits/rejected": -2.4294960498809814, + "logps/chosen": -0.7990450859069824, + "logps/rejected": -1.0232374668121338, + "loss": 0.5307, + "nll_loss": 0.4627358317375183, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.03995225578546524, + "rewards/margins": 0.01120961643755436, + "rewards/rejected": -0.05116187408566475, "step": 720 }, { "epoch": 0.7656004195070791, - "grad_norm": 2.19124615484763, + "grad_norm": 2.1971858392846007, "learning_rate": 7.402332101976053e-06, - "log_odds_chosen": 0.12367966026067734, - "log_odds_ratio": -0.7226089239120483, - "logits/chosen": -3.0835583209991455, - "logits/rejected": -3.0826332569122314, - "logps/chosen": -0.8365408778190613, - "logps/rejected": -0.9029885530471802, - "loss": 0.5374, - "nll_loss": 0.5031268000602722, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.04182704538106918, - "rewards/margins": 0.0033223754726350307, - "rewards/rejected": -0.04514942690730095, + "log_odds_chosen": 0.119834303855896, + "log_odds_ratio": -0.7195987701416016, + "logits/chosen": -2.4724977016448975, + "logits/rejected": -2.466116189956665, + "logps/chosen": -0.8342846035957336, + "logps/rejected": -0.9009860754013062, + "loss": 0.5373, + "nll_loss": 0.5026193857192993, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04171422868967056, + "rewards/margins": 0.003335078712552786, + "rewards/rejected": -0.045049309730529785, "step": 730 }, { "epoch": 0.7760880964866282, - "grad_norm": 2.0835998895674837, + "grad_norm": 2.0508082994645074, "learning_rate": 7.352146220938079e-06, - "log_odds_chosen": 0.33691075444221497, - "log_odds_ratio": -0.6264201402664185, - "logits/chosen": -3.1278512477874756, - "logits/rejected": -3.139995574951172, - "logps/chosen": -0.8067742586135864, - "logps/rejected": -1.0221493244171143, - "loss": 0.5312, - "nll_loss": 0.4790155291557312, - "rewards/accuracies": 0.668749988079071, - "rewards/chosen": -0.040338706225156784, - "rewards/margins": 0.010768752545118332, - "rewards/rejected": -0.051107458770275116, + "log_odds_chosen": 0.3144014775753021, + "log_odds_ratio": -0.6337074041366577, + "logits/chosen": -2.502380847930908, + "logits/rejected": -2.4844462871551514, + "logps/chosen": -0.8134506940841675, + "logps/rejected": -1.0163192749023438, + "loss": 0.5318, + "nll_loss": 0.4793321192264557, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.040672533214092255, + "rewards/margins": 0.010143419727683067, + "rewards/rejected": -0.05081595852971077, "step": 740 }, { "epoch": 0.7865757734661772, - "grad_norm": 1.9667031119071154, + "grad_norm": 1.9563195894092036, "learning_rate": 7.3029674334022146e-06, - "log_odds_chosen": 0.23670358955860138, - "log_odds_ratio": -0.6752098202705383, - "logits/chosen": -3.1056113243103027, - "logits/rejected": -3.1298460960388184, - "logps/chosen": -0.8614869117736816, - "logps/rejected": -0.9949930310249329, - "loss": 0.5426, - "nll_loss": 0.4975660443305969, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0430743470788002, - "rewards/margins": 0.006675302051007748, - "rewards/rejected": -0.049749650061130524, + "log_odds_chosen": 0.26552754640579224, + "log_odds_ratio": -0.6666983366012573, + "logits/chosen": -2.502037763595581, + "logits/rejected": -2.4685168266296387, + "logps/chosen": -0.8672981262207031, + "logps/rejected": -1.0235345363616943, + "loss": 0.5431, + "nll_loss": 0.4982251226902008, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.043364908546209335, + "rewards/margins": 0.007811821065843105, + "rewards/rejected": -0.05117672681808472, "step": 750 }, { "epoch": 0.7970634504457262, - "grad_norm": 1.8638714551633075, + "grad_norm": 1.7911295245119114, "learning_rate": 7.254762501100117e-06, - "log_odds_chosen": 0.2394195795059204, - "log_odds_ratio": -0.6686865091323853, - "logits/chosen": -3.092322826385498, - "logits/rejected": -3.0998446941375732, - "logps/chosen": -0.8189753293991089, - "logps/rejected": -0.9735254049301147, - "loss": 0.5115, - "nll_loss": 0.4049908220767975, - "rewards/accuracies": 0.5687500238418579, - "rewards/chosen": -0.0409487709403038, - "rewards/margins": 0.007727508433163166, - "rewards/rejected": -0.048676274716854095, + "log_odds_chosen": 0.2668423354625702, + "log_odds_ratio": -0.6578959226608276, + "logits/chosen": -2.4371020793914795, + "logits/rejected": -2.417811870574951, + "logps/chosen": -0.812160849571228, + "logps/rejected": -0.9835097193717957, + "loss": 0.5124, + "nll_loss": 0.4057600498199463, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04060804471373558, + "rewards/margins": 0.008567440323531628, + "rewards/rejected": -0.04917549341917038, "step": 760 }, { "epoch": 0.8075511274252754, - "grad_norm": 2.098087236150393, + "grad_norm": 2.141524900160083, "learning_rate": 7.207499701564472e-06, - "log_odds_chosen": 0.21572642028331757, - "log_odds_ratio": -0.7029857635498047, - "logits/chosen": -3.0059127807617188, - "logits/rejected": -3.0258781909942627, - "logps/chosen": -0.8941653370857239, - "logps/rejected": -1.0438942909240723, - "loss": 0.5343, - "nll_loss": 0.5011810064315796, - "rewards/accuracies": 0.48750001192092896, - "rewards/chosen": -0.044708263128995895, - "rewards/margins": 0.007486448623239994, - "rewards/rejected": -0.05219471454620361, + "log_odds_chosen": 0.21438488364219666, + "log_odds_ratio": -0.7001821994781494, + "logits/chosen": -2.432426929473877, + "logits/rejected": -2.4075264930725098, + "logps/chosen": -0.8868153691291809, + "logps/rejected": -1.0428266525268555, + "loss": 0.5341, + "nll_loss": 0.5013958811759949, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.044340766966342926, + "rewards/margins": 0.007800562772899866, + "rewards/rejected": -0.052141331136226654, "step": 770 }, { "epoch": 0.8180388044048243, - "grad_norm": 1.908201970451478, + "grad_norm": 2.177116882955384, "learning_rate": 7.1611487403943295e-06, - "log_odds_chosen": 0.22588184475898743, - "log_odds_ratio": -0.6703106164932251, - "logits/chosen": -3.0057101249694824, - "logits/rejected": -3.0319108963012695, - "logps/chosen": -0.8802768588066101, - "logps/rejected": -0.997613787651062, - "loss": 0.5466, - "nll_loss": 0.5490036606788635, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.0440138503909111, - "rewards/margins": 0.005866837687790394, - "rewards/rejected": -0.04988069087266922, + "log_odds_chosen": 0.20159511268138885, + "log_odds_ratio": -0.6788761019706726, + "logits/chosen": -2.4562764167785645, + "logits/rejected": -2.4499940872192383, + "logps/chosen": -0.883618950843811, + "logps/rejected": -0.998609721660614, + "loss": 0.5476, + "nll_loss": 0.549780547618866, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.044180940836668015, + "rewards/margins": 0.0057495469227433205, + "rewards/rejected": -0.04993049427866936, "step": 780 }, { "epoch": 0.8285264813843733, - "grad_norm": 1.8452821315553456, + "grad_norm": 1.9065399955928464, "learning_rate": 7.115680669648201e-06, - "log_odds_chosen": 0.32251420617103577, - "log_odds_ratio": -0.6489396691322327, - "logits/chosen": -2.991415500640869, - "logits/rejected": -3.0075478553771973, - "logps/chosen": -0.8143788576126099, - "logps/rejected": -1.0171436071395874, - "loss": 0.5052, - "nll_loss": 0.4423222541809082, + "log_odds_chosen": 0.3184022307395935, + "log_odds_ratio": -0.6474640965461731, + "logits/chosen": -2.4409756660461426, + "logits/rejected": -2.4378621578216553, + "logps/chosen": -0.8189239501953125, + "logps/rejected": -1.0288126468658447, + "loss": 0.5063, + "nll_loss": 0.4424379765987396, "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04071894288063049, - "rewards/margins": 0.010138243436813354, - "rewards/rejected": -0.05085718631744385, + "rewards/chosen": -0.04094620421528816, + "rewards/margins": 0.010494431480765343, + "rewards/rejected": -0.051440637558698654, "step": 790 }, { "epoch": 0.8390141583639223, - "grad_norm": 2.099723593564682, + "grad_norm": 2.0323396486068086, "learning_rate": 7.0710678118654756e-06, - "log_odds_chosen": 0.4498319625854492, - "log_odds_ratio": -0.5986544489860535, - "logits/chosen": -2.9999208450317383, - "logits/rejected": -2.9963490962982178, - "logps/chosen": -0.782555341720581, - "logps/rejected": -1.068285584449768, - "loss": 0.5173, - "nll_loss": 0.4201901853084564, - "rewards/accuracies": 0.65625, - "rewards/chosen": -0.03912776708602905, - "rewards/margins": 0.014286505989730358, - "rewards/rejected": -0.053414274007081985, + "log_odds_chosen": 0.4407121241092682, + "log_odds_ratio": -0.6011781096458435, + "logits/chosen": -2.411447048187256, + "logits/rejected": -2.389554500579834, + "logps/chosen": -0.7789972424507141, + "logps/rejected": -1.0607415437698364, + "loss": 0.5184, + "nll_loss": 0.4200369715690613, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.03894985839724541, + "rewards/margins": 0.014087215065956116, + "rewards/rejected": -0.053037069737911224, + "step": 800 + }, + { + "epoch": 0.8390141583639223, + "eval_log_odds_chosen": 0.37830010056495667, + "eval_log_odds_ratio": -0.6304489970207214, + "eval_logits/chosen": -2.4921600818634033, + "eval_logits/rejected": -2.4575421810150146, + "eval_logps/chosen": -0.8178579211235046, + "eval_logps/rejected": -1.0646613836288452, + "eval_loss": 0.5137735605239868, + "eval_nll_loss": 0.4796440601348877, + "eval_rewards/accuracies": 0.6309523582458496, + "eval_rewards/chosen": -0.04089289531111717, + "eval_rewards/margins": 0.012340176850557327, + "eval_rewards/rejected": -0.0532330721616745, + "eval_runtime": 136.422, + "eval_samples_per_second": 14.616, + "eval_steps_per_second": 0.462, "step": 800 }, { "epoch": 0.8495018353434715, - "grad_norm": 1.9010573028789273, + "grad_norm": 1.8282488507148427, "learning_rate": 7.027283689263066e-06, - "log_odds_chosen": 0.34422335028648376, - "log_odds_ratio": -0.6322020292282104, - "logits/chosen": -3.0011842250823975, - "logits/rejected": -2.9966137409210205, - "logps/chosen": -0.8086786270141602, - "logps/rejected": -1.0155996084213257, - "loss": 0.5132, - "nll_loss": 0.4740920066833496, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.040433935821056366, - "rewards/margins": 0.010346042923629284, - "rewards/rejected": -0.05077998712658882, + "log_odds_chosen": 0.34346696734428406, + "log_odds_ratio": -0.6324015259742737, + "logits/chosen": -2.4684414863586426, + "logits/rejected": -2.419478178024292, + "logps/chosen": -0.7967440485954285, + "logps/rejected": -1.0016282796859741, + "loss": 0.513, + "nll_loss": 0.4749225676059723, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03983720391988754, + "rewards/margins": 0.0102442167699337, + "rewards/rejected": -0.050081413239240646, "step": 810 }, { "epoch": 0.8599895123230205, - "grad_norm": 2.3144073315770353, + "grad_norm": 2.4353704194820343, "learning_rate": 6.984302957695783e-06, - "log_odds_chosen": 0.29515784978866577, - "log_odds_ratio": -0.6521409749984741, - "logits/chosen": -2.943692445755005, - "logits/rejected": -2.9414219856262207, - "logps/chosen": -0.8414862751960754, - "logps/rejected": -1.0143965482711792, - "loss": 0.504, - "nll_loss": 0.4271189570426941, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04207431524991989, - "rewards/margins": 0.008645516820251942, - "rewards/rejected": -0.05071982741355896, + "log_odds_chosen": 0.3245043158531189, + "log_odds_ratio": -0.643069863319397, + "logits/chosen": -2.3819215297698975, + "logits/rejected": -2.349990129470825, + "logps/chosen": -0.8364176750183105, + "logps/rejected": -1.031416654586792, + "loss": 0.5047, + "nll_loss": 0.42748355865478516, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.041820887476205826, + "rewards/margins": 0.009749949909746647, + "rewards/rejected": -0.0515708327293396, "step": 820 }, { "epoch": 0.8704771893025695, - "grad_norm": 2.371001107698096, + "grad_norm": 2.34776921636757, "learning_rate": 6.942101345006233e-06, - "log_odds_chosen": 0.2455742061138153, - "log_odds_ratio": -0.7013689279556274, - "logits/chosen": -2.933568239212036, - "logits/rejected": -2.977832794189453, - "logps/chosen": -0.8553229570388794, - "logps/rejected": -1.0332233905792236, + "log_odds_chosen": 0.2353450506925583, + "log_odds_ratio": -0.7053114175796509, + "logits/chosen": -2.4152169227600098, + "logits/rejected": -2.4146676063537598, + "logps/chosen": -0.8572267293930054, + "logps/rejected": -1.0250964164733887, "loss": 0.5251, - "nll_loss": 0.46586036682128906, - "rewards/accuracies": 0.53125, - "rewards/chosen": -0.04276614636182785, - "rewards/margins": 0.008895025588572025, - "rewards/rejected": -0.0516611710190773, + "nll_loss": 0.4658161699771881, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.042861342430114746, + "rewards/margins": 0.008393481373786926, + "rewards/rejected": -0.051254820078611374, "step": 830 }, { "epoch": 0.8809648662821186, - "grad_norm": 1.977587507180873, + "grad_norm": 1.9903482887340351, "learning_rate": 6.900655593423542e-06, - "log_odds_chosen": 0.19387319684028625, - "log_odds_ratio": -0.6939007639884949, - "logits/chosen": -2.9483094215393066, - "logits/rejected": -2.966421365737915, - "logps/chosen": -0.8696029782295227, - "logps/rejected": -1.0034617185592651, - "loss": 0.5136, - "nll_loss": 0.48451894521713257, + "log_odds_chosen": 0.19377179443836212, + "log_odds_ratio": -0.6883701086044312, + "logits/chosen": -2.450711727142334, + "logits/rejected": -2.4275918006896973, + "logps/chosen": -0.8693481683731079, + "logps/rejected": -0.9985544085502625, + "loss": 0.5129, + "nll_loss": 0.4828173518180847, "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.043480150401592255, - "rewards/margins": 0.006692938506603241, - "rewards/rejected": -0.050173092633485794, + "rewards/chosen": -0.04346740245819092, + "rewards/margins": 0.006460316479206085, + "rewards/rejected": -0.0499277226626873, "step": 840 }, { "epoch": 0.8914525432616676, - "grad_norm": 2.0931872980265527, + "grad_norm": 2.0572418230575784, "learning_rate": 6.859943405700353e-06, - "log_odds_chosen": 0.27469760179519653, - "log_odds_ratio": -0.6496983170509338, - "logits/chosen": -2.882544994354248, - "logits/rejected": -2.907102584838867, - "logps/chosen": -0.8309645652770996, - "logps/rejected": -0.9983605146408081, - "loss": 0.5054, - "nll_loss": 0.4892002046108246, - "rewards/accuracies": 0.59375, - "rewards/chosen": -0.0415482297539711, - "rewards/margins": 0.008369805291295052, - "rewards/rejected": -0.049918033182621, + "log_odds_chosen": 0.2696766257286072, + "log_odds_ratio": -0.6502133011817932, + "logits/chosen": -2.4989540576934814, + "logits/rejected": -2.480175018310547, + "logps/chosen": -0.8377168774604797, + "logps/rejected": -1.0055049657821655, + "loss": 0.5056, + "nll_loss": 0.49036240577697754, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.041885845363140106, + "rewards/margins": 0.008389403112232685, + "rewards/rejected": -0.05027524754405022, "step": 850 }, { "epoch": 0.9019402202412166, - "grad_norm": 1.9059523373512675, + "grad_norm": 1.9654607653654201, "learning_rate": 6.819943394704736e-06, - "log_odds_chosen": 0.2372780740261078, - "log_odds_ratio": -0.6811105012893677, - "logits/chosen": -2.9579243659973145, - "logits/rejected": -2.9706907272338867, - "logps/chosen": -0.8282278180122375, - "logps/rejected": -0.982342541217804, - "loss": 0.5277, - "nll_loss": 0.4725598692893982, - "rewards/accuracies": 0.543749988079071, - "rewards/chosen": -0.0414113886654377, - "rewards/margins": 0.007705743424594402, - "rewards/rejected": -0.049117133021354675, + "log_odds_chosen": 0.21943990886211395, + "log_odds_ratio": -0.6898983716964722, + "logits/chosen": -2.5337636470794678, + "logits/rejected": -2.546189546585083, + "logps/chosen": -0.8453003168106079, + "logps/rejected": -0.9918826222419739, + "loss": 0.5285, + "nll_loss": 0.4758750796318054, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.042265016585588455, + "rewards/margins": 0.007329112850129604, + "rewards/rejected": -0.049594126641750336, "step": 860 }, { "epoch": 0.9124278972207656, - "grad_norm": 1.892543797666968, + "grad_norm": 1.9335503237781204, "learning_rate": 6.780635036208105e-06, - "log_odds_chosen": 0.287548691034317, - "log_odds_ratio": -0.6644268035888672, - "logits/chosen": -3.0049710273742676, - "logits/rejected": -3.0431902408599854, - "logps/chosen": -0.8620280027389526, - "logps/rejected": -1.0551369190216064, - "loss": 0.4935, - "nll_loss": 0.4828346371650696, - "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04310140386223793, - "rewards/margins": 0.009655444882810116, - "rewards/rejected": -0.05275684595108032, + "log_odds_chosen": 0.29112187027931213, + "log_odds_ratio": -0.6706128120422363, + "logits/chosen": -2.535698890686035, + "logits/rejected": -2.5173022747039795, + "logps/chosen": -0.8680477142333984, + "logps/rejected": -1.0733777284622192, + "loss": 0.4939, + "nll_loss": 0.4834807515144348, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.04340239241719246, + "rewards/margins": 0.010266497731208801, + "rewards/rejected": -0.05366888642311096, "step": 870 }, { "epoch": 0.9229155742003147, - "grad_norm": 1.6128728864363475, + "grad_norm": 1.6238877353235468, "learning_rate": 6.741998624632421e-06, - "log_odds_chosen": 0.2844703197479248, - "log_odds_ratio": -0.6617631316184998, - "logits/chosen": -3.044353723526001, - "logits/rejected": -3.0480034351348877, - "logps/chosen": -0.808245837688446, - "logps/rejected": -0.990073561668396, - "loss": 0.4881, - "nll_loss": 0.43747878074645996, - "rewards/accuracies": 0.606249988079071, - "rewards/chosen": -0.0404122956097126, - "rewards/margins": 0.009091392159461975, - "rewards/rejected": -0.04950368404388428, + "log_odds_chosen": 0.29792147874832153, + "log_odds_ratio": -0.6587230563163757, + "logits/chosen": -2.533979892730713, + "logits/rejected": -2.498964548110962, + "logps/chosen": -0.8105006217956543, + "logps/rejected": -0.999383807182312, + "loss": 0.4883, + "nll_loss": 0.4381064474582672, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.040525030344724655, + "rewards/margins": 0.009444162249565125, + "rewards/rejected": -0.04996918886899948, "step": 880 }, { "epoch": 0.9334032511798637, - "grad_norm": 2.329046484142618, + "grad_norm": 1.7501205893349534, "learning_rate": 6.70401523153991e-06, - "log_odds_chosen": 0.32051050662994385, - "log_odds_ratio": -0.6461818218231201, - "logits/chosen": -3.0071539878845215, - "logits/rejected": -3.0232186317443848, - "logps/chosen": -0.8105939030647278, - "logps/rejected": -0.993729293346405, - "loss": 0.4935, - "nll_loss": 0.46434158086776733, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.04052969440817833, - "rewards/margins": 0.00915677472949028, - "rewards/rejected": -0.04968646913766861, + "log_odds_chosen": 0.3352048397064209, + "log_odds_ratio": -0.6494973301887512, + "logits/chosen": -2.5071699619293213, + "logits/rejected": -2.488619327545166, + "logps/chosen": -0.811043381690979, + "logps/rejected": -1.0027059316635132, + "loss": 0.4932, + "nll_loss": 0.4646259844303131, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.04055216908454895, + "rewards/margins": 0.0095831248909235, + "rewards/rejected": -0.0501352921128273, "step": 890 }, { "epoch": 0.9438909281594127, - "grad_norm": 2.0086740635642073, + "grad_norm": 1.9660241717665867, "learning_rate": 6.666666666666667e-06, - "log_odds_chosen": 0.2798821032047272, - "log_odds_ratio": -0.664302408695221, - "logits/chosen": -2.9259209632873535, - "logits/rejected": -2.9381814002990723, - "logps/chosen": -0.7818757891654968, - "logps/rejected": -0.9571603536605835, - "loss": 0.5239, - "nll_loss": 0.4661863446235657, - "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.03909378498792648, - "rewards/margins": 0.008764232508838177, - "rewards/rejected": -0.04785802215337753, + "log_odds_chosen": 0.2810109555721283, + "log_odds_ratio": -0.6615744829177856, + "logits/chosen": -2.5149343013763428, + "logits/rejected": -2.5029118061065674, + "logps/chosen": -0.7809039354324341, + "logps/rejected": -0.9545317888259888, + "loss": 0.5235, + "nll_loss": 0.4655960202217102, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.039045192301273346, + "rewards/margins": 0.008681395091116428, + "rewards/rejected": -0.0477265901863575, + "step": 900 + }, + { + "epoch": 0.9438909281594127, + "eval_log_odds_chosen": 0.3305439352989197, + "eval_log_odds_ratio": -0.6379230618476868, + "eval_logits/chosen": -2.5633676052093506, + "eval_logits/rejected": -2.533735752105713, + "eval_logps/chosen": -0.8084598779678345, + "eval_logps/rejected": -1.0201667547225952, + "eval_loss": 0.5087887644767761, + "eval_nll_loss": 0.4741307497024536, + "eval_rewards/accuracies": 0.6289682388305664, + "eval_rewards/chosen": -0.040422990918159485, + "eval_rewards/margins": 0.010585347190499306, + "eval_rewards/rejected": -0.05100833997130394, + "eval_runtime": 137.3356, + "eval_samples_per_second": 14.519, + "eval_steps_per_second": 0.459, "step": 900 }, { "epoch": 0.9543786051389617, - "grad_norm": 2.068822950454407, + "grad_norm": 1.980970750978968, "learning_rate": 6.629935441317959e-06, - "log_odds_chosen": 0.479647159576416, - "log_odds_ratio": -0.6314842700958252, - "logits/chosen": -2.974902629852295, - "logits/rejected": -2.9787256717681885, - "logps/chosen": -0.8285977244377136, - "logps/rejected": -1.1534996032714844, - "loss": 0.5142, - "nll_loss": 0.46572408080101013, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.04142988473176956, - "rewards/margins": 0.016245096921920776, - "rewards/rejected": -0.05767498165369034, + "log_odds_chosen": 0.4885142743587494, + "log_odds_ratio": -0.6281706094741821, + "logits/chosen": -2.5022482872009277, + "logits/rejected": -2.4692506790161133, + "logps/chosen": -0.8217445611953735, + "logps/rejected": -1.1550103425979614, + "loss": 0.5139, + "nll_loss": 0.46534866094589233, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.04108722507953644, + "rewards/margins": 0.016663286834955215, + "rewards/rejected": -0.05775051191449165, "step": 910 }, { "epoch": 0.9648662821185108, - "grad_norm": 1.9606527520032064, + "grad_norm": 1.9369311282677693, "learning_rate": 6.593804733957872e-06, - "log_odds_chosen": 0.3219223618507385, - "log_odds_ratio": -0.649006187915802, - "logits/chosen": -2.895038604736328, - "logits/rejected": -2.9138269424438477, - "logps/chosen": -0.7895429134368896, - "logps/rejected": -0.9961126446723938, - "loss": 0.4837, - "nll_loss": 0.43109196424484253, - "rewards/accuracies": 0.6187499761581421, - "rewards/chosen": -0.0394771471619606, - "rewards/margins": 0.010328484699130058, - "rewards/rejected": -0.04980562627315521, + "log_odds_chosen": 0.30783259868621826, + "log_odds_ratio": -0.6484240293502808, + "logits/chosen": -2.43939208984375, + "logits/rejected": -2.4428539276123047, + "logps/chosen": -0.7850558161735535, + "logps/rejected": -0.9770357012748718, + "loss": 0.4836, + "nll_loss": 0.4291355013847351, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.03925279527902603, + "rewards/margins": 0.009598996490240097, + "rewards/rejected": -0.04885178059339523, "step": 920 }, { "epoch": 0.9753539590980598, - "grad_norm": 2.2191050074705405, + "grad_norm": 2.1104816487301523, "learning_rate": 6.55825835783953e-06, - "log_odds_chosen": 0.21952304244041443, - "log_odds_ratio": -0.6805615425109863, - "logits/chosen": -2.8973617553710938, - "logits/rejected": -2.900251865386963, - "logps/chosen": -0.8730388879776001, - "logps/rejected": -1.0255097150802612, - "loss": 0.5135, - "nll_loss": 0.5237925052642822, + "log_odds_chosen": 0.2358274906873703, + "log_odds_ratio": -0.6784078478813171, + "logits/chosen": -2.5169990062713623, + "logits/rejected": -2.4993927478790283, + "logps/chosen": -0.8740841746330261, + "logps/rejected": -1.0411127805709839, + "loss": 0.5133, + "nll_loss": 0.5242566466331482, "rewards/accuracies": 0.581250011920929, - "rewards/chosen": -0.04365193843841553, - "rewards/margins": 0.007623549550771713, - "rewards/rejected": -0.05127548426389694, + "rewards/chosen": -0.04370420426130295, + "rewards/margins": 0.008351435884833336, + "rewards/rejected": -0.05205564573407173, "step": 930 }, { "epoch": 0.9858416360776088, - "grad_norm": 1.9816052115352747, + "grad_norm": 1.9809353614537575, "learning_rate": 6.523280730534423e-06, - "log_odds_chosen": 0.2554723024368286, - "log_odds_ratio": -0.6887288689613342, - "logits/chosen": -2.93623685836792, - "logits/rejected": -2.9283607006073, - "logps/chosen": -0.7786284685134888, - "logps/rejected": -0.9273189306259155, - "loss": 0.5095, - "nll_loss": 0.4773116111755371, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": -0.03893141821026802, - "rewards/margins": 0.007434530649334192, - "rewards/rejected": -0.046365950256586075, + "log_odds_chosen": 0.25125178694725037, + "log_odds_ratio": -0.6891010403633118, + "logits/chosen": -2.52742862701416, + "logits/rejected": -2.5163397789001465, + "logps/chosen": -0.7782126069068909, + "logps/rejected": -0.9240644574165344, + "loss": 0.5102, + "nll_loss": 0.478424072265625, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.038910627365112305, + "rewards/margins": 0.007292595691978931, + "rewards/rejected": -0.04620322585105896, "step": 940 }, { "epoch": 0.9963293130571579, - "grad_norm": 2.074452011854083, + "grad_norm": 2.052392197513496, "learning_rate": 6.488856845230502e-06, - "log_odds_chosen": 0.2605803310871124, - "log_odds_ratio": -0.6914502382278442, - "logits/chosen": -2.9090209007263184, - "logits/rejected": -2.9163012504577637, - "logps/chosen": -0.8585780262947083, - "logps/rejected": -1.0175925493240356, + "log_odds_chosen": 0.22615018486976624, + "log_odds_ratio": -0.7002879977226257, + "logits/chosen": -2.4786956310272217, + "logits/rejected": -2.447265625, + "logps/chosen": -0.8600684404373169, + "logps/rejected": -0.9971193075180054, "loss": 0.5383, - "nll_loss": 0.503527045249939, + "nll_loss": 0.5037115812301636, "rewards/accuracies": 0.5625, - "rewards/chosen": -0.04292890429496765, - "rewards/margins": 0.007950720377266407, - "rewards/rejected": -0.050879620015621185, + "rewards/chosen": -0.043003425002098083, + "rewards/margins": 0.006852544844150543, + "rewards/rejected": -0.04985596612095833, "step": 950 }, { "epoch": 0.9994756161510225, "step": 953, "total_flos": 0.0, - "train_loss": 0.56347276506494, - "train_runtime": 19079.6454, - "train_samples_per_second": 3.197, - "train_steps_per_second": 0.05 + "train_loss": 0.5642813587989287, + "train_runtime": 20357.789, + "train_samples_per_second": 2.997, + "train_steps_per_second": 0.047 } ], "logging_steps": 10,