{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994756161510225, "eval_steps": 100, "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01048767697954903, "grad_norm": 11.269791488706222, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.1659858673810959, "log_odds_ratio": -0.6960253715515137, "logits/chosen": -2.5437328815460205, "logits/rejected": -2.532463550567627, "logps/chosen": -0.9995189905166626, "logps/rejected": -1.0994223356246948, "loss": 2.7426, "nll_loss": 2.6549222469329834, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04997594282031059, "rewards/margins": 0.004995172377675772, "rewards/rejected": -0.0549711212515831, "step": 10 }, { "epoch": 0.02097535395909806, "grad_norm": 3.2083352232231426, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.19043061137199402, "log_odds_ratio": -0.6681476831436157, "logits/chosen": -3.149108409881592, "logits/rejected": -3.1720833778381348, "logps/chosen": -0.7663742303848267, "logps/rejected": -0.8751267194747925, "loss": 0.5628, "nll_loss": 0.5223474502563477, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03831871226429939, "rewards/margins": 0.005437628366053104, "rewards/rejected": -0.04375633969902992, "step": 20 }, { "epoch": 0.03146303093864709, "grad_norm": 2.5438959591852903, "learning_rate": 6e-06, "log_odds_chosen": 0.24195578694343567, "log_odds_ratio": -0.6542765498161316, "logits/chosen": -2.974864959716797, "logits/rejected": -2.9495468139648438, "logps/chosen": -0.8126222491264343, "logps/rejected": -0.9452728033065796, "loss": 0.5332, "nll_loss": 0.49184679985046387, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04063111171126366, "rewards/margins": 0.006632520817220211, "rewards/rejected": -0.04726364091038704, "step": 30 }, { "epoch": 0.04195070791819612, "grad_norm": 2.6387687995337887, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": 0.16362647712230682, "log_odds_ratio": -0.6933655738830566, "logits/chosen": -2.880462408065796, "logits/rejected": -2.8687615394592285, "logps/chosen": -0.804220974445343, "logps/rejected": -0.9210459589958191, "loss": 0.5196, "nll_loss": 0.4802279472351074, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04021105170249939, "rewards/margins": 0.005841248203068972, "rewards/rejected": -0.046052299439907074, "step": 40 }, { "epoch": 0.05243838489774515, "grad_norm": 2.753689850023678, "learning_rate": 1e-05, "log_odds_chosen": 0.285639226436615, "log_odds_ratio": -0.6802313327789307, "logits/chosen": -2.7953293323516846, "logits/rejected": -2.801888942718506, "logps/chosen": -0.786683201789856, "logps/rejected": -0.9665401577949524, "loss": 0.5419, "nll_loss": 0.4841863214969635, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03933415934443474, "rewards/margins": 0.008992847986519337, "rewards/rejected": -0.0483270101249218, "step": 50 }, { "epoch": 0.06292606187729417, "grad_norm": 2.9944776685892003, "learning_rate": 1.2e-05, "log_odds_chosen": 0.18177883327007294, "log_odds_ratio": -0.6903725862503052, "logits/chosen": -2.9931223392486572, "logits/rejected": -2.9918220043182373, "logps/chosen": -0.8297529220581055, "logps/rejected": -0.9411457180976868, "loss": 0.552, "nll_loss": 0.5221412777900696, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.041487645357847214, "rewards/margins": 0.005569641478359699, "rewards/rejected": -0.04705728590488434, "step": 60 }, { "epoch": 0.07341373885684321, "grad_norm": 2.7695397689637704, "learning_rate": 1.4e-05, "log_odds_chosen": 0.18929322063922882, "log_odds_ratio": -0.6986348032951355, "logits/chosen": -2.928518056869507, "logits/rejected": -2.952428102493286, "logps/chosen": -0.8219515085220337, "logps/rejected": -0.9297820925712585, "loss": 0.5396, "nll_loss": 0.5304870009422302, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.041097573935985565, "rewards/margins": 0.00539153628051281, "rewards/rejected": -0.046489108353853226, "step": 70 }, { "epoch": 0.08390141583639224, "grad_norm": 19.07043575642583, "learning_rate": 1.6000000000000003e-05, "log_odds_chosen": 0.18035998940467834, "log_odds_ratio": -0.6837159395217896, "logits/chosen": -2.7761759757995605, "logits/rejected": -2.7504143714904785, "logps/chosen": -0.8980675935745239, "logps/rejected": -1.0327494144439697, "loss": 0.5637, "nll_loss": 0.48639434576034546, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04490337893366814, "rewards/margins": 0.006734092719852924, "rewards/rejected": -0.05163746327161789, "step": 80 }, { "epoch": 0.09438909281594127, "grad_norm": 3.590055499786838, "learning_rate": 1.8e-05, "log_odds_chosen": 0.2686706781387329, "log_odds_ratio": -0.6697625517845154, "logits/chosen": -2.6665635108947754, "logits/rejected": -2.664783239364624, "logps/chosen": -0.8778934478759766, "logps/rejected": -1.0414215326309204, "loss": 0.5547, "nll_loss": 0.49069148302078247, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04389467462897301, "rewards/margins": 0.008176402188837528, "rewards/rejected": -0.05207107588648796, "step": 90 }, { "epoch": 0.1048767697954903, "grad_norm": 3.4892365652397572, "learning_rate": 2e-05, "log_odds_chosen": 0.20862731337547302, "log_odds_ratio": -0.6619225144386292, "logits/chosen": -2.6862692832946777, "logits/rejected": -2.673692226409912, "logps/chosen": -0.9019685983657837, "logps/rejected": -1.0285098552703857, "loss": 0.5707, "nll_loss": 0.5284041166305542, "rewards/accuracies": 0.5625, "rewards/chosen": -0.045098431408405304, "rewards/margins": 0.006327061913907528, "rewards/rejected": -0.051425494253635406, "step": 100 }, { "epoch": 0.1048767697954903, "eval_log_odds_chosen": 0.2601078152656555, "eval_log_odds_ratio": -0.6412674188613892, "eval_logits/chosen": -2.5810625553131104, "eval_logits/rejected": -2.5432214736938477, "eval_logps/chosen": -0.9045050144195557, "eval_logps/rejected": -1.077429175376892, "eval_loss": 1.1267567873001099, "eval_nll_loss": 1.0893229246139526, "eval_rewards/accuracies": 0.636904776096344, "eval_rewards/chosen": -0.045225247740745544, "eval_rewards/margins": 0.008646207861602306, "eval_rewards/rejected": -0.053871456533670425, "eval_runtime": 137.3095, "eval_samples_per_second": 14.522, "eval_steps_per_second": 0.459, "step": 100 }, { "epoch": 0.11536444677503933, "grad_norm": 3.2610517529807947, "learning_rate": 1.9069251784911845e-05, "log_odds_chosen": 0.2603258490562439, "log_odds_ratio": -0.6417919397354126, "logits/chosen": -2.632387399673462, "logits/rejected": -2.6478092670440674, "logps/chosen": -0.8465877771377563, "logps/rejected": -1.0118043422698975, "loss": 0.6247, "nll_loss": 0.5625969171524048, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04232938587665558, "rewards/margins": 0.008260839618742466, "rewards/rejected": -0.05059022456407547, "step": 110 }, { "epoch": 0.12585212375458835, "grad_norm": 3.1929412426319397, "learning_rate": 1.825741858350554e-05, "log_odds_chosen": 0.242882639169693, "log_odds_ratio": -0.6634533405303955, "logits/chosen": -2.5689873695373535, "logits/rejected": -2.536681652069092, "logps/chosen": -0.8897055387496948, "logps/rejected": -1.0510555505752563, "loss": 0.6122, "nll_loss": 0.5722111463546753, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04448527842760086, "rewards/margins": 0.008067498914897442, "rewards/rejected": -0.05255277082324028, "step": 120 }, { "epoch": 0.1363398007341374, "grad_norm": 2.381017549769141, "learning_rate": 1.7541160386140587e-05, "log_odds_chosen": 0.20046833157539368, "log_odds_ratio": -0.6848769783973694, "logits/chosen": -2.5286340713500977, "logits/rejected": -2.503958225250244, "logps/chosen": -0.914216160774231, "logps/rejected": -1.0454927682876587, "loss": 0.5902, "nll_loss": 0.5541085004806519, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04571080952882767, "rewards/margins": 0.006563832517713308, "rewards/rejected": -0.052274636924266815, "step": 130 }, { "epoch": 0.14682747771368643, "grad_norm": 2.2223756230190213, "learning_rate": 1.6903085094570334e-05, "log_odds_chosen": 0.231459379196167, "log_odds_ratio": -0.659934937953949, "logits/chosen": -2.5273799896240234, "logits/rejected": -2.5001978874206543, "logps/chosen": -0.971345067024231, "logps/rejected": -1.1217668056488037, "loss": 0.5945, "nll_loss": 0.564177393913269, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04856724292039871, "rewards/margins": 0.007521096616983414, "rewards/rejected": -0.05608834698796272, "step": 140 }, { "epoch": 0.15731515469323545, "grad_norm": 3.55513500930042, "learning_rate": 1.6329931618554523e-05, "log_odds_chosen": 0.18197762966156006, "log_odds_ratio": -0.735857367515564, "logits/chosen": -2.5072078704833984, "logits/rejected": -2.4954299926757812, "logps/chosen": -0.9893903732299805, "logps/rejected": -1.1020596027374268, "loss": 0.553, "nll_loss": 0.5451637506484985, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04946952313184738, "rewards/margins": 0.00563345942646265, "rewards/rejected": -0.055102985352277756, "step": 150 }, { "epoch": 0.16780283167278448, "grad_norm": 2.753579339789172, "learning_rate": 1.5811388300841898e-05, "log_odds_chosen": 0.2206648290157318, "log_odds_ratio": -0.6601604223251343, "logits/chosen": -2.54675030708313, "logits/rejected": -2.53303861618042, "logps/chosen": -0.9035905599594116, "logps/rejected": -1.0334583520889282, "loss": 0.6058, "nll_loss": 0.5536268949508667, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04517952725291252, "rewards/margins": 0.006493390537798405, "rewards/rejected": -0.05167291685938835, "step": 160 }, { "epoch": 0.1782905086523335, "grad_norm": 2.4463207326823673, "learning_rate": 1.533929977694741e-05, "log_odds_chosen": 0.3002270460128784, "log_odds_ratio": -0.6512068510055542, "logits/chosen": -2.55534029006958, "logits/rejected": -2.53877592086792, "logps/chosen": -0.8796469569206238, "logps/rejected": -1.063819169998169, "loss": 0.5849, "nll_loss": 0.5501061677932739, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04398234561085701, "rewards/margins": 0.009208607487380505, "rewards/rejected": -0.05319095402956009, "step": 170 }, { "epoch": 0.18877818563188253, "grad_norm": 2.404564536005987, "learning_rate": 1.49071198499986e-05, "log_odds_chosen": 0.2884437143802643, "log_odds_ratio": -0.6566611528396606, "logits/chosen": -2.5615644454956055, "logits/rejected": -2.5457139015197754, "logps/chosen": -0.9158379435539246, "logps/rejected": -1.0882136821746826, "loss": 0.5658, "nll_loss": 0.5478283166885376, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04579189792275429, "rewards/margins": 0.008618785068392754, "rewards/rejected": -0.05441068485379219, "step": 180 }, { "epoch": 0.19926586261143156, "grad_norm": 3.2974100665964885, "learning_rate": 1.4509525002200235e-05, "log_odds_chosen": 0.23702804744243622, "log_odds_ratio": -0.6489595770835876, "logits/chosen": -2.644819498062134, "logits/rejected": -2.6255900859832764, "logps/chosen": -0.9308468103408813, "logps/rejected": -1.0799505710601807, "loss": 0.5902, "nll_loss": 0.6114972829818726, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04654233902692795, "rewards/margins": 0.007455187849700451, "rewards/rejected": -0.053997524082660675, "step": 190 }, { "epoch": 0.2097535395909806, "grad_norm": 2.498750011275506, "learning_rate": 1.4142135623730951e-05, "log_odds_chosen": 0.29194706678390503, "log_odds_ratio": -0.6627270579338074, "logits/chosen": -2.5841925144195557, "logits/rejected": -2.5723748207092285, "logps/chosen": -0.917371928691864, "logps/rejected": -1.126123070716858, "loss": 0.5663, "nll_loss": 0.5702028274536133, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04586859419941902, "rewards/margins": 0.0104375584051013, "rewards/rejected": -0.0563061460852623, "step": 200 }, { "epoch": 0.2097535395909806, "eval_log_odds_chosen": 0.28631675243377686, "eval_log_odds_ratio": -0.644675076007843, "eval_logits/chosen": -2.5596959590911865, "eval_logits/rejected": -2.537684917449951, "eval_logps/chosen": -0.8798824548721313, "eval_logps/rejected": -1.0675764083862305, "eval_loss": 0.5741076469421387, "eval_nll_loss": 0.5351698398590088, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.043994128704071045, "eval_rewards/margins": 0.009384696371853352, "eval_rewards/rejected": -0.05337882414460182, "eval_runtime": 137.7655, "eval_samples_per_second": 14.474, "eval_steps_per_second": 0.457, "step": 200 }, { "epoch": 0.22024121657052964, "grad_norm": 2.310322648029005, "learning_rate": 1.3801311186847084e-05, "log_odds_chosen": 0.1077527180314064, "log_odds_ratio": -0.7207110524177551, "logits/chosen": -2.5468177795410156, "logits/rejected": -2.544996976852417, "logps/chosen": -0.8708482980728149, "logps/rejected": -0.9297773241996765, "loss": 0.5676, "nll_loss": 0.5340272188186646, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04354241490364075, "rewards/margins": 0.002946457825601101, "rewards/rejected": -0.04648887366056442, "step": 210 }, { "epoch": 0.23072889355007867, "grad_norm": 2.578087834768522, "learning_rate": 1.3483997249264842e-05, "log_odds_chosen": 0.1988961100578308, "log_odds_ratio": -0.6947790384292603, "logits/chosen": -2.582960605621338, "logits/rejected": -2.5871338844299316, "logps/chosen": -0.8790571093559265, "logps/rejected": -1.0056135654449463, "loss": 0.5604, "nll_loss": 0.5243524312973022, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.043952859938144684, "rewards/margins": 0.0063278162851929665, "rewards/rejected": -0.05028067156672478, "step": 220 }, { "epoch": 0.2412165705296277, "grad_norm": 2.166025203586939, "learning_rate": 1.3187609467915744e-05, "log_odds_chosen": 0.28293663263320923, "log_odds_ratio": -0.6729618906974792, "logits/chosen": -2.409632682800293, "logits/rejected": -2.407254695892334, "logps/chosen": -0.923631489276886, "logps/rejected": -1.1052097082138062, "loss": 0.5705, "nll_loss": 0.5283125638961792, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0461815744638443, "rewards/margins": 0.009078909642994404, "rewards/rejected": -0.05526048690080643, "step": 230 }, { "epoch": 0.2517042475091767, "grad_norm": 3.8331735868635723, "learning_rate": 1.2909944487358057e-05, "log_odds_chosen": 0.23131528496742249, "log_odds_ratio": -0.6579959988594055, "logits/chosen": -2.456178665161133, "logits/rejected": -2.4356391429901123, "logps/chosen": -0.9076164960861206, "logps/rejected": -1.0572835206985474, "loss": 0.5795, "nll_loss": 0.5539125800132751, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04538082331418991, "rewards/margins": 0.007483348250389099, "rewards/rejected": -0.05286417528986931, "step": 240 }, { "epoch": 0.26219192448872575, "grad_norm": 2.3703558112076846, "learning_rate": 1.2649110640673518e-05, "log_odds_chosen": 0.24735364317893982, "log_odds_ratio": -0.6739610433578491, "logits/chosen": -2.3664963245391846, "logits/rejected": -2.3717617988586426, "logps/chosen": -0.8910790681838989, "logps/rejected": -1.0310931205749512, "loss": 0.552, "nll_loss": 0.521629810333252, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.044553957879543304, "rewards/margins": 0.007000704295933247, "rewards/rejected": -0.05155465751886368, "step": 250 }, { "epoch": 0.2726796014682748, "grad_norm": 2.3126279019982494, "learning_rate": 1.2403473458920845e-05, "log_odds_chosen": 0.21803805232048035, "log_odds_ratio": -0.6705144047737122, "logits/chosen": -2.3871326446533203, "logits/rejected": -2.3607029914855957, "logps/chosen": -0.8851995468139648, "logps/rejected": -1.0210189819335938, "loss": 0.5318, "nll_loss": 0.45665669441223145, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.044259972870349884, "rewards/margins": 0.006790975574404001, "rewards/rejected": -0.05105094984173775, "step": 260 }, { "epoch": 0.2831672784478238, "grad_norm": 3.0127014090338062, "learning_rate": 1.2171612389003691e-05, "log_odds_chosen": 0.19388818740844727, "log_odds_ratio": -0.6943486928939819, "logits/chosen": -2.4198155403137207, "logits/rejected": -2.3934123516082764, "logps/chosen": -0.9466629028320312, "logps/rejected": -1.087548017501831, "loss": 0.5675, "nll_loss": 0.5421209335327148, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.04733314737677574, "rewards/margins": 0.007044260855764151, "rewards/rejected": -0.05437741428613663, "step": 270 }, { "epoch": 0.29365495542737285, "grad_norm": 2.1321408503589745, "learning_rate": 1.1952286093343936e-05, "log_odds_chosen": 0.23094406723976135, "log_odds_ratio": -0.6691509485244751, "logits/chosen": -2.3476357460021973, "logits/rejected": -2.3334882259368896, "logps/chosen": -0.9389116168022156, "logps/rejected": -1.0817869901657104, "loss": 0.5428, "nll_loss": 0.4766770303249359, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.04694558307528496, "rewards/margins": 0.007143768016248941, "rewards/rejected": -0.05408934876322746, "step": 280 }, { "epoch": 0.30414263240692185, "grad_norm": 2.9832356292712654, "learning_rate": 1.1744404390294071e-05, "log_odds_chosen": 0.3523382842540741, "log_odds_ratio": -0.6227424740791321, "logits/chosen": -2.2994518280029297, "logits/rejected": -2.2809882164001465, "logps/chosen": -0.8515156507492065, "logps/rejected": -1.0561182498931885, "loss": 0.5582, "nll_loss": 0.49160194396972656, "rewards/accuracies": 0.625, "rewards/chosen": -0.04257578402757645, "rewards/margins": 0.010230125859379768, "rewards/rejected": -0.052805911749601364, "step": 290 }, { "epoch": 0.3146303093864709, "grad_norm": 2.455429454160177, "learning_rate": 1.1547005383792517e-05, "log_odds_chosen": 0.30407971143722534, "log_odds_ratio": -0.6693702340126038, "logits/chosen": -2.4093105792999268, "logits/rejected": -2.360572099685669, "logps/chosen": -0.8702648878097534, "logps/rejected": -1.071603775024414, "loss": 0.5817, "nll_loss": 0.509266197681427, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04351323843002319, "rewards/margins": 0.010066945105791092, "rewards/rejected": -0.053580187261104584, "step": 300 }, { "epoch": 0.3146303093864709, "eval_log_odds_chosen": 0.2780136466026306, "eval_log_odds_ratio": -0.650335431098938, "eval_logits/chosen": -2.4818081855773926, "eval_logits/rejected": -2.4498839378356934, "eval_logps/chosen": -0.8807685971260071, "eval_logps/rejected": -1.0628403425216675, "eval_loss": 0.5571724772453308, "eval_nll_loss": 0.5207235217094421, "eval_rewards/accuracies": 0.6190476417541504, "eval_rewards/chosen": -0.04403843358159065, "eval_rewards/margins": 0.009103580377995968, "eval_rewards/rejected": -0.053142011165618896, "eval_runtime": 140.9657, "eval_samples_per_second": 14.145, "eval_steps_per_second": 0.447, "step": 300 }, { "epoch": 0.3251179863660199, "grad_norm": 2.1236305912642894, "learning_rate": 1.1359236684941297e-05, "log_odds_chosen": 0.2490301877260208, "log_odds_ratio": -0.6818236112594604, "logits/chosen": -2.438469409942627, "logits/rejected": -2.4002931118011475, "logps/chosen": -0.9081015586853027, "logps/rejected": -1.0690175294876099, "loss": 0.5876, "nll_loss": 0.5490554571151733, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.045405078679323196, "rewards/margins": 0.008045798167586327, "rewards/rejected": -0.053450871258974075, "step": 310 }, { "epoch": 0.33560566334556896, "grad_norm": 2.120713978353275, "learning_rate": 1.118033988749895e-05, "log_odds_chosen": 0.24050185084342957, "log_odds_ratio": -0.6646271347999573, "logits/chosen": -2.427072286605835, "logits/rejected": -2.400460720062256, "logps/chosen": -0.919741153717041, "logps/rejected": -1.0707252025604248, "loss": 0.5255, "nll_loss": 0.4938685894012451, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04598705843091011, "rewards/margins": 0.0075491988100111485, "rewards/rejected": -0.05353625863790512, "step": 320 }, { "epoch": 0.34609334032511796, "grad_norm": 3.9130583978149622, "learning_rate": 1.1009637651263608e-05, "log_odds_chosen": 0.25334832072257996, "log_odds_ratio": -0.6984423995018005, "logits/chosen": -2.404737949371338, "logits/rejected": -2.3937125205993652, "logps/chosen": -0.9015901684761047, "logps/rejected": -1.0603028535842896, "loss": 0.5557, "nll_loss": 0.5412198305130005, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04507950693368912, "rewards/margins": 0.007935632951557636, "rewards/rejected": -0.05301513522863388, "step": 330 }, { "epoch": 0.356581017304667, "grad_norm": 2.354938087613489, "learning_rate": 1.0846522890932809e-05, "log_odds_chosen": 0.17314568161964417, "log_odds_ratio": -0.6990125775337219, "logits/chosen": -2.3741681575775146, "logits/rejected": -2.372586727142334, "logps/chosen": -0.8716468811035156, "logps/rejected": -0.989061713218689, "loss": 0.5708, "nll_loss": 0.5135380029678345, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.04358234256505966, "rewards/margins": 0.005870741792023182, "rewards/rejected": -0.04945308715105057, "step": 340 }, { "epoch": 0.36706869428421607, "grad_norm": 2.2044319965087382, "learning_rate": 1.0690449676496977e-05, "log_odds_chosen": 0.24199283123016357, "log_odds_ratio": -0.687169075012207, "logits/chosen": -2.426055908203125, "logits/rejected": -2.37978196144104, "logps/chosen": -0.8775957226753235, "logps/rejected": -1.019217848777771, "loss": 0.5463, "nll_loss": 0.5177103281021118, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04387979209423065, "rewards/margins": 0.007081110030412674, "rewards/rejected": -0.05096089839935303, "step": 350 }, { "epoch": 0.37755637126376507, "grad_norm": 1.951247314132421, "learning_rate": 1.0540925533894598e-05, "log_odds_chosen": 0.37273699045181274, "log_odds_ratio": -0.6097368001937866, "logits/chosen": -2.3991737365722656, "logits/rejected": -2.3913745880126953, "logps/chosen": -0.8743513226509094, "logps/rejected": -1.1015660762786865, "loss": 0.5509, "nll_loss": 0.5144286155700684, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.04371756687760353, "rewards/margins": 0.011360744014382362, "rewards/rejected": -0.05507831647992134, "step": 360 }, { "epoch": 0.3880440482433141, "grad_norm": 2.1160835291077307, "learning_rate": 1.0397504898200728e-05, "log_odds_chosen": 0.37601083517074585, "log_odds_ratio": -0.6155336499214172, "logits/chosen": -2.4748804569244385, "logits/rejected": -2.4376637935638428, "logps/chosen": -0.8649997711181641, "logps/rejected": -1.1139612197875977, "loss": 0.5205, "nll_loss": 0.502615749835968, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.043249987065792084, "rewards/margins": 0.012448069639503956, "rewards/rejected": -0.05569805949926376, "step": 370 }, { "epoch": 0.3985317252228631, "grad_norm": 2.226775744348268, "learning_rate": 1.0259783520851543e-05, "log_odds_chosen": 0.429561048746109, "log_odds_ratio": -0.5968413949012756, "logits/chosen": -2.519869089126587, "logits/rejected": -2.494752883911133, "logps/chosen": -0.8703508377075195, "logps/rejected": -1.1160125732421875, "loss": 0.5374, "nll_loss": 0.5153257846832275, "rewards/accuracies": 0.65625, "rewards/chosen": -0.043517544865608215, "rewards/margins": 0.012283083982765675, "rewards/rejected": -0.05580062419176102, "step": 380 }, { "epoch": 0.4090194022024122, "grad_norm": 2.401246607233204, "learning_rate": 1.0127393670836667e-05, "log_odds_chosen": 0.08164841681718826, "log_odds_ratio": -0.730138897895813, "logits/chosen": -2.456601619720459, "logits/rejected": -2.4586360454559326, "logps/chosen": -0.9149462580680847, "logps/rejected": -0.9739354848861694, "loss": 0.5576, "nll_loss": 0.5350494384765625, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.045747317373752594, "rewards/margins": 0.0029494580812752247, "rewards/rejected": -0.048696767538785934, "step": 390 }, { "epoch": 0.4195070791819612, "grad_norm": 1.9835782676616263, "learning_rate": 1e-05, "log_odds_chosen": 0.24804361164569855, "log_odds_ratio": -0.6891428232192993, "logits/chosen": -2.352999210357666, "logits/rejected": -2.3628151416778564, "logps/chosen": -0.9478782415390015, "logps/rejected": -1.1192692518234253, "loss": 0.5724, "nll_loss": 0.5249911546707153, "rewards/accuracies": 0.53125, "rewards/chosen": -0.047393910586833954, "rewards/margins": 0.008569559082388878, "rewards/rejected": -0.05596347525715828, "step": 400 }, { "epoch": 0.4195070791819612, "eval_log_odds_chosen": 0.2819042503833771, "eval_log_odds_ratio": -0.6550887227058411, "eval_logits/chosen": -2.4376399517059326, "eval_logits/rejected": -2.4026126861572266, "eval_logps/chosen": -0.8510361313819885, "eval_logps/rejected": -1.029338002204895, "eval_loss": 0.5415622591972351, "eval_nll_loss": 0.5060027837753296, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.042551808059215546, "eval_rewards/margins": 0.008915101177990437, "eval_rewards/rejected": -0.051466912031173706, "eval_runtime": 135.9814, "eval_samples_per_second": 14.664, "eval_steps_per_second": 0.463, "step": 400 }, { "epoch": 0.4299947561615102, "grad_norm": 2.0741408388417053, "learning_rate": 9.877295966495898e-06, "log_odds_chosen": 0.14674368500709534, "log_odds_ratio": -0.7315293550491333, "logits/chosen": -2.453657388687134, "logits/rejected": -2.4033920764923096, "logps/chosen": -0.8739027976989746, "logps/rejected": -0.9881707429885864, "loss": 0.546, "nll_loss": 0.48288026452064514, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04369514063000679, "rewards/margins": 0.0057133943773806095, "rewards/rejected": -0.04940853267908096, "step": 410 }, { "epoch": 0.4404824331410593, "grad_norm": 1.862967371631046, "learning_rate": 9.759000729485331e-06, "log_odds_chosen": 0.3599195182323456, "log_odds_ratio": -0.6281547546386719, "logits/chosen": -2.3640646934509277, "logits/rejected": -2.3699867725372314, "logps/chosen": -0.8427717089653015, "logps/rejected": -1.0523298978805542, "loss": 0.5338, "nll_loss": 0.475394070148468, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.042138583958148956, "rewards/margins": 0.01047790888696909, "rewards/rejected": -0.05261648818850517, "step": 420 }, { "epoch": 0.4509701101206083, "grad_norm": 2.2577027347270673, "learning_rate": 9.644856443408244e-06, "log_odds_chosen": 0.2772213816642761, "log_odds_ratio": -0.6547843217849731, "logits/chosen": -2.463442325592041, "logits/rejected": -2.4424116611480713, "logps/chosen": -0.8533428311347961, "logps/rejected": -1.0268352031707764, "loss": 0.5561, "nll_loss": 0.5445196628570557, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04266713932156563, "rewards/margins": 0.008674620650708675, "rewards/rejected": -0.051341764628887177, "step": 430 }, { "epoch": 0.46145778710015734, "grad_norm": 2.148366110891132, "learning_rate": 9.534625892455923e-06, "log_odds_chosen": 0.251740038394928, "log_odds_ratio": -0.6593549847602844, "logits/chosen": -2.433262586593628, "logits/rejected": -2.400451183319092, "logps/chosen": -0.869005560874939, "logps/rejected": -1.0262689590454102, "loss": 0.5514, "nll_loss": 0.518151044845581, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.043450284749269485, "rewards/margins": 0.007863158360123634, "rewards/rejected": -0.05131344124674797, "step": 440 }, { "epoch": 0.47194546407970633, "grad_norm": 1.9641531890945303, "learning_rate": 9.428090415820635e-06, "log_odds_chosen": 0.3584665358066559, "log_odds_ratio": -0.6613593101501465, "logits/chosen": -2.3730902671813965, "logits/rejected": -2.3335137367248535, "logps/chosen": -0.8309770822525024, "logps/rejected": -1.0618839263916016, "loss": 0.5284, "nll_loss": 0.4951680600643158, "rewards/accuracies": 0.625, "rewards/chosen": -0.041548848152160645, "rewards/margins": 0.011545347049832344, "rewards/rejected": -0.05309419706463814, "step": 450 }, { "epoch": 0.4824331410592554, "grad_norm": 2.1323921754635955, "learning_rate": 9.325048082403139e-06, "log_odds_chosen": 0.18219377100467682, "log_odds_ratio": -0.7052776217460632, "logits/chosen": -2.417771577835083, "logits/rejected": -2.391197681427002, "logps/chosen": -0.9514438509941101, "logps/rejected": -1.081947922706604, "loss": 0.532, "nll_loss": 0.5332220792770386, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.047572195529937744, "rewards/margins": 0.006525200791656971, "rewards/rejected": -0.05409740284085274, "step": 460 }, { "epoch": 0.4929208180388044, "grad_norm": 2.0935836198507145, "learning_rate": 9.225312080288851e-06, "log_odds_chosen": 0.2585422098636627, "log_odds_ratio": -0.681999683380127, "logits/chosen": -2.4441823959350586, "logits/rejected": -2.418107271194458, "logps/chosen": -0.8849735260009766, "logps/rejected": -1.0435359477996826, "loss": 0.5416, "nll_loss": 0.5094045400619507, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04424867779016495, "rewards/margins": 0.007928118109703064, "rewards/rejected": -0.05217679589986801, "step": 470 }, { "epoch": 0.5034084950183534, "grad_norm": 2.077184991073431, "learning_rate": 9.12870929175277e-06, "log_odds_chosen": 0.1411927044391632, "log_odds_ratio": -0.7211004495620728, "logits/chosen": -2.478450298309326, "logits/rejected": -2.4527339935302734, "logps/chosen": -0.8615080118179321, "logps/rejected": -0.9589959979057312, "loss": 0.5176, "nll_loss": 0.5134377479553223, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04307539761066437, "rewards/margins": 0.004874409642070532, "rewards/rejected": -0.04794980585575104, "step": 480 }, { "epoch": 0.5138961719979025, "grad_norm": 1.9047433825748046, "learning_rate": 9.035079029052514e-06, "log_odds_chosen": 0.20130577683448792, "log_odds_ratio": -0.687514066696167, "logits/chosen": -2.404505491256714, "logits/rejected": -2.3535995483398438, "logps/chosen": -0.9324936866760254, "logps/rejected": -1.036684274673462, "loss": 0.54, "nll_loss": 0.5031000375747681, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04662468656897545, "rewards/margins": 0.0052095321007072926, "rewards/rejected": -0.05183422565460205, "step": 490 }, { "epoch": 0.5243838489774515, "grad_norm": 2.2722995091864013, "learning_rate": 8.94427190999916e-06, "log_odds_chosen": 0.20798742771148682, "log_odds_ratio": -0.6965998411178589, "logits/chosen": -2.503917694091797, "logits/rejected": -2.4567532539367676, "logps/chosen": -0.882551372051239, "logps/rejected": -1.009610652923584, "loss": 0.5486, "nll_loss": 0.5240460634231567, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04412757605314255, "rewards/margins": 0.00635296106338501, "rewards/rejected": -0.05048053711652756, "step": 500 }, { "epoch": 0.5243838489774515, "eval_log_odds_chosen": 0.319318950176239, "eval_log_odds_ratio": -0.6438891291618347, "eval_logits/chosen": -2.471830368041992, "eval_logits/rejected": -2.437340021133423, "eval_logps/chosen": -0.8492264151573181, "eval_logps/rejected": -1.0513862371444702, "eval_loss": 0.5343749523162842, "eval_nll_loss": 0.49899229407310486, "eval_rewards/accuracies": 0.6150793433189392, "eval_rewards/chosen": -0.042461320757865906, "eval_rewards/margins": 0.010107995010912418, "eval_rewards/rejected": -0.05256931483745575, "eval_runtime": 137.8752, "eval_samples_per_second": 14.462, "eval_steps_per_second": 0.457, "step": 500 }, { "epoch": 0.5348715259570005, "grad_norm": 1.7543648883606602, "learning_rate": 8.856148855400955e-06, "log_odds_chosen": 0.290159672498703, "log_odds_ratio": -0.6539579629898071, "logits/chosen": -2.5112712383270264, "logits/rejected": -2.4847443103790283, "logps/chosen": -0.8425674438476562, "logps/rejected": -1.0141561031341553, "loss": 0.5278, "nll_loss": 0.5365942120552063, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04212837293744087, "rewards/margins": 0.00857943668961525, "rewards/rejected": -0.05070780962705612, "step": 510 }, { "epoch": 0.5453592029365496, "grad_norm": 1.6585367227101162, "learning_rate": 8.770580193070294e-06, "log_odds_chosen": 0.23928451538085938, "log_odds_ratio": -0.6756108999252319, "logits/chosen": -2.4492619037628174, "logits/rejected": -2.413327693939209, "logps/chosen": -0.9059408903121948, "logps/rejected": -1.068650245666504, "loss": 0.5372, "nll_loss": 0.47487586736679077, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0452970452606678, "rewards/margins": 0.008135473355650902, "rewards/rejected": -0.053432513028383255, "step": 520 }, { "epoch": 0.5558468799160986, "grad_norm": 2.041930709602407, "learning_rate": 8.687444855261389e-06, "log_odds_chosen": 0.4141673445701599, "log_odds_ratio": -0.6465325355529785, "logits/chosen": -2.50728178024292, "logits/rejected": -2.4843533039093018, "logps/chosen": -0.828266978263855, "logps/rejected": -1.111327886581421, "loss": 0.5372, "nll_loss": 0.4500916004180908, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04141335189342499, "rewards/margins": 0.014153043739497662, "rewards/rejected": -0.055566392838954926, "step": 530 }, { "epoch": 0.5663345568956476, "grad_norm": 1.8588001511511827, "learning_rate": 8.606629658238705e-06, "log_odds_chosen": 0.1719091385602951, "log_odds_ratio": -0.697492241859436, "logits/chosen": -2.500349760055542, "logits/rejected": -2.4829323291778564, "logps/chosen": -0.8647764325141907, "logps/rejected": -0.9715200662612915, "loss": 0.553, "nll_loss": 0.526767373085022, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04323882237076759, "rewards/margins": 0.005337181035429239, "rewards/rejected": -0.048576001077890396, "step": 540 }, { "epoch": 0.5768222338751966, "grad_norm": 1.9151013438807294, "learning_rate": 8.528028654224417e-06, "log_odds_chosen": 0.41694098711013794, "log_odds_ratio": -0.6211504936218262, "logits/chosen": -2.526711940765381, "logits/rejected": -2.488142490386963, "logps/chosen": -0.866176426410675, "logps/rejected": -1.1338094472885132, "loss": 0.5372, "nll_loss": 0.5370919704437256, "rewards/accuracies": 0.625, "rewards/chosen": -0.04330882430076599, "rewards/margins": 0.013381647877395153, "rewards/rejected": -0.05669047310948372, "step": 550 }, { "epoch": 0.5873099108547457, "grad_norm": 1.9475996513575733, "learning_rate": 8.451542547285167e-06, "log_odds_chosen": 0.23696064949035645, "log_odds_ratio": -0.6743646860122681, "logits/chosen": -2.518937110900879, "logits/rejected": -2.490901470184326, "logps/chosen": -0.8790968060493469, "logps/rejected": -1.026903748512268, "loss": 0.5214, "nll_loss": 0.5015530586242676, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.043954841792583466, "rewards/margins": 0.007390348706394434, "rewards/rejected": -0.05134518817067146, "step": 560 }, { "epoch": 0.5977975878342947, "grad_norm": 1.8968936654846589, "learning_rate": 8.37707816583391e-06, "log_odds_chosen": 0.1709347516298294, "log_odds_ratio": -0.721364438533783, "logits/chosen": -2.5332372188568115, "logits/rejected": -2.5029869079589844, "logps/chosen": -0.8752782940864563, "logps/rejected": -0.9968281984329224, "loss": 0.5044, "nll_loss": 0.528136134147644, "rewards/accuracies": 0.5, "rewards/chosen": -0.043763916939496994, "rewards/margins": 0.006077499594539404, "rewards/rejected": -0.04984141141176224, "step": 570 }, { "epoch": 0.6082852648138437, "grad_norm": 1.901568677283731, "learning_rate": 8.304547985373997e-06, "log_odds_chosen": 0.29886722564697266, "log_odds_ratio": -0.6539247632026672, "logits/chosen": -2.5251448154449463, "logits/rejected": -2.5202994346618652, "logps/chosen": -0.8657291531562805, "logps/rejected": -1.0712764263153076, "loss": 0.5461, "nll_loss": 0.48294153809547424, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04328645393252373, "rewards/margins": 0.01027736347168684, "rewards/rejected": -0.05356382206082344, "step": 580 }, { "epoch": 0.6187729417933928, "grad_norm": 1.9902242754931676, "learning_rate": 8.233869695926184e-06, "log_odds_chosen": 0.33919957280158997, "log_odds_ratio": -0.6699340343475342, "logits/chosen": -2.56527042388916, "logits/rejected": -2.572580575942993, "logps/chosen": -0.8352983593940735, "logps/rejected": -1.0507652759552002, "loss": 0.5138, "nll_loss": 0.5514861345291138, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.041764914989471436, "rewards/margins": 0.010773347690701485, "rewards/rejected": -0.05253826454281807, "step": 590 }, { "epoch": 0.6292606187729418, "grad_norm": 1.9968521848986331, "learning_rate": 8.164965809277262e-06, "log_odds_chosen": 0.3369660973548889, "log_odds_ratio": -0.6556235551834106, "logits/chosen": -2.563744306564331, "logits/rejected": -2.5540928840637207, "logps/chosen": -0.8338971138000488, "logps/rejected": -1.0589314699172974, "loss": 0.5156, "nll_loss": 0.4856153130531311, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04169485345482826, "rewards/margins": 0.011251723393797874, "rewards/rejected": -0.052946578711271286, "step": 600 }, { "epoch": 0.6292606187729418, "eval_log_odds_chosen": 0.30560463666915894, "eval_log_odds_ratio": -0.6469583511352539, "eval_logits/chosen": -2.581132173538208, "eval_logits/rejected": -2.555058717727661, "eval_logps/chosen": -0.8332868218421936, "eval_logps/rejected": -1.0284805297851562, "eval_loss": 0.5242142677307129, "eval_nll_loss": 0.48822054266929626, "eval_rewards/accuracies": 0.6150793433189392, "eval_rewards/chosen": -0.04166434332728386, "eval_rewards/margins": 0.009759685955941677, "eval_rewards/rejected": -0.05142403393983841, "eval_runtime": 136.8777, "eval_samples_per_second": 14.568, "eval_steps_per_second": 0.46, "step": 600 }, { "epoch": 0.6397482957524908, "grad_norm": 2.0046645094729914, "learning_rate": 8.097763301789162e-06, "log_odds_chosen": 0.1975608468055725, "log_odds_ratio": -0.692371666431427, "logits/chosen": -2.4713737964630127, "logits/rejected": -2.4652817249298096, "logps/chosen": -0.8778279423713684, "logps/rejected": -0.9979953765869141, "loss": 0.5255, "nll_loss": 0.4872562289237976, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04389139264822006, "rewards/margins": 0.006008377764374018, "rewards/rejected": -0.04989977926015854, "step": 610 }, { "epoch": 0.6502359727320398, "grad_norm": 2.040129936950799, "learning_rate": 8.03219328902499e-06, "log_odds_chosen": 0.1849410980939865, "log_odds_ratio": -0.7018038630485535, "logits/chosen": -2.539135694503784, "logits/rejected": -2.500748872756958, "logps/chosen": -0.8882759213447571, "logps/rejected": -1.0186254978179932, "loss": 0.5299, "nll_loss": 0.5155984163284302, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04441379755735397, "rewards/margins": 0.006517473608255386, "rewards/rejected": -0.05093127489089966, "step": 620 }, { "epoch": 0.6607236497115889, "grad_norm": 2.2341440675434683, "learning_rate": 7.968190728895958e-06, "log_odds_chosen": 0.2307681292295456, "log_odds_ratio": -0.7022296786308289, "logits/chosen": -2.498748779296875, "logits/rejected": -2.490837812423706, "logps/chosen": -0.8587957620620728, "logps/rejected": -1.0074841976165771, "loss": 0.5336, "nll_loss": 0.5248268842697144, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04293978586792946, "rewards/margins": 0.007434426806867123, "rewards/rejected": -0.050374217331409454, "step": 630 }, { "epoch": 0.6712113266911379, "grad_norm": 2.376219217041332, "learning_rate": 7.905694150420949e-06, "log_odds_chosen": 0.2888760268688202, "log_odds_ratio": -0.6808607578277588, "logits/chosen": -2.4707798957824707, "logits/rejected": -2.437269687652588, "logps/chosen": -0.8727089166641235, "logps/rejected": -1.049213171005249, "loss": 0.5154, "nll_loss": 0.46024101972579956, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.043635450303554535, "rewards/margins": 0.008825212717056274, "rewards/rejected": -0.05246065929532051, "step": 640 }, { "epoch": 0.6816990036706869, "grad_norm": 1.8230434305650385, "learning_rate": 7.844645405527363e-06, "log_odds_chosen": 0.197784885764122, "log_odds_ratio": -0.706741988658905, "logits/chosen": -2.497851848602295, "logits/rejected": -2.4905192852020264, "logps/chosen": -0.8444819450378418, "logps/rejected": -0.975223183631897, "loss": 0.5333, "nll_loss": 0.5124696493148804, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04222410172224045, "rewards/margins": 0.006537059787660837, "rewards/rejected": -0.048761166632175446, "step": 650 }, { "epoch": 0.6921866806502359, "grad_norm": 1.8970235810963871, "learning_rate": 7.78498944161523e-06, "log_odds_chosen": 0.3309662640094757, "log_odds_ratio": -0.6530941724777222, "logits/chosen": -2.5231690406799316, "logits/rejected": -2.494469404220581, "logps/chosen": -0.8999967575073242, "logps/rejected": -1.128688097000122, "loss": 0.5297, "nll_loss": 0.4878067970275879, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04499983415007591, "rewards/margins": 0.01143457181751728, "rewards/rejected": -0.05643441155552864, "step": 660 }, { "epoch": 0.702674357629785, "grad_norm": 1.8146696004735343, "learning_rate": 7.726674092862559e-06, "log_odds_chosen": 0.45232027769088745, "log_odds_ratio": -0.6299984455108643, "logits/chosen": -2.4613699913024902, "logits/rejected": -2.447711944580078, "logps/chosen": -0.8380166292190552, "logps/rejected": -1.143587350845337, "loss": 0.524, "nll_loss": 0.4693591594696045, "rewards/accuracies": 0.65625, "rewards/chosen": -0.04190083220601082, "rewards/margins": 0.015278531238436699, "rewards/rejected": -0.05717936158180237, "step": 670 }, { "epoch": 0.713162034609334, "grad_norm": 2.112620721807998, "learning_rate": 7.669649888473705e-06, "log_odds_chosen": 0.31269508600234985, "log_odds_ratio": -0.6543976664543152, "logits/chosen": -2.464507818222046, "logits/rejected": -2.418670892715454, "logps/chosen": -0.871087908744812, "logps/rejected": -1.055972695350647, "loss": 0.5087, "nll_loss": 0.4947647452354431, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.0435543991625309, "rewards/margins": 0.009244237095117569, "rewards/rejected": -0.05279862880706787, "step": 680 }, { "epoch": 0.723649711588883, "grad_norm": 1.8678427871613372, "learning_rate": 7.61386987626881e-06, "log_odds_chosen": 0.1339389979839325, "log_odds_ratio": -0.7375361919403076, "logits/chosen": -2.5094785690307617, "logits/rejected": -2.479645252227783, "logps/chosen": -0.86939537525177, "logps/rejected": -0.9705018997192383, "loss": 0.5313, "nll_loss": 0.5503351092338562, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04346977174282074, "rewards/margins": 0.005055318586528301, "rewards/rejected": -0.04852508753538132, "step": 690 }, { "epoch": 0.7341373885684321, "grad_norm": 2.2093003307729235, "learning_rate": 7.559289460184545e-06, "log_odds_chosen": 0.31829774379730225, "log_odds_ratio": -0.6285902261734009, "logits/chosen": -2.4719974994659424, "logits/rejected": -2.4300436973571777, "logps/chosen": -0.8187413215637207, "logps/rejected": -1.0199077129364014, "loss": 0.5297, "nll_loss": 0.5423263907432556, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.04093705862760544, "rewards/margins": 0.010058322921395302, "rewards/rejected": -0.05099538713693619, "step": 700 }, { "epoch": 0.7341373885684321, "eval_log_odds_chosen": 0.3407081067562103, "eval_log_odds_ratio": -0.6351403594017029, "eval_logits/chosen": -2.4800758361816406, "eval_logits/rejected": -2.447735548019409, "eval_logps/chosen": -0.8215174674987793, "eval_logps/rejected": -1.0422321557998657, "eval_loss": 0.5191378593444824, "eval_nll_loss": 0.48380225896835327, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.04107587784528732, "eval_rewards/margins": 0.011035734787583351, "eval_rewards/rejected": -0.052111607044935226, "eval_runtime": 136.6803, "eval_samples_per_second": 14.589, "eval_steps_per_second": 0.461, "step": 700 }, { "epoch": 0.7446250655479811, "grad_norm": 1.9677237822159654, "learning_rate": 7.505866250408016e-06, "log_odds_chosen": 0.25284355878829956, "log_odds_ratio": -0.6641759872436523, "logits/chosen": -2.5103344917297363, "logits/rejected": -2.492450714111328, "logps/chosen": -0.8418480753898621, "logps/rejected": -1.0181466341018677, "loss": 0.5238, "nll_loss": 0.4800626337528229, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04209240525960922, "rewards/margins": 0.008814921602606773, "rewards/rejected": -0.050907332450151443, "step": 710 }, { "epoch": 0.7551127425275301, "grad_norm": 1.836122210517328, "learning_rate": 7.4535599249993e-06, "log_odds_chosen": 0.35044384002685547, "log_odds_ratio": -0.6476293802261353, "logits/chosen": -2.4395956993103027, "logits/rejected": -2.4294960498809814, "logps/chosen": -0.7990450859069824, "logps/rejected": -1.0232374668121338, "loss": 0.5307, "nll_loss": 0.4627358317375183, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.03995225578546524, "rewards/margins": 0.01120961643755436, "rewards/rejected": -0.05116187408566475, "step": 720 }, { "epoch": 0.7656004195070791, "grad_norm": 2.1971858392846007, "learning_rate": 7.402332101976053e-06, "log_odds_chosen": 0.119834303855896, "log_odds_ratio": -0.7195987701416016, "logits/chosen": -2.4724977016448975, "logits/rejected": -2.466116189956665, "logps/chosen": -0.8342846035957336, "logps/rejected": -0.9009860754013062, "loss": 0.5373, "nll_loss": 0.5026193857192993, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04171422868967056, "rewards/margins": 0.003335078712552786, "rewards/rejected": -0.045049309730529785, "step": 730 }, { "epoch": 0.7760880964866282, "grad_norm": 2.0508082994645074, "learning_rate": 7.352146220938079e-06, "log_odds_chosen": 0.3144014775753021, "log_odds_ratio": -0.6337074041366577, "logits/chosen": -2.502380847930908, "logits/rejected": -2.4844462871551514, "logps/chosen": -0.8134506940841675, "logps/rejected": -1.0163192749023438, "loss": 0.5318, "nll_loss": 0.4793321192264557, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.040672533214092255, "rewards/margins": 0.010143419727683067, "rewards/rejected": -0.05081595852971077, "step": 740 }, { "epoch": 0.7865757734661772, "grad_norm": 1.9563195894092036, "learning_rate": 7.3029674334022146e-06, "log_odds_chosen": 0.26552754640579224, "log_odds_ratio": -0.6666983366012573, "logits/chosen": -2.502037763595581, "logits/rejected": -2.4685168266296387, "logps/chosen": -0.8672981262207031, "logps/rejected": -1.0235345363616943, "loss": 0.5431, "nll_loss": 0.4982251226902008, "rewards/accuracies": 0.59375, "rewards/chosen": -0.043364908546209335, "rewards/margins": 0.007811821065843105, "rewards/rejected": -0.05117672681808472, "step": 750 }, { "epoch": 0.7970634504457262, "grad_norm": 1.7911295245119114, "learning_rate": 7.254762501100117e-06, "log_odds_chosen": 0.2668423354625702, "log_odds_ratio": -0.6578959226608276, "logits/chosen": -2.4371020793914795, "logits/rejected": -2.417811870574951, "logps/chosen": -0.812160849571228, "logps/rejected": -0.9835097193717957, "loss": 0.5124, "nll_loss": 0.4057600498199463, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04060804471373558, "rewards/margins": 0.008567440323531628, "rewards/rejected": -0.04917549341917038, "step": 760 }, { "epoch": 0.8075511274252754, "grad_norm": 2.141524900160083, "learning_rate": 7.207499701564472e-06, "log_odds_chosen": 0.21438488364219666, "log_odds_ratio": -0.7001821994781494, "logits/chosen": -2.432426929473877, "logits/rejected": -2.4075264930725098, "logps/chosen": -0.8868153691291809, "logps/rejected": -1.0428266525268555, "loss": 0.5341, "nll_loss": 0.5013958811759949, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.044340766966342926, "rewards/margins": 0.007800562772899866, "rewards/rejected": -0.052141331136226654, "step": 770 }, { "epoch": 0.8180388044048243, "grad_norm": 2.177116882955384, "learning_rate": 7.1611487403943295e-06, "log_odds_chosen": 0.20159511268138885, "log_odds_ratio": -0.6788761019706726, "logits/chosen": -2.4562764167785645, "logits/rejected": -2.4499940872192383, "logps/chosen": -0.883618950843811, "logps/rejected": -0.998609721660614, "loss": 0.5476, "nll_loss": 0.549780547618866, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.044180940836668015, "rewards/margins": 0.0057495469227433205, "rewards/rejected": -0.04993049427866936, "step": 780 }, { "epoch": 0.8285264813843733, "grad_norm": 1.9065399955928464, "learning_rate": 7.115680669648201e-06, "log_odds_chosen": 0.3184022307395935, "log_odds_ratio": -0.6474640965461731, "logits/chosen": -2.4409756660461426, "logits/rejected": -2.4378621578216553, "logps/chosen": -0.8189239501953125, "logps/rejected": -1.0288126468658447, "loss": 0.5063, "nll_loss": 0.4424379765987396, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04094620421528816, "rewards/margins": 0.010494431480765343, "rewards/rejected": -0.051440637558698654, "step": 790 }, { "epoch": 0.8390141583639223, "grad_norm": 2.0323396486068086, "learning_rate": 7.0710678118654756e-06, "log_odds_chosen": 0.4407121241092682, "log_odds_ratio": -0.6011781096458435, "logits/chosen": -2.411447048187256, "logits/rejected": -2.389554500579834, "logps/chosen": -0.7789972424507141, "logps/rejected": -1.0607415437698364, "loss": 0.5184, "nll_loss": 0.4200369715690613, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03894985839724541, "rewards/margins": 0.014087215065956116, "rewards/rejected": -0.053037069737911224, "step": 800 }, { "epoch": 0.8390141583639223, "eval_log_odds_chosen": 0.37830010056495667, "eval_log_odds_ratio": -0.6304489970207214, "eval_logits/chosen": -2.4921600818634033, "eval_logits/rejected": -2.4575421810150146, "eval_logps/chosen": -0.8178579211235046, "eval_logps/rejected": -1.0646613836288452, "eval_loss": 0.5137735605239868, "eval_nll_loss": 0.4796440601348877, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.04089289531111717, "eval_rewards/margins": 0.012340176850557327, "eval_rewards/rejected": -0.0532330721616745, "eval_runtime": 136.422, "eval_samples_per_second": 14.616, "eval_steps_per_second": 0.462, "step": 800 }, { "epoch": 0.8495018353434715, "grad_norm": 1.8282488507148427, "learning_rate": 7.027283689263066e-06, "log_odds_chosen": 0.34346696734428406, "log_odds_ratio": -0.6324015259742737, "logits/chosen": -2.4684414863586426, "logits/rejected": -2.419478178024292, "logps/chosen": -0.7967440485954285, "logps/rejected": -1.0016282796859741, "loss": 0.513, "nll_loss": 0.4749225676059723, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03983720391988754, "rewards/margins": 0.0102442167699337, "rewards/rejected": -0.050081413239240646, "step": 810 }, { "epoch": 0.8599895123230205, "grad_norm": 2.4353704194820343, "learning_rate": 6.984302957695783e-06, "log_odds_chosen": 0.3245043158531189, "log_odds_ratio": -0.643069863319397, "logits/chosen": -2.3819215297698975, "logits/rejected": -2.349990129470825, "logps/chosen": -0.8364176750183105, "logps/rejected": -1.031416654586792, "loss": 0.5047, "nll_loss": 0.42748355865478516, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.041820887476205826, "rewards/margins": 0.009749949909746647, "rewards/rejected": -0.0515708327293396, "step": 820 }, { "epoch": 0.8704771893025695, "grad_norm": 2.34776921636757, "learning_rate": 6.942101345006233e-06, "log_odds_chosen": 0.2353450506925583, "log_odds_ratio": -0.7053114175796509, "logits/chosen": -2.4152169227600098, "logits/rejected": -2.4146676063537598, "logps/chosen": -0.8572267293930054, "logps/rejected": -1.0250964164733887, "loss": 0.5251, "nll_loss": 0.4658161699771881, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.042861342430114746, "rewards/margins": 0.008393481373786926, "rewards/rejected": -0.051254820078611374, "step": 830 }, { "epoch": 0.8809648662821186, "grad_norm": 1.9903482887340351, "learning_rate": 6.900655593423542e-06, "log_odds_chosen": 0.19377179443836212, "log_odds_ratio": -0.6883701086044312, "logits/chosen": -2.450711727142334, "logits/rejected": -2.4275918006896973, "logps/chosen": -0.8693481683731079, "logps/rejected": -0.9985544085502625, "loss": 0.5129, "nll_loss": 0.4828173518180847, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04346740245819092, "rewards/margins": 0.006460316479206085, "rewards/rejected": -0.0499277226626873, "step": 840 }, { "epoch": 0.8914525432616676, "grad_norm": 2.0572418230575784, "learning_rate": 6.859943405700353e-06, "log_odds_chosen": 0.2696766257286072, "log_odds_ratio": -0.6502133011817932, "logits/chosen": -2.4989540576934814, "logits/rejected": -2.480175018310547, "logps/chosen": -0.8377168774604797, "logps/rejected": -1.0055049657821655, "loss": 0.5056, "nll_loss": 0.49036240577697754, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.041885845363140106, "rewards/margins": 0.008389403112232685, "rewards/rejected": -0.05027524754405022, "step": 850 }, { "epoch": 0.9019402202412166, "grad_norm": 1.9654607653654201, "learning_rate": 6.819943394704736e-06, "log_odds_chosen": 0.21943990886211395, "log_odds_ratio": -0.6898983716964722, "logits/chosen": -2.5337636470794678, "logits/rejected": -2.546189546585083, "logps/chosen": -0.8453003168106079, "logps/rejected": -0.9918826222419739, "loss": 0.5285, "nll_loss": 0.4758750796318054, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.042265016585588455, "rewards/margins": 0.007329112850129604, "rewards/rejected": -0.049594126641750336, "step": 860 }, { "epoch": 0.9124278972207656, "grad_norm": 1.9335503237781204, "learning_rate": 6.780635036208105e-06, "log_odds_chosen": 0.29112187027931213, "log_odds_ratio": -0.6706128120422363, "logits/chosen": -2.535698890686035, "logits/rejected": -2.5173022747039795, "logps/chosen": -0.8680477142333984, "logps/rejected": -1.0733777284622192, "loss": 0.4939, "nll_loss": 0.4834807515144348, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.04340239241719246, "rewards/margins": 0.010266497731208801, "rewards/rejected": -0.05366888642311096, "step": 870 }, { "epoch": 0.9229155742003147, "grad_norm": 1.6238877353235468, "learning_rate": 6.741998624632421e-06, "log_odds_chosen": 0.29792147874832153, "log_odds_ratio": -0.6587230563163757, "logits/chosen": -2.533979892730713, "logits/rejected": -2.498964548110962, "logps/chosen": -0.8105006217956543, "logps/rejected": -0.999383807182312, "loss": 0.4883, "nll_loss": 0.4381064474582672, "rewards/accuracies": 0.59375, "rewards/chosen": -0.040525030344724655, "rewards/margins": 0.009444162249565125, "rewards/rejected": -0.04996918886899948, "step": 880 }, { "epoch": 0.9334032511798637, "grad_norm": 1.7501205893349534, "learning_rate": 6.70401523153991e-06, "log_odds_chosen": 0.3352048397064209, "log_odds_ratio": -0.6494973301887512, "logits/chosen": -2.5071699619293213, "logits/rejected": -2.488619327545166, "logps/chosen": -0.811043381690979, "logps/rejected": -1.0027059316635132, "loss": 0.4932, "nll_loss": 0.4646259844303131, "rewards/accuracies": 0.59375, "rewards/chosen": -0.04055216908454895, "rewards/margins": 0.0095831248909235, "rewards/rejected": -0.0501352921128273, "step": 890 }, { "epoch": 0.9438909281594127, "grad_norm": 1.9660241717665867, "learning_rate": 6.666666666666667e-06, "log_odds_chosen": 0.2810109555721283, "log_odds_ratio": -0.6615744829177856, "logits/chosen": -2.5149343013763428, "logits/rejected": -2.5029118061065674, "logps/chosen": -0.7809039354324341, "logps/rejected": -0.9545317888259888, "loss": 0.5235, "nll_loss": 0.4655960202217102, "rewards/accuracies": 0.59375, "rewards/chosen": -0.039045192301273346, "rewards/margins": 0.008681395091116428, "rewards/rejected": -0.0477265901863575, "step": 900 }, { "epoch": 0.9438909281594127, "eval_log_odds_chosen": 0.3305439352989197, "eval_log_odds_ratio": -0.6379230618476868, "eval_logits/chosen": -2.5633676052093506, "eval_logits/rejected": -2.533735752105713, "eval_logps/chosen": -0.8084598779678345, "eval_logps/rejected": -1.0201667547225952, "eval_loss": 0.5087887644767761, "eval_nll_loss": 0.4741307497024536, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.040422990918159485, "eval_rewards/margins": 0.010585347190499306, "eval_rewards/rejected": -0.05100833997130394, "eval_runtime": 137.3356, "eval_samples_per_second": 14.519, "eval_steps_per_second": 0.459, "step": 900 }, { "epoch": 0.9543786051389617, "grad_norm": 1.980970750978968, "learning_rate": 6.629935441317959e-06, "log_odds_chosen": 0.4885142743587494, "log_odds_ratio": -0.6281706094741821, "logits/chosen": -2.5022482872009277, "logits/rejected": -2.4692506790161133, "logps/chosen": -0.8217445611953735, "logps/rejected": -1.1550103425979614, "loss": 0.5139, "nll_loss": 0.46534866094589233, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.04108722507953644, "rewards/margins": 0.016663286834955215, "rewards/rejected": -0.05775051191449165, "step": 910 }, { "epoch": 0.9648662821185108, "grad_norm": 1.9369311282677693, "learning_rate": 6.593804733957872e-06, "log_odds_chosen": 0.30783259868621826, "log_odds_ratio": -0.6484240293502808, "logits/chosen": -2.43939208984375, "logits/rejected": -2.4428539276123047, "logps/chosen": -0.7850558161735535, "logps/rejected": -0.9770357012748718, "loss": 0.4836, "nll_loss": 0.4291355013847351, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03925279527902603, "rewards/margins": 0.009598996490240097, "rewards/rejected": -0.04885178059339523, "step": 920 }, { "epoch": 0.9753539590980598, "grad_norm": 2.1104816487301523, "learning_rate": 6.55825835783953e-06, "log_odds_chosen": 0.2358274906873703, "log_odds_ratio": -0.6784078478813171, "logits/chosen": -2.5169990062713623, "logits/rejected": -2.4993927478790283, "logps/chosen": -0.8740841746330261, "logps/rejected": -1.0411127805709839, "loss": 0.5133, "nll_loss": 0.5242566466331482, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04370420426130295, "rewards/margins": 0.008351435884833336, "rewards/rejected": -0.05205564573407173, "step": 930 }, { "epoch": 0.9858416360776088, "grad_norm": 1.9809353614537575, "learning_rate": 6.523280730534423e-06, "log_odds_chosen": 0.25125178694725037, "log_odds_ratio": -0.6891010403633118, "logits/chosen": -2.52742862701416, "logits/rejected": -2.5163397789001465, "logps/chosen": -0.7782126069068909, "logps/rejected": -0.9240644574165344, "loss": 0.5102, "nll_loss": 0.478424072265625, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.038910627365112305, "rewards/margins": 0.007292595691978931, "rewards/rejected": -0.04620322585105896, "step": 940 }, { "epoch": 0.9963293130571579, "grad_norm": 2.052392197513496, "learning_rate": 6.488856845230502e-06, "log_odds_chosen": 0.22615018486976624, "log_odds_ratio": -0.7002879977226257, "logits/chosen": -2.4786956310272217, "logits/rejected": -2.447265625, "logps/chosen": -0.8600684404373169, "logps/rejected": -0.9971193075180054, "loss": 0.5383, "nll_loss": 0.5037115812301636, "rewards/accuracies": 0.5625, "rewards/chosen": -0.043003425002098083, "rewards/margins": 0.006852544844150543, "rewards/rejected": -0.04985596612095833, "step": 950 }, { "epoch": 0.9994756161510225, "step": 953, "total_flos": 0.0, "train_loss": 0.5642813587989287, "train_runtime": 20357.789, "train_samples_per_second": 2.997, "train_steps_per_second": 0.047 } ], "logging_steps": 10, "max_steps": 953, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }