{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 26.827203675535984, "learning_rate": 1.1363636363636363e-08, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.4113, "objective": 0.41588976979255676, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.41588976979255676, "step": 1 }, { "dpo_loss": 0.6931466460227966, "epoch": 0.02834199338686821, "grad_norm": 26.655974166157932, "learning_rate": 5.6818181818181815e-08, "logits": -1.3678570985794067, "logps": -84.42396545410156, "loss": 0.413, "objective": 0.3755497932434082, "ranking_idealized": 0.6145833134651184, "ranking_idealized_expo": 0.546875, "ranking_simple": 0.546875, "regularize": 0.3755497932434082, "step": 5 }, { "dpo_loss": 0.6916109323501587, "epoch": 0.05668398677373642, "grad_norm": 25.202984552553435, "learning_rate": 1.1363636363636363e-07, "logits": -1.446576714515686, "logps": -83.28290557861328, "loss": 0.4165, "objective": 0.4402167499065399, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.4402167499065399, "step": 10 }, { "dpo_loss": 0.6918571591377258, "epoch": 0.08502598016060463, "grad_norm": 24.8928017897937, "learning_rate": 1.7045454545454543e-07, "logits": -1.4129120111465454, "logps": -83.23918151855469, "loss": 0.423, "objective": 0.40991583466529846, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.40991583466529846, "step": 15 }, { "dpo_loss": 0.6913864612579346, "epoch": 0.11336797354747284, "grad_norm": 26.1438361746268, "learning_rate": 2.2727272727272726e-07, "logits": -1.405305027961731, "logps": -83.78267669677734, "loss": 0.4098, "objective": 0.4017895758152008, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.4017895758152008, "step": 20 }, { "dpo_loss": 0.6848570108413696, "epoch": 0.14170996693434104, "grad_norm": 26.79124275787855, "learning_rate": 2.840909090909091e-07, "logits": -1.4560821056365967, "logps": -83.52696990966797, "loss": 0.4034, "objective": 0.41992515325546265, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.41992515325546265, "step": 25 }, { "dpo_loss": 0.6844711303710938, "epoch": 0.17005196032120926, "grad_norm": 26.78495469951858, "learning_rate": 3.4090909090909085e-07, "logits": -1.4348876476287842, "logps": -84.22993469238281, "loss": 0.4013, "objective": 0.40435200929641724, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 0.40435200929641724, "step": 30 }, { "dpo_loss": 0.674633264541626, "epoch": 0.19839395370807747, "grad_norm": 27.550998188131874, "learning_rate": 3.977272727272727e-07, "logits": -1.4130500555038452, "logps": -82.98973846435547, "loss": 0.3925, "objective": 0.37177178263664246, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 0.37177178263664246, "step": 35 }, { "dpo_loss": 0.6748062372207642, "epoch": 0.22673594709494568, "grad_norm": 30.08966136803542, "learning_rate": 4.545454545454545e-07, "logits": -1.4084281921386719, "logps": -83.05668640136719, "loss": 0.4041, "objective": 0.4255501925945282, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.512499988079071, "regularize": 0.4255501925945282, "step": 40 }, { "dpo_loss": 0.6630504727363586, "epoch": 0.25507794048181387, "grad_norm": 25.26840087998978, "learning_rate": 5.113636363636363e-07, "logits": -1.5426502227783203, "logps": -84.47521209716797, "loss": 0.3947, "objective": 0.4412144422531128, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5458333492279053, "regularize": 0.4412144422531128, "step": 45 }, { "dpo_loss": 0.659989058971405, "epoch": 0.2834199338686821, "grad_norm": 24.465381128270387, "learning_rate": 5.681818181818182e-07, "logits": -1.4524168968200684, "logps": -82.95875549316406, "loss": 0.3854, "objective": 0.364622563123703, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.4583333432674408, "regularize": 0.364622563123703, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6886485815048218, "eval_logits": -1.4800517559051514, "eval_logps": -91.4064712524414, "eval_loss": 0.4056198000907898, "eval_objective": 0.4075882136821747, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5123966932296753, "eval_regularize": 0.4075882136821747, "eval_runtime": 265.1514, "eval_samples_per_second": 21.837, "eval_steps_per_second": 0.913, "step": 50 }, { "dpo_loss": 0.6636093258857727, "epoch": 0.3117619272555503, "grad_norm": 27.096857998186312, "learning_rate": 6.249999999999999e-07, "logits": -1.4970166683197021, "logps": -85.03699493408203, "loss": 0.3728, "objective": 0.3725493848323822, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.3725493848323822, "step": 55 }, { "dpo_loss": 0.6567211151123047, "epoch": 0.3401039206424185, "grad_norm": 25.695749312088278, "learning_rate": 6.818181818181817e-07, "logits": -1.4813398122787476, "logps": -84.4722671508789, "loss": 0.3599, "objective": 0.3475739657878876, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5333333611488342, "regularize": 0.3475739657878876, "step": 60 }, { "dpo_loss": 0.6518040895462036, "epoch": 0.3684459140292867, "grad_norm": 29.49986445883662, "learning_rate": 7.386363636363636e-07, "logits": -1.430372714996338, "logps": -84.72962188720703, "loss": 0.3497, "objective": 0.345612108707428, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.345612108707428, "step": 65 }, { "dpo_loss": 0.6528828740119934, "epoch": 0.39678790741615494, "grad_norm": 29.563000130373773, "learning_rate": 7.954545454545454e-07, "logits": -1.5054484605789185, "logps": -86.26591491699219, "loss": 0.35, "objective": 0.3871075510978699, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5416666865348816, "regularize": 0.3871075510978699, "step": 70 }, { "dpo_loss": 0.6483267545700073, "epoch": 0.42512990080302315, "grad_norm": 27.602858223257197, "learning_rate": 8.522727272727273e-07, "logits": -1.516791582107544, "logps": -86.8262710571289, "loss": 0.3468, "objective": 0.3712550401687622, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5833333134651184, "regularize": 0.3712550401687622, "step": 75 }, { "dpo_loss": 0.6363473534584045, "epoch": 0.45347189418989137, "grad_norm": 25.853451932249023, "learning_rate": 9.09090909090909e-07, "logits": -1.5554119348526, "logps": -85.4685287475586, "loss": 0.3352, "objective": 0.3362359404563904, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5416666865348816, "regularize": 0.336235910654068, "step": 80 }, { "dpo_loss": 0.6442821025848389, "epoch": 0.4818138875767596, "grad_norm": 25.41070923572626, "learning_rate": 9.65909090909091e-07, "logits": -1.5026181936264038, "logps": -84.45774841308594, "loss": 0.3304, "objective": 0.3429431617259979, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.3429431617259979, "step": 85 }, { "dpo_loss": 0.6335326433181763, "epoch": 0.5101558809636277, "grad_norm": 25.187750521174056, "learning_rate": 9.999842657116664e-07, "logits": -1.2913075685501099, "logps": -86.8448257446289, "loss": 0.3243, "objective": 0.32520177960395813, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5083333253860474, "regularize": 0.32520177960395813, "step": 90 }, { "dpo_loss": 0.6084260940551758, "epoch": 0.538497874350496, "grad_norm": 23.93476735734447, "learning_rate": 9.998072663403656e-07, "logits": -1.3773174285888672, "logps": -85.11380767822266, "loss": 0.3036, "objective": 0.3108121454715729, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.3108121454715729, "step": 95 }, { "dpo_loss": 0.6009453535079956, "epoch": 0.5668398677373642, "grad_norm": 25.488579442690856, "learning_rate": 9.99433669591504e-07, "logits": -1.4631216526031494, "logps": -85.5998764038086, "loss": 0.3126, "objective": 0.3375842273235321, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5625, "regularize": 0.3375842273235321, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6816912293434143, "eval_logits": -1.45261812210083, "eval_logps": -91.31664276123047, "eval_loss": 0.40215975046157837, "eval_objective": 0.400903582572937, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5206611752510071, "eval_regularize": 0.400903582572937, "eval_runtime": 259.1884, "eval_samples_per_second": 22.339, "eval_steps_per_second": 0.934, "step": 100 }, { "dpo_loss": 0.5999605059623718, "epoch": 0.5951818611242324, "grad_norm": 25.38952651860073, "learning_rate": 9.988636224180095e-07, "logits": -1.352739930152893, "logps": -85.40930938720703, "loss": 0.3097, "objective": 0.32598960399627686, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5791666507720947, "regularize": 0.32598960399627686, "step": 105 }, { "dpo_loss": 0.6067489981651306, "epoch": 0.6235238545111006, "grad_norm": 31.045039069385457, "learning_rate": 9.980973490458728e-07, "logits": -1.5531387329101562, "logps": -84.0550537109375, "loss": 0.3104, "objective": 0.3359374403953552, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.4958333373069763, "regularize": 0.33593741059303284, "step": 110 }, { "dpo_loss": 0.6095985770225525, "epoch": 0.6518658478979689, "grad_norm": 26.435670420498003, "learning_rate": 9.971351508859486e-07, "logits": -1.5276844501495361, "logps": -84.30924987792969, "loss": 0.291, "objective": 0.28773021697998047, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5041666626930237, "regularize": 0.28773021697998047, "step": 115 }, { "dpo_loss": 0.6103960871696472, "epoch": 0.680207841284837, "grad_norm": 26.942509852249753, "learning_rate": 9.959774064153975e-07, "logits": -1.4677897691726685, "logps": -84.61531066894531, "loss": 0.2837, "objective": 0.2627010643482208, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5541666746139526, "regularize": 0.2627010643482208, "step": 120 }, { "dpo_loss": 0.5971355438232422, "epoch": 0.7085498346717053, "grad_norm": 25.495357006548982, "learning_rate": 9.94624571028813e-07, "logits": -1.4407005310058594, "logps": -84.40795135498047, "loss": 0.288, "objective": 0.29481950402259827, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5874999761581421, "regularize": 0.29481950402259827, "step": 125 }, { "dpo_loss": 0.5917614102363586, "epoch": 0.7368918280585735, "grad_norm": 27.139835865074275, "learning_rate": 9.930771768590933e-07, "logits": -1.5837173461914062, "logps": -83.2771987915039, "loss": 0.2887, "objective": 0.2870228886604309, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.5041666626930237, "regularize": 0.2870228886604309, "step": 130 }, { "dpo_loss": 0.6036564111709595, "epoch": 0.7652338214454416, "grad_norm": 24.259859808790555, "learning_rate": 9.91335832568129e-07, "logits": -1.528158187866211, "logps": -85.43966674804688, "loss": 0.2694, "objective": 0.270797461271286, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5541666746139526, "regularize": 0.270797461271286, "step": 135 }, { "dpo_loss": 0.596954345703125, "epoch": 0.7935758148323099, "grad_norm": 26.42799993318966, "learning_rate": 9.894012231073895e-07, "logits": -1.4152525663375854, "logps": -86.42430114746094, "loss": 0.2606, "objective": 0.2631489038467407, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.550000011920929, "regularize": 0.2631489038467407, "step": 140 }, { "dpo_loss": 0.58833909034729, "epoch": 0.821917808219178, "grad_norm": 26.472189025522844, "learning_rate": 9.872741094484964e-07, "logits": -1.5059914588928223, "logps": -85.94861602783203, "loss": 0.2555, "objective": 0.2643609344959259, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5333333611488342, "regularize": 0.2643609344959259, "step": 145 }, { "dpo_loss": 0.5924276113510132, "epoch": 0.8502598016060463, "grad_norm": 25.826528962819687, "learning_rate": 9.849553282839024e-07, "logits": -1.4773136377334595, "logps": -84.33631134033203, "loss": 0.2481, "objective": 0.24327746033668518, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5874999761581421, "regularize": 0.243277445435524, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6853220462799072, "eval_logits": -1.478104829788208, "eval_logps": -93.32852935791016, "eval_loss": 0.4118410348892212, "eval_objective": 0.41562050580978394, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5185950398445129, "eval_regularize": 0.41562050580978394, "eval_runtime": 260.1091, "eval_samples_per_second": 22.26, "eval_steps_per_second": 0.93, "step": 150 }, { "dpo_loss": 0.5857201814651489, "epoch": 0.8786017949929145, "grad_norm": 24.421694763767686, "learning_rate": 9.824457916977784e-07, "logits": -1.4784348011016846, "logps": -84.23937225341797, "loss": 0.25, "objective": 0.24794721603393555, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5916666388511658, "regularize": 0.24794721603393555, "step": 155 }, { "dpo_loss": 0.5842316746711731, "epoch": 0.9069437883797827, "grad_norm": 24.297754190889687, "learning_rate": 9.797464868072486e-07, "logits": -1.379388689994812, "logps": -84.26329803466797, "loss": 0.2417, "objective": 0.23959442973136902, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5458333492279053, "regularize": 0.23959442973136902, "step": 160 }, { "dpo_loss": 0.5881075263023376, "epoch": 0.9352857817666509, "grad_norm": 25.046440958455594, "learning_rate": 9.768584753741134e-07, "logits": -1.3925925493240356, "logps": -85.05484771728516, "loss": 0.2445, "objective": 0.24838505685329437, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5583333373069763, "regularize": 0.24838504195213318, "step": 165 }, { "dpo_loss": 0.5687467455863953, "epoch": 0.9636277751535192, "grad_norm": 24.80826032024146, "learning_rate": 9.737828933872073e-07, "logits": -1.440019130706787, "logps": -85.22455596923828, "loss": 0.2525, "objective": 0.24621081352233887, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5625, "regularize": 0.24621081352233887, "step": 170 }, { "dpo_loss": 0.5792465209960938, "epoch": 0.9919697685403873, "grad_norm": 25.657531696623572, "learning_rate": 9.705209506155634e-07, "logits": -1.3882230520248413, "logps": -85.2247085571289, "loss": 0.2408, "objective": 0.2368970364332199, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6041666865348816, "regularize": 0.2368970364332199, "step": 175 }, { "dpo_loss": 0.5573223233222961, "epoch": 1.0203117619272555, "grad_norm": 24.441555112350308, "learning_rate": 9.670739301325534e-07, "logits": -1.5630497932434082, "logps": -84.3948745727539, "loss": 0.2102, "objective": 0.20754273235797882, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5958333611488342, "regularize": 0.20754273235797882, "step": 180 }, { "dpo_loss": 0.5467338562011719, "epoch": 1.0486537553141237, "grad_norm": 26.114706754447813, "learning_rate": 9.63443187811197e-07, "logits": -1.4042932987213135, "logps": -84.7653579711914, "loss": 0.214, "objective": 0.21694259345531464, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5458333492279053, "regularize": 0.21694259345531464, "step": 185 }, { "dpo_loss": 0.5574190020561218, "epoch": 1.076995748700992, "grad_norm": 25.20524724848005, "learning_rate": 9.596301517908328e-07, "logits": -1.4538909196853638, "logps": -85.65680694580078, "loss": 0.2007, "objective": 0.21142269670963287, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.625, "regularize": 0.21142269670963287, "step": 190 }, { "dpo_loss": 0.561899721622467, "epoch": 1.10533774208786, "grad_norm": 28.03205694511378, "learning_rate": 9.556363219153662e-07, "logits": -1.435767650604248, "logps": -84.88529968261719, "loss": 0.2057, "objective": 0.19679027795791626, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6166666746139526, "regularize": 0.19679027795791626, "step": 195 }, { "dpo_loss": 0.5534842014312744, "epoch": 1.1336797354747283, "grad_norm": 23.06275024905121, "learning_rate": 9.514632691433106e-07, "logits": -1.517577052116394, "logps": -83.62954711914062, "loss": 0.1986, "objective": 0.19466033577919006, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.574999988079071, "regularize": 0.19466033577919006, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6827520132064819, "eval_logits": -1.46909761428833, "eval_logps": -90.6331558227539, "eval_loss": 0.40533673763275146, "eval_objective": 0.40887078642845154, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5206611752510071, "eval_regularize": 0.40887078642845154, "eval_runtime": 260.5987, "eval_samples_per_second": 22.218, "eval_steps_per_second": 0.929, "step": 200 }, { "dpo_loss": 0.5494053363800049, "epoch": 1.1620217288615966, "grad_norm": 22.941534169012083, "learning_rate": 9.471126349298556e-07, "logits": -1.5020116567611694, "logps": -83.8444595336914, "loss": 0.1994, "objective": 0.19596201181411743, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5583333373069763, "regularize": 0.19596201181411743, "step": 205 }, { "dpo_loss": 0.5515065789222717, "epoch": 1.1903637222484649, "grad_norm": 26.741821520067802, "learning_rate": 9.425861305812081e-07, "logits": -1.4875836372375488, "logps": -83.98831176757812, "loss": 0.1895, "objective": 0.20510397851467133, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5541666746139526, "regularize": 0.20510397851467133, "step": 210 }, { "dpo_loss": 0.55607670545578, "epoch": 1.2187057156353331, "grad_norm": 23.43637893497653, "learning_rate": 9.378855365814557e-07, "logits": -1.4646224975585938, "logps": -83.52363586425781, "loss": 0.1889, "objective": 0.19153118133544922, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5708333253860474, "regularize": 0.19153118133544922, "step": 215 }, { "dpo_loss": 0.556377112865448, "epoch": 1.2470477090222012, "grad_norm": 26.789286245107157, "learning_rate": 9.330127018922193e-07, "logits": -1.4145793914794922, "logps": -82.84550476074219, "loss": 0.1925, "objective": 0.17143851518630981, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.6499999761581421, "regularize": 0.17143851518630981, "step": 220 }, { "dpo_loss": 0.5455420613288879, "epoch": 1.2753897024090695, "grad_norm": 25.237511413060258, "learning_rate": 9.279695432253708e-07, "logits": -1.4910824298858643, "logps": -84.51390075683594, "loss": 0.1898, "objective": 0.1823263168334961, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6083333492279053, "regularize": 0.1823263168334961, "step": 225 }, { "dpo_loss": 0.5552546381950378, "epoch": 1.3037316957959377, "grad_norm": 23.65942718982369, "learning_rate": 9.227580442891021e-07, "logits": -1.4593993425369263, "logps": -84.47645568847656, "loss": 0.1809, "objective": 0.17018872499465942, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.550000011920929, "regularize": 0.17018872499465942, "step": 230 }, { "dpo_loss": 0.5385202169418335, "epoch": 1.3320736891828058, "grad_norm": 25.266299893397434, "learning_rate": 9.173802550076401e-07, "logits": -1.5345088243484497, "logps": -82.98789978027344, "loss": 0.1789, "objective": 0.1734149307012558, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5833333134651184, "regularize": 0.1734149307012558, "step": 235 }, { "dpo_loss": 0.5434895157814026, "epoch": 1.360415682569674, "grad_norm": 25.750551600333242, "learning_rate": 9.118382907149163e-07, "logits": -1.4756948947906494, "logps": -84.32857513427734, "loss": 0.1742, "objective": 0.1837477833032608, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5583333373069763, "regularize": 0.1837477684020996, "step": 240 }, { "dpo_loss": 0.5604755878448486, "epoch": 1.3887576759565423, "grad_norm": 24.129154340629153, "learning_rate": 9.061343313225087e-07, "logits": -1.4909014701843262, "logps": -83.4426498413086, "loss": 0.1789, "objective": 0.1817345917224884, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.1817345917224884, "step": 245 }, { "dpo_loss": 0.5357322692871094, "epoch": 1.4170996693434104, "grad_norm": 24.16224594925354, "learning_rate": 9.002706204621802e-07, "logits": -1.4255733489990234, "logps": -82.65512084960938, "loss": 0.1805, "objective": 0.17317816615104675, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5541666746139526, "regularize": 0.17317816615104675, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6830819249153137, "eval_logits": -1.464825987815857, "eval_logps": -90.24966430664062, "eval_loss": 0.4085530936717987, "eval_objective": 0.4083588719367981, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.4083588719367981, "eval_runtime": 262.2655, "eval_samples_per_second": 22.077, "eval_steps_per_second": 0.923, "step": 250 }, { "dpo_loss": 0.5522000193595886, "epoch": 1.4454416627302786, "grad_norm": 23.544028131135565, "learning_rate": 8.942494646033554e-07, "logits": -1.428904414176941, "logps": -83.82772827148438, "loss": 0.1816, "objective": 0.16092044115066528, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.612500011920929, "regularize": 0.16092044115066528, "step": 255 }, { "dpo_loss": 0.5535964369773865, "epoch": 1.473783656117147, "grad_norm": 24.007017906906484, "learning_rate": 8.880732321458784e-07, "logits": -1.4904005527496338, "logps": -83.97267150878906, "loss": 0.1703, "objective": 0.16837134957313538, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.550000011920929, "regularize": 0.16837134957313538, "step": 260 }, { "dpo_loss": 0.5446482300758362, "epoch": 1.5021256495040152, "grad_norm": 24.30764382402002, "learning_rate": 8.817443524884117e-07, "logits": -1.4601694345474243, "logps": -82.12098693847656, "loss": 0.1781, "objective": 0.17031626403331757, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5708333253860474, "regularize": 0.17031626403331757, "step": 265 }, { "dpo_loss": 0.5536972284317017, "epoch": 1.5304676428908834, "grad_norm": 24.675134737686058, "learning_rate": 8.752653150728411e-07, "logits": -1.471502661705017, "logps": -84.13450622558594, "loss": 0.1758, "objective": 0.18668265640735626, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5708333253860474, "regularize": 0.18668265640735626, "step": 270 }, { "dpo_loss": 0.5547968745231628, "epoch": 1.5588096362777515, "grad_norm": 22.77808390233293, "learning_rate": 8.68638668405062e-07, "logits": -1.4670997858047485, "logps": -85.27931213378906, "loss": 0.171, "objective": 0.16611038148403168, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.16611038148403168, "step": 275 }, { "dpo_loss": 0.5309798717498779, "epoch": 1.5871516296646198, "grad_norm": 22.23255904480611, "learning_rate": 8.61867019052535e-07, "logits": -1.387014389038086, "logps": -83.47966766357422, "loss": 0.1731, "objective": 0.18033398687839508, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5874999761581421, "regularize": 0.18033398687839508, "step": 280 }, { "dpo_loss": 0.5369495749473572, "epoch": 1.615493623051488, "grad_norm": 24.7467519907843, "learning_rate": 8.549530306190014e-07, "logits": -1.4981027841567993, "logps": -85.08309936523438, "loss": 0.1613, "objective": 0.15606491267681122, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5958333611488342, "regularize": 0.15606491267681122, "step": 285 }, { "dpo_loss": 0.5465491414070129, "epoch": 1.643835616438356, "grad_norm": 22.280063793784098, "learning_rate": 8.478994226967638e-07, "logits": -1.5392872095108032, "logps": -82.96480560302734, "loss": 0.1639, "objective": 0.1686221808195114, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6041666865348816, "regularize": 0.1686221808195114, "step": 290 }, { "dpo_loss": 0.5326969623565674, "epoch": 1.6721776098252243, "grad_norm": 22.516708106368693, "learning_rate": 8.407089697969456e-07, "logits": -1.430370569229126, "logps": -81.40605926513672, "loss": 0.1651, "objective": 0.16238288581371307, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5666666626930237, "regularize": 0.16238288581371307, "step": 295 }, { "dpo_loss": 0.5438053011894226, "epoch": 1.7005196032120926, "grad_norm": 22.982971147438153, "learning_rate": 8.333845002581458e-07, "logits": -1.5061898231506348, "logps": -82.67247009277344, "loss": 0.1668, "objective": 0.19721931219100952, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.19721931219100952, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6841849088668823, "eval_logits": -1.476090669631958, "eval_logps": -89.86566162109375, "eval_loss": 0.4079909026622772, "eval_objective": 0.4113588035106659, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5206611752510071, "eval_regularize": 0.4113588035106659, "eval_runtime": 259.3673, "eval_samples_per_second": 22.324, "eval_steps_per_second": 0.933, "step": 300 }, { "dpo_loss": 0.5529495477676392, "epoch": 1.7288615965989607, "grad_norm": 23.962805989899444, "learning_rate": 8.259288951339232e-07, "logits": -1.4737364053726196, "logps": -83.48453521728516, "loss": 0.1635, "objective": 0.17988164722919464, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5583333373069763, "regularize": 0.17988164722919464, "step": 305 }, { "dpo_loss": 0.5436158776283264, "epoch": 1.7572035899858292, "grad_norm": 26.010266526035746, "learning_rate": 8.183450870595441e-07, "logits": -1.5402640104293823, "logps": -81.41146087646484, "loss": 0.1725, "objective": 0.16945843398571014, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.16945843398571014, "step": 310 }, { "dpo_loss": 0.5490608811378479, "epoch": 1.7855455833726972, "grad_norm": 23.214852755265355, "learning_rate": 8.106360590984404e-07, "logits": -1.4412391185760498, "logps": -82.86125946044922, "loss": 0.1609, "objective": 0.15798324346542358, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.15798324346542358, "step": 315 }, { "dpo_loss": 0.5580403208732605, "epoch": 1.8138875767595655, "grad_norm": 25.270172487230024, "learning_rate": 8.028048435688333e-07, "logits": -1.489629864692688, "logps": -84.82173156738281, "loss": 0.1562, "objective": 0.15719416737556458, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5958333611488342, "regularize": 0.15719416737556458, "step": 320 }, { "dpo_loss": 0.5307654142379761, "epoch": 1.8422295701464337, "grad_norm": 24.866617020536584, "learning_rate": 7.948545208509811e-07, "logits": -1.5223475694656372, "logps": -85.49372100830078, "loss": 0.1605, "objective": 0.15138211846351624, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6291666626930237, "regularize": 0.15138211846351624, "step": 325 }, { "dpo_loss": 0.5346109867095947, "epoch": 1.8705715635333018, "grad_norm": 27.77712533482603, "learning_rate": 7.86788218175523e-07, "logits": -1.282273769378662, "logps": -83.1356201171875, "loss": 0.1554, "objective": 0.14494642615318298, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5583333373069763, "regularize": 0.14494642615318298, "step": 330 }, { "dpo_loss": 0.5577983260154724, "epoch": 1.89891355692017, "grad_norm": 23.806319516884738, "learning_rate": 7.786091083933949e-07, "logits": -1.4557408094406128, "logps": -83.1150131225586, "loss": 0.1472, "objective": 0.14962820708751678, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.14962820708751678, "step": 335 }, { "dpo_loss": 0.548663318157196, "epoch": 1.9272555503070383, "grad_norm": 25.2807889158847, "learning_rate": 7.703204087277988e-07, "logits": -1.463193416595459, "logps": -85.10281372070312, "loss": 0.1416, "objective": 0.14199069142341614, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6166666746139526, "regularize": 0.14199069142341614, "step": 340 }, { "dpo_loss": 0.5481914281845093, "epoch": 1.9555975436939064, "grad_norm": 23.034113253398804, "learning_rate": 7.619253795087208e-07, "logits": -1.4545904397964478, "logps": -83.42992401123047, "loss": 0.1457, "objective": 0.13813456892967224, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5833333134651184, "regularize": 0.13813456892967224, "step": 345 }, { "dpo_loss": 0.5435228943824768, "epoch": 1.9839395370807746, "grad_norm": 25.493404234037047, "learning_rate": 7.534273228904915e-07, "logits": -1.3632704019546509, "logps": -84.23902893066406, "loss": 0.1476, "objective": 0.13394585251808167, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5625, "regularize": 0.13394585251808167, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6835209131240845, "eval_logits": -1.4348496198654175, "eval_logps": -89.60076904296875, "eval_loss": 0.4086475670337677, "eval_objective": 0.4084475636482239, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5216942429542542, "eval_regularize": 0.4084475636482239, "eval_runtime": 259.621, "eval_samples_per_second": 22.302, "eval_steps_per_second": 0.932, "step": 350 }, { "dpo_loss": 0.5331315994262695, "epoch": 2.012281530467643, "grad_norm": 22.16231721451118, "learning_rate": 7.448295815528956e-07, "logits": -1.3494775295257568, "logps": -82.90995788574219, "loss": 0.1455, "objective": 0.1512984335422516, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6000000238418579, "regularize": 0.1512984186410904, "step": 355 }, { "dpo_loss": 0.5351840853691101, "epoch": 2.040623523854511, "grad_norm": 25.1718748641759, "learning_rate": 7.361355373863413e-07, "logits": -1.393783688545227, "logps": -81.44464874267578, "loss": 0.1343, "objective": 0.1370130479335785, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.637499988079071, "regularize": 0.1370130479335785, "step": 360 }, { "dpo_loss": 0.5345187783241272, "epoch": 2.0689655172413794, "grad_norm": 24.179993370065525, "learning_rate": 7.273486101616056e-07, "logits": -1.474308729171753, "logps": -83.76331329345703, "loss": 0.1347, "objective": 0.1313287615776062, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6041666865348816, "regularize": 0.1313287615776062, "step": 365 }, { "dpo_loss": 0.5465765595436096, "epoch": 2.0973075106282475, "grad_norm": 23.72652550591992, "learning_rate": 7.184722561846797e-07, "logits": -1.4518685340881348, "logps": -81.55240631103516, "loss": 0.124, "objective": 0.1166418269276619, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5541666746139526, "regularize": 0.1166418269276619, "step": 370 }, { "dpo_loss": 0.5262054204940796, "epoch": 2.1256495040151155, "grad_norm": 24.679557221698076, "learning_rate": 7.095099669372443e-07, "logits": -1.4321234226226807, "logps": -83.55628967285156, "loss": 0.1283, "objective": 0.12942390143871307, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6625000238418579, "regularize": 0.12942390143871307, "step": 375 }, { "dpo_loss": 0.5403919219970703, "epoch": 2.153991497401984, "grad_norm": 23.122876869258256, "learning_rate": 7.004652677033068e-07, "logits": -1.338428020477295, "logps": -82.6377182006836, "loss": 0.1281, "objective": 0.10954796522855759, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.10954796522855759, "step": 380 }, { "dpo_loss": 0.5505331754684448, "epoch": 2.182333490788852, "grad_norm": 22.25736511993951, "learning_rate": 6.913417161825449e-07, "logits": -1.4360421895980835, "logps": -84.50902557373047, "loss": 0.1236, "objective": 0.11411557346582413, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.574999988079071, "regularize": 0.11411556601524353, "step": 385 }, { "dpo_loss": 0.5398189425468445, "epoch": 2.21067548417572, "grad_norm": 23.82479611784211, "learning_rate": 6.821429010908971e-07, "logits": -1.336391806602478, "logps": -83.15116882324219, "loss": 0.1245, "objective": 0.1218627318739891, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5375000238418579, "regularize": 0.1218627318739891, "step": 390 }, { "dpo_loss": 0.5215297341346741, "epoch": 2.2390174775625886, "grad_norm": 22.591578381119685, "learning_rate": 6.728724407489553e-07, "logits": -1.3484855890274048, "logps": -83.57234954833984, "loss": 0.1263, "objective": 0.1272638440132141, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5916666388511658, "regularize": 0.12726382911205292, "step": 395 }, { "dpo_loss": 0.5246094465255737, "epoch": 2.2673594709494567, "grad_norm": 22.99471999109431, "learning_rate": 6.635339816587108e-07, "logits": -1.4181877374649048, "logps": -84.8980712890625, "loss": 0.1232, "objective": 0.1278635859489441, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.675000011920929, "regularize": 0.1278635859489441, "step": 400 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6825190186500549, "eval_logits": -1.4141640663146973, "eval_logps": -89.93671417236328, "eval_loss": 0.40635946393013, "eval_objective": 0.4059920310974121, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4059920310974121, "eval_runtime": 259.3604, "eval_samples_per_second": 22.324, "eval_steps_per_second": 0.933, "step": 400 }, { "dpo_loss": 0.5345380902290344, "epoch": 2.295701464336325, "grad_norm": 22.452414561821904, "learning_rate": 6.541311970692162e-07, "logits": -1.484344720840454, "logps": -82.7432861328125, "loss": 0.1237, "objective": 0.1316702663898468, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6541666388511658, "regularize": 0.1316702663898468, "step": 405 }, { "dpo_loss": 0.5351517200469971, "epoch": 2.324043457723193, "grad_norm": 24.318684153528356, "learning_rate": 6.446677855317264e-07, "logits": -1.3660470247268677, "logps": -82.44485473632812, "loss": 0.1164, "objective": 0.11186593025922775, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5583333373069763, "regularize": 0.11186593025922775, "step": 410 }, { "dpo_loss": 0.5329793691635132, "epoch": 2.3523854511100613, "grad_norm": 22.50760313963993, "learning_rate": 6.351474694448864e-07, "logits": -1.437878131866455, "logps": -83.41373443603516, "loss": 0.1186, "objective": 0.12183640152215958, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5708333253860474, "regularize": 0.12183640152215958, "step": 415 }, { "dpo_loss": 0.5409477949142456, "epoch": 2.3807274444969297, "grad_norm": 23.39263075574448, "learning_rate": 6.255739935905395e-07, "logits": -1.349250078201294, "logps": -85.22098541259766, "loss": 0.1175, "objective": 0.10631230473518372, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.550000011920929, "regularize": 0.10631229728460312, "step": 420 }, { "dpo_loss": 0.5271558165550232, "epoch": 2.409069437883798, "grad_norm": 23.840070879325513, "learning_rate": 6.159511236607315e-07, "logits": -1.4124720096588135, "logps": -84.24110412597656, "loss": 0.1153, "objective": 0.11380250006914139, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5958333611488342, "regularize": 0.11380250006914139, "step": 425 }, { "dpo_loss": 0.5327500700950623, "epoch": 2.4374114312706663, "grad_norm": 22.9996288815754, "learning_rate": 6.062826447764883e-07, "logits": -1.4347702264785767, "logps": -84.58445739746094, "loss": 0.1076, "objective": 0.10175766050815582, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.10175765305757523, "step": 430 }, { "dpo_loss": 0.5315712690353394, "epoch": 2.4657534246575343, "grad_norm": 22.21161853218669, "learning_rate": 5.965723599989528e-07, "logits": -1.4599779844284058, "logps": -84.16157531738281, "loss": 0.1148, "objective": 0.11776481568813324, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5916666388511658, "regularize": 0.11776480078697205, "step": 435 }, { "dpo_loss": 0.5355103611946106, "epoch": 2.4940954180444024, "grad_norm": 23.031781845673333, "learning_rate": 5.868240888334652e-07, "logits": -1.385536789894104, "logps": -83.61788940429688, "loss": 0.1125, "objective": 0.11075066775083542, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5708333253860474, "regularize": 0.11075066775083542, "step": 440 }, { "dpo_loss": 0.5411112904548645, "epoch": 2.5224374114312704, "grad_norm": 25.203231448824464, "learning_rate": 5.770416657271728e-07, "logits": -1.4106037616729736, "logps": -81.53707885742188, "loss": 0.1119, "objective": 0.11902200430631638, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6041666865348816, "regularize": 0.11902199685573578, "step": 445 }, { "dpo_loss": 0.55417400598526, "epoch": 2.550779404818139, "grad_norm": 24.455868446022734, "learning_rate": 5.67228938560766e-07, "logits": -1.4431836605072021, "logps": -83.54483795166016, "loss": 0.1085, "objective": 0.10727948695421219, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5333333611488342, "regularize": 0.10727948695421219, "step": 450 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6829083561897278, "eval_logits": -1.4380848407745361, "eval_logps": -90.61122131347656, "eval_loss": 0.40571001172065735, "eval_objective": 0.406777560710907, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.406777560710907, "eval_runtime": 259.0425, "eval_samples_per_second": 22.352, "eval_steps_per_second": 0.934, "step": 450 }, { "dpo_loss": 0.5320044159889221, "epoch": 2.579121398205007, "grad_norm": 22.906053050143626, "learning_rate": 5.573897671349268e-07, "logits": -1.4764381647109985, "logps": -84.27240753173828, "loss": 0.1117, "objective": 0.11940006166696548, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.574999988079071, "regularize": 0.11940006166696548, "step": 455 }, { "dpo_loss": 0.5233482122421265, "epoch": 2.6074633915918755, "grad_norm": 22.258361780067798, "learning_rate": 5.475280216520912e-07, "logits": -1.5429632663726807, "logps": -84.30569458007812, "loss": 0.1103, "objective": 0.10580132901668549, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5958333611488342, "regularize": 0.10580132901668549, "step": 460 }, { "dpo_loss": 0.5289517641067505, "epoch": 2.6358053849787435, "grad_norm": 23.240912033270092, "learning_rate": 5.376475811941191e-07, "logits": -1.428727388381958, "logps": -83.95030212402344, "loss": 0.1071, "objective": 0.10987317562103271, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5541666746139526, "regularize": 0.10987316071987152, "step": 465 }, { "dpo_loss": 0.5318377614021301, "epoch": 2.6641473783656116, "grad_norm": 24.64272982925985, "learning_rate": 5.277523321964701e-07, "logits": -1.4431354999542236, "logps": -83.10697937011719, "loss": 0.105, "objective": 0.1006205826997757, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.1006205826997757, "step": 470 }, { "dpo_loss": 0.5384759306907654, "epoch": 2.69248937175248, "grad_norm": 21.994194573090148, "learning_rate": 5.178461669194903e-07, "logits": -1.4019439220428467, "logps": -82.92670440673828, "loss": 0.101, "objective": 0.0988389179110527, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5916666388511658, "regularize": 0.0988389179110527, "step": 475 }, { "dpo_loss": 0.539698600769043, "epoch": 2.720831365139348, "grad_norm": 24.874583032447394, "learning_rate": 5.07932981917404e-07, "logits": -1.5038942098617554, "logps": -82.17936706542969, "loss": 0.1017, "objective": 0.10505501180887222, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5791666507720947, "regularize": 0.10505500435829163, "step": 480 }, { "dpo_loss": 0.5295734405517578, "epoch": 2.7491733585262166, "grad_norm": 24.131350896743502, "learning_rate": 4.980166765056193e-07, "logits": -1.4220199584960938, "logps": -84.46988677978516, "loss": 0.1033, "objective": 0.10565243661403656, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5958333611488342, "regularize": 0.10565243661403656, "step": 485 }, { "dpo_loss": 0.5229516625404358, "epoch": 2.7775153519130846, "grad_norm": 23.380731245805677, "learning_rate": 4.881011512269463e-07, "logits": -1.4164656400680542, "logps": -82.1783676147461, "loss": 0.1056, "objective": 0.10975264012813568, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5958333611488342, "regularize": 0.10975264012813568, "step": 490 }, { "dpo_loss": 0.536858856678009, "epoch": 2.8058573452999527, "grad_norm": 25.010956720921584, "learning_rate": 4.78190306317332e-07, "logits": -1.4320250749588013, "logps": -81.11976623535156, "loss": 0.0977, "objective": 0.09322524815797806, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5666666626930237, "regularize": 0.09322523325681686, "step": 495 }, { "dpo_loss": 0.5231731534004211, "epoch": 2.8341993386868207, "grad_norm": 24.981319167329183, "learning_rate": 4.682880401717177e-07, "logits": -1.479564905166626, "logps": -80.21460723876953, "loss": 0.099, "objective": 0.09580207616090775, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6083333492279053, "regularize": 0.09580207616090775, "step": 500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6836758255958557, "eval_logits": -1.4538483619689941, "eval_logps": -89.78665924072266, "eval_loss": 0.4075116813182831, "eval_objective": 0.40899595618247986, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.40899595618247986, "eval_runtime": 259.2881, "eval_samples_per_second": 22.33, "eval_steps_per_second": 0.933, "step": 500 }, { "dpo_loss": 0.5333107113838196, "epoch": 2.862541332073689, "grad_norm": 22.440897537859303, "learning_rate": 4.5839824781061886e-07, "logits": -1.4319252967834473, "logps": -82.19851684570312, "loss": 0.0974, "objective": 0.09931109100580215, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.612500011920929, "regularize": 0.09931109100580215, "step": 505 }, { "dpo_loss": 0.5374515056610107, "epoch": 2.8908833254605573, "grad_norm": 22.71050128727261, "learning_rate": 4.4852481934803277e-07, "logits": -1.3620020151138306, "logps": -82.26110076904297, "loss": 0.0964, "objective": 0.10236553847789764, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5916666388511658, "regularize": 0.10236553847789764, "step": 510 }, { "dpo_loss": 0.5393768548965454, "epoch": 2.9192253188474258, "grad_norm": 26.294279777028603, "learning_rate": 4.3867163846127674e-07, "logits": -1.5220664739608765, "logps": -82.21379852294922, "loss": 0.0962, "objective": 0.09978827089071274, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5833333134651184, "regularize": 0.09978827089071274, "step": 515 }, { "dpo_loss": 0.5407862067222595, "epoch": 2.947567312234294, "grad_norm": 22.719373903401866, "learning_rate": 4.2884258086335745e-07, "logits": -1.4105883836746216, "logps": -84.40800476074219, "loss": 0.0917, "objective": 0.08780403435230255, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6000000238418579, "regularize": 0.08780403435230255, "step": 520 }, { "dpo_loss": 0.5382903814315796, "epoch": 2.975909305621162, "grad_norm": 22.439739653406917, "learning_rate": 4.1904151277847305e-07, "logits": -1.3989008665084839, "logps": -83.13529205322266, "loss": 0.0909, "objective": 0.10328014940023422, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.637499988079071, "regularize": 0.10328014940023422, "step": 525 }, { "dpo_loss": 0.5224732756614685, "epoch": 3.0042512990080303, "grad_norm": 23.2445043242505, "learning_rate": 4.092722894212487e-07, "logits": -1.4099732637405396, "logps": -82.2646484375, "loss": 0.0906, "objective": 0.08990009129047394, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6000000238418579, "regularize": 0.08990008383989334, "step": 530 }, { "dpo_loss": 0.5297616720199585, "epoch": 3.0325932923948984, "grad_norm": 24.595241433656245, "learning_rate": 3.995387534803005e-07, "logits": -1.4481351375579834, "logps": -84.04501342773438, "loss": 0.0863, "objective": 0.09028714150190353, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6208333373069763, "regularize": 0.09028714150190353, "step": 535 }, { "dpo_loss": 0.5298644304275513, "epoch": 3.0609352857817664, "grad_norm": 22.819470538427282, "learning_rate": 3.8984473360672967e-07, "logits": -1.5335410833358765, "logps": -82.01764678955078, "loss": 0.0786, "objective": 0.07253900170326233, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6291666626930237, "regularize": 0.07253900170326233, "step": 540 }, { "dpo_loss": 0.5357497930526733, "epoch": 3.089277279168635, "grad_norm": 23.587959979388312, "learning_rate": 3.801940429081345e-07, "logits": -1.475661039352417, "logps": -83.04609680175781, "loss": 0.0786, "objective": 0.08452685922384262, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5666666626930237, "regularize": 0.08452685922384262, "step": 545 }, { "dpo_loss": 0.5293916463851929, "epoch": 3.117619272555503, "grad_norm": 23.742387802519247, "learning_rate": 3.7059047744873955e-07, "logits": -1.3145067691802979, "logps": -83.14439392089844, "loss": 0.0841, "objective": 0.07637524604797363, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.612500011920929, "regularize": 0.07637524604797363, "step": 550 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.6836426854133606, "eval_logits": -1.4287773370742798, "eval_logps": -89.19234466552734, "eval_loss": 0.4074074625968933, "eval_objective": 0.4091208279132843, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5268595218658447, "eval_regularize": 0.4091208279132843, "eval_runtime": 259.888, "eval_samples_per_second": 22.279, "eval_steps_per_second": 0.931, "step": 550 }, { "dpo_loss": 0.5199671983718872, "epoch": 3.1459612659423715, "grad_norm": 23.606389156724106, "learning_rate": 3.6103781475622786e-07, "logits": -1.4020836353302002, "logps": -83.6429214477539, "loss": 0.0826, "objective": 0.08424239605665207, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.637499988079071, "regularize": 0.08424239605665207, "step": 555 }, { "dpo_loss": 0.5297064185142517, "epoch": 3.1743032593292395, "grad_norm": 21.283296032324174, "learning_rate": 3.5153981233586274e-07, "logits": -1.375638484954834, "logps": -80.67549896240234, "loss": 0.0764, "objective": 0.0754186362028122, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5625, "regularize": 0.0754186362028122, "step": 560 }, { "dpo_loss": 0.5281550884246826, "epoch": 3.2026452527161076, "grad_norm": 25.383548239078706, "learning_rate": 3.421002061924876e-07, "logits": -1.4403051137924194, "logps": -82.08113098144531, "loss": 0.0745, "objective": 0.0825800895690918, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6333333253860474, "regularize": 0.0825800821185112, "step": 565 }, { "dpo_loss": 0.5239064693450928, "epoch": 3.230987246102976, "grad_norm": 25.606035120731306, "learning_rate": 3.327227093609824e-07, "logits": -1.3596783876419067, "logps": -82.14395141601562, "loss": 0.0748, "objective": 0.07690493017435074, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5708333253860474, "regularize": 0.07690493017435074, "step": 570 }, { "dpo_loss": 0.5288205146789551, "epoch": 3.259329239489844, "grad_norm": 21.36265788871065, "learning_rate": 3.234110104457536e-07, "logits": -1.4363545179367065, "logps": -82.7227554321289, "loss": 0.0765, "objective": 0.08387748897075653, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.08387748897075653, "step": 575 }, { "dpo_loss": 0.5455772876739502, "epoch": 3.287671232876712, "grad_norm": 22.23742629967835, "learning_rate": 3.141687721698363e-07, "logits": -1.4502298831939697, "logps": -83.70122528076172, "loss": 0.074, "objective": 0.0667726993560791, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.612500011920929, "regularize": 0.0667726919054985, "step": 580 }, { "dpo_loss": 0.5377206206321716, "epoch": 3.3160132262635806, "grad_norm": 21.976427115545793, "learning_rate": 3.049996299341742e-07, "logits": -1.478832483291626, "logps": -84.10258483886719, "loss": 0.074, "objective": 0.07396882027387619, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6291666626930237, "regularize": 0.07396882027387619, "step": 585 }, { "dpo_loss": 0.540601372718811, "epoch": 3.3443552196504487, "grad_norm": 24.248150339564425, "learning_rate": 2.959071903876486e-07, "logits": -1.490022897720337, "logps": -84.40371704101562, "loss": 0.0726, "objective": 0.06912810355424881, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5833333134651184, "regularize": 0.06912810355424881, "step": 590 }, { "dpo_loss": 0.530450165271759, "epoch": 3.372697213037317, "grad_norm": 23.904834128431904, "learning_rate": 2.86895030008416e-07, "logits": -1.4088099002838135, "logps": -83.5683822631836, "loss": 0.0716, "objective": 0.06921317428350449, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.06921316683292389, "step": 595 }, { "dpo_loss": 0.510923445224762, "epoch": 3.4010392064241852, "grad_norm": 21.999466319441446, "learning_rate": 2.779666936971129e-07, "logits": -1.4195644855499268, "logps": -83.0455551147461, "loss": 0.0673, "objective": 0.06648312509059906, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5874999761581421, "regularize": 0.06648311764001846, "step": 600 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.6824304461479187, "eval_logits": -1.4325991868972778, "eval_logps": -89.83067321777344, "eval_loss": 0.40557217597961426, "eval_objective": 0.40685591101646423, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.40685591101646423, "eval_runtime": 259.0599, "eval_samples_per_second": 22.35, "eval_steps_per_second": 0.934, "step": 600 }, { "dpo_loss": 0.5408468246459961, "epoch": 3.4293811998110533, "grad_norm": 21.826287125403734, "learning_rate": 2.6912569338248315e-07, "logits": -1.4806511402130127, "logps": -85.08236694335938, "loss": 0.0687, "objective": 0.0708792433142662, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5874999761581421, "regularize": 0.0708792433142662, "step": 605 }, { "dpo_loss": 0.5326560139656067, "epoch": 3.4577231931979218, "grad_norm": 23.721876415078565, "learning_rate": 2.603755066399718e-07, "logits": -1.4362066984176636, "logps": -83.59281158447266, "loss": 0.0693, "objective": 0.06495842337608337, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.574999988079071, "regularize": 0.06495841592550278, "step": 610 }, { "dpo_loss": 0.5220057964324951, "epoch": 3.48606518658479, "grad_norm": 23.867397255620617, "learning_rate": 2.517195753238345e-07, "logits": -1.459093451499939, "logps": -83.89041137695312, "loss": 0.0677, "objective": 0.06726350635290146, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5458333492279053, "regularize": 0.06726350635290146, "step": 615 }, { "dpo_loss": 0.5138709545135498, "epoch": 3.514407179971658, "grad_norm": 22.48517117265223, "learning_rate": 2.4316130421329696e-07, "logits": -1.3361726999282837, "logps": -83.23828887939453, "loss": 0.0661, "objective": 0.05854518711566925, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.058545153588056564, "step": 620 }, { "dpo_loss": 0.5306848287582397, "epoch": 3.5427491733585263, "grad_norm": 22.374230054745578, "learning_rate": 2.3470405967329604e-07, "logits": -1.406466007232666, "logps": -82.32576751708984, "loss": 0.0639, "objective": 0.06265277415513992, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6041666865348816, "regularize": 0.06265277415513992, "step": 625 }, { "dpo_loss": 0.5401536226272583, "epoch": 3.5710911667453944, "grad_norm": 22.323503974192004, "learning_rate": 2.2635116833033392e-07, "logits": -1.4880479574203491, "logps": -82.74535369873047, "loss": 0.0671, "objective": 0.06858905404806137, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5874999761581421, "regularize": 0.06858905404806137, "step": 630 }, { "dpo_loss": 0.5335288643836975, "epoch": 3.5994331601322624, "grad_norm": 22.950166480099814, "learning_rate": 2.181059157639598e-07, "logits": -1.426721215248108, "logps": -82.85971069335938, "loss": 0.06, "objective": 0.0622558668255806, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5708333253860474, "regularize": 0.062255859375, "step": 635 }, { "dpo_loss": 0.5119226574897766, "epoch": 3.627775153519131, "grad_norm": 25.079864254767315, "learning_rate": 2.0997154521440097e-07, "logits": -1.3697155714035034, "logps": -83.90760803222656, "loss": 0.0613, "objective": 0.0635208860039711, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5458333492279053, "regularize": 0.0635208785533905, "step": 640 }, { "dpo_loss": 0.522363007068634, "epoch": 3.656117146905999, "grad_norm": 22.441342121743332, "learning_rate": 2.0195125630684428e-07, "logits": -1.3928742408752441, "logps": -81.88297271728516, "loss": 0.0634, "objective": 0.05965565890073776, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.612500011920929, "regularize": 0.05965564027428627, "step": 645 }, { "dpo_loss": 0.5373592376708984, "epoch": 3.6844591402928675, "grad_norm": 22.133762729051785, "learning_rate": 1.9404820379287672e-07, "logits": -1.3841991424560547, "logps": -83.1523208618164, "loss": 0.0589, "objective": 0.055038776248693466, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.055038776248693466, "step": 650 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.6828624606132507, "eval_logits": -1.4302468299865723, "eval_logps": -89.47576904296875, "eval_loss": 0.40598276257514954, "eval_objective": 0.4077259600162506, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.4077259600162506, "eval_runtime": 258.9725, "eval_samples_per_second": 22.358, "eval_steps_per_second": 0.934, "step": 650 }, { "dpo_loss": 0.5351348519325256, "epoch": 3.7128011336797355, "grad_norm": 23.905512006208795, "learning_rate": 1.8626549630957395e-07, "logits": -1.429569125175476, "logps": -82.42403411865234, "loss": 0.0624, "objective": 0.05734870210289955, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5958333611488342, "regularize": 0.05734868720173836, "step": 655 }, { "dpo_loss": 0.5322324633598328, "epoch": 3.7411431270666036, "grad_norm": 24.42468424510045, "learning_rate": 1.7860619515673032e-07, "logits": -1.5189285278320312, "logps": -83.2733383178711, "loss": 0.0612, "objective": 0.06605425477027893, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.625, "regularize": 0.06605424731969833, "step": 660 }, { "dpo_loss": 0.5305153131484985, "epoch": 3.769485120453472, "grad_norm": 21.98557345680479, "learning_rate": 1.7107331309270684e-07, "logits": -1.4122134447097778, "logps": -83.17848205566406, "loss": 0.0579, "objective": 0.05437133088707924, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5916666388511658, "regularize": 0.05437132343649864, "step": 665 }, { "dpo_loss": 0.5314101576805115, "epoch": 3.79782711384034, "grad_norm": 22.57049790061395, "learning_rate": 1.6366981314937372e-07, "logits": -1.5129222869873047, "logps": -83.30918884277344, "loss": 0.0549, "objective": 0.06075560674071312, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.675000011920929, "regularize": 0.06075560301542282, "step": 670 }, { "dpo_loss": 0.5331992506980896, "epoch": 3.826169107227208, "grad_norm": 21.51450391411621, "learning_rate": 1.5639860746661338e-07, "logits": -1.464658498764038, "logps": -82.55012512207031, "loss": 0.0562, "objective": 0.05308786779642105, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6083333492279053, "regularize": 0.05308786407113075, "step": 675 }, { "dpo_loss": 0.544487714767456, "epoch": 3.8545111006140766, "grad_norm": 21.91828532034966, "learning_rate": 1.492625561468393e-07, "logits": -1.401973009109497, "logps": -83.26588439941406, "loss": 0.0543, "objective": 0.055845096707344055, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5833333134651184, "regularize": 0.05584508553147316, "step": 680 }, { "dpo_loss": 0.5221087336540222, "epoch": 3.8828530940009447, "grad_norm": 23.338800601233537, "learning_rate": 1.4226446612998671e-07, "logits": -1.483197569847107, "logps": -82.65924835205078, "loss": 0.0543, "objective": 0.04644104465842247, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5958333611488342, "regularize": 0.04644103720784187, "step": 685 }, { "dpo_loss": 0.5242043137550354, "epoch": 3.9111950873878127, "grad_norm": 22.026766940460053, "learning_rate": 1.3540709008941147e-07, "logits": -1.449702501296997, "logps": -81.98009490966797, "loss": 0.0547, "objective": 0.055739615112543106, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6416666507720947, "regularize": 0.05573960393667221, "step": 690 }, { "dpo_loss": 0.5308277606964111, "epoch": 3.9395370807746812, "grad_norm": 22.736825591526987, "learning_rate": 1.2869312534913685e-07, "logits": -1.3683240413665771, "logps": -83.3951187133789, "loss": 0.056, "objective": 0.05744828283786774, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.05744827911257744, "step": 695 }, { "dpo_loss": 0.5327464938163757, "epoch": 3.9678790741615493, "grad_norm": 24.974758066705547, "learning_rate": 1.2212521282287093e-07, "logits": -1.416201114654541, "logps": -83.47090148925781, "loss": 0.0551, "objective": 0.05039297044277191, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.574999988079071, "regularize": 0.05039296671748161, "step": 700 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.683082640171051, "eval_logits": -1.4301180839538574, "eval_logps": -90.06600952148438, "eval_loss": 0.40649789571762085, "eval_objective": 0.4080060124397278, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4080060124397278, "eval_runtime": 258.866, "eval_samples_per_second": 22.367, "eval_steps_per_second": 0.935, "step": 700 }, { "dpo_loss": 0.5257295966148376, "epoch": 3.9962210675484178, "grad_norm": 21.66945207844546, "learning_rate": 1.15705935975212e-07, "logits": -1.3355560302734375, "logps": -81.95101928710938, "loss": 0.0536, "objective": 0.04855410382151604, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6166666746139526, "regularize": 0.04855410382151604, "step": 705 }, { "dpo_loss": 0.5204980373382568, "epoch": 4.024563060935286, "grad_norm": 21.87585318414452, "learning_rate": 1.094378198054533e-07, "logits": -1.4359726905822754, "logps": -83.67707061767578, "loss": 0.0474, "objective": 0.05088849365711212, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5708333253860474, "regularize": 0.050888482481241226, "step": 710 }, { "dpo_loss": 0.5301558375358582, "epoch": 4.052905054322154, "grad_norm": 22.01280193333486, "learning_rate": 1.0332332985438247e-07, "logits": -1.3890125751495361, "logps": -83.36654663085938, "loss": 0.0434, "objective": 0.040184516459703445, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6041666865348816, "regularize": 0.04018450155854225, "step": 715 }, { "dpo_loss": 0.5191416144371033, "epoch": 4.081247047709022, "grad_norm": 21.943342871470353, "learning_rate": 9.736487123447068e-08, "logits": -1.3216856718063354, "logps": -85.42113494873047, "loss": 0.0441, "objective": 0.03967616334557533, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.039676155894994736, "step": 720 }, { "dpo_loss": 0.5419493913650513, "epoch": 4.109589041095891, "grad_norm": 22.065151941072486, "learning_rate": 9.156478768383058e-08, "logits": -1.4097427129745483, "logps": -83.27389526367188, "loss": 0.0477, "objective": 0.04659968614578247, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5874999761581421, "regularize": 0.046599678695201874, "step": 725 }, { "dpo_loss": 0.5275304317474365, "epoch": 4.137931034482759, "grad_norm": 22.997003588267155, "learning_rate": 8.592536064431466e-08, "logits": -1.4810242652893066, "logps": -83.33085632324219, "loss": 0.0479, "objective": 0.05003201588988304, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5916666388511658, "regularize": 0.05003199726343155, "step": 730 }, { "dpo_loss": 0.5354489684104919, "epoch": 4.166273027869627, "grad_norm": 22.750124706779673, "learning_rate": 8.044880836411888e-08, "logits": -1.3749909400939941, "logps": -84.28314971923828, "loss": 0.042, "objective": 0.04194118455052376, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6041666865348816, "regularize": 0.04194117337465286, "step": 735 }, { "dpo_loss": 0.5109390616416931, "epoch": 4.194615021256495, "grad_norm": 23.35643629791226, "learning_rate": 7.513728502524286e-08, "logits": -1.3980611562728882, "logps": -83.87706756591797, "loss": 0.0437, "objective": 0.042474415153265, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6041666865348816, "regularize": 0.0424744077026844, "step": 740 }, { "dpo_loss": 0.5253542065620422, "epoch": 4.222957014643363, "grad_norm": 22.418675908813192, "learning_rate": 6.999287989614971e-08, "logits": -1.4651761054992676, "logps": -81.21513366699219, "loss": 0.0406, "objective": 0.04062732681632042, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.574999988079071, "regularize": 0.040627315640449524, "step": 745 }, { "dpo_loss": 0.5217363834381104, "epoch": 4.251299008030231, "grad_norm": 22.888185894990265, "learning_rate": 6.501761650996052e-08, "logits": -1.5698094367980957, "logps": -83.2958984375, "loss": 0.042, "objective": 0.045288145542144775, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.04528813809156418, "step": 750 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.6830218434333801, "eval_logits": -1.4307194948196411, "eval_logps": -90.04474639892578, "eval_loss": 0.4063892364501953, "eval_objective": 0.4078083634376526, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.4078083634376526, "eval_runtime": 258.9989, "eval_samples_per_second": 22.355, "eval_steps_per_second": 0.934, "step": 750 }, { "dpo_loss": 0.5249465107917786, "epoch": 4.2796410014171, "grad_norm": 22.190575430128455, "learning_rate": 6.021345186850418e-08, "logits": -1.4760249853134155, "logps": -83.12273406982422, "loss": 0.0418, "objective": 0.04030155390501022, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6166666746139526, "regularize": 0.04030154272913933, "step": 755 }, { "dpo_loss": 0.5280516147613525, "epoch": 4.307982994803968, "grad_norm": 22.195011354775016, "learning_rate": 5.5582275672538316e-08, "logits": -1.460343837738037, "logps": -83.6526870727539, "loss": 0.0395, "objective": 0.040188662707805634, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6458333134651184, "regularize": 0.04018864780664444, "step": 760 }, { "dpo_loss": 0.508765459060669, "epoch": 4.336324988190836, "grad_norm": 21.99198419312676, "learning_rate": 5.112590957844232e-08, "logits": -1.4831253290176392, "logps": -83.9940414428711, "loss": 0.0416, "objective": 0.03937076777219772, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6291666626930237, "regularize": 0.03937075287103653, "step": 765 }, { "dpo_loss": 0.5139289498329163, "epoch": 4.364666981577704, "grad_norm": 22.21570497564684, "learning_rate": 4.684610648167503e-08, "logits": -1.355908751487732, "logps": -82.18904113769531, "loss": 0.0418, "objective": 0.041529521346092224, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6000000238418579, "regularize": 0.04152949899435043, "step": 770 }, { "dpo_loss": 0.5221685171127319, "epoch": 4.393008974964572, "grad_norm": 21.306801693131447, "learning_rate": 4.274454982728032e-08, "logits": -1.4285643100738525, "logps": -83.1854476928711, "loss": 0.0394, "objective": 0.04110860824584961, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5958333611488342, "regularize": 0.041108593344688416, "step": 775 }, { "dpo_loss": 0.5304800868034363, "epoch": 4.42135096835144, "grad_norm": 21.938217857408958, "learning_rate": 3.882285294770937e-08, "logits": -1.4632736444473267, "logps": -81.85124969482422, "loss": 0.0379, "objective": 0.03418119251728058, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5666666626930237, "regularize": 0.03418118134140968, "step": 780 }, { "dpo_loss": 0.5404612421989441, "epoch": 4.449692961738309, "grad_norm": 21.77705913902379, "learning_rate": 3.508255842822255e-08, "logits": -1.4751582145690918, "logps": -81.96646118164062, "loss": 0.0448, "objective": 0.04277818650007248, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6041666865348816, "regularize": 0.04277818277478218, "step": 785 }, { "dpo_loss": 0.5209127068519592, "epoch": 4.478034955125177, "grad_norm": 21.724227546519376, "learning_rate": 3.15251375001192e-08, "logits": -1.4253805875778198, "logps": -84.63212585449219, "loss": 0.0402, "objective": 0.050088923424482346, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.625, "regularize": 0.05008890852332115, "step": 790 }, { "dpo_loss": 0.5272155404090881, "epoch": 4.506376948512045, "grad_norm": 21.960441297110094, "learning_rate": 2.8151989462033787e-08, "logits": -1.3359031677246094, "logps": -84.30043029785156, "loss": 0.0412, "objective": 0.03479573875665665, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5916666388511658, "regularize": 0.034795720130205154, "step": 795 }, { "dpo_loss": 0.5258675813674927, "epoch": 4.534718941898913, "grad_norm": 22.834668811719133, "learning_rate": 2.4964441129527335e-08, "logits": -1.3358808755874634, "logps": -83.53750610351562, "loss": 0.0411, "objective": 0.04309748858213425, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6083333492279053, "regularize": 0.04309746250510216, "step": 800 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.6830146908760071, "eval_logits": -1.431044578552246, "eval_logps": -90.11402893066406, "eval_loss": 0.406222939491272, "eval_objective": 0.4077996015548706, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4077996015548706, "eval_runtime": 258.8062, "eval_samples_per_second": 22.372, "eval_steps_per_second": 0.935, "step": 800 }, { "dpo_loss": 0.5273416042327881, "epoch": 4.563060935285781, "grad_norm": 21.794535718115338, "learning_rate": 2.1963746313188757e-08, "logits": -1.4133697748184204, "logps": -82.60270690917969, "loss": 0.0414, "objective": 0.046149447560310364, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.550000011920929, "regularize": 0.04614944010972977, "step": 805 }, { "dpo_loss": 0.5305873155593872, "epoch": 4.59140292867265, "grad_norm": 21.298734472415376, "learning_rate": 1.915108532545351e-08, "logits": -1.481737494468689, "logps": -82.04961395263672, "loss": 0.0395, "objective": 0.03058464638888836, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.5333333611488342, "regularize": 0.030584635213017464, "step": 810 }, { "dpo_loss": 0.5338551998138428, "epoch": 4.619744922059518, "grad_norm": 21.722779837853974, "learning_rate": 1.6527564516331638e-08, "logits": -1.3470157384872437, "logps": -83.43151092529297, "loss": 0.0369, "objective": 0.030139055103063583, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6708333492279053, "regularize": 0.030139045789837837, "step": 815 }, { "dpo_loss": 0.5377717614173889, "epoch": 4.648086915446386, "grad_norm": 23.027732641639304, "learning_rate": 1.4094215838229172e-08, "logits": -1.439835786819458, "logps": -83.44994354248047, "loss": 0.0373, "objective": 0.03681868314743042, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.612500011920929, "regularize": 0.036818671971559525, "step": 820 }, { "dpo_loss": 0.5387639999389648, "epoch": 4.6764289088332545, "grad_norm": 22.893892489361072, "learning_rate": 1.1851996440033318e-08, "logits": -1.3366633653640747, "logps": -81.3759765625, "loss": 0.0369, "objective": 0.03668622300028801, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5375000238418579, "regularize": 0.03668620437383652, "step": 825 }, { "dpo_loss": 0.5243638753890991, "epoch": 4.7047709022201225, "grad_norm": 21.58395292653118, "learning_rate": 9.801788290621505e-09, "logits": -1.506198525428772, "logps": -83.259033203125, "loss": 0.0407, "objective": 0.041429486125707626, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5916666388511658, "regularize": 0.04142947867512703, "step": 830 }, { "dpo_loss": 0.5125473737716675, "epoch": 4.733112895606991, "grad_norm": 21.98641530853052, "learning_rate": 7.944397831941951e-09, "logits": -1.4062670469284058, "logps": -83.29720306396484, "loss": 0.0372, "objective": 0.03951678425073624, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5583333373069763, "regularize": 0.03951676934957504, "step": 835 }, { "dpo_loss": 0.5017682909965515, "epoch": 4.7614548889938595, "grad_norm": 21.972117419289066, "learning_rate": 6.280555661802856e-09, "logits": -1.423843264579773, "logps": -83.54265594482422, "loss": 0.0372, "objective": 0.03352176770567894, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.612500011920929, "regularize": 0.033521756529808044, "step": 840 }, { "dpo_loss": 0.5365482568740845, "epoch": 4.7897968823807275, "grad_norm": 21.356793654139537, "learning_rate": 4.810916246494157e-09, "logits": -1.45553719997406, "logps": -83.4180679321289, "loss": 0.0383, "objective": 0.040656425058841705, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.5625, "regularize": 0.04065641388297081, "step": 845 }, { "dpo_loss": 0.5246464014053345, "epoch": 4.818138875767596, "grad_norm": 22.81185797664159, "learning_rate": 3.5360576633558513e-09, "logits": -1.4138314723968506, "logps": -82.19649505615234, "loss": 0.0355, "objective": 0.03642057999968529, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6208333373069763, "regularize": 0.03642057254910469, "step": 850 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.6829268932342529, "eval_logits": -1.4302399158477783, "eval_logps": -90.043212890625, "eval_loss": 0.40620195865631104, "eval_objective": 0.40770116448402405, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.40770116448402405, "eval_runtime": 259.1263, "eval_samples_per_second": 22.344, "eval_steps_per_second": 0.934, "step": 850 }, { "dpo_loss": 0.5365470051765442, "epoch": 4.846480869154464, "grad_norm": 22.602716102552016, "learning_rate": 2.4564813733932155e-09, "logits": -1.3940719366073608, "logps": -82.6231460571289, "loss": 0.0347, "objective": 0.03581225126981735, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6458333134651184, "regularize": 0.03581221401691437, "step": 855 }, { "dpo_loss": 0.5150249004364014, "epoch": 4.874822862541333, "grad_norm": 23.704671287447177, "learning_rate": 1.5726120240288631e-09, "logits": -1.3679381608963013, "logps": -82.33541870117188, "loss": 0.0348, "objective": 0.031035231426358223, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6458333134651184, "regularize": 0.031035220250487328, "step": 860 }, { "dpo_loss": 0.5223459005355835, "epoch": 4.903164855928201, "grad_norm": 21.42329131044869, "learning_rate": 8.847972820693051e-10, "logits": -1.4437813758850098, "logps": -81.53370666503906, "loss": 0.0355, "objective": 0.04200226441025734, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5958333611488342, "regularize": 0.042002253234386444, "step": 865 }, { "dpo_loss": 0.5215969681739807, "epoch": 4.931506849315069, "grad_norm": 21.701501283901965, "learning_rate": 3.933076969516724e-10, "logits": -1.4914921522140503, "logps": -83.26063537597656, "loss": 0.0393, "objective": 0.04051649197936058, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6083333492279053, "regularize": 0.0405164435505867, "step": 870 }, { "dpo_loss": 0.5250566005706787, "epoch": 4.959848842701937, "grad_norm": 21.86259624413417, "learning_rate": 9.833659432367803e-11, "logits": -1.4107563495635986, "logps": -83.20445251464844, "loss": 0.0346, "objective": 0.027810534462332726, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6166666746139526, "regularize": 0.027810489758849144, "step": 875 }, { "dpo_loss": 0.520707905292511, "epoch": 4.988190836088805, "grad_norm": 23.229102177877856, "learning_rate": 0.0, "logits": -1.4621251821517944, "logps": -83.79481506347656, "loss": 0.035, "objective": 0.029516249895095825, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.625, "regularize": 0.02951624244451523, "step": 880 }, { "epoch": 4.988190836088805, "step": 880, "total_flos": 0.0, "train_loss": 0.1442635908045552, "train_runtime": 35242.7125, "train_samples_per_second": 7.207, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 880, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }