|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 26.827203675535984, |
|
"learning_rate": 1.1363636363636363e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931466460227966, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 26.655974166157932, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits": -1.3678570985794067, |
|
"logps": -84.42396545410156, |
|
"loss": 0.413, |
|
"objective": 0.3755497932434082, |
|
"ranking_idealized": 0.6145833134651184, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3755497932434082, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916109323501587, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 25.202984552553435, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits": -1.446576714515686, |
|
"logps": -83.28290557861328, |
|
"loss": 0.4165, |
|
"objective": 0.4402167499065399, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4402167499065399, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918571591377258, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 24.8928017897937, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits": -1.4129120111465454, |
|
"logps": -83.23918151855469, |
|
"loss": 0.423, |
|
"objective": 0.40991583466529846, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.40991583466529846, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913864612579346, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 26.1438361746268, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits": -1.405305027961731, |
|
"logps": -83.78267669677734, |
|
"loss": 0.4098, |
|
"objective": 0.4017895758152008, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.4017895758152008, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6848570108413696, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 26.79124275787855, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.4560821056365967, |
|
"logps": -83.52696990966797, |
|
"loss": 0.4034, |
|
"objective": 0.41992515325546265, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.41992515325546265, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6844711303710938, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 26.78495469951858, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits": -1.4348876476287842, |
|
"logps": -84.22993469238281, |
|
"loss": 0.4013, |
|
"objective": 0.40435200929641724, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.40435200929641724, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.674633264541626, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 27.550998188131874, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits": -1.4130500555038452, |
|
"logps": -82.98973846435547, |
|
"loss": 0.3925, |
|
"objective": 0.37177178263664246, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.37177178263664246, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6748062372207642, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 30.08966136803542, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits": -1.4084281921386719, |
|
"logps": -83.05668640136719, |
|
"loss": 0.4041, |
|
"objective": 0.4255501925945282, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4255501925945282, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6630504727363586, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 25.26840087998978, |
|
"learning_rate": 5.113636363636363e-07, |
|
"logits": -1.5426502227783203, |
|
"logps": -84.47521209716797, |
|
"loss": 0.3947, |
|
"objective": 0.4412144422531128, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4412144422531128, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.659989058971405, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 24.465381128270387, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.4524168968200684, |
|
"logps": -82.95875549316406, |
|
"loss": 0.3854, |
|
"objective": 0.364622563123703, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.4583333432674408, |
|
"regularize": 0.364622563123703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6886485815048218, |
|
"eval_logits": -1.4800517559051514, |
|
"eval_logps": -91.4064712524414, |
|
"eval_loss": 0.4056198000907898, |
|
"eval_objective": 0.4075882136821747, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.4075882136821747, |
|
"eval_runtime": 265.1514, |
|
"eval_samples_per_second": 21.837, |
|
"eval_steps_per_second": 0.913, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6636093258857727, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 27.096857998186312, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits": -1.4970166683197021, |
|
"logps": -85.03699493408203, |
|
"loss": 0.3728, |
|
"objective": 0.3725493848323822, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3725493848323822, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6567211151123047, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 25.695749312088278, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits": -1.4813398122787476, |
|
"logps": -84.4722671508789, |
|
"loss": 0.3599, |
|
"objective": 0.3475739657878876, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.3475739657878876, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6518040895462036, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 29.49986445883662, |
|
"learning_rate": 7.386363636363636e-07, |
|
"logits": -1.430372714996338, |
|
"logps": -84.72962188720703, |
|
"loss": 0.3497, |
|
"objective": 0.345612108707428, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.345612108707428, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6528828740119934, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 29.563000130373773, |
|
"learning_rate": 7.954545454545454e-07, |
|
"logits": -1.5054484605789185, |
|
"logps": -86.26591491699219, |
|
"loss": 0.35, |
|
"objective": 0.3871075510978699, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3871075510978699, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.6483267545700073, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 27.602858223257197, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.516791582107544, |
|
"logps": -86.8262710571289, |
|
"loss": 0.3468, |
|
"objective": 0.3712550401687622, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3712550401687622, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6363473534584045, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 25.853451932249023, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits": -1.5554119348526, |
|
"logps": -85.4685287475586, |
|
"loss": 0.3352, |
|
"objective": 0.3362359404563904, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.336235910654068, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.6442821025848389, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 25.41070923572626, |
|
"learning_rate": 9.65909090909091e-07, |
|
"logits": -1.5026181936264038, |
|
"logps": -84.45774841308594, |
|
"loss": 0.3304, |
|
"objective": 0.3429431617259979, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.3429431617259979, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.6335326433181763, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 25.187750521174056, |
|
"learning_rate": 9.999842657116664e-07, |
|
"logits": -1.2913075685501099, |
|
"logps": -86.8448257446289, |
|
"loss": 0.3243, |
|
"objective": 0.32520177960395813, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.32520177960395813, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.6084260940551758, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 23.93476735734447, |
|
"learning_rate": 9.998072663403656e-07, |
|
"logits": -1.3773174285888672, |
|
"logps": -85.11380767822266, |
|
"loss": 0.3036, |
|
"objective": 0.3108121454715729, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3108121454715729, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.6009453535079956, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 25.488579442690856, |
|
"learning_rate": 9.99433669591504e-07, |
|
"logits": -1.4631216526031494, |
|
"logps": -85.5998764038086, |
|
"loss": 0.3126, |
|
"objective": 0.3375842273235321, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3375842273235321, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6816912293434143, |
|
"eval_logits": -1.45261812210083, |
|
"eval_logps": -91.31664276123047, |
|
"eval_loss": 0.40215975046157837, |
|
"eval_objective": 0.400903582572937, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5206611752510071, |
|
"eval_regularize": 0.400903582572937, |
|
"eval_runtime": 259.1884, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 0.934, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5999605059623718, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 25.38952651860073, |
|
"learning_rate": 9.988636224180095e-07, |
|
"logits": -1.352739930152893, |
|
"logps": -85.40930938720703, |
|
"loss": 0.3097, |
|
"objective": 0.32598960399627686, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.32598960399627686, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6067489981651306, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 31.045039069385457, |
|
"learning_rate": 9.980973490458728e-07, |
|
"logits": -1.5531387329101562, |
|
"logps": -84.0550537109375, |
|
"loss": 0.3104, |
|
"objective": 0.3359374403953552, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.33593741059303284, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6095985770225525, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 26.435670420498003, |
|
"learning_rate": 9.971351508859486e-07, |
|
"logits": -1.5276844501495361, |
|
"logps": -84.30924987792969, |
|
"loss": 0.291, |
|
"objective": 0.28773021697998047, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.28773021697998047, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6103960871696472, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 26.942509852249753, |
|
"learning_rate": 9.959774064153975e-07, |
|
"logits": -1.4677897691726685, |
|
"logps": -84.61531066894531, |
|
"loss": 0.2837, |
|
"objective": 0.2627010643482208, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2627010643482208, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5971355438232422, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 25.495357006548982, |
|
"learning_rate": 9.94624571028813e-07, |
|
"logits": -1.4407005310058594, |
|
"logps": -84.40795135498047, |
|
"loss": 0.288, |
|
"objective": 0.29481950402259827, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.29481950402259827, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5917614102363586, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 27.139835865074275, |
|
"learning_rate": 9.930771768590933e-07, |
|
"logits": -1.5837173461914062, |
|
"logps": -83.2771987915039, |
|
"loss": 0.2887, |
|
"objective": 0.2870228886604309, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.2870228886604309, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.6036564111709595, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 24.259859808790555, |
|
"learning_rate": 9.91335832568129e-07, |
|
"logits": -1.528158187866211, |
|
"logps": -85.43966674804688, |
|
"loss": 0.2694, |
|
"objective": 0.270797461271286, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.270797461271286, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.596954345703125, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 26.42799993318966, |
|
"learning_rate": 9.894012231073895e-07, |
|
"logits": -1.4152525663375854, |
|
"logps": -86.42430114746094, |
|
"loss": 0.2606, |
|
"objective": 0.2631489038467407, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.2631489038467407, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.58833909034729, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 26.472189025522844, |
|
"learning_rate": 9.872741094484964e-07, |
|
"logits": -1.5059914588928223, |
|
"logps": -85.94861602783203, |
|
"loss": 0.2555, |
|
"objective": 0.2643609344959259, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.2643609344959259, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5924276113510132, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 25.826528962819687, |
|
"learning_rate": 9.849553282839024e-07, |
|
"logits": -1.4773136377334595, |
|
"logps": -84.33631134033203, |
|
"loss": 0.2481, |
|
"objective": 0.24327746033668518, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.243277445435524, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6853220462799072, |
|
"eval_logits": -1.478104829788208, |
|
"eval_logps": -93.32852935791016, |
|
"eval_loss": 0.4118410348892212, |
|
"eval_objective": 0.41562050580978394, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 0.41562050580978394, |
|
"eval_runtime": 260.1091, |
|
"eval_samples_per_second": 22.26, |
|
"eval_steps_per_second": 0.93, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5857201814651489, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 24.421694763767686, |
|
"learning_rate": 9.824457916977784e-07, |
|
"logits": -1.4784348011016846, |
|
"logps": -84.23937225341797, |
|
"loss": 0.25, |
|
"objective": 0.24794721603393555, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.24794721603393555, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.5842316746711731, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 24.297754190889687, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits": -1.379388689994812, |
|
"logps": -84.26329803466797, |
|
"loss": 0.2417, |
|
"objective": 0.23959442973136902, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.23959442973136902, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5881075263023376, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 25.046440958455594, |
|
"learning_rate": 9.768584753741134e-07, |
|
"logits": -1.3925925493240356, |
|
"logps": -85.05484771728516, |
|
"loss": 0.2445, |
|
"objective": 0.24838505685329437, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.24838504195213318, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.5687467455863953, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 24.80826032024146, |
|
"learning_rate": 9.737828933872073e-07, |
|
"logits": -1.440019130706787, |
|
"logps": -85.22455596923828, |
|
"loss": 0.2525, |
|
"objective": 0.24621081352233887, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.24621081352233887, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.5792465209960938, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 25.657531696623572, |
|
"learning_rate": 9.705209506155634e-07, |
|
"logits": -1.3882230520248413, |
|
"logps": -85.2247085571289, |
|
"loss": 0.2408, |
|
"objective": 0.2368970364332199, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.2368970364332199, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5573223233222961, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 24.441555112350308, |
|
"learning_rate": 9.670739301325534e-07, |
|
"logits": -1.5630497932434082, |
|
"logps": -84.3948745727539, |
|
"loss": 0.2102, |
|
"objective": 0.20754273235797882, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.20754273235797882, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.5467338562011719, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 26.114706754447813, |
|
"learning_rate": 9.63443187811197e-07, |
|
"logits": -1.4042932987213135, |
|
"logps": -84.7653579711914, |
|
"loss": 0.214, |
|
"objective": 0.21694259345531464, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.21694259345531464, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5574190020561218, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 25.20524724848005, |
|
"learning_rate": 9.596301517908328e-07, |
|
"logits": -1.4538909196853638, |
|
"logps": -85.65680694580078, |
|
"loss": 0.2007, |
|
"objective": 0.21142269670963287, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.21142269670963287, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.561899721622467, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 28.03205694511378, |
|
"learning_rate": 9.556363219153662e-07, |
|
"logits": -1.435767650604248, |
|
"logps": -84.88529968261719, |
|
"loss": 0.2057, |
|
"objective": 0.19679027795791626, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.19679027795791626, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5534842014312744, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 23.06275024905121, |
|
"learning_rate": 9.514632691433106e-07, |
|
"logits": -1.517577052116394, |
|
"logps": -83.62954711914062, |
|
"loss": 0.1986, |
|
"objective": 0.19466033577919006, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.19466033577919006, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6827520132064819, |
|
"eval_logits": -1.46909761428833, |
|
"eval_logps": -90.6331558227539, |
|
"eval_loss": 0.40533673763275146, |
|
"eval_objective": 0.40887078642845154, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5206611752510071, |
|
"eval_regularize": 0.40887078642845154, |
|
"eval_runtime": 260.5987, |
|
"eval_samples_per_second": 22.218, |
|
"eval_steps_per_second": 0.929, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5494053363800049, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 22.941534169012083, |
|
"learning_rate": 9.471126349298556e-07, |
|
"logits": -1.5020116567611694, |
|
"logps": -83.8444595336914, |
|
"loss": 0.1994, |
|
"objective": 0.19596201181411743, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.19596201181411743, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.5515065789222717, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 26.741821520067802, |
|
"learning_rate": 9.425861305812081e-07, |
|
"logits": -1.4875836372375488, |
|
"logps": -83.98831176757812, |
|
"loss": 0.1895, |
|
"objective": 0.20510397851467133, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.20510397851467133, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.55607670545578, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 23.43637893497653, |
|
"learning_rate": 9.378855365814557e-07, |
|
"logits": -1.4646224975585938, |
|
"logps": -83.52363586425781, |
|
"loss": 0.1889, |
|
"objective": 0.19153118133544922, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.19153118133544922, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.556377112865448, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 26.789286245107157, |
|
"learning_rate": 9.330127018922193e-07, |
|
"logits": -1.4145793914794922, |
|
"logps": -82.84550476074219, |
|
"loss": 0.1925, |
|
"objective": 0.17143851518630981, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.17143851518630981, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.5455420613288879, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 25.237511413060258, |
|
"learning_rate": 9.279695432253708e-07, |
|
"logits": -1.4910824298858643, |
|
"logps": -84.51390075683594, |
|
"loss": 0.1898, |
|
"objective": 0.1823263168334961, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.1823263168334961, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5552546381950378, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 23.65942718982369, |
|
"learning_rate": 9.227580442891021e-07, |
|
"logits": -1.4593993425369263, |
|
"logps": -84.47645568847656, |
|
"loss": 0.1809, |
|
"objective": 0.17018872499465942, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.17018872499465942, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.5385202169418335, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 25.266299893397434, |
|
"learning_rate": 9.173802550076401e-07, |
|
"logits": -1.5345088243484497, |
|
"logps": -82.98789978027344, |
|
"loss": 0.1789, |
|
"objective": 0.1734149307012558, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1734149307012558, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5434895157814026, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 25.750551600333242, |
|
"learning_rate": 9.118382907149163e-07, |
|
"logits": -1.4756948947906494, |
|
"logps": -84.32857513427734, |
|
"loss": 0.1742, |
|
"objective": 0.1837477833032608, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.1837477684020996, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.5604755878448486, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 24.129154340629153, |
|
"learning_rate": 9.061343313225087e-07, |
|
"logits": -1.4909014701843262, |
|
"logps": -83.4426498413086, |
|
"loss": 0.1789, |
|
"objective": 0.1817345917224884, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.1817345917224884, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.5357322692871094, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 24.16224594925354, |
|
"learning_rate": 9.002706204621802e-07, |
|
"logits": -1.4255733489990234, |
|
"logps": -82.65512084960938, |
|
"loss": 0.1805, |
|
"objective": 0.17317816615104675, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.17317816615104675, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6830819249153137, |
|
"eval_logits": -1.464825987815857, |
|
"eval_logps": -90.24966430664062, |
|
"eval_loss": 0.4085530936717987, |
|
"eval_objective": 0.4083588719367981, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.4083588719367981, |
|
"eval_runtime": 262.2655, |
|
"eval_samples_per_second": 22.077, |
|
"eval_steps_per_second": 0.923, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5522000193595886, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 23.544028131135565, |
|
"learning_rate": 8.942494646033554e-07, |
|
"logits": -1.428904414176941, |
|
"logps": -83.82772827148438, |
|
"loss": 0.1816, |
|
"objective": 0.16092044115066528, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.16092044115066528, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.5535964369773865, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 24.007017906906484, |
|
"learning_rate": 8.880732321458784e-07, |
|
"logits": -1.4904005527496338, |
|
"logps": -83.97267150878906, |
|
"loss": 0.1703, |
|
"objective": 0.16837134957313538, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.16837134957313538, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5446482300758362, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 24.30764382402002, |
|
"learning_rate": 8.817443524884117e-07, |
|
"logits": -1.4601694345474243, |
|
"logps": -82.12098693847656, |
|
"loss": 0.1781, |
|
"objective": 0.17031626403331757, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.17031626403331757, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5536972284317017, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 24.675134737686058, |
|
"learning_rate": 8.752653150728411e-07, |
|
"logits": -1.471502661705017, |
|
"logps": -84.13450622558594, |
|
"loss": 0.1758, |
|
"objective": 0.18668265640735626, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.18668265640735626, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.5547968745231628, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 22.77808390233293, |
|
"learning_rate": 8.68638668405062e-07, |
|
"logits": -1.4670997858047485, |
|
"logps": -85.27931213378906, |
|
"loss": 0.171, |
|
"objective": 0.16611038148403168, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.16611038148403168, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5309798717498779, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 22.23255904480611, |
|
"learning_rate": 8.61867019052535e-07, |
|
"logits": -1.387014389038086, |
|
"logps": -83.47966766357422, |
|
"loss": 0.1731, |
|
"objective": 0.18033398687839508, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.18033398687839508, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.5369495749473572, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 24.7467519907843, |
|
"learning_rate": 8.549530306190014e-07, |
|
"logits": -1.4981027841567993, |
|
"logps": -85.08309936523438, |
|
"loss": 0.1613, |
|
"objective": 0.15606491267681122, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.15606491267681122, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5465491414070129, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 22.280063793784098, |
|
"learning_rate": 8.478994226967638e-07, |
|
"logits": -1.5392872095108032, |
|
"logps": -82.96480560302734, |
|
"loss": 0.1639, |
|
"objective": 0.1686221808195114, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1686221808195114, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5326969623565674, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 22.516708106368693, |
|
"learning_rate": 8.407089697969456e-07, |
|
"logits": -1.430370569229126, |
|
"logps": -81.40605926513672, |
|
"loss": 0.1651, |
|
"objective": 0.16238288581371307, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.16238288581371307, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.5438053011894226, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 22.982971147438153, |
|
"learning_rate": 8.333845002581458e-07, |
|
"logits": -1.5061898231506348, |
|
"logps": -82.67247009277344, |
|
"loss": 0.1668, |
|
"objective": 0.19721931219100952, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.19721931219100952, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6841849088668823, |
|
"eval_logits": -1.476090669631958, |
|
"eval_logps": -89.86566162109375, |
|
"eval_loss": 0.4079909026622772, |
|
"eval_objective": 0.4113588035106659, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5206611752510071, |
|
"eval_regularize": 0.4113588035106659, |
|
"eval_runtime": 259.3673, |
|
"eval_samples_per_second": 22.324, |
|
"eval_steps_per_second": 0.933, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529495477676392, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 23.962805989899444, |
|
"learning_rate": 8.259288951339232e-07, |
|
"logits": -1.4737364053726196, |
|
"logps": -83.48453521728516, |
|
"loss": 0.1635, |
|
"objective": 0.17988164722919464, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.17988164722919464, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5436158776283264, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 26.010266526035746, |
|
"learning_rate": 8.183450870595441e-07, |
|
"logits": -1.5402640104293823, |
|
"logps": -81.41146087646484, |
|
"loss": 0.1725, |
|
"objective": 0.16945843398571014, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.16945843398571014, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5490608811378479, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 23.214852755265355, |
|
"learning_rate": 8.106360590984404e-07, |
|
"logits": -1.4412391185760498, |
|
"logps": -82.86125946044922, |
|
"loss": 0.1609, |
|
"objective": 0.15798324346542358, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.15798324346542358, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5580403208732605, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 25.270172487230024, |
|
"learning_rate": 8.028048435688333e-07, |
|
"logits": -1.489629864692688, |
|
"logps": -84.82173156738281, |
|
"loss": 0.1562, |
|
"objective": 0.15719416737556458, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.15719416737556458, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.5307654142379761, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 24.866617020536584, |
|
"learning_rate": 7.948545208509811e-07, |
|
"logits": -1.5223475694656372, |
|
"logps": -85.49372100830078, |
|
"loss": 0.1605, |
|
"objective": 0.15138211846351624, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.15138211846351624, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.5346109867095947, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 27.77712533482603, |
|
"learning_rate": 7.86788218175523e-07, |
|
"logits": -1.282273769378662, |
|
"logps": -83.1356201171875, |
|
"loss": 0.1554, |
|
"objective": 0.14494642615318298, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.14494642615318298, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5577983260154724, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 23.806319516884738, |
|
"learning_rate": 7.786091083933949e-07, |
|
"logits": -1.4557408094406128, |
|
"logps": -83.1150131225586, |
|
"loss": 0.1472, |
|
"objective": 0.14962820708751678, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.14962820708751678, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.548663318157196, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 25.2807889158847, |
|
"learning_rate": 7.703204087277988e-07, |
|
"logits": -1.463193416595459, |
|
"logps": -85.10281372070312, |
|
"loss": 0.1416, |
|
"objective": 0.14199069142341614, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.14199069142341614, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.5481914281845093, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 23.034113253398804, |
|
"learning_rate": 7.619253795087208e-07, |
|
"logits": -1.4545904397964478, |
|
"logps": -83.42992401123047, |
|
"loss": 0.1457, |
|
"objective": 0.13813456892967224, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.13813456892967224, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.5435228943824768, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 25.493404234037047, |
|
"learning_rate": 7.534273228904915e-07, |
|
"logits": -1.3632704019546509, |
|
"logps": -84.23902893066406, |
|
"loss": 0.1476, |
|
"objective": 0.13394585251808167, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.13394585251808167, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6835209131240845, |
|
"eval_logits": -1.4348496198654175, |
|
"eval_logps": -89.60076904296875, |
|
"eval_loss": 0.4086475670337677, |
|
"eval_objective": 0.4084475636482239, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.4084475636482239, |
|
"eval_runtime": 259.621, |
|
"eval_samples_per_second": 22.302, |
|
"eval_steps_per_second": 0.932, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331315994262695, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 22.16231721451118, |
|
"learning_rate": 7.448295815528956e-07, |
|
"logits": -1.3494775295257568, |
|
"logps": -82.90995788574219, |
|
"loss": 0.1455, |
|
"objective": 0.1512984335422516, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1512984186410904, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5351840853691101, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 25.1718748641759, |
|
"learning_rate": 7.361355373863413e-07, |
|
"logits": -1.393783688545227, |
|
"logps": -81.44464874267578, |
|
"loss": 0.1343, |
|
"objective": 0.1370130479335785, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.1370130479335785, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.5345187783241272, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 24.179993370065525, |
|
"learning_rate": 7.273486101616056e-07, |
|
"logits": -1.474308729171753, |
|
"logps": -83.76331329345703, |
|
"loss": 0.1347, |
|
"objective": 0.1313287615776062, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1313287615776062, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.5465765595436096, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 23.72652550591992, |
|
"learning_rate": 7.184722561846797e-07, |
|
"logits": -1.4518685340881348, |
|
"logps": -81.55240631103516, |
|
"loss": 0.124, |
|
"objective": 0.1166418269276619, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.1166418269276619, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5262054204940796, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 24.679557221698076, |
|
"learning_rate": 7.095099669372443e-07, |
|
"logits": -1.4321234226226807, |
|
"logps": -83.55628967285156, |
|
"loss": 0.1283, |
|
"objective": 0.12942390143871307, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.12942390143871307, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5403919219970703, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 23.122876869258256, |
|
"learning_rate": 7.004652677033068e-07, |
|
"logits": -1.338428020477295, |
|
"logps": -82.6377182006836, |
|
"loss": 0.1281, |
|
"objective": 0.10954796522855759, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.10954796522855759, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.5505331754684448, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 22.25736511993951, |
|
"learning_rate": 6.913417161825449e-07, |
|
"logits": -1.4360421895980835, |
|
"logps": -84.50902557373047, |
|
"loss": 0.1236, |
|
"objective": 0.11411557346582413, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.11411556601524353, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.5398189425468445, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 23.82479611784211, |
|
"learning_rate": 6.821429010908971e-07, |
|
"logits": -1.336391806602478, |
|
"logps": -83.15116882324219, |
|
"loss": 0.1245, |
|
"objective": 0.1218627318739891, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.1218627318739891, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.5215297341346741, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 22.591578381119685, |
|
"learning_rate": 6.728724407489553e-07, |
|
"logits": -1.3484855890274048, |
|
"logps": -83.57234954833984, |
|
"loss": 0.1263, |
|
"objective": 0.1272638440132141, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.12726382911205292, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5246094465255737, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 22.99471999109431, |
|
"learning_rate": 6.635339816587108e-07, |
|
"logits": -1.4181877374649048, |
|
"logps": -84.8980712890625, |
|
"loss": 0.1232, |
|
"objective": 0.1278635859489441, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.1278635859489441, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6825190186500549, |
|
"eval_logits": -1.4141640663146973, |
|
"eval_logps": -89.93671417236328, |
|
"eval_loss": 0.40635946393013, |
|
"eval_objective": 0.4059920310974121, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4059920310974121, |
|
"eval_runtime": 259.3604, |
|
"eval_samples_per_second": 22.324, |
|
"eval_steps_per_second": 0.933, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5345380902290344, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 22.452414561821904, |
|
"learning_rate": 6.541311970692162e-07, |
|
"logits": -1.484344720840454, |
|
"logps": -82.7432861328125, |
|
"loss": 0.1237, |
|
"objective": 0.1316702663898468, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.1316702663898468, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5351517200469971, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 24.318684153528356, |
|
"learning_rate": 6.446677855317264e-07, |
|
"logits": -1.3660470247268677, |
|
"logps": -82.44485473632812, |
|
"loss": 0.1164, |
|
"objective": 0.11186593025922775, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.11186593025922775, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5329793691635132, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 22.50760313963993, |
|
"learning_rate": 6.351474694448864e-07, |
|
"logits": -1.437878131866455, |
|
"logps": -83.41373443603516, |
|
"loss": 0.1186, |
|
"objective": 0.12183640152215958, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12183640152215958, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5409477949142456, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 23.39263075574448, |
|
"learning_rate": 6.255739935905395e-07, |
|
"logits": -1.349250078201294, |
|
"logps": -85.22098541259766, |
|
"loss": 0.1175, |
|
"objective": 0.10631230473518372, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.10631229728460312, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.5271558165550232, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 23.840070879325513, |
|
"learning_rate": 6.159511236607315e-07, |
|
"logits": -1.4124720096588135, |
|
"logps": -84.24110412597656, |
|
"loss": 0.1153, |
|
"objective": 0.11380250006914139, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.11380250006914139, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327500700950623, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 22.9996288815754, |
|
"learning_rate": 6.062826447764883e-07, |
|
"logits": -1.4347702264785767, |
|
"logps": -84.58445739746094, |
|
"loss": 0.1076, |
|
"objective": 0.10175766050815582, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.10175765305757523, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.5315712690353394, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 22.21161853218669, |
|
"learning_rate": 5.965723599989528e-07, |
|
"logits": -1.4599779844284058, |
|
"logps": -84.16157531738281, |
|
"loss": 0.1148, |
|
"objective": 0.11776481568813324, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.11776480078697205, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5355103611946106, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 23.031781845673333, |
|
"learning_rate": 5.868240888334652e-07, |
|
"logits": -1.385536789894104, |
|
"logps": -83.61788940429688, |
|
"loss": 0.1125, |
|
"objective": 0.11075066775083542, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.11075066775083542, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.5411112904548645, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 25.203231448824464, |
|
"learning_rate": 5.770416657271728e-07, |
|
"logits": -1.4106037616729736, |
|
"logps": -81.53707885742188, |
|
"loss": 0.1119, |
|
"objective": 0.11902200430631638, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.11902199685573578, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.55417400598526, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 24.455868446022734, |
|
"learning_rate": 5.67228938560766e-07, |
|
"logits": -1.4431836605072021, |
|
"logps": -83.54483795166016, |
|
"loss": 0.1085, |
|
"objective": 0.10727948695421219, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.10727948695421219, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6829083561897278, |
|
"eval_logits": -1.4380848407745361, |
|
"eval_logps": -90.61122131347656, |
|
"eval_loss": 0.40571001172065735, |
|
"eval_objective": 0.406777560710907, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.406777560710907, |
|
"eval_runtime": 259.0425, |
|
"eval_samples_per_second": 22.352, |
|
"eval_steps_per_second": 0.934, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5320044159889221, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 22.906053050143626, |
|
"learning_rate": 5.573897671349268e-07, |
|
"logits": -1.4764381647109985, |
|
"logps": -84.27240753173828, |
|
"loss": 0.1117, |
|
"objective": 0.11940006166696548, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.11940006166696548, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5233482122421265, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 22.258361780067798, |
|
"learning_rate": 5.475280216520912e-07, |
|
"logits": -1.5429632663726807, |
|
"logps": -84.30569458007812, |
|
"loss": 0.1103, |
|
"objective": 0.10580132901668549, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10580132901668549, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289517641067505, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 23.240912033270092, |
|
"learning_rate": 5.376475811941191e-07, |
|
"logits": -1.428727388381958, |
|
"logps": -83.95030212402344, |
|
"loss": 0.1071, |
|
"objective": 0.10987317562103271, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.10987316071987152, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5318377614021301, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 24.64272982925985, |
|
"learning_rate": 5.277523321964701e-07, |
|
"logits": -1.4431354999542236, |
|
"logps": -83.10697937011719, |
|
"loss": 0.105, |
|
"objective": 0.1006205826997757, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.1006205826997757, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5384759306907654, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 21.994194573090148, |
|
"learning_rate": 5.178461669194903e-07, |
|
"logits": -1.4019439220428467, |
|
"logps": -82.92670440673828, |
|
"loss": 0.101, |
|
"objective": 0.0988389179110527, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.0988389179110527, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.539698600769043, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 24.874583032447394, |
|
"learning_rate": 5.07932981917404e-07, |
|
"logits": -1.5038942098617554, |
|
"logps": -82.17936706542969, |
|
"loss": 0.1017, |
|
"objective": 0.10505501180887222, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.10505500435829163, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.5295734405517578, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 24.131350896743502, |
|
"learning_rate": 4.980166765056193e-07, |
|
"logits": -1.4220199584960938, |
|
"logps": -84.46988677978516, |
|
"loss": 0.1033, |
|
"objective": 0.10565243661403656, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10565243661403656, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.5229516625404358, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 23.380731245805677, |
|
"learning_rate": 4.881011512269463e-07, |
|
"logits": -1.4164656400680542, |
|
"logps": -82.1783676147461, |
|
"loss": 0.1056, |
|
"objective": 0.10975264012813568, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10975264012813568, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.536858856678009, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 25.010956720921584, |
|
"learning_rate": 4.78190306317332e-07, |
|
"logits": -1.4320250749588013, |
|
"logps": -81.11976623535156, |
|
"loss": 0.0977, |
|
"objective": 0.09322524815797806, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.09322523325681686, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5231731534004211, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 24.981319167329183, |
|
"learning_rate": 4.682880401717177e-07, |
|
"logits": -1.479564905166626, |
|
"logps": -80.21460723876953, |
|
"loss": 0.099, |
|
"objective": 0.09580207616090775, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.09580207616090775, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6836758255958557, |
|
"eval_logits": -1.4538483619689941, |
|
"eval_logps": -89.78665924072266, |
|
"eval_loss": 0.4075116813182831, |
|
"eval_objective": 0.40899595618247986, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.40899595618247986, |
|
"eval_runtime": 259.2881, |
|
"eval_samples_per_second": 22.33, |
|
"eval_steps_per_second": 0.933, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5333107113838196, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 22.440897537859303, |
|
"learning_rate": 4.5839824781061886e-07, |
|
"logits": -1.4319252967834473, |
|
"logps": -82.19851684570312, |
|
"loss": 0.0974, |
|
"objective": 0.09931109100580215, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.09931109100580215, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.5374515056610107, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 22.71050128727261, |
|
"learning_rate": 4.4852481934803277e-07, |
|
"logits": -1.3620020151138306, |
|
"logps": -82.26110076904297, |
|
"loss": 0.0964, |
|
"objective": 0.10236553847789764, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.10236553847789764, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.5393768548965454, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 26.294279777028603, |
|
"learning_rate": 4.3867163846127674e-07, |
|
"logits": -1.5220664739608765, |
|
"logps": -82.21379852294922, |
|
"loss": 0.0962, |
|
"objective": 0.09978827089071274, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.09978827089071274, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5407862067222595, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 22.719373903401866, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits": -1.4105883836746216, |
|
"logps": -84.40800476074219, |
|
"loss": 0.0917, |
|
"objective": 0.08780403435230255, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.08780403435230255, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.5382903814315796, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 22.439739653406917, |
|
"learning_rate": 4.1904151277847305e-07, |
|
"logits": -1.3989008665084839, |
|
"logps": -83.13529205322266, |
|
"loss": 0.0909, |
|
"objective": 0.10328014940023422, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.10328014940023422, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5224732756614685, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 23.2445043242505, |
|
"learning_rate": 4.092722894212487e-07, |
|
"logits": -1.4099732637405396, |
|
"logps": -82.2646484375, |
|
"loss": 0.0906, |
|
"objective": 0.08990009129047394, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.08990008383989334, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5297616720199585, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 24.595241433656245, |
|
"learning_rate": 3.995387534803005e-07, |
|
"logits": -1.4481351375579834, |
|
"logps": -84.04501342773438, |
|
"loss": 0.0863, |
|
"objective": 0.09028714150190353, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.09028714150190353, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298644304275513, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 22.819470538427282, |
|
"learning_rate": 3.8984473360672967e-07, |
|
"logits": -1.5335410833358765, |
|
"logps": -82.01764678955078, |
|
"loss": 0.0786, |
|
"objective": 0.07253900170326233, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.07253900170326233, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5357497930526733, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 23.587959979388312, |
|
"learning_rate": 3.801940429081345e-07, |
|
"logits": -1.475661039352417, |
|
"logps": -83.04609680175781, |
|
"loss": 0.0786, |
|
"objective": 0.08452685922384262, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.08452685922384262, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5293916463851929, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 23.742387802519247, |
|
"learning_rate": 3.7059047744873955e-07, |
|
"logits": -1.3145067691802979, |
|
"logps": -83.14439392089844, |
|
"loss": 0.0841, |
|
"objective": 0.07637524604797363, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.07637524604797363, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.6836426854133606, |
|
"eval_logits": -1.4287773370742798, |
|
"eval_logps": -89.19234466552734, |
|
"eval_loss": 0.4074074625968933, |
|
"eval_objective": 0.4091208279132843, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5268595218658447, |
|
"eval_regularize": 0.4091208279132843, |
|
"eval_runtime": 259.888, |
|
"eval_samples_per_second": 22.279, |
|
"eval_steps_per_second": 0.931, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5199671983718872, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 23.606389156724106, |
|
"learning_rate": 3.6103781475622786e-07, |
|
"logits": -1.4020836353302002, |
|
"logps": -83.6429214477539, |
|
"loss": 0.0826, |
|
"objective": 0.08424239605665207, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.08424239605665207, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5297064185142517, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 21.283296032324174, |
|
"learning_rate": 3.5153981233586274e-07, |
|
"logits": -1.375638484954834, |
|
"logps": -80.67549896240234, |
|
"loss": 0.0764, |
|
"objective": 0.0754186362028122, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.0754186362028122, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5281550884246826, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 25.383548239078706, |
|
"learning_rate": 3.421002061924876e-07, |
|
"logits": -1.4403051137924194, |
|
"logps": -82.08113098144531, |
|
"loss": 0.0745, |
|
"objective": 0.0825800895690918, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.0825800821185112, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5239064693450928, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 25.606035120731306, |
|
"learning_rate": 3.327227093609824e-07, |
|
"logits": -1.3596783876419067, |
|
"logps": -82.14395141601562, |
|
"loss": 0.0748, |
|
"objective": 0.07690493017435074, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.07690493017435074, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5288205146789551, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 21.36265788871065, |
|
"learning_rate": 3.234110104457536e-07, |
|
"logits": -1.4363545179367065, |
|
"logps": -82.7227554321289, |
|
"loss": 0.0765, |
|
"objective": 0.08387748897075653, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.08387748897075653, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5455772876739502, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 22.23742629967835, |
|
"learning_rate": 3.141687721698363e-07, |
|
"logits": -1.4502298831939697, |
|
"logps": -83.70122528076172, |
|
"loss": 0.074, |
|
"objective": 0.0667726993560791, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.0667726919054985, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5377206206321716, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 21.976427115545793, |
|
"learning_rate": 3.049996299341742e-07, |
|
"logits": -1.478832483291626, |
|
"logps": -84.10258483886719, |
|
"loss": 0.074, |
|
"objective": 0.07396882027387619, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.07396882027387619, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.540601372718811, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 24.248150339564425, |
|
"learning_rate": 2.959071903876486e-07, |
|
"logits": -1.490022897720337, |
|
"logps": -84.40371704101562, |
|
"loss": 0.0726, |
|
"objective": 0.06912810355424881, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.06912810355424881, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.530450165271759, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 23.904834128431904, |
|
"learning_rate": 2.86895030008416e-07, |
|
"logits": -1.4088099002838135, |
|
"logps": -83.5683822631836, |
|
"loss": 0.0716, |
|
"objective": 0.06921317428350449, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.06921316683292389, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.510923445224762, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 21.999466319441446, |
|
"learning_rate": 2.779666936971129e-07, |
|
"logits": -1.4195644855499268, |
|
"logps": -83.0455551147461, |
|
"loss": 0.0673, |
|
"objective": 0.06648312509059906, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.06648311764001846, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.6824304461479187, |
|
"eval_logits": -1.4325991868972778, |
|
"eval_logps": -89.83067321777344, |
|
"eval_loss": 0.40557217597961426, |
|
"eval_objective": 0.40685591101646423, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.40685591101646423, |
|
"eval_runtime": 259.0599, |
|
"eval_samples_per_second": 22.35, |
|
"eval_steps_per_second": 0.934, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5408468246459961, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 21.826287125403734, |
|
"learning_rate": 2.6912569338248315e-07, |
|
"logits": -1.4806511402130127, |
|
"logps": -85.08236694335938, |
|
"loss": 0.0687, |
|
"objective": 0.0708792433142662, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.0708792433142662, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5326560139656067, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 23.721876415078565, |
|
"learning_rate": 2.603755066399718e-07, |
|
"logits": -1.4362066984176636, |
|
"logps": -83.59281158447266, |
|
"loss": 0.0693, |
|
"objective": 0.06495842337608337, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.06495841592550278, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5220057964324951, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 23.867397255620617, |
|
"learning_rate": 2.517195753238345e-07, |
|
"logits": -1.459093451499939, |
|
"logps": -83.89041137695312, |
|
"loss": 0.0677, |
|
"objective": 0.06726350635290146, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.06726350635290146, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5138709545135498, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 22.48517117265223, |
|
"learning_rate": 2.4316130421329696e-07, |
|
"logits": -1.3361726999282837, |
|
"logps": -83.23828887939453, |
|
"loss": 0.0661, |
|
"objective": 0.05854518711566925, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.058545153588056564, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5306848287582397, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 22.374230054745578, |
|
"learning_rate": 2.3470405967329604e-07, |
|
"logits": -1.406466007232666, |
|
"logps": -82.32576751708984, |
|
"loss": 0.0639, |
|
"objective": 0.06265277415513992, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.06265277415513992, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401536226272583, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 22.323503974192004, |
|
"learning_rate": 2.2635116833033392e-07, |
|
"logits": -1.4880479574203491, |
|
"logps": -82.74535369873047, |
|
"loss": 0.0671, |
|
"objective": 0.06858905404806137, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.06858905404806137, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5335288643836975, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 22.950166480099814, |
|
"learning_rate": 2.181059157639598e-07, |
|
"logits": -1.426721215248108, |
|
"logps": -82.85971069335938, |
|
"loss": 0.06, |
|
"objective": 0.0622558668255806, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.062255859375, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5119226574897766, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 25.079864254767315, |
|
"learning_rate": 2.0997154521440097e-07, |
|
"logits": -1.3697155714035034, |
|
"logps": -83.90760803222656, |
|
"loss": 0.0613, |
|
"objective": 0.0635208860039711, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.0635208785533905, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.522363007068634, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 22.441342121743332, |
|
"learning_rate": 2.0195125630684428e-07, |
|
"logits": -1.3928742408752441, |
|
"logps": -81.88297271728516, |
|
"loss": 0.0634, |
|
"objective": 0.05965565890073776, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.05965564027428627, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5373592376708984, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 22.133762729051785, |
|
"learning_rate": 1.9404820379287672e-07, |
|
"logits": -1.3841991424560547, |
|
"logps": -83.1523208618164, |
|
"loss": 0.0589, |
|
"objective": 0.055038776248693466, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.055038776248693466, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.6828624606132507, |
|
"eval_logits": -1.4302468299865723, |
|
"eval_logps": -89.47576904296875, |
|
"eval_loss": 0.40598276257514954, |
|
"eval_objective": 0.4077259600162506, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.4077259600162506, |
|
"eval_runtime": 258.9725, |
|
"eval_samples_per_second": 22.358, |
|
"eval_steps_per_second": 0.934, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5351348519325256, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 23.905512006208795, |
|
"learning_rate": 1.8626549630957395e-07, |
|
"logits": -1.429569125175476, |
|
"logps": -82.42403411865234, |
|
"loss": 0.0624, |
|
"objective": 0.05734870210289955, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.05734868720173836, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5322324633598328, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 24.42468424510045, |
|
"learning_rate": 1.7860619515673032e-07, |
|
"logits": -1.5189285278320312, |
|
"logps": -83.2733383178711, |
|
"loss": 0.0612, |
|
"objective": 0.06605425477027893, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.06605424731969833, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5305153131484985, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 21.98557345680479, |
|
"learning_rate": 1.7107331309270684e-07, |
|
"logits": -1.4122134447097778, |
|
"logps": -83.17848205566406, |
|
"loss": 0.0579, |
|
"objective": 0.05437133088707924, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.05437132343649864, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.5314101576805115, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 22.57049790061395, |
|
"learning_rate": 1.6366981314937372e-07, |
|
"logits": -1.5129222869873047, |
|
"logps": -83.30918884277344, |
|
"loss": 0.0549, |
|
"objective": 0.06075560674071312, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.06075560301542282, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331992506980896, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 21.51450391411621, |
|
"learning_rate": 1.5639860746661338e-07, |
|
"logits": -1.464658498764038, |
|
"logps": -82.55012512207031, |
|
"loss": 0.0562, |
|
"objective": 0.05308786779642105, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.05308786407113075, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.544487714767456, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 21.91828532034966, |
|
"learning_rate": 1.492625561468393e-07, |
|
"logits": -1.401973009109497, |
|
"logps": -83.26588439941406, |
|
"loss": 0.0543, |
|
"objective": 0.055845096707344055, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.05584508553147316, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221087336540222, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 23.338800601233537, |
|
"learning_rate": 1.4226446612998671e-07, |
|
"logits": -1.483197569847107, |
|
"logps": -82.65924835205078, |
|
"loss": 0.0543, |
|
"objective": 0.04644104465842247, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.04644103720784187, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5242043137550354, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 22.026766940460053, |
|
"learning_rate": 1.3540709008941147e-07, |
|
"logits": -1.449702501296997, |
|
"logps": -81.98009490966797, |
|
"loss": 0.0547, |
|
"objective": 0.055739615112543106, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.05573960393667221, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5308277606964111, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 22.736825591526987, |
|
"learning_rate": 1.2869312534913685e-07, |
|
"logits": -1.3683240413665771, |
|
"logps": -83.3951187133789, |
|
"loss": 0.056, |
|
"objective": 0.05744828283786774, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.05744827911257744, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327464938163757, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 24.974758066705547, |
|
"learning_rate": 1.2212521282287093e-07, |
|
"logits": -1.416201114654541, |
|
"logps": -83.47090148925781, |
|
"loss": 0.0551, |
|
"objective": 0.05039297044277191, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.05039296671748161, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.683082640171051, |
|
"eval_logits": -1.4301180839538574, |
|
"eval_logps": -90.06600952148438, |
|
"eval_loss": 0.40649789571762085, |
|
"eval_objective": 0.4080060124397278, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4080060124397278, |
|
"eval_runtime": 258.866, |
|
"eval_samples_per_second": 22.367, |
|
"eval_steps_per_second": 0.935, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5257295966148376, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 21.66945207844546, |
|
"learning_rate": 1.15705935975212e-07, |
|
"logits": -1.3355560302734375, |
|
"logps": -81.95101928710938, |
|
"loss": 0.0536, |
|
"objective": 0.04855410382151604, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04855410382151604, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204980373382568, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 21.87585318414452, |
|
"learning_rate": 1.094378198054533e-07, |
|
"logits": -1.4359726905822754, |
|
"logps": -83.67707061767578, |
|
"loss": 0.0474, |
|
"objective": 0.05088849365711212, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.050888482481241226, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5301558375358582, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 22.01280193333486, |
|
"learning_rate": 1.0332332985438247e-07, |
|
"logits": -1.3890125751495361, |
|
"logps": -83.36654663085938, |
|
"loss": 0.0434, |
|
"objective": 0.040184516459703445, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04018450155854225, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5191416144371033, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 21.943342871470353, |
|
"learning_rate": 9.736487123447068e-08, |
|
"logits": -1.3216856718063354, |
|
"logps": -85.42113494873047, |
|
"loss": 0.0441, |
|
"objective": 0.03967616334557533, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.039676155894994736, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5419493913650513, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 22.065151941072486, |
|
"learning_rate": 9.156478768383058e-08, |
|
"logits": -1.4097427129745483, |
|
"logps": -83.27389526367188, |
|
"loss": 0.0477, |
|
"objective": 0.04659968614578247, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.046599678695201874, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5275304317474365, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 22.997003588267155, |
|
"learning_rate": 8.592536064431466e-08, |
|
"logits": -1.4810242652893066, |
|
"logps": -83.33085632324219, |
|
"loss": 0.0479, |
|
"objective": 0.05003201588988304, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.05003199726343155, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.5354489684104919, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 22.750124706779673, |
|
"learning_rate": 8.044880836411888e-08, |
|
"logits": -1.3749909400939941, |
|
"logps": -84.28314971923828, |
|
"loss": 0.042, |
|
"objective": 0.04194118455052376, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04194117337465286, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5109390616416931, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 23.35643629791226, |
|
"learning_rate": 7.513728502524286e-08, |
|
"logits": -1.3980611562728882, |
|
"logps": -83.87706756591797, |
|
"loss": 0.0437, |
|
"objective": 0.042474415153265, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.0424744077026844, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.5253542065620422, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 22.418675908813192, |
|
"learning_rate": 6.999287989614971e-08, |
|
"logits": -1.4651761054992676, |
|
"logps": -81.21513366699219, |
|
"loss": 0.0406, |
|
"objective": 0.04062732681632042, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.040627315640449524, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5217363834381104, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 22.888185894990265, |
|
"learning_rate": 6.501761650996052e-08, |
|
"logits": -1.5698094367980957, |
|
"logps": -83.2958984375, |
|
"loss": 0.042, |
|
"objective": 0.045288145542144775, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.04528813809156418, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.6830218434333801, |
|
"eval_logits": -1.4307194948196411, |
|
"eval_logps": -90.04474639892578, |
|
"eval_loss": 0.4063892364501953, |
|
"eval_objective": 0.4078083634376526, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.4078083634376526, |
|
"eval_runtime": 258.9989, |
|
"eval_samples_per_second": 22.355, |
|
"eval_steps_per_second": 0.934, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5249465107917786, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 22.190575430128455, |
|
"learning_rate": 6.021345186850418e-08, |
|
"logits": -1.4760249853134155, |
|
"logps": -83.12273406982422, |
|
"loss": 0.0418, |
|
"objective": 0.04030155390501022, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04030154272913933, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5280516147613525, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 22.195011354775016, |
|
"learning_rate": 5.5582275672538316e-08, |
|
"logits": -1.460343837738037, |
|
"logps": -83.6526870727539, |
|
"loss": 0.0395, |
|
"objective": 0.040188662707805634, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.04018864780664444, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.508765459060669, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 21.99198419312676, |
|
"learning_rate": 5.112590957844232e-08, |
|
"logits": -1.4831253290176392, |
|
"logps": -83.9940414428711, |
|
"loss": 0.0416, |
|
"objective": 0.03937076777219772, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.03937075287103653, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5139289498329163, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 22.21570497564684, |
|
"learning_rate": 4.684610648167503e-08, |
|
"logits": -1.355908751487732, |
|
"logps": -82.18904113769531, |
|
"loss": 0.0418, |
|
"objective": 0.041529521346092224, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.04152949899435043, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221685171127319, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 21.306801693131447, |
|
"learning_rate": 4.274454982728032e-08, |
|
"logits": -1.4285643100738525, |
|
"logps": -83.1854476928711, |
|
"loss": 0.0394, |
|
"objective": 0.04110860824584961, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.041108593344688416, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5304800868034363, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 21.938217857408958, |
|
"learning_rate": 3.882285294770937e-08, |
|
"logits": -1.4632736444473267, |
|
"logps": -81.85124969482422, |
|
"loss": 0.0379, |
|
"objective": 0.03418119251728058, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.03418118134140968, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5404612421989441, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 21.77705913902379, |
|
"learning_rate": 3.508255842822255e-08, |
|
"logits": -1.4751582145690918, |
|
"logps": -81.96646118164062, |
|
"loss": 0.0448, |
|
"objective": 0.04277818650007248, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04277818277478218, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.5209127068519592, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 21.724227546519376, |
|
"learning_rate": 3.15251375001192e-08, |
|
"logits": -1.4253805875778198, |
|
"logps": -84.63212585449219, |
|
"loss": 0.0402, |
|
"objective": 0.050088923424482346, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.05008890852332115, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.5272155404090881, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 21.960441297110094, |
|
"learning_rate": 2.8151989462033787e-08, |
|
"logits": -1.3359031677246094, |
|
"logps": -84.30043029785156, |
|
"loss": 0.0412, |
|
"objective": 0.03479573875665665, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.034795720130205154, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5258675813674927, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 22.834668811719133, |
|
"learning_rate": 2.4964441129527335e-08, |
|
"logits": -1.3358808755874634, |
|
"logps": -83.53750610351562, |
|
"loss": 0.0411, |
|
"objective": 0.04309748858213425, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.04309746250510216, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.6830146908760071, |
|
"eval_logits": -1.431044578552246, |
|
"eval_logps": -90.11402893066406, |
|
"eval_loss": 0.406222939491272, |
|
"eval_objective": 0.4077996015548706, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4077996015548706, |
|
"eval_runtime": 258.8062, |
|
"eval_samples_per_second": 22.372, |
|
"eval_steps_per_second": 0.935, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5273416042327881, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 21.794535718115338, |
|
"learning_rate": 2.1963746313188757e-08, |
|
"logits": -1.4133697748184204, |
|
"logps": -82.60270690917969, |
|
"loss": 0.0414, |
|
"objective": 0.046149447560310364, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.04614944010972977, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5305873155593872, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 21.298734472415376, |
|
"learning_rate": 1.915108532545351e-08, |
|
"logits": -1.481737494468689, |
|
"logps": -82.04961395263672, |
|
"loss": 0.0395, |
|
"objective": 0.03058464638888836, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.030584635213017464, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.5338551998138428, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 21.722779837853974, |
|
"learning_rate": 1.6527564516331638e-08, |
|
"logits": -1.3470157384872437, |
|
"logps": -83.43151092529297, |
|
"loss": 0.0369, |
|
"objective": 0.030139055103063583, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.030139045789837837, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5377717614173889, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 23.027732641639304, |
|
"learning_rate": 1.4094215838229172e-08, |
|
"logits": -1.439835786819458, |
|
"logps": -83.44994354248047, |
|
"loss": 0.0373, |
|
"objective": 0.03681868314743042, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.036818671971559525, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5387639999389648, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 22.893892489361072, |
|
"learning_rate": 1.1851996440033318e-08, |
|
"logits": -1.3366633653640747, |
|
"logps": -81.3759765625, |
|
"loss": 0.0369, |
|
"objective": 0.03668622300028801, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.03668620437383652, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243638753890991, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 21.58395292653118, |
|
"learning_rate": 9.801788290621505e-09, |
|
"logits": -1.506198525428772, |
|
"logps": -83.259033203125, |
|
"loss": 0.0407, |
|
"objective": 0.041429486125707626, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.04142947867512703, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.5125473737716675, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 21.98641530853052, |
|
"learning_rate": 7.944397831941951e-09, |
|
"logits": -1.4062670469284058, |
|
"logps": -83.29720306396484, |
|
"loss": 0.0372, |
|
"objective": 0.03951678425073624, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.03951676934957504, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5017682909965515, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 21.972117419289066, |
|
"learning_rate": 6.280555661802856e-09, |
|
"logits": -1.423843264579773, |
|
"logps": -83.54265594482422, |
|
"loss": 0.0372, |
|
"objective": 0.03352176770567894, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.033521756529808044, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.5365482568740845, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 21.356793654139537, |
|
"learning_rate": 4.810916246494157e-09, |
|
"logits": -1.45553719997406, |
|
"logps": -83.4180679321289, |
|
"loss": 0.0383, |
|
"objective": 0.040656425058841705, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.04065641388297081, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5246464014053345, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 22.81185797664159, |
|
"learning_rate": 3.5360576633558513e-09, |
|
"logits": -1.4138314723968506, |
|
"logps": -82.19649505615234, |
|
"loss": 0.0355, |
|
"objective": 0.03642057999968529, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.03642057254910469, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.6829268932342529, |
|
"eval_logits": -1.4302399158477783, |
|
"eval_logps": -90.043212890625, |
|
"eval_loss": 0.40620195865631104, |
|
"eval_objective": 0.40770116448402405, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.40770116448402405, |
|
"eval_runtime": 259.1263, |
|
"eval_samples_per_second": 22.344, |
|
"eval_steps_per_second": 0.934, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5365470051765442, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 22.602716102552016, |
|
"learning_rate": 2.4564813733932155e-09, |
|
"logits": -1.3940719366073608, |
|
"logps": -82.6231460571289, |
|
"loss": 0.0347, |
|
"objective": 0.03581225126981735, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.03581221401691437, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5150249004364014, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 23.704671287447177, |
|
"learning_rate": 1.5726120240288631e-09, |
|
"logits": -1.3679381608963013, |
|
"logps": -82.33541870117188, |
|
"loss": 0.0348, |
|
"objective": 0.031035231426358223, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.031035220250487328, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.5223459005355835, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 21.42329131044869, |
|
"learning_rate": 8.847972820693051e-10, |
|
"logits": -1.4437813758850098, |
|
"logps": -81.53370666503906, |
|
"loss": 0.0355, |
|
"objective": 0.04200226441025734, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.042002253234386444, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5215969681739807, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 21.701501283901965, |
|
"learning_rate": 3.933076969516724e-10, |
|
"logits": -1.4914921522140503, |
|
"logps": -83.26063537597656, |
|
"loss": 0.0393, |
|
"objective": 0.04051649197936058, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.0405164435505867, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.5250566005706787, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 21.86259624413417, |
|
"learning_rate": 9.833659432367803e-11, |
|
"logits": -1.4107563495635986, |
|
"logps": -83.20445251464844, |
|
"loss": 0.0346, |
|
"objective": 0.027810534462332726, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.027810489758849144, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.520707905292511, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 23.229102177877856, |
|
"learning_rate": 0.0, |
|
"logits": -1.4621251821517944, |
|
"logps": -83.79481506347656, |
|
"loss": 0.035, |
|
"objective": 0.029516249895095825, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.02951624244451523, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1442635908045552, |
|
"train_runtime": 35242.7125, |
|
"train_samples_per_second": 7.207, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|