hZzy's picture
Model save
327f389 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 26.827203675535984,
"learning_rate": 1.1363636363636363e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6931466460227966,
"epoch": 0.02834199338686821,
"grad_norm": 26.655974166157932,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.3678570985794067,
"logps": -84.42396545410156,
"loss": 0.413,
"objective": 0.3755497932434082,
"ranking_idealized": 0.6145833134651184,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.3755497932434082,
"step": 5
},
{
"dpo_loss": 0.6916109323501587,
"epoch": 0.05668398677373642,
"grad_norm": 25.202984552553435,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.446576714515686,
"logps": -83.28290557861328,
"loss": 0.4165,
"objective": 0.4402167499065399,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 0.4402167499065399,
"step": 10
},
{
"dpo_loss": 0.6918571591377258,
"epoch": 0.08502598016060463,
"grad_norm": 24.8928017897937,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.4129120111465454,
"logps": -83.23918151855469,
"loss": 0.423,
"objective": 0.40991583466529846,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.40991583466529846,
"step": 15
},
{
"dpo_loss": 0.6913864612579346,
"epoch": 0.11336797354747284,
"grad_norm": 26.1438361746268,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.405305027961731,
"logps": -83.78267669677734,
"loss": 0.4098,
"objective": 0.4017895758152008,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.4017895758152008,
"step": 20
},
{
"dpo_loss": 0.6848570108413696,
"epoch": 0.14170996693434104,
"grad_norm": 26.79124275787855,
"learning_rate": 2.840909090909091e-07,
"logits": -1.4560821056365967,
"logps": -83.52696990966797,
"loss": 0.4034,
"objective": 0.41992515325546265,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.41992515325546265,
"step": 25
},
{
"dpo_loss": 0.6844711303710938,
"epoch": 0.17005196032120926,
"grad_norm": 26.78495469951858,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.4348876476287842,
"logps": -84.22993469238281,
"loss": 0.4013,
"objective": 0.40435200929641724,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.40435200929641724,
"step": 30
},
{
"dpo_loss": 0.674633264541626,
"epoch": 0.19839395370807747,
"grad_norm": 27.550998188131874,
"learning_rate": 3.977272727272727e-07,
"logits": -1.4130500555038452,
"logps": -82.98973846435547,
"loss": 0.3925,
"objective": 0.37177178263664246,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.37177178263664246,
"step": 35
},
{
"dpo_loss": 0.6748062372207642,
"epoch": 0.22673594709494568,
"grad_norm": 30.08966136803542,
"learning_rate": 4.545454545454545e-07,
"logits": -1.4084281921386719,
"logps": -83.05668640136719,
"loss": 0.4041,
"objective": 0.4255501925945282,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.512499988079071,
"regularize": 0.4255501925945282,
"step": 40
},
{
"dpo_loss": 0.6630504727363586,
"epoch": 0.25507794048181387,
"grad_norm": 25.26840087998978,
"learning_rate": 5.113636363636363e-07,
"logits": -1.5426502227783203,
"logps": -84.47521209716797,
"loss": 0.3947,
"objective": 0.4412144422531128,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4412144422531128,
"step": 45
},
{
"dpo_loss": 0.659989058971405,
"epoch": 0.2834199338686821,
"grad_norm": 24.465381128270387,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4524168968200684,
"logps": -82.95875549316406,
"loss": 0.3854,
"objective": 0.364622563123703,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.4583333432674408,
"regularize": 0.364622563123703,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6886485815048218,
"eval_logits": -1.4800517559051514,
"eval_logps": -91.4064712524414,
"eval_loss": 0.4056198000907898,
"eval_objective": 0.4075882136821747,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.4075882136821747,
"eval_runtime": 265.1514,
"eval_samples_per_second": 21.837,
"eval_steps_per_second": 0.913,
"step": 50
},
{
"dpo_loss": 0.6636093258857727,
"epoch": 0.3117619272555503,
"grad_norm": 27.096857998186312,
"learning_rate": 6.249999999999999e-07,
"logits": -1.4970166683197021,
"logps": -85.03699493408203,
"loss": 0.3728,
"objective": 0.3725493848323822,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3725493848323822,
"step": 55
},
{
"dpo_loss": 0.6567211151123047,
"epoch": 0.3401039206424185,
"grad_norm": 25.695749312088278,
"learning_rate": 6.818181818181817e-07,
"logits": -1.4813398122787476,
"logps": -84.4722671508789,
"loss": 0.3599,
"objective": 0.3475739657878876,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5333333611488342,
"regularize": 0.3475739657878876,
"step": 60
},
{
"dpo_loss": 0.6518040895462036,
"epoch": 0.3684459140292867,
"grad_norm": 29.49986445883662,
"learning_rate": 7.386363636363636e-07,
"logits": -1.430372714996338,
"logps": -84.72962188720703,
"loss": 0.3497,
"objective": 0.345612108707428,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 0.345612108707428,
"step": 65
},
{
"dpo_loss": 0.6528828740119934,
"epoch": 0.39678790741615494,
"grad_norm": 29.563000130373773,
"learning_rate": 7.954545454545454e-07,
"logits": -1.5054484605789185,
"logps": -86.26591491699219,
"loss": 0.35,
"objective": 0.3871075510978699,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3871075510978699,
"step": 70
},
{
"dpo_loss": 0.6483267545700073,
"epoch": 0.42512990080302315,
"grad_norm": 27.602858223257197,
"learning_rate": 8.522727272727273e-07,
"logits": -1.516791582107544,
"logps": -86.8262710571289,
"loss": 0.3468,
"objective": 0.3712550401687622,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3712550401687622,
"step": 75
},
{
"dpo_loss": 0.6363473534584045,
"epoch": 0.45347189418989137,
"grad_norm": 25.853451932249023,
"learning_rate": 9.09090909090909e-07,
"logits": -1.5554119348526,
"logps": -85.4685287475586,
"loss": 0.3352,
"objective": 0.3362359404563904,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.336235910654068,
"step": 80
},
{
"dpo_loss": 0.6442821025848389,
"epoch": 0.4818138875767596,
"grad_norm": 25.41070923572626,
"learning_rate": 9.65909090909091e-07,
"logits": -1.5026181936264038,
"logps": -84.45774841308594,
"loss": 0.3304,
"objective": 0.3429431617259979,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.550000011920929,
"regularize": 0.3429431617259979,
"step": 85
},
{
"dpo_loss": 0.6335326433181763,
"epoch": 0.5101558809636277,
"grad_norm": 25.187750521174056,
"learning_rate": 9.999842657116664e-07,
"logits": -1.2913075685501099,
"logps": -86.8448257446289,
"loss": 0.3243,
"objective": 0.32520177960395813,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5083333253860474,
"regularize": 0.32520177960395813,
"step": 90
},
{
"dpo_loss": 0.6084260940551758,
"epoch": 0.538497874350496,
"grad_norm": 23.93476735734447,
"learning_rate": 9.998072663403656e-07,
"logits": -1.3773174285888672,
"logps": -85.11380767822266,
"loss": 0.3036,
"objective": 0.3108121454715729,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3108121454715729,
"step": 95
},
{
"dpo_loss": 0.6009453535079956,
"epoch": 0.5668398677373642,
"grad_norm": 25.488579442690856,
"learning_rate": 9.99433669591504e-07,
"logits": -1.4631216526031494,
"logps": -85.5998764038086,
"loss": 0.3126,
"objective": 0.3375842273235321,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 0.3375842273235321,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6816912293434143,
"eval_logits": -1.45261812210083,
"eval_logps": -91.31664276123047,
"eval_loss": 0.40215975046157837,
"eval_objective": 0.400903582572937,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.400903582572937,
"eval_runtime": 259.1884,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 0.934,
"step": 100
},
{
"dpo_loss": 0.5999605059623718,
"epoch": 0.5951818611242324,
"grad_norm": 25.38952651860073,
"learning_rate": 9.988636224180095e-07,
"logits": -1.352739930152893,
"logps": -85.40930938720703,
"loss": 0.3097,
"objective": 0.32598960399627686,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5791666507720947,
"regularize": 0.32598960399627686,
"step": 105
},
{
"dpo_loss": 0.6067489981651306,
"epoch": 0.6235238545111006,
"grad_norm": 31.045039069385457,
"learning_rate": 9.980973490458728e-07,
"logits": -1.5531387329101562,
"logps": -84.0550537109375,
"loss": 0.3104,
"objective": 0.3359374403953552,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.4958333373069763,
"regularize": 0.33593741059303284,
"step": 110
},
{
"dpo_loss": 0.6095985770225525,
"epoch": 0.6518658478979689,
"grad_norm": 26.435670420498003,
"learning_rate": 9.971351508859486e-07,
"logits": -1.5276844501495361,
"logps": -84.30924987792969,
"loss": 0.291,
"objective": 0.28773021697998047,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5041666626930237,
"regularize": 0.28773021697998047,
"step": 115
},
{
"dpo_loss": 0.6103960871696472,
"epoch": 0.680207841284837,
"grad_norm": 26.942509852249753,
"learning_rate": 9.959774064153975e-07,
"logits": -1.4677897691726685,
"logps": -84.61531066894531,
"loss": 0.2837,
"objective": 0.2627010643482208,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2627010643482208,
"step": 120
},
{
"dpo_loss": 0.5971355438232422,
"epoch": 0.7085498346717053,
"grad_norm": 25.495357006548982,
"learning_rate": 9.94624571028813e-07,
"logits": -1.4407005310058594,
"logps": -84.40795135498047,
"loss": 0.288,
"objective": 0.29481950402259827,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5874999761581421,
"regularize": 0.29481950402259827,
"step": 125
},
{
"dpo_loss": 0.5917614102363586,
"epoch": 0.7368918280585735,
"grad_norm": 27.139835865074275,
"learning_rate": 9.930771768590933e-07,
"logits": -1.5837173461914062,
"logps": -83.2771987915039,
"loss": 0.2887,
"objective": 0.2870228886604309,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.5041666626930237,
"regularize": 0.2870228886604309,
"step": 130
},
{
"dpo_loss": 0.6036564111709595,
"epoch": 0.7652338214454416,
"grad_norm": 24.259859808790555,
"learning_rate": 9.91335832568129e-07,
"logits": -1.528158187866211,
"logps": -85.43966674804688,
"loss": 0.2694,
"objective": 0.270797461271286,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5541666746139526,
"regularize": 0.270797461271286,
"step": 135
},
{
"dpo_loss": 0.596954345703125,
"epoch": 0.7935758148323099,
"grad_norm": 26.42799993318966,
"learning_rate": 9.894012231073895e-07,
"logits": -1.4152525663375854,
"logps": -86.42430114746094,
"loss": 0.2606,
"objective": 0.2631489038467407,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.550000011920929,
"regularize": 0.2631489038467407,
"step": 140
},
{
"dpo_loss": 0.58833909034729,
"epoch": 0.821917808219178,
"grad_norm": 26.472189025522844,
"learning_rate": 9.872741094484964e-07,
"logits": -1.5059914588928223,
"logps": -85.94861602783203,
"loss": 0.2555,
"objective": 0.2643609344959259,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.2643609344959259,
"step": 145
},
{
"dpo_loss": 0.5924276113510132,
"epoch": 0.8502598016060463,
"grad_norm": 25.826528962819687,
"learning_rate": 9.849553282839024e-07,
"logits": -1.4773136377334595,
"logps": -84.33631134033203,
"loss": 0.2481,
"objective": 0.24327746033668518,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5874999761581421,
"regularize": 0.243277445435524,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6853220462799072,
"eval_logits": -1.478104829788208,
"eval_logps": -93.32852935791016,
"eval_loss": 0.4118410348892212,
"eval_objective": 0.41562050580978394,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 0.41562050580978394,
"eval_runtime": 260.1091,
"eval_samples_per_second": 22.26,
"eval_steps_per_second": 0.93,
"step": 150
},
{
"dpo_loss": 0.5857201814651489,
"epoch": 0.8786017949929145,
"grad_norm": 24.421694763767686,
"learning_rate": 9.824457916977784e-07,
"logits": -1.4784348011016846,
"logps": -84.23937225341797,
"loss": 0.25,
"objective": 0.24794721603393555,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5916666388511658,
"regularize": 0.24794721603393555,
"step": 155
},
{
"dpo_loss": 0.5842316746711731,
"epoch": 0.9069437883797827,
"grad_norm": 24.297754190889687,
"learning_rate": 9.797464868072486e-07,
"logits": -1.379388689994812,
"logps": -84.26329803466797,
"loss": 0.2417,
"objective": 0.23959442973136902,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5458333492279053,
"regularize": 0.23959442973136902,
"step": 160
},
{
"dpo_loss": 0.5881075263023376,
"epoch": 0.9352857817666509,
"grad_norm": 25.046440958455594,
"learning_rate": 9.768584753741134e-07,
"logits": -1.3925925493240356,
"logps": -85.05484771728516,
"loss": 0.2445,
"objective": 0.24838505685329437,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5583333373069763,
"regularize": 0.24838504195213318,
"step": 165
},
{
"dpo_loss": 0.5687467455863953,
"epoch": 0.9636277751535192,
"grad_norm": 24.80826032024146,
"learning_rate": 9.737828933872073e-07,
"logits": -1.440019130706787,
"logps": -85.22455596923828,
"loss": 0.2525,
"objective": 0.24621081352233887,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5625,
"regularize": 0.24621081352233887,
"step": 170
},
{
"dpo_loss": 0.5792465209960938,
"epoch": 0.9919697685403873,
"grad_norm": 25.657531696623572,
"learning_rate": 9.705209506155634e-07,
"logits": -1.3882230520248413,
"logps": -85.2247085571289,
"loss": 0.2408,
"objective": 0.2368970364332199,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6041666865348816,
"regularize": 0.2368970364332199,
"step": 175
},
{
"dpo_loss": 0.5573223233222961,
"epoch": 1.0203117619272555,
"grad_norm": 24.441555112350308,
"learning_rate": 9.670739301325534e-07,
"logits": -1.5630497932434082,
"logps": -84.3948745727539,
"loss": 0.2102,
"objective": 0.20754273235797882,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5958333611488342,
"regularize": 0.20754273235797882,
"step": 180
},
{
"dpo_loss": 0.5467338562011719,
"epoch": 1.0486537553141237,
"grad_norm": 26.114706754447813,
"learning_rate": 9.63443187811197e-07,
"logits": -1.4042932987213135,
"logps": -84.7653579711914,
"loss": 0.214,
"objective": 0.21694259345531464,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5458333492279053,
"regularize": 0.21694259345531464,
"step": 185
},
{
"dpo_loss": 0.5574190020561218,
"epoch": 1.076995748700992,
"grad_norm": 25.20524724848005,
"learning_rate": 9.596301517908328e-07,
"logits": -1.4538909196853638,
"logps": -85.65680694580078,
"loss": 0.2007,
"objective": 0.21142269670963287,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.625,
"regularize": 0.21142269670963287,
"step": 190
},
{
"dpo_loss": 0.561899721622467,
"epoch": 1.10533774208786,
"grad_norm": 28.03205694511378,
"learning_rate": 9.556363219153662e-07,
"logits": -1.435767650604248,
"logps": -84.88529968261719,
"loss": 0.2057,
"objective": 0.19679027795791626,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6166666746139526,
"regularize": 0.19679027795791626,
"step": 195
},
{
"dpo_loss": 0.5534842014312744,
"epoch": 1.1336797354747283,
"grad_norm": 23.06275024905121,
"learning_rate": 9.514632691433106e-07,
"logits": -1.517577052116394,
"logps": -83.62954711914062,
"loss": 0.1986,
"objective": 0.19466033577919006,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.574999988079071,
"regularize": 0.19466033577919006,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6827520132064819,
"eval_logits": -1.46909761428833,
"eval_logps": -90.6331558227539,
"eval_loss": 0.40533673763275146,
"eval_objective": 0.40887078642845154,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.40887078642845154,
"eval_runtime": 260.5987,
"eval_samples_per_second": 22.218,
"eval_steps_per_second": 0.929,
"step": 200
},
{
"dpo_loss": 0.5494053363800049,
"epoch": 1.1620217288615966,
"grad_norm": 22.941534169012083,
"learning_rate": 9.471126349298556e-07,
"logits": -1.5020116567611694,
"logps": -83.8444595336914,
"loss": 0.1994,
"objective": 0.19596201181411743,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5583333373069763,
"regularize": 0.19596201181411743,
"step": 205
},
{
"dpo_loss": 0.5515065789222717,
"epoch": 1.1903637222484649,
"grad_norm": 26.741821520067802,
"learning_rate": 9.425861305812081e-07,
"logits": -1.4875836372375488,
"logps": -83.98831176757812,
"loss": 0.1895,
"objective": 0.20510397851467133,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5541666746139526,
"regularize": 0.20510397851467133,
"step": 210
},
{
"dpo_loss": 0.55607670545578,
"epoch": 1.2187057156353331,
"grad_norm": 23.43637893497653,
"learning_rate": 9.378855365814557e-07,
"logits": -1.4646224975585938,
"logps": -83.52363586425781,
"loss": 0.1889,
"objective": 0.19153118133544922,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5708333253860474,
"regularize": 0.19153118133544922,
"step": 215
},
{
"dpo_loss": 0.556377112865448,
"epoch": 1.2470477090222012,
"grad_norm": 26.789286245107157,
"learning_rate": 9.330127018922193e-07,
"logits": -1.4145793914794922,
"logps": -82.84550476074219,
"loss": 0.1925,
"objective": 0.17143851518630981,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.6499999761581421,
"regularize": 0.17143851518630981,
"step": 220
},
{
"dpo_loss": 0.5455420613288879,
"epoch": 1.2753897024090695,
"grad_norm": 25.237511413060258,
"learning_rate": 9.279695432253708e-07,
"logits": -1.4910824298858643,
"logps": -84.51390075683594,
"loss": 0.1898,
"objective": 0.1823263168334961,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6083333492279053,
"regularize": 0.1823263168334961,
"step": 225
},
{
"dpo_loss": 0.5552546381950378,
"epoch": 1.3037316957959377,
"grad_norm": 23.65942718982369,
"learning_rate": 9.227580442891021e-07,
"logits": -1.4593993425369263,
"logps": -84.47645568847656,
"loss": 0.1809,
"objective": 0.17018872499465942,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.550000011920929,
"regularize": 0.17018872499465942,
"step": 230
},
{
"dpo_loss": 0.5385202169418335,
"epoch": 1.3320736891828058,
"grad_norm": 25.266299893397434,
"learning_rate": 9.173802550076401e-07,
"logits": -1.5345088243484497,
"logps": -82.98789978027344,
"loss": 0.1789,
"objective": 0.1734149307012558,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1734149307012558,
"step": 235
},
{
"dpo_loss": 0.5434895157814026,
"epoch": 1.360415682569674,
"grad_norm": 25.750551600333242,
"learning_rate": 9.118382907149163e-07,
"logits": -1.4756948947906494,
"logps": -84.32857513427734,
"loss": 0.1742,
"objective": 0.1837477833032608,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5583333373069763,
"regularize": 0.1837477684020996,
"step": 240
},
{
"dpo_loss": 0.5604755878448486,
"epoch": 1.3887576759565423,
"grad_norm": 24.129154340629153,
"learning_rate": 9.061343313225087e-07,
"logits": -1.4909014701843262,
"logps": -83.4426498413086,
"loss": 0.1789,
"objective": 0.1817345917224884,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.1817345917224884,
"step": 245
},
{
"dpo_loss": 0.5357322692871094,
"epoch": 1.4170996693434104,
"grad_norm": 24.16224594925354,
"learning_rate": 9.002706204621802e-07,
"logits": -1.4255733489990234,
"logps": -82.65512084960938,
"loss": 0.1805,
"objective": 0.17317816615104675,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5541666746139526,
"regularize": 0.17317816615104675,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6830819249153137,
"eval_logits": -1.464825987815857,
"eval_logps": -90.24966430664062,
"eval_loss": 0.4085530936717987,
"eval_objective": 0.4083588719367981,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.4083588719367981,
"eval_runtime": 262.2655,
"eval_samples_per_second": 22.077,
"eval_steps_per_second": 0.923,
"step": 250
},
{
"dpo_loss": 0.5522000193595886,
"epoch": 1.4454416627302786,
"grad_norm": 23.544028131135565,
"learning_rate": 8.942494646033554e-07,
"logits": -1.428904414176941,
"logps": -83.82772827148438,
"loss": 0.1816,
"objective": 0.16092044115066528,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.612500011920929,
"regularize": 0.16092044115066528,
"step": 255
},
{
"dpo_loss": 0.5535964369773865,
"epoch": 1.473783656117147,
"grad_norm": 24.007017906906484,
"learning_rate": 8.880732321458784e-07,
"logits": -1.4904005527496338,
"logps": -83.97267150878906,
"loss": 0.1703,
"objective": 0.16837134957313538,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.550000011920929,
"regularize": 0.16837134957313538,
"step": 260
},
{
"dpo_loss": 0.5446482300758362,
"epoch": 1.5021256495040152,
"grad_norm": 24.30764382402002,
"learning_rate": 8.817443524884117e-07,
"logits": -1.4601694345474243,
"logps": -82.12098693847656,
"loss": 0.1781,
"objective": 0.17031626403331757,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5708333253860474,
"regularize": 0.17031626403331757,
"step": 265
},
{
"dpo_loss": 0.5536972284317017,
"epoch": 1.5304676428908834,
"grad_norm": 24.675134737686058,
"learning_rate": 8.752653150728411e-07,
"logits": -1.471502661705017,
"logps": -84.13450622558594,
"loss": 0.1758,
"objective": 0.18668265640735626,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5708333253860474,
"regularize": 0.18668265640735626,
"step": 270
},
{
"dpo_loss": 0.5547968745231628,
"epoch": 1.5588096362777515,
"grad_norm": 22.77808390233293,
"learning_rate": 8.68638668405062e-07,
"logits": -1.4670997858047485,
"logps": -85.27931213378906,
"loss": 0.171,
"objective": 0.16611038148403168,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.16611038148403168,
"step": 275
},
{
"dpo_loss": 0.5309798717498779,
"epoch": 1.5871516296646198,
"grad_norm": 22.23255904480611,
"learning_rate": 8.61867019052535e-07,
"logits": -1.387014389038086,
"logps": -83.47966766357422,
"loss": 0.1731,
"objective": 0.18033398687839508,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.18033398687839508,
"step": 280
},
{
"dpo_loss": 0.5369495749473572,
"epoch": 1.615493623051488,
"grad_norm": 24.7467519907843,
"learning_rate": 8.549530306190014e-07,
"logits": -1.4981027841567993,
"logps": -85.08309936523438,
"loss": 0.1613,
"objective": 0.15606491267681122,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5958333611488342,
"regularize": 0.15606491267681122,
"step": 285
},
{
"dpo_loss": 0.5465491414070129,
"epoch": 1.643835616438356,
"grad_norm": 22.280063793784098,
"learning_rate": 8.478994226967638e-07,
"logits": -1.5392872095108032,
"logps": -82.96480560302734,
"loss": 0.1639,
"objective": 0.1686221808195114,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1686221808195114,
"step": 290
},
{
"dpo_loss": 0.5326969623565674,
"epoch": 1.6721776098252243,
"grad_norm": 22.516708106368693,
"learning_rate": 8.407089697969456e-07,
"logits": -1.430370569229126,
"logps": -81.40605926513672,
"loss": 0.1651,
"objective": 0.16238288581371307,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.16238288581371307,
"step": 295
},
{
"dpo_loss": 0.5438053011894226,
"epoch": 1.7005196032120926,
"grad_norm": 22.982971147438153,
"learning_rate": 8.333845002581458e-07,
"logits": -1.5061898231506348,
"logps": -82.67247009277344,
"loss": 0.1668,
"objective": 0.19721931219100952,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.19721931219100952,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6841849088668823,
"eval_logits": -1.476090669631958,
"eval_logps": -89.86566162109375,
"eval_loss": 0.4079909026622772,
"eval_objective": 0.4113588035106659,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.4113588035106659,
"eval_runtime": 259.3673,
"eval_samples_per_second": 22.324,
"eval_steps_per_second": 0.933,
"step": 300
},
{
"dpo_loss": 0.5529495477676392,
"epoch": 1.7288615965989607,
"grad_norm": 23.962805989899444,
"learning_rate": 8.259288951339232e-07,
"logits": -1.4737364053726196,
"logps": -83.48453521728516,
"loss": 0.1635,
"objective": 0.17988164722919464,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5583333373069763,
"regularize": 0.17988164722919464,
"step": 305
},
{
"dpo_loss": 0.5436158776283264,
"epoch": 1.7572035899858292,
"grad_norm": 26.010266526035746,
"learning_rate": 8.183450870595441e-07,
"logits": -1.5402640104293823,
"logps": -81.41146087646484,
"loss": 0.1725,
"objective": 0.16945843398571014,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.16945843398571014,
"step": 310
},
{
"dpo_loss": 0.5490608811378479,
"epoch": 1.7855455833726972,
"grad_norm": 23.214852755265355,
"learning_rate": 8.106360590984404e-07,
"logits": -1.4412391185760498,
"logps": -82.86125946044922,
"loss": 0.1609,
"objective": 0.15798324346542358,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.15798324346542358,
"step": 315
},
{
"dpo_loss": 0.5580403208732605,
"epoch": 1.8138875767595655,
"grad_norm": 25.270172487230024,
"learning_rate": 8.028048435688333e-07,
"logits": -1.489629864692688,
"logps": -84.82173156738281,
"loss": 0.1562,
"objective": 0.15719416737556458,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.15719416737556458,
"step": 320
},
{
"dpo_loss": 0.5307654142379761,
"epoch": 1.8422295701464337,
"grad_norm": 24.866617020536584,
"learning_rate": 7.948545208509811e-07,
"logits": -1.5223475694656372,
"logps": -85.49372100830078,
"loss": 0.1605,
"objective": 0.15138211846351624,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6291666626930237,
"regularize": 0.15138211846351624,
"step": 325
},
{
"dpo_loss": 0.5346109867095947,
"epoch": 1.8705715635333018,
"grad_norm": 27.77712533482603,
"learning_rate": 7.86788218175523e-07,
"logits": -1.282273769378662,
"logps": -83.1356201171875,
"loss": 0.1554,
"objective": 0.14494642615318298,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5583333373069763,
"regularize": 0.14494642615318298,
"step": 330
},
{
"dpo_loss": 0.5577983260154724,
"epoch": 1.89891355692017,
"grad_norm": 23.806319516884738,
"learning_rate": 7.786091083933949e-07,
"logits": -1.4557408094406128,
"logps": -83.1150131225586,
"loss": 0.1472,
"objective": 0.14962820708751678,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.14962820708751678,
"step": 335
},
{
"dpo_loss": 0.548663318157196,
"epoch": 1.9272555503070383,
"grad_norm": 25.2807889158847,
"learning_rate": 7.703204087277988e-07,
"logits": -1.463193416595459,
"logps": -85.10281372070312,
"loss": 0.1416,
"objective": 0.14199069142341614,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6166666746139526,
"regularize": 0.14199069142341614,
"step": 340
},
{
"dpo_loss": 0.5481914281845093,
"epoch": 1.9555975436939064,
"grad_norm": 23.034113253398804,
"learning_rate": 7.619253795087208e-07,
"logits": -1.4545904397964478,
"logps": -83.42992401123047,
"loss": 0.1457,
"objective": 0.13813456892967224,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5833333134651184,
"regularize": 0.13813456892967224,
"step": 345
},
{
"dpo_loss": 0.5435228943824768,
"epoch": 1.9839395370807746,
"grad_norm": 25.493404234037047,
"learning_rate": 7.534273228904915e-07,
"logits": -1.3632704019546509,
"logps": -84.23902893066406,
"loss": 0.1476,
"objective": 0.13394585251808167,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5625,
"regularize": 0.13394585251808167,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6835209131240845,
"eval_logits": -1.4348496198654175,
"eval_logps": -89.60076904296875,
"eval_loss": 0.4086475670337677,
"eval_objective": 0.4084475636482239,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.4084475636482239,
"eval_runtime": 259.621,
"eval_samples_per_second": 22.302,
"eval_steps_per_second": 0.932,
"step": 350
},
{
"dpo_loss": 0.5331315994262695,
"epoch": 2.012281530467643,
"grad_norm": 22.16231721451118,
"learning_rate": 7.448295815528956e-07,
"logits": -1.3494775295257568,
"logps": -82.90995788574219,
"loss": 0.1455,
"objective": 0.1512984335422516,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1512984186410904,
"step": 355
},
{
"dpo_loss": 0.5351840853691101,
"epoch": 2.040623523854511,
"grad_norm": 25.1718748641759,
"learning_rate": 7.361355373863413e-07,
"logits": -1.393783688545227,
"logps": -81.44464874267578,
"loss": 0.1343,
"objective": 0.1370130479335785,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.637499988079071,
"regularize": 0.1370130479335785,
"step": 360
},
{
"dpo_loss": 0.5345187783241272,
"epoch": 2.0689655172413794,
"grad_norm": 24.179993370065525,
"learning_rate": 7.273486101616056e-07,
"logits": -1.474308729171753,
"logps": -83.76331329345703,
"loss": 0.1347,
"objective": 0.1313287615776062,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1313287615776062,
"step": 365
},
{
"dpo_loss": 0.5465765595436096,
"epoch": 2.0973075106282475,
"grad_norm": 23.72652550591992,
"learning_rate": 7.184722561846797e-07,
"logits": -1.4518685340881348,
"logps": -81.55240631103516,
"loss": 0.124,
"objective": 0.1166418269276619,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5541666746139526,
"regularize": 0.1166418269276619,
"step": 370
},
{
"dpo_loss": 0.5262054204940796,
"epoch": 2.1256495040151155,
"grad_norm": 24.679557221698076,
"learning_rate": 7.095099669372443e-07,
"logits": -1.4321234226226807,
"logps": -83.55628967285156,
"loss": 0.1283,
"objective": 0.12942390143871307,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6625000238418579,
"regularize": 0.12942390143871307,
"step": 375
},
{
"dpo_loss": 0.5403919219970703,
"epoch": 2.153991497401984,
"grad_norm": 23.122876869258256,
"learning_rate": 7.004652677033068e-07,
"logits": -1.338428020477295,
"logps": -82.6377182006836,
"loss": 0.1281,
"objective": 0.10954796522855759,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.10954796522855759,
"step": 380
},
{
"dpo_loss": 0.5505331754684448,
"epoch": 2.182333490788852,
"grad_norm": 22.25736511993951,
"learning_rate": 6.913417161825449e-07,
"logits": -1.4360421895980835,
"logps": -84.50902557373047,
"loss": 0.1236,
"objective": 0.11411557346582413,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.574999988079071,
"regularize": 0.11411556601524353,
"step": 385
},
{
"dpo_loss": 0.5398189425468445,
"epoch": 2.21067548417572,
"grad_norm": 23.82479611784211,
"learning_rate": 6.821429010908971e-07,
"logits": -1.336391806602478,
"logps": -83.15116882324219,
"loss": 0.1245,
"objective": 0.1218627318739891,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5375000238418579,
"regularize": 0.1218627318739891,
"step": 390
},
{
"dpo_loss": 0.5215297341346741,
"epoch": 2.2390174775625886,
"grad_norm": 22.591578381119685,
"learning_rate": 6.728724407489553e-07,
"logits": -1.3484855890274048,
"logps": -83.57234954833984,
"loss": 0.1263,
"objective": 0.1272638440132141,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5916666388511658,
"regularize": 0.12726382911205292,
"step": 395
},
{
"dpo_loss": 0.5246094465255737,
"epoch": 2.2673594709494567,
"grad_norm": 22.99471999109431,
"learning_rate": 6.635339816587108e-07,
"logits": -1.4181877374649048,
"logps": -84.8980712890625,
"loss": 0.1232,
"objective": 0.1278635859489441,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.675000011920929,
"regularize": 0.1278635859489441,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6825190186500549,
"eval_logits": -1.4141640663146973,
"eval_logps": -89.93671417236328,
"eval_loss": 0.40635946393013,
"eval_objective": 0.4059920310974121,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4059920310974121,
"eval_runtime": 259.3604,
"eval_samples_per_second": 22.324,
"eval_steps_per_second": 0.933,
"step": 400
},
{
"dpo_loss": 0.5345380902290344,
"epoch": 2.295701464336325,
"grad_norm": 22.452414561821904,
"learning_rate": 6.541311970692162e-07,
"logits": -1.484344720840454,
"logps": -82.7432861328125,
"loss": 0.1237,
"objective": 0.1316702663898468,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6541666388511658,
"regularize": 0.1316702663898468,
"step": 405
},
{
"dpo_loss": 0.5351517200469971,
"epoch": 2.324043457723193,
"grad_norm": 24.318684153528356,
"learning_rate": 6.446677855317264e-07,
"logits": -1.3660470247268677,
"logps": -82.44485473632812,
"loss": 0.1164,
"objective": 0.11186593025922775,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5583333373069763,
"regularize": 0.11186593025922775,
"step": 410
},
{
"dpo_loss": 0.5329793691635132,
"epoch": 2.3523854511100613,
"grad_norm": 22.50760313963993,
"learning_rate": 6.351474694448864e-07,
"logits": -1.437878131866455,
"logps": -83.41373443603516,
"loss": 0.1186,
"objective": 0.12183640152215958,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12183640152215958,
"step": 415
},
{
"dpo_loss": 0.5409477949142456,
"epoch": 2.3807274444969297,
"grad_norm": 23.39263075574448,
"learning_rate": 6.255739935905395e-07,
"logits": -1.349250078201294,
"logps": -85.22098541259766,
"loss": 0.1175,
"objective": 0.10631230473518372,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.550000011920929,
"regularize": 0.10631229728460312,
"step": 420
},
{
"dpo_loss": 0.5271558165550232,
"epoch": 2.409069437883798,
"grad_norm": 23.840070879325513,
"learning_rate": 6.159511236607315e-07,
"logits": -1.4124720096588135,
"logps": -84.24110412597656,
"loss": 0.1153,
"objective": 0.11380250006914139,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.11380250006914139,
"step": 425
},
{
"dpo_loss": 0.5327500700950623,
"epoch": 2.4374114312706663,
"grad_norm": 22.9996288815754,
"learning_rate": 6.062826447764883e-07,
"logits": -1.4347702264785767,
"logps": -84.58445739746094,
"loss": 0.1076,
"objective": 0.10175766050815582,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.10175765305757523,
"step": 430
},
{
"dpo_loss": 0.5315712690353394,
"epoch": 2.4657534246575343,
"grad_norm": 22.21161853218669,
"learning_rate": 5.965723599989528e-07,
"logits": -1.4599779844284058,
"logps": -84.16157531738281,
"loss": 0.1148,
"objective": 0.11776481568813324,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5916666388511658,
"regularize": 0.11776480078697205,
"step": 435
},
{
"dpo_loss": 0.5355103611946106,
"epoch": 2.4940954180444024,
"grad_norm": 23.031781845673333,
"learning_rate": 5.868240888334652e-07,
"logits": -1.385536789894104,
"logps": -83.61788940429688,
"loss": 0.1125,
"objective": 0.11075066775083542,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5708333253860474,
"regularize": 0.11075066775083542,
"step": 440
},
{
"dpo_loss": 0.5411112904548645,
"epoch": 2.5224374114312704,
"grad_norm": 25.203231448824464,
"learning_rate": 5.770416657271728e-07,
"logits": -1.4106037616729736,
"logps": -81.53707885742188,
"loss": 0.1119,
"objective": 0.11902200430631638,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6041666865348816,
"regularize": 0.11902199685573578,
"step": 445
},
{
"dpo_loss": 0.55417400598526,
"epoch": 2.550779404818139,
"grad_norm": 24.455868446022734,
"learning_rate": 5.67228938560766e-07,
"logits": -1.4431836605072021,
"logps": -83.54483795166016,
"loss": 0.1085,
"objective": 0.10727948695421219,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5333333611488342,
"regularize": 0.10727948695421219,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6829083561897278,
"eval_logits": -1.4380848407745361,
"eval_logps": -90.61122131347656,
"eval_loss": 0.40571001172065735,
"eval_objective": 0.406777560710907,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.406777560710907,
"eval_runtime": 259.0425,
"eval_samples_per_second": 22.352,
"eval_steps_per_second": 0.934,
"step": 450
},
{
"dpo_loss": 0.5320044159889221,
"epoch": 2.579121398205007,
"grad_norm": 22.906053050143626,
"learning_rate": 5.573897671349268e-07,
"logits": -1.4764381647109985,
"logps": -84.27240753173828,
"loss": 0.1117,
"objective": 0.11940006166696548,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.574999988079071,
"regularize": 0.11940006166696548,
"step": 455
},
{
"dpo_loss": 0.5233482122421265,
"epoch": 2.6074633915918755,
"grad_norm": 22.258361780067798,
"learning_rate": 5.475280216520912e-07,
"logits": -1.5429632663726807,
"logps": -84.30569458007812,
"loss": 0.1103,
"objective": 0.10580132901668549,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10580132901668549,
"step": 460
},
{
"dpo_loss": 0.5289517641067505,
"epoch": 2.6358053849787435,
"grad_norm": 23.240912033270092,
"learning_rate": 5.376475811941191e-07,
"logits": -1.428727388381958,
"logps": -83.95030212402344,
"loss": 0.1071,
"objective": 0.10987317562103271,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5541666746139526,
"regularize": 0.10987316071987152,
"step": 465
},
{
"dpo_loss": 0.5318377614021301,
"epoch": 2.6641473783656116,
"grad_norm": 24.64272982925985,
"learning_rate": 5.277523321964701e-07,
"logits": -1.4431354999542236,
"logps": -83.10697937011719,
"loss": 0.105,
"objective": 0.1006205826997757,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.1006205826997757,
"step": 470
},
{
"dpo_loss": 0.5384759306907654,
"epoch": 2.69248937175248,
"grad_norm": 21.994194573090148,
"learning_rate": 5.178461669194903e-07,
"logits": -1.4019439220428467,
"logps": -82.92670440673828,
"loss": 0.101,
"objective": 0.0988389179110527,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5916666388511658,
"regularize": 0.0988389179110527,
"step": 475
},
{
"dpo_loss": 0.539698600769043,
"epoch": 2.720831365139348,
"grad_norm": 24.874583032447394,
"learning_rate": 5.07932981917404e-07,
"logits": -1.5038942098617554,
"logps": -82.17936706542969,
"loss": 0.1017,
"objective": 0.10505501180887222,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5791666507720947,
"regularize": 0.10505500435829163,
"step": 480
},
{
"dpo_loss": 0.5295734405517578,
"epoch": 2.7491733585262166,
"grad_norm": 24.131350896743502,
"learning_rate": 4.980166765056193e-07,
"logits": -1.4220199584960938,
"logps": -84.46988677978516,
"loss": 0.1033,
"objective": 0.10565243661403656,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10565243661403656,
"step": 485
},
{
"dpo_loss": 0.5229516625404358,
"epoch": 2.7775153519130846,
"grad_norm": 23.380731245805677,
"learning_rate": 4.881011512269463e-07,
"logits": -1.4164656400680542,
"logps": -82.1783676147461,
"loss": 0.1056,
"objective": 0.10975264012813568,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10975264012813568,
"step": 490
},
{
"dpo_loss": 0.536858856678009,
"epoch": 2.8058573452999527,
"grad_norm": 25.010956720921584,
"learning_rate": 4.78190306317332e-07,
"logits": -1.4320250749588013,
"logps": -81.11976623535156,
"loss": 0.0977,
"objective": 0.09322524815797806,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.09322523325681686,
"step": 495
},
{
"dpo_loss": 0.5231731534004211,
"epoch": 2.8341993386868207,
"grad_norm": 24.981319167329183,
"learning_rate": 4.682880401717177e-07,
"logits": -1.479564905166626,
"logps": -80.21460723876953,
"loss": 0.099,
"objective": 0.09580207616090775,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6083333492279053,
"regularize": 0.09580207616090775,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6836758255958557,
"eval_logits": -1.4538483619689941,
"eval_logps": -89.78665924072266,
"eval_loss": 0.4075116813182831,
"eval_objective": 0.40899595618247986,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.40899595618247986,
"eval_runtime": 259.2881,
"eval_samples_per_second": 22.33,
"eval_steps_per_second": 0.933,
"step": 500
},
{
"dpo_loss": 0.5333107113838196,
"epoch": 2.862541332073689,
"grad_norm": 22.440897537859303,
"learning_rate": 4.5839824781061886e-07,
"logits": -1.4319252967834473,
"logps": -82.19851684570312,
"loss": 0.0974,
"objective": 0.09931109100580215,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.612500011920929,
"regularize": 0.09931109100580215,
"step": 505
},
{
"dpo_loss": 0.5374515056610107,
"epoch": 2.8908833254605573,
"grad_norm": 22.71050128727261,
"learning_rate": 4.4852481934803277e-07,
"logits": -1.3620020151138306,
"logps": -82.26110076904297,
"loss": 0.0964,
"objective": 0.10236553847789764,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.10236553847789764,
"step": 510
},
{
"dpo_loss": 0.5393768548965454,
"epoch": 2.9192253188474258,
"grad_norm": 26.294279777028603,
"learning_rate": 4.3867163846127674e-07,
"logits": -1.5220664739608765,
"logps": -82.21379852294922,
"loss": 0.0962,
"objective": 0.09978827089071274,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5833333134651184,
"regularize": 0.09978827089071274,
"step": 515
},
{
"dpo_loss": 0.5407862067222595,
"epoch": 2.947567312234294,
"grad_norm": 22.719373903401866,
"learning_rate": 4.2884258086335745e-07,
"logits": -1.4105883836746216,
"logps": -84.40800476074219,
"loss": 0.0917,
"objective": 0.08780403435230255,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.08780403435230255,
"step": 520
},
{
"dpo_loss": 0.5382903814315796,
"epoch": 2.975909305621162,
"grad_norm": 22.439739653406917,
"learning_rate": 4.1904151277847305e-07,
"logits": -1.3989008665084839,
"logps": -83.13529205322266,
"loss": 0.0909,
"objective": 0.10328014940023422,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.637499988079071,
"regularize": 0.10328014940023422,
"step": 525
},
{
"dpo_loss": 0.5224732756614685,
"epoch": 3.0042512990080303,
"grad_norm": 23.2445043242505,
"learning_rate": 4.092722894212487e-07,
"logits": -1.4099732637405396,
"logps": -82.2646484375,
"loss": 0.0906,
"objective": 0.08990009129047394,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6000000238418579,
"regularize": 0.08990008383989334,
"step": 530
},
{
"dpo_loss": 0.5297616720199585,
"epoch": 3.0325932923948984,
"grad_norm": 24.595241433656245,
"learning_rate": 3.995387534803005e-07,
"logits": -1.4481351375579834,
"logps": -84.04501342773438,
"loss": 0.0863,
"objective": 0.09028714150190353,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6208333373069763,
"regularize": 0.09028714150190353,
"step": 535
},
{
"dpo_loss": 0.5298644304275513,
"epoch": 3.0609352857817664,
"grad_norm": 22.819470538427282,
"learning_rate": 3.8984473360672967e-07,
"logits": -1.5335410833358765,
"logps": -82.01764678955078,
"loss": 0.0786,
"objective": 0.07253900170326233,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6291666626930237,
"regularize": 0.07253900170326233,
"step": 540
},
{
"dpo_loss": 0.5357497930526733,
"epoch": 3.089277279168635,
"grad_norm": 23.587959979388312,
"learning_rate": 3.801940429081345e-07,
"logits": -1.475661039352417,
"logps": -83.04609680175781,
"loss": 0.0786,
"objective": 0.08452685922384262,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5666666626930237,
"regularize": 0.08452685922384262,
"step": 545
},
{
"dpo_loss": 0.5293916463851929,
"epoch": 3.117619272555503,
"grad_norm": 23.742387802519247,
"learning_rate": 3.7059047744873955e-07,
"logits": -1.3145067691802979,
"logps": -83.14439392089844,
"loss": 0.0841,
"objective": 0.07637524604797363,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.612500011920929,
"regularize": 0.07637524604797363,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.6836426854133606,
"eval_logits": -1.4287773370742798,
"eval_logps": -89.19234466552734,
"eval_loss": 0.4074074625968933,
"eval_objective": 0.4091208279132843,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5268595218658447,
"eval_regularize": 0.4091208279132843,
"eval_runtime": 259.888,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 0.931,
"step": 550
},
{
"dpo_loss": 0.5199671983718872,
"epoch": 3.1459612659423715,
"grad_norm": 23.606389156724106,
"learning_rate": 3.6103781475622786e-07,
"logits": -1.4020836353302002,
"logps": -83.6429214477539,
"loss": 0.0826,
"objective": 0.08424239605665207,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.637499988079071,
"regularize": 0.08424239605665207,
"step": 555
},
{
"dpo_loss": 0.5297064185142517,
"epoch": 3.1743032593292395,
"grad_norm": 21.283296032324174,
"learning_rate": 3.5153981233586274e-07,
"logits": -1.375638484954834,
"logps": -80.67549896240234,
"loss": 0.0764,
"objective": 0.0754186362028122,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5625,
"regularize": 0.0754186362028122,
"step": 560
},
{
"dpo_loss": 0.5281550884246826,
"epoch": 3.2026452527161076,
"grad_norm": 25.383548239078706,
"learning_rate": 3.421002061924876e-07,
"logits": -1.4403051137924194,
"logps": -82.08113098144531,
"loss": 0.0745,
"objective": 0.0825800895690918,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6333333253860474,
"regularize": 0.0825800821185112,
"step": 565
},
{
"dpo_loss": 0.5239064693450928,
"epoch": 3.230987246102976,
"grad_norm": 25.606035120731306,
"learning_rate": 3.327227093609824e-07,
"logits": -1.3596783876419067,
"logps": -82.14395141601562,
"loss": 0.0748,
"objective": 0.07690493017435074,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5708333253860474,
"regularize": 0.07690493017435074,
"step": 570
},
{
"dpo_loss": 0.5288205146789551,
"epoch": 3.259329239489844,
"grad_norm": 21.36265788871065,
"learning_rate": 3.234110104457536e-07,
"logits": -1.4363545179367065,
"logps": -82.7227554321289,
"loss": 0.0765,
"objective": 0.08387748897075653,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.08387748897075653,
"step": 575
},
{
"dpo_loss": 0.5455772876739502,
"epoch": 3.287671232876712,
"grad_norm": 22.23742629967835,
"learning_rate": 3.141687721698363e-07,
"logits": -1.4502298831939697,
"logps": -83.70122528076172,
"loss": 0.074,
"objective": 0.0667726993560791,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.0667726919054985,
"step": 580
},
{
"dpo_loss": 0.5377206206321716,
"epoch": 3.3160132262635806,
"grad_norm": 21.976427115545793,
"learning_rate": 3.049996299341742e-07,
"logits": -1.478832483291626,
"logps": -84.10258483886719,
"loss": 0.074,
"objective": 0.07396882027387619,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6291666626930237,
"regularize": 0.07396882027387619,
"step": 585
},
{
"dpo_loss": 0.540601372718811,
"epoch": 3.3443552196504487,
"grad_norm": 24.248150339564425,
"learning_rate": 2.959071903876486e-07,
"logits": -1.490022897720337,
"logps": -84.40371704101562,
"loss": 0.0726,
"objective": 0.06912810355424881,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5833333134651184,
"regularize": 0.06912810355424881,
"step": 590
},
{
"dpo_loss": 0.530450165271759,
"epoch": 3.372697213037317,
"grad_norm": 23.904834128431904,
"learning_rate": 2.86895030008416e-07,
"logits": -1.4088099002838135,
"logps": -83.5683822631836,
"loss": 0.0716,
"objective": 0.06921317428350449,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.06921316683292389,
"step": 595
},
{
"dpo_loss": 0.510923445224762,
"epoch": 3.4010392064241852,
"grad_norm": 21.999466319441446,
"learning_rate": 2.779666936971129e-07,
"logits": -1.4195644855499268,
"logps": -83.0455551147461,
"loss": 0.0673,
"objective": 0.06648312509059906,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.06648311764001846,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6824304461479187,
"eval_logits": -1.4325991868972778,
"eval_logps": -89.83067321777344,
"eval_loss": 0.40557217597961426,
"eval_objective": 0.40685591101646423,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.40685591101646423,
"eval_runtime": 259.0599,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 0.934,
"step": 600
},
{
"dpo_loss": 0.5408468246459961,
"epoch": 3.4293811998110533,
"grad_norm": 21.826287125403734,
"learning_rate": 2.6912569338248315e-07,
"logits": -1.4806511402130127,
"logps": -85.08236694335938,
"loss": 0.0687,
"objective": 0.0708792433142662,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5874999761581421,
"regularize": 0.0708792433142662,
"step": 605
},
{
"dpo_loss": 0.5326560139656067,
"epoch": 3.4577231931979218,
"grad_norm": 23.721876415078565,
"learning_rate": 2.603755066399718e-07,
"logits": -1.4362066984176636,
"logps": -83.59281158447266,
"loss": 0.0693,
"objective": 0.06495842337608337,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.06495841592550278,
"step": 610
},
{
"dpo_loss": 0.5220057964324951,
"epoch": 3.48606518658479,
"grad_norm": 23.867397255620617,
"learning_rate": 2.517195753238345e-07,
"logits": -1.459093451499939,
"logps": -83.89041137695312,
"loss": 0.0677,
"objective": 0.06726350635290146,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5458333492279053,
"regularize": 0.06726350635290146,
"step": 615
},
{
"dpo_loss": 0.5138709545135498,
"epoch": 3.514407179971658,
"grad_norm": 22.48517117265223,
"learning_rate": 2.4316130421329696e-07,
"logits": -1.3361726999282837,
"logps": -83.23828887939453,
"loss": 0.0661,
"objective": 0.05854518711566925,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.058545153588056564,
"step": 620
},
{
"dpo_loss": 0.5306848287582397,
"epoch": 3.5427491733585263,
"grad_norm": 22.374230054745578,
"learning_rate": 2.3470405967329604e-07,
"logits": -1.406466007232666,
"logps": -82.32576751708984,
"loss": 0.0639,
"objective": 0.06265277415513992,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.06265277415513992,
"step": 625
},
{
"dpo_loss": 0.5401536226272583,
"epoch": 3.5710911667453944,
"grad_norm": 22.323503974192004,
"learning_rate": 2.2635116833033392e-07,
"logits": -1.4880479574203491,
"logps": -82.74535369873047,
"loss": 0.0671,
"objective": 0.06858905404806137,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5874999761581421,
"regularize": 0.06858905404806137,
"step": 630
},
{
"dpo_loss": 0.5335288643836975,
"epoch": 3.5994331601322624,
"grad_norm": 22.950166480099814,
"learning_rate": 2.181059157639598e-07,
"logits": -1.426721215248108,
"logps": -82.85971069335938,
"loss": 0.06,
"objective": 0.0622558668255806,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5708333253860474,
"regularize": 0.062255859375,
"step": 635
},
{
"dpo_loss": 0.5119226574897766,
"epoch": 3.627775153519131,
"grad_norm": 25.079864254767315,
"learning_rate": 2.0997154521440097e-07,
"logits": -1.3697155714035034,
"logps": -83.90760803222656,
"loss": 0.0613,
"objective": 0.0635208860039711,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5458333492279053,
"regularize": 0.0635208785533905,
"step": 640
},
{
"dpo_loss": 0.522363007068634,
"epoch": 3.656117146905999,
"grad_norm": 22.441342121743332,
"learning_rate": 2.0195125630684428e-07,
"logits": -1.3928742408752441,
"logps": -81.88297271728516,
"loss": 0.0634,
"objective": 0.05965565890073776,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.612500011920929,
"regularize": 0.05965564027428627,
"step": 645
},
{
"dpo_loss": 0.5373592376708984,
"epoch": 3.6844591402928675,
"grad_norm": 22.133762729051785,
"learning_rate": 1.9404820379287672e-07,
"logits": -1.3841991424560547,
"logps": -83.1523208618164,
"loss": 0.0589,
"objective": 0.055038776248693466,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.055038776248693466,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.6828624606132507,
"eval_logits": -1.4302468299865723,
"eval_logps": -89.47576904296875,
"eval_loss": 0.40598276257514954,
"eval_objective": 0.4077259600162506,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.4077259600162506,
"eval_runtime": 258.9725,
"eval_samples_per_second": 22.358,
"eval_steps_per_second": 0.934,
"step": 650
},
{
"dpo_loss": 0.5351348519325256,
"epoch": 3.7128011336797355,
"grad_norm": 23.905512006208795,
"learning_rate": 1.8626549630957395e-07,
"logits": -1.429569125175476,
"logps": -82.42403411865234,
"loss": 0.0624,
"objective": 0.05734870210289955,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5958333611488342,
"regularize": 0.05734868720173836,
"step": 655
},
{
"dpo_loss": 0.5322324633598328,
"epoch": 3.7411431270666036,
"grad_norm": 24.42468424510045,
"learning_rate": 1.7860619515673032e-07,
"logits": -1.5189285278320312,
"logps": -83.2733383178711,
"loss": 0.0612,
"objective": 0.06605425477027893,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.625,
"regularize": 0.06605424731969833,
"step": 660
},
{
"dpo_loss": 0.5305153131484985,
"epoch": 3.769485120453472,
"grad_norm": 21.98557345680479,
"learning_rate": 1.7107331309270684e-07,
"logits": -1.4122134447097778,
"logps": -83.17848205566406,
"loss": 0.0579,
"objective": 0.05437133088707924,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5916666388511658,
"regularize": 0.05437132343649864,
"step": 665
},
{
"dpo_loss": 0.5314101576805115,
"epoch": 3.79782711384034,
"grad_norm": 22.57049790061395,
"learning_rate": 1.6366981314937372e-07,
"logits": -1.5129222869873047,
"logps": -83.30918884277344,
"loss": 0.0549,
"objective": 0.06075560674071312,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.675000011920929,
"regularize": 0.06075560301542282,
"step": 670
},
{
"dpo_loss": 0.5331992506980896,
"epoch": 3.826169107227208,
"grad_norm": 21.51450391411621,
"learning_rate": 1.5639860746661338e-07,
"logits": -1.464658498764038,
"logps": -82.55012512207031,
"loss": 0.0562,
"objective": 0.05308786779642105,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6083333492279053,
"regularize": 0.05308786407113075,
"step": 675
},
{
"dpo_loss": 0.544487714767456,
"epoch": 3.8545111006140766,
"grad_norm": 21.91828532034966,
"learning_rate": 1.492625561468393e-07,
"logits": -1.401973009109497,
"logps": -83.26588439941406,
"loss": 0.0543,
"objective": 0.055845096707344055,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5833333134651184,
"regularize": 0.05584508553147316,
"step": 680
},
{
"dpo_loss": 0.5221087336540222,
"epoch": 3.8828530940009447,
"grad_norm": 23.338800601233537,
"learning_rate": 1.4226446612998671e-07,
"logits": -1.483197569847107,
"logps": -82.65924835205078,
"loss": 0.0543,
"objective": 0.04644104465842247,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5958333611488342,
"regularize": 0.04644103720784187,
"step": 685
},
{
"dpo_loss": 0.5242043137550354,
"epoch": 3.9111950873878127,
"grad_norm": 22.026766940460053,
"learning_rate": 1.3540709008941147e-07,
"logits": -1.449702501296997,
"logps": -81.98009490966797,
"loss": 0.0547,
"objective": 0.055739615112543106,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6416666507720947,
"regularize": 0.05573960393667221,
"step": 690
},
{
"dpo_loss": 0.5308277606964111,
"epoch": 3.9395370807746812,
"grad_norm": 22.736825591526987,
"learning_rate": 1.2869312534913685e-07,
"logits": -1.3683240413665771,
"logps": -83.3951187133789,
"loss": 0.056,
"objective": 0.05744828283786774,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.05744827911257744,
"step": 695
},
{
"dpo_loss": 0.5327464938163757,
"epoch": 3.9678790741615493,
"grad_norm": 24.974758066705547,
"learning_rate": 1.2212521282287093e-07,
"logits": -1.416201114654541,
"logps": -83.47090148925781,
"loss": 0.0551,
"objective": 0.05039297044277191,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.574999988079071,
"regularize": 0.05039296671748161,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.683082640171051,
"eval_logits": -1.4301180839538574,
"eval_logps": -90.06600952148438,
"eval_loss": 0.40649789571762085,
"eval_objective": 0.4080060124397278,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4080060124397278,
"eval_runtime": 258.866,
"eval_samples_per_second": 22.367,
"eval_steps_per_second": 0.935,
"step": 700
},
{
"dpo_loss": 0.5257295966148376,
"epoch": 3.9962210675484178,
"grad_norm": 21.66945207844546,
"learning_rate": 1.15705935975212e-07,
"logits": -1.3355560302734375,
"logps": -81.95101928710938,
"loss": 0.0536,
"objective": 0.04855410382151604,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04855410382151604,
"step": 705
},
{
"dpo_loss": 0.5204980373382568,
"epoch": 4.024563060935286,
"grad_norm": 21.87585318414452,
"learning_rate": 1.094378198054533e-07,
"logits": -1.4359726905822754,
"logps": -83.67707061767578,
"loss": 0.0474,
"objective": 0.05088849365711212,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5708333253860474,
"regularize": 0.050888482481241226,
"step": 710
},
{
"dpo_loss": 0.5301558375358582,
"epoch": 4.052905054322154,
"grad_norm": 22.01280193333486,
"learning_rate": 1.0332332985438247e-07,
"logits": -1.3890125751495361,
"logps": -83.36654663085938,
"loss": 0.0434,
"objective": 0.040184516459703445,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04018450155854225,
"step": 715
},
{
"dpo_loss": 0.5191416144371033,
"epoch": 4.081247047709022,
"grad_norm": 21.943342871470353,
"learning_rate": 9.736487123447068e-08,
"logits": -1.3216856718063354,
"logps": -85.42113494873047,
"loss": 0.0441,
"objective": 0.03967616334557533,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.039676155894994736,
"step": 720
},
{
"dpo_loss": 0.5419493913650513,
"epoch": 4.109589041095891,
"grad_norm": 22.065151941072486,
"learning_rate": 9.156478768383058e-08,
"logits": -1.4097427129745483,
"logps": -83.27389526367188,
"loss": 0.0477,
"objective": 0.04659968614578247,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5874999761581421,
"regularize": 0.046599678695201874,
"step": 725
},
{
"dpo_loss": 0.5275304317474365,
"epoch": 4.137931034482759,
"grad_norm": 22.997003588267155,
"learning_rate": 8.592536064431466e-08,
"logits": -1.4810242652893066,
"logps": -83.33085632324219,
"loss": 0.0479,
"objective": 0.05003201588988304,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5916666388511658,
"regularize": 0.05003199726343155,
"step": 730
},
{
"dpo_loss": 0.5354489684104919,
"epoch": 4.166273027869627,
"grad_norm": 22.750124706779673,
"learning_rate": 8.044880836411888e-08,
"logits": -1.3749909400939941,
"logps": -84.28314971923828,
"loss": 0.042,
"objective": 0.04194118455052376,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04194117337465286,
"step": 735
},
{
"dpo_loss": 0.5109390616416931,
"epoch": 4.194615021256495,
"grad_norm": 23.35643629791226,
"learning_rate": 7.513728502524286e-08,
"logits": -1.3980611562728882,
"logps": -83.87706756591797,
"loss": 0.0437,
"objective": 0.042474415153265,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6041666865348816,
"regularize": 0.0424744077026844,
"step": 740
},
{
"dpo_loss": 0.5253542065620422,
"epoch": 4.222957014643363,
"grad_norm": 22.418675908813192,
"learning_rate": 6.999287989614971e-08,
"logits": -1.4651761054992676,
"logps": -81.21513366699219,
"loss": 0.0406,
"objective": 0.04062732681632042,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.574999988079071,
"regularize": 0.040627315640449524,
"step": 745
},
{
"dpo_loss": 0.5217363834381104,
"epoch": 4.251299008030231,
"grad_norm": 22.888185894990265,
"learning_rate": 6.501761650996052e-08,
"logits": -1.5698094367980957,
"logps": -83.2958984375,
"loss": 0.042,
"objective": 0.045288145542144775,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.04528813809156418,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.6830218434333801,
"eval_logits": -1.4307194948196411,
"eval_logps": -90.04474639892578,
"eval_loss": 0.4063892364501953,
"eval_objective": 0.4078083634376526,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.4078083634376526,
"eval_runtime": 258.9989,
"eval_samples_per_second": 22.355,
"eval_steps_per_second": 0.934,
"step": 750
},
{
"dpo_loss": 0.5249465107917786,
"epoch": 4.2796410014171,
"grad_norm": 22.190575430128455,
"learning_rate": 6.021345186850418e-08,
"logits": -1.4760249853134155,
"logps": -83.12273406982422,
"loss": 0.0418,
"objective": 0.04030155390501022,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04030154272913933,
"step": 755
},
{
"dpo_loss": 0.5280516147613525,
"epoch": 4.307982994803968,
"grad_norm": 22.195011354775016,
"learning_rate": 5.5582275672538316e-08,
"logits": -1.460343837738037,
"logps": -83.6526870727539,
"loss": 0.0395,
"objective": 0.040188662707805634,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6458333134651184,
"regularize": 0.04018864780664444,
"step": 760
},
{
"dpo_loss": 0.508765459060669,
"epoch": 4.336324988190836,
"grad_norm": 21.99198419312676,
"learning_rate": 5.112590957844232e-08,
"logits": -1.4831253290176392,
"logps": -83.9940414428711,
"loss": 0.0416,
"objective": 0.03937076777219772,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6291666626930237,
"regularize": 0.03937075287103653,
"step": 765
},
{
"dpo_loss": 0.5139289498329163,
"epoch": 4.364666981577704,
"grad_norm": 22.21570497564684,
"learning_rate": 4.684610648167503e-08,
"logits": -1.355908751487732,
"logps": -82.18904113769531,
"loss": 0.0418,
"objective": 0.041529521346092224,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.04152949899435043,
"step": 770
},
{
"dpo_loss": 0.5221685171127319,
"epoch": 4.393008974964572,
"grad_norm": 21.306801693131447,
"learning_rate": 4.274454982728032e-08,
"logits": -1.4285643100738525,
"logps": -83.1854476928711,
"loss": 0.0394,
"objective": 0.04110860824584961,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.041108593344688416,
"step": 775
},
{
"dpo_loss": 0.5304800868034363,
"epoch": 4.42135096835144,
"grad_norm": 21.938217857408958,
"learning_rate": 3.882285294770937e-08,
"logits": -1.4632736444473267,
"logps": -81.85124969482422,
"loss": 0.0379,
"objective": 0.03418119251728058,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5666666626930237,
"regularize": 0.03418118134140968,
"step": 780
},
{
"dpo_loss": 0.5404612421989441,
"epoch": 4.449692961738309,
"grad_norm": 21.77705913902379,
"learning_rate": 3.508255842822255e-08,
"logits": -1.4751582145690918,
"logps": -81.96646118164062,
"loss": 0.0448,
"objective": 0.04277818650007248,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04277818277478218,
"step": 785
},
{
"dpo_loss": 0.5209127068519592,
"epoch": 4.478034955125177,
"grad_norm": 21.724227546519376,
"learning_rate": 3.15251375001192e-08,
"logits": -1.4253805875778198,
"logps": -84.63212585449219,
"loss": 0.0402,
"objective": 0.050088923424482346,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.625,
"regularize": 0.05008890852332115,
"step": 790
},
{
"dpo_loss": 0.5272155404090881,
"epoch": 4.506376948512045,
"grad_norm": 21.960441297110094,
"learning_rate": 2.8151989462033787e-08,
"logits": -1.3359031677246094,
"logps": -84.30043029785156,
"loss": 0.0412,
"objective": 0.03479573875665665,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5916666388511658,
"regularize": 0.034795720130205154,
"step": 795
},
{
"dpo_loss": 0.5258675813674927,
"epoch": 4.534718941898913,
"grad_norm": 22.834668811719133,
"learning_rate": 2.4964441129527335e-08,
"logits": -1.3358808755874634,
"logps": -83.53750610351562,
"loss": 0.0411,
"objective": 0.04309748858213425,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6083333492279053,
"regularize": 0.04309746250510216,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.6830146908760071,
"eval_logits": -1.431044578552246,
"eval_logps": -90.11402893066406,
"eval_loss": 0.406222939491272,
"eval_objective": 0.4077996015548706,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4077996015548706,
"eval_runtime": 258.8062,
"eval_samples_per_second": 22.372,
"eval_steps_per_second": 0.935,
"step": 800
},
{
"dpo_loss": 0.5273416042327881,
"epoch": 4.563060935285781,
"grad_norm": 21.794535718115338,
"learning_rate": 2.1963746313188757e-08,
"logits": -1.4133697748184204,
"logps": -82.60270690917969,
"loss": 0.0414,
"objective": 0.046149447560310364,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.550000011920929,
"regularize": 0.04614944010972977,
"step": 805
},
{
"dpo_loss": 0.5305873155593872,
"epoch": 4.59140292867265,
"grad_norm": 21.298734472415376,
"learning_rate": 1.915108532545351e-08,
"logits": -1.481737494468689,
"logps": -82.04961395263672,
"loss": 0.0395,
"objective": 0.03058464638888836,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.5333333611488342,
"regularize": 0.030584635213017464,
"step": 810
},
{
"dpo_loss": 0.5338551998138428,
"epoch": 4.619744922059518,
"grad_norm": 21.722779837853974,
"learning_rate": 1.6527564516331638e-08,
"logits": -1.3470157384872437,
"logps": -83.43151092529297,
"loss": 0.0369,
"objective": 0.030139055103063583,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6708333492279053,
"regularize": 0.030139045789837837,
"step": 815
},
{
"dpo_loss": 0.5377717614173889,
"epoch": 4.648086915446386,
"grad_norm": 23.027732641639304,
"learning_rate": 1.4094215838229172e-08,
"logits": -1.439835786819458,
"logps": -83.44994354248047,
"loss": 0.0373,
"objective": 0.03681868314743042,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.612500011920929,
"regularize": 0.036818671971559525,
"step": 820
},
{
"dpo_loss": 0.5387639999389648,
"epoch": 4.6764289088332545,
"grad_norm": 22.893892489361072,
"learning_rate": 1.1851996440033318e-08,
"logits": -1.3366633653640747,
"logps": -81.3759765625,
"loss": 0.0369,
"objective": 0.03668622300028801,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5375000238418579,
"regularize": 0.03668620437383652,
"step": 825
},
{
"dpo_loss": 0.5243638753890991,
"epoch": 4.7047709022201225,
"grad_norm": 21.58395292653118,
"learning_rate": 9.801788290621505e-09,
"logits": -1.506198525428772,
"logps": -83.259033203125,
"loss": 0.0407,
"objective": 0.041429486125707626,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5916666388511658,
"regularize": 0.04142947867512703,
"step": 830
},
{
"dpo_loss": 0.5125473737716675,
"epoch": 4.733112895606991,
"grad_norm": 21.98641530853052,
"learning_rate": 7.944397831941951e-09,
"logits": -1.4062670469284058,
"logps": -83.29720306396484,
"loss": 0.0372,
"objective": 0.03951678425073624,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5583333373069763,
"regularize": 0.03951676934957504,
"step": 835
},
{
"dpo_loss": 0.5017682909965515,
"epoch": 4.7614548889938595,
"grad_norm": 21.972117419289066,
"learning_rate": 6.280555661802856e-09,
"logits": -1.423843264579773,
"logps": -83.54265594482422,
"loss": 0.0372,
"objective": 0.03352176770567894,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.033521756529808044,
"step": 840
},
{
"dpo_loss": 0.5365482568740845,
"epoch": 4.7897968823807275,
"grad_norm": 21.356793654139537,
"learning_rate": 4.810916246494157e-09,
"logits": -1.45553719997406,
"logps": -83.4180679321289,
"loss": 0.0383,
"objective": 0.040656425058841705,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.5625,
"regularize": 0.04065641388297081,
"step": 845
},
{
"dpo_loss": 0.5246464014053345,
"epoch": 4.818138875767596,
"grad_norm": 22.81185797664159,
"learning_rate": 3.5360576633558513e-09,
"logits": -1.4138314723968506,
"logps": -82.19649505615234,
"loss": 0.0355,
"objective": 0.03642057999968529,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6208333373069763,
"regularize": 0.03642057254910469,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.6829268932342529,
"eval_logits": -1.4302399158477783,
"eval_logps": -90.043212890625,
"eval_loss": 0.40620195865631104,
"eval_objective": 0.40770116448402405,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.40770116448402405,
"eval_runtime": 259.1263,
"eval_samples_per_second": 22.344,
"eval_steps_per_second": 0.934,
"step": 850
},
{
"dpo_loss": 0.5365470051765442,
"epoch": 4.846480869154464,
"grad_norm": 22.602716102552016,
"learning_rate": 2.4564813733932155e-09,
"logits": -1.3940719366073608,
"logps": -82.6231460571289,
"loss": 0.0347,
"objective": 0.03581225126981735,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6458333134651184,
"regularize": 0.03581221401691437,
"step": 855
},
{
"dpo_loss": 0.5150249004364014,
"epoch": 4.874822862541333,
"grad_norm": 23.704671287447177,
"learning_rate": 1.5726120240288631e-09,
"logits": -1.3679381608963013,
"logps": -82.33541870117188,
"loss": 0.0348,
"objective": 0.031035231426358223,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6458333134651184,
"regularize": 0.031035220250487328,
"step": 860
},
{
"dpo_loss": 0.5223459005355835,
"epoch": 4.903164855928201,
"grad_norm": 21.42329131044869,
"learning_rate": 8.847972820693051e-10,
"logits": -1.4437813758850098,
"logps": -81.53370666503906,
"loss": 0.0355,
"objective": 0.04200226441025734,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5958333611488342,
"regularize": 0.042002253234386444,
"step": 865
},
{
"dpo_loss": 0.5215969681739807,
"epoch": 4.931506849315069,
"grad_norm": 21.701501283901965,
"learning_rate": 3.933076969516724e-10,
"logits": -1.4914921522140503,
"logps": -83.26063537597656,
"loss": 0.0393,
"objective": 0.04051649197936058,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6083333492279053,
"regularize": 0.0405164435505867,
"step": 870
},
{
"dpo_loss": 0.5250566005706787,
"epoch": 4.959848842701937,
"grad_norm": 21.86259624413417,
"learning_rate": 9.833659432367803e-11,
"logits": -1.4107563495635986,
"logps": -83.20445251464844,
"loss": 0.0346,
"objective": 0.027810534462332726,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6166666746139526,
"regularize": 0.027810489758849144,
"step": 875
},
{
"dpo_loss": 0.520707905292511,
"epoch": 4.988190836088805,
"grad_norm": 23.229102177877856,
"learning_rate": 0.0,
"logits": -1.4621251821517944,
"logps": -83.79481506347656,
"loss": 0.035,
"objective": 0.029516249895095825,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.625,
"regularize": 0.02951624244451523,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.1442635908045552,
"train_runtime": 35242.7125,
"train_samples_per_second": 7.207,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}