hZzy's picture
Model save
d476c28 verified
raw
history blame
99.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 134.13619593724954,
"learning_rate": 5.681818181818182e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.693040668964386,
"epoch": 0.02834199338686821,
"grad_norm": 130.40196272454386,
"learning_rate": 2.840909090909091e-07,
"logits": -1.3680235147476196,
"logps": -84.29497528076172,
"loss": 0.412,
"objective": 0.3764810860157013,
"ranking_idealized": 0.5677083134651184,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.3764810860157013,
"step": 5
},
{
"dpo_loss": 0.6761094927787781,
"epoch": 0.05668398677373642,
"grad_norm": 220.67402488716627,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4485151767730713,
"logps": -83.00025177001953,
"loss": 0.4272,
"objective": 0.4414474666118622,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.4414474666118622,
"step": 10
},
{
"dpo_loss": 0.6819472908973694,
"epoch": 0.08502598016060463,
"grad_norm": 123.4201155289935,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4201914072036743,
"logps": -83.36089324951172,
"loss": 0.4193,
"objective": 0.41622886061668396,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.41622886061668396,
"step": 15
},
{
"dpo_loss": 0.6776646375656128,
"epoch": 0.11336797354747284,
"grad_norm": 139.10378037423342,
"learning_rate": 1.1363636363636364e-06,
"logits": -1.4012433290481567,
"logps": -84.29528045654297,
"loss": 0.442,
"objective": 0.45644310116767883,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.45644310116767883,
"step": 20
},
{
"dpo_loss": 0.6203070878982544,
"epoch": 0.14170996693434104,
"grad_norm": 135.72754480990383,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.4394680261611938,
"logps": -84.2686996459961,
"loss": 0.4742,
"objective": 0.4576638340950012,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.4576638340950012,
"step": 25
},
{
"dpo_loss": 0.6442328691482544,
"epoch": 0.17005196032120926,
"grad_norm": 129.86543736019303,
"learning_rate": 1.7045454545454546e-06,
"logits": -1.406640887260437,
"logps": -84.90333557128906,
"loss": 0.5291,
"objective": 0.5116989016532898,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.5116989016532898,
"step": 30
},
{
"dpo_loss": 0.6669540405273438,
"epoch": 0.19839395370807747,
"grad_norm": 149.40597914469663,
"learning_rate": 1.9886363636363638e-06,
"logits": -1.3766274452209473,
"logps": -83.32654571533203,
"loss": 0.6031,
"objective": 0.6114931702613831,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 0.6114931702613831,
"step": 35
},
{
"dpo_loss": 0.6527742743492126,
"epoch": 0.22673594709494568,
"grad_norm": 158.6310433342775,
"learning_rate": 2.2727272727272728e-06,
"logits": -1.3420394659042358,
"logps": -83.61425018310547,
"loss": 0.6378,
"objective": 0.639444887638092,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.639444887638092,
"step": 40
},
{
"dpo_loss": 0.6698108911514282,
"epoch": 0.25507794048181387,
"grad_norm": 129.09910758768564,
"learning_rate": 2.556818181818182e-06,
"logits": -1.4372307062149048,
"logps": -85.24830627441406,
"loss": 0.7526,
"objective": 0.8735028505325317,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5291666388511658,
"regularize": 0.8735028505325317,
"step": 45
},
{
"dpo_loss": 0.681568443775177,
"epoch": 0.2834199338686821,
"grad_norm": 118.24660372299884,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.30116605758667,
"logps": -84.97547912597656,
"loss": 0.8081,
"objective": 0.7729283571243286,
"ranking_idealized": 0.4416666626930237,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.4375,
"regularize": 0.7729283571243286,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.7266114950180054,
"eval_logits": -1.330773115158081,
"eval_logps": -91.93638610839844,
"eval_loss": 0.665158212184906,
"eval_objective": 0.6722058653831482,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.6722058653831482,
"eval_runtime": 260.1252,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 0.93,
"step": 50
},
{
"dpo_loss": 0.7984517812728882,
"epoch": 0.3117619272555503,
"grad_norm": 113.66354191682501,
"learning_rate": 3.125e-06,
"logits": -1.3412364721298218,
"logps": -85.0008316040039,
"loss": 0.966,
"objective": 0.9933412075042725,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.9933412075042725,
"step": 55
},
{
"dpo_loss": 0.686260461807251,
"epoch": 0.3401039206424185,
"grad_norm": 120.77585128117505,
"learning_rate": 3.409090909090909e-06,
"logits": -1.278722882270813,
"logps": -82.67823791503906,
"loss": 0.9965,
"objective": 0.994083046913147,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5416666865348816,
"regularize": 0.994083046913147,
"step": 60
},
{
"dpo_loss": 0.6437157988548279,
"epoch": 0.3684459140292867,
"grad_norm": 114.63196029717214,
"learning_rate": 3.6931818181818186e-06,
"logits": -1.1975092887878418,
"logps": -80.52145385742188,
"loss": 1.0631,
"objective": 1.0634738206863403,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 1.0634738206863403,
"step": 65
},
{
"dpo_loss": 0.7649748921394348,
"epoch": 0.39678790741615494,
"grad_norm": 120.79825029146289,
"learning_rate": 3.9772727272727275e-06,
"logits": -1.2268587350845337,
"logps": -81.57954406738281,
"loss": 1.1744,
"objective": 1.216639757156372,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 1.216639757156372,
"step": 70
},
{
"dpo_loss": 0.8916628360748291,
"epoch": 0.42512990080302315,
"grad_norm": 94.61834145368111,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.2359107732772827,
"logps": -80.95670318603516,
"loss": 1.2475,
"objective": 1.3055096864700317,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5708333253860474,
"regularize": 1.3055096864700317,
"step": 75
},
{
"dpo_loss": 0.8090317845344543,
"epoch": 0.45347189418989137,
"grad_norm": 90.2013102285757,
"learning_rate": 4.5454545454545455e-06,
"logits": -1.2852014303207397,
"logps": -79.4792251586914,
"loss": 1.2963,
"objective": 1.3557077646255493,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 1.3557077646255493,
"step": 80
},
{
"dpo_loss": 0.8598435521125793,
"epoch": 0.4818138875767596,
"grad_norm": 88.10607117738199,
"learning_rate": 4.829545454545455e-06,
"logits": -1.1217026710510254,
"logps": -78.83904266357422,
"loss": 1.3155,
"objective": 1.3455144166946411,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 1.3455144166946411,
"step": 85
},
{
"dpo_loss": 0.9262712001800537,
"epoch": 0.5101558809636277,
"grad_norm": 91.01793262969673,
"learning_rate": 4.999921328558333e-06,
"logits": -1.0270463228225708,
"logps": -78.34434509277344,
"loss": 1.3316,
"objective": 1.2936842441558838,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.46666666865348816,
"regularize": 1.2936842441558838,
"step": 90
},
{
"dpo_loss": 0.8197596073150635,
"epoch": 0.538497874350496,
"grad_norm": 88.9287273472026,
"learning_rate": 4.999036331701828e-06,
"logits": -1.1374804973602295,
"logps": -75.8565902709961,
"loss": 1.4668,
"objective": 1.495007038116455,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 1.495007038116455,
"step": 95
},
{
"dpo_loss": 0.9542521834373474,
"epoch": 0.5668398677373642,
"grad_norm": 92.9939055097423,
"learning_rate": 4.997168347957521e-06,
"logits": -1.1251710653305054,
"logps": -78.35294342041016,
"loss": 1.4482,
"objective": 1.6726794242858887,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 1.6726794242858887,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.9744860529899597,
"eval_logits": -1.087980031967163,
"eval_logps": -83.3250503540039,
"eval_loss": 1.4160186052322388,
"eval_objective": 1.3662116527557373,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 1.3662116527557373,
"eval_runtime": 258.3781,
"eval_samples_per_second": 22.409,
"eval_steps_per_second": 0.937,
"step": 100
},
{
"dpo_loss": 1.0364277362823486,
"epoch": 0.5951818611242324,
"grad_norm": 95.45527468177522,
"learning_rate": 4.994318112090048e-06,
"logits": -0.8985040187835693,
"logps": -79.13915252685547,
"loss": 1.6082,
"objective": 1.6931790113449097,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 1.6931790113449097,
"step": 105
},
{
"dpo_loss": 0.9066151976585388,
"epoch": 0.6235238545111006,
"grad_norm": 82.78277978571477,
"learning_rate": 4.990486745229364e-06,
"logits": -1.0420721769332886,
"logps": -75.80839538574219,
"loss": 1.6336,
"objective": 1.8092875480651855,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.48750001192092896,
"regularize": 1.8092875480651855,
"step": 110
},
{
"dpo_loss": 1.0691540241241455,
"epoch": 0.6518658478979689,
"grad_norm": 83.71556017238615,
"learning_rate": 4.985675754429744e-06,
"logits": -1.0556254386901855,
"logps": -75.40866088867188,
"loss": 1.6395,
"objective": 1.7828887701034546,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4958333373069763,
"regularize": 1.7828887701034546,
"step": 115
},
{
"dpo_loss": 1.0725338459014893,
"epoch": 0.680207841284837,
"grad_norm": 75.45511417337472,
"learning_rate": 4.9798870320769884e-06,
"logits": -0.9715719819068909,
"logps": -74.33720397949219,
"loss": 1.5808,
"objective": 1.6460652351379395,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 1.6460652351379395,
"step": 120
},
{
"dpo_loss": 0.9674696326255798,
"epoch": 0.7085498346717053,
"grad_norm": 81.29845851322025,
"learning_rate": 4.973122855144066e-06,
"logits": -0.935053288936615,
"logps": -72.40234375,
"loss": 1.5695,
"objective": 1.5490193367004395,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5375000238418579,
"regularize": 1.5490193367004395,
"step": 125
},
{
"dpo_loss": 0.9349392056465149,
"epoch": 0.7368918280585735,
"grad_norm": 70.71836455513233,
"learning_rate": 4.965385884295467e-06,
"logits": -1.109536051750183,
"logps": -70.39344024658203,
"loss": 1.5584,
"objective": 1.6358364820480347,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.48750001192092896,
"regularize": 1.6358364820480347,
"step": 130
},
{
"dpo_loss": 0.9241760969161987,
"epoch": 0.7652338214454416,
"grad_norm": 71.12542306553239,
"learning_rate": 4.956679162840646e-06,
"logits": -1.092426061630249,
"logps": -73.43895721435547,
"loss": 1.5904,
"objective": 1.5671554803848267,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.512499988079071,
"regularize": 1.5671554803848267,
"step": 135
},
{
"dpo_loss": 0.8581911325454712,
"epoch": 0.7935758148323099,
"grad_norm": 74.29158643708146,
"learning_rate": 4.947006115536947e-06,
"logits": -0.9535221457481384,
"logps": -74.29177856445312,
"loss": 1.5839,
"objective": 1.4818050861358643,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5291666388511658,
"regularize": 1.4818050861358643,
"step": 140
},
{
"dpo_loss": 0.9467170238494873,
"epoch": 0.821917808219178,
"grad_norm": 69.83033897795461,
"learning_rate": 4.9363705472424825e-06,
"logits": -0.9065474271774292,
"logps": -75.74617004394531,
"loss": 1.5265,
"objective": 1.5216258764266968,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 1.5216258764266968,
"step": 145
},
{
"dpo_loss": 0.8972322940826416,
"epoch": 0.8502598016060463,
"grad_norm": 70.00491587575249,
"learning_rate": 4.924776641419513e-06,
"logits": -0.8950843811035156,
"logps": -74.2447509765625,
"loss": 1.5063,
"objective": 1.563843846321106,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 1.563843846321106,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 1.1387892961502075,
"eval_logits": -0.9763504266738892,
"eval_logps": -79.4244613647461,
"eval_loss": 1.840279221534729,
"eval_objective": 1.8307207822799683,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5154958963394165,
"eval_regularize": 1.8307207822799683,
"eval_runtime": 259.3897,
"eval_samples_per_second": 22.322,
"eval_steps_per_second": 0.933,
"step": 150
},
{
"dpo_loss": 0.8782619833946228,
"epoch": 0.8786017949929145,
"grad_norm": 69.80918589105492,
"learning_rate": 4.9122289584888926e-06,
"logits": -0.9591015577316284,
"logps": -72.81404876708984,
"loss": 1.5351,
"objective": 1.4583401679992676,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5416666865348816,
"regularize": 1.4583401679992676,
"step": 155
},
{
"dpo_loss": 0.9660459756851196,
"epoch": 0.9069437883797827,
"grad_norm": 66.22318117843135,
"learning_rate": 4.8987324340362445e-06,
"logits": -0.9040888547897339,
"logps": -73.70983123779297,
"loss": 1.5597,
"objective": 1.562262773513794,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5166666507720947,
"regularize": 1.562262773513794,
"step": 160
},
{
"dpo_loss": 1.0612136125564575,
"epoch": 0.9352857817666509,
"grad_norm": 70.50798575492288,
"learning_rate": 4.884292376870567e-06,
"logits": -0.8070274591445923,
"logps": -75.22913360595703,
"loss": 1.5216,
"objective": 1.6139070987701416,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 1.6139070987701416,
"step": 165
},
{
"dpo_loss": 0.8528857827186584,
"epoch": 0.9636277751535192,
"grad_norm": 70.82614721912003,
"learning_rate": 4.868914466936038e-06,
"logits": -0.6839962601661682,
"logps": -76.570068359375,
"loss": 1.483,
"objective": 1.5318470001220703,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 1.5318470001220703,
"step": 170
},
{
"dpo_loss": 0.8230343461036682,
"epoch": 0.9919697685403873,
"grad_norm": 73.28269267652773,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.6581661105155945,
"logps": -75.77076721191406,
"loss": 1.4604,
"objective": 1.486951470375061,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 1.486951470375061,
"step": 175
},
{
"dpo_loss": 0.9295377135276794,
"epoch": 1.0203117619272555,
"grad_norm": 68.13139272327797,
"learning_rate": 4.835369650662767e-06,
"logits": -0.7830471396446228,
"logps": -75.41452026367188,
"loss": 1.4384,
"objective": 1.5701080560684204,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 1.5701080560684204,
"step": 180
},
{
"dpo_loss": 0.9832867980003357,
"epoch": 1.0486537553141237,
"grad_norm": 68.27487274290651,
"learning_rate": 4.817215939055984e-06,
"logits": -0.7766852974891663,
"logps": -75.4141845703125,
"loss": 1.4597,
"objective": 1.5295368432998657,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 1.5295368432998657,
"step": 185
},
{
"dpo_loss": 0.9126808047294617,
"epoch": 1.076995748700992,
"grad_norm": 72.02486344760798,
"learning_rate": 4.798150758954164e-06,
"logits": -0.7806929349899292,
"logps": -76.5052490234375,
"loss": 1.4245,
"objective": 1.6002991199493408,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5833333134651184,
"regularize": 1.6002991199493408,
"step": 190
},
{
"dpo_loss": 0.876176655292511,
"epoch": 1.10533774208786,
"grad_norm": 72.39146825694654,
"learning_rate": 4.778181609576832e-06,
"logits": -0.742654025554657,
"logps": -75.56478881835938,
"loss": 1.4047,
"objective": 1.4097263813018799,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5791666507720947,
"regularize": 1.4097263813018799,
"step": 195
},
{
"dpo_loss": 0.8269080519676208,
"epoch": 1.1336797354747283,
"grad_norm": 65.92725380936474,
"learning_rate": 4.757316345716554e-06,
"logits": -0.7813295125961304,
"logps": -73.13416290283203,
"loss": 1.3427,
"objective": 1.2968580722808838,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 1.2968580722808838,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 1.19429349899292,
"eval_logits": -0.844602108001709,
"eval_logps": -78.08975982666016,
"eval_loss": 1.9410725831985474,
"eval_objective": 1.904191493988037,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 1.904191493988037,
"eval_runtime": 259.3374,
"eval_samples_per_second": 22.326,
"eval_steps_per_second": 0.933,
"step": 200
},
{
"dpo_loss": 0.9577538371086121,
"epoch": 1.1620217288615966,
"grad_norm": 65.93331247907419,
"learning_rate": 4.735563174649278e-06,
"logits": -0.8114036321640015,
"logps": -74.05875396728516,
"loss": 1.3942,
"objective": 1.4247114658355713,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.48750001192092896,
"regularize": 1.4247114658355713,
"step": 205
},
{
"dpo_loss": 0.7938932180404663,
"epoch": 1.1903637222484649,
"grad_norm": 71.43014040389335,
"learning_rate": 4.7129306529060415e-06,
"logits": -0.883682131767273,
"logps": -74.72289276123047,
"loss": 1.397,
"objective": 1.3820544481277466,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5333333611488342,
"regularize": 1.3820544481277466,
"step": 210
},
{
"dpo_loss": 0.8905848860740662,
"epoch": 1.2187057156353331,
"grad_norm": 73.75682155225577,
"learning_rate": 4.68942768290728e-06,
"logits": -0.8337818384170532,
"logps": -75.42349243164062,
"loss": 1.346,
"objective": 1.3334628343582153,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 1.3334628343582153,
"step": 215
},
{
"dpo_loss": 0.7579005360603333,
"epoch": 1.2470477090222012,
"grad_norm": 67.10552679086015,
"learning_rate": 4.665063509461098e-06,
"logits": -0.6136857867240906,
"logps": -76.30582427978516,
"loss": 1.3537,
"objective": 1.1673569679260254,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.5916666388511658,
"regularize": 1.1673569679260254,
"step": 220
},
{
"dpo_loss": 0.8865377306938171,
"epoch": 1.2753897024090695,
"grad_norm": 61.86276513251386,
"learning_rate": 4.639847716126855e-06,
"logits": -0.712106466293335,
"logps": -77.76806640625,
"loss": 1.2903,
"objective": 1.3318673372268677,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5333333611488342,
"regularize": 1.3318673372268677,
"step": 225
},
{
"dpo_loss": 0.7389308214187622,
"epoch": 1.3037316957959377,
"grad_norm": 64.8981585658753,
"learning_rate": 4.613790221445511e-06,
"logits": -0.7096338868141174,
"logps": -77.84996032714844,
"loss": 1.2526,
"objective": 1.1143450736999512,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5333333611488342,
"regularize": 1.1143450736999512,
"step": 230
},
{
"dpo_loss": 0.7792016267776489,
"epoch": 1.3320736891828058,
"grad_norm": 70.5473568363482,
"learning_rate": 4.586901275038201e-06,
"logits": -0.8352426886558533,
"logps": -74.80459594726562,
"loss": 1.3276,
"objective": 1.2759090662002563,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5166666507720947,
"regularize": 1.2759090662002563,
"step": 235
},
{
"dpo_loss": 0.7532822489738464,
"epoch": 1.360415682569674,
"grad_norm": 66.15538794121927,
"learning_rate": 4.559191453574582e-06,
"logits": -0.7979737520217896,
"logps": -75.91637420654297,
"loss": 1.2644,
"objective": 1.1841062307357788,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5083333253860474,
"regularize": 1.1841062307357788,
"step": 240
},
{
"dpo_loss": 0.8654258847236633,
"epoch": 1.3887576759565423,
"grad_norm": 60.664093680465626,
"learning_rate": 4.530671656612544e-06,
"logits": -0.7953319549560547,
"logps": -75.70112609863281,
"loss": 1.2143,
"objective": 1.248257040977478,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 1.248257040977478,
"step": 245
},
{
"dpo_loss": 0.7265617847442627,
"epoch": 1.4170996693434104,
"grad_norm": 59.62226844787694,
"learning_rate": 4.501353102310901e-06,
"logits": -0.7469329833984375,
"logps": -75.32097625732422,
"loss": 1.2385,
"objective": 1.2008994817733765,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4833333194255829,
"regularize": 1.2008994817733765,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 1.2812141180038452,
"eval_logits": -0.8252052664756775,
"eval_logps": -81.07825469970703,
"eval_loss": 2.100358247756958,
"eval_objective": 2.0779922008514404,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5072314143180847,
"eval_regularize": 2.0779922008514404,
"eval_runtime": 259.328,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 0.933,
"step": 250
},
{
"dpo_loss": 0.73033607006073,
"epoch": 1.4454416627302786,
"grad_norm": 62.42895757690262,
"learning_rate": 4.4712473230167775e-06,
"logits": -0.6974473595619202,
"logps": -77.66381072998047,
"loss": 1.2105,
"objective": 1.1618343591690063,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6041666865348816,
"regularize": 1.1618343591690063,
"step": 255
},
{
"dpo_loss": 0.8443369269371033,
"epoch": 1.473783656117147,
"grad_norm": 61.04515459515312,
"learning_rate": 4.440366160729393e-06,
"logits": -0.705303430557251,
"logps": -77.5326156616211,
"loss": 1.1973,
"objective": 1.1547825336456299,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 1.1547825336456299,
"step": 260
},
{
"dpo_loss": 0.7983213663101196,
"epoch": 1.5021256495040152,
"grad_norm": 63.36584736778421,
"learning_rate": 4.4087217624420595e-06,
"logits": -0.6919764280319214,
"logps": -75.56660461425781,
"loss": 1.1823,
"objective": 1.208877444267273,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 1.208877444267273,
"step": 265
},
{
"dpo_loss": 0.719446063041687,
"epoch": 1.5304676428908834,
"grad_norm": 63.46733089619852,
"learning_rate": 4.376326575364206e-06,
"logits": -0.6683894991874695,
"logps": -76.26860809326172,
"loss": 1.1785,
"objective": 1.1638675928115845,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 1.1638675928115845,
"step": 270
},
{
"dpo_loss": 0.7665132284164429,
"epoch": 1.5588096362777515,
"grad_norm": 58.99295412475345,
"learning_rate": 4.34319334202531e-06,
"logits": -0.6416487097740173,
"logps": -76.59992218017578,
"loss": 1.1881,
"objective": 1.1479988098144531,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 1.1479988098144531,
"step": 275
},
{
"dpo_loss": 0.7462816834449768,
"epoch": 1.5871516296646198,
"grad_norm": 60.75435946216164,
"learning_rate": 4.309335095262675e-06,
"logits": -0.601517915725708,
"logps": -74.82725524902344,
"loss": 1.145,
"objective": 1.1755616664886475,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 1.1755616664886475,
"step": 280
},
{
"dpo_loss": 0.6989460587501526,
"epoch": 1.615493623051488,
"grad_norm": 64.56412273044151,
"learning_rate": 4.274765153095008e-06,
"logits": -0.644619882106781,
"logps": -76.7342758178711,
"loss": 1.1688,
"objective": 1.1958999633789062,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 1.1958999633789062,
"step": 285
},
{
"dpo_loss": 0.751794695854187,
"epoch": 1.643835616438356,
"grad_norm": 65.15922239336996,
"learning_rate": 4.239497113483819e-06,
"logits": -0.6588698625564575,
"logps": -74.58145141601562,
"loss": 1.1147,
"objective": 0.9951308369636536,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5625,
"regularize": 0.9951308369636536,
"step": 290
},
{
"dpo_loss": 0.7910669445991516,
"epoch": 1.6721776098252243,
"grad_norm": 59.311015532755086,
"learning_rate": 4.203544848984729e-06,
"logits": -0.5501735210418701,
"logps": -73.76367950439453,
"loss": 1.0785,
"objective": 1.1607948541641235,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 1.1607948541641235,
"step": 295
},
{
"dpo_loss": 0.787829577922821,
"epoch": 1.7005196032120926,
"grad_norm": 59.72417531410974,
"learning_rate": 4.16692250129073e-06,
"logits": -0.5574866533279419,
"logps": -74.32286834716797,
"loss": 1.1013,
"objective": 1.11799156665802,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.550000011920929,
"regularize": 1.11799156665802,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 1.3091331720352173,
"eval_logits": -0.6190080046653748,
"eval_logps": -78.51614379882812,
"eval_loss": 2.195436716079712,
"eval_objective": 2.2003257274627686,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 2.2003257274627686,
"eval_runtime": 259.0657,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 0.934,
"step": 300
},
{
"dpo_loss": 0.751851499080658,
"epoch": 1.7288615965989607,
"grad_norm": 63.853803550753945,
"learning_rate": 4.129644475669617e-06,
"logits": -0.5315951108932495,
"logps": -74.70060729980469,
"loss": 1.1126,
"objective": 1.1344873905181885,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 1.1344873905181885,
"step": 305
},
{
"dpo_loss": 0.7526522278785706,
"epoch": 1.7572035899858292,
"grad_norm": 62.541695397328844,
"learning_rate": 4.091725435297721e-06,
"logits": -0.6003395915031433,
"logps": -71.47956848144531,
"loss": 1.104,
"objective": 1.1590847969055176,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5375000238418579,
"regularize": 1.1590847969055176,
"step": 310
},
{
"dpo_loss": 0.7785258889198303,
"epoch": 1.7855455833726972,
"grad_norm": 56.52492221503349,
"learning_rate": 4.053180295492203e-06,
"logits": -0.5509213209152222,
"logps": -72.33855438232422,
"loss": 1.086,
"objective": 1.1518981456756592,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 1.1518981456756592,
"step": 315
},
{
"dpo_loss": 0.7460355758666992,
"epoch": 1.8138875767595655,
"grad_norm": 60.186149725562295,
"learning_rate": 4.014024217844167e-06,
"logits": -0.5665237903594971,
"logps": -73.6473159790039,
"loss": 1.0789,
"objective": 1.1513078212738037,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 1.1513078212738037,
"step": 320
},
{
"dpo_loss": 0.7017228603363037,
"epoch": 1.8422295701464337,
"grad_norm": 64.38612525774512,
"learning_rate": 3.974272604254906e-06,
"logits": -0.5879048109054565,
"logps": -75.27242279052734,
"loss": 1.0824,
"objective": 1.1118178367614746,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5666666626930237,
"regularize": 1.1118178367614746,
"step": 325
},
{
"dpo_loss": 0.6825158596038818,
"epoch": 1.8705715635333018,
"grad_norm": 59.75239189291987,
"learning_rate": 3.933941090877615e-06,
"logits": -0.4700223505496979,
"logps": -71.7901382446289,
"loss": 1.0596,
"objective": 0.9758342504501343,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4541666805744171,
"regularize": 0.9758342504501343,
"step": 330
},
{
"dpo_loss": 0.6629473567008972,
"epoch": 1.89891355692017,
"grad_norm": 61.304279089943705,
"learning_rate": 3.893045541966975e-06,
"logits": -0.617573618888855,
"logps": -72.22500610351562,
"loss": 1.0234,
"objective": 1.1313632726669312,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 1.1313632726669312,
"step": 335
},
{
"dpo_loss": 0.7187826633453369,
"epoch": 1.9272555503070383,
"grad_norm": 59.753785455065014,
"learning_rate": 3.8516020436389945e-06,
"logits": -0.560102105140686,
"logps": -75.12157440185547,
"loss": 1.0127,
"objective": 1.0483639240264893,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 1.0483639240264893,
"step": 340
},
{
"dpo_loss": 0.6934853792190552,
"epoch": 1.9555975436939064,
"grad_norm": 57.97084895323718,
"learning_rate": 3.8096268975436045e-06,
"logits": -0.5952755212783813,
"logps": -74.10826873779297,
"loss": 1.022,
"objective": 1.0359365940093994,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5291666388511658,
"regularize": 1.0359365940093994,
"step": 345
},
{
"dpo_loss": 0.6595560908317566,
"epoch": 1.9839395370807746,
"grad_norm": 61.69795496268391,
"learning_rate": 3.767136614452458e-06,
"logits": -0.562609851360321,
"logps": -75.11538696289062,
"loss": 0.9795,
"objective": 0.9731999635696411,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5333333611488342,
"regularize": 0.9731999635696411,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 1.2865926027297974,
"eval_logits": -0.6907580494880676,
"eval_logps": -78.29139709472656,
"eval_loss": 2.2000670433044434,
"eval_objective": 2.1849544048309326,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 2.1849544048309326,
"eval_runtime": 259.7345,
"eval_samples_per_second": 22.292,
"eval_steps_per_second": 0.932,
"step": 350
},
{
"dpo_loss": 0.7099167108535767,
"epoch": 2.012281530467643,
"grad_norm": 61.48654412749636,
"learning_rate": 3.724147907764478e-06,
"logits": -0.5564183592796326,
"logps": -74.59834289550781,
"loss": 0.9585,
"objective": 1.041745901107788,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5208333134651184,
"regularize": 1.041745901107788,
"step": 355
},
{
"dpo_loss": 0.7087345123291016,
"epoch": 2.040623523854511,
"grad_norm": 61.77503577871403,
"learning_rate": 3.6806776869317074e-06,
"logits": -0.5873940587043762,
"logps": -73.76538848876953,
"loss": 0.9533,
"objective": 0.9487842917442322,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.9487842917442322,
"step": 360
},
{
"dpo_loss": 0.6601606607437134,
"epoch": 2.0689655172413794,
"grad_norm": 62.194779314868725,
"learning_rate": 3.6367430508080283e-06,
"logits": -0.6485423445701599,
"logps": -74.84728240966797,
"loss": 0.9459,
"objective": 0.9501416683197021,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5291666388511658,
"regularize": 0.9501416683197021,
"step": 365
},
{
"dpo_loss": 0.6689183712005615,
"epoch": 2.0973075106282475,
"grad_norm": 62.24375049929638,
"learning_rate": 3.5923612809233987e-06,
"logits": -0.6007247567176819,
"logps": -72.23168182373047,
"loss": 0.9236,
"objective": 0.9433914422988892,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.512499988079071,
"regularize": 0.9433914422988892,
"step": 370
},
{
"dpo_loss": 0.6166276335716248,
"epoch": 2.1256495040151155,
"grad_norm": 55.9855513993403,
"learning_rate": 3.547549834686222e-06,
"logits": -0.6374623775482178,
"logps": -73.1789321899414,
"loss": 0.9218,
"objective": 0.9237673878669739,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.9237673878669739,
"step": 375
},
{
"dpo_loss": 0.7234205007553101,
"epoch": 2.153991497401984,
"grad_norm": 60.877496205710976,
"learning_rate": 3.5023263385165346e-06,
"logits": -0.5776531100273132,
"logps": -72.65286254882812,
"loss": 0.8853,
"objective": 0.9437097311019897,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 0.9437097311019897,
"step": 380
},
{
"dpo_loss": 0.706203818321228,
"epoch": 2.182333490788852,
"grad_norm": 57.432586251491834,
"learning_rate": 3.4567085809127247e-06,
"logits": -0.5748823881149292,
"logps": -75.21902465820312,
"loss": 0.8699,
"objective": 0.9273825287818909,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 0.9273825287818909,
"step": 385
},
{
"dpo_loss": 0.6570317149162292,
"epoch": 2.21067548417572,
"grad_norm": 59.58556599473961,
"learning_rate": 3.410714505454486e-06,
"logits": -0.5105537176132202,
"logps": -73.1999282836914,
"loss": 0.8797,
"objective": 0.7775837779045105,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4791666567325592,
"regularize": 0.7775837779045105,
"step": 390
},
{
"dpo_loss": 0.6566095352172852,
"epoch": 2.2390174775625886,
"grad_norm": 61.70716494378518,
"learning_rate": 3.364362203744777e-06,
"logits": -0.48478570580482483,
"logps": -75.13355255126953,
"loss": 0.9257,
"objective": 0.9057443737983704,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5583333373069763,
"regularize": 0.9057443737983704,
"step": 395
},
{
"dpo_loss": 0.6266034245491028,
"epoch": 2.2673594709494567,
"grad_norm": 61.76261428542362,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.5294731855392456,
"logps": -76.35556030273438,
"loss": 0.8853,
"objective": 0.8211384415626526,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6000000238418579,
"regularize": 0.8211384415626526,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 1.3223490715026855,
"eval_logits": -0.6215647459030151,
"eval_logps": -78.5732421875,
"eval_loss": 2.267859697341919,
"eval_objective": 2.261888027191162,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 2.261888027191162,
"eval_runtime": 258.7107,
"eval_samples_per_second": 22.38,
"eval_steps_per_second": 0.935,
"step": 400
},
{
"dpo_loss": 0.7414664626121521,
"epoch": 2.295701464336325,
"grad_norm": 59.90696960742683,
"learning_rate": 3.2706559853460818e-06,
"logits": -0.6069660186767578,
"logps": -74.20203399658203,
"loss": 0.8443,
"objective": 0.830698549747467,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.830698549747467,
"step": 405
},
{
"dpo_loss": 0.6774530410766602,
"epoch": 2.324043457723193,
"grad_norm": 55.80310964403766,
"learning_rate": 3.2233389276586325e-06,
"logits": -0.524910032749176,
"logps": -73.77034759521484,
"loss": 0.8216,
"objective": 0.7954517006874084,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4833333194255829,
"regularize": 0.7954517006874084,
"step": 410
},
{
"dpo_loss": 0.6917945742607117,
"epoch": 2.3523854511100613,
"grad_norm": 58.97332543380404,
"learning_rate": 3.1757373472244324e-06,
"logits": -0.5027827024459839,
"logps": -74.41297912597656,
"loss": 0.8371,
"objective": 0.8500952124595642,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.8500952124595642,
"step": 415
},
{
"dpo_loss": 0.7004589438438416,
"epoch": 2.3807274444969297,
"grad_norm": 58.55747179048871,
"learning_rate": 3.127869967952698e-06,
"logits": -0.42817217111587524,
"logps": -76.00125122070312,
"loss": 0.8196,
"objective": 0.8261451125144958,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4791666567325592,
"regularize": 0.8261451125144958,
"step": 420
},
{
"dpo_loss": 0.6052082777023315,
"epoch": 2.409069437883798,
"grad_norm": 64.29650245686375,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.48321303725242615,
"logps": -74.37902069091797,
"loss": 0.8213,
"objective": 0.7835978865623474,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5416666865348816,
"regularize": 0.7835978865623474,
"step": 425
},
{
"dpo_loss": 0.6209239959716797,
"epoch": 2.4374114312706663,
"grad_norm": 57.31240800051826,
"learning_rate": 3.0314132238824416e-06,
"logits": -0.5284911394119263,
"logps": -74.74346160888672,
"loss": 0.7639,
"objective": 0.7543562054634094,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5625,
"regularize": 0.7543562054634094,
"step": 430
},
{
"dpo_loss": 0.6278953552246094,
"epoch": 2.4657534246575343,
"grad_norm": 55.78371923658181,
"learning_rate": 2.9828617999947647e-06,
"logits": -0.6039460301399231,
"logps": -74.01110076904297,
"loss": 0.7742,
"objective": 0.7583914995193481,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5333333611488342,
"regularize": 0.7583914995193481,
"step": 435
},
{
"dpo_loss": 0.6551163792610168,
"epoch": 2.4940954180444024,
"grad_norm": 57.20639534632825,
"learning_rate": 2.9341204441673267e-06,
"logits": -0.6006036996841431,
"logps": -73.72498321533203,
"loss": 0.8175,
"objective": 0.8128156661987305,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5208333134651184,
"regularize": 0.8128156661987305,
"step": 440
},
{
"dpo_loss": 0.6569511890411377,
"epoch": 2.5224374114312704,
"grad_norm": 59.028297224317896,
"learning_rate": 2.8852083286358647e-06,
"logits": -0.6477051973342896,
"logps": -71.30754852294922,
"loss": 0.7389,
"objective": 0.6897608637809753,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 0.6897608637809753,
"step": 445
},
{
"dpo_loss": 0.6735912561416626,
"epoch": 2.550779404818139,
"grad_norm": 62.76694554498361,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.6754371523857117,
"logps": -73.46631622314453,
"loss": 0.7605,
"objective": 0.8445902466773987,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.8445902466773987,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 1.3571884632110596,
"eval_logits": -0.6825547218322754,
"eval_logps": -78.28398895263672,
"eval_loss": 2.2654569149017334,
"eval_objective": 2.2744035720825195,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 2.2744035720825195,
"eval_runtime": 259.3083,
"eval_samples_per_second": 22.329,
"eval_steps_per_second": 0.933,
"step": 450
},
{
"dpo_loss": 0.6667983531951904,
"epoch": 2.579121398205007,
"grad_norm": 58.3447211977729,
"learning_rate": 2.7869488356746344e-06,
"logits": -0.6426534056663513,
"logps": -74.59416961669922,
"loss": 0.7591,
"objective": 0.768614649772644,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 0.768614649772644,
"step": 455
},
{
"dpo_loss": 0.5558815598487854,
"epoch": 2.6074633915918755,
"grad_norm": 65.11260237104646,
"learning_rate": 2.7376401082604563e-06,
"logits": -0.6525983810424805,
"logps": -74.7845230102539,
"loss": 0.7638,
"objective": 0.691643476486206,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5375000238418579,
"regularize": 0.691643476486206,
"step": 460
},
{
"dpo_loss": 0.6411501169204712,
"epoch": 2.6358053849787435,
"grad_norm": 62.60981601751369,
"learning_rate": 2.6882379059705953e-06,
"logits": -0.5221896767616272,
"logps": -75.2436752319336,
"loss": 0.7359,
"objective": 0.7508728504180908,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.512499988079071,
"regularize": 0.7508728504180908,
"step": 465
},
{
"dpo_loss": 0.6434755325317383,
"epoch": 2.6641473783656116,
"grad_norm": 56.58366056844157,
"learning_rate": 2.6387616609823506e-06,
"logits": -0.5284293293952942,
"logps": -74.4692153930664,
"loss": 0.7269,
"objective": 0.6894288659095764,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5583333373069763,
"regularize": 0.6894288659095764,
"step": 470
},
{
"dpo_loss": 0.6124823093414307,
"epoch": 2.69248937175248,
"grad_norm": 63.898667131172786,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.5336421728134155,
"logps": -74.76774597167969,
"loss": 0.7216,
"objective": 0.6816112995147705,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.512499988079071,
"regularize": 0.6816112995147705,
"step": 475
},
{
"dpo_loss": 0.6480967998504639,
"epoch": 2.720831365139348,
"grad_norm": 70.14559307144371,
"learning_rate": 2.53966490958702e-06,
"logits": -0.6288893222808838,
"logps": -73.35899353027344,
"loss": 0.722,
"objective": 0.7448421716690063,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5208333134651184,
"regularize": 0.7448421716690063,
"step": 480
},
{
"dpo_loss": 0.6456955075263977,
"epoch": 2.7491733585262166,
"grad_norm": 55.88623408443581,
"learning_rate": 2.490083382528097e-06,
"logits": -0.6045793294906616,
"logps": -75.4445571899414,
"loss": 0.7142,
"objective": 0.7276442050933838,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.7276442050933838,
"step": 485
},
{
"dpo_loss": 0.600917637348175,
"epoch": 2.7775153519130846,
"grad_norm": 59.05306899045766,
"learning_rate": 2.440505756134732e-06,
"logits": -0.5500164031982422,
"logps": -74.36378479003906,
"loss": 0.7053,
"objective": 0.6555976867675781,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.49166667461395264,
"regularize": 0.6555976867675781,
"step": 490
},
{
"dpo_loss": 0.645307719707489,
"epoch": 2.8058573452999527,
"grad_norm": 57.07656552315778,
"learning_rate": 2.3909515315866606e-06,
"logits": -0.5600085854530334,
"logps": -73.18649291992188,
"loss": 0.673,
"objective": 0.7392704486846924,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.7392704486846924,
"step": 495
},
{
"dpo_loss": 0.5844512581825256,
"epoch": 2.8341993386868207,
"grad_norm": 58.17740843884364,
"learning_rate": 2.341440200858589e-06,
"logits": -0.588214099407196,
"logps": -73.16432189941406,
"loss": 0.6709,
"objective": 0.6501026153564453,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6501026153564453,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 1.3375275135040283,
"eval_logits": -0.648563027381897,
"eval_logps": -79.71849822998047,
"eval_loss": 2.2687840461730957,
"eval_objective": 2.2577877044677734,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 2.2577877044677734,
"eval_runtime": 258.6203,
"eval_samples_per_second": 22.388,
"eval_steps_per_second": 0.936,
"step": 500
},
{
"dpo_loss": 0.6095997095108032,
"epoch": 2.862541332073689,
"grad_norm": 61.323078307757875,
"learning_rate": 2.2919912390530945e-06,
"logits": -0.5695565938949585,
"logps": -74.67700958251953,
"loss": 0.6466,
"objective": 0.6186994910240173,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6186994910240173,
"step": 505
},
{
"dpo_loss": 0.6002517938613892,
"epoch": 2.8908833254605573,
"grad_norm": 57.75553868657935,
"learning_rate": 2.242624096740164e-06,
"logits": -0.5447255373001099,
"logps": -75.35613250732422,
"loss": 0.6493,
"objective": 0.6379550099372864,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5583333373069763,
"regularize": 0.6379550099372864,
"step": 510
},
{
"dpo_loss": 0.6183599233627319,
"epoch": 2.9192253188474258,
"grad_norm": 56.91540287998159,
"learning_rate": 2.193358192306384e-06,
"logits": -0.5950519442558289,
"logps": -75.38876342773438,
"loss": 0.6549,
"objective": 0.646551251411438,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4958333373069763,
"regularize": 0.646551251411438,
"step": 515
},
{
"dpo_loss": 0.5939019918441772,
"epoch": 2.947567312234294,
"grad_norm": 56.28970465624792,
"learning_rate": 2.1442129043167877e-06,
"logits": -0.5761558413505554,
"logps": -77.31433868408203,
"loss": 0.6045,
"objective": 0.5895420908927917,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5375000238418579,
"regularize": 0.5895420908927917,
"step": 520
},
{
"dpo_loss": 0.6051114201545715,
"epoch": 2.975909305621162,
"grad_norm": 59.57121178469057,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.5915683507919312,
"logps": -75.8340835571289,
"loss": 0.611,
"objective": 0.5858420133590698,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.5858420133590698,
"step": 525
},
{
"dpo_loss": 0.5843603610992432,
"epoch": 3.0042512990080303,
"grad_norm": 58.501519481588275,
"learning_rate": 2.046361447106244e-06,
"logits": -0.5682967901229858,
"logps": -75.45525360107422,
"loss": 0.6198,
"objective": 0.6126269698143005,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5333333611488342,
"regularize": 0.6126269698143005,
"step": 530
},
{
"dpo_loss": 0.5678777098655701,
"epoch": 3.0325932923948984,
"grad_norm": 56.81311454823308,
"learning_rate": 1.997693767401503e-06,
"logits": -0.572821855545044,
"logps": -76.72408294677734,
"loss": 0.5527,
"objective": 0.5598254799842834,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5708333253860474,
"regularize": 0.5598254799842834,
"step": 535
},
{
"dpo_loss": 0.603760838508606,
"epoch": 3.0609352857817664,
"grad_norm": 55.24119887208726,
"learning_rate": 1.9492236680336486e-06,
"logits": -0.6185352802276611,
"logps": -75.44217681884766,
"loss": 0.5398,
"objective": 0.5248841047286987,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5625,
"regularize": 0.5248841047286987,
"step": 540
},
{
"dpo_loss": 0.6126823425292969,
"epoch": 3.089277279168635,
"grad_norm": 59.23770495109222,
"learning_rate": 1.9009702145406728e-06,
"logits": -0.6088476181030273,
"logps": -76.27928161621094,
"loss": 0.5381,
"objective": 0.521655797958374,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5041666626930237,
"regularize": 0.521655797958374,
"step": 545
},
{
"dpo_loss": 0.5726143717765808,
"epoch": 3.117619272555503,
"grad_norm": 56.22075610887121,
"learning_rate": 1.852952387243698e-06,
"logits": -0.5207330584526062,
"logps": -77.1482162475586,
"loss": 0.5302,
"objective": 0.5327464938163757,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5541666746139526,
"regularize": 0.5327464938163757,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 1.3210206031799316,
"eval_logits": -0.6266895532608032,
"eval_logps": -80.1419448852539,
"eval_loss": 2.259814500808716,
"eval_objective": 2.2429771423339844,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 2.2429771423339844,
"eval_runtime": 259.8661,
"eval_samples_per_second": 22.281,
"eval_steps_per_second": 0.931,
"step": 550
},
{
"dpo_loss": 0.5455856919288635,
"epoch": 3.1459612659423715,
"grad_norm": 58.00758823689939,
"learning_rate": 1.8051890737811395e-06,
"logits": -0.4721320569515228,
"logps": -77.1658706665039,
"loss": 0.5207,
"objective": 0.5126071572303772,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5625,
"regularize": 0.5126071572303772,
"step": 555
},
{
"dpo_loss": 0.5464572906494141,
"epoch": 3.1743032593292395,
"grad_norm": 55.95277148909765,
"learning_rate": 1.7576990616793139e-06,
"logits": -0.49959325790405273,
"logps": -74.85193634033203,
"loss": 0.529,
"objective": 0.4622223675251007,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4622223675251007,
"step": 560
},
{
"dpo_loss": 0.5949917435646057,
"epoch": 3.2026452527161076,
"grad_norm": 59.42402517752121,
"learning_rate": 1.7105010309624381e-06,
"logits": -0.5134323835372925,
"logps": -76.86905670166016,
"loss": 0.5143,
"objective": 0.49525225162506104,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5666666626930237,
"regularize": 0.49525225162506104,
"step": 565
},
{
"dpo_loss": 0.5860133767127991,
"epoch": 3.230987246102976,
"grad_norm": 57.89045181223991,
"learning_rate": 1.6636135468049122e-06,
"logits": -0.5014190077781677,
"logps": -76.04814147949219,
"loss": 0.4959,
"objective": 0.48642733693122864,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.48642733693122864,
"step": 570
},
{
"dpo_loss": 0.5744526386260986,
"epoch": 3.259329239489844,
"grad_norm": 54.09506631184751,
"learning_rate": 1.617055052228768e-06,
"logits": -0.5578089356422424,
"logps": -76.63516998291016,
"loss": 0.5051,
"objective": 0.4754410982131958,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5208333134651184,
"regularize": 0.4754410982131958,
"step": 575
},
{
"dpo_loss": 0.5690687298774719,
"epoch": 3.287671232876712,
"grad_norm": 57.935165692154584,
"learning_rate": 1.5708438608491816e-06,
"logits": -0.5680824518203735,
"logps": -77.54747772216797,
"loss": 0.5044,
"objective": 0.47244158387184143,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 0.47244158387184143,
"step": 580
},
{
"dpo_loss": 0.633466362953186,
"epoch": 3.3160132262635806,
"grad_norm": 61.57647838064842,
"learning_rate": 1.524998149670871e-06,
"logits": -0.5756654143333435,
"logps": -78.29401397705078,
"loss": 0.4977,
"objective": 0.5307816863059998,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.5307816863059998,
"step": 585
},
{
"dpo_loss": 0.594211220741272,
"epoch": 3.3443552196504487,
"grad_norm": 58.66653194688653,
"learning_rate": 1.479535951938243e-06,
"logits": -0.5960578918457031,
"logps": -77.94918060302734,
"loss": 0.4782,
"objective": 0.46626031398773193,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.512499988079071,
"regularize": 0.46626031398773193,
"step": 590
},
{
"dpo_loss": 0.5661875605583191,
"epoch": 3.372697213037317,
"grad_norm": 55.75147905971709,
"learning_rate": 1.43447515004208e-06,
"logits": -0.514352560043335,
"logps": -77.09761047363281,
"loss": 0.474,
"objective": 0.5005635023117065,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.5005635023117065,
"step": 595
},
{
"dpo_loss": 0.5296740531921387,
"epoch": 3.4010392064241852,
"grad_norm": 56.950549116726854,
"learning_rate": 1.3898334684855647e-06,
"logits": -0.5301509499549866,
"logps": -76.18799591064453,
"loss": 0.4552,
"objective": 0.4339993894100189,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.4339993894100189,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 1.3297724723815918,
"eval_logits": -0.6006779670715332,
"eval_logps": -79.95822143554688,
"eval_loss": 2.2547333240509033,
"eval_objective": 2.237860679626465,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 2.237860679626465,
"eval_runtime": 258.6303,
"eval_samples_per_second": 22.387,
"eval_steps_per_second": 0.936,
"step": 600
},
{
"dpo_loss": 0.606982409954071,
"epoch": 3.4293811998110533,
"grad_norm": 55.47374374411107,
"learning_rate": 1.3456284669124159e-06,
"logits": -0.5279320478439331,
"logps": -78.00320434570312,
"loss": 0.4475,
"objective": 0.46218839287757874,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5583333373069763,
"regularize": 0.46218839287757874,
"step": 605
},
{
"dpo_loss": 0.5908689498901367,
"epoch": 3.4577231931979218,
"grad_norm": 60.157606585793644,
"learning_rate": 1.301877533199859e-06,
"logits": -0.5327341556549072,
"logps": -76.2584457397461,
"loss": 0.455,
"objective": 0.4404201805591583,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 0.4404201805591583,
"step": 610
},
{
"dpo_loss": 0.5425374507904053,
"epoch": 3.48606518658479,
"grad_norm": 56.316614210866504,
"learning_rate": 1.2585978766191726e-06,
"logits": -0.5640905499458313,
"logps": -76.85858917236328,
"loss": 0.4328,
"objective": 0.4495522081851959,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.48750001192092896,
"regularize": 0.4495522081851959,
"step": 615
},
{
"dpo_loss": 0.5412243008613586,
"epoch": 3.514407179971658,
"grad_norm": 59.449782514540445,
"learning_rate": 1.2158065210664848e-06,
"logits": -0.5073726773262024,
"logps": -75.8833999633789,
"loss": 0.45,
"objective": 0.40877532958984375,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4958333373069763,
"regularize": 0.40877532958984375,
"step": 620
},
{
"dpo_loss": 0.5705503225326538,
"epoch": 3.5427491733585263,
"grad_norm": 60.75475260172388,
"learning_rate": 1.1735202983664803e-06,
"logits": -0.5309284925460815,
"logps": -75.0905990600586,
"loss": 0.4327,
"objective": 0.4199952185153961,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5416666865348816,
"regularize": 0.4199952185153961,
"step": 625
},
{
"dpo_loss": 0.5579532384872437,
"epoch": 3.5710911667453944,
"grad_norm": 59.775794629962114,
"learning_rate": 1.1317558416516696e-06,
"logits": -0.5706030130386353,
"logps": -74.92536163330078,
"loss": 0.4206,
"objective": 0.3978765606880188,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5249999761581421,
"regularize": 0.3978765606880188,
"step": 630
},
{
"dpo_loss": 0.5702130794525146,
"epoch": 3.5994331601322624,
"grad_norm": 58.150159448234085,
"learning_rate": 1.0905295788197993e-06,
"logits": -0.5399581789970398,
"logps": -75.56917572021484,
"loss": 0.4106,
"objective": 0.4598848223686218,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 0.4598848223686218,
"step": 635
},
{
"dpo_loss": 0.5239140391349792,
"epoch": 3.627775153519131,
"grad_norm": 56.82700141030008,
"learning_rate": 1.049857726072005e-06,
"logits": -0.490993469953537,
"logps": -76.47193145751953,
"loss": 0.3946,
"objective": 0.41905397176742554,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5,
"regularize": 0.41905397176742554,
"step": 640
},
{
"dpo_loss": 0.576458215713501,
"epoch": 3.656117146905999,
"grad_norm": 56.89880418064234,
"learning_rate": 1.0097562815342215e-06,
"logits": -0.5056424140930176,
"logps": -75.477783203125,
"loss": 0.3926,
"objective": 0.38865312933921814,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 0.38865312933921814,
"step": 645
},
{
"dpo_loss": 0.5464863181114197,
"epoch": 3.6844591402928675,
"grad_norm": 56.60252182656229,
"learning_rate": 9.702410189643838e-07,
"logits": -0.5153852105140686,
"logps": -76.41727447509766,
"loss": 0.3981,
"objective": 0.380422979593277,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.380422979593277,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 1.3237992525100708,
"eval_logits": -0.5994979739189148,
"eval_logps": -80.18803405761719,
"eval_loss": 2.254920482635498,
"eval_objective": 2.239741802215576,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5154958963394165,
"eval_regularize": 2.239741802215576,
"eval_runtime": 259.1878,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 0.934,
"step": 650
},
{
"dpo_loss": 0.5698133707046509,
"epoch": 3.7128011336797355,
"grad_norm": 60.818049397288426,
"learning_rate": 9.313274815478698e-07,
"logits": -0.5156189799308777,
"logps": -75.30026245117188,
"loss": 0.3817,
"objective": 0.3786256015300751,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3786256015300751,
"step": 655
},
{
"dpo_loss": 0.5675181150436401,
"epoch": 3.7411431270666036,
"grad_norm": 57.75499051269253,
"learning_rate": 8.930309757836517e-07,
"logits": -0.5635392069816589,
"logps": -76.37955474853516,
"loss": 0.3793,
"objective": 0.37651321291923523,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 0.37651321291923523,
"step": 660
},
{
"dpo_loss": 0.5627972483634949,
"epoch": 3.769485120453472,
"grad_norm": 55.789081356284925,
"learning_rate": 8.553665654635343e-07,
"logits": -0.5413954854011536,
"logps": -76.61347198486328,
"loss": 0.358,
"objective": 0.36223313212394714,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.36223313212394714,
"step": 665
},
{
"dpo_loss": 0.565936267375946,
"epoch": 3.79782711384034,
"grad_norm": 56.9797576995613,
"learning_rate": 8.183490657468687e-07,
"logits": -0.5987848043441772,
"logps": -76.39659881591797,
"loss": 0.3404,
"objective": 0.33699342608451843,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6041666865348816,
"regularize": 0.33699342608451843,
"step": 670
},
{
"dpo_loss": 0.5550724267959595,
"epoch": 3.826169107227208,
"grad_norm": 55.12534400843312,
"learning_rate": 7.819930373330669e-07,
"logits": -0.583532989025116,
"logps": -75.4254150390625,
"loss": 0.337,
"objective": 0.33032530546188354,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 0.33032530546188354,
"step": 675
},
{
"dpo_loss": 0.5683528780937195,
"epoch": 3.8545111006140766,
"grad_norm": 58.24346477498415,
"learning_rate": 7.463127807341966e-07,
"logits": -0.5395128130912781,
"logps": -75.92244720458984,
"loss": 0.3391,
"objective": 0.3696479797363281,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 0.3696479797363281,
"step": 680
},
{
"dpo_loss": 0.5345361232757568,
"epoch": 3.8828530940009447,
"grad_norm": 60.795090855194246,
"learning_rate": 7.113223306499336e-07,
"logits": -0.5642789006233215,
"logps": -75.37647247314453,
"loss": 0.3278,
"objective": 0.30459117889404297,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5583333373069763,
"regularize": 0.30459117889404297,
"step": 685
},
{
"dpo_loss": 0.5224164128303528,
"epoch": 3.9111950873878127,
"grad_norm": 53.66311212284037,
"learning_rate": 6.770354504470575e-07,
"logits": -0.6012124419212341,
"logps": -75.34683227539062,
"loss": 0.3242,
"objective": 0.30577850341796875,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5833333134651184,
"regularize": 0.30577850341796875,
"step": 690
},
{
"dpo_loss": 0.5453590750694275,
"epoch": 3.9395370807746812,
"grad_norm": 57.79953484046242,
"learning_rate": 6.434656267456843e-07,
"logits": -0.5451433658599854,
"logps": -76.42092895507812,
"loss": 0.3124,
"objective": 0.324345201253891,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4791666567325592,
"regularize": 0.324345201253891,
"step": 695
},
{
"dpo_loss": 0.561674952507019,
"epoch": 3.9678790741615493,
"grad_norm": 54.53867904105972,
"learning_rate": 6.106260641143547e-07,
"logits": -0.5508003830909729,
"logps": -76.36372375488281,
"loss": 0.3178,
"objective": 0.31011003255844116,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.31011003255844116,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 1.3332420587539673,
"eval_logits": -0.62151700258255,
"eval_logps": -80.45596313476562,
"eval_loss": 2.2615702152252197,
"eval_objective": 2.2539472579956055,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 2.2539472579956055,
"eval_runtime": 259.1185,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 0.934,
"step": 700
},
{
"dpo_loss": 0.5321690440177917,
"epoch": 3.9962210675484178,
"grad_norm": 59.452065695343926,
"learning_rate": 5.785296798760601e-07,
"logits": -0.5087547898292542,
"logps": -75.18541717529297,
"loss": 0.3225,
"objective": 0.30213478207588196,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5625,
"regularize": 0.30213478207588196,
"step": 705
},
{
"dpo_loss": 0.5166592001914978,
"epoch": 4.024563060935286,
"grad_norm": 65.96197683204998,
"learning_rate": 5.471890990272666e-07,
"logits": -0.5448976159095764,
"logps": -76.6087875366211,
"loss": 0.2527,
"objective": 0.2436264157295227,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.2436264157295227,
"step": 710
},
{
"dpo_loss": 0.5354845523834229,
"epoch": 4.052905054322154,
"grad_norm": 56.07206249663241,
"learning_rate": 5.166166492719124e-07,
"logits": -0.5357650518417358,
"logps": -75.84851837158203,
"loss": 0.2618,
"objective": 0.2531677484512329,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5458333492279053,
"regularize": 0.2531677484512329,
"step": 715
},
{
"dpo_loss": 0.5235564112663269,
"epoch": 4.081247047709022,
"grad_norm": 57.7958952485207,
"learning_rate": 4.868243561723535e-07,
"logits": -0.49407169222831726,
"logps": -78.26904296875,
"loss": 0.2451,
"objective": 0.25101524591445923,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.25101524591445923,
"step": 720
},
{
"dpo_loss": 0.5391948819160461,
"epoch": 4.109589041095891,
"grad_norm": 55.93504200270769,
"learning_rate": 4.57823938419153e-07,
"logits": -0.5219827890396118,
"logps": -75.88143157958984,
"loss": 0.2571,
"objective": 0.2235218584537506,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.2235218584537506,
"step": 725
},
{
"dpo_loss": 0.5403502583503723,
"epoch": 4.137931034482759,
"grad_norm": 56.99961878175316,
"learning_rate": 4.2962680322157335e-07,
"logits": -0.5911334753036499,
"logps": -76.09339904785156,
"loss": 0.2446,
"objective": 0.230418860912323,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.550000011920929,
"regularize": 0.230418860912323,
"step": 730
},
{
"dpo_loss": 0.5401233434677124,
"epoch": 4.166273027869627,
"grad_norm": 59.493122058302546,
"learning_rate": 4.0224404182059443e-07,
"logits": -0.5024449229240417,
"logps": -77.3831558227539,
"loss": 0.2499,
"objective": 0.24507470428943634,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.24507470428943634,
"step": 735
},
{
"dpo_loss": 0.5141600966453552,
"epoch": 4.194615021256495,
"grad_norm": 56.30361364827208,
"learning_rate": 3.756864251262143e-07,
"logits": -0.5385380983352661,
"logps": -76.72105407714844,
"loss": 0.2374,
"objective": 0.24047289788722992,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.24047289788722992,
"step": 740
},
{
"dpo_loss": 0.5445539951324463,
"epoch": 4.222957014643363,
"grad_norm": 56.95695963320432,
"learning_rate": 3.499643994807486e-07,
"logits": -0.5771783590316772,
"logps": -74.20252990722656,
"loss": 0.2476,
"objective": 0.24541395902633667,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 0.24541395902633667,
"step": 745
},
{
"dpo_loss": 0.5280672907829285,
"epoch": 4.251299008030231,
"grad_norm": 54.37624429775478,
"learning_rate": 3.250880825498026e-07,
"logits": -0.609397828578949,
"logps": -75.99716186523438,
"loss": 0.2213,
"objective": 0.2197273075580597,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 0.2197273075580597,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 1.3297063112258911,
"eval_logits": -0.6153666973114014,
"eval_logps": -80.15010833740234,
"eval_loss": 2.26202392578125,
"eval_objective": 2.2499067783355713,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 2.2499067783355713,
"eval_runtime": 258.6337,
"eval_samples_per_second": 22.387,
"eval_steps_per_second": 0.936,
"step": 750
},
{
"dpo_loss": 0.5372040271759033,
"epoch": 4.2796410014171,
"grad_norm": 60.72195132083163,
"learning_rate": 3.0106725934252095e-07,
"logits": -0.5743114948272705,
"logps": -75.755859375,
"loss": 0.2404,
"objective": 0.22062353789806366,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5416666865348816,
"regularize": 0.22062353789806366,
"step": 755
},
{
"dpo_loss": 0.541923463344574,
"epoch": 4.307982994803968,
"grad_norm": 59.60019088112453,
"learning_rate": 2.779113783626916e-07,
"logits": -0.5603171586990356,
"logps": -75.97834777832031,
"loss": 0.2213,
"objective": 0.20643459260463715,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.574999988079071,
"regularize": 0.20643459260463715,
"step": 760
},
{
"dpo_loss": 0.519105851650238,
"epoch": 4.336324988190836,
"grad_norm": 57.13784414910619,
"learning_rate": 2.5562954789221164e-07,
"logits": -0.5705324411392212,
"logps": -76.40815734863281,
"loss": 0.2268,
"objective": 0.23255951702594757,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.23255951702594757,
"step": 765
},
{
"dpo_loss": 0.5270203351974487,
"epoch": 4.364666981577704,
"grad_norm": 57.88827391411817,
"learning_rate": 2.3423053240837518e-07,
"logits": -0.5453040599822998,
"logps": -75.09925842285156,
"loss": 0.2271,
"objective": 0.1950923055410385,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5375000238418579,
"regularize": 0.1950923055410385,
"step": 770
},
{
"dpo_loss": 0.5309551954269409,
"epoch": 4.393008974964572,
"grad_norm": 53.8846273259703,
"learning_rate": 2.137227491364016e-07,
"logits": -0.5447086691856384,
"logps": -76.1126480102539,
"loss": 0.2246,
"objective": 0.2088509202003479,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 0.2088509202003479,
"step": 775
},
{
"dpo_loss": 0.5339339971542358,
"epoch": 4.42135096835144,
"grad_norm": 59.22579729731716,
"learning_rate": 1.941142647385469e-07,
"logits": -0.5564789175987244,
"logps": -75.06421661376953,
"loss": 0.2149,
"objective": 0.21485432982444763,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.21485432982444763,
"step": 780
},
{
"dpo_loss": 0.5728757381439209,
"epoch": 4.449692961738309,
"grad_norm": 58.36366674226128,
"learning_rate": 1.7541279214111277e-07,
"logits": -0.5958257913589478,
"logps": -75.01850891113281,
"loss": 0.2181,
"objective": 0.25161251425743103,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.25161251425743103,
"step": 785
},
{
"dpo_loss": 0.5186070203781128,
"epoch": 4.478034955125177,
"grad_norm": 54.916993664319754,
"learning_rate": 1.5762568750059604e-07,
"logits": -0.5728685259819031,
"logps": -77.33419036865234,
"loss": 0.2061,
"objective": 0.18065284192562103,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5874999761581421,
"regularize": 0.18065284192562103,
"step": 790
},
{
"dpo_loss": 0.5318642854690552,
"epoch": 4.506376948512045,
"grad_norm": 58.53760269495991,
"learning_rate": 1.4075994731016895e-07,
"logits": -0.49769601225852966,
"logps": -77.13802337646484,
"loss": 0.2015,
"objective": 0.18465597927570343,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5583333373069763,
"regularize": 0.18465597927570343,
"step": 795
},
{
"dpo_loss": 0.5291022658348083,
"epoch": 4.534718941898913,
"grad_norm": 56.73087705325052,
"learning_rate": 1.2482220564763669e-07,
"logits": -0.47620663046836853,
"logps": -76.15321350097656,
"loss": 0.2032,
"objective": 0.1981140822172165,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5375000238418579,
"regularize": 0.1981140822172165,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 1.3294748067855835,
"eval_logits": -0.6175000667572021,
"eval_logps": -80.12406158447266,
"eval_loss": 2.2583107948303223,
"eval_objective": 2.2455341815948486,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 2.2455341815948486,
"eval_runtime": 259.1188,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 0.934,
"step": 800
},
{
"dpo_loss": 0.5472068786621094,
"epoch": 4.563060935285781,
"grad_norm": 55.12256671902985,
"learning_rate": 1.0981873156594381e-07,
"logits": -0.5433961153030396,
"logps": -75.67558288574219,
"loss": 0.2028,
"objective": 0.21665817499160767,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.48750001192092896,
"regularize": 0.21665817499160767,
"step": 805
},
{
"dpo_loss": 0.5366577506065369,
"epoch": 4.59140292867265,
"grad_norm": 57.38834520577077,
"learning_rate": 9.575542662726756e-08,
"logits": -0.574076235294342,
"logps": -74.78955078125,
"loss": 0.1956,
"objective": 0.22629684209823608,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4749999940395355,
"regularize": 0.22629684209823608,
"step": 810
},
{
"dpo_loss": 0.5316675901412964,
"epoch": 4.619744922059518,
"grad_norm": 59.23667969355199,
"learning_rate": 8.26378225816582e-08,
"logits": -0.48441505432128906,
"logps": -76.50004577636719,
"loss": 0.1986,
"objective": 0.195883110165596,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.195883110165596,
"step": 815
},
{
"dpo_loss": 0.5366373658180237,
"epoch": 4.648086915446386,
"grad_norm": 55.28284867900087,
"learning_rate": 7.047107919114588e-08,
"logits": -0.5500171184539795,
"logps": -75.87386322021484,
"loss": 0.2028,
"objective": 0.20712776482105255,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.20712776482105255,
"step": 820
},
{
"dpo_loss": 0.5389170050621033,
"epoch": 4.6764289088332545,
"grad_norm": 57.21598882034636,
"learning_rate": 5.92599822001666e-08,
"logits": -0.5184782147407532,
"logps": -74.3449478149414,
"loss": 0.1976,
"objective": 0.18227988481521606,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.18227988481521606,
"step": 825
},
{
"dpo_loss": 0.5467627048492432,
"epoch": 4.7047709022201225,
"grad_norm": 54.82857089312232,
"learning_rate": 4.9008941453107527e-08,
"logits": -0.5783690214157104,
"logps": -76.44660186767578,
"loss": 0.2072,
"objective": 0.19597838819026947,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5416666865348816,
"regularize": 0.19597838819026947,
"step": 830
},
{
"dpo_loss": 0.5296519994735718,
"epoch": 4.733112895606991,
"grad_norm": 54.828212438355145,
"learning_rate": 3.972198915970976e-08,
"logits": -0.5317445993423462,
"logps": -75.7918701171875,
"loss": 0.2013,
"objective": 0.20263001322746277,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 0.20263001322746277,
"step": 835
},
{
"dpo_loss": 0.5090625882148743,
"epoch": 4.7614548889938595,
"grad_norm": 55.3362837507438,
"learning_rate": 3.1402778309014284e-08,
"logits": -0.5363056063652039,
"logps": -76.1655044555664,
"loss": 0.1893,
"objective": 0.19915518164634705,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 0.19915518164634705,
"step": 840
},
{
"dpo_loss": 0.5551865100860596,
"epoch": 4.7897968823807275,
"grad_norm": 54.49437843853531,
"learning_rate": 2.4054581232470785e-08,
"logits": -0.541746199131012,
"logps": -76.23973083496094,
"loss": 0.1902,
"objective": 0.20770463347434998,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.4791666567325592,
"regularize": 0.20770463347434998,
"step": 845
},
{
"dpo_loss": 0.5436845421791077,
"epoch": 4.818138875767596,
"grad_norm": 57.49186751897613,
"learning_rate": 1.768028831677926e-08,
"logits": -0.5542425513267517,
"logps": -74.9803237915039,
"loss": 0.1935,
"objective": 0.17371943593025208,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.17371943593025208,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 1.3283706903457642,
"eval_logits": -0.6168581247329712,
"eval_logps": -80.06609344482422,
"eval_loss": 2.2561168670654297,
"eval_objective": 2.242354154586792,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 2.242354154586792,
"eval_runtime": 259.267,
"eval_samples_per_second": 22.332,
"eval_steps_per_second": 0.933,
"step": 850
},
{
"dpo_loss": 0.5587651133537292,
"epoch": 4.846480869154464,
"grad_norm": 57.56815145820478,
"learning_rate": 1.2282406866966078e-08,
"logits": -0.5165177583694458,
"logps": -75.15120697021484,
"loss": 0.1923,
"objective": 0.2012535035610199,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.550000011920929,
"regularize": 0.2012535035610199,
"step": 855
},
{
"dpo_loss": 0.5044924020767212,
"epoch": 4.874822862541333,
"grad_norm": 57.45096886360892,
"learning_rate": 7.863060120144316e-09,
"logits": -0.508831799030304,
"logps": -75.34513854980469,
"loss": 0.1944,
"objective": 0.2123396247625351,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.574999988079071,
"regularize": 0.2123396247625351,
"step": 860
},
{
"dpo_loss": 0.52206951379776,
"epoch": 4.903164855928201,
"grad_norm": 58.02043264781572,
"learning_rate": 4.423986410346526e-09,
"logits": -0.5500971078872681,
"logps": -74.25553131103516,
"loss": 0.1747,
"objective": 0.16172558069229126,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5166666507720947,
"regularize": 0.16172558069229126,
"step": 865
},
{
"dpo_loss": 0.5237457752227783,
"epoch": 4.931506849315069,
"grad_norm": 55.98430316965056,
"learning_rate": 1.9665384847583622e-09,
"logits": -0.5589514970779419,
"logps": -75.981689453125,
"loss": 0.1822,
"objective": 0.18714649975299835,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5291666388511658,
"regularize": 0.18714649975299835,
"step": 870
},
{
"dpo_loss": 0.5439503192901611,
"epoch": 4.959848842701937,
"grad_norm": 55.03768486905713,
"learning_rate": 4.916829716183901e-10,
"logits": -0.5287134051322937,
"logps": -75.68090057373047,
"loss": 0.1955,
"objective": 0.20633025467395782,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5583333373069763,
"regularize": 0.20633025467395782,
"step": 875
},
{
"dpo_loss": 0.5364298224449158,
"epoch": 4.988190836088805,
"grad_norm": 56.14268444647409,
"learning_rate": 0.0,
"logits": -0.5877522826194763,
"logps": -76.51420593261719,
"loss": 0.1905,
"objective": 0.20404843986034393,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.20404843986034393,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.7624400413849137,
"train_runtime": 35099.769,
"train_samples_per_second": 7.237,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}