|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 134.13619593724954, |
|
"learning_rate": 5.681818181818182e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.693040668964386, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 130.40196272454386, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.3680235147476196, |
|
"logps": -84.29497528076172, |
|
"loss": 0.412, |
|
"objective": 0.3764810860157013, |
|
"ranking_idealized": 0.5677083134651184, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3764810860157013, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6761094927787781, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 220.67402488716627, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.4485151767730713, |
|
"logps": -83.00025177001953, |
|
"loss": 0.4272, |
|
"objective": 0.4414474666118622, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4414474666118622, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6819472908973694, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 123.4201155289935, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4201914072036743, |
|
"logps": -83.36089324951172, |
|
"loss": 0.4193, |
|
"objective": 0.41622886061668396, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.41622886061668396, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6776646375656128, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 139.10378037423342, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"logits": -1.4012433290481567, |
|
"logps": -84.29528045654297, |
|
"loss": 0.442, |
|
"objective": 0.45644310116767883, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.45644310116767883, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6203070878982544, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 135.72754480990383, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.4394680261611938, |
|
"logps": -84.2686996459961, |
|
"loss": 0.4742, |
|
"objective": 0.4576638340950012, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.4576638340950012, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6442328691482544, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 129.86543736019303, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"logits": -1.406640887260437, |
|
"logps": -84.90333557128906, |
|
"loss": 0.5291, |
|
"objective": 0.5116989016532898, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.5116989016532898, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6669540405273438, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 149.40597914469663, |
|
"learning_rate": 1.9886363636363638e-06, |
|
"logits": -1.3766274452209473, |
|
"logps": -83.32654571533203, |
|
"loss": 0.6031, |
|
"objective": 0.6114931702613831, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.6114931702613831, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6527742743492126, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 158.6310433342775, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits": -1.3420394659042358, |
|
"logps": -83.61425018310547, |
|
"loss": 0.6378, |
|
"objective": 0.639444887638092, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.639444887638092, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6698108911514282, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 129.09910758768564, |
|
"learning_rate": 2.556818181818182e-06, |
|
"logits": -1.4372307062149048, |
|
"logps": -85.24830627441406, |
|
"loss": 0.7526, |
|
"objective": 0.8735028505325317, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.8735028505325317, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.681568443775177, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 118.24660372299884, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.30116605758667, |
|
"logps": -84.97547912597656, |
|
"loss": 0.8081, |
|
"objective": 0.7729283571243286, |
|
"ranking_idealized": 0.4416666626930237, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.4375, |
|
"regularize": 0.7729283571243286, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.7266114950180054, |
|
"eval_logits": -1.330773115158081, |
|
"eval_logps": -91.93638610839844, |
|
"eval_loss": 0.665158212184906, |
|
"eval_objective": 0.6722058653831482, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.6722058653831482, |
|
"eval_runtime": 260.1252, |
|
"eval_samples_per_second": 22.259, |
|
"eval_steps_per_second": 0.93, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.7984517812728882, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 113.66354191682501, |
|
"learning_rate": 3.125e-06, |
|
"logits": -1.3412364721298218, |
|
"logps": -85.0008316040039, |
|
"loss": 0.966, |
|
"objective": 0.9933412075042725, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.9933412075042725, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.686260461807251, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 120.77585128117505, |
|
"learning_rate": 3.409090909090909e-06, |
|
"logits": -1.278722882270813, |
|
"logps": -82.67823791503906, |
|
"loss": 0.9965, |
|
"objective": 0.994083046913147, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.994083046913147, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6437157988548279, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 114.63196029717214, |
|
"learning_rate": 3.6931818181818186e-06, |
|
"logits": -1.1975092887878418, |
|
"logps": -80.52145385742188, |
|
"loss": 1.0631, |
|
"objective": 1.0634738206863403, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1.0634738206863403, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.7649748921394348, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 120.79825029146289, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"logits": -1.2268587350845337, |
|
"logps": -81.57954406738281, |
|
"loss": 1.1744, |
|
"objective": 1.216639757156372, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5, |
|
"regularize": 1.216639757156372, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.8916628360748291, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 94.61834145368111, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.2359107732772827, |
|
"logps": -80.95670318603516, |
|
"loss": 1.2475, |
|
"objective": 1.3055096864700317, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 1.3055096864700317, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.8090317845344543, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 90.2013102285757, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits": -1.2852014303207397, |
|
"logps": -79.4792251586914, |
|
"loss": 1.2963, |
|
"objective": 1.3557077646255493, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1.3557077646255493, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.8598435521125793, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 88.10607117738199, |
|
"learning_rate": 4.829545454545455e-06, |
|
"logits": -1.1217026710510254, |
|
"logps": -78.83904266357422, |
|
"loss": 1.3155, |
|
"objective": 1.3455144166946411, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.3455144166946411, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.9262712001800537, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 91.01793262969673, |
|
"learning_rate": 4.999921328558333e-06, |
|
"logits": -1.0270463228225708, |
|
"logps": -78.34434509277344, |
|
"loss": 1.3316, |
|
"objective": 1.2936842441558838, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 1.2936842441558838, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.8197596073150635, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 88.9287273472026, |
|
"learning_rate": 4.999036331701828e-06, |
|
"logits": -1.1374804973602295, |
|
"logps": -75.8565902709961, |
|
"loss": 1.4668, |
|
"objective": 1.495007038116455, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 1.495007038116455, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.9542521834373474, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 92.9939055097423, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.1251710653305054, |
|
"logps": -78.35294342041016, |
|
"loss": 1.4482, |
|
"objective": 1.6726794242858887, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 1.6726794242858887, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.9744860529899597, |
|
"eval_logits": -1.087980031967163, |
|
"eval_logps": -83.3250503540039, |
|
"eval_loss": 1.4160186052322388, |
|
"eval_objective": 1.3662116527557373, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 1.3662116527557373, |
|
"eval_runtime": 258.3781, |
|
"eval_samples_per_second": 22.409, |
|
"eval_steps_per_second": 0.937, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 1.0364277362823486, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 95.45527468177522, |
|
"learning_rate": 4.994318112090048e-06, |
|
"logits": -0.8985040187835693, |
|
"logps": -79.13915252685547, |
|
"loss": 1.6082, |
|
"objective": 1.6931790113449097, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1.6931790113449097, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.9066151976585388, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 82.78277978571477, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits": -1.0420721769332886, |
|
"logps": -75.80839538574219, |
|
"loss": 1.6336, |
|
"objective": 1.8092875480651855, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1.8092875480651855, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 1.0691540241241455, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 83.71556017238615, |
|
"learning_rate": 4.985675754429744e-06, |
|
"logits": -1.0556254386901855, |
|
"logps": -75.40866088867188, |
|
"loss": 1.6395, |
|
"objective": 1.7828887701034546, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 1.7828887701034546, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 1.0725338459014893, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 75.45511417337472, |
|
"learning_rate": 4.9798870320769884e-06, |
|
"logits": -0.9715719819068909, |
|
"logps": -74.33720397949219, |
|
"loss": 1.5808, |
|
"objective": 1.6460652351379395, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.6460652351379395, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.9674696326255798, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 81.29845851322025, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.935053288936615, |
|
"logps": -72.40234375, |
|
"loss": 1.5695, |
|
"objective": 1.5490193367004395, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1.5490193367004395, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.9349392056465149, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 70.71836455513233, |
|
"learning_rate": 4.965385884295467e-06, |
|
"logits": -1.109536051750183, |
|
"logps": -70.39344024658203, |
|
"loss": 1.5584, |
|
"objective": 1.6358364820480347, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1.6358364820480347, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.9241760969161987, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 71.12542306553239, |
|
"learning_rate": 4.956679162840646e-06, |
|
"logits": -1.092426061630249, |
|
"logps": -73.43895721435547, |
|
"loss": 1.5904, |
|
"objective": 1.5671554803848267, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1.5671554803848267, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.8581911325454712, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 74.29158643708146, |
|
"learning_rate": 4.947006115536947e-06, |
|
"logits": -0.9535221457481384, |
|
"logps": -74.29177856445312, |
|
"loss": 1.5839, |
|
"objective": 1.4818050861358643, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.4818050861358643, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.9467170238494873, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 69.83033897795461, |
|
"learning_rate": 4.9363705472424825e-06, |
|
"logits": -0.9065474271774292, |
|
"logps": -75.74617004394531, |
|
"loss": 1.5265, |
|
"objective": 1.5216258764266968, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 1.5216258764266968, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.8972322940826416, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 70.00491587575249, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.8950843811035156, |
|
"logps": -74.2447509765625, |
|
"loss": 1.5063, |
|
"objective": 1.563843846321106, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 1.563843846321106, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 1.1387892961502075, |
|
"eval_logits": -0.9763504266738892, |
|
"eval_logps": -79.4244613647461, |
|
"eval_loss": 1.840279221534729, |
|
"eval_objective": 1.8307207822799683, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5154958963394165, |
|
"eval_regularize": 1.8307207822799683, |
|
"eval_runtime": 259.3897, |
|
"eval_samples_per_second": 22.322, |
|
"eval_steps_per_second": 0.933, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.8782619833946228, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 69.80918589105492, |
|
"learning_rate": 4.9122289584888926e-06, |
|
"logits": -0.9591015577316284, |
|
"logps": -72.81404876708984, |
|
"loss": 1.5351, |
|
"objective": 1.4583401679992676, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 1.4583401679992676, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.9660459756851196, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 66.22318117843135, |
|
"learning_rate": 4.8987324340362445e-06, |
|
"logits": -0.9040888547897339, |
|
"logps": -73.70983123779297, |
|
"loss": 1.5597, |
|
"objective": 1.562262773513794, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1.562262773513794, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 1.0612136125564575, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 70.50798575492288, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits": -0.8070274591445923, |
|
"logps": -75.22913360595703, |
|
"loss": 1.5216, |
|
"objective": 1.6139070987701416, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1.6139070987701416, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.8528857827186584, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 70.82614721912003, |
|
"learning_rate": 4.868914466936038e-06, |
|
"logits": -0.6839962601661682, |
|
"logps": -76.570068359375, |
|
"loss": 1.483, |
|
"objective": 1.5318470001220703, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.5318470001220703, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.8230343461036682, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 73.28269267652773, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.6581661105155945, |
|
"logps": -75.77076721191406, |
|
"loss": 1.4604, |
|
"objective": 1.486951470375061, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 1.486951470375061, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.9295377135276794, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 68.13139272327797, |
|
"learning_rate": 4.835369650662767e-06, |
|
"logits": -0.7830471396446228, |
|
"logps": -75.41452026367188, |
|
"loss": 1.4384, |
|
"objective": 1.5701080560684204, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 1.5701080560684204, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.9832867980003357, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 68.27487274290651, |
|
"learning_rate": 4.817215939055984e-06, |
|
"logits": -0.7766852974891663, |
|
"logps": -75.4141845703125, |
|
"loss": 1.4597, |
|
"objective": 1.5295368432998657, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 1.5295368432998657, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.9126808047294617, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 72.02486344760798, |
|
"learning_rate": 4.798150758954164e-06, |
|
"logits": -0.7806929349899292, |
|
"logps": -76.5052490234375, |
|
"loss": 1.4245, |
|
"objective": 1.6002991199493408, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 1.6002991199493408, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.876176655292511, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 72.39146825694654, |
|
"learning_rate": 4.778181609576832e-06, |
|
"logits": -0.742654025554657, |
|
"logps": -75.56478881835938, |
|
"loss": 1.4047, |
|
"objective": 1.4097263813018799, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 1.4097263813018799, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.8269080519676208, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 65.92725380936474, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.7813295125961304, |
|
"logps": -73.13416290283203, |
|
"loss": 1.3427, |
|
"objective": 1.2968580722808838, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1.2968580722808838, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 1.19429349899292, |
|
"eval_logits": -0.844602108001709, |
|
"eval_logps": -78.08975982666016, |
|
"eval_loss": 1.9410725831985474, |
|
"eval_objective": 1.904191493988037, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 1.904191493988037, |
|
"eval_runtime": 259.3374, |
|
"eval_samples_per_second": 22.326, |
|
"eval_steps_per_second": 0.933, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.9577538371086121, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 65.93331247907419, |
|
"learning_rate": 4.735563174649278e-06, |
|
"logits": -0.8114036321640015, |
|
"logps": -74.05875396728516, |
|
"loss": 1.3942, |
|
"objective": 1.4247114658355713, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1.4247114658355713, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.7938932180404663, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 71.43014040389335, |
|
"learning_rate": 4.7129306529060415e-06, |
|
"logits": -0.883682131767273, |
|
"logps": -74.72289276123047, |
|
"loss": 1.397, |
|
"objective": 1.3820544481277466, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.3820544481277466, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.8905848860740662, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 73.75682155225577, |
|
"learning_rate": 4.68942768290728e-06, |
|
"logits": -0.8337818384170532, |
|
"logps": -75.42349243164062, |
|
"loss": 1.346, |
|
"objective": 1.3334628343582153, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 1.3334628343582153, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.7579005360603333, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 67.10552679086015, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits": -0.6136857867240906, |
|
"logps": -76.30582427978516, |
|
"loss": 1.3537, |
|
"objective": 1.1673569679260254, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 1.1673569679260254, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.8865377306938171, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 61.86276513251386, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.712106466293335, |
|
"logps": -77.76806640625, |
|
"loss": 1.2903, |
|
"objective": 1.3318673372268677, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.3318673372268677, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.7389308214187622, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 64.8981585658753, |
|
"learning_rate": 4.613790221445511e-06, |
|
"logits": -0.7096338868141174, |
|
"logps": -77.84996032714844, |
|
"loss": 1.2526, |
|
"objective": 1.1143450736999512, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.1143450736999512, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.7792016267776489, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 70.5473568363482, |
|
"learning_rate": 4.586901275038201e-06, |
|
"logits": -0.8352426886558533, |
|
"logps": -74.80459594726562, |
|
"loss": 1.3276, |
|
"objective": 1.2759090662002563, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1.2759090662002563, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.7532822489738464, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 66.15538794121927, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits": -0.7979737520217896, |
|
"logps": -75.91637420654297, |
|
"loss": 1.2644, |
|
"objective": 1.1841062307357788, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1.1841062307357788, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.8654258847236633, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 60.664093680465626, |
|
"learning_rate": 4.530671656612544e-06, |
|
"logits": -0.7953319549560547, |
|
"logps": -75.70112609863281, |
|
"loss": 1.2143, |
|
"objective": 1.248257040977478, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1.248257040977478, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.7265617847442627, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 59.62226844787694, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.7469329833984375, |
|
"logps": -75.32097625732422, |
|
"loss": 1.2385, |
|
"objective": 1.2008994817733765, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 1.2008994817733765, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 1.2812141180038452, |
|
"eval_logits": -0.8252052664756775, |
|
"eval_logps": -81.07825469970703, |
|
"eval_loss": 2.100358247756958, |
|
"eval_objective": 2.0779922008514404, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5072314143180847, |
|
"eval_regularize": 2.0779922008514404, |
|
"eval_runtime": 259.328, |
|
"eval_samples_per_second": 22.327, |
|
"eval_steps_per_second": 0.933, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.73033607006073, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 62.42895757690262, |
|
"learning_rate": 4.4712473230167775e-06, |
|
"logits": -0.6974473595619202, |
|
"logps": -77.66381072998047, |
|
"loss": 1.2105, |
|
"objective": 1.1618343591690063, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 1.1618343591690063, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.8443369269371033, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 61.04515459515312, |
|
"learning_rate": 4.440366160729393e-06, |
|
"logits": -0.705303430557251, |
|
"logps": -77.5326156616211, |
|
"loss": 1.1973, |
|
"objective": 1.1547825336456299, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 1.1547825336456299, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.7983213663101196, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 63.36584736778421, |
|
"learning_rate": 4.4087217624420595e-06, |
|
"logits": -0.6919764280319214, |
|
"logps": -75.56660461425781, |
|
"loss": 1.1823, |
|
"objective": 1.208877444267273, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 1.208877444267273, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.719446063041687, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 63.46733089619852, |
|
"learning_rate": 4.376326575364206e-06, |
|
"logits": -0.6683894991874695, |
|
"logps": -76.26860809326172, |
|
"loss": 1.1785, |
|
"objective": 1.1638675928115845, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1.1638675928115845, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.7665132284164429, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 58.99295412475345, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.6416487097740173, |
|
"logps": -76.59992218017578, |
|
"loss": 1.1881, |
|
"objective": 1.1479988098144531, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 1.1479988098144531, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.7462816834449768, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 60.75435946216164, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits": -0.601517915725708, |
|
"logps": -74.82725524902344, |
|
"loss": 1.145, |
|
"objective": 1.1755616664886475, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1.1755616664886475, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.6989460587501526, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 64.56412273044151, |
|
"learning_rate": 4.274765153095008e-06, |
|
"logits": -0.644619882106781, |
|
"logps": -76.7342758178711, |
|
"loss": 1.1688, |
|
"objective": 1.1958999633789062, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 1.1958999633789062, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.751794695854187, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 65.15922239336996, |
|
"learning_rate": 4.239497113483819e-06, |
|
"logits": -0.6588698625564575, |
|
"logps": -74.58145141601562, |
|
"loss": 1.1147, |
|
"objective": 0.9951308369636536, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.9951308369636536, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.7910669445991516, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 59.311015532755086, |
|
"learning_rate": 4.203544848984729e-06, |
|
"logits": -0.5501735210418701, |
|
"logps": -73.76367950439453, |
|
"loss": 1.0785, |
|
"objective": 1.1607948541641235, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 1.1607948541641235, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.787829577922821, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 59.72417531410974, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.5574866533279419, |
|
"logps": -74.32286834716797, |
|
"loss": 1.1013, |
|
"objective": 1.11799156665802, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 1.11799156665802, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 1.3091331720352173, |
|
"eval_logits": -0.6190080046653748, |
|
"eval_logps": -78.51614379882812, |
|
"eval_loss": 2.195436716079712, |
|
"eval_objective": 2.2003257274627686, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2.2003257274627686, |
|
"eval_runtime": 259.0657, |
|
"eval_samples_per_second": 22.35, |
|
"eval_steps_per_second": 0.934, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.751851499080658, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 63.853803550753945, |
|
"learning_rate": 4.129644475669617e-06, |
|
"logits": -0.5315951108932495, |
|
"logps": -74.70060729980469, |
|
"loss": 1.1126, |
|
"objective": 1.1344873905181885, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1.1344873905181885, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.7526522278785706, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 62.541695397328844, |
|
"learning_rate": 4.091725435297721e-06, |
|
"logits": -0.6003395915031433, |
|
"logps": -71.47956848144531, |
|
"loss": 1.104, |
|
"objective": 1.1590847969055176, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1.1590847969055176, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.7785258889198303, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 56.52492221503349, |
|
"learning_rate": 4.053180295492203e-06, |
|
"logits": -0.5509213209152222, |
|
"logps": -72.33855438232422, |
|
"loss": 1.086, |
|
"objective": 1.1518981456756592, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1.1518981456756592, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.7460355758666992, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 60.186149725562295, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits": -0.5665237903594971, |
|
"logps": -73.6473159790039, |
|
"loss": 1.0789, |
|
"objective": 1.1513078212738037, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.1513078212738037, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.7017228603363037, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 64.38612525774512, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.5879048109054565, |
|
"logps": -75.27242279052734, |
|
"loss": 1.0824, |
|
"objective": 1.1118178367614746, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 1.1118178367614746, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.6825158596038818, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 59.75239189291987, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits": -0.4700223505496979, |
|
"logps": -71.7901382446289, |
|
"loss": 1.0596, |
|
"objective": 0.9758342504501343, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4541666805744171, |
|
"regularize": 0.9758342504501343, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.6629473567008972, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 61.304279089943705, |
|
"learning_rate": 3.893045541966975e-06, |
|
"logits": -0.617573618888855, |
|
"logps": -72.22500610351562, |
|
"loss": 1.0234, |
|
"objective": 1.1313632726669312, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 1.1313632726669312, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.7187826633453369, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 59.753785455065014, |
|
"learning_rate": 3.8516020436389945e-06, |
|
"logits": -0.560102105140686, |
|
"logps": -75.12157440185547, |
|
"loss": 1.0127, |
|
"objective": 1.0483639240264893, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 1.0483639240264893, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934853792190552, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 57.97084895323718, |
|
"learning_rate": 3.8096268975436045e-06, |
|
"logits": -0.5952755212783813, |
|
"logps": -74.10826873779297, |
|
"loss": 1.022, |
|
"objective": 1.0359365940093994, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.0359365940093994, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.6595560908317566, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 61.69795496268391, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.562609851360321, |
|
"logps": -75.11538696289062, |
|
"loss": 0.9795, |
|
"objective": 0.9731999635696411, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.9731999635696411, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 1.2865926027297974, |
|
"eval_logits": -0.6907580494880676, |
|
"eval_logps": -78.29139709472656, |
|
"eval_loss": 2.2000670433044434, |
|
"eval_objective": 2.1849544048309326, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 2.1849544048309326, |
|
"eval_runtime": 259.7345, |
|
"eval_samples_per_second": 22.292, |
|
"eval_steps_per_second": 0.932, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.7099167108535767, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 61.48654412749636, |
|
"learning_rate": 3.724147907764478e-06, |
|
"logits": -0.5564183592796326, |
|
"logps": -74.59834289550781, |
|
"loss": 0.9585, |
|
"objective": 1.041745901107788, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1.041745901107788, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.7087345123291016, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 61.77503577871403, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits": -0.5873940587043762, |
|
"logps": -73.76538848876953, |
|
"loss": 0.9533, |
|
"objective": 0.9487842917442322, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.9487842917442322, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.6601606607437134, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 62.194779314868725, |
|
"learning_rate": 3.6367430508080283e-06, |
|
"logits": -0.6485423445701599, |
|
"logps": -74.84728240966797, |
|
"loss": 0.9459, |
|
"objective": 0.9501416683197021, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.9501416683197021, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.6689183712005615, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 62.24375049929638, |
|
"learning_rate": 3.5923612809233987e-06, |
|
"logits": -0.6007247567176819, |
|
"logps": -72.23168182373047, |
|
"loss": 0.9236, |
|
"objective": 0.9433914422988892, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.9433914422988892, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.6166276335716248, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 55.9855513993403, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.6374623775482178, |
|
"logps": -73.1789321899414, |
|
"loss": 0.9218, |
|
"objective": 0.9237673878669739, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.9237673878669739, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.7234205007553101, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 60.877496205710976, |
|
"learning_rate": 3.5023263385165346e-06, |
|
"logits": -0.5776531100273132, |
|
"logps": -72.65286254882812, |
|
"loss": 0.8853, |
|
"objective": 0.9437097311019897, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.9437097311019897, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.706203818321228, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 57.432586251491834, |
|
"learning_rate": 3.4567085809127247e-06, |
|
"logits": -0.5748823881149292, |
|
"logps": -75.21902465820312, |
|
"loss": 0.8699, |
|
"objective": 0.9273825287818909, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.9273825287818909, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.6570317149162292, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 59.58556599473961, |
|
"learning_rate": 3.410714505454486e-06, |
|
"logits": -0.5105537176132202, |
|
"logps": -73.1999282836914, |
|
"loss": 0.8797, |
|
"objective": 0.7775837779045105, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.7775837779045105, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.6566095352172852, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 61.70716494378518, |
|
"learning_rate": 3.364362203744777e-06, |
|
"logits": -0.48478570580482483, |
|
"logps": -75.13355255126953, |
|
"loss": 0.9257, |
|
"objective": 0.9057443737983704, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.9057443737983704, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.6266034245491028, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 61.76261428542362, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.5294731855392456, |
|
"logps": -76.35556030273438, |
|
"loss": 0.8853, |
|
"objective": 0.8211384415626526, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.8211384415626526, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 1.3223490715026855, |
|
"eval_logits": -0.6215647459030151, |
|
"eval_logps": -78.5732421875, |
|
"eval_loss": 2.267859697341919, |
|
"eval_objective": 2.261888027191162, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2.261888027191162, |
|
"eval_runtime": 258.7107, |
|
"eval_samples_per_second": 22.38, |
|
"eval_steps_per_second": 0.935, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.7414664626121521, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 59.90696960742683, |
|
"learning_rate": 3.2706559853460818e-06, |
|
"logits": -0.6069660186767578, |
|
"logps": -74.20203399658203, |
|
"loss": 0.8443, |
|
"objective": 0.830698549747467, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.830698549747467, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.6774530410766602, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 55.80310964403766, |
|
"learning_rate": 3.2233389276586325e-06, |
|
"logits": -0.524910032749176, |
|
"logps": -73.77034759521484, |
|
"loss": 0.8216, |
|
"objective": 0.7954517006874084, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.7954517006874084, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917945742607117, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 58.97332543380404, |
|
"learning_rate": 3.1757373472244324e-06, |
|
"logits": -0.5027827024459839, |
|
"logps": -74.41297912597656, |
|
"loss": 0.8371, |
|
"objective": 0.8500952124595642, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.8500952124595642, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.7004589438438416, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 58.55747179048871, |
|
"learning_rate": 3.127869967952698e-06, |
|
"logits": -0.42817217111587524, |
|
"logps": -76.00125122070312, |
|
"loss": 0.8196, |
|
"objective": 0.8261451125144958, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.8261451125144958, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.6052082777023315, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 64.29650245686375, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.48321303725242615, |
|
"logps": -74.37902069091797, |
|
"loss": 0.8213, |
|
"objective": 0.7835978865623474, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.7835978865623474, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.6209239959716797, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 57.31240800051826, |
|
"learning_rate": 3.0314132238824416e-06, |
|
"logits": -0.5284911394119263, |
|
"logps": -74.74346160888672, |
|
"loss": 0.7639, |
|
"objective": 0.7543562054634094, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.7543562054634094, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.6278953552246094, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 55.78371923658181, |
|
"learning_rate": 2.9828617999947647e-06, |
|
"logits": -0.6039460301399231, |
|
"logps": -74.01110076904297, |
|
"loss": 0.7742, |
|
"objective": 0.7583914995193481, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.7583914995193481, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.6551163792610168, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 57.20639534632825, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits": -0.6006036996841431, |
|
"logps": -73.72498321533203, |
|
"loss": 0.8175, |
|
"objective": 0.8128156661987305, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.8128156661987305, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.6569511890411377, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 59.028297224317896, |
|
"learning_rate": 2.8852083286358647e-06, |
|
"logits": -0.6477051973342896, |
|
"logps": -71.30754852294922, |
|
"loss": 0.7389, |
|
"objective": 0.6897608637809753, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.6897608637809753, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.6735912561416626, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 62.76694554498361, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.6754371523857117, |
|
"logps": -73.46631622314453, |
|
"loss": 0.7605, |
|
"objective": 0.8445902466773987, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.8445902466773987, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 1.3571884632110596, |
|
"eval_logits": -0.6825547218322754, |
|
"eval_logps": -78.28398895263672, |
|
"eval_loss": 2.2654569149017334, |
|
"eval_objective": 2.2744035720825195, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 2.2744035720825195, |
|
"eval_runtime": 259.3083, |
|
"eval_samples_per_second": 22.329, |
|
"eval_steps_per_second": 0.933, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.6667983531951904, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 58.3447211977729, |
|
"learning_rate": 2.7869488356746344e-06, |
|
"logits": -0.6426534056663513, |
|
"logps": -74.59416961669922, |
|
"loss": 0.7591, |
|
"objective": 0.768614649772644, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.768614649772644, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5558815598487854, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 65.11260237104646, |
|
"learning_rate": 2.7376401082604563e-06, |
|
"logits": -0.6525983810424805, |
|
"logps": -74.7845230102539, |
|
"loss": 0.7638, |
|
"objective": 0.691643476486206, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.691643476486206, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.6411501169204712, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 62.60981601751369, |
|
"learning_rate": 2.6882379059705953e-06, |
|
"logits": -0.5221896767616272, |
|
"logps": -75.2436752319336, |
|
"loss": 0.7359, |
|
"objective": 0.7508728504180908, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.7508728504180908, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.6434755325317383, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 56.58366056844157, |
|
"learning_rate": 2.6387616609823506e-06, |
|
"logits": -0.5284293293952942, |
|
"logps": -74.4692153930664, |
|
"loss": 0.7269, |
|
"objective": 0.6894288659095764, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.6894288659095764, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.6124823093414307, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 63.898667131172786, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.5336421728134155, |
|
"logps": -74.76774597167969, |
|
"loss": 0.7216, |
|
"objective": 0.6816112995147705, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6816112995147705, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.6480967998504639, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 70.14559307144371, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits": -0.6288893222808838, |
|
"logps": -73.35899353027344, |
|
"loss": 0.722, |
|
"objective": 0.7448421716690063, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.7448421716690063, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.6456955075263977, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 55.88623408443581, |
|
"learning_rate": 2.490083382528097e-06, |
|
"logits": -0.6045793294906616, |
|
"logps": -75.4445571899414, |
|
"loss": 0.7142, |
|
"objective": 0.7276442050933838, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.7276442050933838, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.600917637348175, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 59.05306899045766, |
|
"learning_rate": 2.440505756134732e-06, |
|
"logits": -0.5500164031982422, |
|
"logps": -74.36378479003906, |
|
"loss": 0.7053, |
|
"objective": 0.6555976867675781, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.6555976867675781, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.645307719707489, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 57.07656552315778, |
|
"learning_rate": 2.3909515315866606e-06, |
|
"logits": -0.5600085854530334, |
|
"logps": -73.18649291992188, |
|
"loss": 0.673, |
|
"objective": 0.7392704486846924, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.7392704486846924, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5844512581825256, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 58.17740843884364, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.588214099407196, |
|
"logps": -73.16432189941406, |
|
"loss": 0.6709, |
|
"objective": 0.6501026153564453, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6501026153564453, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 1.3375275135040283, |
|
"eval_logits": -0.648563027381897, |
|
"eval_logps": -79.71849822998047, |
|
"eval_loss": 2.2687840461730957, |
|
"eval_objective": 2.2577877044677734, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 2.2577877044677734, |
|
"eval_runtime": 258.6203, |
|
"eval_samples_per_second": 22.388, |
|
"eval_steps_per_second": 0.936, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.6095997095108032, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 61.323078307757875, |
|
"learning_rate": 2.2919912390530945e-06, |
|
"logits": -0.5695565938949585, |
|
"logps": -74.67700958251953, |
|
"loss": 0.6466, |
|
"objective": 0.6186994910240173, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6186994910240173, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.6002517938613892, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 57.75553868657935, |
|
"learning_rate": 2.242624096740164e-06, |
|
"logits": -0.5447255373001099, |
|
"logps": -75.35613250732422, |
|
"loss": 0.6493, |
|
"objective": 0.6379550099372864, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.6379550099372864, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.6183599233627319, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 56.91540287998159, |
|
"learning_rate": 2.193358192306384e-06, |
|
"logits": -0.5950519442558289, |
|
"logps": -75.38876342773438, |
|
"loss": 0.6549, |
|
"objective": 0.646551251411438, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.646551251411438, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5939019918441772, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 56.28970465624792, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits": -0.5761558413505554, |
|
"logps": -77.31433868408203, |
|
"loss": 0.6045, |
|
"objective": 0.5895420908927917, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.5895420908927917, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.6051114201545715, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 59.57121178469057, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.5915683507919312, |
|
"logps": -75.8340835571289, |
|
"loss": 0.611, |
|
"objective": 0.5858420133590698, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.5858420133590698, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5843603610992432, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 58.501519481588275, |
|
"learning_rate": 2.046361447106244e-06, |
|
"logits": -0.5682967901229858, |
|
"logps": -75.45525360107422, |
|
"loss": 0.6198, |
|
"objective": 0.6126269698143005, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.6126269698143005, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5678777098655701, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 56.81311454823308, |
|
"learning_rate": 1.997693767401503e-06, |
|
"logits": -0.572821855545044, |
|
"logps": -76.72408294677734, |
|
"loss": 0.5527, |
|
"objective": 0.5598254799842834, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.5598254799842834, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.603760838508606, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 55.24119887208726, |
|
"learning_rate": 1.9492236680336486e-06, |
|
"logits": -0.6185352802276611, |
|
"logps": -75.44217681884766, |
|
"loss": 0.5398, |
|
"objective": 0.5248841047286987, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.5248841047286987, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.6126823425292969, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 59.23770495109222, |
|
"learning_rate": 1.9009702145406728e-06, |
|
"logits": -0.6088476181030273, |
|
"logps": -76.27928161621094, |
|
"loss": 0.5381, |
|
"objective": 0.521655797958374, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.521655797958374, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5726143717765808, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 56.22075610887121, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.5207330584526062, |
|
"logps": -77.1482162475586, |
|
"loss": 0.5302, |
|
"objective": 0.5327464938163757, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.5327464938163757, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 1.3210206031799316, |
|
"eval_logits": -0.6266895532608032, |
|
"eval_logps": -80.1419448852539, |
|
"eval_loss": 2.259814500808716, |
|
"eval_objective": 2.2429771423339844, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.51962810754776, |
|
"eval_regularize": 2.2429771423339844, |
|
"eval_runtime": 259.8661, |
|
"eval_samples_per_second": 22.281, |
|
"eval_steps_per_second": 0.931, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5455856919288635, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 58.00758823689939, |
|
"learning_rate": 1.8051890737811395e-06, |
|
"logits": -0.4721320569515228, |
|
"logps": -77.1658706665039, |
|
"loss": 0.5207, |
|
"objective": 0.5126071572303772, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.5126071572303772, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5464572906494141, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 55.95277148909765, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits": -0.49959325790405273, |
|
"logps": -74.85193634033203, |
|
"loss": 0.529, |
|
"objective": 0.4622223675251007, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4622223675251007, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5949917435646057, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 59.42402517752121, |
|
"learning_rate": 1.7105010309624381e-06, |
|
"logits": -0.5134323835372925, |
|
"logps": -76.86905670166016, |
|
"loss": 0.5143, |
|
"objective": 0.49525225162506104, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.49525225162506104, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5860133767127991, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 57.89045181223991, |
|
"learning_rate": 1.6636135468049122e-06, |
|
"logits": -0.5014190077781677, |
|
"logps": -76.04814147949219, |
|
"loss": 0.4959, |
|
"objective": 0.48642733693122864, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.48642733693122864, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5744526386260986, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 54.09506631184751, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.5578089356422424, |
|
"logps": -76.63516998291016, |
|
"loss": 0.5051, |
|
"objective": 0.4754410982131958, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4754410982131958, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5690687298774719, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 57.935165692154584, |
|
"learning_rate": 1.5708438608491816e-06, |
|
"logits": -0.5680824518203735, |
|
"logps": -77.54747772216797, |
|
"loss": 0.5044, |
|
"objective": 0.47244158387184143, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.47244158387184143, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.633466362953186, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 61.57647838064842, |
|
"learning_rate": 1.524998149670871e-06, |
|
"logits": -0.5756654143333435, |
|
"logps": -78.29401397705078, |
|
"loss": 0.4977, |
|
"objective": 0.5307816863059998, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.5307816863059998, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.594211220741272, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 58.66653194688653, |
|
"learning_rate": 1.479535951938243e-06, |
|
"logits": -0.5960578918457031, |
|
"logps": -77.94918060302734, |
|
"loss": 0.4782, |
|
"objective": 0.46626031398773193, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.46626031398773193, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5661875605583191, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 55.75147905971709, |
|
"learning_rate": 1.43447515004208e-06, |
|
"logits": -0.514352560043335, |
|
"logps": -77.09761047363281, |
|
"loss": 0.474, |
|
"objective": 0.5005635023117065, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.5005635023117065, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.5296740531921387, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 56.950549116726854, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.5301509499549866, |
|
"logps": -76.18799591064453, |
|
"loss": 0.4552, |
|
"objective": 0.4339993894100189, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4339993894100189, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 1.3297724723815918, |
|
"eval_logits": -0.6006779670715332, |
|
"eval_logps": -79.95822143554688, |
|
"eval_loss": 2.2547333240509033, |
|
"eval_objective": 2.237860679626465, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2.237860679626465, |
|
"eval_runtime": 258.6303, |
|
"eval_samples_per_second": 22.387, |
|
"eval_steps_per_second": 0.936, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.606982409954071, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 55.47374374411107, |
|
"learning_rate": 1.3456284669124159e-06, |
|
"logits": -0.5279320478439331, |
|
"logps": -78.00320434570312, |
|
"loss": 0.4475, |
|
"objective": 0.46218839287757874, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.46218839287757874, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5908689498901367, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 60.157606585793644, |
|
"learning_rate": 1.301877533199859e-06, |
|
"logits": -0.5327341556549072, |
|
"logps": -76.2584457397461, |
|
"loss": 0.455, |
|
"objective": 0.4404201805591583, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4404201805591583, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5425374507904053, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 56.316614210866504, |
|
"learning_rate": 1.2585978766191726e-06, |
|
"logits": -0.5640905499458313, |
|
"logps": -76.85858917236328, |
|
"loss": 0.4328, |
|
"objective": 0.4495522081851959, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.4495522081851959, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5412243008613586, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 59.449782514540445, |
|
"learning_rate": 1.2158065210664848e-06, |
|
"logits": -0.5073726773262024, |
|
"logps": -75.8833999633789, |
|
"loss": 0.45, |
|
"objective": 0.40877532958984375, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.40877532958984375, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5705503225326538, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 60.75475260172388, |
|
"learning_rate": 1.1735202983664803e-06, |
|
"logits": -0.5309284925460815, |
|
"logps": -75.0905990600586, |
|
"loss": 0.4327, |
|
"objective": 0.4199952185153961, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.4199952185153961, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5579532384872437, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 59.775794629962114, |
|
"learning_rate": 1.1317558416516696e-06, |
|
"logits": -0.5706030130386353, |
|
"logps": -74.92536163330078, |
|
"loss": 0.4206, |
|
"objective": 0.3978765606880188, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.3978765606880188, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5702130794525146, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 58.150159448234085, |
|
"learning_rate": 1.0905295788197993e-06, |
|
"logits": -0.5399581789970398, |
|
"logps": -75.56917572021484, |
|
"loss": 0.4106, |
|
"objective": 0.4598848223686218, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.4598848223686218, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5239140391349792, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 56.82700141030008, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits": -0.490993469953537, |
|
"logps": -76.47193145751953, |
|
"loss": 0.3946, |
|
"objective": 0.41905397176742554, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.41905397176742554, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.576458215713501, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 56.89880418064234, |
|
"learning_rate": 1.0097562815342215e-06, |
|
"logits": -0.5056424140930176, |
|
"logps": -75.477783203125, |
|
"loss": 0.3926, |
|
"objective": 0.38865312933921814, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.38865312933921814, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5464863181114197, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 56.60252182656229, |
|
"learning_rate": 9.702410189643838e-07, |
|
"logits": -0.5153852105140686, |
|
"logps": -76.41727447509766, |
|
"loss": 0.3981, |
|
"objective": 0.380422979593277, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.380422979593277, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 1.3237992525100708, |
|
"eval_logits": -0.5994979739189148, |
|
"eval_logps": -80.18803405761719, |
|
"eval_loss": 2.254920482635498, |
|
"eval_objective": 2.239741802215576, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5154958963394165, |
|
"eval_regularize": 2.239741802215576, |
|
"eval_runtime": 259.1878, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 0.934, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5698133707046509, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 60.818049397288426, |
|
"learning_rate": 9.313274815478698e-07, |
|
"logits": -0.5156189799308777, |
|
"logps": -75.30026245117188, |
|
"loss": 0.3817, |
|
"objective": 0.3786256015300751, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3786256015300751, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5675181150436401, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 57.75499051269253, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits": -0.5635392069816589, |
|
"logps": -76.37955474853516, |
|
"loss": 0.3793, |
|
"objective": 0.37651321291923523, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.37651321291923523, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5627972483634949, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 55.789081356284925, |
|
"learning_rate": 8.553665654635343e-07, |
|
"logits": -0.5413954854011536, |
|
"logps": -76.61347198486328, |
|
"loss": 0.358, |
|
"objective": 0.36223313212394714, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.36223313212394714, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.565936267375946, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 56.9797576995613, |
|
"learning_rate": 8.183490657468687e-07, |
|
"logits": -0.5987848043441772, |
|
"logps": -76.39659881591797, |
|
"loss": 0.3404, |
|
"objective": 0.33699342608451843, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.33699342608451843, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5550724267959595, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 55.12534400843312, |
|
"learning_rate": 7.819930373330669e-07, |
|
"logits": -0.583532989025116, |
|
"logps": -75.4254150390625, |
|
"loss": 0.337, |
|
"objective": 0.33032530546188354, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.33032530546188354, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.5683528780937195, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 58.24346477498415, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits": -0.5395128130912781, |
|
"logps": -75.92244720458984, |
|
"loss": 0.3391, |
|
"objective": 0.3696479797363281, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.3696479797363281, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5345361232757568, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 60.795090855194246, |
|
"learning_rate": 7.113223306499336e-07, |
|
"logits": -0.5642789006233215, |
|
"logps": -75.37647247314453, |
|
"loss": 0.3278, |
|
"objective": 0.30459117889404297, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.30459117889404297, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5224164128303528, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 53.66311212284037, |
|
"learning_rate": 6.770354504470575e-07, |
|
"logits": -0.6012124419212341, |
|
"logps": -75.34683227539062, |
|
"loss": 0.3242, |
|
"objective": 0.30577850341796875, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.30577850341796875, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5453590750694275, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 57.79953484046242, |
|
"learning_rate": 6.434656267456843e-07, |
|
"logits": -0.5451433658599854, |
|
"logps": -76.42092895507812, |
|
"loss": 0.3124, |
|
"objective": 0.324345201253891, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.324345201253891, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.561674952507019, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 54.53867904105972, |
|
"learning_rate": 6.106260641143547e-07, |
|
"logits": -0.5508003830909729, |
|
"logps": -76.36372375488281, |
|
"loss": 0.3178, |
|
"objective": 0.31011003255844116, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.31011003255844116, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 1.3332420587539673, |
|
"eval_logits": -0.62151700258255, |
|
"eval_logps": -80.45596313476562, |
|
"eval_loss": 2.2615702152252197, |
|
"eval_objective": 2.2539472579956055, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2.2539472579956055, |
|
"eval_runtime": 259.1185, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 0.934, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5321690440177917, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 59.452065695343926, |
|
"learning_rate": 5.785296798760601e-07, |
|
"logits": -0.5087547898292542, |
|
"logps": -75.18541717529297, |
|
"loss": 0.3225, |
|
"objective": 0.30213478207588196, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.30213478207588196, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5166592001914978, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 65.96197683204998, |
|
"learning_rate": 5.471890990272666e-07, |
|
"logits": -0.5448976159095764, |
|
"logps": -76.6087875366211, |
|
"loss": 0.2527, |
|
"objective": 0.2436264157295227, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.2436264157295227, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5354845523834229, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 56.07206249663241, |
|
"learning_rate": 5.166166492719124e-07, |
|
"logits": -0.5357650518417358, |
|
"logps": -75.84851837158203, |
|
"loss": 0.2618, |
|
"objective": 0.2531677484512329, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.2531677484512329, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5235564112663269, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 57.7958952485207, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits": -0.49407169222831726, |
|
"logps": -78.26904296875, |
|
"loss": 0.2451, |
|
"objective": 0.25101524591445923, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.25101524591445923, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5391948819160461, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 55.93504200270769, |
|
"learning_rate": 4.57823938419153e-07, |
|
"logits": -0.5219827890396118, |
|
"logps": -75.88143157958984, |
|
"loss": 0.2571, |
|
"objective": 0.2235218584537506, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.2235218584537506, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5403502583503723, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 56.99961878175316, |
|
"learning_rate": 4.2962680322157335e-07, |
|
"logits": -0.5911334753036499, |
|
"logps": -76.09339904785156, |
|
"loss": 0.2446, |
|
"objective": 0.230418860912323, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.230418860912323, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401233434677124, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 59.493122058302546, |
|
"learning_rate": 4.0224404182059443e-07, |
|
"logits": -0.5024449229240417, |
|
"logps": -77.3831558227539, |
|
"loss": 0.2499, |
|
"objective": 0.24507470428943634, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.24507470428943634, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5141600966453552, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 56.30361364827208, |
|
"learning_rate": 3.756864251262143e-07, |
|
"logits": -0.5385380983352661, |
|
"logps": -76.72105407714844, |
|
"loss": 0.2374, |
|
"objective": 0.24047289788722992, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.24047289788722992, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.5445539951324463, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 56.95695963320432, |
|
"learning_rate": 3.499643994807486e-07, |
|
"logits": -0.5771783590316772, |
|
"logps": -74.20252990722656, |
|
"loss": 0.2476, |
|
"objective": 0.24541395902633667, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.24541395902633667, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5280672907829285, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 54.37624429775478, |
|
"learning_rate": 3.250880825498026e-07, |
|
"logits": -0.609397828578949, |
|
"logps": -75.99716186523438, |
|
"loss": 0.2213, |
|
"objective": 0.2197273075580597, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.2197273075580597, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 1.3297063112258911, |
|
"eval_logits": -0.6153666973114014, |
|
"eval_logps": -80.15010833740234, |
|
"eval_loss": 2.26202392578125, |
|
"eval_objective": 2.2499067783355713, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2.2499067783355713, |
|
"eval_runtime": 258.6337, |
|
"eval_samples_per_second": 22.387, |
|
"eval_steps_per_second": 0.936, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5372040271759033, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 60.72195132083163, |
|
"learning_rate": 3.0106725934252095e-07, |
|
"logits": -0.5743114948272705, |
|
"logps": -75.755859375, |
|
"loss": 0.2404, |
|
"objective": 0.22062353789806366, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.22062353789806366, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.541923463344574, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 59.60019088112453, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits": -0.5603171586990356, |
|
"logps": -75.97834777832031, |
|
"loss": 0.2213, |
|
"objective": 0.20643459260463715, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.20643459260463715, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.519105851650238, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 57.13784414910619, |
|
"learning_rate": 2.5562954789221164e-07, |
|
"logits": -0.5705324411392212, |
|
"logps": -76.40815734863281, |
|
"loss": 0.2268, |
|
"objective": 0.23255951702594757, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.23255951702594757, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5270203351974487, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 57.88827391411817, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits": -0.5453040599822998, |
|
"logps": -75.09925842285156, |
|
"loss": 0.2271, |
|
"objective": 0.1950923055410385, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.1950923055410385, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5309551954269409, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 53.8846273259703, |
|
"learning_rate": 2.137227491364016e-07, |
|
"logits": -0.5447086691856384, |
|
"logps": -76.1126480102539, |
|
"loss": 0.2246, |
|
"objective": 0.2088509202003479, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.2088509202003479, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5339339971542358, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 59.22579729731716, |
|
"learning_rate": 1.941142647385469e-07, |
|
"logits": -0.5564789175987244, |
|
"logps": -75.06421661376953, |
|
"loss": 0.2149, |
|
"objective": 0.21485432982444763, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.21485432982444763, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5728757381439209, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 58.36366674226128, |
|
"learning_rate": 1.7541279214111277e-07, |
|
"logits": -0.5958257913589478, |
|
"logps": -75.01850891113281, |
|
"loss": 0.2181, |
|
"objective": 0.25161251425743103, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.25161251425743103, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.5186070203781128, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 54.916993664319754, |
|
"learning_rate": 1.5762568750059604e-07, |
|
"logits": -0.5728685259819031, |
|
"logps": -77.33419036865234, |
|
"loss": 0.2061, |
|
"objective": 0.18065284192562103, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.18065284192562103, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.5318642854690552, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 58.53760269495991, |
|
"learning_rate": 1.4075994731016895e-07, |
|
"logits": -0.49769601225852966, |
|
"logps": -77.13802337646484, |
|
"loss": 0.2015, |
|
"objective": 0.18465597927570343, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.18465597927570343, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5291022658348083, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 56.73087705325052, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits": -0.47620663046836853, |
|
"logps": -76.15321350097656, |
|
"loss": 0.2032, |
|
"objective": 0.1981140822172165, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.1981140822172165, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 1.3294748067855835, |
|
"eval_logits": -0.6175000667572021, |
|
"eval_logps": -80.12406158447266, |
|
"eval_loss": 2.2583107948303223, |
|
"eval_objective": 2.2455341815948486, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2.2455341815948486, |
|
"eval_runtime": 259.1188, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 0.934, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5472068786621094, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 55.12256671902985, |
|
"learning_rate": 1.0981873156594381e-07, |
|
"logits": -0.5433961153030396, |
|
"logps": -75.67558288574219, |
|
"loss": 0.2028, |
|
"objective": 0.21665817499160767, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.21665817499160767, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5366577506065369, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 57.38834520577077, |
|
"learning_rate": 9.575542662726756e-08, |
|
"logits": -0.574076235294342, |
|
"logps": -74.78955078125, |
|
"loss": 0.1956, |
|
"objective": 0.22629684209823608, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.22629684209823608, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.5316675901412964, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 59.23667969355199, |
|
"learning_rate": 8.26378225816582e-08, |
|
"logits": -0.48441505432128906, |
|
"logps": -76.50004577636719, |
|
"loss": 0.1986, |
|
"objective": 0.195883110165596, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.195883110165596, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5366373658180237, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 55.28284867900087, |
|
"learning_rate": 7.047107919114588e-08, |
|
"logits": -0.5500171184539795, |
|
"logps": -75.87386322021484, |
|
"loss": 0.2028, |
|
"objective": 0.20712776482105255, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.20712776482105255, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5389170050621033, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 57.21598882034636, |
|
"learning_rate": 5.92599822001666e-08, |
|
"logits": -0.5184782147407532, |
|
"logps": -74.3449478149414, |
|
"loss": 0.1976, |
|
"objective": 0.18227988481521606, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.18227988481521606, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5467627048492432, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 54.82857089312232, |
|
"learning_rate": 4.9008941453107527e-08, |
|
"logits": -0.5783690214157104, |
|
"logps": -76.44660186767578, |
|
"loss": 0.2072, |
|
"objective": 0.19597838819026947, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.19597838819026947, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.5296519994735718, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 54.828212438355145, |
|
"learning_rate": 3.972198915970976e-08, |
|
"logits": -0.5317445993423462, |
|
"logps": -75.7918701171875, |
|
"loss": 0.2013, |
|
"objective": 0.20263001322746277, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.20263001322746277, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5090625882148743, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 55.3362837507438, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits": -0.5363056063652039, |
|
"logps": -76.1655044555664, |
|
"loss": 0.1893, |
|
"objective": 0.19915518164634705, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.19915518164634705, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.5551865100860596, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 54.49437843853531, |
|
"learning_rate": 2.4054581232470785e-08, |
|
"logits": -0.541746199131012, |
|
"logps": -76.23973083496094, |
|
"loss": 0.1902, |
|
"objective": 0.20770463347434998, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.20770463347434998, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5436845421791077, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 57.49186751897613, |
|
"learning_rate": 1.768028831677926e-08, |
|
"logits": -0.5542425513267517, |
|
"logps": -74.9803237915039, |
|
"loss": 0.1935, |
|
"objective": 0.17371943593025208, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.17371943593025208, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 1.3283706903457642, |
|
"eval_logits": -0.6168581247329712, |
|
"eval_logps": -80.06609344482422, |
|
"eval_loss": 2.2561168670654297, |
|
"eval_objective": 2.242354154586792, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2.242354154586792, |
|
"eval_runtime": 259.267, |
|
"eval_samples_per_second": 22.332, |
|
"eval_steps_per_second": 0.933, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5587651133537292, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 57.56815145820478, |
|
"learning_rate": 1.2282406866966078e-08, |
|
"logits": -0.5165177583694458, |
|
"logps": -75.15120697021484, |
|
"loss": 0.1923, |
|
"objective": 0.2012535035610199, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.2012535035610199, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5044924020767212, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 57.45096886360892, |
|
"learning_rate": 7.863060120144316e-09, |
|
"logits": -0.508831799030304, |
|
"logps": -75.34513854980469, |
|
"loss": 0.1944, |
|
"objective": 0.2123396247625351, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2123396247625351, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.52206951379776, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 58.02043264781572, |
|
"learning_rate": 4.423986410346526e-09, |
|
"logits": -0.5500971078872681, |
|
"logps": -74.25553131103516, |
|
"loss": 0.1747, |
|
"objective": 0.16172558069229126, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.16172558069229126, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5237457752227783, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 55.98430316965056, |
|
"learning_rate": 1.9665384847583622e-09, |
|
"logits": -0.5589514970779419, |
|
"logps": -75.981689453125, |
|
"loss": 0.1822, |
|
"objective": 0.18714649975299835, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.18714649975299835, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.5439503192901611, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 55.03768486905713, |
|
"learning_rate": 4.916829716183901e-10, |
|
"logits": -0.5287134051322937, |
|
"logps": -75.68090057373047, |
|
"loss": 0.1955, |
|
"objective": 0.20633025467395782, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.20633025467395782, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5364298224449158, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 56.14268444647409, |
|
"learning_rate": 0.0, |
|
"logits": -0.5877522826194763, |
|
"logps": -76.51420593261719, |
|
"loss": 0.1905, |
|
"objective": 0.20404843986034393, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.20404843986034393, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7624400413849137, |
|
"train_runtime": 35099.769, |
|
"train_samples_per_second": 7.237, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|