|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999080761654629, |
|
"eval_steps": 500, |
|
"global_step": 951, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005252790544977019, |
|
"grad_norm": 33.25, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"log_odds_chosen": -0.12333051860332489, |
|
"log_odds_ratio": -0.8621311187744141, |
|
"logits/chosen": -2.540858030319214, |
|
"logits/rejected": -2.1144332885742188, |
|
"logps/chosen": -1.1002752780914307, |
|
"logps/rejected": -1.0134268999099731, |
|
"loss": 2.3046, |
|
"nll_loss": 1.4424240589141846, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1.1002752780914307, |
|
"rewards/margins": -0.08684836328029633, |
|
"rewards/rejected": -1.0134268999099731, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010505581089954037, |
|
"grad_norm": 30.125, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"log_odds_chosen": -0.1254591941833496, |
|
"log_odds_ratio": -0.8488509058952332, |
|
"logits/chosen": -2.521646022796631, |
|
"logits/rejected": -2.12934947013855, |
|
"logps/chosen": -1.0548789501190186, |
|
"logps/rejected": -0.9548781514167786, |
|
"loss": 2.2337, |
|
"nll_loss": 1.3848837614059448, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -1.0548789501190186, |
|
"rewards/margins": -0.10000075399875641, |
|
"rewards/rejected": -0.9548781514167786, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015758371634931056, |
|
"grad_norm": 28.25, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"log_odds_chosen": -0.092379130423069, |
|
"log_odds_ratio": -0.839794933795929, |
|
"logits/chosen": -2.496335744857788, |
|
"logits/rejected": -2.134352445602417, |
|
"logps/chosen": -1.0547659397125244, |
|
"logps/rejected": -0.9888293147087097, |
|
"loss": 2.2323, |
|
"nll_loss": 1.3924893140792847, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -1.0547659397125244, |
|
"rewards/margins": -0.06593648344278336, |
|
"rewards/rejected": -0.9888293147087097, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.021011162179908074, |
|
"grad_norm": 27.75, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"log_odds_chosen": -0.08341892063617706, |
|
"log_odds_ratio": -0.845537006855011, |
|
"logits/chosen": -2.502532720565796, |
|
"logits/rejected": -2.0534327030181885, |
|
"logps/chosen": -1.0713450908660889, |
|
"logps/rejected": -1.0228570699691772, |
|
"loss": 2.2615, |
|
"nll_loss": 1.415948748588562, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.0713450908660889, |
|
"rewards/margins": -0.04848797246813774, |
|
"rewards/rejected": -1.0228570699691772, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.026263952724885097, |
|
"grad_norm": 31.625, |
|
"learning_rate": 5.208333333333334e-07, |
|
"log_odds_chosen": -0.05041329935193062, |
|
"log_odds_ratio": -0.8150845766067505, |
|
"logits/chosen": -2.3506855964660645, |
|
"logits/rejected": -2.041471481323242, |
|
"logps/chosen": -1.0723893642425537, |
|
"logps/rejected": -1.0427805185317993, |
|
"loss": 2.2269, |
|
"nll_loss": 1.411767601966858, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -1.0723893642425537, |
|
"rewards/margins": -0.029608914628624916, |
|
"rewards/rejected": -1.0427805185317993, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03151674326986211, |
|
"grad_norm": 34.75, |
|
"learning_rate": 6.249999999999999e-07, |
|
"log_odds_chosen": -0.16907325387001038, |
|
"log_odds_ratio": -0.8892423510551453, |
|
"logits/chosen": -2.4877123832702637, |
|
"logits/rejected": -2.091643810272217, |
|
"logps/chosen": -1.0780900716781616, |
|
"logps/rejected": -0.960413932800293, |
|
"loss": 2.2862, |
|
"nll_loss": 1.3969789743423462, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1.0780900716781616, |
|
"rewards/margins": -0.1176760345697403, |
|
"rewards/rejected": -0.960413932800293, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.036769533814839134, |
|
"grad_norm": 24.625, |
|
"learning_rate": 7.291666666666666e-07, |
|
"log_odds_chosen": -0.12296156585216522, |
|
"log_odds_ratio": -0.8445537686347961, |
|
"logits/chosen": -2.460153579711914, |
|
"logits/rejected": -2.100581169128418, |
|
"logps/chosen": -0.9918639063835144, |
|
"logps/rejected": -0.8978347778320312, |
|
"loss": 2.1014, |
|
"nll_loss": 1.256840467453003, |
|
"rewards/accuracies": 0.43437498807907104, |
|
"rewards/chosen": -0.9918639063835144, |
|
"rewards/margins": -0.09402903914451599, |
|
"rewards/rejected": -0.8978347778320312, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04202232435981615, |
|
"grad_norm": 20.375, |
|
"learning_rate": 8.333333333333333e-07, |
|
"log_odds_chosen": -0.10392768681049347, |
|
"log_odds_ratio": -0.8281729817390442, |
|
"logits/chosen": -2.4672484397888184, |
|
"logits/rejected": -2.1189260482788086, |
|
"logps/chosen": -0.9796692132949829, |
|
"logps/rejected": -0.8947553634643555, |
|
"loss": 2.0709, |
|
"nll_loss": 1.2427122592926025, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.9796692132949829, |
|
"rewards/margins": -0.08491390943527222, |
|
"rewards/rejected": -0.8947553634643555, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04727511490479317, |
|
"grad_norm": 25.75, |
|
"learning_rate": 9.374999999999999e-07, |
|
"log_odds_chosen": -0.07403279840946198, |
|
"log_odds_ratio": -0.8119841814041138, |
|
"logits/chosen": -2.5748581886291504, |
|
"logits/rejected": -2.2311367988586426, |
|
"logps/chosen": -0.9425970911979675, |
|
"logps/rejected": -0.8925843238830566, |
|
"loss": 1.966, |
|
"nll_loss": 1.1540277004241943, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.9425970911979675, |
|
"rewards/margins": -0.05001285672187805, |
|
"rewards/rejected": -0.8925843238830566, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05252790544977019, |
|
"grad_norm": 15.5625, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"log_odds_chosen": -0.015203160233795643, |
|
"log_odds_ratio": -0.7965196371078491, |
|
"logits/chosen": -2.517662763595581, |
|
"logits/rejected": -2.291977882385254, |
|
"logps/chosen": -1.0069010257720947, |
|
"logps/rejected": -0.9928563833236694, |
|
"loss": 2.001, |
|
"nll_loss": 1.2044353485107422, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -1.0069010257720947, |
|
"rewards/margins": -0.014044714160263538, |
|
"rewards/rejected": -0.9928563833236694, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05778069599474721, |
|
"grad_norm": 19.125, |
|
"learning_rate": 1.1458333333333333e-06, |
|
"log_odds_chosen": -0.06918958574533463, |
|
"log_odds_ratio": -0.8064200282096863, |
|
"logits/chosen": -2.7286930084228516, |
|
"logits/rejected": -2.3158278465270996, |
|
"logps/chosen": -0.9621369242668152, |
|
"logps/rejected": -0.9042080044746399, |
|
"loss": 1.9673, |
|
"nll_loss": 1.1608707904815674, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9621369242668152, |
|
"rewards/margins": -0.05792900174856186, |
|
"rewards/rejected": -0.9042080044746399, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06303348653972422, |
|
"grad_norm": 20.375, |
|
"learning_rate": 1.2499999999999999e-06, |
|
"log_odds_chosen": -0.055296190083026886, |
|
"log_odds_ratio": -0.795842170715332, |
|
"logits/chosen": -2.733304500579834, |
|
"logits/rejected": -2.257201671600342, |
|
"logps/chosen": -0.9258626699447632, |
|
"logps/rejected": -0.8971433639526367, |
|
"loss": 1.9557, |
|
"nll_loss": 1.159847378730774, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.9258626699447632, |
|
"rewards/margins": -0.028719374909996986, |
|
"rewards/rejected": -0.8971433639526367, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06828627708470125, |
|
"grad_norm": 18.25, |
|
"learning_rate": 1.3541666666666667e-06, |
|
"log_odds_chosen": -0.05717029422521591, |
|
"log_odds_ratio": -0.7737418413162231, |
|
"logits/chosen": -2.6654744148254395, |
|
"logits/rejected": -2.187049627304077, |
|
"logps/chosen": -0.8003360033035278, |
|
"logps/rejected": -0.7723677754402161, |
|
"loss": 1.8696, |
|
"nll_loss": 1.0958433151245117, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.8003360033035278, |
|
"rewards/margins": -0.02796824648976326, |
|
"rewards/rejected": -0.7723677754402161, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07353906762967827, |
|
"grad_norm": 19.375, |
|
"learning_rate": 1.4583333333333333e-06, |
|
"log_odds_chosen": 0.002531373407691717, |
|
"log_odds_ratio": -0.7339381575584412, |
|
"logits/chosen": -2.5733718872070312, |
|
"logits/rejected": -2.1028685569763184, |
|
"logps/chosen": -0.7143228054046631, |
|
"logps/rejected": -0.718761146068573, |
|
"loss": 1.7947, |
|
"nll_loss": 1.0607960224151611, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.7143228054046631, |
|
"rewards/margins": 0.004438319243490696, |
|
"rewards/rejected": -0.718761146068573, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07879185817465528, |
|
"grad_norm": 15.9375, |
|
"learning_rate": 1.5624999999999999e-06, |
|
"log_odds_chosen": 0.06011660769581795, |
|
"log_odds_ratio": -0.7009418606758118, |
|
"logits/chosen": -2.5496840476989746, |
|
"logits/rejected": -2.0580315589904785, |
|
"logps/chosen": -0.6317678689956665, |
|
"logps/rejected": -0.6753242611885071, |
|
"loss": 1.6452, |
|
"nll_loss": 0.9442570805549622, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -0.6317678689956665, |
|
"rewards/margins": 0.04355642572045326, |
|
"rewards/rejected": -0.6753242611885071, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0840446487196323, |
|
"grad_norm": 14.875, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"log_odds_chosen": 0.10804717242717743, |
|
"log_odds_ratio": -0.6780250072479248, |
|
"logits/chosen": -2.371317148208618, |
|
"logits/rejected": -1.9558740854263306, |
|
"logps/chosen": -0.5971282124519348, |
|
"logps/rejected": -0.6553691029548645, |
|
"loss": 1.6518, |
|
"nll_loss": 0.9737834930419922, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.5971282124519348, |
|
"rewards/margins": 0.05824087932705879, |
|
"rewards/rejected": -0.6553691029548645, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08929743926460933, |
|
"grad_norm": 15.375, |
|
"learning_rate": 1.7708333333333332e-06, |
|
"log_odds_chosen": 0.13051114976406097, |
|
"log_odds_ratio": -0.6608899235725403, |
|
"logits/chosen": -2.441239833831787, |
|
"logits/rejected": -2.080503225326538, |
|
"logps/chosen": -0.5396751165390015, |
|
"logps/rejected": -0.6057919263839722, |
|
"loss": 1.6033, |
|
"nll_loss": 0.9424022436141968, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5396751165390015, |
|
"rewards/margins": 0.06611678004264832, |
|
"rewards/rejected": -0.6057919263839722, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09455022980958634, |
|
"grad_norm": 15.5625, |
|
"learning_rate": 1.8749999999999998e-06, |
|
"log_odds_chosen": 0.19523096084594727, |
|
"log_odds_ratio": -0.6398605108261108, |
|
"logits/chosen": -2.388965606689453, |
|
"logits/rejected": -2.051954507827759, |
|
"logps/chosen": -0.514168381690979, |
|
"logps/rejected": -0.6006937623023987, |
|
"loss": 1.5701, |
|
"nll_loss": 0.9302393794059753, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.514168381690979, |
|
"rewards/margins": 0.08652535825967789, |
|
"rewards/rejected": -0.6006937623023987, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09980302035456336, |
|
"grad_norm": 10.625, |
|
"learning_rate": 1.9791666666666666e-06, |
|
"log_odds_chosen": 0.12450599670410156, |
|
"log_odds_ratio": -0.6654147505760193, |
|
"logits/chosen": -2.3805699348449707, |
|
"logits/rejected": -2.010688304901123, |
|
"logps/chosen": -0.49114733934402466, |
|
"logps/rejected": -0.5494757890701294, |
|
"loss": 1.5446, |
|
"nll_loss": 0.8791642189025879, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.49114733934402466, |
|
"rewards/margins": 0.05832843855023384, |
|
"rewards/rejected": -0.5494757890701294, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.10505581089954039, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 1.9998919935516766e-06, |
|
"log_odds_chosen": 0.17239874601364136, |
|
"log_odds_ratio": -0.6507178544998169, |
|
"logits/chosen": -2.2754485607147217, |
|
"logits/rejected": -2.040553569793701, |
|
"logps/chosen": -0.485573947429657, |
|
"logps/rejected": -0.5674648284912109, |
|
"loss": 1.4726, |
|
"nll_loss": 0.8218661546707153, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.485573947429657, |
|
"rewards/margins": 0.08189092576503754, |
|
"rewards/rejected": -0.5674648284912109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1103086014445174, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.999453257340926e-06, |
|
"log_odds_chosen": 0.2180129736661911, |
|
"log_odds_ratio": -0.6303091049194336, |
|
"logits/chosen": -2.4427425861358643, |
|
"logits/rejected": -2.181597948074341, |
|
"logps/chosen": -0.4835621416568756, |
|
"logps/rejected": -0.5780085325241089, |
|
"loss": 1.4945, |
|
"nll_loss": 0.8642352223396301, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.4835621416568756, |
|
"rewards/margins": 0.09444637596607208, |
|
"rewards/rejected": -0.5780085325241089, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.11556139198949442, |
|
"grad_norm": 8.0, |
|
"learning_rate": 1.998677188931617e-06, |
|
"log_odds_chosen": 0.27974802255630493, |
|
"log_odds_ratio": -0.6000305414199829, |
|
"logits/chosen": -2.4073500633239746, |
|
"logits/rejected": -2.158104419708252, |
|
"logps/chosen": -0.4692881107330322, |
|
"logps/rejected": -0.5915614366531372, |
|
"loss": 1.5236, |
|
"nll_loss": 0.9235590100288391, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4692881107330322, |
|
"rewards/margins": 0.12227334082126617, |
|
"rewards/rejected": -0.5915614366531372, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12081418253447143, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 1.997564050259824e-06, |
|
"log_odds_chosen": 0.28100112080574036, |
|
"log_odds_ratio": -0.601650595664978, |
|
"logits/chosen": -2.3918166160583496, |
|
"logits/rejected": -2.029897689819336, |
|
"logps/chosen": -0.4723443388938904, |
|
"logps/rejected": -0.5918693542480469, |
|
"loss": 1.5166, |
|
"nll_loss": 0.9149250984191895, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.4723443388938904, |
|
"rewards/margins": 0.11952495574951172, |
|
"rewards/rejected": -0.5918693542480469, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.12606697307944845, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.996114217028476e-06, |
|
"log_odds_chosen": 0.25655943155288696, |
|
"log_odds_ratio": -0.6146520376205444, |
|
"logits/chosen": -2.470524311065674, |
|
"logits/rejected": -2.134540557861328, |
|
"logps/chosen": -0.477255642414093, |
|
"logps/rejected": -0.5925866961479187, |
|
"loss": 1.5111, |
|
"nll_loss": 0.8964211344718933, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.477255642414093, |
|
"rewards/margins": 0.11533106863498688, |
|
"rewards/rejected": -0.5925866961479187, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1313197636244255, |
|
"grad_norm": 8.75, |
|
"learning_rate": 1.994328178580548e-06, |
|
"log_odds_chosen": 0.2803216576576233, |
|
"log_odds_ratio": -0.601326584815979, |
|
"logits/chosen": -2.367903232574463, |
|
"logits/rejected": -2.018990993499756, |
|
"logps/chosen": -0.46639877557754517, |
|
"logps/rejected": -0.5851758718490601, |
|
"loss": 1.481, |
|
"nll_loss": 0.8796539306640625, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.46639877557754517, |
|
"rewards/margins": 0.11877720057964325, |
|
"rewards/rejected": -0.5851758718490601, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1365725541694025, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.9922065377339033e-06, |
|
"log_odds_chosen": 0.2894327640533447, |
|
"log_odds_ratio": -0.6087297201156616, |
|
"logits/chosen": -2.5040173530578613, |
|
"logits/rejected": -2.2061374187469482, |
|
"logps/chosen": -0.4694454073905945, |
|
"logps/rejected": -0.5906943678855896, |
|
"loss": 1.4968, |
|
"nll_loss": 0.888100266456604, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.4694454073905945, |
|
"rewards/margins": 0.12124893814325333, |
|
"rewards/rejected": -0.5906943678855896, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14182534471437952, |
|
"grad_norm": 7.375, |
|
"learning_rate": 1.98975001057783e-06, |
|
"log_odds_chosen": 0.30140143632888794, |
|
"log_odds_ratio": -0.5964145660400391, |
|
"logits/chosen": -2.4213032722473145, |
|
"logits/rejected": -2.004279375076294, |
|
"logps/chosen": -0.44823235273361206, |
|
"logps/rejected": -0.583377480506897, |
|
"loss": 1.4442, |
|
"nll_loss": 0.8478012084960938, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44823235273361206, |
|
"rewards/margins": 0.13514509797096252, |
|
"rewards/rejected": -0.583377480506897, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14707813525935653, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.986959426231349e-06, |
|
"log_odds_chosen": 0.33596453070640564, |
|
"log_odds_ratio": -0.5885840654373169, |
|
"logits/chosen": -2.471541166305542, |
|
"logits/rejected": -2.1307930946350098, |
|
"logps/chosen": -0.47856172919273376, |
|
"logps/rejected": -0.6193875074386597, |
|
"loss": 1.4974, |
|
"nll_loss": 0.9087700843811035, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.47856172919273376, |
|
"rewards/margins": 0.14082582294940948, |
|
"rewards/rejected": -0.6193875074386597, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15233092580433355, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.9838357265633724e-06, |
|
"log_odds_chosen": 0.35230931639671326, |
|
"log_odds_ratio": -0.5799855589866638, |
|
"logits/chosen": -2.4745469093322754, |
|
"logits/rejected": -2.0399346351623535, |
|
"logps/chosen": -0.45584583282470703, |
|
"logps/rejected": -0.6081861257553101, |
|
"loss": 1.4708, |
|
"nll_loss": 0.8907746076583862, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.45584583282470703, |
|
"rewards/margins": 0.15234029293060303, |
|
"rewards/rejected": -0.6081861257553101, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.15758371634931057, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 1.9803799658748095e-06, |
|
"log_odds_chosen": 0.32377585768699646, |
|
"log_odds_ratio": -0.5951502919197083, |
|
"logits/chosen": -2.3601431846618652, |
|
"logits/rejected": -2.0099222660064697, |
|
"logps/chosen": -0.46314555406570435, |
|
"logps/rejected": -0.6009119153022766, |
|
"loss": 1.4988, |
|
"nll_loss": 0.9036917686462402, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.46314555406570435, |
|
"rewards/margins": 0.13776634633541107, |
|
"rewards/rejected": -0.6009119153022766, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16283650689428758, |
|
"grad_norm": 9.75, |
|
"learning_rate": 1.9765933105427177e-06, |
|
"log_odds_chosen": 0.29054537415504456, |
|
"log_odds_ratio": -0.6080166101455688, |
|
"logits/chosen": -2.429213762283325, |
|
"logits/rejected": -2.1127424240112305, |
|
"logps/chosen": -0.48361191153526306, |
|
"logps/rejected": -0.6187745928764343, |
|
"loss": 1.5015, |
|
"nll_loss": 0.8934603929519653, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.48361191153526306, |
|
"rewards/margins": 0.13516271114349365, |
|
"rewards/rejected": -0.6187745928764343, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1680892974392646, |
|
"grad_norm": 14.0625, |
|
"learning_rate": 1.972477038626636e-06, |
|
"log_odds_chosen": 0.27817827463150024, |
|
"log_odds_ratio": -0.6112152338027954, |
|
"logits/chosen": -2.4246554374694824, |
|
"logits/rejected": -2.0224289894104004, |
|
"logps/chosen": -0.49589210748672485, |
|
"logps/rejected": -0.6248718500137329, |
|
"loss": 1.4978, |
|
"nll_loss": 0.886622428894043, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.49589210748672485, |
|
"rewards/margins": 0.12897971272468567, |
|
"rewards/rejected": -0.6248718500137329, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17334208798424164, |
|
"grad_norm": 10.875, |
|
"learning_rate": 1.9680325394372147e-06, |
|
"log_odds_chosen": 0.35008612275123596, |
|
"log_odds_ratio": -0.5786347389221191, |
|
"logits/chosen": -2.506772756576538, |
|
"logits/rejected": -2.057096004486084, |
|
"logps/chosen": -0.46079978346824646, |
|
"logps/rejected": -0.6112517714500427, |
|
"loss": 1.4896, |
|
"nll_loss": 0.9109176397323608, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.46079978346824646, |
|
"rewards/margins": 0.15045206248760223, |
|
"rewards/rejected": -0.6112517714500427, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.17859487852921865, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 1.9632613130673015e-06, |
|
"log_odds_chosen": 0.33634597063064575, |
|
"log_odds_ratio": -0.589142918586731, |
|
"logits/chosen": -2.467883348464966, |
|
"logits/rejected": -1.9834989309310913, |
|
"logps/chosen": -0.4864015579223633, |
|
"logps/rejected": -0.6304683089256287, |
|
"loss": 1.4988, |
|
"nll_loss": 0.9096533060073853, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.4864015579223633, |
|
"rewards/margins": 0.144066721200943, |
|
"rewards/rejected": -0.6304683089256287, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18384766907419567, |
|
"grad_norm": 26.75, |
|
"learning_rate": 1.9581649698856357e-06, |
|
"log_odds_chosen": 0.351374089717865, |
|
"log_odds_ratio": -0.5786073207855225, |
|
"logits/chosen": -2.3902525901794434, |
|
"logits/rejected": -2.0138325691223145, |
|
"logps/chosen": -0.45923271775245667, |
|
"logps/rejected": -0.6129686236381531, |
|
"loss": 1.477, |
|
"nll_loss": 0.8983781933784485, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.45923271775245667, |
|
"rewards/margins": 0.1537359207868576, |
|
"rewards/rejected": -0.6129686236381531, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.18910045961917268, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.952745229993319e-06, |
|
"log_odds_chosen": 0.3817608952522278, |
|
"log_odds_ratio": -0.5729137659072876, |
|
"logits/chosen": -2.52931547164917, |
|
"logits/rejected": -2.1916394233703613, |
|
"logps/chosen": -0.48729705810546875, |
|
"logps/rejected": -0.6591955423355103, |
|
"loss": 1.4891, |
|
"nll_loss": 0.9161707758903503, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.48729705810546875, |
|
"rewards/margins": 0.1718985140323639, |
|
"rewards/rejected": -0.6591955423355103, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1943532501641497, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.947003922643256e-06, |
|
"log_odds_chosen": 0.379459023475647, |
|
"log_odds_ratio": -0.5737109184265137, |
|
"logits/chosen": -2.282898426055908, |
|
"logits/rejected": -1.9805419445037842, |
|
"logps/chosen": -0.47503146529197693, |
|
"logps/rejected": -0.6488234400749207, |
|
"loss": 1.4454, |
|
"nll_loss": 0.8717378377914429, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.47503146529197693, |
|
"rewards/margins": 0.1737920045852661, |
|
"rewards/rejected": -0.6488234400749207, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.19960604070912671, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.9409429856227482e-06, |
|
"log_odds_chosen": 0.4121369421482086, |
|
"log_odds_ratio": -0.5561366081237793, |
|
"logits/chosen": -2.488356113433838, |
|
"logits/rejected": -2.0776686668395996, |
|
"logps/chosen": -0.4683772921562195, |
|
"logps/rejected": -0.647982656955719, |
|
"loss": 1.4436, |
|
"nll_loss": 0.8874515295028687, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4683772921562195, |
|
"rewards/margins": 0.17960533499717712, |
|
"rewards/rejected": -0.647982656955719, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20485883125410373, |
|
"grad_norm": 11.5, |
|
"learning_rate": 1.934564464599461e-06, |
|
"log_odds_chosen": 0.32919231057167053, |
|
"log_odds_ratio": -0.5908551812171936, |
|
"logits/chosen": -2.501392364501953, |
|
"logits/rejected": -2.0592591762542725, |
|
"logps/chosen": -0.49434512853622437, |
|
"logps/rejected": -0.6509113311767578, |
|
"loss": 1.4187, |
|
"nll_loss": 0.8278582692146301, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.49434512853622437, |
|
"rewards/margins": 0.15656621754169464, |
|
"rewards/rejected": -0.6509113311767578, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.21011162179908077, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 1.927870512430972e-06, |
|
"log_odds_chosen": 0.42371082305908203, |
|
"log_odds_ratio": -0.5525480508804321, |
|
"logits/chosen": -2.4069533348083496, |
|
"logits/rejected": -2.019406795501709, |
|
"logps/chosen": -0.4768436551094055, |
|
"logps/rejected": -0.6629732251167297, |
|
"loss": 1.4572, |
|
"nll_loss": 0.9046151041984558, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4768436551094055, |
|
"rewards/margins": 0.18612954020500183, |
|
"rewards/rejected": -0.6629732251167297, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2153644123440578, |
|
"grad_norm": 9.0, |
|
"learning_rate": 1.9208633884381526e-06, |
|
"log_odds_chosen": 0.42966872453689575, |
|
"log_odds_ratio": -0.5522044897079468, |
|
"logits/chosen": -2.430342197418213, |
|
"logits/rejected": -2.0743634700775146, |
|
"logps/chosen": -0.4722970426082611, |
|
"logps/rejected": -0.6621736884117126, |
|
"loss": 1.4295, |
|
"nll_loss": 0.8772872090339661, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4722970426082611, |
|
"rewards/margins": 0.18987664580345154, |
|
"rewards/rejected": -0.6621736884117126, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2206172028890348, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.9135454576426007e-06, |
|
"log_odds_chosen": 0.40302562713623047, |
|
"log_odds_ratio": -0.5604028105735779, |
|
"logits/chosen": -2.412562847137451, |
|
"logits/rejected": -2.0246427059173584, |
|
"logps/chosen": -0.4761424660682678, |
|
"logps/rejected": -0.661251425743103, |
|
"loss": 1.3993, |
|
"nll_loss": 0.8388580083847046, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4761424660682678, |
|
"rewards/margins": 0.18510892987251282, |
|
"rewards/rejected": -0.661251425743103, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22586999343401182, |
|
"grad_norm": 7.875, |
|
"learning_rate": 1.905919189968415e-06, |
|
"log_odds_chosen": 0.4606761932373047, |
|
"log_odds_ratio": -0.5445691347122192, |
|
"logits/chosen": -2.4419312477111816, |
|
"logits/rejected": -2.030771493911743, |
|
"logps/chosen": -0.4771277904510498, |
|
"logps/rejected": -0.6932464838027954, |
|
"loss": 1.4377, |
|
"nll_loss": 0.8931263089179993, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4771277904510498, |
|
"rewards/margins": 0.21611860394477844, |
|
"rewards/rejected": -0.6932464838027954, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.23112278397898883, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 1.897987159408548e-06, |
|
"log_odds_chosen": 0.4278109073638916, |
|
"log_odds_ratio": -0.5563892722129822, |
|
"logits/chosen": -2.4070868492126465, |
|
"logits/rejected": -2.033133029937744, |
|
"logps/chosen": -0.4777792990207672, |
|
"logps/rejected": -0.6746242642402649, |
|
"loss": 1.3836, |
|
"nll_loss": 0.827177882194519, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4777792990207672, |
|
"rewards/margins": 0.19684496521949768, |
|
"rewards/rejected": -0.6746242642402649, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23637557452396585, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.8897520431560433e-06, |
|
"log_odds_chosen": 0.39412638545036316, |
|
"log_odds_ratio": -0.5616167187690735, |
|
"logits/chosen": -2.437281608581543, |
|
"logits/rejected": -2.0233240127563477, |
|
"logps/chosen": -0.49209141731262207, |
|
"logps/rejected": -0.670540988445282, |
|
"loss": 1.3984, |
|
"nll_loss": 0.8367835879325867, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.49209141731262207, |
|
"rewards/margins": 0.17844951152801514, |
|
"rewards/rejected": -0.670540988445282, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.24162836506894286, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.8812166207004366e-06, |
|
"log_odds_chosen": 0.45934948325157166, |
|
"log_odds_ratio": -0.5536540746688843, |
|
"logits/chosen": -2.4575705528259277, |
|
"logits/rejected": -2.0787205696105957, |
|
"logps/chosen": -0.4777277112007141, |
|
"logps/rejected": -0.6928449869155884, |
|
"loss": 1.3871, |
|
"nll_loss": 0.8334070444107056, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.4777277112007141, |
|
"rewards/margins": 0.21511724591255188, |
|
"rewards/rejected": -0.6928449869155884, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2468811556139199, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 1.8723837728896337e-06, |
|
"log_odds_chosen": 0.45329445600509644, |
|
"log_odds_ratio": -0.5616171360015869, |
|
"logits/chosen": -2.522167682647705, |
|
"logits/rejected": -2.1475300788879395, |
|
"logps/chosen": -0.4945332407951355, |
|
"logps/rejected": -0.7140644788742065, |
|
"loss": 1.4402, |
|
"nll_loss": 0.878614068031311, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.4945332407951355, |
|
"rewards/margins": 0.21953122317790985, |
|
"rewards/rejected": -0.7140644788742065, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.2521339461588969, |
|
"grad_norm": 7.75, |
|
"learning_rate": 1.8632564809575738e-06, |
|
"log_odds_chosen": 0.4688095152378082, |
|
"log_odds_ratio": -0.5438790917396545, |
|
"logits/chosen": -2.512554168701172, |
|
"logits/rejected": -2.105734348297119, |
|
"logps/chosen": -0.48634210228919983, |
|
"logps/rejected": -0.7048304677009583, |
|
"loss": 1.4387, |
|
"nll_loss": 0.8948429226875305, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.48634210228919983, |
|
"rewards/margins": 0.21848826110363007, |
|
"rewards/rejected": -0.7048304677009583, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2573867367038739, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 1.8538378255180138e-06, |
|
"log_odds_chosen": 0.488097608089447, |
|
"log_odds_ratio": -0.5403500199317932, |
|
"logits/chosen": -2.3577160835266113, |
|
"logits/rejected": -2.0601189136505127, |
|
"logps/chosen": -0.5090717077255249, |
|
"logps/rejected": -0.7453780174255371, |
|
"loss": 1.4193, |
|
"nll_loss": 0.878923773765564, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5090717077255249, |
|
"rewards/margins": 0.23630623519420624, |
|
"rewards/rejected": -0.7453780174255371, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.262639527248851, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 1.8441309855247707e-06, |
|
"log_odds_chosen": 0.6032781004905701, |
|
"log_odds_ratio": -0.5000559091567993, |
|
"logits/chosen": -2.403979539871216, |
|
"logits/rejected": -2.1050338745117188, |
|
"logps/chosen": -0.5098007917404175, |
|
"logps/rejected": -0.8097056150436401, |
|
"loss": 1.4018, |
|
"nll_loss": 0.9017453193664551, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5098007917404175, |
|
"rewards/margins": 0.29990485310554504, |
|
"rewards/rejected": -0.8097056150436401, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.267892317793828, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 1.83413923719877e-06, |
|
"log_odds_chosen": 0.5410558581352234, |
|
"log_odds_ratio": -0.5238425135612488, |
|
"logits/chosen": -2.42203688621521, |
|
"logits/rejected": -2.095054864883423, |
|
"logps/chosen": -0.49079209566116333, |
|
"logps/rejected": -0.763100266456604, |
|
"loss": 1.3797, |
|
"nll_loss": 0.8558791875839233, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.49079209566116333, |
|
"rewards/margins": 0.2723081707954407, |
|
"rewards/rejected": -0.763100266456604, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.273145108338805, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 1.8238659529222668e-06, |
|
"log_odds_chosen": 0.5387502908706665, |
|
"log_odds_ratio": -0.5273549556732178, |
|
"logits/chosen": -2.458590269088745, |
|
"logits/rejected": -2.1467177867889404, |
|
"logps/chosen": -0.5123028755187988, |
|
"logps/rejected": -0.781539797782898, |
|
"loss": 1.4312, |
|
"nll_loss": 0.9038845300674438, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5123028755187988, |
|
"rewards/margins": 0.2692369818687439, |
|
"rewards/rejected": -0.781539797782898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.278397898883782, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 1.8133146001006117e-06, |
|
"log_odds_chosen": 0.585041880607605, |
|
"log_odds_ratio": -0.5241442322731018, |
|
"logits/chosen": -2.434957504272461, |
|
"logits/rejected": -2.08172345161438, |
|
"logps/chosen": -0.5419186353683472, |
|
"logps/rejected": -0.8563257455825806, |
|
"loss": 1.4995, |
|
"nll_loss": 0.9753583669662476, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.5419186353683472, |
|
"rewards/margins": 0.314407080411911, |
|
"rewards/rejected": -0.8563257455825806, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.28365068942875904, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 1.8024887399919408e-06, |
|
"log_odds_chosen": 0.686429500579834, |
|
"log_odds_ratio": -0.49835652112960815, |
|
"logits/chosen": -2.493675947189331, |
|
"logits/rejected": -2.192899465560913, |
|
"logps/chosen": -0.533765435218811, |
|
"logps/rejected": -0.9061405062675476, |
|
"loss": 1.4053, |
|
"nll_loss": 0.9069935083389282, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.533765435218811, |
|
"rewards/margins": 0.3723750710487366, |
|
"rewards/rejected": -0.9061405062675476, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.28890347997373605, |
|
"grad_norm": 11.5, |
|
"learning_rate": 1.7913920265051946e-06, |
|
"log_odds_chosen": 0.7045778036117554, |
|
"log_odds_ratio": -0.49370041489601135, |
|
"logits/chosen": -2.4899590015411377, |
|
"logits/rejected": -2.1618402004241943, |
|
"logps/chosen": -0.5214771032333374, |
|
"logps/rejected": -0.9220815896987915, |
|
"loss": 1.4176, |
|
"nll_loss": 0.92388916015625, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.5214771032333374, |
|
"rewards/margins": 0.4006044268608093, |
|
"rewards/rejected": -0.9220815896987915, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.29415627051871307, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 1.780028204966859e-06, |
|
"log_odds_chosen": 0.6810405254364014, |
|
"log_odds_ratio": -0.4989449381828308, |
|
"logits/chosen": -2.3327696323394775, |
|
"logits/rejected": -2.0119078159332275, |
|
"logps/chosen": -0.5228633880615234, |
|
"logps/rejected": -0.8827990293502808, |
|
"loss": 1.405, |
|
"nll_loss": 0.9060786962509155, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5228633880615234, |
|
"rewards/margins": 0.3599356710910797, |
|
"rewards/rejected": -0.8827990293502808, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2994090610636901, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.768401110856859e-06, |
|
"log_odds_chosen": 0.7910138964653015, |
|
"log_odds_ratio": -0.47219276428222656, |
|
"logits/chosen": -2.465003252029419, |
|
"logits/rejected": -2.085939407348633, |
|
"logps/chosen": -0.5146728754043579, |
|
"logps/rejected": -0.9470351934432983, |
|
"loss": 1.3015, |
|
"nll_loss": 0.8292847871780396, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5146728754043579, |
|
"rewards/margins": 0.43236231803894043, |
|
"rewards/rejected": -0.9470351934432983, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3046618516086671, |
|
"grad_norm": 13.6875, |
|
"learning_rate": 1.7565146685140167e-06, |
|
"log_odds_chosen": 0.771044135093689, |
|
"log_odds_ratio": -0.4853692948818207, |
|
"logits/chosen": -2.4471678733825684, |
|
"logits/rejected": -2.1012349128723145, |
|
"logps/chosen": -0.5462040901184082, |
|
"logps/rejected": -0.9886453747749329, |
|
"loss": 1.3976, |
|
"nll_loss": 0.9122269749641418, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5462040901184082, |
|
"rewards/margins": 0.4424411654472351, |
|
"rewards/rejected": -0.9886453747749329, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3099146421536441, |
|
"grad_norm": 12.0, |
|
"learning_rate": 1.7443728898115224e-06, |
|
"log_odds_chosen": 0.6316434144973755, |
|
"log_odds_ratio": -0.5107887983322144, |
|
"logits/chosen": -2.432225465774536, |
|
"logits/rejected": -2.0828986167907715, |
|
"logps/chosen": -0.5212147235870361, |
|
"logps/rejected": -0.8626314997673035, |
|
"loss": 1.3532, |
|
"nll_loss": 0.8424150347709656, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.5212147235870361, |
|
"rewards/margins": 0.34141671657562256, |
|
"rewards/rejected": -0.8626314997673035, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.31516743269862113, |
|
"grad_norm": 19.875, |
|
"learning_rate": 1.7319798728028616e-06, |
|
"log_odds_chosen": 0.8003711700439453, |
|
"log_odds_ratio": -0.4749313294887543, |
|
"logits/chosen": -2.4634110927581787, |
|
"logits/rejected": -2.111607313156128, |
|
"logps/chosen": -0.5615866780281067, |
|
"logps/rejected": -1.0098183155059814, |
|
"loss": 1.4088, |
|
"nll_loss": 0.9338866472244263, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5615866780281067, |
|
"rewards/margins": 0.44823163747787476, |
|
"rewards/rejected": -1.0098183155059814, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32042022324359815, |
|
"grad_norm": 17.25, |
|
"learning_rate": 1.719339800338651e-06, |
|
"log_odds_chosen": 0.8279815912246704, |
|
"log_odds_ratio": -0.4675443172454834, |
|
"logits/chosen": -2.5601465702056885, |
|
"logits/rejected": -2.2116811275482178, |
|
"logps/chosen": -0.5433454513549805, |
|
"logps/rejected": -1.0226011276245117, |
|
"loss": 1.3768, |
|
"nll_loss": 0.9092954397201538, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.5433454513549805, |
|
"rewards/margins": 0.479255735874176, |
|
"rewards/rejected": -1.0226011276245117, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.32567301378857516, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 1.7064569386548585e-06, |
|
"log_odds_chosen": 0.859075665473938, |
|
"log_odds_ratio": -0.4543831944465637, |
|
"logits/chosen": -2.531367301940918, |
|
"logits/rejected": -2.2318122386932373, |
|
"logps/chosen": -0.5256025195121765, |
|
"logps/rejected": -1.0284937620162964, |
|
"loss": 1.3533, |
|
"nll_loss": 0.8989534378051758, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.5256025195121765, |
|
"rewards/margins": 0.5028911828994751, |
|
"rewards/rejected": -1.0284937620162964, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3309258043335522, |
|
"grad_norm": 16.625, |
|
"learning_rate": 1.6933356359328754e-06, |
|
"log_odds_chosen": 0.7117995619773865, |
|
"log_odds_ratio": -0.4905334413051605, |
|
"logits/chosen": -2.5292108058929443, |
|
"logits/rejected": -2.1856768131256104, |
|
"logps/chosen": -0.5284509658813477, |
|
"logps/rejected": -0.915407657623291, |
|
"loss": 1.371, |
|
"nll_loss": 0.8804505467414856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5284509658813477, |
|
"rewards/margins": 0.3869567811489105, |
|
"rewards/rejected": -0.915407657623291, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3361785948785292, |
|
"grad_norm": 17.875, |
|
"learning_rate": 1.679980320831934e-06, |
|
"log_odds_chosen": 0.7291110754013062, |
|
"log_odds_ratio": -0.4787971079349518, |
|
"logits/chosen": -2.4682400226593018, |
|
"logits/rejected": -2.2220332622528076, |
|
"logps/chosen": -0.5479062795639038, |
|
"logps/rejected": -0.9491809606552124, |
|
"loss": 1.3781, |
|
"nll_loss": 0.8992602229118347, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5479062795639038, |
|
"rewards/margins": 0.40127477049827576, |
|
"rewards/rejected": -0.9491809606552124, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34143138542350626, |
|
"grad_norm": 32.0, |
|
"learning_rate": 1.6663955009943602e-06, |
|
"log_odds_chosen": 0.9077841639518738, |
|
"log_odds_ratio": -0.4515516757965088, |
|
"logits/chosen": -2.4324584007263184, |
|
"logits/rejected": -2.178394317626953, |
|
"logps/chosen": -0.5766757726669312, |
|
"logps/rejected": -1.1069071292877197, |
|
"loss": 1.374, |
|
"nll_loss": 0.9224408268928528, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.5766757726669312, |
|
"rewards/margins": 0.530231237411499, |
|
"rewards/rejected": -1.1069071292877197, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3466841759684833, |
|
"grad_norm": 23.0, |
|
"learning_rate": 1.6525857615241685e-06, |
|
"log_odds_chosen": 0.733812689781189, |
|
"log_odds_ratio": -0.4906436800956726, |
|
"logits/chosen": -2.523135185241699, |
|
"logits/rejected": -2.1835999488830566, |
|
"logps/chosen": -0.5466452836990356, |
|
"logps/rejected": -0.9662971496582031, |
|
"loss": 1.4195, |
|
"nll_loss": 0.9288629293441772, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.5466452836990356, |
|
"rewards/margins": 0.4196518361568451, |
|
"rewards/rejected": -0.9662971496582031, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3519369665134603, |
|
"grad_norm": 21.875, |
|
"learning_rate": 1.6385557634395136e-06, |
|
"log_odds_chosen": 0.7822979688644409, |
|
"log_odds_ratio": -0.47422999143600464, |
|
"logits/chosen": -2.4535679817199707, |
|
"logits/rejected": -2.2028393745422363, |
|
"logps/chosen": -0.5340802669525146, |
|
"logps/rejected": -0.9806568026542664, |
|
"loss": 1.3555, |
|
"nll_loss": 0.881304144859314, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5340802669525146, |
|
"rewards/margins": 0.44657665491104126, |
|
"rewards/rejected": -0.9806568026542664, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3571897570584373, |
|
"grad_norm": 21.5, |
|
"learning_rate": 1.624310242099518e-06, |
|
"log_odds_chosen": 0.7664231061935425, |
|
"log_odds_ratio": -0.48080235719680786, |
|
"logits/chosen": -2.453505039215088, |
|
"logits/rejected": -2.18292498588562, |
|
"logps/chosen": -0.5327800512313843, |
|
"logps/rejected": -0.9523041844367981, |
|
"loss": 1.4089, |
|
"nll_loss": 0.9281209111213684, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5327800512313843, |
|
"rewards/margins": 0.4195241332054138, |
|
"rewards/rejected": -0.9523041844367981, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3624425476034143, |
|
"grad_norm": 24.5, |
|
"learning_rate": 1.609854005606009e-06, |
|
"log_odds_chosen": 0.9470375776290894, |
|
"log_odds_ratio": -0.4272763729095459, |
|
"logits/chosen": -2.5423166751861572, |
|
"logits/rejected": -2.210846424102783, |
|
"logps/chosen": -0.5365777015686035, |
|
"logps/rejected": -1.076774001121521, |
|
"loss": 1.3329, |
|
"nll_loss": 0.9056490063667297, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.5365777015686035, |
|
"rewards/margins": 0.5401962995529175, |
|
"rewards/rejected": -1.076774001121521, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.36769533814839134, |
|
"grad_norm": 15.125, |
|
"learning_rate": 1.5951919331807048e-06, |
|
"log_odds_chosen": 0.9901137351989746, |
|
"log_odds_ratio": -0.43201208114624023, |
|
"logits/chosen": -2.3910915851593018, |
|
"logits/rejected": -2.085310935974121, |
|
"logps/chosen": -0.5491678714752197, |
|
"logps/rejected": -1.141390085220337, |
|
"loss": 1.3711, |
|
"nll_loss": 0.9390678405761719, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5491678714752197, |
|
"rewards/margins": 0.5922220945358276, |
|
"rewards/rejected": -1.141390085220337, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37294812869336835, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.5803289735183949e-06, |
|
"log_odds_chosen": 0.9613128900527954, |
|
"log_odds_ratio": -0.43703293800354004, |
|
"logits/chosen": -2.404744863510132, |
|
"logits/rejected": -2.0907814502716064, |
|
"logps/chosen": -0.5635210871696472, |
|
"logps/rejected": -1.1492810249328613, |
|
"loss": 1.3534, |
|
"nll_loss": 0.9164144396781921, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.5635210871696472, |
|
"rewards/margins": 0.5857599377632141, |
|
"rewards/rejected": -1.1492810249328613, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.37820091923834537, |
|
"grad_norm": 32.25, |
|
"learning_rate": 1.5652701431166717e-06, |
|
"log_odds_chosen": 0.9359542727470398, |
|
"log_odds_ratio": -0.4396037459373474, |
|
"logits/chosen": -2.4650635719299316, |
|
"logits/rejected": -2.122915267944336, |
|
"logps/chosen": -0.5267240405082703, |
|
"logps/rejected": -1.0681325197219849, |
|
"loss": 1.3381, |
|
"nll_loss": 0.8984518051147461, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.5267240405082703, |
|
"rewards/margins": 0.5414084792137146, |
|
"rewards/rejected": -1.0681325197219849, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3834537097833224, |
|
"grad_norm": 22.5, |
|
"learning_rate": 1.550020524582781e-06, |
|
"log_odds_chosen": 0.9607855677604675, |
|
"log_odds_ratio": -0.4296341836452484, |
|
"logits/chosen": -2.556321620941162, |
|
"logits/rejected": -2.233931064605713, |
|
"logps/chosen": -0.5581452250480652, |
|
"logps/rejected": -1.131134033203125, |
|
"loss": 1.2919, |
|
"nll_loss": 0.8622277975082397, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5581452250480652, |
|
"rewards/margins": 0.5729888677597046, |
|
"rewards/rejected": -1.131134033203125, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3887065003282994, |
|
"grad_norm": 20.375, |
|
"learning_rate": 1.5345852649181553e-06, |
|
"log_odds_chosen": 0.9939554333686829, |
|
"log_odds_ratio": -0.4331156313419342, |
|
"logits/chosen": -2.4889018535614014, |
|
"logits/rejected": -2.2245144844055176, |
|
"logps/chosen": -0.5625091791152954, |
|
"logps/rejected": -1.159073829650879, |
|
"loss": 1.3688, |
|
"nll_loss": 0.9356663823127747, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5625091791152954, |
|
"rewards/margins": 0.5965645909309387, |
|
"rewards/rejected": -1.159073829650879, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3939592908732764, |
|
"grad_norm": 23.625, |
|
"learning_rate": 1.5189695737812151e-06, |
|
"log_odds_chosen": 1.057094931602478, |
|
"log_odds_ratio": -0.4173505902290344, |
|
"logits/chosen": -2.63775634765625, |
|
"logits/rejected": -2.2736358642578125, |
|
"logps/chosen": -0.5382205843925476, |
|
"logps/rejected": -1.1550116539001465, |
|
"loss": 1.3662, |
|
"nll_loss": 0.9488565325737, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.5382205843925476, |
|
"rewards/margins": 0.6167910099029541, |
|
"rewards/rejected": -1.1550116539001465, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.39921208141825343, |
|
"grad_norm": 20.375, |
|
"learning_rate": 1.5031787217290216e-06, |
|
"log_odds_chosen": 1.2109272480010986, |
|
"log_odds_ratio": -0.40476536750793457, |
|
"logits/chosen": -2.441784143447876, |
|
"logits/rejected": -2.141080856323242, |
|
"logps/chosen": -0.5574549436569214, |
|
"logps/rejected": -1.3256219625473022, |
|
"loss": 1.3395, |
|
"nll_loss": 0.9347711801528931, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5574549436569214, |
|
"rewards/margins": 0.7681670188903809, |
|
"rewards/rejected": -1.3256219625473022, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40446487196323044, |
|
"grad_norm": 22.125, |
|
"learning_rate": 1.487218038438377e-06, |
|
"log_odds_chosen": 1.0492345094680786, |
|
"log_odds_ratio": -0.41920414566993713, |
|
"logits/chosen": -2.4877960681915283, |
|
"logits/rejected": -2.2220120429992676, |
|
"logps/chosen": -0.5476792454719543, |
|
"logps/rejected": -1.1791220903396606, |
|
"loss": 1.3255, |
|
"nll_loss": 0.9063073992729187, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.5476792454719543, |
|
"rewards/margins": 0.6314427256584167, |
|
"rewards/rejected": -1.1791220903396606, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.40971766250820746, |
|
"grad_norm": 52.5, |
|
"learning_rate": 1.4710929109069672e-06, |
|
"log_odds_chosen": 1.1698648929595947, |
|
"log_odds_ratio": -0.4003461003303528, |
|
"logits/chosen": -2.450030565261841, |
|
"logits/rejected": -2.1449716091156006, |
|
"logps/chosen": -0.5605112314224243, |
|
"logps/rejected": -1.2878248691558838, |
|
"loss": 1.3523, |
|
"nll_loss": 0.9519191980361938, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5605112314224243, |
|
"rewards/margins": 0.7273136377334595, |
|
"rewards/rejected": -1.2878248691558838, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41497045305318453, |
|
"grad_norm": 23.75, |
|
"learning_rate": 1.4548087816351614e-06, |
|
"log_odds_chosen": 1.1297777891159058, |
|
"log_odds_ratio": -0.41146859526634216, |
|
"logits/chosen": -2.514195442199707, |
|
"logits/rejected": -2.1877148151397705, |
|
"logps/chosen": -0.5281041860580444, |
|
"logps/rejected": -1.2085294723510742, |
|
"loss": 1.2817, |
|
"nll_loss": 0.8702155947685242, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5281041860580444, |
|
"rewards/margins": 0.6804252862930298, |
|
"rewards/rejected": -1.2085294723510742, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.42022324359816154, |
|
"grad_norm": 68.0, |
|
"learning_rate": 1.4383711467890773e-06, |
|
"log_odds_chosen": 1.1593742370605469, |
|
"log_odds_ratio": -0.4072793424129486, |
|
"logits/chosen": -2.410384178161621, |
|
"logits/rejected": -2.1880173683166504, |
|
"logps/chosen": -0.5577239990234375, |
|
"logps/rejected": -1.2925007343292236, |
|
"loss": 1.2823, |
|
"nll_loss": 0.8749955892562866, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.5577239990234375, |
|
"rewards/margins": 0.7347767353057861, |
|
"rewards/rejected": -1.2925007343292236, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42547603414313856, |
|
"grad_norm": 26.75, |
|
"learning_rate": 1.4217855543455323e-06, |
|
"log_odds_chosen": 1.0840833187103271, |
|
"log_odds_ratio": -0.4106718599796295, |
|
"logits/chosen": -2.384483575820923, |
|
"logits/rejected": -2.11120343208313, |
|
"logps/chosen": -0.5574430227279663, |
|
"logps/rejected": -1.2079960107803345, |
|
"loss": 1.3143, |
|
"nll_loss": 0.9036461710929871, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5574430227279663, |
|
"rewards/margins": 0.6505529880523682, |
|
"rewards/rejected": -1.2079960107803345, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.4307288246881156, |
|
"grad_norm": 22.625, |
|
"learning_rate": 1.4050576022195082e-06, |
|
"log_odds_chosen": 0.8836471438407898, |
|
"log_odds_ratio": -0.4627167582511902, |
|
"logits/chosen": -2.4845831394195557, |
|
"logits/rejected": -2.3066840171813965, |
|
"logps/chosen": -0.5467715263366699, |
|
"logps/rejected": -1.0581128597259521, |
|
"loss": 1.3641, |
|
"nll_loss": 0.9014018774032593, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5467715263366699, |
|
"rewards/margins": 0.5113412141799927, |
|
"rewards/rejected": -1.0581128597259521, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4359816152330926, |
|
"grad_norm": 34.0, |
|
"learning_rate": 1.3881929363747626e-06, |
|
"log_odds_chosen": 1.0594258308410645, |
|
"log_odds_ratio": -0.4148578643798828, |
|
"logits/chosen": -2.3405816555023193, |
|
"logits/rejected": -2.115149974822998, |
|
"logps/chosen": -0.5290128588676453, |
|
"logps/rejected": -1.1592894792556763, |
|
"loss": 1.3394, |
|
"nll_loss": 0.9245734214782715, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.5290128588676453, |
|
"rewards/margins": 0.6302765607833862, |
|
"rewards/rejected": -1.1592894792556763, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4412344057780696, |
|
"grad_norm": 47.0, |
|
"learning_rate": 1.3711972489182206e-06, |
|
"log_odds_chosen": 1.4167802333831787, |
|
"log_odds_ratio": -0.3603227734565735, |
|
"logits/chosen": -2.4658875465393066, |
|
"logits/rejected": -2.18940806388855, |
|
"logps/chosen": -0.5862340331077576, |
|
"logps/rejected": -1.5004864931106567, |
|
"loss": 1.305, |
|
"nll_loss": 0.9447038769721985, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5862340331077576, |
|
"rewards/margins": 0.9142524003982544, |
|
"rewards/rejected": -1.5004864931106567, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4464871963230466, |
|
"grad_norm": 19.75, |
|
"learning_rate": 1.3540762761787936e-06, |
|
"log_odds_chosen": 1.2667293548583984, |
|
"log_odds_ratio": -0.3922019898891449, |
|
"logits/chosen": -2.449897289276123, |
|
"logits/rejected": -2.1496291160583496, |
|
"logps/chosen": -0.5754435658454895, |
|
"logps/rejected": -1.3866373300552368, |
|
"loss": 1.2676, |
|
"nll_loss": 0.8754428625106812, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5754435658454895, |
|
"rewards/margins": 0.8111938238143921, |
|
"rewards/rejected": -1.3866373300552368, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.45173998686802364, |
|
"grad_norm": 39.0, |
|
"learning_rate": 1.3368357967712725e-06, |
|
"log_odds_chosen": 1.156019687652588, |
|
"log_odds_ratio": -0.395340234041214, |
|
"logits/chosen": -2.553677797317505, |
|
"logits/rejected": -2.2673325538635254, |
|
"logps/chosen": -0.5371165871620178, |
|
"logps/rejected": -1.2541286945343018, |
|
"loss": 1.3117, |
|
"nll_loss": 0.9164005517959595, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5371165871620178, |
|
"rewards/margins": 0.7170120477676392, |
|
"rewards/rejected": -1.2541286945343018, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45699277741300065, |
|
"grad_norm": 44.25, |
|
"learning_rate": 1.3194816296459482e-06, |
|
"log_odds_chosen": 1.1215965747833252, |
|
"log_odds_ratio": -0.40178972482681274, |
|
"logits/chosen": -2.4841268062591553, |
|
"logits/rejected": -2.2464358806610107, |
|
"logps/chosen": -0.6227961182594299, |
|
"logps/rejected": -1.3194401264190674, |
|
"loss": 1.3687, |
|
"nll_loss": 0.9668703079223633, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.6227961182594299, |
|
"rewards/margins": 0.6966440081596375, |
|
"rewards/rejected": -1.3194401264190674, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.46224556795797767, |
|
"grad_norm": 30.625, |
|
"learning_rate": 1.302019632124619e-06, |
|
"log_odds_chosen": 1.4459072351455688, |
|
"log_odds_ratio": -0.3312341868877411, |
|
"logits/chosen": -2.497469902038574, |
|
"logits/rejected": -2.215177297592163, |
|
"logps/chosen": -0.5155361294746399, |
|
"logps/rejected": -1.4294028282165527, |
|
"loss": 1.227, |
|
"nll_loss": 0.8957819938659668, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.5155361294746399, |
|
"rewards/margins": 0.9138666391372681, |
|
"rewards/rejected": -1.4294028282165527, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4674983585029547, |
|
"grad_norm": 27.5, |
|
"learning_rate": 1.284455697923646e-06, |
|
"log_odds_chosen": 1.5342215299606323, |
|
"log_odds_ratio": -0.3261391222476959, |
|
"logits/chosen": -2.5461294651031494, |
|
"logits/rejected": -2.2099266052246094, |
|
"logps/chosen": -0.5843450427055359, |
|
"logps/rejected": -1.5760066509246826, |
|
"loss": 1.312, |
|
"nll_loss": 0.9858700037002563, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.5843450427055359, |
|
"rewards/margins": 0.991661548614502, |
|
"rewards/rejected": -1.5760066509246826, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4727511490479317, |
|
"grad_norm": 43.0, |
|
"learning_rate": 1.2667957551647261e-06, |
|
"log_odds_chosen": 1.2222964763641357, |
|
"log_odds_ratio": -0.3712048828601837, |
|
"logits/chosen": -2.5557785034179688, |
|
"logits/rejected": -2.261915922164917, |
|
"logps/chosen": -0.5360510945320129, |
|
"logps/rejected": -1.2696157693862915, |
|
"loss": 1.233, |
|
"nll_loss": 0.8618295788764954, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.5360510945320129, |
|
"rewards/margins": 0.7335647344589233, |
|
"rewards/rejected": -1.2696157693862915, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4780039395929087, |
|
"grad_norm": 50.5, |
|
"learning_rate": 1.24904576437405e-06, |
|
"log_odds_chosen": 1.1964861154556274, |
|
"log_odds_ratio": -0.380424439907074, |
|
"logits/chosen": -2.387500762939453, |
|
"logits/rejected": -2.2171878814697266, |
|
"logps/chosen": -0.5144879221916199, |
|
"logps/rejected": -1.2391068935394287, |
|
"loss": 1.182, |
|
"nll_loss": 0.801527202129364, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.5144879221916199, |
|
"rewards/margins": 0.7246190309524536, |
|
"rewards/rejected": -1.2391068935394287, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4832567301378857, |
|
"grad_norm": 34.5, |
|
"learning_rate": 1.2312117164705265e-06, |
|
"log_odds_chosen": 1.319461703300476, |
|
"log_odds_ratio": -0.37714654207229614, |
|
"logits/chosen": -2.5138354301452637, |
|
"logits/rejected": -2.2482171058654785, |
|
"logps/chosen": -0.5467159748077393, |
|
"logps/rejected": -1.3964442014694214, |
|
"loss": 1.2877, |
|
"nll_loss": 0.9105404019355774, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5467159748077393, |
|
"rewards/margins": 0.8497281074523926, |
|
"rewards/rejected": -1.3964442014694214, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4885095206828628, |
|
"grad_norm": 43.5, |
|
"learning_rate": 1.2132996307437468e-06, |
|
"log_odds_chosen": 1.3355519771575928, |
|
"log_odds_ratio": -0.3902519941329956, |
|
"logits/chosen": -2.482901096343994, |
|
"logits/rejected": -2.2286696434020996, |
|
"logps/chosen": -0.566125750541687, |
|
"logps/rejected": -1.4363183975219727, |
|
"loss": 1.3035, |
|
"nll_loss": 0.9132728576660156, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.566125750541687, |
|
"rewards/margins": 0.8701925277709961, |
|
"rewards/rejected": -1.4363183975219727, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4937623112278398, |
|
"grad_norm": 83.0, |
|
"learning_rate": 1.1953155528223725e-06, |
|
"log_odds_chosen": 1.1865278482437134, |
|
"log_odds_ratio": -0.392407089471817, |
|
"logits/chosen": -2.425886869430542, |
|
"logits/rejected": -2.155287265777588, |
|
"logps/chosen": -0.5029312968254089, |
|
"logps/rejected": -1.2368618249893188, |
|
"loss": 1.2357, |
|
"nll_loss": 0.8432880640029907, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5029312968254089, |
|
"rewards/margins": 0.7339304089546204, |
|
"rewards/rejected": -1.2368618249893188, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4990151017728168, |
|
"grad_norm": 40.5, |
|
"learning_rate": 1.1772655526336367e-06, |
|
"log_odds_chosen": 1.4356929063796997, |
|
"log_odds_ratio": -0.3839671313762665, |
|
"logits/chosen": -2.398430585861206, |
|
"logits/rejected": -2.104560136795044, |
|
"logps/chosen": -0.5578696131706238, |
|
"logps/rejected": -1.5088526010513306, |
|
"loss": 1.2412, |
|
"nll_loss": 0.8572656512260437, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5578696131706238, |
|
"rewards/margins": 0.9509830474853516, |
|
"rewards/rejected": -1.5088526010513306, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5042678923177938, |
|
"grad_norm": 28.25, |
|
"learning_rate": 1.1591557223546393e-06, |
|
"log_odds_chosen": 1.148279070854187, |
|
"log_odds_ratio": -0.3996050953865051, |
|
"logits/chosen": -2.365521192550659, |
|
"logits/rejected": -2.152665615081787, |
|
"logps/chosen": -0.566467821598053, |
|
"logps/rejected": -1.2856696844100952, |
|
"loss": 1.3237, |
|
"nll_loss": 0.9241225123405457, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.566467821598053, |
|
"rewards/margins": 0.719201922416687, |
|
"rewards/rejected": -1.2856696844100952, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5095206828627709, |
|
"grad_norm": 36.25, |
|
"learning_rate": 1.1409921743561381e-06, |
|
"log_odds_chosen": 1.1759016513824463, |
|
"log_odds_ratio": -0.41472458839416504, |
|
"logits/chosen": -2.404526472091675, |
|
"logits/rejected": -2.2163596153259277, |
|
"logps/chosen": -0.5324310064315796, |
|
"logps/rejected": -1.2714060544967651, |
|
"loss": 1.293, |
|
"nll_loss": 0.8782441020011902, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5324310064315796, |
|
"rewards/margins": 0.7389749884605408, |
|
"rewards/rejected": -1.2714060544967651, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5147734734077478, |
|
"grad_norm": 223.0, |
|
"learning_rate": 1.1227810391395199e-06, |
|
"log_odds_chosen": 1.385846734046936, |
|
"log_odds_ratio": -0.3814238905906677, |
|
"logits/chosen": -2.4934306144714355, |
|
"logits/rejected": -2.2085797786712646, |
|
"logps/chosen": -0.5657092928886414, |
|
"logps/rejected": -1.4650784730911255, |
|
"loss": 1.2852, |
|
"nll_loss": 0.9037421345710754, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.5657092928886414, |
|
"rewards/margins": 0.8993691205978394, |
|
"rewards/rejected": -1.4650784730911255, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5200262639527249, |
|
"grad_norm": 27.625, |
|
"learning_rate": 1.1045284632676535e-06, |
|
"log_odds_chosen": 1.637117624282837, |
|
"log_odds_ratio": -0.36074963212013245, |
|
"logits/chosen": -2.505157947540283, |
|
"logits/rejected": -2.18147611618042, |
|
"logps/chosen": -0.5794259905815125, |
|
"logps/rejected": -1.7134405374526978, |
|
"loss": 1.2555, |
|
"nll_loss": 0.8947887420654297, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5794259905815125, |
|
"rewards/margins": 1.1340144872665405, |
|
"rewards/rejected": -1.7134405374526978, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.525279054497702, |
|
"grad_norm": 25.375, |
|
"learning_rate": 1.0862406072903223e-06, |
|
"log_odds_chosen": 1.4640438556671143, |
|
"log_odds_ratio": -0.36846035718917847, |
|
"logits/chosen": -2.5681748390197754, |
|
"logits/rejected": -2.232964038848877, |
|
"logps/chosen": -0.5701361298561096, |
|
"logps/rejected": -1.5233440399169922, |
|
"loss": 1.2435, |
|
"nll_loss": 0.8750120997428894, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5701361298561096, |
|
"rewards/margins": 0.9532078504562378, |
|
"rewards/rejected": -1.5233440399169922, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5305318450426789, |
|
"grad_norm": 23.75, |
|
"learning_rate": 1.067923643664936e-06, |
|
"log_odds_chosen": 1.4654853343963623, |
|
"log_odds_ratio": -0.35504215955734253, |
|
"logits/chosen": -2.502295970916748, |
|
"logits/rejected": -2.181178569793701, |
|
"logps/chosen": -0.5419307947158813, |
|
"logps/rejected": -1.5056110620498657, |
|
"loss": 1.2431, |
|
"nll_loss": 0.8880621790885925, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.5419307947158813, |
|
"rewards/margins": 0.9636803865432739, |
|
"rewards/rejected": -1.5056110620498657, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.535784635587656, |
|
"grad_norm": 35.0, |
|
"learning_rate": 1.0495837546732222e-06, |
|
"log_odds_chosen": 1.5194576978683472, |
|
"log_odds_ratio": -0.37253108620643616, |
|
"logits/chosen": -2.413229465484619, |
|
"logits/rejected": -2.184525728225708, |
|
"logps/chosen": -0.5820909738540649, |
|
"logps/rejected": -1.6039245128631592, |
|
"loss": 1.3383, |
|
"nll_loss": 0.9657222032546997, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5820909738540649, |
|
"rewards/margins": 1.0218335390090942, |
|
"rewards/rejected": -1.6039245128631592, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.541037426132633, |
|
"grad_norm": 32.25, |
|
"learning_rate": 1.0312271303346038e-06, |
|
"log_odds_chosen": 1.314542531967163, |
|
"log_odds_ratio": -0.396615594625473, |
|
"logits/chosen": -2.545009136199951, |
|
"logits/rejected": -2.301347017288208, |
|
"logps/chosen": -0.562983512878418, |
|
"logps/rejected": -1.4147989749908447, |
|
"loss": 1.3396, |
|
"nll_loss": 0.9429594278335571, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.562983512878418, |
|
"rewards/margins": 0.851815402507782, |
|
"rewards/rejected": -1.4147989749908447, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.54629021667761, |
|
"grad_norm": 47.75, |
|
"learning_rate": 1.0128599663169628e-06, |
|
"log_odds_chosen": 1.084162950515747, |
|
"log_odds_ratio": -0.4125159680843353, |
|
"logits/chosen": -2.4878952503204346, |
|
"logits/rejected": -2.245314359664917, |
|
"logps/chosen": -0.5130459666252136, |
|
"logps/rejected": -1.1407145261764526, |
|
"loss": 1.3142, |
|
"nll_loss": 0.901726245880127, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5130459666252136, |
|
"rewards/margins": 0.6276686191558838, |
|
"rewards/rejected": -1.1407145261764526, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.551543007222587, |
|
"grad_norm": 74.0, |
|
"learning_rate": 9.944884618454995e-07, |
|
"log_odds_chosen": 1.5892114639282227, |
|
"log_odds_ratio": -0.3318895697593689, |
|
"logits/chosen": -2.5057709217071533, |
|
"logits/rejected": -2.110414505004883, |
|
"logps/chosen": -0.5387485027313232, |
|
"logps/rejected": -1.5842351913452148, |
|
"loss": 1.2507, |
|
"nll_loss": 0.9187744855880737, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.5387485027313232, |
|
"rewards/margins": 1.0454866886138916, |
|
"rewards/rejected": -1.5842351913452148, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.556795797767564, |
|
"grad_norm": 73.0, |
|
"learning_rate": 9.7611881761039e-07, |
|
"log_odds_chosen": 1.6785354614257812, |
|
"log_odds_ratio": -0.3325541019439697, |
|
"logits/chosen": -2.462970733642578, |
|
"logits/rejected": -2.220999240875244, |
|
"logps/chosen": -0.6112784147262573, |
|
"logps/rejected": -1.7486165761947632, |
|
"loss": 1.3345, |
|
"nll_loss": 1.0019125938415527, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.6112784147262573, |
|
"rewards/margins": 1.1373381614685059, |
|
"rewards/rejected": -1.7486165761947632, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.562048588312541, |
|
"grad_norm": 36.0, |
|
"learning_rate": 9.57757233673949e-07, |
|
"log_odds_chosen": 1.4563804864883423, |
|
"log_odds_ratio": -0.36100301146507263, |
|
"logits/chosen": -2.4625449180603027, |
|
"logits/rejected": -2.1974194049835205, |
|
"logps/chosen": -0.5516290664672852, |
|
"logps/rejected": -1.515852928161621, |
|
"loss": 1.2346, |
|
"nll_loss": 0.8735913038253784, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5516290664672852, |
|
"rewards/margins": 0.9642238616943359, |
|
"rewards/rejected": -1.515852928161621, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5673013788575181, |
|
"grad_norm": 36.0, |
|
"learning_rate": 9.394099073780066e-07, |
|
"log_odds_chosen": 1.4258034229278564, |
|
"log_odds_ratio": -0.36102384328842163, |
|
"logits/chosen": -2.5518240928649902, |
|
"logits/rejected": -2.2731943130493164, |
|
"logps/chosen": -0.5590797662734985, |
|
"logps/rejected": -1.4916408061981201, |
|
"loss": 1.2649, |
|
"nll_loss": 0.9038828015327454, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5590797662734985, |
|
"rewards/margins": 0.9325610399246216, |
|
"rewards/rejected": -1.4916408061981201, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.572554169402495, |
|
"grad_norm": 64.0, |
|
"learning_rate": 9.210830312521991e-07, |
|
"log_odds_chosen": 1.605653166770935, |
|
"log_odds_ratio": -0.338408887386322, |
|
"logits/chosen": -2.5818705558776855, |
|
"logits/rejected": -2.311086416244507, |
|
"logps/chosen": -0.5466338992118835, |
|
"logps/rejected": -1.6157076358795166, |
|
"loss": 1.3041, |
|
"nll_loss": 0.9657169580459595, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5466338992118835, |
|
"rewards/margins": 1.0690736770629883, |
|
"rewards/rejected": -1.6157076358795166, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5778069599474721, |
|
"grad_norm": 57.25, |
|
"learning_rate": 9.027827909238901e-07, |
|
"log_odds_chosen": 1.8266319036483765, |
|
"log_odds_ratio": -0.3148033320903778, |
|
"logits/chosen": -2.48435115814209, |
|
"logits/rejected": -2.166586399078369, |
|
"logps/chosen": -0.5606757402420044, |
|
"logps/rejected": -1.8262403011322021, |
|
"loss": 1.2896, |
|
"nll_loss": 0.97479248046875, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.5606757402420044, |
|
"rewards/margins": 1.2655649185180664, |
|
"rewards/rejected": -1.8262403011322021, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5830597504924491, |
|
"grad_norm": 47.5, |
|
"learning_rate": 8.845153630304139e-07, |
|
"log_odds_chosen": 1.663627028465271, |
|
"log_odds_ratio": -0.3311775028705597, |
|
"logits/chosen": -2.4467196464538574, |
|
"logits/rejected": -2.2170791625976562, |
|
"logps/chosen": -0.5954256057739258, |
|
"logps/rejected": -1.7486213445663452, |
|
"loss": 1.2756, |
|
"nll_loss": 0.9444006085395813, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.5954256057739258, |
|
"rewards/margins": 1.1531956195831299, |
|
"rewards/rejected": -1.7486213445663452, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5883125410374261, |
|
"grad_norm": 45.5, |
|
"learning_rate": 8.662869131343606e-07, |
|
"log_odds_chosen": 1.4104127883911133, |
|
"log_odds_ratio": -0.39170485734939575, |
|
"logits/chosen": -2.5256340503692627, |
|
"logits/rejected": -2.213099241256714, |
|
"logps/chosen": -0.5794434547424316, |
|
"logps/rejected": -1.5348830223083496, |
|
"loss": 1.3375, |
|
"nll_loss": 0.9457686543464661, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5794434547424316, |
|
"rewards/margins": 0.955439567565918, |
|
"rewards/rejected": -1.5348830223083496, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5935653315824031, |
|
"grad_norm": 72.0, |
|
"learning_rate": 8.481035936425926e-07, |
|
"log_odds_chosen": 1.1931443214416504, |
|
"log_odds_ratio": -0.3968736529350281, |
|
"logits/chosen": -2.56657338142395, |
|
"logits/rejected": -2.191765785217285, |
|
"logps/chosen": -0.5020140409469604, |
|
"logps/rejected": -1.227325201034546, |
|
"loss": 1.2792, |
|
"nll_loss": 0.8822978138923645, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5020140409469604, |
|
"rewards/margins": 0.7253111600875854, |
|
"rewards/rejected": -1.227325201034546, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5988181221273802, |
|
"grad_norm": 37.25, |
|
"learning_rate": 8.29971541729707e-07, |
|
"log_odds_chosen": 1.549736738204956, |
|
"log_odds_ratio": -0.3515177369117737, |
|
"logits/chosen": -2.526639461517334, |
|
"logits/rejected": -2.2129909992218018, |
|
"logps/chosen": -0.5579209923744202, |
|
"logps/rejected": -1.5522905588150024, |
|
"loss": 1.2671, |
|
"nll_loss": 0.9156067967414856, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.5579209923744202, |
|
"rewards/margins": 0.9943695068359375, |
|
"rewards/rejected": -1.5522905588150024, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6040709126723572, |
|
"grad_norm": 78.5, |
|
"learning_rate": 8.118968772666338e-07, |
|
"log_odds_chosen": 1.9918029308319092, |
|
"log_odds_ratio": -0.33105817437171936, |
|
"logits/chosen": -2.5553669929504395, |
|
"logits/rejected": -2.255253791809082, |
|
"logps/chosen": -0.6138916015625, |
|
"logps/rejected": -2.058006763458252, |
|
"loss": 1.261, |
|
"nll_loss": 0.9299631118774414, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.6138916015625, |
|
"rewards/margins": 1.4441156387329102, |
|
"rewards/rejected": -2.058006763458252, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6093237032173342, |
|
"grad_norm": 32.25, |
|
"learning_rate": 7.938857007550796e-07, |
|
"log_odds_chosen": 1.5095994472503662, |
|
"log_odds_ratio": -0.36659660935401917, |
|
"logits/chosen": -2.4949142932891846, |
|
"logits/rejected": -2.217616558074951, |
|
"logps/chosen": -0.5693143606185913, |
|
"logps/rejected": -1.5770564079284668, |
|
"loss": 1.2795, |
|
"nll_loss": 0.9128750562667847, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5693143606185913, |
|
"rewards/margins": 1.007741928100586, |
|
"rewards/rejected": -1.5770564079284668, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6145764937623113, |
|
"grad_norm": 44.5, |
|
"learning_rate": 7.759440912685042e-07, |
|
"log_odds_chosen": 1.313231348991394, |
|
"log_odds_ratio": -0.39206627011299133, |
|
"logits/chosen": -2.4366495609283447, |
|
"logits/rejected": -2.1927928924560547, |
|
"logps/chosen": -0.5398006439208984, |
|
"logps/rejected": -1.4002869129180908, |
|
"loss": 1.2987, |
|
"nll_loss": 0.9065971374511719, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.5398006439208984, |
|
"rewards/margins": 0.8604865074157715, |
|
"rewards/rejected": -1.4002869129180908, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6198292843072882, |
|
"grad_norm": 41.75, |
|
"learning_rate": 7.580781044003324e-07, |
|
"log_odds_chosen": 1.5099523067474365, |
|
"log_odds_ratio": -0.37858808040618896, |
|
"logits/chosen": -2.5282700061798096, |
|
"logits/rejected": -2.1985023021698, |
|
"logps/chosen": -0.554128110408783, |
|
"logps/rejected": -1.5762214660644531, |
|
"loss": 1.2642, |
|
"nll_loss": 0.885593593120575, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.554128110408783, |
|
"rewards/margins": 1.022093415260315, |
|
"rewards/rejected": -1.5762214660644531, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6250820748522653, |
|
"grad_norm": 94.0, |
|
"learning_rate": 7.402937702200904e-07, |
|
"log_odds_chosen": 1.7455905675888062, |
|
"log_odds_ratio": -0.3350276052951813, |
|
"logits/chosen": -2.5306236743927, |
|
"logits/rejected": -2.249689817428589, |
|
"logps/chosen": -0.5238341093063354, |
|
"logps/rejected": -1.7180259227752686, |
|
"loss": 1.2212, |
|
"nll_loss": 0.8861449956893921, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5238341093063354, |
|
"rewards/margins": 1.1941916942596436, |
|
"rewards/rejected": -1.7180259227752686, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6303348653972423, |
|
"grad_norm": 57.0, |
|
"learning_rate": 7.225970912381556e-07, |
|
"log_odds_chosen": 1.5003291368484497, |
|
"log_odds_ratio": -0.391081303358078, |
|
"logits/chosen": -2.381641387939453, |
|
"logits/rejected": -2.1322736740112305, |
|
"logps/chosen": -0.5944348573684692, |
|
"logps/rejected": -1.6424591541290283, |
|
"loss": 1.3066, |
|
"nll_loss": 0.9154736399650574, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.5944348573684692, |
|
"rewards/margins": 1.048024296760559, |
|
"rewards/rejected": -1.6424591541290283, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6355876559422193, |
|
"grad_norm": 41.0, |
|
"learning_rate": 7.049940403798089e-07, |
|
"log_odds_chosen": 1.531709909439087, |
|
"log_odds_ratio": -0.3830433487892151, |
|
"logits/chosen": -2.4697697162628174, |
|
"logits/rejected": -2.217533826828003, |
|
"logps/chosen": -0.5523134469985962, |
|
"logps/rejected": -1.5712653398513794, |
|
"loss": 1.314, |
|
"nll_loss": 0.9309525489807129, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5523134469985962, |
|
"rewards/margins": 1.0189517736434937, |
|
"rewards/rejected": -1.5712653398513794, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.6408404464871963, |
|
"grad_norm": 46.5, |
|
"learning_rate": 6.874905589692733e-07, |
|
"log_odds_chosen": 1.6414533853530884, |
|
"log_odds_ratio": -0.34355098009109497, |
|
"logits/chosen": -2.509610176086426, |
|
"logits/rejected": -2.1736972332000732, |
|
"logps/chosen": -0.5539788007736206, |
|
"logps/rejected": -1.6842210292816162, |
|
"loss": 1.2389, |
|
"nll_loss": 0.8953197598457336, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.5539788007736206, |
|
"rewards/margins": 1.1302422285079956, |
|
"rewards/rejected": -1.6842210292816162, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6460932370321734, |
|
"grad_norm": 32.25, |
|
"learning_rate": 6.700925547244171e-07, |
|
"log_odds_chosen": 1.9415044784545898, |
|
"log_odds_ratio": -0.31946122646331787, |
|
"logits/chosen": -2.4332690238952637, |
|
"logits/rejected": -2.26471209526062, |
|
"logps/chosen": -0.6300308704376221, |
|
"logps/rejected": -2.049290180206299, |
|
"loss": 1.2482, |
|
"nll_loss": 0.9287741780281067, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.6300308704376221, |
|
"rewards/margins": 1.4192593097686768, |
|
"rewards/rejected": -2.049290180206299, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6513460275771503, |
|
"grad_norm": 32.5, |
|
"learning_rate": 6.528058997627995e-07, |
|
"log_odds_chosen": 1.9388889074325562, |
|
"log_odds_ratio": -0.3166273534297943, |
|
"logits/chosen": -2.5412425994873047, |
|
"logits/rejected": -2.1768264770507812, |
|
"logps/chosen": -0.5474293828010559, |
|
"logps/rejected": -1.9378162622451782, |
|
"loss": 1.2866, |
|
"nll_loss": 0.9699424505233765, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.5474293828010559, |
|
"rewards/margins": 1.3903871774673462, |
|
"rewards/rejected": -1.9378162622451782, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6565988181221274, |
|
"grad_norm": 40.0, |
|
"learning_rate": 6.35636428619734e-07, |
|
"log_odds_chosen": 1.7123737335205078, |
|
"log_odds_ratio": -0.34193840622901917, |
|
"logits/chosen": -2.5048129558563232, |
|
"logits/rejected": -2.1842281818389893, |
|
"logps/chosen": -0.5440694093704224, |
|
"logps/rejected": -1.7357890605926514, |
|
"loss": 1.2903, |
|
"nll_loss": 0.9483565092086792, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5440694093704224, |
|
"rewards/margins": 1.1917197704315186, |
|
"rewards/rejected": -1.7357890605926514, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6618516086671044, |
|
"grad_norm": 46.25, |
|
"learning_rate": 6.185899362790338e-07, |
|
"log_odds_chosen": 1.6516172885894775, |
|
"log_odds_ratio": -0.3549567461013794, |
|
"logits/chosen": -2.4393770694732666, |
|
"logits/rejected": -2.138049602508545, |
|
"logps/chosen": -0.5555499196052551, |
|
"logps/rejected": -1.7016226053237915, |
|
"loss": 1.2573, |
|
"nll_loss": 0.9023006558418274, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5555499196052551, |
|
"rewards/margins": 1.1460726261138916, |
|
"rewards/rejected": -1.7016226053237915, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6671043992120814, |
|
"grad_norm": 101.5, |
|
"learning_rate": 6.016721762171098e-07, |
|
"log_odds_chosen": 1.636366605758667, |
|
"log_odds_ratio": -0.3687242567539215, |
|
"logits/chosen": -2.469954252243042, |
|
"logits/rejected": -2.2552268505096436, |
|
"logps/chosen": -0.6394462585449219, |
|
"logps/rejected": -1.7851154804229736, |
|
"loss": 1.3697, |
|
"nll_loss": 1.000967025756836, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6394462585449219, |
|
"rewards/margins": 1.1456692218780518, |
|
"rewards/rejected": -1.7851154804229736, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6723571897570584, |
|
"grad_norm": 64.5, |
|
"learning_rate": 5.848888584610726e-07, |
|
"log_odds_chosen": 1.693683385848999, |
|
"log_odds_ratio": -0.34921011328697205, |
|
"logits/chosen": -2.486765146255493, |
|
"logits/rejected": -2.2645862102508545, |
|
"logps/chosen": -0.5731798410415649, |
|
"logps/rejected": -1.7742217779159546, |
|
"loss": 1.261, |
|
"nll_loss": 0.9118081331253052, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5731798410415649, |
|
"rewards/margins": 1.2010419368743896, |
|
"rewards/rejected": -1.7742217779159546, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6776099803020355, |
|
"grad_norm": 58.5, |
|
"learning_rate": 5.682456476615072e-07, |
|
"log_odds_chosen": 1.4461402893066406, |
|
"log_odds_ratio": -0.3787740170955658, |
|
"logits/chosen": -2.355269432067871, |
|
"logits/rejected": -2.16302490234375, |
|
"logps/chosen": -0.5690776705741882, |
|
"logps/rejected": -1.5551892518997192, |
|
"loss": 1.2771, |
|
"nll_loss": 0.8983281254768372, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.5690776705741882, |
|
"rewards/margins": 0.9861115217208862, |
|
"rewards/rejected": -1.5551892518997192, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6828627708470125, |
|
"grad_norm": 36.75, |
|
"learning_rate": 5.517481611805539e-07, |
|
"log_odds_chosen": 1.5578912496566772, |
|
"log_odds_ratio": -0.35105592012405396, |
|
"logits/chosen": -2.3847219944000244, |
|
"logits/rejected": -2.130415439605713, |
|
"logps/chosen": -0.537613570690155, |
|
"logps/rejected": -1.5832931995391846, |
|
"loss": 1.246, |
|
"nll_loss": 0.8949264287948608, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.537613570690155, |
|
"rewards/margins": 1.0456795692443848, |
|
"rewards/rejected": -1.5832931995391846, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6881155613919895, |
|
"grad_norm": 28.25, |
|
"learning_rate": 5.354019671959599e-07, |
|
"log_odds_chosen": 1.4725126028060913, |
|
"log_odds_ratio": -0.38070547580718994, |
|
"logits/chosen": -2.3801114559173584, |
|
"logits/rejected": -2.134171724319458, |
|
"logps/chosen": -0.5319515466690063, |
|
"logps/rejected": -1.5217872858047485, |
|
"loss": 1.3054, |
|
"nll_loss": 0.9246999621391296, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5319515466690063, |
|
"rewards/margins": 0.9898357391357422, |
|
"rewards/rejected": -1.5217872858047485, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6933683519369666, |
|
"grad_norm": 35.5, |
|
"learning_rate": 5.192125828217202e-07, |
|
"log_odds_chosen": 1.628064513206482, |
|
"log_odds_ratio": -0.370327889919281, |
|
"logits/chosen": -2.5233168601989746, |
|
"logits/rejected": -2.1562933921813965, |
|
"logps/chosen": -0.5629066824913025, |
|
"logps/rejected": -1.6909490823745728, |
|
"loss": 1.2606, |
|
"nll_loss": 0.8903215527534485, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.5629066824913025, |
|
"rewards/margins": 1.128042459487915, |
|
"rewards/rejected": -1.6909490823745728, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6986211424819435, |
|
"grad_norm": 51.5, |
|
"learning_rate": 5.031854722459652e-07, |
|
"log_odds_chosen": 1.8480112552642822, |
|
"log_odds_ratio": -0.3127003610134125, |
|
"logits/chosen": -2.4370510578155518, |
|
"logits/rejected": -2.0890867710113525, |
|
"logps/chosen": -0.5302228927612305, |
|
"logps/rejected": -1.8121706247329712, |
|
"loss": 1.2074, |
|
"nll_loss": 0.8947356939315796, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.5302228927612305, |
|
"rewards/margins": 1.2819478511810303, |
|
"rewards/rejected": -1.8121706247329712, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7038739330269206, |
|
"grad_norm": 31.5, |
|
"learning_rate": 4.873260448867004e-07, |
|
"log_odds_chosen": 2.02109956741333, |
|
"log_odds_ratio": -0.31728652119636536, |
|
"logits/chosen": -2.470301628112793, |
|
"logits/rejected": -2.2189319133758545, |
|
"logps/chosen": -0.6230054497718811, |
|
"logps/rejected": -2.0598232746124268, |
|
"loss": 1.3239, |
|
"nll_loss": 1.0066121816635132, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.6230054497718811, |
|
"rewards/margins": 1.4368176460266113, |
|
"rewards/rejected": -2.0598232746124268, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7091267235718975, |
|
"grad_norm": 68.0, |
|
"learning_rate": 4.7163965356604117e-07, |
|
"log_odds_chosen": 1.897443413734436, |
|
"log_odds_ratio": -0.3486331105232239, |
|
"logits/chosen": -2.554206132888794, |
|
"logits/rejected": -2.1669750213623047, |
|
"logps/chosen": -0.64203941822052, |
|
"logps/rejected": -2.0166876316070557, |
|
"loss": 1.3553, |
|
"nll_loss": 1.0066633224487305, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.64203941822052, |
|
"rewards/margins": 1.3746483325958252, |
|
"rewards/rejected": -2.0166876316070557, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7143795141168746, |
|
"grad_norm": 34.5, |
|
"learning_rate": 4.561315927035445e-07, |
|
"log_odds_chosen": 1.707550048828125, |
|
"log_odds_ratio": -0.34410637617111206, |
|
"logits/chosen": -2.440441846847534, |
|
"logits/rejected": -2.1145124435424805, |
|
"logps/chosen": -0.5574239492416382, |
|
"logps/rejected": -1.7339591979980469, |
|
"loss": 1.2025, |
|
"nll_loss": 0.8583625555038452, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5574239492416382, |
|
"rewards/margins": 1.1765353679656982, |
|
"rewards/rejected": -1.7339591979980469, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7196323046618516, |
|
"grad_norm": 64.5, |
|
"learning_rate": 4.408070965292533e-07, |
|
"log_odds_chosen": 1.7007535696029663, |
|
"log_odds_ratio": -0.35346347093582153, |
|
"logits/chosen": -2.456326961517334, |
|
"logits/rejected": -2.1892619132995605, |
|
"logps/chosen": -0.5550821423530579, |
|
"logps/rejected": -1.7205698490142822, |
|
"loss": 1.2778, |
|
"nll_loss": 0.9243642091751099, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5550821423530579, |
|
"rewards/margins": 1.1654876470565796, |
|
"rewards/rejected": -1.7205698490142822, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.7248850952068286, |
|
"grad_norm": 42.25, |
|
"learning_rate": 4.256713373170564e-07, |
|
"log_odds_chosen": 1.5547049045562744, |
|
"log_odds_ratio": -0.36127448081970215, |
|
"logits/chosen": -2.46553373336792, |
|
"logits/rejected": -2.2510862350463867, |
|
"logps/chosen": -0.603643536567688, |
|
"logps/rejected": -1.6664206981658936, |
|
"loss": 1.3272, |
|
"nll_loss": 0.9659638404846191, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.603643536567688, |
|
"rewards/margins": 1.0627771615982056, |
|
"rewards/rejected": -1.6664206981658936, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7301378857518056, |
|
"grad_norm": 38.5, |
|
"learning_rate": 4.1072942363896025e-07, |
|
"log_odds_chosen": 1.6411514282226562, |
|
"log_odds_ratio": -0.3377731442451477, |
|
"logits/chosen": -2.5552942752838135, |
|
"logits/rejected": -2.229196071624756, |
|
"logps/chosen": -0.5576506853103638, |
|
"logps/rejected": -1.6714286804199219, |
|
"loss": 1.2942, |
|
"nll_loss": 0.9564154744148254, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.5576506853103638, |
|
"rewards/margins": 1.1137781143188477, |
|
"rewards/rejected": -1.6714286804199219, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.7353906762967827, |
|
"grad_norm": 26.125, |
|
"learning_rate": 3.9598639864085925e-07, |
|
"log_odds_chosen": 1.2980868816375732, |
|
"log_odds_ratio": -0.3892515301704407, |
|
"logits/chosen": -2.417532444000244, |
|
"logits/rejected": -2.2620291709899902, |
|
"logps/chosen": -0.5406171083450317, |
|
"logps/rejected": -1.3745439052581787, |
|
"loss": 1.283, |
|
"nll_loss": 0.893776535987854, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5406171083450317, |
|
"rewards/margins": 0.833926796913147, |
|
"rewards/rejected": -1.3745439052581787, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7406434668417596, |
|
"grad_norm": 57.75, |
|
"learning_rate": 3.8144723834039073e-07, |
|
"log_odds_chosen": 1.3730871677398682, |
|
"log_odds_ratio": -0.38403210043907166, |
|
"logits/chosen": -2.492102861404419, |
|
"logits/rejected": -2.1305251121520996, |
|
"logps/chosen": -0.5153442621231079, |
|
"logps/rejected": -1.4106855392456055, |
|
"loss": 1.2797, |
|
"nll_loss": 0.8956896662712097, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5153442621231079, |
|
"rewards/margins": 0.8953412175178528, |
|
"rewards/rejected": -1.4106855392456055, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7458962573867367, |
|
"grad_norm": 28.25, |
|
"learning_rate": 3.6711684994744486e-07, |
|
"log_odds_chosen": 1.7186520099639893, |
|
"log_odds_ratio": -0.33004146814346313, |
|
"logits/chosen": -2.537470817565918, |
|
"logits/rejected": -2.23635196685791, |
|
"logps/chosen": -0.4957657754421234, |
|
"logps/rejected": -1.6590726375579834, |
|
"loss": 1.2277, |
|
"nll_loss": 0.8976136445999146, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.4957657754421234, |
|
"rewards/margins": 1.1633068323135376, |
|
"rewards/rejected": -1.6590726375579834, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7511490479317138, |
|
"grad_norm": 50.25, |
|
"learning_rate": 3.530000702078999e-07, |
|
"log_odds_chosen": 1.9104875326156616, |
|
"log_odds_ratio": -0.30225199460983276, |
|
"logits/chosen": -2.41103196144104, |
|
"logits/rejected": -2.163609743118286, |
|
"logps/chosen": -0.535643458366394, |
|
"logps/rejected": -1.8592544794082642, |
|
"loss": 1.2363, |
|
"nll_loss": 0.9340142011642456, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.535643458366394, |
|
"rewards/margins": 1.3236110210418701, |
|
"rewards/rejected": -1.8592544794082642, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7564018384766907, |
|
"grad_norm": 215.0, |
|
"learning_rate": 3.391016637711389e-07, |
|
"log_odds_chosen": 1.9387279748916626, |
|
"log_odds_ratio": -0.32732483744621277, |
|
"logits/chosen": -2.503373861312866, |
|
"logits/rejected": -2.184051990509033, |
|
"logps/chosen": -0.6013236045837402, |
|
"logps/rejected": -1.979087233543396, |
|
"loss": 1.2995, |
|
"nll_loss": 0.9722166061401367, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.6013236045837402, |
|
"rewards/margins": 1.3777637481689453, |
|
"rewards/rejected": -1.979087233543396, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7616546290216678, |
|
"grad_norm": 64.5, |
|
"learning_rate": 3.2542632158190133e-07, |
|
"log_odds_chosen": 1.8217693567276, |
|
"log_odds_ratio": -0.3460733890533447, |
|
"logits/chosen": -2.4695355892181396, |
|
"logits/rejected": -2.266535758972168, |
|
"logps/chosen": -0.5930324792861938, |
|
"logps/rejected": -1.8648335933685303, |
|
"loss": 1.2692, |
|
"nll_loss": 0.9231220483779907, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5930324792861938, |
|
"rewards/margins": 1.271801233291626, |
|
"rewards/rejected": -1.8648335933685303, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7669074195666448, |
|
"grad_norm": 60.0, |
|
"learning_rate": 3.1197865929701017e-07, |
|
"log_odds_chosen": 1.9611870050430298, |
|
"log_odds_ratio": -0.3502156138420105, |
|
"logits/chosen": -2.595439910888672, |
|
"logits/rejected": -2.2361018657684326, |
|
"logps/chosen": -0.5836862921714783, |
|
"logps/rejected": -2.0304791927337646, |
|
"loss": 1.3186, |
|
"nll_loss": 0.9683855175971985, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5836862921714783, |
|
"rewards/margins": 1.4467928409576416, |
|
"rewards/rejected": -2.0304791927337646, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7721602101116218, |
|
"grad_norm": 63.75, |
|
"learning_rate": 2.987632157275114e-07, |
|
"log_odds_chosen": 1.6977773904800415, |
|
"log_odds_ratio": -0.3493327796459198, |
|
"logits/chosen": -2.5089340209960938, |
|
"logits/rejected": -2.2651724815368652, |
|
"logps/chosen": -0.5790574550628662, |
|
"logps/rejected": -1.748196005821228, |
|
"loss": 1.2328, |
|
"nll_loss": 0.883512020111084, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.5790574550628662, |
|
"rewards/margins": 1.1691386699676514, |
|
"rewards/rejected": -1.748196005821228, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7774130006565988, |
|
"grad_norm": 47.0, |
|
"learning_rate": 2.8578445130674833e-07, |
|
"log_odds_chosen": 1.5758211612701416, |
|
"log_odds_ratio": -0.3468172550201416, |
|
"logits/chosen": -2.4574217796325684, |
|
"logits/rejected": -2.2445011138916016, |
|
"logps/chosen": -0.5336965322494507, |
|
"logps/rejected": -1.6166375875473022, |
|
"loss": 1.2211, |
|
"nll_loss": 0.8742717504501343, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5336965322494507, |
|
"rewards/margins": 1.082940936088562, |
|
"rewards/rejected": -1.6166375875473022, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7826657912015759, |
|
"grad_norm": 33.25, |
|
"learning_rate": 2.73046746584891e-07, |
|
"log_odds_chosen": 1.6906464099884033, |
|
"log_odds_ratio": -0.3406273126602173, |
|
"logits/chosen": -2.5112786293029785, |
|
"logits/rejected": -2.2304630279541016, |
|
"logps/chosen": -0.5315414667129517, |
|
"logps/rejected": -1.6976295709609985, |
|
"loss": 1.2098, |
|
"nll_loss": 0.8692021369934082, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5315414667129517, |
|
"rewards/margins": 1.1660881042480469, |
|
"rewards/rejected": -1.6976295709609985, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7879185817465528, |
|
"grad_norm": 40.25, |
|
"learning_rate": 2.605544007504279e-07, |
|
"log_odds_chosen": 1.7450376749038696, |
|
"log_odds_ratio": -0.32459336519241333, |
|
"logits/chosen": -2.553576946258545, |
|
"logits/rejected": -2.259354591369629, |
|
"logps/chosen": -0.5844911336898804, |
|
"logps/rejected": -1.801825761795044, |
|
"loss": 1.2855, |
|
"nll_loss": 0.9608856439590454, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.5844911336898804, |
|
"rewards/margins": 1.217334508895874, |
|
"rewards/rejected": -1.801825761795044, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7931713722915299, |
|
"grad_norm": 36.5, |
|
"learning_rate": 2.4831163017911683e-07, |
|
"log_odds_chosen": 1.651958703994751, |
|
"log_odds_ratio": -0.34634822607040405, |
|
"logits/chosen": -2.405233144760132, |
|
"logits/rejected": -2.138745069503784, |
|
"logps/chosen": -0.5561404228210449, |
|
"logps/rejected": -1.6944749355316162, |
|
"loss": 1.2428, |
|
"nll_loss": 0.8964967727661133, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5561404228210449, |
|
"rewards/margins": 1.1383345127105713, |
|
"rewards/rejected": -1.6944749355316162, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7984241628365069, |
|
"grad_norm": 46.75, |
|
"learning_rate": 2.3632256701088814e-07, |
|
"log_odds_chosen": 1.698676347732544, |
|
"log_odds_ratio": -0.3407271206378937, |
|
"logits/chosen": -2.5164520740509033, |
|
"logits/rejected": -2.169098377227783, |
|
"logps/chosen": -0.546515166759491, |
|
"logps/rejected": -1.726548433303833, |
|
"loss": 1.2007, |
|
"nll_loss": 0.8599587678909302, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.546515166759491, |
|
"rewards/margins": 1.1800330877304077, |
|
"rewards/rejected": -1.726548433303833, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8036769533814839, |
|
"grad_norm": 31.625, |
|
"learning_rate": 2.245912577551785e-07, |
|
"log_odds_chosen": 1.7021366357803345, |
|
"log_odds_ratio": -0.36240798234939575, |
|
"logits/chosen": -2.583963632583618, |
|
"logits/rejected": -2.3067448139190674, |
|
"logps/chosen": -0.610865592956543, |
|
"logps/rejected": -1.795292854309082, |
|
"loss": 1.3449, |
|
"nll_loss": 0.9824475049972534, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.610865592956543, |
|
"rewards/margins": 1.184427261352539, |
|
"rewards/rejected": -1.795292854309082, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.8089297439264609, |
|
"grad_norm": 97.5, |
|
"learning_rate": 2.131216619251659e-07, |
|
"log_odds_chosen": 1.825273871421814, |
|
"log_odds_ratio": -0.3238641917705536, |
|
"logits/chosen": -2.533202648162842, |
|
"logits/rejected": -2.3293657302856445, |
|
"logps/chosen": -0.6178978681564331, |
|
"logps/rejected": -1.9215917587280273, |
|
"loss": 1.3183, |
|
"nll_loss": 0.9943979978561401, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.6178978681564331, |
|
"rewards/margins": 1.3036938905715942, |
|
"rewards/rejected": -1.9215917587280273, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.814182534471438, |
|
"grad_norm": 63.0, |
|
"learning_rate": 2.0191765070136768e-07, |
|
"log_odds_chosen": 1.8990042209625244, |
|
"log_odds_ratio": -0.3358913064002991, |
|
"logits/chosen": -2.4345898628234863, |
|
"logits/rejected": -2.134831190109253, |
|
"logps/chosen": -0.5476903915405273, |
|
"logps/rejected": -1.865012526512146, |
|
"loss": 1.2756, |
|
"nll_loss": 0.9396783709526062, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.5476903915405273, |
|
"rewards/margins": 1.317322015762329, |
|
"rewards/rejected": -1.865012526512146, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.8194353250164149, |
|
"grad_norm": 102.5, |
|
"learning_rate": 1.9098300562505264e-07, |
|
"log_odds_chosen": 1.6969549655914307, |
|
"log_odds_ratio": -0.3712518811225891, |
|
"logits/chosen": -2.4698281288146973, |
|
"logits/rejected": -2.181797981262207, |
|
"logps/chosen": -0.5788697004318237, |
|
"logps/rejected": -1.7834043502807617, |
|
"loss": 1.2457, |
|
"nll_loss": 0.8744741678237915, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.5788697004318237, |
|
"rewards/margins": 1.2045344114303589, |
|
"rewards/rejected": -1.7834043502807617, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.824688115561392, |
|
"grad_norm": 28.625, |
|
"learning_rate": 1.803214173219072e-07, |
|
"log_odds_chosen": 1.9696476459503174, |
|
"log_odds_ratio": -0.30190950632095337, |
|
"logits/chosen": -2.483811616897583, |
|
"logits/rejected": -2.173767328262329, |
|
"logps/chosen": -0.535027265548706, |
|
"logps/rejected": -1.9312782287597656, |
|
"loss": 1.2045, |
|
"nll_loss": 0.902554988861084, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.535027265548706, |
|
"rewards/margins": 1.3962510824203491, |
|
"rewards/rejected": -1.9312782287597656, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.8299409061063691, |
|
"grad_norm": 31.125, |
|
"learning_rate": 1.6993648425638796e-07, |
|
"log_odds_chosen": 1.6274923086166382, |
|
"log_odds_ratio": -0.3982171416282654, |
|
"logits/chosen": -2.5815181732177734, |
|
"logits/rejected": -2.206310987472534, |
|
"logps/chosen": -0.5905428528785706, |
|
"logps/rejected": -1.7506492137908936, |
|
"loss": 1.3427, |
|
"nll_loss": 0.9444986581802368, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5905428528785706, |
|
"rewards/margins": 1.1601064205169678, |
|
"rewards/rejected": -1.7506492137908936, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.835193696651346, |
|
"grad_norm": 61.0, |
|
"learning_rate": 1.5983171151717921e-07, |
|
"log_odds_chosen": 1.5922825336456299, |
|
"log_odds_ratio": -0.3533628284931183, |
|
"logits/chosen": -2.4570369720458984, |
|
"logits/rejected": -2.210930824279785, |
|
"logps/chosen": -0.581910252571106, |
|
"logps/rejected": -1.6624376773834229, |
|
"loss": 1.2185, |
|
"nll_loss": 0.8651579022407532, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.581910252571106, |
|
"rewards/margins": 1.080527424812317, |
|
"rewards/rejected": -1.6624376773834229, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.8404464871963231, |
|
"grad_norm": 56.25, |
|
"learning_rate": 1.5001050963416716e-07, |
|
"log_odds_chosen": 1.7499481439590454, |
|
"log_odds_ratio": -0.3268365263938904, |
|
"logits/chosen": -2.4593491554260254, |
|
"logits/rejected": -2.1416468620300293, |
|
"logps/chosen": -0.5591254234313965, |
|
"logps/rejected": -1.7770287990570068, |
|
"loss": 1.1877, |
|
"nll_loss": 0.8609007596969604, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5591254234313965, |
|
"rewards/margins": 1.2179033756256104, |
|
"rewards/rejected": -1.7770287990570068, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8456992777413, |
|
"grad_norm": 59.25, |
|
"learning_rate": 1.4047619342732908e-07, |
|
"log_odds_chosen": 1.5950630903244019, |
|
"log_odds_ratio": -0.3615456819534302, |
|
"logits/chosen": -2.5065274238586426, |
|
"logits/rejected": -2.24869441986084, |
|
"logps/chosen": -0.6077946424484253, |
|
"logps/rejected": -1.7057603597640991, |
|
"loss": 1.273, |
|
"nll_loss": 0.911415696144104, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6077946424484253, |
|
"rewards/margins": 1.0979657173156738, |
|
"rewards/rejected": -1.7057603597640991, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.8509520682862771, |
|
"grad_norm": 30.0, |
|
"learning_rate": 1.3123198088792577e-07, |
|
"log_odds_chosen": 1.6475883722305298, |
|
"log_odds_ratio": -0.37195760011672974, |
|
"logits/chosen": -2.4656014442443848, |
|
"logits/rejected": -2.1296868324279785, |
|
"logps/chosen": -0.5928062200546265, |
|
"logps/rejected": -1.764866828918457, |
|
"loss": 1.2998, |
|
"nll_loss": 0.9278379678726196, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5928062200546265, |
|
"rewards/margins": 1.172060489654541, |
|
"rewards/rejected": -1.764866828918457, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8562048588312541, |
|
"grad_norm": 48.25, |
|
"learning_rate": 1.2228099209237607e-07, |
|
"log_odds_chosen": 1.6707931756973267, |
|
"log_odds_ratio": -0.35219767689704895, |
|
"logits/chosen": -2.416558027267456, |
|
"logits/rejected": -2.1250758171081543, |
|
"logps/chosen": -0.577375054359436, |
|
"logps/rejected": -1.7357622385025024, |
|
"loss": 1.3212, |
|
"nll_loss": 0.969050407409668, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.577375054359436, |
|
"rewards/margins": 1.1583871841430664, |
|
"rewards/rejected": -1.7357622385025024, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.8614576493762311, |
|
"grad_norm": 48.5, |
|
"learning_rate": 1.1362624814917842e-07, |
|
"log_odds_chosen": 1.469254732131958, |
|
"log_odds_ratio": -0.3808806836605072, |
|
"logits/chosen": -2.428011178970337, |
|
"logits/rejected": -2.1460485458374023, |
|
"logps/chosen": -0.549521803855896, |
|
"logps/rejected": -1.5279179811477661, |
|
"loss": 1.2772, |
|
"nll_loss": 0.8962807655334473, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.549521803855896, |
|
"rewards/margins": 0.9783961176872253, |
|
"rewards/rejected": -1.5279179811477661, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8667104399212081, |
|
"grad_norm": 34.5, |
|
"learning_rate": 1.0527067017923652e-07, |
|
"log_odds_chosen": 1.5520200729370117, |
|
"log_odds_ratio": -0.3591814637184143, |
|
"logits/chosen": -2.5619750022888184, |
|
"logits/rejected": -2.3039004802703857, |
|
"logps/chosen": -0.5574966073036194, |
|
"logps/rejected": -1.6048591136932373, |
|
"loss": 1.2779, |
|
"nll_loss": 0.918703556060791, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5574966073036194, |
|
"rewards/margins": 1.0473625659942627, |
|
"rewards/rejected": -1.6048591136932373, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8719632304661852, |
|
"grad_norm": 39.0, |
|
"learning_rate": 9.721707832993231e-08, |
|
"log_odds_chosen": 1.7053543329238892, |
|
"log_odds_ratio": -0.33547329902648926, |
|
"logits/chosen": -2.483564615249634, |
|
"logits/rejected": -2.2165513038635254, |
|
"logps/chosen": -0.5104734301567078, |
|
"logps/rejected": -1.664214849472046, |
|
"loss": 1.1955, |
|
"nll_loss": 0.8600661158561707, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5104734301567078, |
|
"rewards/margins": 1.1537415981292725, |
|
"rewards/rejected": -1.664214849472046, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8772160210111621, |
|
"grad_norm": 34.25, |
|
"learning_rate": 8.946819082327828e-08, |
|
"log_odds_chosen": 1.5886516571044922, |
|
"log_odds_ratio": -0.3529045283794403, |
|
"logits/chosen": -2.3829362392425537, |
|
"logits/rejected": -2.1005430221557617, |
|
"logps/chosen": -0.5660222172737122, |
|
"logps/rejected": -1.645013451576233, |
|
"loss": 1.2596, |
|
"nll_loss": 0.9066807627677917, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5660222172737122, |
|
"rewards/margins": 1.078991174697876, |
|
"rewards/rejected": -1.645013451576233, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8824688115561392, |
|
"grad_norm": 36.75, |
|
"learning_rate": 8.202662303847297e-08, |
|
"log_odds_chosen": 1.7980045080184937, |
|
"log_odds_ratio": -0.3362274765968323, |
|
"logits/chosen": -2.490861654281616, |
|
"logits/rejected": -2.1576590538024902, |
|
"logps/chosen": -0.5558806657791138, |
|
"logps/rejected": -1.7892097234725952, |
|
"loss": 1.2791, |
|
"nll_loss": 0.9428805112838745, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.5558806657791138, |
|
"rewards/margins": 1.2333290576934814, |
|
"rewards/rejected": -1.7892097234725952, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8877216021011162, |
|
"grad_norm": 55.5, |
|
"learning_rate": 7.48948866291661e-08, |
|
"log_odds_chosen": 1.7913442850112915, |
|
"log_odds_ratio": -0.32501915097236633, |
|
"logits/chosen": -2.5119128227233887, |
|
"logits/rejected": -2.193650960922241, |
|
"logps/chosen": -0.5597657561302185, |
|
"logps/rejected": -1.8090870380401611, |
|
"loss": 1.234, |
|
"nll_loss": 0.9089807271957397, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.5597657561302185, |
|
"rewards/margins": 1.2493212223052979, |
|
"rewards/rejected": -1.8090870380401611, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8929743926460932, |
|
"grad_norm": 39.5, |
|
"learning_rate": 6.80753886757336e-08, |
|
"log_odds_chosen": 1.5741755962371826, |
|
"log_odds_ratio": -0.34667596220970154, |
|
"logits/chosen": -2.4587669372558594, |
|
"logits/rejected": -2.187401056289673, |
|
"logps/chosen": -0.5418094396591187, |
|
"logps/rejected": -1.594808578491211, |
|
"loss": 1.2259, |
|
"nll_loss": 0.8791839480400085, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.5418094396591187, |
|
"rewards/margins": 1.0529991388320923, |
|
"rewards/rejected": -1.594808578491211, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8982271831910703, |
|
"grad_norm": 25.625, |
|
"learning_rate": 6.157043087284797e-08, |
|
"log_odds_chosen": 1.708722710609436, |
|
"log_odds_ratio": -0.34805282950401306, |
|
"logits/chosen": -2.472571849822998, |
|
"logits/rejected": -2.1671009063720703, |
|
"logps/chosen": -0.5452659130096436, |
|
"logps/rejected": -1.7160043716430664, |
|
"loss": 1.2583, |
|
"nll_loss": 0.9102743268013, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.5452659130096436, |
|
"rewards/margins": 1.1707384586334229, |
|
"rewards/rejected": -1.7160043716430664, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.9034799737360473, |
|
"grad_norm": 30.75, |
|
"learning_rate": 5.538220875261734e-08, |
|
"log_odds_chosen": 1.7142833471298218, |
|
"log_odds_ratio": -0.31549376249313354, |
|
"logits/chosen": -2.5251572132110596, |
|
"logits/rejected": -2.228562593460083, |
|
"logps/chosen": -0.5315389633178711, |
|
"logps/rejected": -1.699853539466858, |
|
"loss": 1.2153, |
|
"nll_loss": 0.8998427391052246, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.5315389633178711, |
|
"rewards/margins": 1.1683146953582764, |
|
"rewards/rejected": -1.699853539466858, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9087327642810243, |
|
"grad_norm": 50.75, |
|
"learning_rate": 4.9512810943557083e-08, |
|
"log_odds_chosen": 1.7466316223144531, |
|
"log_odds_ratio": -0.3088250756263733, |
|
"logits/chosen": -2.492593288421631, |
|
"logits/rejected": -2.1745035648345947, |
|
"logps/chosen": -0.5664678812026978, |
|
"logps/rejected": -1.7472212314605713, |
|
"loss": 1.2514, |
|
"nll_loss": 0.9425439834594727, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.5664678812026978, |
|
"rewards/margins": 1.1807533502578735, |
|
"rewards/rejected": -1.7472212314605713, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.9139855548260013, |
|
"grad_norm": 42.25, |
|
"learning_rate": 4.396421846564235e-08, |
|
"log_odds_chosen": 1.420175313949585, |
|
"log_odds_ratio": -0.39961543679237366, |
|
"logits/chosen": -2.5364463329315186, |
|
"logits/rejected": -2.272904634475708, |
|
"logps/chosen": -0.5728206038475037, |
|
"logps/rejected": -1.547858715057373, |
|
"loss": 1.3665, |
|
"nll_loss": 0.9668663144111633, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5728206038475037, |
|
"rewards/margins": 0.9750380516052246, |
|
"rewards/rejected": -1.547858715057373, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9192383453709784, |
|
"grad_norm": 88.0, |
|
"learning_rate": 3.87383040616811e-08, |
|
"log_odds_chosen": 1.8361127376556396, |
|
"log_odds_ratio": -0.3314815163612366, |
|
"logits/chosen": -2.5305237770080566, |
|
"logits/rejected": -2.205706834793091, |
|
"logps/chosen": -0.5290949940681458, |
|
"logps/rejected": -1.7841472625732422, |
|
"loss": 1.2038, |
|
"nll_loss": 0.8723037838935852, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.5290949940681458, |
|
"rewards/margins": 1.2550525665283203, |
|
"rewards/rejected": -1.7841472625732422, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.9244911359159553, |
|
"grad_norm": 59.25, |
|
"learning_rate": 3.383683156523187e-08, |
|
"log_odds_chosen": 1.5235865116119385, |
|
"log_odds_ratio": -0.3648485541343689, |
|
"logits/chosen": -2.4326975345611572, |
|
"logits/rejected": -2.0849132537841797, |
|
"logps/chosen": -0.5309010744094849, |
|
"logps/rejected": -1.563246726989746, |
|
"loss": 1.2608, |
|
"nll_loss": 0.8959411382675171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5309010744094849, |
|
"rewards/margins": 1.0323456525802612, |
|
"rewards/rejected": -1.563246726989746, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9297439264609324, |
|
"grad_norm": 33.25, |
|
"learning_rate": 2.9261455305280014e-08, |
|
"log_odds_chosen": 1.715073585510254, |
|
"log_odds_ratio": -0.3189467787742615, |
|
"logits/chosen": -2.4626471996307373, |
|
"logits/rejected": -2.1226587295532227, |
|
"logps/chosen": -0.5422563552856445, |
|
"logps/rejected": -1.709451675415039, |
|
"loss": 1.2541, |
|
"nll_loss": 0.9351384043693542, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.5422563552856445, |
|
"rewards/margins": 1.1671955585479736, |
|
"rewards/rejected": -1.709451675415039, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.9349967170059094, |
|
"grad_norm": 60.0, |
|
"learning_rate": 2.5013719547874788e-08, |
|
"log_odds_chosen": 1.6406991481781006, |
|
"log_odds_ratio": -0.37183278799057007, |
|
"logits/chosen": -2.503505229949951, |
|
"logits/rejected": -2.177072525024414, |
|
"logps/chosen": -0.5782598257064819, |
|
"logps/rejected": -1.7351022958755493, |
|
"loss": 1.2879, |
|
"nll_loss": 0.9161151051521301, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5782598257064819, |
|
"rewards/margins": 1.1568424701690674, |
|
"rewards/rejected": -1.7351022958755493, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9402495075508864, |
|
"grad_norm": 30.5, |
|
"learning_rate": 2.1095057974913177e-08, |
|
"log_odds_chosen": 1.5425198078155518, |
|
"log_odds_ratio": -0.3476109504699707, |
|
"logits/chosen": -2.463806390762329, |
|
"logits/rejected": -2.2360615730285645, |
|
"logps/chosen": -0.5494548082351685, |
|
"logps/rejected": -1.5607731342315674, |
|
"loss": 1.2287, |
|
"nll_loss": 0.8811271786689758, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5494548082351685, |
|
"rewards/margins": 1.011318325996399, |
|
"rewards/rejected": -1.5607731342315674, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.9455022980958634, |
|
"grad_norm": 40.25, |
|
"learning_rate": 1.7506793200248504e-08, |
|
"log_odds_chosen": 1.79372239112854, |
|
"log_odds_ratio": -0.34891271591186523, |
|
"logits/chosen": -2.4137704372406006, |
|
"logits/rejected": -2.1525025367736816, |
|
"logps/chosen": -0.5806652307510376, |
|
"logps/rejected": -1.8389291763305664, |
|
"loss": 1.2788, |
|
"nll_loss": 0.9298731684684753, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5806652307510376, |
|
"rewards/margins": 1.2582640647888184, |
|
"rewards/rejected": -1.8389291763305664, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9507550886408405, |
|
"grad_norm": 33.75, |
|
"learning_rate": 1.4250136323285866e-08, |
|
"log_odds_chosen": 1.7694854736328125, |
|
"log_odds_ratio": -0.339056134223938, |
|
"logits/chosen": -2.458627223968506, |
|
"logits/rejected": -2.133309841156006, |
|
"logps/chosen": -0.5246182680130005, |
|
"logps/rejected": -1.748004674911499, |
|
"loss": 1.2399, |
|
"nll_loss": 0.9008275866508484, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.5246182680130005, |
|
"rewards/margins": 1.2233861684799194, |
|
"rewards/rejected": -1.748004674911499, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.9560078791858174, |
|
"grad_norm": 42.25, |
|
"learning_rate": 1.1326186520215885e-08, |
|
"log_odds_chosen": 1.4994810819625854, |
|
"log_odds_ratio": -0.3889666199684143, |
|
"logits/chosen": -2.42987322807312, |
|
"logits/rejected": -2.2474777698516846, |
|
"logps/chosen": -0.5686417818069458, |
|
"logps/rejected": -1.6017091274261475, |
|
"loss": 1.3525, |
|
"nll_loss": 0.9635759592056274, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5686417818069458, |
|
"rewards/margins": 1.0330675840377808, |
|
"rewards/rejected": -1.6017091274261475, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9612606697307945, |
|
"grad_norm": 77.0, |
|
"learning_rate": 8.735930673024805e-09, |
|
"log_odds_chosen": 1.6517369747161865, |
|
"log_odds_ratio": -0.34624212980270386, |
|
"logits/chosen": -2.3800384998321533, |
|
"logits/rejected": -2.0897443294525146, |
|
"logps/chosen": -0.5255088806152344, |
|
"logps/rejected": -1.6485977172851562, |
|
"loss": 1.3009, |
|
"nll_loss": 0.9546435475349426, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.5255088806152344, |
|
"rewards/margins": 1.1230888366699219, |
|
"rewards/rejected": -1.6485977172851562, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.9665134602757715, |
|
"grad_norm": 30.875, |
|
"learning_rate": 6.480243036404598e-09, |
|
"log_odds_chosen": 1.8001991510391235, |
|
"log_odds_ratio": -0.3332251012325287, |
|
"logits/chosen": -2.499809980392456, |
|
"logits/rejected": -2.291926860809326, |
|
"logps/chosen": -0.5624955892562866, |
|
"logps/rejected": -1.824375867843628, |
|
"loss": 1.2743, |
|
"nll_loss": 0.941113293170929, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5624955892562866, |
|
"rewards/margins": 1.2618802785873413, |
|
"rewards/rejected": -1.824375867843628, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9717662508207485, |
|
"grad_norm": 44.75, |
|
"learning_rate": 4.559884942677783e-09, |
|
"log_odds_chosen": 1.4665955305099487, |
|
"log_odds_ratio": -0.38308554887771606, |
|
"logits/chosen": -2.397916078567505, |
|
"logits/rejected": -2.142017126083374, |
|
"logps/chosen": -0.5283843874931335, |
|
"logps/rejected": -1.4880872964859009, |
|
"loss": 1.2288, |
|
"nll_loss": 0.8457143902778625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5283843874931335, |
|
"rewards/margins": 0.9597029685974121, |
|
"rewards/rejected": -1.4880872964859009, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9770190413657256, |
|
"grad_norm": 47.75, |
|
"learning_rate": 2.9755045448351944e-09, |
|
"log_odds_chosen": 1.4579670429229736, |
|
"log_odds_ratio": -0.3762872815132141, |
|
"logits/chosen": -2.4584195613861084, |
|
"logits/rejected": -2.198525905609131, |
|
"logps/chosen": -0.5691961050033569, |
|
"logps/rejected": -1.5534415245056152, |
|
"loss": 1.2789, |
|
"nll_loss": 0.9026187062263489, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5691961050033569, |
|
"rewards/margins": 0.9842453002929688, |
|
"rewards/rejected": -1.5534415245056152, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9822718319107026, |
|
"grad_norm": 49.0, |
|
"learning_rate": 1.7276365977730856e-09, |
|
"log_odds_chosen": 1.5441417694091797, |
|
"log_odds_ratio": -0.3624028265476227, |
|
"logits/chosen": -2.535742998123169, |
|
"logits/rejected": -2.1748859882354736, |
|
"logps/chosen": -0.5510035753250122, |
|
"logps/rejected": -1.6074680089950562, |
|
"loss": 1.2792, |
|
"nll_loss": 0.9167704582214355, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5510035753250122, |
|
"rewards/margins": 1.056464433670044, |
|
"rewards/rejected": -1.6074680089950562, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9875246224556796, |
|
"grad_norm": 50.0, |
|
"learning_rate": 8.16702277804504e-10, |
|
"log_odds_chosen": 1.6150617599487305, |
|
"log_odds_ratio": -0.3433099687099457, |
|
"logits/chosen": -2.4907350540161133, |
|
"logits/rejected": -2.166508674621582, |
|
"logps/chosen": -0.5353943109512329, |
|
"logps/rejected": -1.6448442935943604, |
|
"loss": 1.2318, |
|
"nll_loss": 0.888446033000946, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.5353943109512329, |
|
"rewards/margins": 1.1094499826431274, |
|
"rewards/rejected": -1.6448442935943604, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9927774130006566, |
|
"grad_norm": 44.75, |
|
"learning_rate": 2.430090405054486e-10, |
|
"log_odds_chosen": 1.457880973815918, |
|
"log_odds_ratio": -0.36118173599243164, |
|
"logits/chosen": -2.4720263481140137, |
|
"logits/rejected": -2.178345203399658, |
|
"logps/chosen": -0.5418224334716797, |
|
"logps/rejected": -1.4925849437713623, |
|
"loss": 1.309, |
|
"nll_loss": 0.9478532671928406, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5418224334716797, |
|
"rewards/margins": 0.9507624506950378, |
|
"rewards/rejected": -1.4925849437713623, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9980302035456337, |
|
"grad_norm": 33.0, |
|
"learning_rate": 6.750516943321294e-12, |
|
"log_odds_chosen": 1.7491207122802734, |
|
"log_odds_ratio": -0.319837361574173, |
|
"logits/chosen": -2.4439542293548584, |
|
"logits/rejected": -2.1569535732269287, |
|
"logps/chosen": -0.5160128474235535, |
|
"logps/rejected": -1.7111313343048096, |
|
"loss": 1.2106, |
|
"nll_loss": 0.8908060193061829, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.5160128474235535, |
|
"rewards/margins": 1.1951183080673218, |
|
"rewards/rejected": -1.7111313343048096, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.999080761654629, |
|
"step": 951, |
|
"total_flos": 0.0, |
|
"train_loss": 1.3879666121600178, |
|
"train_runtime": 22584.718, |
|
"train_samples_per_second": 2.697, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 951, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|