|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 316.0, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 0.4211854934692383, |
|
"log_odds_ratio": -0.7698944807052612, |
|
"logits/chosen": -2.970022678375244, |
|
"logits/rejected": -2.879845142364502, |
|
"logps/chosen": -1.293312430381775, |
|
"logps/rejected": -1.638897180557251, |
|
"loss": 51.9849, |
|
"nll_loss": 1.513171672821045, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06466563045978546, |
|
"rewards/margins": 0.017279230058193207, |
|
"rewards/rejected": -0.08194486051797867, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 70.5, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 0.2721399664878845, |
|
"log_odds_ratio": -0.71299147605896, |
|
"logits/chosen": -2.927764892578125, |
|
"logits/rejected": -2.7637641429901123, |
|
"logps/chosen": -1.0732358694076538, |
|
"logps/rejected": -1.2794198989868164, |
|
"loss": 47.5455, |
|
"nll_loss": 1.3997136354446411, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05366179347038269, |
|
"rewards/margins": 0.010309201665222645, |
|
"rewards/rejected": -0.06397099792957306, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 65.0, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 0.2643585801124573, |
|
"log_odds_ratio": -0.6822870373725891, |
|
"logits/chosen": -2.615933418273926, |
|
"logits/rejected": -2.5095884799957275, |
|
"logps/chosen": -0.937173068523407, |
|
"logps/rejected": -1.1011604070663452, |
|
"loss": 47.0388, |
|
"nll_loss": 1.4694709777832031, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04685864970088005, |
|
"rewards/margins": 0.008199378848075867, |
|
"rewards/rejected": -0.05505802482366562, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 76.5, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": 0.18562307953834534, |
|
"log_odds_ratio": -0.7016376852989197, |
|
"logits/chosen": -2.505859851837158, |
|
"logits/rejected": -2.3995349407196045, |
|
"logps/chosen": -0.9065143465995789, |
|
"logps/rejected": -1.0429041385650635, |
|
"loss": 44.2092, |
|
"nll_loss": 1.3257687091827393, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04532571882009506, |
|
"rewards/margins": 0.006819483824074268, |
|
"rewards/rejected": -0.052145205438137054, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 42.0, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 0.2035745084285736, |
|
"log_odds_ratio": -0.7089617252349854, |
|
"logits/chosen": -2.5317232608795166, |
|
"logits/rejected": -2.4331934452056885, |
|
"logps/chosen": -0.9255577325820923, |
|
"logps/rejected": -1.0593881607055664, |
|
"loss": 41.9435, |
|
"nll_loss": 1.3194372653961182, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.046277888119220734, |
|
"rewards/margins": 0.0066915168426930904, |
|
"rewards/rejected": -0.05296940729022026, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 45.0, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.13239887356758118, |
|
"log_odds_ratio": -0.7149346470832825, |
|
"logits/chosen": -2.518152952194214, |
|
"logits/rejected": -2.181896686553955, |
|
"logps/chosen": -0.8738727569580078, |
|
"logps/rejected": -0.9619097709655762, |
|
"loss": 41.8499, |
|
"nll_loss": 1.2650072574615479, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04369363933801651, |
|
"rewards/margins": 0.0044018542394042015, |
|
"rewards/rejected": -0.04809548705816269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 57.75, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.05717567354440689, |
|
"log_odds_ratio": -0.7552961111068726, |
|
"logits/chosen": -2.357409954071045, |
|
"logits/rejected": -2.4911861419677734, |
|
"logps/chosen": -0.8994057774543762, |
|
"logps/rejected": -0.9373610615730286, |
|
"loss": 40.7348, |
|
"nll_loss": 1.2594066858291626, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04497029259800911, |
|
"rewards/margins": 0.0018977627623826265, |
|
"rewards/rejected": -0.04686804860830307, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 39.5, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.2211678922176361, |
|
"log_odds_ratio": -0.6827085018157959, |
|
"logits/chosen": -2.330791711807251, |
|
"logits/rejected": -2.138035297393799, |
|
"logps/chosen": -0.8551017642021179, |
|
"logps/rejected": -0.9983331561088562, |
|
"loss": 40.247, |
|
"nll_loss": 1.2133491039276123, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04275508597493172, |
|
"rewards/margins": 0.007161576300859451, |
|
"rewards/rejected": -0.04991666227579117, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 34.25, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.16872502863407135, |
|
"log_odds_ratio": -0.7335752248764038, |
|
"logits/chosen": -2.0423266887664795, |
|
"logits/rejected": -2.0761630535125732, |
|
"logps/chosen": -0.8704800605773926, |
|
"logps/rejected": -0.9801710247993469, |
|
"loss": 40.2401, |
|
"nll_loss": 1.2335753440856934, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04352400451898575, |
|
"rewards/margins": 0.005484549794346094, |
|
"rewards/rejected": -0.049008551985025406, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 42.75, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.15859460830688477, |
|
"log_odds_ratio": -0.725114643573761, |
|
"logits/chosen": -2.0112791061401367, |
|
"logits/rejected": -1.9848893880844116, |
|
"logps/chosen": -0.9168117642402649, |
|
"logps/rejected": -1.001848816871643, |
|
"loss": 42.4937, |
|
"nll_loss": 1.3201428651809692, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04584059864282608, |
|
"rewards/margins": 0.00425184890627861, |
|
"rewards/rejected": -0.050092440098524094, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 36.0, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.07739923894405365, |
|
"log_odds_ratio": -0.7347651124000549, |
|
"logits/chosen": -1.9431930780410767, |
|
"logits/rejected": -1.7705405950546265, |
|
"logps/chosen": -0.879494309425354, |
|
"logps/rejected": -0.9302487373352051, |
|
"loss": 39.5692, |
|
"nll_loss": 1.2233737707138062, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04397471994161606, |
|
"rewards/margins": 0.0025377131532877684, |
|
"rewards/rejected": -0.046512432396411896, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 34.0, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.2258971929550171, |
|
"log_odds_ratio": -0.6636900901794434, |
|
"logits/chosen": -2.2370095252990723, |
|
"logits/rejected": -1.8938239812850952, |
|
"logps/chosen": -0.8246580958366394, |
|
"logps/rejected": -0.947004497051239, |
|
"loss": 38.5665, |
|
"nll_loss": 1.1598410606384277, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04123290628194809, |
|
"rewards/margins": 0.006117324344813824, |
|
"rewards/rejected": -0.04735022783279419, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 33.75, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.15176931023597717, |
|
"log_odds_ratio": -0.7135123014450073, |
|
"logits/chosen": -2.0366923809051514, |
|
"logits/rejected": -1.9623115062713623, |
|
"logps/chosen": -0.8879337310791016, |
|
"logps/rejected": -0.9904235601425171, |
|
"loss": 38.569, |
|
"nll_loss": 1.1747570037841797, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04439668729901314, |
|
"rewards/margins": 0.005124491639435291, |
|
"rewards/rejected": -0.04952118173241615, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 32.75, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.20915071666240692, |
|
"log_odds_ratio": -0.6894658803939819, |
|
"logits/chosen": -2.418097972869873, |
|
"logits/rejected": -1.8635040521621704, |
|
"logps/chosen": -0.8480987548828125, |
|
"logps/rejected": -1.0004138946533203, |
|
"loss": 38.1133, |
|
"nll_loss": 1.167352557182312, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04240493103861809, |
|
"rewards/margins": 0.007615759968757629, |
|
"rewards/rejected": -0.050020694732666016, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 36.75, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.3536559045314789, |
|
"log_odds_ratio": -0.614356279373169, |
|
"logits/chosen": -2.0318870544433594, |
|
"logits/rejected": -2.1529486179351807, |
|
"logps/chosen": -0.7885429263114929, |
|
"logps/rejected": -1.0032585859298706, |
|
"loss": 38.2823, |
|
"nll_loss": 1.1829156875610352, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03942714259028435, |
|
"rewards/margins": 0.010735789313912392, |
|
"rewards/rejected": -0.05016293376684189, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 32.25, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.26812687516212463, |
|
"log_odds_ratio": -0.6916329264640808, |
|
"logits/chosen": -2.3767809867858887, |
|
"logits/rejected": -2.0225253105163574, |
|
"logps/chosen": -0.8772233128547668, |
|
"logps/rejected": -1.039623498916626, |
|
"loss": 37.914, |
|
"nll_loss": 1.1898010969161987, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04386116936802864, |
|
"rewards/margins": 0.008120008744299412, |
|
"rewards/rejected": -0.05198117345571518, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 27.5, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.24539685249328613, |
|
"log_odds_ratio": -0.6647487282752991, |
|
"logits/chosen": -2.4006893634796143, |
|
"logits/rejected": -1.6851059198379517, |
|
"logps/chosen": -0.8265100717544556, |
|
"logps/rejected": -0.9836961030960083, |
|
"loss": 37.7959, |
|
"nll_loss": 1.145559549331665, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04132550209760666, |
|
"rewards/margins": 0.007859298959374428, |
|
"rewards/rejected": -0.049184806644916534, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 34.0, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.2770017087459564, |
|
"log_odds_ratio": -0.6566962003707886, |
|
"logits/chosen": -2.3361473083496094, |
|
"logits/rejected": -1.969603180885315, |
|
"logps/chosen": -0.7993417978286743, |
|
"logps/rejected": -0.9568243026733398, |
|
"loss": 39.106, |
|
"nll_loss": 1.167353868484497, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.039967089891433716, |
|
"rewards/margins": 0.007874125614762306, |
|
"rewards/rejected": -0.04784121364355087, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 28.375, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.30786556005477905, |
|
"log_odds_ratio": -0.6334537267684937, |
|
"logits/chosen": -2.409719944000244, |
|
"logits/rejected": -1.9475492238998413, |
|
"logps/chosen": -0.7539029121398926, |
|
"logps/rejected": -0.9330542683601379, |
|
"loss": 38.1185, |
|
"nll_loss": 1.0971053838729858, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03769514709711075, |
|
"rewards/margins": 0.008957570418715477, |
|
"rewards/rejected": -0.046652715653181076, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 29.25, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.14969900250434875, |
|
"log_odds_ratio": -0.7243752479553223, |
|
"logits/chosen": -2.311769962310791, |
|
"logits/rejected": -1.863987922668457, |
|
"logps/chosen": -0.8708950877189636, |
|
"logps/rejected": -0.9668153524398804, |
|
"loss": 38.4239, |
|
"nll_loss": 1.1951215267181396, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04354475811123848, |
|
"rewards/margins": 0.004796011373400688, |
|
"rewards/rejected": -0.04834076762199402, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 28.375, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.256233274936676, |
|
"log_odds_ratio": -0.6507912278175354, |
|
"logits/chosen": -2.378213882446289, |
|
"logits/rejected": -1.8799747228622437, |
|
"logps/chosen": -0.8061249852180481, |
|
"logps/rejected": -0.981080174446106, |
|
"loss": 37.3571, |
|
"nll_loss": 1.124801754951477, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.040306247770786285, |
|
"rewards/margins": 0.008747758343815804, |
|
"rewards/rejected": -0.04905400425195694, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 27.375, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.625437319278717, |
|
"log_odds_ratio": -0.5325326919555664, |
|
"logits/chosen": -2.225511074066162, |
|
"logits/rejected": -1.7995342016220093, |
|
"logps/chosen": -0.6497036814689636, |
|
"logps/rejected": -0.9784590005874634, |
|
"loss": 31.8512, |
|
"nll_loss": 0.9471429586410522, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03248518705368042, |
|
"rewards/margins": 0.016437767073512077, |
|
"rewards/rejected": -0.04892294853925705, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 35.0, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.8257783055305481, |
|
"log_odds_ratio": -0.4480782449245453, |
|
"logits/chosen": -2.1574816703796387, |
|
"logits/rejected": -1.7314865589141846, |
|
"logps/chosen": -0.6095727682113647, |
|
"logps/rejected": -1.0454200506210327, |
|
"loss": 31.1204, |
|
"nll_loss": 0.9177495837211609, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.030478637665510178, |
|
"rewards/margins": 0.021792367100715637, |
|
"rewards/rejected": -0.052271001040935516, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 28.0, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.9428482055664062, |
|
"log_odds_ratio": -0.4201650619506836, |
|
"logits/chosen": -2.2846007347106934, |
|
"logits/rejected": -1.9496290683746338, |
|
"logps/chosen": -0.5810345411300659, |
|
"logps/rejected": -1.0667062997817993, |
|
"loss": 31.3146, |
|
"nll_loss": 0.9261938333511353, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.029051730409264565, |
|
"rewards/margins": 0.02428358420729637, |
|
"rewards/rejected": -0.05333530902862549, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 29.75, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.8759912252426147, |
|
"log_odds_ratio": -0.4607653021812439, |
|
"logits/chosen": -2.2224507331848145, |
|
"logits/rejected": -2.008059024810791, |
|
"logps/chosen": -0.626649022102356, |
|
"logps/rejected": -1.0588552951812744, |
|
"loss": 29.1443, |
|
"nll_loss": 0.8976675271987915, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.031332455575466156, |
|
"rewards/margins": 0.021610312163829803, |
|
"rewards/rejected": -0.05294276401400566, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 26.625, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.7104489803314209, |
|
"log_odds_ratio": -0.5004889369010925, |
|
"logits/chosen": -2.22855281829834, |
|
"logits/rejected": -1.9164679050445557, |
|
"logps/chosen": -0.6360154747962952, |
|
"logps/rejected": -0.9897836446762085, |
|
"loss": 30.3971, |
|
"nll_loss": 0.9533861875534058, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0318007729947567, |
|
"rewards/margins": 0.017688410356640816, |
|
"rewards/rejected": -0.049489181488752365, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 26.125, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.8610748052597046, |
|
"log_odds_ratio": -0.46825847029685974, |
|
"logits/chosen": -2.071824312210083, |
|
"logits/rejected": -1.9194958209991455, |
|
"logps/chosen": -0.5708586573600769, |
|
"logps/rejected": -0.9937461018562317, |
|
"loss": 30.3261, |
|
"nll_loss": 0.8964756727218628, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.028542935848236084, |
|
"rewards/margins": 0.02114437334239483, |
|
"rewards/rejected": -0.049687307327985764, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 25.625, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.7734408378601074, |
|
"log_odds_ratio": -0.4906557500362396, |
|
"logits/chosen": -2.035011053085327, |
|
"logits/rejected": -1.9261163473129272, |
|
"logps/chosen": -0.6563907861709595, |
|
"logps/rejected": -1.0614535808563232, |
|
"loss": 31.3792, |
|
"nll_loss": 0.9687965512275696, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03281954303383827, |
|
"rewards/margins": 0.020253140479326248, |
|
"rewards/rejected": -0.05307268351316452, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 28.125, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.7957239151000977, |
|
"log_odds_ratio": -0.464630126953125, |
|
"logits/chosen": -2.238290309906006, |
|
"logits/rejected": -1.8286195993423462, |
|
"logps/chosen": -0.656244158744812, |
|
"logps/rejected": -1.0872323513031006, |
|
"loss": 30.8355, |
|
"nll_loss": 0.9408265352249146, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0328122153878212, |
|
"rewards/margins": 0.0215494092553854, |
|
"rewards/rejected": -0.05436162278056145, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 31.875, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.7696617245674133, |
|
"log_odds_ratio": -0.5069034099578857, |
|
"logits/chosen": -2.127892255783081, |
|
"logits/rejected": -2.1146488189697266, |
|
"logps/chosen": -0.6157188415527344, |
|
"logps/rejected": -0.9757100939750671, |
|
"loss": 30.3204, |
|
"nll_loss": 0.9243167042732239, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.030785944312810898, |
|
"rewards/margins": 0.017999568954110146, |
|
"rewards/rejected": -0.048785511404275894, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 29.75, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.8135054707527161, |
|
"log_odds_ratio": -0.469794362783432, |
|
"logits/chosen": -2.3246893882751465, |
|
"logits/rejected": -1.855337381362915, |
|
"logps/chosen": -0.620639979839325, |
|
"logps/rejected": -1.0325305461883545, |
|
"loss": 30.8186, |
|
"nll_loss": 0.93718022108078, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.031032001599669456, |
|
"rewards/margins": 0.020594522356987, |
|
"rewards/rejected": -0.05162652209401131, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 29.625, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.7895947694778442, |
|
"log_odds_ratio": -0.4534526467323303, |
|
"logits/chosen": -2.1780362129211426, |
|
"logits/rejected": -2.215193271636963, |
|
"logps/chosen": -0.6615229845046997, |
|
"logps/rejected": -1.07595694065094, |
|
"loss": 31.4972, |
|
"nll_loss": 0.9787559509277344, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03307614475488663, |
|
"rewards/margins": 0.02072170190513134, |
|
"rewards/rejected": -0.053797848522663116, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 26.375, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.8594538569450378, |
|
"log_odds_ratio": -0.4534938335418701, |
|
"logits/chosen": -2.0608856678009033, |
|
"logits/rejected": -2.206718921661377, |
|
"logps/chosen": -0.63676917552948, |
|
"logps/rejected": -1.0896189212799072, |
|
"loss": 30.3848, |
|
"nll_loss": 0.9158208966255188, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.03183846175670624, |
|
"rewards/margins": 0.022642482072114944, |
|
"rewards/rejected": -0.05448094755411148, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 26.625, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.9427574276924133, |
|
"log_odds_ratio": -0.423784077167511, |
|
"logits/chosen": -2.239577054977417, |
|
"logits/rejected": -2.034268856048584, |
|
"logps/chosen": -0.6077014803886414, |
|
"logps/rejected": -1.1037745475769043, |
|
"loss": 30.1295, |
|
"nll_loss": 0.9275667071342468, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.030385076999664307, |
|
"rewards/margins": 0.024803655222058296, |
|
"rewards/rejected": -0.05518873408436775, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 30.5, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.8319600224494934, |
|
"log_odds_ratio": -0.4648515582084656, |
|
"logits/chosen": -2.0492475032806396, |
|
"logits/rejected": -1.7490644454956055, |
|
"logps/chosen": -0.6048796772956848, |
|
"logps/rejected": -1.0312615633010864, |
|
"loss": 29.7934, |
|
"nll_loss": 0.9257003664970398, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03024398349225521, |
|
"rewards/margins": 0.021319100633263588, |
|
"rewards/rejected": -0.0515630766749382, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 32.0, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.726507842540741, |
|
"log_odds_ratio": -0.4928598999977112, |
|
"logits/chosen": -1.812819480895996, |
|
"logits/rejected": -1.7913591861724854, |
|
"logps/chosen": -0.6322233080863953, |
|
"logps/rejected": -0.9904964566230774, |
|
"loss": 30.9407, |
|
"nll_loss": 0.9636886715888977, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03161117061972618, |
|
"rewards/margins": 0.01791365072131157, |
|
"rewards/rejected": -0.04952482134103775, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 25.75, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.836793065071106, |
|
"log_odds_ratio": -0.4456283450126648, |
|
"logits/chosen": -2.196798324584961, |
|
"logits/rejected": -1.9926494359970093, |
|
"logps/chosen": -0.608914315700531, |
|
"logps/rejected": -1.0365701913833618, |
|
"loss": 30.3171, |
|
"nll_loss": 0.9089025259017944, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03044571913778782, |
|
"rewards/margins": 0.021382790058851242, |
|
"rewards/rejected": -0.05182851105928421, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 27.5, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.8888555765151978, |
|
"log_odds_ratio": -0.4468957781791687, |
|
"logits/chosen": -2.1875064373016357, |
|
"logits/rejected": -1.800842523574829, |
|
"logps/chosen": -0.6447620987892151, |
|
"logps/rejected": -1.0986078977584839, |
|
"loss": 31.1917, |
|
"nll_loss": 0.9418613314628601, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.032238103449344635, |
|
"rewards/margins": 0.022692296653985977, |
|
"rewards/rejected": -0.05493040010333061, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 36.5, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.908234715461731, |
|
"log_odds_ratio": -0.44621172547340393, |
|
"logits/chosen": -2.1104514598846436, |
|
"logits/rejected": -1.507428526878357, |
|
"logps/chosen": -0.6148265600204468, |
|
"logps/rejected": -1.0961024761199951, |
|
"loss": 30.0598, |
|
"nll_loss": 0.9062894582748413, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03074132837355137, |
|
"rewards/margins": 0.024063793942332268, |
|
"rewards/rejected": -0.054805122315883636, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 32.5, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.7305320501327515, |
|
"log_odds_ratio": -0.5053830146789551, |
|
"logits/chosen": -2.168057918548584, |
|
"logits/rejected": -1.917382836341858, |
|
"logps/chosen": -0.6424310207366943, |
|
"logps/rejected": -1.0104854106903076, |
|
"loss": 32.0888, |
|
"nll_loss": 0.9418985247612, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.03212154656648636, |
|
"rewards/margins": 0.018402721732854843, |
|
"rewards/rejected": -0.0505242720246315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 26.625, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.7646613717079163, |
|
"log_odds_ratio": -0.48494815826416016, |
|
"logits/chosen": -2.296674966812134, |
|
"logits/rejected": -1.611789345741272, |
|
"logps/chosen": -0.6354637145996094, |
|
"logps/rejected": -1.0375540256500244, |
|
"loss": 28.9033, |
|
"nll_loss": 0.8906237483024597, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.03177318722009659, |
|
"rewards/margins": 0.020104512572288513, |
|
"rewards/rejected": -0.0518776997923851, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 28.375, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.709034264087677, |
|
"log_odds_ratio": -0.4996446967124939, |
|
"logits/chosen": -2.0014171600341797, |
|
"logits/rejected": -2.168572425842285, |
|
"logps/chosen": -0.6113818287849426, |
|
"logps/rejected": -0.9349877238273621, |
|
"loss": 28.8765, |
|
"nll_loss": 0.8932281732559204, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.030569087713956833, |
|
"rewards/margins": 0.01618029922246933, |
|
"rewards/rejected": -0.04674938693642616, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 24.5, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 1.2737390995025635, |
|
"log_odds_ratio": -0.3561268448829651, |
|
"logits/chosen": -2.001032590866089, |
|
"logits/rejected": -1.6088378429412842, |
|
"logps/chosen": -0.5276457071304321, |
|
"logps/rejected": -1.1020501852035522, |
|
"loss": 26.3212, |
|
"nll_loss": 0.7937939167022705, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.026382286101579666, |
|
"rewards/margins": 0.028720220550894737, |
|
"rewards/rejected": -0.055102504789829254, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 40.0, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.5025131702423096, |
|
"log_odds_ratio": -0.32322412729263306, |
|
"logits/chosen": -1.936810851097107, |
|
"logits/rejected": -1.9919002056121826, |
|
"logps/chosen": -0.47912636399269104, |
|
"logps/rejected": -1.1569387912750244, |
|
"loss": 24.7362, |
|
"nll_loss": 0.776683509349823, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.02395631931722164, |
|
"rewards/margins": 0.03389061614871025, |
|
"rewards/rejected": -0.05784693360328674, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 30.5, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.4707694053649902, |
|
"log_odds_ratio": -0.3371773660182953, |
|
"logits/chosen": -1.941663146018982, |
|
"logits/rejected": -1.7569644451141357, |
|
"logps/chosen": -0.476045697927475, |
|
"logps/rejected": -1.1464457511901855, |
|
"loss": 25.7473, |
|
"nll_loss": 0.7753463983535767, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.023802287876605988, |
|
"rewards/margins": 0.03352000191807747, |
|
"rewards/rejected": -0.05732228606939316, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 26.625, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.630690574645996, |
|
"log_odds_ratio": -0.28257861733436584, |
|
"logits/chosen": -2.0433297157287598, |
|
"logits/rejected": -1.7494617700576782, |
|
"logps/chosen": -0.45956555008888245, |
|
"logps/rejected": -1.192975640296936, |
|
"loss": 24.3207, |
|
"nll_loss": 0.7468871474266052, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.022978277876973152, |
|
"rewards/margins": 0.0366705060005188, |
|
"rewards/rejected": -0.0596487820148468, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 34.5, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.3293721675872803, |
|
"log_odds_ratio": -0.3706313967704773, |
|
"logits/chosen": -2.0370144844055176, |
|
"logits/rejected": -2.092005968093872, |
|
"logps/chosen": -0.5131632089614868, |
|
"logps/rejected": -1.0899993181228638, |
|
"loss": 24.796, |
|
"nll_loss": 0.7735158205032349, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.02565816417336464, |
|
"rewards/margins": 0.02884179912507534, |
|
"rewards/rejected": -0.05449996143579483, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 33.75, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.5746119022369385, |
|
"log_odds_ratio": -0.30449697375297546, |
|
"logits/chosen": -2.0129730701446533, |
|
"logits/rejected": -1.8451099395751953, |
|
"logps/chosen": -0.46393972635269165, |
|
"logps/rejected": -1.1728386878967285, |
|
"loss": 24.7856, |
|
"nll_loss": 0.7696127891540527, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.023196987807750702, |
|
"rewards/margins": 0.0354449488222599, |
|
"rewards/rejected": -0.058641932904720306, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 26.875, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.5067981481552124, |
|
"log_odds_ratio": -0.3081058859825134, |
|
"logits/chosen": -2.2492246627807617, |
|
"logits/rejected": -1.7415387630462646, |
|
"logps/chosen": -0.47982436418533325, |
|
"logps/rejected": -1.1375417709350586, |
|
"loss": 24.6482, |
|
"nll_loss": 0.7710675001144409, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.02399122156202793, |
|
"rewards/margins": 0.032885871827602386, |
|
"rewards/rejected": -0.05687708780169487, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 29.25, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 1.550789713859558, |
|
"log_odds_ratio": -0.28812703490257263, |
|
"logits/chosen": -1.9938061237335205, |
|
"logits/rejected": -1.957233190536499, |
|
"logps/chosen": -0.46859461069107056, |
|
"logps/rejected": -1.1675255298614502, |
|
"loss": 24.0579, |
|
"nll_loss": 0.72864830493927, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.023429730907082558, |
|
"rewards/margins": 0.034946538507938385, |
|
"rewards/rejected": -0.05837627500295639, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 27.625, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 1.6658976078033447, |
|
"log_odds_ratio": -0.29064321517944336, |
|
"logits/chosen": -2.095778226852417, |
|
"logits/rejected": -1.5485340356826782, |
|
"logps/chosen": -0.4274715483188629, |
|
"logps/rejected": -1.1617481708526611, |
|
"loss": 23.8564, |
|
"nll_loss": 0.724585771560669, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.021373575553297997, |
|
"rewards/margins": 0.03671382740139961, |
|
"rewards/rejected": -0.05808740109205246, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 30.375, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.706284761428833, |
|
"log_odds_ratio": -0.28669941425323486, |
|
"logits/chosen": -2.0861926078796387, |
|
"logits/rejected": -1.9107942581176758, |
|
"logps/chosen": -0.4499754011631012, |
|
"logps/rejected": -1.1831719875335693, |
|
"loss": 23.9298, |
|
"nll_loss": 0.7361353039741516, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.02249876968562603, |
|
"rewards/margins": 0.03665982931852341, |
|
"rewards/rejected": -0.059158600866794586, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 32.25, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.5768635272979736, |
|
"log_odds_ratio": -0.33476293087005615, |
|
"logits/chosen": -1.945469617843628, |
|
"logits/rejected": -1.9802621603012085, |
|
"logps/chosen": -0.47499436140060425, |
|
"logps/rejected": -1.1629369258880615, |
|
"loss": 23.8576, |
|
"nll_loss": 0.7378355264663696, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.023749716579914093, |
|
"rewards/margins": 0.034397125244140625, |
|
"rewards/rejected": -0.05814684182405472, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 30.375, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.5624961853027344, |
|
"log_odds_ratio": -0.29299020767211914, |
|
"logits/chosen": -2.2144691944122314, |
|
"logits/rejected": -2.014341115951538, |
|
"logps/chosen": -0.4924210011959076, |
|
"logps/rejected": -1.1973953247070312, |
|
"loss": 24.864, |
|
"nll_loss": 0.7814281582832336, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.02462105080485344, |
|
"rewards/margins": 0.03524871915578842, |
|
"rewards/rejected": -0.05986977368593216, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 30.875, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 1.5759508609771729, |
|
"log_odds_ratio": -0.31793758273124695, |
|
"logits/chosen": -2.1429502964019775, |
|
"logits/rejected": -1.46464204788208, |
|
"logps/chosen": -0.48924770951271057, |
|
"logps/rejected": -1.1759014129638672, |
|
"loss": 24.7519, |
|
"nll_loss": 0.7631200551986694, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.024462386965751648, |
|
"rewards/margins": 0.03433268517255783, |
|
"rewards/rejected": -0.05879507586359978, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 36.25, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 1.4352095127105713, |
|
"log_odds_ratio": -0.31683534383773804, |
|
"logits/chosen": -1.945744276046753, |
|
"logits/rejected": -2.0830960273742676, |
|
"logps/chosen": -0.4646902084350586, |
|
"logps/rejected": -1.098145842552185, |
|
"loss": 23.7151, |
|
"nll_loss": 0.7180293798446655, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02323450893163681, |
|
"rewards/margins": 0.03167278692126274, |
|
"rewards/rejected": -0.05490729957818985, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 31.125, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.4293460845947266, |
|
"log_odds_ratio": -0.329708069562912, |
|
"logits/chosen": -2.1722819805145264, |
|
"logits/rejected": -1.6502447128295898, |
|
"logps/chosen": -0.4705706536769867, |
|
"logps/rejected": -1.1116634607315063, |
|
"loss": 24.7964, |
|
"nll_loss": 0.745871901512146, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.023528533056378365, |
|
"rewards/margins": 0.03205464407801628, |
|
"rewards/rejected": -0.055583178997039795, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 31.625, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.6304314136505127, |
|
"log_odds_ratio": -0.2968464195728302, |
|
"logits/chosen": -1.9601917266845703, |
|
"logits/rejected": -2.0320401191711426, |
|
"logps/chosen": -0.4668458104133606, |
|
"logps/rejected": -1.1929422616958618, |
|
"loss": 24.5905, |
|
"nll_loss": 0.7679024934768677, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02334229089319706, |
|
"rewards/margins": 0.03630482777953148, |
|
"rewards/rejected": -0.05964711308479309, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 32.75, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 1.5559337139129639, |
|
"log_odds_ratio": -0.3252851665019989, |
|
"logits/chosen": -1.9125938415527344, |
|
"logits/rejected": -1.8471267223358154, |
|
"logps/chosen": -0.4792943000793457, |
|
"logps/rejected": -1.1848201751708984, |
|
"loss": 24.4441, |
|
"nll_loss": 0.756227970123291, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.023964714258909225, |
|
"rewards/margins": 0.03527629375457764, |
|
"rewards/rejected": -0.05924100801348686, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 28.875, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.3979889154434204, |
|
"log_odds_ratio": -0.33928608894348145, |
|
"logits/chosen": -2.341634511947632, |
|
"logits/rejected": -1.843711495399475, |
|
"logps/chosen": -0.5055073499679565, |
|
"logps/rejected": -1.187756896018982, |
|
"loss": 24.503, |
|
"nll_loss": 0.7757240533828735, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.025275370106101036, |
|
"rewards/margins": 0.03411247208714485, |
|
"rewards/rejected": -0.05938784033060074, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 29.5, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 1.506742238998413, |
|
"log_odds_ratio": -0.3147231638431549, |
|
"logits/chosen": -2.2259716987609863, |
|
"logits/rejected": -1.8140947818756104, |
|
"logps/chosen": -0.5070487260818481, |
|
"logps/rejected": -1.1675385236740112, |
|
"loss": 23.4515, |
|
"nll_loss": 0.720944344997406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.025352437049150467, |
|
"rewards/margins": 0.033024489879608154, |
|
"rewards/rejected": -0.05837692692875862, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 31.75, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.6132290363311768, |
|
"log_odds_ratio": -0.2838875949382782, |
|
"logits/chosen": -2.0488831996917725, |
|
"logits/rejected": -1.5571346282958984, |
|
"logps/chosen": -0.4689159393310547, |
|
"logps/rejected": -1.1605467796325684, |
|
"loss": 24.3086, |
|
"nll_loss": 0.7292035818099976, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.023445798084139824, |
|
"rewards/margins": 0.034581538289785385, |
|
"rewards/rejected": -0.05802733823657036, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 35.25, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.6901721954345703, |
|
"log_odds_ratio": -0.2843396067619324, |
|
"logits/chosen": -2.0129687786102295, |
|
"logits/rejected": -1.6522471904754639, |
|
"logps/chosen": -0.4355766177177429, |
|
"logps/rejected": -1.1733875274658203, |
|
"loss": 23.7902, |
|
"nll_loss": 0.744986891746521, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.021778833121061325, |
|
"rewards/margins": 0.03689054772257805, |
|
"rewards/rejected": -0.058669377118349075, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 32.00103834848555, |
|
"train_runtime": 7481.6163, |
|
"train_samples_per_second": 2.707, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|