mistral-7b-orpo-noisy-v2 / trainer_state.json
silviasapora's picture
Model save
e8a9384 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986666666666667,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 316.0,
"learning_rate": 7.8125e-06,
"log_odds_chosen": 0.4211854934692383,
"log_odds_ratio": -0.7698944807052612,
"logits/chosen": -2.970022678375244,
"logits/rejected": -2.879845142364502,
"logps/chosen": -1.293312430381775,
"logps/rejected": -1.638897180557251,
"loss": 51.9849,
"nll_loss": 1.513171672821045,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.06466563045978546,
"rewards/margins": 0.017279230058193207,
"rewards/rejected": -0.08194486051797867,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 70.5,
"learning_rate": 1.5625e-05,
"log_odds_chosen": 0.2721399664878845,
"log_odds_ratio": -0.71299147605896,
"logits/chosen": -2.927764892578125,
"logits/rejected": -2.7637641429901123,
"logps/chosen": -1.0732358694076538,
"logps/rejected": -1.2794198989868164,
"loss": 47.5455,
"nll_loss": 1.3997136354446411,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.05366179347038269,
"rewards/margins": 0.010309201665222645,
"rewards/rejected": -0.06397099792957306,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 65.0,
"learning_rate": 2.34375e-05,
"log_odds_chosen": 0.2643585801124573,
"log_odds_ratio": -0.6822870373725891,
"logits/chosen": -2.615933418273926,
"logits/rejected": -2.5095884799957275,
"logps/chosen": -0.937173068523407,
"logps/rejected": -1.1011604070663452,
"loss": 47.0388,
"nll_loss": 1.4694709777832031,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04685864970088005,
"rewards/margins": 0.008199378848075867,
"rewards/rejected": -0.05505802482366562,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 76.5,
"learning_rate": 3.125e-05,
"log_odds_chosen": 0.18562307953834534,
"log_odds_ratio": -0.7016376852989197,
"logits/chosen": -2.505859851837158,
"logits/rejected": -2.3995349407196045,
"logps/chosen": -0.9065143465995789,
"logps/rejected": -1.0429041385650635,
"loss": 44.2092,
"nll_loss": 1.3257687091827393,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.04532571882009506,
"rewards/margins": 0.006819483824074268,
"rewards/rejected": -0.052145205438137054,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 42.0,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 0.2035745084285736,
"log_odds_ratio": -0.7089617252349854,
"logits/chosen": -2.5317232608795166,
"logits/rejected": -2.4331934452056885,
"logps/chosen": -0.9255577325820923,
"logps/rejected": -1.0593881607055664,
"loss": 41.9435,
"nll_loss": 1.3194372653961182,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.046277888119220734,
"rewards/margins": 0.0066915168426930904,
"rewards/rejected": -0.05296940729022026,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 45.0,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.13239887356758118,
"log_odds_ratio": -0.7149346470832825,
"logits/chosen": -2.518152952194214,
"logits/rejected": -2.181896686553955,
"logps/chosen": -0.8738727569580078,
"logps/rejected": -0.9619097709655762,
"loss": 41.8499,
"nll_loss": 1.2650072574615479,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.04369363933801651,
"rewards/margins": 0.0044018542394042015,
"rewards/rejected": -0.04809548705816269,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 57.75,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.05717567354440689,
"log_odds_ratio": -0.7552961111068726,
"logits/chosen": -2.357409954071045,
"logits/rejected": -2.4911861419677734,
"logps/chosen": -0.8994057774543762,
"logps/rejected": -0.9373610615730286,
"loss": 40.7348,
"nll_loss": 1.2594066858291626,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04497029259800911,
"rewards/margins": 0.0018977627623826265,
"rewards/rejected": -0.04686804860830307,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 39.5,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.2211678922176361,
"log_odds_ratio": -0.6827085018157959,
"logits/chosen": -2.330791711807251,
"logits/rejected": -2.138035297393799,
"logps/chosen": -0.8551017642021179,
"logps/rejected": -0.9983331561088562,
"loss": 40.247,
"nll_loss": 1.2133491039276123,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04275508597493172,
"rewards/margins": 0.007161576300859451,
"rewards/rejected": -0.04991666227579117,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 34.25,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.16872502863407135,
"log_odds_ratio": -0.7335752248764038,
"logits/chosen": -2.0423266887664795,
"logits/rejected": -2.0761630535125732,
"logps/chosen": -0.8704800605773926,
"logps/rejected": -0.9801710247993469,
"loss": 40.2401,
"nll_loss": 1.2335753440856934,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04352400451898575,
"rewards/margins": 0.005484549794346094,
"rewards/rejected": -0.049008551985025406,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 42.75,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.15859460830688477,
"log_odds_ratio": -0.725114643573761,
"logits/chosen": -2.0112791061401367,
"logits/rejected": -1.9848893880844116,
"logps/chosen": -0.9168117642402649,
"logps/rejected": -1.001848816871643,
"loss": 42.4937,
"nll_loss": 1.3201428651809692,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04584059864282608,
"rewards/margins": 0.00425184890627861,
"rewards/rejected": -0.050092440098524094,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 36.0,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 0.07739923894405365,
"log_odds_ratio": -0.7347651124000549,
"logits/chosen": -1.9431930780410767,
"logits/rejected": -1.7705405950546265,
"logps/chosen": -0.879494309425354,
"logps/rejected": -0.9302487373352051,
"loss": 39.5692,
"nll_loss": 1.2233737707138062,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04397471994161606,
"rewards/margins": 0.0025377131532877684,
"rewards/rejected": -0.046512432396411896,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 34.0,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.2258971929550171,
"log_odds_ratio": -0.6636900901794434,
"logits/chosen": -2.2370095252990723,
"logits/rejected": -1.8938239812850952,
"logps/chosen": -0.8246580958366394,
"logps/rejected": -0.947004497051239,
"loss": 38.5665,
"nll_loss": 1.1598410606384277,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.04123290628194809,
"rewards/margins": 0.006117324344813824,
"rewards/rejected": -0.04735022783279419,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 33.75,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.15176931023597717,
"log_odds_ratio": -0.7135123014450073,
"logits/chosen": -2.0366923809051514,
"logits/rejected": -1.9623115062713623,
"logps/chosen": -0.8879337310791016,
"logps/rejected": -0.9904235601425171,
"loss": 38.569,
"nll_loss": 1.1747570037841797,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04439668729901314,
"rewards/margins": 0.005124491639435291,
"rewards/rejected": -0.04952118173241615,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 32.75,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.20915071666240692,
"log_odds_ratio": -0.6894658803939819,
"logits/chosen": -2.418097972869873,
"logits/rejected": -1.8635040521621704,
"logps/chosen": -0.8480987548828125,
"logps/rejected": -1.0004138946533203,
"loss": 38.1133,
"nll_loss": 1.167352557182312,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04240493103861809,
"rewards/margins": 0.007615759968757629,
"rewards/rejected": -0.050020694732666016,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 36.75,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 0.3536559045314789,
"log_odds_ratio": -0.614356279373169,
"logits/chosen": -2.0318870544433594,
"logits/rejected": -2.1529486179351807,
"logps/chosen": -0.7885429263114929,
"logps/rejected": -1.0032585859298706,
"loss": 38.2823,
"nll_loss": 1.1829156875610352,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.03942714259028435,
"rewards/margins": 0.010735789313912392,
"rewards/rejected": -0.05016293376684189,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 32.25,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 0.26812687516212463,
"log_odds_ratio": -0.6916329264640808,
"logits/chosen": -2.3767809867858887,
"logits/rejected": -2.0225253105163574,
"logps/chosen": -0.8772233128547668,
"logps/rejected": -1.039623498916626,
"loss": 37.914,
"nll_loss": 1.1898010969161987,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.04386116936802864,
"rewards/margins": 0.008120008744299412,
"rewards/rejected": -0.05198117345571518,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 27.5,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 0.24539685249328613,
"log_odds_ratio": -0.6647487282752991,
"logits/chosen": -2.4006893634796143,
"logits/rejected": -1.6851059198379517,
"logps/chosen": -0.8265100717544556,
"logps/rejected": -0.9836961030960083,
"loss": 37.7959,
"nll_loss": 1.145559549331665,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.04132550209760666,
"rewards/margins": 0.007859298959374428,
"rewards/rejected": -0.049184806644916534,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 34.0,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.2770017087459564,
"log_odds_ratio": -0.6566962003707886,
"logits/chosen": -2.3361473083496094,
"logits/rejected": -1.969603180885315,
"logps/chosen": -0.7993417978286743,
"logps/rejected": -0.9568243026733398,
"loss": 39.106,
"nll_loss": 1.167353868484497,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.039967089891433716,
"rewards/margins": 0.007874125614762306,
"rewards/rejected": -0.04784121364355087,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 28.375,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.30786556005477905,
"log_odds_ratio": -0.6334537267684937,
"logits/chosen": -2.409719944000244,
"logits/rejected": -1.9475492238998413,
"logps/chosen": -0.7539029121398926,
"logps/rejected": -0.9330542683601379,
"loss": 38.1185,
"nll_loss": 1.0971053838729858,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.03769514709711075,
"rewards/margins": 0.008957570418715477,
"rewards/rejected": -0.046652715653181076,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 29.25,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.14969900250434875,
"log_odds_ratio": -0.7243752479553223,
"logits/chosen": -2.311769962310791,
"logits/rejected": -1.863987922668457,
"logps/chosen": -0.8708950877189636,
"logps/rejected": -0.9668153524398804,
"loss": 38.4239,
"nll_loss": 1.1951215267181396,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04354475811123848,
"rewards/margins": 0.004796011373400688,
"rewards/rejected": -0.04834076762199402,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 28.375,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.256233274936676,
"log_odds_ratio": -0.6507912278175354,
"logits/chosen": -2.378213882446289,
"logits/rejected": -1.8799747228622437,
"logps/chosen": -0.8061249852180481,
"logps/rejected": -0.981080174446106,
"loss": 37.3571,
"nll_loss": 1.124801754951477,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.040306247770786285,
"rewards/margins": 0.008747758343815804,
"rewards/rejected": -0.04905400425195694,
"step": 105
},
{
"epoch": 1.0429629629629629,
"grad_norm": 27.375,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.625437319278717,
"log_odds_ratio": -0.5325326919555664,
"logits/chosen": -2.225511074066162,
"logits/rejected": -1.7995342016220093,
"logps/chosen": -0.6497036814689636,
"logps/rejected": -0.9784590005874634,
"loss": 31.8512,
"nll_loss": 0.9471429586410522,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.03248518705368042,
"rewards/margins": 0.016437767073512077,
"rewards/rejected": -0.04892294853925705,
"step": 110
},
{
"epoch": 1.0903703703703704,
"grad_norm": 35.0,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.8257783055305481,
"log_odds_ratio": -0.4480782449245453,
"logits/chosen": -2.1574816703796387,
"logits/rejected": -1.7314865589141846,
"logps/chosen": -0.6095727682113647,
"logps/rejected": -1.0454200506210327,
"loss": 31.1204,
"nll_loss": 0.9177495837211609,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.030478637665510178,
"rewards/margins": 0.021792367100715637,
"rewards/rejected": -0.052271001040935516,
"step": 115
},
{
"epoch": 1.1377777777777778,
"grad_norm": 28.0,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 0.9428482055664062,
"log_odds_ratio": -0.4201650619506836,
"logits/chosen": -2.2846007347106934,
"logits/rejected": -1.9496290683746338,
"logps/chosen": -0.5810345411300659,
"logps/rejected": -1.0667062997817993,
"loss": 31.3146,
"nll_loss": 0.9261938333511353,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.029051730409264565,
"rewards/margins": 0.02428358420729637,
"rewards/rejected": -0.05333530902862549,
"step": 120
},
{
"epoch": 1.1851851851851851,
"grad_norm": 29.75,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 0.8759912252426147,
"log_odds_ratio": -0.4607653021812439,
"logits/chosen": -2.2224507331848145,
"logits/rejected": -2.008059024810791,
"logps/chosen": -0.626649022102356,
"logps/rejected": -1.0588552951812744,
"loss": 29.1443,
"nll_loss": 0.8976675271987915,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.031332455575466156,
"rewards/margins": 0.021610312163829803,
"rewards/rejected": -0.05294276401400566,
"step": 125
},
{
"epoch": 1.2325925925925927,
"grad_norm": 26.625,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.7104489803314209,
"log_odds_ratio": -0.5004889369010925,
"logits/chosen": -2.22855281829834,
"logits/rejected": -1.9164679050445557,
"logps/chosen": -0.6360154747962952,
"logps/rejected": -0.9897836446762085,
"loss": 30.3971,
"nll_loss": 0.9533861875534058,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.0318007729947567,
"rewards/margins": 0.017688410356640816,
"rewards/rejected": -0.049489181488752365,
"step": 130
},
{
"epoch": 1.28,
"grad_norm": 26.125,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 0.8610748052597046,
"log_odds_ratio": -0.46825847029685974,
"logits/chosen": -2.071824312210083,
"logits/rejected": -1.9194958209991455,
"logps/chosen": -0.5708586573600769,
"logps/rejected": -0.9937461018562317,
"loss": 30.3261,
"nll_loss": 0.8964756727218628,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.028542935848236084,
"rewards/margins": 0.02114437334239483,
"rewards/rejected": -0.049687307327985764,
"step": 135
},
{
"epoch": 1.3274074074074074,
"grad_norm": 25.625,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 0.7734408378601074,
"log_odds_ratio": -0.4906557500362396,
"logits/chosen": -2.035011053085327,
"logits/rejected": -1.9261163473129272,
"logps/chosen": -0.6563907861709595,
"logps/rejected": -1.0614535808563232,
"loss": 31.3792,
"nll_loss": 0.9687965512275696,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03281954303383827,
"rewards/margins": 0.020253140479326248,
"rewards/rejected": -0.05307268351316452,
"step": 140
},
{
"epoch": 1.374814814814815,
"grad_norm": 28.125,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 0.7957239151000977,
"log_odds_ratio": -0.464630126953125,
"logits/chosen": -2.238290309906006,
"logits/rejected": -1.8286195993423462,
"logps/chosen": -0.656244158744812,
"logps/rejected": -1.0872323513031006,
"loss": 30.8355,
"nll_loss": 0.9408265352249146,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0328122153878212,
"rewards/margins": 0.0215494092553854,
"rewards/rejected": -0.05436162278056145,
"step": 145
},
{
"epoch": 1.4222222222222223,
"grad_norm": 31.875,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.7696617245674133,
"log_odds_ratio": -0.5069034099578857,
"logits/chosen": -2.127892255783081,
"logits/rejected": -2.1146488189697266,
"logps/chosen": -0.6157188415527344,
"logps/rejected": -0.9757100939750671,
"loss": 30.3204,
"nll_loss": 0.9243167042732239,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.030785944312810898,
"rewards/margins": 0.017999568954110146,
"rewards/rejected": -0.048785511404275894,
"step": 150
},
{
"epoch": 1.4696296296296296,
"grad_norm": 29.75,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 0.8135054707527161,
"log_odds_ratio": -0.469794362783432,
"logits/chosen": -2.3246893882751465,
"logits/rejected": -1.855337381362915,
"logps/chosen": -0.620639979839325,
"logps/rejected": -1.0325305461883545,
"loss": 30.8186,
"nll_loss": 0.93718022108078,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.031032001599669456,
"rewards/margins": 0.020594522356987,
"rewards/rejected": -0.05162652209401131,
"step": 155
},
{
"epoch": 1.5170370370370372,
"grad_norm": 29.625,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 0.7895947694778442,
"log_odds_ratio": -0.4534526467323303,
"logits/chosen": -2.1780362129211426,
"logits/rejected": -2.215193271636963,
"logps/chosen": -0.6615229845046997,
"logps/rejected": -1.07595694065094,
"loss": 31.4972,
"nll_loss": 0.9787559509277344,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03307614475488663,
"rewards/margins": 0.02072170190513134,
"rewards/rejected": -0.053797848522663116,
"step": 160
},
{
"epoch": 1.5644444444444443,
"grad_norm": 26.375,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.8594538569450378,
"log_odds_ratio": -0.4534938335418701,
"logits/chosen": -2.0608856678009033,
"logits/rejected": -2.206718921661377,
"logps/chosen": -0.63676917552948,
"logps/rejected": -1.0896189212799072,
"loss": 30.3848,
"nll_loss": 0.9158208966255188,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.03183846175670624,
"rewards/margins": 0.022642482072114944,
"rewards/rejected": -0.05448094755411148,
"step": 165
},
{
"epoch": 1.6118518518518519,
"grad_norm": 26.625,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 0.9427574276924133,
"log_odds_ratio": -0.423784077167511,
"logits/chosen": -2.239577054977417,
"logits/rejected": -2.034268856048584,
"logps/chosen": -0.6077014803886414,
"logps/rejected": -1.1037745475769043,
"loss": 30.1295,
"nll_loss": 0.9275667071342468,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.030385076999664307,
"rewards/margins": 0.024803655222058296,
"rewards/rejected": -0.05518873408436775,
"step": 170
},
{
"epoch": 1.6592592592592592,
"grad_norm": 30.5,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.8319600224494934,
"log_odds_ratio": -0.4648515582084656,
"logits/chosen": -2.0492475032806396,
"logits/rejected": -1.7490644454956055,
"logps/chosen": -0.6048796772956848,
"logps/rejected": -1.0312615633010864,
"loss": 29.7934,
"nll_loss": 0.9257003664970398,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.03024398349225521,
"rewards/margins": 0.021319100633263588,
"rewards/rejected": -0.0515630766749382,
"step": 175
},
{
"epoch": 1.7066666666666666,
"grad_norm": 32.0,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.726507842540741,
"log_odds_ratio": -0.4928598999977112,
"logits/chosen": -1.812819480895996,
"logits/rejected": -1.7913591861724854,
"logps/chosen": -0.6322233080863953,
"logps/rejected": -0.9904964566230774,
"loss": 30.9407,
"nll_loss": 0.9636886715888977,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.03161117061972618,
"rewards/margins": 0.01791365072131157,
"rewards/rejected": -0.04952482134103775,
"step": 180
},
{
"epoch": 1.7540740740740741,
"grad_norm": 25.75,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 0.836793065071106,
"log_odds_ratio": -0.4456283450126648,
"logits/chosen": -2.196798324584961,
"logits/rejected": -1.9926494359970093,
"logps/chosen": -0.608914315700531,
"logps/rejected": -1.0365701913833618,
"loss": 30.3171,
"nll_loss": 0.9089025259017944,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.03044571913778782,
"rewards/margins": 0.021382790058851242,
"rewards/rejected": -0.05182851105928421,
"step": 185
},
{
"epoch": 1.8014814814814815,
"grad_norm": 27.5,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.8888555765151978,
"log_odds_ratio": -0.4468957781791687,
"logits/chosen": -2.1875064373016357,
"logits/rejected": -1.800842523574829,
"logps/chosen": -0.6447620987892151,
"logps/rejected": -1.0986078977584839,
"loss": 31.1917,
"nll_loss": 0.9418613314628601,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.032238103449344635,
"rewards/margins": 0.022692296653985977,
"rewards/rejected": -0.05493040010333061,
"step": 190
},
{
"epoch": 1.8488888888888888,
"grad_norm": 36.5,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 0.908234715461731,
"log_odds_ratio": -0.44621172547340393,
"logits/chosen": -2.1104514598846436,
"logits/rejected": -1.507428526878357,
"logps/chosen": -0.6148265600204468,
"logps/rejected": -1.0961024761199951,
"loss": 30.0598,
"nll_loss": 0.9062894582748413,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.03074132837355137,
"rewards/margins": 0.024063793942332268,
"rewards/rejected": -0.054805122315883636,
"step": 195
},
{
"epoch": 1.8962962962962964,
"grad_norm": 32.5,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.7305320501327515,
"log_odds_ratio": -0.5053830146789551,
"logits/chosen": -2.168057918548584,
"logits/rejected": -1.917382836341858,
"logps/chosen": -0.6424310207366943,
"logps/rejected": -1.0104854106903076,
"loss": 32.0888,
"nll_loss": 0.9418985247612,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.03212154656648636,
"rewards/margins": 0.018402721732854843,
"rewards/rejected": -0.0505242720246315,
"step": 200
},
{
"epoch": 1.9437037037037037,
"grad_norm": 26.625,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.7646613717079163,
"log_odds_ratio": -0.48494815826416016,
"logits/chosen": -2.296674966812134,
"logits/rejected": -1.611789345741272,
"logps/chosen": -0.6354637145996094,
"logps/rejected": -1.0375540256500244,
"loss": 28.9033,
"nll_loss": 0.8906237483024597,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.03177318722009659,
"rewards/margins": 0.020104512572288513,
"rewards/rejected": -0.0518776997923851,
"step": 205
},
{
"epoch": 1.991111111111111,
"grad_norm": 28.375,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.709034264087677,
"log_odds_ratio": -0.4996446967124939,
"logits/chosen": -2.0014171600341797,
"logits/rejected": -2.168572425842285,
"logps/chosen": -0.6113818287849426,
"logps/rejected": -0.9349877238273621,
"loss": 28.8765,
"nll_loss": 0.8932281732559204,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.030569087713956833,
"rewards/margins": 0.01618029922246933,
"rewards/rejected": -0.04674938693642616,
"step": 210
},
{
"epoch": 2.0385185185185186,
"grad_norm": 24.5,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 1.2737390995025635,
"log_odds_ratio": -0.3561268448829651,
"logits/chosen": -2.001032590866089,
"logits/rejected": -1.6088378429412842,
"logps/chosen": -0.5276457071304321,
"logps/rejected": -1.1020501852035522,
"loss": 26.3212,
"nll_loss": 0.7937939167022705,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.026382286101579666,
"rewards/margins": 0.028720220550894737,
"rewards/rejected": -0.055102504789829254,
"step": 215
},
{
"epoch": 2.0859259259259257,
"grad_norm": 40.0,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 1.5025131702423096,
"log_odds_ratio": -0.32322412729263306,
"logits/chosen": -1.936810851097107,
"logits/rejected": -1.9919002056121826,
"logps/chosen": -0.47912636399269104,
"logps/rejected": -1.1569387912750244,
"loss": 24.7362,
"nll_loss": 0.776683509349823,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.02395631931722164,
"rewards/margins": 0.03389061614871025,
"rewards/rejected": -0.05784693360328674,
"step": 220
},
{
"epoch": 2.1333333333333333,
"grad_norm": 30.5,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 1.4707694053649902,
"log_odds_ratio": -0.3371773660182953,
"logits/chosen": -1.941663146018982,
"logits/rejected": -1.7569644451141357,
"logps/chosen": -0.476045697927475,
"logps/rejected": -1.1464457511901855,
"loss": 25.7473,
"nll_loss": 0.7753463983535767,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.023802287876605988,
"rewards/margins": 0.03352000191807747,
"rewards/rejected": -0.05732228606939316,
"step": 225
},
{
"epoch": 2.180740740740741,
"grad_norm": 26.625,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 1.630690574645996,
"log_odds_ratio": -0.28257861733436584,
"logits/chosen": -2.0433297157287598,
"logits/rejected": -1.7494617700576782,
"logps/chosen": -0.45956555008888245,
"logps/rejected": -1.192975640296936,
"loss": 24.3207,
"nll_loss": 0.7468871474266052,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.022978277876973152,
"rewards/margins": 0.0366705060005188,
"rewards/rejected": -0.0596487820148468,
"step": 230
},
{
"epoch": 2.228148148148148,
"grad_norm": 34.5,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 1.3293721675872803,
"log_odds_ratio": -0.3706313967704773,
"logits/chosen": -2.0370144844055176,
"logits/rejected": -2.092005968093872,
"logps/chosen": -0.5131632089614868,
"logps/rejected": -1.0899993181228638,
"loss": 24.796,
"nll_loss": 0.7735158205032349,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.02565816417336464,
"rewards/margins": 0.02884179912507534,
"rewards/rejected": -0.05449996143579483,
"step": 235
},
{
"epoch": 2.2755555555555556,
"grad_norm": 33.75,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 1.5746119022369385,
"log_odds_ratio": -0.30449697375297546,
"logits/chosen": -2.0129730701446533,
"logits/rejected": -1.8451099395751953,
"logps/chosen": -0.46393972635269165,
"logps/rejected": -1.1728386878967285,
"loss": 24.7856,
"nll_loss": 0.7696127891540527,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.023196987807750702,
"rewards/margins": 0.0354449488222599,
"rewards/rejected": -0.058641932904720306,
"step": 240
},
{
"epoch": 2.322962962962963,
"grad_norm": 26.875,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.5067981481552124,
"log_odds_ratio": -0.3081058859825134,
"logits/chosen": -2.2492246627807617,
"logits/rejected": -1.7415387630462646,
"logps/chosen": -0.47982436418533325,
"logps/rejected": -1.1375417709350586,
"loss": 24.6482,
"nll_loss": 0.7710675001144409,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.02399122156202793,
"rewards/margins": 0.032885871827602386,
"rewards/rejected": -0.05687708780169487,
"step": 245
},
{
"epoch": 2.3703703703703702,
"grad_norm": 29.25,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 1.550789713859558,
"log_odds_ratio": -0.28812703490257263,
"logits/chosen": -1.9938061237335205,
"logits/rejected": -1.957233190536499,
"logps/chosen": -0.46859461069107056,
"logps/rejected": -1.1675255298614502,
"loss": 24.0579,
"nll_loss": 0.72864830493927,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.023429730907082558,
"rewards/margins": 0.034946538507938385,
"rewards/rejected": -0.05837627500295639,
"step": 250
},
{
"epoch": 2.417777777777778,
"grad_norm": 27.625,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 1.6658976078033447,
"log_odds_ratio": -0.29064321517944336,
"logits/chosen": -2.095778226852417,
"logits/rejected": -1.5485340356826782,
"logps/chosen": -0.4274715483188629,
"logps/rejected": -1.1617481708526611,
"loss": 23.8564,
"nll_loss": 0.724585771560669,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.021373575553297997,
"rewards/margins": 0.03671382740139961,
"rewards/rejected": -0.05808740109205246,
"step": 255
},
{
"epoch": 2.4651851851851854,
"grad_norm": 30.375,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 1.706284761428833,
"log_odds_ratio": -0.28669941425323486,
"logits/chosen": -2.0861926078796387,
"logits/rejected": -1.9107942581176758,
"logps/chosen": -0.4499754011631012,
"logps/rejected": -1.1831719875335693,
"loss": 23.9298,
"nll_loss": 0.7361353039741516,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02249876968562603,
"rewards/margins": 0.03665982931852341,
"rewards/rejected": -0.059158600866794586,
"step": 260
},
{
"epoch": 2.5125925925925925,
"grad_norm": 32.25,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 1.5768635272979736,
"log_odds_ratio": -0.33476293087005615,
"logits/chosen": -1.945469617843628,
"logits/rejected": -1.9802621603012085,
"logps/chosen": -0.47499436140060425,
"logps/rejected": -1.1629369258880615,
"loss": 23.8576,
"nll_loss": 0.7378355264663696,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.023749716579914093,
"rewards/margins": 0.034397125244140625,
"rewards/rejected": -0.05814684182405472,
"step": 265
},
{
"epoch": 2.56,
"grad_norm": 30.375,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 1.5624961853027344,
"log_odds_ratio": -0.29299020767211914,
"logits/chosen": -2.2144691944122314,
"logits/rejected": -2.014341115951538,
"logps/chosen": -0.4924210011959076,
"logps/rejected": -1.1973953247070312,
"loss": 24.864,
"nll_loss": 0.7814281582832336,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.02462105080485344,
"rewards/margins": 0.03524871915578842,
"rewards/rejected": -0.05986977368593216,
"step": 270
},
{
"epoch": 2.6074074074074076,
"grad_norm": 30.875,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 1.5759508609771729,
"log_odds_ratio": -0.31793758273124695,
"logits/chosen": -2.1429502964019775,
"logits/rejected": -1.46464204788208,
"logps/chosen": -0.48924770951271057,
"logps/rejected": -1.1759014129638672,
"loss": 24.7519,
"nll_loss": 0.7631200551986694,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.024462386965751648,
"rewards/margins": 0.03433268517255783,
"rewards/rejected": -0.05879507586359978,
"step": 275
},
{
"epoch": 2.6548148148148147,
"grad_norm": 36.25,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 1.4352095127105713,
"log_odds_ratio": -0.31683534383773804,
"logits/chosen": -1.945744276046753,
"logits/rejected": -2.0830960273742676,
"logps/chosen": -0.4646902084350586,
"logps/rejected": -1.098145842552185,
"loss": 23.7151,
"nll_loss": 0.7180293798446655,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02323450893163681,
"rewards/margins": 0.03167278692126274,
"rewards/rejected": -0.05490729957818985,
"step": 280
},
{
"epoch": 2.7022222222222223,
"grad_norm": 31.125,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 1.4293460845947266,
"log_odds_ratio": -0.329708069562912,
"logits/chosen": -2.1722819805145264,
"logits/rejected": -1.6502447128295898,
"logps/chosen": -0.4705706536769867,
"logps/rejected": -1.1116634607315063,
"loss": 24.7964,
"nll_loss": 0.745871901512146,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.023528533056378365,
"rewards/margins": 0.03205464407801628,
"rewards/rejected": -0.055583178997039795,
"step": 285
},
{
"epoch": 2.74962962962963,
"grad_norm": 31.625,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 1.6304314136505127,
"log_odds_ratio": -0.2968464195728302,
"logits/chosen": -1.9601917266845703,
"logits/rejected": -2.0320401191711426,
"logps/chosen": -0.4668458104133606,
"logps/rejected": -1.1929422616958618,
"loss": 24.5905,
"nll_loss": 0.7679024934768677,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02334229089319706,
"rewards/margins": 0.03630482777953148,
"rewards/rejected": -0.05964711308479309,
"step": 290
},
{
"epoch": 2.797037037037037,
"grad_norm": 32.75,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 1.5559337139129639,
"log_odds_ratio": -0.3252851665019989,
"logits/chosen": -1.9125938415527344,
"logits/rejected": -1.8471267223358154,
"logps/chosen": -0.4792943000793457,
"logps/rejected": -1.1848201751708984,
"loss": 24.4441,
"nll_loss": 0.756227970123291,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.023964714258909225,
"rewards/margins": 0.03527629375457764,
"rewards/rejected": -0.05924100801348686,
"step": 295
},
{
"epoch": 2.8444444444444446,
"grad_norm": 28.875,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 1.3979889154434204,
"log_odds_ratio": -0.33928608894348145,
"logits/chosen": -2.341634511947632,
"logits/rejected": -1.843711495399475,
"logps/chosen": -0.5055073499679565,
"logps/rejected": -1.187756896018982,
"loss": 24.503,
"nll_loss": 0.7757240533828735,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025275370106101036,
"rewards/margins": 0.03411247208714485,
"rewards/rejected": -0.05938784033060074,
"step": 300
},
{
"epoch": 2.891851851851852,
"grad_norm": 29.5,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 1.506742238998413,
"log_odds_ratio": -0.3147231638431549,
"logits/chosen": -2.2259716987609863,
"logits/rejected": -1.8140947818756104,
"logps/chosen": -0.5070487260818481,
"logps/rejected": -1.1675385236740112,
"loss": 23.4515,
"nll_loss": 0.720944344997406,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025352437049150467,
"rewards/margins": 0.033024489879608154,
"rewards/rejected": -0.05837692692875862,
"step": 305
},
{
"epoch": 2.9392592592592592,
"grad_norm": 31.75,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 1.6132290363311768,
"log_odds_ratio": -0.2838875949382782,
"logits/chosen": -2.0488831996917725,
"logits/rejected": -1.5571346282958984,
"logps/chosen": -0.4689159393310547,
"logps/rejected": -1.1605467796325684,
"loss": 24.3086,
"nll_loss": 0.7292035818099976,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.023445798084139824,
"rewards/margins": 0.034581538289785385,
"rewards/rejected": -0.05802733823657036,
"step": 310
},
{
"epoch": 2.986666666666667,
"grad_norm": 35.25,
"learning_rate": 0.0,
"log_odds_chosen": 1.6901721954345703,
"log_odds_ratio": -0.2843396067619324,
"logits/chosen": -2.0129687786102295,
"logits/rejected": -1.6522471904754639,
"logps/chosen": -0.4355766177177429,
"logps/rejected": -1.1733875274658203,
"loss": 23.7902,
"nll_loss": 0.744986891746521,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.021778833121061325,
"rewards/margins": 0.03689054772257805,
"rewards/rejected": -0.058669377118349075,
"step": 315
},
{
"epoch": 2.986666666666667,
"step": 315,
"total_flos": 0.0,
"train_loss": 32.00103834848555,
"train_runtime": 7481.6163,
"train_samples_per_second": 2.707,
"train_steps_per_second": 0.042
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}