silviasapora's picture
Model save
78703ca verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9881235154394297,
"eval_steps": 500,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06334125098970704,
"grad_norm": 1566.8653564453125,
"learning_rate": 6.25e-07,
"log_odds_chosen": -2.225217342376709,
"log_odds_ratio": -13.344230651855469,
"logps/chosen": -26.207233428955078,
"logps/rejected": -23.98147964477539,
"loss": 166.3142,
"nll_loss": 10.39463996887207,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -1.3103615045547485,
"rewards/margins": -0.11128749698400497,
"rewards/rejected": -1.1990740299224854,
"step": 5
},
{
"epoch": 0.12668250197941408,
"grad_norm": 954.8382568359375,
"learning_rate": 1.40625e-06,
"log_odds_chosen": -0.6191844344139099,
"log_odds_ratio": -14.636589050292969,
"logps/chosen": -27.205514907836914,
"logps/rejected": -26.585662841796875,
"loss": 169.2441,
"nll_loss": 10.57776165008545,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -1.3602757453918457,
"rewards/margins": -0.030992573127150536,
"rewards/rejected": -1.3292831182479858,
"step": 10
},
{
"epoch": 0.19002375296912113,
"grad_norm": 355.5755615234375,
"learning_rate": 2.1875000000000002e-06,
"log_odds_chosen": 2.383756160736084,
"log_odds_ratio": -11.047012329101562,
"logps/chosen": -24.0307559967041,
"logps/rejected": -26.413951873779297,
"loss": 171.9778,
"nll_loss": 10.748617172241211,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -1.201537847518921,
"rewards/margins": 0.11915971338748932,
"rewards/rejected": -1.320697546005249,
"step": 15
},
{
"epoch": 0.25336500395882816,
"grad_norm": 281.12158203125,
"learning_rate": 2.96875e-06,
"log_odds_chosen": -5.130737781524658,
"log_odds_ratio": -15.810519218444824,
"logps/chosen": -29.556884765625,
"logps/rejected": -24.425743103027344,
"loss": 175.1297,
"nll_loss": 10.945618629455566,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -1.477844476699829,
"rewards/margins": -0.2565571963787079,
"rewards/rejected": -1.2212872505187988,
"step": 20
},
{
"epoch": 0.3167062549485352,
"grad_norm": 199.64466857910156,
"learning_rate": 3.7500000000000005e-06,
"log_odds_chosen": 0.16405829787254333,
"log_odds_ratio": -13.288398742675781,
"logps/chosen": -26.04940414428711,
"logps/rejected": -26.214153289794922,
"loss": 163.7713,
"nll_loss": 10.235721588134766,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -1.302470326423645,
"rewards/margins": 0.008237527683377266,
"rewards/rejected": -1.310707688331604,
"step": 25
},
{
"epoch": 0.38004750593824227,
"grad_norm": 195.967529296875,
"learning_rate": 4.53125e-06,
"log_odds_chosen": -4.0349016189575195,
"log_odds_ratio": -13.333663940429688,
"logps/chosen": -25.42357635498047,
"logps/rejected": -21.388246536254883,
"loss": 162.8051,
"nll_loss": 10.175333976745605,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -1.271178960800171,
"rewards/margins": -0.20176656544208527,
"rewards/rejected": -1.069412350654602,
"step": 30
},
{
"epoch": 0.4433887569279493,
"grad_norm": 149.69757080078125,
"learning_rate": 4.999370587356267e-06,
"log_odds_chosen": 1.761802315711975,
"log_odds_ratio": -10.454744338989258,
"logps/chosen": -21.783058166503906,
"logps/rejected": -23.544517517089844,
"loss": 149.4591,
"nll_loss": 9.341216087341309,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -1.0891529321670532,
"rewards/margins": 0.08807289600372314,
"rewards/rejected": -1.1772258281707764,
"step": 35
},
{
"epoch": 0.5067300079176563,
"grad_norm": 347.0418395996094,
"learning_rate": 4.992293334332821e-06,
"log_odds_chosen": 0.6469250321388245,
"log_odds_ratio": -9.636307716369629,
"logps/chosen": -20.772506713867188,
"logps/rejected": -21.41815185546875,
"loss": 147.193,
"nll_loss": 9.199592590332031,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -1.0386254787445068,
"rewards/margins": 0.032282136380672455,
"rewards/rejected": -1.0709075927734375,
"step": 40
},
{
"epoch": 0.5700712589073634,
"grad_norm": 450.3840026855469,
"learning_rate": 4.977374404419838e-06,
"log_odds_chosen": -0.41757726669311523,
"log_odds_ratio": -10.531739234924316,
"logps/chosen": -21.214900970458984,
"logps/rejected": -20.79790496826172,
"loss": 130.2091,
"nll_loss": 8.138101577758789,
"rewards/accuracies": 0.46562498807907104,
"rewards/chosen": -1.060745120048523,
"rewards/margins": -0.02084996923804283,
"rewards/rejected": -1.0398951768875122,
"step": 45
},
{
"epoch": 0.6334125098970704,
"grad_norm": 678.9149169921875,
"learning_rate": 4.954660738090297e-06,
"log_odds_chosen": -0.34605133533477783,
"log_odds_ratio": -6.101162910461426,
"logps/chosen": -13.776224136352539,
"logps/rejected": -13.429880142211914,
"loss": 99.1414,
"nll_loss": 6.196375846862793,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.688811182975769,
"rewards/margins": -0.01731729507446289,
"rewards/rejected": -0.6714939475059509,
"step": 50
},
{
"epoch": 0.6967537608867775,
"grad_norm": 641.2720947265625,
"learning_rate": 4.924223800941718e-06,
"log_odds_chosen": -2.388538360595703,
"log_odds_ratio": -5.602341651916504,
"logps/chosen": -10.671758651733398,
"logps/rejected": -8.287276268005371,
"loss": 73.5918,
"nll_loss": 4.599525451660156,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.5335879325866699,
"rewards/margins": -0.11922411620616913,
"rewards/rejected": -0.4143638014793396,
"step": 55
},
{
"epoch": 0.7600950118764845,
"grad_norm": 189.5673370361328,
"learning_rate": 4.886159358838952e-06,
"log_odds_chosen": -0.5304662585258484,
"log_odds_ratio": -2.2777798175811768,
"logps/chosen": -5.11728572845459,
"logps/rejected": -4.571669578552246,
"loss": 47.0867,
"nll_loss": 2.9429495334625244,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.2558642327785492,
"rewards/margins": -0.027280762791633606,
"rewards/rejected": -0.22858352959156036,
"step": 60
},
{
"epoch": 0.8234362628661916,
"grad_norm": 120.43927764892578,
"learning_rate": 4.8405871765993435e-06,
"log_odds_chosen": -0.2079111635684967,
"log_odds_ratio": -1.2568554878234863,
"logps/chosen": -2.487165927886963,
"logps/rejected": -2.300987482070923,
"loss": 37.4116,
"nll_loss": 2.3382601737976074,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.12435829639434814,
"rewards/margins": -0.009308922104537487,
"rewards/rejected": -0.11504938453435898,
"step": 65
},
{
"epoch": 0.8867775138558986,
"grad_norm": 85.35801696777344,
"learning_rate": 4.7876506411683e-06,
"log_odds_chosen": 0.16362255811691284,
"log_odds_ratio": -0.9617290496826172,
"logps/chosen": -1.9240213632583618,
"logps/rejected": -2.0835094451904297,
"loss": 34.9569,
"nll_loss": 2.1848533153533936,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09620106220245361,
"rewards/margins": 0.007974403910338879,
"rewards/rejected": -0.10417548567056656,
"step": 70
},
{
"epoch": 0.9501187648456056,
"grad_norm": 67.66626739501953,
"learning_rate": 4.72751631047092e-06,
"log_odds_chosen": 0.08022845536470413,
"log_odds_ratio": -0.8937705755233765,
"logps/chosen": -1.7418140172958374,
"logps/rejected": -1.8140445947647095,
"loss": 32.5467,
"nll_loss": 2.0342295169830322,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08709069341421127,
"rewards/margins": 0.003611528780311346,
"rewards/rejected": -0.09070222824811935,
"step": 75
},
{
"epoch": 1.0253365003958828,
"grad_norm": 67.96096801757812,
"learning_rate": 4.660373389359137e-06,
"log_odds_chosen": 0.220803365111351,
"log_odds_ratio": -0.8697079420089722,
"logps/chosen": -1.7520455121994019,
"logps/rejected": -1.9549840688705444,
"loss": 36.1895,
"nll_loss": 2.0106112957000732,
"rewards/accuracies": 0.5472221970558167,
"rewards/chosen": -0.08760227262973785,
"rewards/margins": 0.01014692522585392,
"rewards/rejected": -0.09774920344352722,
"step": 80
},
{
"epoch": 1.0886777513855899,
"grad_norm": 55.48482894897461,
"learning_rate": 4.586433134303257e-06,
"log_odds_chosen": 0.2403596192598343,
"log_odds_ratio": -0.8039296865463257,
"logps/chosen": -1.6341924667358398,
"logps/rejected": -1.8617744445800781,
"loss": 29.8033,
"nll_loss": 1.862810730934143,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.08170963078737259,
"rewards/margins": 0.011379102244973183,
"rewards/rejected": -0.09308873116970062,
"step": 85
},
{
"epoch": 1.152019002375297,
"grad_norm": 67.8202896118164,
"learning_rate": 4.505928188700946e-06,
"log_odds_chosen": 0.4101799428462982,
"log_odds_ratio": -0.825219452381134,
"logps/chosen": -1.640615463256836,
"logps/rejected": -2.0214312076568604,
"loss": 29.6306,
"nll_loss": 1.8520597219467163,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.0820307806134224,
"rewards/margins": 0.019040774554014206,
"rewards/rejected": -0.1010715514421463,
"step": 90
},
{
"epoch": 1.2153602533650039,
"grad_norm": 62.16216278076172,
"learning_rate": 4.4191118508950286e-06,
"log_odds_chosen": -0.0038254274986684322,
"log_odds_ratio": -0.9625605344772339,
"logps/chosen": -1.7339099645614624,
"logps/rejected": -1.7298612594604492,
"loss": 30.1161,
"nll_loss": 1.8824611902236938,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.08669550716876984,
"rewards/margins": -0.0002024378627538681,
"rewards/rejected": -0.08649305999279022,
"step": 95
},
{
"epoch": 1.278701504354711,
"grad_norm": 61.8267936706543,
"learning_rate": 4.326257277203194e-06,
"log_odds_chosen": 0.19552679359912872,
"log_odds_ratio": -0.8283188939094543,
"logps/chosen": -1.5923553705215454,
"logps/rejected": -1.7873952388763428,
"loss": 28.8905,
"nll_loss": 1.8059203624725342,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.07961776852607727,
"rewards/margins": 0.00975199043750763,
"rewards/rejected": -0.0893697664141655,
"step": 100
},
{
"epoch": 1.342042755344418,
"grad_norm": 51.51774215698242,
"learning_rate": 4.227656622467162e-06,
"log_odds_chosen": 0.08534004539251328,
"log_odds_ratio": -0.8807274103164673,
"logps/chosen": -1.6110286712646484,
"logps/rejected": -1.6765788793563843,
"loss": 28.3032,
"nll_loss": 1.769310712814331,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.08055143058300018,
"rewards/margins": 0.0032775108702480793,
"rewards/rejected": -0.08382894098758698,
"step": 105
},
{
"epoch": 1.405384006334125,
"grad_norm": 63.78523635864258,
"learning_rate": 4.123620120825459e-06,
"log_odds_chosen": 0.01861184649169445,
"log_odds_ratio": -0.907670795917511,
"logps/chosen": -1.5369993448257446,
"logps/rejected": -1.5605800151824951,
"loss": 27.58,
"nll_loss": 1.7242324352264404,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.07684997469186783,
"rewards/margins": 0.0011790260905399919,
"rewards/rejected": -0.07802899926900864,
"step": 110
},
{
"epoch": 1.468725257323832,
"grad_norm": 46.49420928955078,
"learning_rate": 4.01447510960205e-06,
"log_odds_chosen": 0.17250235378742218,
"log_odds_ratio": -0.7509892582893372,
"logps/chosen": -1.469199776649475,
"logps/rejected": -1.6311143636703491,
"loss": 28.0838,
"nll_loss": 1.7559159994125366,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.07345998287200928,
"rewards/margins": 0.008095743134617805,
"rewards/rejected": -0.08155572414398193,
"step": 115
},
{
"epoch": 1.5320665083135392,
"grad_norm": 116.36309814453125,
"learning_rate": 3.900564999382007e-06,
"log_odds_chosen": 0.18579277396202087,
"log_odds_ratio": -0.859032928943634,
"logps/chosen": -1.5738681554794312,
"logps/rejected": -1.7462905645370483,
"loss": 28.7531,
"nll_loss": 1.7980244159698486,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.07869341224431992,
"rewards/margins": 0.008621118031442165,
"rewards/rejected": -0.08731453120708466,
"step": 120
},
{
"epoch": 1.5954077593032463,
"grad_norm": 49.367523193359375,
"learning_rate": 3.782248193514766e-06,
"log_odds_chosen": 0.03143889456987381,
"log_odds_ratio": -0.9138350486755371,
"logps/chosen": -1.5310590267181396,
"logps/rejected": -1.5749614238739014,
"loss": 27.4813,
"nll_loss": 1.7188360691070557,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -0.07655295729637146,
"rewards/margins": 0.002195121254771948,
"rewards/rejected": -0.07874806970357895,
"step": 125
},
{
"epoch": 1.6587490102929534,
"grad_norm": 82.95511627197266,
"learning_rate": 3.6598969604445854e-06,
"log_odds_chosen": 0.23761017620563507,
"log_odds_ratio": -0.773469090461731,
"logps/chosen": -1.401659369468689,
"logps/rejected": -1.619177222251892,
"loss": 26.8694,
"nll_loss": 1.6810429096221924,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.07008296996355057,
"rewards/margins": 0.010875897482037544,
"rewards/rejected": -0.08095885813236237,
"step": 130
},
{
"epoch": 1.7220902612826603,
"grad_norm": 40.690589904785156,
"learning_rate": 3.533896262416302e-06,
"log_odds_chosen": 0.13715331256389618,
"log_odds_ratio": -0.7931355237960815,
"logps/chosen": -1.3749868869781494,
"logps/rejected": -1.4867315292358398,
"loss": 26.6296,
"nll_loss": 1.666666030883789,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.06874934583902359,
"rewards/margins": 0.005587225314229727,
"rewards/rejected": -0.07433657348155975,
"step": 135
},
{
"epoch": 1.7854315122723674,
"grad_norm": 50.21955108642578,
"learning_rate": 3.4046425442416807e-06,
"log_odds_chosen": 0.1383642852306366,
"log_odds_ratio": -0.8326429128646851,
"logps/chosen": -1.381520390510559,
"logps/rejected": -1.5053998231887817,
"loss": 25.8257,
"nll_loss": 1.617205262184143,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.06907602399587631,
"rewards/margins": 0.0061939675360918045,
"rewards/rejected": -0.07526998966932297,
"step": 140
},
{
"epoch": 1.8487727632620743,
"grad_norm": 66.92253875732422,
"learning_rate": 3.272542485937369e-06,
"log_odds_chosen": 0.1765662282705307,
"log_odds_ratio": -0.7538725137710571,
"logps/chosen": -1.3644089698791504,
"logps/rejected": -1.5174672603607178,
"loss": 26.2355,
"nll_loss": 1.644059419631958,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.06822045147418976,
"rewards/margins": 0.00765291228890419,
"rewards/rejected": -0.07587336748838425,
"step": 145
},
{
"epoch": 1.9121140142517814,
"grad_norm": 76.11998748779297,
"learning_rate": 3.138011723159107e-06,
"log_odds_chosen": 0.030925732105970383,
"log_odds_ratio": -0.8653675317764282,
"logps/chosen": -1.4222267866134644,
"logps/rejected": -1.4430891275405884,
"loss": 25.9597,
"nll_loss": 1.6283820867538452,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -0.07111133635044098,
"rewards/margins": 0.0010431179543957114,
"rewards/rejected": -0.0721544548869133,
"step": 150
},
{
"epoch": 1.9754552652414885,
"grad_norm": 56.858306884765625,
"learning_rate": 3.0014735394581824e-06,
"log_odds_chosen": 0.11269225925207138,
"log_odds_ratio": -0.7712882161140442,
"logps/chosen": -1.349379062652588,
"logps/rejected": -1.4386845827102661,
"loss": 25.4824,
"nll_loss": 1.6006284952163696,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.06746895611286163,
"rewards/margins": 0.004465264733880758,
"rewards/rejected": -0.07193422317504883,
"step": 155
},
{
"epoch": 2.0506730007917655,
"grad_norm": 51.41761779785156,
"learning_rate": 2.863357534474782e-06,
"log_odds_chosen": 0.21707920730113983,
"log_odds_ratio": -0.7254990339279175,
"logps/chosen": -1.2790353298187256,
"logps/rejected": -1.455739974975586,
"loss": 27.9438,
"nll_loss": 1.563090205192566,
"rewards/accuracies": 0.5444444417953491,
"rewards/chosen": -0.0639517679810524,
"rewards/margins": 0.008835244923830032,
"rewards/rejected": -0.07278700917959213,
"step": 160
},
{
"epoch": 2.1140142517814726,
"grad_norm": 56.75939178466797,
"learning_rate": 2.724098272258584e-06,
"log_odds_chosen": 0.22648778557777405,
"log_odds_ratio": -0.7485660314559937,
"logps/chosen": -1.2268383502960205,
"logps/rejected": -1.4281115531921387,
"loss": 24.2974,
"nll_loss": 1.5330586433410645,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.061341919004917145,
"rewards/margins": 0.010063661262392998,
"rewards/rejected": -0.07140558212995529,
"step": 165
},
{
"epoch": 2.1773555027711797,
"grad_norm": 45.81589889526367,
"learning_rate": 2.5841339139694856e-06,
"log_odds_chosen": 0.23791304230690002,
"log_odds_ratio": -0.7226032018661499,
"logps/chosen": -1.2346957921981812,
"logps/rejected": -1.4143835306167603,
"loss": 24.1894,
"nll_loss": 1.531683087348938,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.061734795570373535,
"rewards/margins": 0.008984383195638657,
"rewards/rejected": -0.0707191675901413,
"step": 170
},
{
"epoch": 2.240696753760887,
"grad_norm": 61.37849044799805,
"learning_rate": 2.443904839260488e-06,
"log_odds_chosen": 0.13805775344371796,
"log_odds_ratio": -0.7800249457359314,
"logps/chosen": -1.316514015197754,
"logps/rejected": -1.434812307357788,
"loss": 24.873,
"nll_loss": 1.582626461982727,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.0658257007598877,
"rewards/margins": 0.005914908833801746,
"rewards/rejected": -0.07174061238765717,
"step": 175
},
{
"epoch": 2.304038004750594,
"grad_norm": 54.45339584350586,
"learning_rate": 2.3038522606803882e-06,
"log_odds_chosen": 0.295113205909729,
"log_odds_ratio": -0.7289345264434814,
"logps/chosen": -1.1970077753067017,
"logps/rejected": -1.4469192028045654,
"loss": 23.3061,
"nll_loss": 1.4928674697875977,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.059850387275218964,
"rewards/margins": 0.012495574541389942,
"rewards/rejected": -0.07234595715999603,
"step": 180
},
{
"epoch": 2.367379255740301,
"grad_norm": 49.3673210144043,
"learning_rate": 2.1644168354558623e-06,
"log_odds_chosen": 0.24153447151184082,
"log_odds_ratio": -0.7290275692939758,
"logps/chosen": -1.1816279888153076,
"logps/rejected": -1.363236665725708,
"loss": 22.865,
"nll_loss": 1.4779325723648071,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.05908140540122986,
"rewards/margins": 0.009080426767468452,
"rewards/rejected": -0.06816183030605316,
"step": 185
},
{
"epoch": 2.4307205067300077,
"grad_norm": 52.562843322753906,
"learning_rate": 2.026037279020804e-06,
"log_odds_chosen": 0.2805042862892151,
"log_odds_ratio": -0.714387834072113,
"logps/chosen": -1.2529584169387817,
"logps/rejected": -1.496994972229004,
"loss": 23.8218,
"nll_loss": 1.5591580867767334,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.06264792382717133,
"rewards/margins": 0.012201832607388496,
"rewards/rejected": -0.07484976202249527,
"step": 190
},
{
"epoch": 2.494061757719715,
"grad_norm": 43.61114501953125,
"learning_rate": 1.8891489846552645e-06,
"log_odds_chosen": 0.29797351360321045,
"log_odds_ratio": -0.6934127807617188,
"logps/chosen": -1.1956589221954346,
"logps/rejected": -1.4286657571792603,
"loss": 22.5176,
"nll_loss": 1.4987757205963135,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.05978294461965561,
"rewards/margins": 0.01165033970028162,
"rewards/rejected": -0.07143328338861465,
"step": 195
},
{
"epoch": 2.557403008709422,
"grad_norm": 41.446319580078125,
"learning_rate": 1.75418265357711e-06,
"log_odds_chosen": 0.33347612619400024,
"log_odds_ratio": -0.6794474720954895,
"logps/chosen": -1.2364885807037354,
"logps/rejected": -1.4904797077178955,
"loss": 22.3802,
"nll_loss": 1.5241345167160034,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.061824433505535126,
"rewards/margins": 0.01269955188035965,
"rewards/rejected": -0.07452399283647537,
"step": 200
},
{
"epoch": 2.620744259699129,
"grad_norm": 45.948020935058594,
"learning_rate": 1.6215629397966432e-06,
"log_odds_chosen": 0.19341301918029785,
"log_odds_ratio": -0.7365654706954956,
"logps/chosen": -1.2314597368240356,
"logps/rejected": -1.3811973333358765,
"loss": 21.58,
"nll_loss": 1.5147475004196167,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.06157298758625984,
"rewards/margins": 0.007486879825592041,
"rewards/rejected": -0.06905986368656158,
"step": 205
},
{
"epoch": 2.684085510688836,
"grad_norm": 70.68650817871094,
"learning_rate": 1.4917071139979877e-06,
"log_odds_chosen": 0.28118547797203064,
"log_odds_ratio": -0.6911450624465942,
"logps/chosen": -1.1725728511810303,
"logps/rejected": -1.3876063823699951,
"loss": 20.3847,
"nll_loss": 1.48942232131958,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.05862864851951599,
"rewards/margins": 0.010751673951745033,
"rewards/rejected": -0.06938032060861588,
"step": 210
},
{
"epoch": 2.7474267616785433,
"grad_norm": 43.78276443481445,
"learning_rate": 1.3650237506511333e-06,
"log_odds_chosen": 0.26468196511268616,
"log_odds_ratio": -0.703626275062561,
"logps/chosen": -1.1834118366241455,
"logps/rejected": -1.3953739404678345,
"loss": 19.2108,
"nll_loss": 1.4794647693634033,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.059170592576265335,
"rewards/margins": 0.010598111897706985,
"rewards/rejected": -0.06976870447397232,
"step": 215
},
{
"epoch": 2.81076801266825,
"grad_norm": 37.63428497314453,
"learning_rate": 1.2419114424855e-06,
"log_odds_chosen": 0.22841043770313263,
"log_odds_ratio": -0.718733549118042,
"logps/chosen": -1.152614951133728,
"logps/rejected": -1.3169124126434326,
"loss": 18.0619,
"nll_loss": 1.4889500141143799,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.057630755007267,
"rewards/margins": 0.008214866742491722,
"rewards/rejected": -0.06584562361240387,
"step": 220
},
{
"epoch": 2.8741092636579575,
"grad_norm": 33.8170280456543,
"learning_rate": 1.122757546369744e-06,
"log_odds_chosen": 0.19230252504348755,
"log_odds_ratio": -0.7320453524589539,
"logps/chosen": -1.1981362104415894,
"logps/rejected": -1.353736162185669,
"loss": 16.7321,
"nll_loss": 1.5025402307510376,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.05990681052207947,
"rewards/margins": 0.007779995445162058,
"rewards/rejected": -0.06768681108951569,
"step": 225
},
{
"epoch": 2.937450514647664,
"grad_norm": 26.931453704833984,
"learning_rate": 1.0079369645437411e-06,
"log_odds_chosen": 0.2564031183719635,
"log_odds_ratio": -0.7086187601089478,
"logps/chosen": -1.181774377822876,
"logps/rejected": -1.377415418624878,
"loss": 14.6404,
"nll_loss": 1.4596824645996094,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05908871814608574,
"rewards/margins": 0.009782059118151665,
"rewards/rejected": -0.06887076795101166,
"step": 230
},
{
"epoch": 3.0126682501979416,
"grad_norm": 32.48230743408203,
"learning_rate": 8.978109650374398e-07,
"log_odds_chosen": 0.19491888582706451,
"log_odds_ratio": -0.7259272933006287,
"logps/chosen": -1.1522880792617798,
"logps/rejected": -1.3237059116363525,
"loss": 14.4248,
"nll_loss": 1.4603744745254517,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.05761440098285675,
"rewards/margins": 0.008570893667638302,
"rewards/rejected": -0.06618530303239822,
"step": 235
},
{
"epoch": 3.0760095011876483,
"grad_norm": 18.48870277404785,
"learning_rate": 7.927260449879828e-07,
"log_odds_chosen": 0.38648003339767456,
"log_odds_ratio": -0.6636689901351929,
"logps/chosen": -1.115918517112732,
"logps/rejected": -1.420893907546997,
"loss": 10.7067,
"nll_loss": 1.409071683883667,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.055795930325984955,
"rewards/margins": 0.015248763374984264,
"rewards/rejected": -0.07104469835758209,
"step": 240
},
{
"epoch": 3.1393507521773554,
"grad_norm": 20.157772064208984,
"learning_rate": 6.930128404315214e-07,
"log_odds_chosen": 0.3515693247318268,
"log_odds_ratio": -0.6867735981941223,
"logps/chosen": -1.1619575023651123,
"logps/rejected": -1.4377037286758423,
"loss": 9.4715,
"nll_loss": 1.4781396389007568,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.058097876608371735,
"rewards/margins": 0.01378730870783329,
"rewards/rejected": -0.07188518345355988,
"step": 245
},
{
"epoch": 3.2026920031670625,
"grad_norm": 16.846302032470703,
"learning_rate": 5.989850859999227e-07,
"log_odds_chosen": 0.2286454439163208,
"log_odds_ratio": -0.7409519553184509,
"logps/chosen": -1.2201191186904907,
"logps/rejected": -1.3998501300811768,
"loss": 7.952,
"nll_loss": 1.4928877353668213,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.061005957424640656,
"rewards/margins": 0.008986548520624638,
"rewards/rejected": -0.06999249756336212,
"step": 250
},
{
"epoch": 3.2660332541567696,
"grad_norm": 15.757951736450195,
"learning_rate": 5.109386277955477e-07,
"log_odds_chosen": 0.3435971736907959,
"log_odds_ratio": -0.6829465627670288,
"logps/chosen": -1.1322981119155884,
"logps/rejected": -1.391939640045166,
"loss": 6.2963,
"nll_loss": 1.4439783096313477,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05661489814519882,
"rewards/margins": 0.01298207975924015,
"rewards/rejected": -0.06959697604179382,
"step": 255
},
{
"epoch": 3.3293745051464767,
"grad_norm": 9.453774452209473,
"learning_rate": 4.29150492549959e-07,
"log_odds_chosen": 0.31623855233192444,
"log_odds_ratio": -0.674622654914856,
"logps/chosen": -1.0845999717712402,
"logps/rejected": -1.3284567594528198,
"loss": 4.9207,
"nll_loss": 1.3823580741882324,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.05422999709844589,
"rewards/margins": 0.012192841619253159,
"rewards/rejected": -0.06642283499240875,
"step": 260
},
{
"epoch": 3.392715756136184,
"grad_norm": 13.926580429077148,
"learning_rate": 3.538780159953348e-07,
"log_odds_chosen": 0.33605459332466125,
"log_odds_ratio": -0.6477686762809753,
"logps/chosen": -1.1137669086456299,
"logps/rejected": -1.3602466583251953,
"loss": 4.1082,
"nll_loss": 1.437723994255066,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -0.055688343942165375,
"rewards/margins": 0.01232399232685566,
"rewards/rejected": -0.06801234185695648,
"step": 265
},
{
"epoch": 3.456057007125891,
"grad_norm": 6.114862442016602,
"learning_rate": 2.8535803319105047e-07,
"log_odds_chosen": 0.28404486179351807,
"log_odds_ratio": -0.7073220610618591,
"logps/chosen": -1.134637475013733,
"logps/rejected": -1.340289831161499,
"loss": 3.3425,
"nll_loss": 1.4215402603149414,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.05673186853528023,
"rewards/margins": 0.010282614268362522,
"rewards/rejected": -0.06701448559761047,
"step": 270
},
{
"epoch": 3.519398258115598,
"grad_norm": 7.700738430023193,
"learning_rate": 2.2380613335296037e-07,
"log_odds_chosen": 0.40184488892555237,
"log_odds_ratio": -0.629717230796814,
"logps/chosen": -1.0624561309814453,
"logps/rejected": -1.356720209121704,
"loss": 2.6924,
"nll_loss": 1.4157472848892212,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.05312279984354973,
"rewards/margins": 0.014713202603161335,
"rewards/rejected": -0.06783600151538849,
"step": 275
},
{
"epoch": 3.5827395091053047,
"grad_norm": 6.366260528564453,
"learning_rate": 1.6941598152996453e-07,
"log_odds_chosen": 0.31379514932632446,
"log_odds_ratio": -0.6721755266189575,
"logps/chosen": -1.153221845626831,
"logps/rejected": -1.3958936929702759,
"loss": 2.3316,
"nll_loss": 1.4696441888809204,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.05766110494732857,
"rewards/margins": 0.012133581563830376,
"rewards/rejected": -0.0697946771979332,
"step": 280
},
{
"epoch": 3.646080760095012,
"grad_norm": 8.542328834533691,
"learning_rate": 1.223587092621162e-07,
"log_odds_chosen": 0.4368967115879059,
"log_odds_ratio": -0.6306554079055786,
"logps/chosen": -1.1088006496429443,
"logps/rejected": -1.4423551559448242,
"loss": 1.9015,
"nll_loss": 1.4303786754608154,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.055440038442611694,
"rewards/margins": 0.016677727922797203,
"rewards/rejected": -0.07211776077747345,
"step": 285
},
{
"epoch": 3.709422011084719,
"grad_norm": 5.483981609344482,
"learning_rate": 8.278237613748408e-08,
"log_odds_chosen": 0.25653380155563354,
"log_odds_ratio": -0.6835523843765259,
"logps/chosen": -1.115917444229126,
"logps/rejected": -1.3009469509124756,
"loss": 1.7019,
"nll_loss": 1.435164451599121,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.05579587072134018,
"rewards/margins": 0.009251468814909458,
"rewards/rejected": -0.06504733860492706,
"step": 290
},
{
"epoch": 3.772763262074426,
"grad_norm": 5.989284992218018,
"learning_rate": 5.0811503941911314e-08,
"log_odds_chosen": 0.36020272970199585,
"log_odds_ratio": -0.6700825691223145,
"logps/chosen": -1.1353535652160645,
"logps/rejected": -1.425724744796753,
"loss": 1.4849,
"nll_loss": 1.438443899154663,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05676767975091934,
"rewards/margins": 0.014518563635647297,
"rewards/rejected": -0.07128624618053436,
"step": 295
},
{
"epoch": 3.836104513064133,
"grad_norm": 5.665014743804932,
"learning_rate": 2.6546684867408412e-08,
"log_odds_chosen": 0.2839321196079254,
"log_odds_ratio": -0.693397045135498,
"logps/chosen": -1.183774471282959,
"logps/rejected": -1.4149149656295776,
"loss": 1.3752,
"nll_loss": 1.4539821147918701,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.05918872356414795,
"rewards/margins": 0.011557026766240597,
"rewards/rejected": -0.07074575871229172,
"step": 300
},
{
"epoch": 3.89944576405384,
"grad_norm": 6.5417280197143555,
"learning_rate": 1.006426501190233e-08,
"log_odds_chosen": 0.2721025347709656,
"log_odds_ratio": -0.6981784105300903,
"logps/chosen": -1.153509259223938,
"logps/rejected": -1.3679428100585938,
"loss": 1.2736,
"nll_loss": 1.4551602602005005,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.05767546221613884,
"rewards/margins": 0.010721677914261818,
"rewards/rejected": -0.06839713454246521,
"step": 305
},
{
"epoch": 3.9627870150435474,
"grad_norm": 6.849119186401367,
"learning_rate": 1.4161041661667208e-09,
"log_odds_chosen": 0.20419207215309143,
"log_odds_ratio": -0.7056440114974976,
"logps/chosen": -1.1733808517456055,
"logps/rejected": -1.3275495767593384,
"loss": 1.2192,
"nll_loss": 1.4886562824249268,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.058669041842222214,
"rewards/margins": 0.00770843680948019,
"rewards/rejected": -0.06637748330831528,
"step": 310
},
{
"epoch": 3.9881235154394297,
"step": 312,
"total_flos": 0.0,
"train_loss": 41.90841192007065,
"train_runtime": 3951.2607,
"train_samples_per_second": 5.112,
"train_steps_per_second": 0.079
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}