{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.039860488290981565,
"grad_norm": 11.324122796709977,
"learning_rate": 2.53968253968254e-06,
"loss": 1.6286,
"step": 5
},
{
"epoch": 0.07972097658196313,
"grad_norm": 1.991457650057207,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.4713,
"step": 10
},
{
"epoch": 0.11958146487294469,
"grad_norm": 1.0610273535036991,
"learning_rate": 8.888888888888888e-06,
"loss": 1.3152,
"step": 15
},
{
"epoch": 0.15944195316392626,
"grad_norm": 0.7999727467798167,
"learning_rate": 1.2063492063492064e-05,
"loss": 1.2185,
"step": 20
},
{
"epoch": 0.19930244145490783,
"grad_norm": 0.6462858961182688,
"learning_rate": 1.523809523809524e-05,
"loss": 1.1579,
"step": 25
},
{
"epoch": 0.23916292974588937,
"grad_norm": 0.47561233028025485,
"learning_rate": 1.8412698412698415e-05,
"loss": 1.1295,
"step": 30
},
{
"epoch": 0.279023418036871,
"grad_norm": 0.4460008015131473,
"learning_rate": 2.158730158730159e-05,
"loss": 1.1069,
"step": 35
},
{
"epoch": 0.3188839063278525,
"grad_norm": 0.35932407714480935,
"learning_rate": 2.4761904761904766e-05,
"loss": 1.0887,
"step": 40
},
{
"epoch": 0.35874439461883406,
"grad_norm": 0.3717286127467533,
"learning_rate": 2.7936507936507936e-05,
"loss": 1.0869,
"step": 45
},
{
"epoch": 0.39860488290981566,
"grad_norm": 0.38261468570309465,
"learning_rate": 3.111111111111112e-05,
"loss": 1.0774,
"step": 50
},
{
"epoch": 0.4384653712007972,
"grad_norm": 0.383323190556911,
"learning_rate": 3.4285714285714284e-05,
"loss": 1.0636,
"step": 55
},
{
"epoch": 0.47832585949177875,
"grad_norm": 0.4100491937186789,
"learning_rate": 3.7460317460317464e-05,
"loss": 1.0535,
"step": 60
},
{
"epoch": 0.5181863477827603,
"grad_norm": 0.3975249651869064,
"learning_rate": 3.9999693004141615e-05,
"loss": 1.0297,
"step": 65
},
{
"epoch": 0.558046836073742,
"grad_norm": 0.3819095926536229,
"learning_rate": 3.998894913865352e-05,
"loss": 1.0337,
"step": 70
},
{
"epoch": 0.5979073243647235,
"grad_norm": 0.36490844056440797,
"learning_rate": 3.9962864903591375e-05,
"loss": 1.0063,
"step": 75
},
{
"epoch": 0.637767812655705,
"grad_norm": 0.4076298707032981,
"learning_rate": 3.992146031710637e-05,
"loss": 1.0237,
"step": 80
},
{
"epoch": 0.6776283009466866,
"grad_norm": 0.37566110161128824,
"learning_rate": 3.9864767154838864e-05,
"loss": 1.0145,
"step": 85
},
{
"epoch": 0.7174887892376681,
"grad_norm": 0.34530643882424805,
"learning_rate": 3.9792828925532376e-05,
"loss": 1.0296,
"step": 90
},
{
"epoch": 0.7573492775286498,
"grad_norm": 0.3432558970267317,
"learning_rate": 3.970570083764316e-05,
"loss": 1.0059,
"step": 95
},
{
"epoch": 0.7972097658196313,
"grad_norm": 0.3408833181353496,
"learning_rate": 3.9603449756970877e-05,
"loss": 1.004,
"step": 100
},
{
"epoch": 0.8370702541106129,
"grad_norm": 0.3589480700028462,
"learning_rate": 3.948615415534294e-05,
"loss": 0.9936,
"step": 105
},
{
"epoch": 0.8769307424015944,
"grad_norm": 0.33854687709123976,
"learning_rate": 3.9353904050391874e-05,
"loss": 1.006,
"step": 110
},
{
"epoch": 0.916791230692576,
"grad_norm": 0.3606947276733311,
"learning_rate": 3.9206800936472e-05,
"loss": 1.0033,
"step": 115
},
{
"epoch": 0.9566517189835575,
"grad_norm": 0.3524202550527317,
"learning_rate": 3.904495770676831e-05,
"loss": 0.9917,
"step": 120
},
{
"epoch": 0.9965122072745392,
"grad_norm": 0.3695008401673435,
"learning_rate": 3.886849856665746e-05,
"loss": 1.0137,
"step": 125
},
{
"epoch": 1.0318883906327851,
"grad_norm": 0.5103901242908977,
"learning_rate": 3.8677558938387276e-05,
"loss": 0.885,
"step": 130
},
{
"epoch": 1.0717488789237668,
"grad_norm": 0.39411785206718664,
"learning_rate": 3.8472285357147966e-05,
"loss": 0.8679,
"step": 135
},
{
"epoch": 1.1116093672147485,
"grad_norm": 0.3898193416463423,
"learning_rate": 3.825283535861476e-05,
"loss": 0.8733,
"step": 140
},
{
"epoch": 1.15146985550573,
"grad_norm": 0.7348802698342378,
"learning_rate": 3.801937735804838e-05,
"loss": 0.8434,
"step": 145
},
{
"epoch": 1.1913303437967115,
"grad_norm": 0.36945102723915507,
"learning_rate": 3.777209052104598e-05,
"loss": 0.8461,
"step": 150
},
{
"epoch": 1.2311908320876932,
"grad_norm": 0.4602742420015167,
"learning_rate": 3.7511164626041823e-05,
"loss": 0.8606,
"step": 155
},
{
"epoch": 1.2710513203786746,
"grad_norm": 0.35621072409911864,
"learning_rate": 3.7236799918663284e-05,
"loss": 0.8555,
"step": 160
},
{
"epoch": 1.310911808669656,
"grad_norm": 0.42801215480497146,
"learning_rate": 3.6949206958053825e-05,
"loss": 0.8437,
"step": 165
},
{
"epoch": 1.3507722969606377,
"grad_norm": 0.36223537090099234,
"learning_rate": 3.6648606455280944e-05,
"loss": 0.8566,
"step": 170
},
{
"epoch": 1.3906327852516194,
"grad_norm": 0.702392888184491,
"learning_rate": 3.633522910395314e-05,
"loss": 0.8665,
"step": 175
},
{
"epoch": 1.4304932735426008,
"grad_norm": 0.36020937464475294,
"learning_rate": 3.6009315403175786e-05,
"loss": 0.8363,
"step": 180
},
{
"epoch": 1.4703537618335825,
"grad_norm": 0.3500105740239205,
"learning_rate": 3.567111547298194e-05,
"loss": 0.853,
"step": 185
},
{
"epoch": 1.5102142501245641,
"grad_norm": 0.4768600668120408,
"learning_rate": 3.532088886237956e-05,
"loss": 0.8496,
"step": 190
},
{
"epoch": 1.5500747384155455,
"grad_norm": 0.36399360324025654,
"learning_rate": 3.495890435016258e-05,
"loss": 0.8636,
"step": 195
},
{
"epoch": 1.5899352267065272,
"grad_norm": 0.3380619981423237,
"learning_rate": 3.458543973863859e-05,
"loss": 0.8538,
"step": 200
},
{
"epoch": 1.6297957149975089,
"grad_norm": 0.3389261022528899,
"learning_rate": 3.420078164043161e-05,
"loss": 0.8591,
"step": 205
},
{
"epoch": 1.6696562032884903,
"grad_norm": 0.395644963130336,
"learning_rate": 3.38052252585233e-05,
"loss": 0.8401,
"step": 210
},
{
"epoch": 1.7095166915794717,
"grad_norm": 0.30378900619983906,
"learning_rate": 3.339907415970168e-05,
"loss": 0.8476,
"step": 215
},
{
"epoch": 1.7493771798704534,
"grad_norm": 0.3900044033629726,
"learning_rate": 3.298264004159104e-05,
"loss": 0.8413,
"step": 220
},
{
"epoch": 1.789237668161435,
"grad_norm": 0.36925152268695366,
"learning_rate": 3.255624249344198e-05,
"loss": 0.8534,
"step": 225
},
{
"epoch": 1.8290981564524165,
"grad_norm": 0.3334498107167973,
"learning_rate": 3.212020875086495e-05,
"loss": 0.8734,
"step": 230
},
{
"epoch": 1.8689586447433981,
"grad_norm": 0.3231836736531515,
"learning_rate": 3.1674873444695804e-05,
"loss": 0.8619,
"step": 235
},
{
"epoch": 1.9088191330343798,
"grad_norm": 0.3352794024716405,
"learning_rate": 3.122057834418582e-05,
"loss": 0.8604,
"step": 240
},
{
"epoch": 1.9486796213253612,
"grad_norm": 0.34760623558396486,
"learning_rate": 3.075767209471345e-05,
"loss": 0.8712,
"step": 245
},
{
"epoch": 1.9885401096163426,
"grad_norm": 0.3494983937948561,
"learning_rate": 3.0286509950219077e-05,
"loss": 0.8449,
"step": 250
},
{
"epoch": 2.023916292974589,
"grad_norm": 0.6438619381464517,
"learning_rate": 2.9807453500567937e-05,
"loss": 0.7382,
"step": 255
},
{
"epoch": 2.0637767812655703,
"grad_norm": 0.48837947056765113,
"learning_rate": 2.9320870394050783e-05,
"loss": 0.6794,
"step": 260
},
{
"epoch": 2.103637269556552,
"grad_norm": 0.4249085778688401,
"learning_rate": 2.8827134055234883e-05,
"loss": 0.6878,
"step": 265
},
{
"epoch": 2.1434977578475336,
"grad_norm": 0.4112164304088742,
"learning_rate": 2.8326623398382174e-05,
"loss": 0.6895,
"step": 270
},
{
"epoch": 2.183358246138515,
"grad_norm": 0.4393590314899133,
"learning_rate": 2.781972253665431e-05,
"loss": 0.6684,
"step": 275
},
{
"epoch": 2.223218734429497,
"grad_norm": 0.37274870892195433,
"learning_rate": 2.7306820487327906e-05,
"loss": 0.6719,
"step": 280
},
{
"epoch": 2.2630792227204783,
"grad_norm": 0.38062444497520725,
"learning_rate": 2.6788310873246133e-05,
"loss": 0.6735,
"step": 285
},
{
"epoch": 2.30293971101146,
"grad_norm": 0.3754103984568679,
"learning_rate": 2.62645916207358e-05,
"loss": 0.6757,
"step": 290
},
{
"epoch": 2.3428001993024417,
"grad_norm": 0.38094693535399177,
"learning_rate": 2.5736064654221808e-05,
"loss": 0.6544,
"step": 295
},
{
"epoch": 2.382660687593423,
"grad_norm": 0.37267334400373975,
"learning_rate": 2.5203135587773196e-05,
"loss": 0.6612,
"step": 300
},
{
"epoch": 2.4225211758844045,
"grad_norm": 0.3511334496003159,
"learning_rate": 2.4666213413817696e-05,
"loss": 0.6763,
"step": 305
},
{
"epoch": 2.4623816641753864,
"grad_norm": 0.368576714561806,
"learning_rate": 2.4125710189263555e-05,
"loss": 0.6563,
"step": 310
},
{
"epoch": 2.502242152466368,
"grad_norm": 0.35196724604768753,
"learning_rate": 2.3582040719269504e-05,
"loss": 0.65,
"step": 315
},
{
"epoch": 2.5421026407573493,
"grad_norm": 0.3540473065272315,
"learning_rate": 2.3035622238905694e-05,
"loss": 0.6679,
"step": 320
},
{
"epoch": 2.5819631290483307,
"grad_norm": 0.3661596143769676,
"learning_rate": 2.2486874092949708e-05,
"loss": 0.6738,
"step": 325
},
{
"epoch": 2.621823617339312,
"grad_norm": 0.35913068756682465,
"learning_rate": 2.1936217414063584e-05,
"loss": 0.6887,
"step": 330
},
{
"epoch": 2.661684105630294,
"grad_norm": 0.36799867292080646,
"learning_rate": 2.138407479959869e-05,
"loss": 0.6709,
"step": 335
},
{
"epoch": 2.7015445939212754,
"grad_norm": 0.35691876781155074,
"learning_rate": 2.0830869987276537e-05,
"loss": 0.665,
"step": 340
},
{
"epoch": 2.741405082212257,
"grad_norm": 0.38987523468576574,
"learning_rate": 2.027702752999444e-05,
"loss": 0.6528,
"step": 345
},
{
"epoch": 2.7812655705032387,
"grad_norm": 0.38505381731754873,
"learning_rate": 1.9722972470005573e-05,
"loss": 0.6771,
"step": 350
},
{
"epoch": 2.82112605879422,
"grad_norm": 0.35842950010166197,
"learning_rate": 1.916913001272347e-05,
"loss": 0.6638,
"step": 355
},
{
"epoch": 2.8609865470852016,
"grad_norm": 0.37422580701088903,
"learning_rate": 1.8615925200401318e-05,
"loss": 0.6753,
"step": 360
},
{
"epoch": 2.9008470353761835,
"grad_norm": 0.36860495212005495,
"learning_rate": 1.806378258593642e-05,
"loss": 0.6681,
"step": 365
},
{
"epoch": 2.940707523667165,
"grad_norm": 0.3647903293380729,
"learning_rate": 1.7513125907050302e-05,
"loss": 0.6658,
"step": 370
},
{
"epoch": 2.9805680119581464,
"grad_norm": 0.35870478733795147,
"learning_rate": 1.6964377761094313e-05,
"loss": 0.667,
"step": 375
},
{
"epoch": 3.0159441953163926,
"grad_norm": 0.6451047782511159,
"learning_rate": 1.6417959280730506e-05,
"loss": 0.612,
"step": 380
},
{
"epoch": 3.055804683607374,
"grad_norm": 0.7456786595663745,
"learning_rate": 1.5874289810736452e-05,
"loss": 0.5282,
"step": 385
},
{
"epoch": 3.095665171898356,
"grad_norm": 0.4611479200453808,
"learning_rate": 1.5333786586182308e-05,
"loss": 0.4945,
"step": 390
},
{
"epoch": 3.1355256601893373,
"grad_norm": 0.4624423710225065,
"learning_rate": 1.4796864412226812e-05,
"loss": 0.5178,
"step": 395
},
{
"epoch": 3.1753861484803187,
"grad_norm": 0.3988674416095005,
"learning_rate": 1.4263935345778202e-05,
"loss": 0.5015,
"step": 400
},
{
"epoch": 3.2152466367713006,
"grad_norm": 0.4171540538418279,
"learning_rate": 1.37354083792642e-05,
"loss": 0.4988,
"step": 405
},
{
"epoch": 3.255107125062282,
"grad_norm": 0.4043531142190362,
"learning_rate": 1.3211689126753879e-05,
"loss": 0.4966,
"step": 410
},
{
"epoch": 3.2949676133532635,
"grad_norm": 0.406846754086433,
"learning_rate": 1.26931795126721e-05,
"loss": 0.5081,
"step": 415
},
{
"epoch": 3.334828101644245,
"grad_norm": 0.42193109699205417,
"learning_rate": 1.2180277463345697e-05,
"loss": 0.5088,
"step": 420
},
{
"epoch": 3.374688589935227,
"grad_norm": 0.4039970787325296,
"learning_rate": 1.167337660161783e-05,
"loss": 0.5023,
"step": 425
},
{
"epoch": 3.4145490782262082,
"grad_norm": 0.3891294704118612,
"learning_rate": 1.1172865944765122e-05,
"loss": 0.5054,
"step": 430
},
{
"epoch": 3.4544095665171897,
"grad_norm": 0.4123628750841747,
"learning_rate": 1.067912960594923e-05,
"loss": 0.5078,
"step": 435
},
{
"epoch": 3.4942700548081715,
"grad_norm": 0.4116103758152132,
"learning_rate": 1.0192546499432066e-05,
"loss": 0.5008,
"step": 440
},
{
"epoch": 3.534130543099153,
"grad_norm": 0.39334581361661547,
"learning_rate": 9.713490049780931e-06,
"loss": 0.5117,
"step": 445
},
{
"epoch": 3.5739910313901344,
"grad_norm": 0.3977163931160735,
"learning_rate": 9.242327905286552e-06,
"loss": 0.5004,
"step": 450
},
{
"epoch": 3.6138515196811163,
"grad_norm": 0.41291346064789947,
"learning_rate": 8.779421655814189e-06,
"loss": 0.4899,
"step": 455
},
{
"epoch": 3.6537120079720977,
"grad_norm": 0.39864981854696013,
"learning_rate": 8.325126555304208e-06,
"loss": 0.4925,
"step": 460
},
{
"epoch": 3.693572496263079,
"grad_norm": 0.3783439845572638,
"learning_rate": 7.879791249135059e-06,
"loss": 0.4936,
"step": 465
},
{
"epoch": 3.733432984554061,
"grad_norm": 0.36028247570266225,
"learning_rate": 7.443757506558033e-06,
"loss": 0.5071,
"step": 470
},
{
"epoch": 3.7732934728450425,
"grad_norm": 0.36974308586061955,
"learning_rate": 7.0173599584089625e-06,
"loss": 0.4902,
"step": 475
},
{
"epoch": 3.813153961136024,
"grad_norm": 0.3709767672807715,
"learning_rate": 6.600925840298331e-06,
"loss": 0.505,
"step": 480
},
{
"epoch": 3.8530144494270053,
"grad_norm": 0.3610583139391416,
"learning_rate": 6.1947747414767035e-06,
"loss": 0.5208,
"step": 485
},
{
"epoch": 3.892874937717987,
"grad_norm": 0.3791524633277678,
"learning_rate": 5.799218359568395e-06,
"loss": 0.5301,
"step": 490
},
{
"epoch": 3.9327354260089686,
"grad_norm": 0.363724718810471,
"learning_rate": 5.414560261361415e-06,
"loss": 0.4855,
"step": 495
},
{
"epoch": 3.97259591429995,
"grad_norm": 0.3867971355298049,
"learning_rate": 5.041095649837429e-06,
"loss": 0.4872,
"step": 500
},
{
"epoch": 4.007972097658197,
"grad_norm": 0.6922635714147061,
"learning_rate": 4.679111137620442e-06,
"loss": 0.4901,
"step": 505
},
{
"epoch": 4.047832585949178,
"grad_norm": 0.5464640833763156,
"learning_rate": 4.328884527018067e-06,
"loss": 0.4086,
"step": 510
},
{
"epoch": 4.08769307424016,
"grad_norm": 0.5514248166968356,
"learning_rate": 3.990684596824219e-06,
"loss": 0.4042,
"step": 515
},
{
"epoch": 4.127553562531141,
"grad_norm": 0.46194805338366773,
"learning_rate": 3.6647708960468696e-06,
"loss": 0.4028,
"step": 520
},
{
"epoch": 4.1674140508221225,
"grad_norm": 0.45065104707045,
"learning_rate": 3.3513935447190595e-06,
"loss": 0.3937,
"step": 525
},
{
"epoch": 4.207274539113104,
"grad_norm": 0.3884400001553067,
"learning_rate": 3.050793041946183e-06,
"loss": 0.3941,
"step": 530
},
{
"epoch": 4.247135027404085,
"grad_norm": 0.4057662200999109,
"learning_rate": 2.763200081336721e-06,
"loss": 0.3913,
"step": 535
},
{
"epoch": 4.286995515695067,
"grad_norm": 0.3927959847718352,
"learning_rate": 2.488835373958185e-06,
"loss": 0.3993,
"step": 540
},
{
"epoch": 4.326856003986049,
"grad_norm": 0.3915532354537384,
"learning_rate": 2.2279094789540244e-06,
"loss": 0.4047,
"step": 545
},
{
"epoch": 4.36671649227703,
"grad_norm": 0.37576385131138085,
"learning_rate": 1.9806226419516195e-06,
"loss": 0.402,
"step": 550
},
{
"epoch": 4.406576980568012,
"grad_norm": 0.3589030046466532,
"learning_rate": 1.7471646413852439e-06,
"loss": 0.4037,
"step": 555
},
{
"epoch": 4.446437468858994,
"grad_norm": 0.38802398319508147,
"learning_rate": 1.527714642852045e-06,
"loss": 0.3975,
"step": 560
},
{
"epoch": 4.486297957149975,
"grad_norm": 0.3781055143033295,
"learning_rate": 1.3224410616127292e-06,
"loss": 0.3902,
"step": 565
},
{
"epoch": 4.526158445440957,
"grad_norm": 0.380556572113873,
"learning_rate": 1.1315014333425455e-06,
"loss": 0.3955,
"step": 570
},
{
"epoch": 4.566018933731939,
"grad_norm": 0.37627984573745005,
"learning_rate": 9.550422932316938e-07,
"loss": 0.3925,
"step": 575
},
{
"epoch": 4.60587942202292,
"grad_norm": 0.36605761395168335,
"learning_rate": 7.931990635280052e-07,
"loss": 0.3929,
"step": 580
},
{
"epoch": 4.645739910313901,
"grad_norm": 0.37403610200301257,
"learning_rate": 6.460959496081276e-07,
"loss": 0.4068,
"step": 585
},
{
"epoch": 4.685600398604883,
"grad_norm": 0.3673645131484265,
"learning_rate": 5.13845844657066e-07,
"loss": 0.3962,
"step": 590
},
{
"epoch": 4.725460886895864,
"grad_norm": 0.39800543909133185,
"learning_rate": 3.965502430291235e-07,
"loss": 0.3912,
"step": 595
},
{
"epoch": 4.765321375186846,
"grad_norm": 0.37969719371341065,
"learning_rate": 2.942991623568436e-07,
"loss": 0.3801,
"step": 600
},
{
"epoch": 4.805181863477827,
"grad_norm": 0.3777219824944034,
"learning_rate": 2.0717107446762696e-07,
"loss": 0.369,
"step": 605
},
{
"epoch": 4.845042351768809,
"grad_norm": 0.3913050535737085,
"learning_rate": 1.3523284516113955e-07,
"loss": 0.3982,
"step": 610
},
{
"epoch": 4.884902840059791,
"grad_norm": 0.3712369916416077,
"learning_rate": 7.853968289363245e-08,
"loss": 0.3916,
"step": 615
},
{
"epoch": 4.924763328350773,
"grad_norm": 0.38101325204255376,
"learning_rate": 3.7135096408631443e-08,
"loss": 0.3978,
"step": 620
},
{
"epoch": 4.964623816641754,
"grad_norm": 0.37989319704275204,
"learning_rate": 1.1050861346488806e-08,
"loss": 0.389,
"step": 625
},
{
"epoch": 5.0,
"grad_norm": 0.5514474703259893,
"learning_rate": 3.069958583856725e-10,
"loss": 0.4095,
"step": 630
},
{
"epoch": 5.0,
"step": 630,
"total_flos": 1235947560108032.0,
"train_loss": 0.7053156269921197,
"train_runtime": 61423.1886,
"train_samples_per_second": 1.307,
"train_steps_per_second": 0.01
}
],
"logging_steps": 5,
"max_steps": 630,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1235947560108032.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}