{
"best_metric": 0.8291622996330261,
"best_model_checkpoint": "miner_id_24/checkpoint-150",
"epoch": 0.9621166566446182,
"eval_steps": 25,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006414111044297455,
"grad_norm": 10.31315803527832,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.8656,
"step": 1
},
{
"epoch": 0.006414111044297455,
"eval_loss": 2.4902782440185547,
"eval_runtime": 1.0287,
"eval_samples_per_second": 48.606,
"eval_steps_per_second": 12.637,
"step": 1
},
{
"epoch": 0.01282822208859491,
"grad_norm": 12.256026268005371,
"learning_rate": 3.3333333333333335e-05,
"loss": 2.0725,
"step": 2
},
{
"epoch": 0.019242333132892364,
"grad_norm": 10.072068214416504,
"learning_rate": 5e-05,
"loss": 2.1456,
"step": 3
},
{
"epoch": 0.02565644417718982,
"grad_norm": 3.304625988006592,
"learning_rate": 6.666666666666667e-05,
"loss": 2.1228,
"step": 4
},
{
"epoch": 0.032070555221487274,
"grad_norm": 3.110682487487793,
"learning_rate": 8.333333333333334e-05,
"loss": 2.1082,
"step": 5
},
{
"epoch": 0.03848466626578473,
"grad_norm": 3.0085599422454834,
"learning_rate": 0.0001,
"loss": 1.9762,
"step": 6
},
{
"epoch": 0.04489877731008218,
"grad_norm": 2.9738759994506836,
"learning_rate": 9.998929121859592e-05,
"loss": 1.8855,
"step": 7
},
{
"epoch": 0.05131288835437964,
"grad_norm": 2.297053813934326,
"learning_rate": 9.99571699711836e-05,
"loss": 1.7019,
"step": 8
},
{
"epoch": 0.05772699939867709,
"grad_norm": 1.9924105405807495,
"learning_rate": 9.990365154573717e-05,
"loss": 1.5766,
"step": 9
},
{
"epoch": 0.06414111044297455,
"grad_norm": 2.017209529876709,
"learning_rate": 9.982876141412856e-05,
"loss": 1.5321,
"step": 10
},
{
"epoch": 0.070555221487272,
"grad_norm": 2.017620086669922,
"learning_rate": 9.973253522000438e-05,
"loss": 1.4972,
"step": 11
},
{
"epoch": 0.07696933253156946,
"grad_norm": 2.0020158290863037,
"learning_rate": 9.961501876182148e-05,
"loss": 1.447,
"step": 12
},
{
"epoch": 0.08338344357586691,
"grad_norm": 1.0016213655471802,
"learning_rate": 9.947626797104925e-05,
"loss": 1.2698,
"step": 13
},
{
"epoch": 0.08979755462016437,
"grad_norm": 1.1300530433654785,
"learning_rate": 9.931634888554937e-05,
"loss": 1.2114,
"step": 14
},
{
"epoch": 0.09621166566446182,
"grad_norm": 0.8754634261131287,
"learning_rate": 9.913533761814537e-05,
"loss": 1.1015,
"step": 15
},
{
"epoch": 0.10262577670875928,
"grad_norm": 0.9171552658081055,
"learning_rate": 9.893332032039701e-05,
"loss": 1.1322,
"step": 16
},
{
"epoch": 0.10903988775305673,
"grad_norm": 0.8763850927352905,
"learning_rate": 9.871039314159677e-05,
"loss": 1.0756,
"step": 17
},
{
"epoch": 0.11545399879735418,
"grad_norm": 0.8467594385147095,
"learning_rate": 9.846666218300807e-05,
"loss": 1.0607,
"step": 18
},
{
"epoch": 0.12186810984165164,
"grad_norm": 0.9706906676292419,
"learning_rate": 9.82022434473668e-05,
"loss": 1.039,
"step": 19
},
{
"epoch": 0.1282822208859491,
"grad_norm": 0.7961729168891907,
"learning_rate": 9.791726278367022e-05,
"loss": 1.0063,
"step": 20
},
{
"epoch": 0.13469633193024655,
"grad_norm": 0.889162540435791,
"learning_rate": 9.761185582727977e-05,
"loss": 1.0522,
"step": 21
},
{
"epoch": 0.141110442974544,
"grad_norm": 0.7932081818580627,
"learning_rate": 9.728616793536588e-05,
"loss": 0.9918,
"step": 22
},
{
"epoch": 0.14752455401884146,
"grad_norm": 0.8014475703239441,
"learning_rate": 9.694035411772594e-05,
"loss": 1.0652,
"step": 23
},
{
"epoch": 0.1539386650631389,
"grad_norm": 1.1154183149337769,
"learning_rate": 9.657457896300791e-05,
"loss": 1.0376,
"step": 24
},
{
"epoch": 0.16035277610743637,
"grad_norm": 1.1352952718734741,
"learning_rate": 9.618901656037514e-05,
"loss": 1.1077,
"step": 25
},
{
"epoch": 0.16035277610743637,
"eval_loss": 1.0208630561828613,
"eval_runtime": 1.0294,
"eval_samples_per_second": 48.573,
"eval_steps_per_second": 12.629,
"step": 25
},
{
"epoch": 0.16676688715173382,
"grad_norm": 0.5933877229690552,
"learning_rate": 9.578385041664925e-05,
"loss": 1.1207,
"step": 26
},
{
"epoch": 0.17318099819603128,
"grad_norm": 0.7073403000831604,
"learning_rate": 9.535927336897098e-05,
"loss": 1.0747,
"step": 27
},
{
"epoch": 0.17959510924032873,
"grad_norm": 0.6395047903060913,
"learning_rate": 9.491548749301997e-05,
"loss": 1.0038,
"step": 28
},
{
"epoch": 0.1860092202846262,
"grad_norm": 0.5215311050415039,
"learning_rate": 9.445270400683786e-05,
"loss": 0.9655,
"step": 29
},
{
"epoch": 0.19242333132892364,
"grad_norm": 0.5276326537132263,
"learning_rate": 9.397114317029975e-05,
"loss": 0.9261,
"step": 30
},
{
"epoch": 0.1988374423732211,
"grad_norm": 0.45551949739456177,
"learning_rate": 9.34710341802826e-05,
"loss": 0.9266,
"step": 31
},
{
"epoch": 0.20525155341751855,
"grad_norm": 0.5097278356552124,
"learning_rate": 9.295261506157986e-05,
"loss": 0.9474,
"step": 32
},
{
"epoch": 0.211665664461816,
"grad_norm": 0.4889874756336212,
"learning_rate": 9.241613255361455e-05,
"loss": 0.9348,
"step": 33
},
{
"epoch": 0.21807977550611346,
"grad_norm": 0.6000301837921143,
"learning_rate": 9.186184199300464e-05,
"loss": 0.9084,
"step": 34
},
{
"epoch": 0.22449388655041091,
"grad_norm": 0.6574684977531433,
"learning_rate": 9.129000719203672e-05,
"loss": 0.9656,
"step": 35
},
{
"epoch": 0.23090799759470837,
"grad_norm": 0.6883850693702698,
"learning_rate": 9.070090031310558e-05,
"loss": 0.894,
"step": 36
},
{
"epoch": 0.23732210863900582,
"grad_norm": 0.7173681855201721,
"learning_rate": 9.009480173917968e-05,
"loss": 1.0018,
"step": 37
},
{
"epoch": 0.24373621968330328,
"grad_norm": 0.3584338128566742,
"learning_rate": 8.947199994035401e-05,
"loss": 1.0744,
"step": 38
},
{
"epoch": 0.25015033072760073,
"grad_norm": 0.4074675440788269,
"learning_rate": 8.883279133655399e-05,
"loss": 0.9454,
"step": 39
},
{
"epoch": 0.2565644417718982,
"grad_norm": 0.5095922350883484,
"learning_rate": 8.817748015645558e-05,
"loss": 0.9923,
"step": 40
},
{
"epoch": 0.26297855281619564,
"grad_norm": 0.511969268321991,
"learning_rate": 8.7506378292689e-05,
"loss": 0.9529,
"step": 41
},
{
"epoch": 0.2693926638604931,
"grad_norm": 0.5053486824035645,
"learning_rate": 8.681980515339464e-05,
"loss": 0.9017,
"step": 42
},
{
"epoch": 0.27580677490479055,
"grad_norm": 0.547535240650177,
"learning_rate": 8.611808751020213e-05,
"loss": 0.9219,
"step": 43
},
{
"epoch": 0.282220885949088,
"grad_norm": 0.47144246101379395,
"learning_rate": 8.540155934270471e-05,
"loss": 0.9067,
"step": 44
},
{
"epoch": 0.28863499699338546,
"grad_norm": 0.49577343463897705,
"learning_rate": 8.467056167950311e-05,
"loss": 0.9403,
"step": 45
},
{
"epoch": 0.2950491080376829,
"grad_norm": 0.5017397403717041,
"learning_rate": 8.392544243589427e-05,
"loss": 0.9151,
"step": 46
},
{
"epoch": 0.30146321908198037,
"grad_norm": 0.49621936678886414,
"learning_rate": 8.316655624828267e-05,
"loss": 0.8998,
"step": 47
},
{
"epoch": 0.3078773301262778,
"grad_norm": 0.4927474856376648,
"learning_rate": 8.239426430539243e-05,
"loss": 0.9294,
"step": 48
},
{
"epoch": 0.3142914411705753,
"grad_norm": 0.7246368527412415,
"learning_rate": 8.160893417636122e-05,
"loss": 0.8953,
"step": 49
},
{
"epoch": 0.32070555221487274,
"grad_norm": 0.7401767373085022,
"learning_rate": 8.081093963579707e-05,
"loss": 0.9852,
"step": 50
},
{
"epoch": 0.32070555221487274,
"eval_loss": 0.9231343865394592,
"eval_runtime": 1.0157,
"eval_samples_per_second": 49.226,
"eval_steps_per_second": 12.799,
"step": 50
},
{
"epoch": 0.3271196632591702,
"grad_norm": 0.3813113868236542,
"learning_rate": 8.000066048588211e-05,
"loss": 0.942,
"step": 51
},
{
"epoch": 0.33353377430346764,
"grad_norm": 0.3614146411418915,
"learning_rate": 7.917848237560709e-05,
"loss": 0.8877,
"step": 52
},
{
"epoch": 0.3399478853477651,
"grad_norm": 0.3518458306789398,
"learning_rate": 7.834479661722347e-05,
"loss": 0.9331,
"step": 53
},
{
"epoch": 0.34636199639206255,
"grad_norm": 0.36760586500167847,
"learning_rate": 7.75e-05,
"loss": 0.8761,
"step": 54
},
{
"epoch": 0.35277610743636,
"grad_norm": 0.4043155908584595,
"learning_rate": 7.664449460137245e-05,
"loss": 0.8931,
"step": 55
},
{
"epoch": 0.35919021848065746,
"grad_norm": 0.397081196308136,
"learning_rate": 7.577868759557654e-05,
"loss": 0.8639,
"step": 56
},
{
"epoch": 0.3656043295249549,
"grad_norm": 0.3925102651119232,
"learning_rate": 7.490299105985507e-05,
"loss": 0.8433,
"step": 57
},
{
"epoch": 0.3720184405692524,
"grad_norm": 0.40960493683815,
"learning_rate": 7.401782177833148e-05,
"loss": 0.8634,
"step": 58
},
{
"epoch": 0.37843255161354983,
"grad_norm": 0.42561712861061096,
"learning_rate": 7.312360104364318e-05,
"loss": 0.7908,
"step": 59
},
{
"epoch": 0.3848466626578473,
"grad_norm": 0.4647367298603058,
"learning_rate": 7.222075445642904e-05,
"loss": 0.8468,
"step": 60
},
{
"epoch": 0.39126077370214474,
"grad_norm": 0.4602052867412567,
"learning_rate": 7.130971172276657e-05,
"loss": 0.8855,
"step": 61
},
{
"epoch": 0.3976748847464422,
"grad_norm": 0.5147804617881775,
"learning_rate": 7.03909064496551e-05,
"loss": 0.9345,
"step": 62
},
{
"epoch": 0.40408899579073965,
"grad_norm": 0.3348015248775482,
"learning_rate": 6.946477593864228e-05,
"loss": 1.0632,
"step": 63
},
{
"epoch": 0.4105031068350371,
"grad_norm": 0.3764553368091583,
"learning_rate": 6.853176097769229e-05,
"loss": 0.9961,
"step": 64
},
{
"epoch": 0.41691721787933456,
"grad_norm": 0.41312095522880554,
"learning_rate": 6.759230563139466e-05,
"loss": 0.964,
"step": 65
},
{
"epoch": 0.423331328923632,
"grad_norm": 0.4216284453868866,
"learning_rate": 6.664685702961344e-05,
"loss": 0.8904,
"step": 66
},
{
"epoch": 0.42974543996792947,
"grad_norm": 0.3816414773464203,
"learning_rate": 6.56958651546778e-05,
"loss": 0.868,
"step": 67
},
{
"epoch": 0.4361595510122269,
"grad_norm": 0.35876405239105225,
"learning_rate": 6.473978262721463e-05,
"loss": 0.8476,
"step": 68
},
{
"epoch": 0.4425736620565244,
"grad_norm": 0.3981570601463318,
"learning_rate": 6.377906449072578e-05,
"loss": 0.8487,
"step": 69
},
{
"epoch": 0.44898777310082183,
"grad_norm": 0.44419988989830017,
"learning_rate": 6.281416799501188e-05,
"loss": 0.8494,
"step": 70
},
{
"epoch": 0.4554018841451193,
"grad_norm": 0.41967856884002686,
"learning_rate": 6.184555237854625e-05,
"loss": 0.8249,
"step": 71
},
{
"epoch": 0.46181599518941674,
"grad_norm": 0.460245817899704,
"learning_rate": 6.087367864990233e-05,
"loss": 0.8508,
"step": 72
},
{
"epoch": 0.4682301062337142,
"grad_norm": 0.5052414536476135,
"learning_rate": 5.989900936833841e-05,
"loss": 0.849,
"step": 73
},
{
"epoch": 0.47464421727801165,
"grad_norm": 0.6499659419059753,
"learning_rate": 5.8922008423644624e-05,
"loss": 0.881,
"step": 74
},
{
"epoch": 0.4810583283223091,
"grad_norm": 0.6127673983573914,
"learning_rate": 5.794314081535644e-05,
"loss": 0.9338,
"step": 75
},
{
"epoch": 0.4810583283223091,
"eval_loss": 0.880484402179718,
"eval_runtime": 1.0107,
"eval_samples_per_second": 49.469,
"eval_steps_per_second": 12.862,
"step": 75
},
{
"epoch": 0.48747243936660656,
"grad_norm": 0.34252408146858215,
"learning_rate": 5.696287243144013e-05,
"loss": 0.9852,
"step": 76
},
{
"epoch": 0.493886550410904,
"grad_norm": 0.4102064073085785,
"learning_rate": 5.598166982655526e-05,
"loss": 0.9529,
"step": 77
},
{
"epoch": 0.5003006614552015,
"grad_norm": 0.4815739095211029,
"learning_rate": 5.500000000000001e-05,
"loss": 0.918,
"step": 78
},
{
"epoch": 0.5067147724994989,
"grad_norm": 0.4241974651813507,
"learning_rate": 5.4018330173444754e-05,
"loss": 0.8912,
"step": 79
},
{
"epoch": 0.5131288835437964,
"grad_norm": 0.44345757365226746,
"learning_rate": 5.303712756855988e-05,
"loss": 0.9079,
"step": 80
},
{
"epoch": 0.5195429945880938,
"grad_norm": 0.48125022649765015,
"learning_rate": 5.205685918464356e-05,
"loss": 0.8695,
"step": 81
},
{
"epoch": 0.5259571056323913,
"grad_norm": 0.41340336203575134,
"learning_rate": 5.107799157635538e-05,
"loss": 0.7949,
"step": 82
},
{
"epoch": 0.5323712166766887,
"grad_norm": 0.4205497205257416,
"learning_rate": 5.0100990631661606e-05,
"loss": 0.7923,
"step": 83
},
{
"epoch": 0.5387853277209862,
"grad_norm": 0.4334363639354706,
"learning_rate": 4.912632135009769e-05,
"loss": 0.7829,
"step": 84
},
{
"epoch": 0.5451994387652837,
"grad_norm": 0.4538995623588562,
"learning_rate": 4.8154447621453744e-05,
"loss": 0.8175,
"step": 85
},
{
"epoch": 0.5516135498095811,
"grad_norm": 0.5367742776870728,
"learning_rate": 4.718583200498814e-05,
"loss": 0.8247,
"step": 86
},
{
"epoch": 0.5580276608538786,
"grad_norm": 0.6540936231613159,
"learning_rate": 4.6220935509274235e-05,
"loss": 0.8939,
"step": 87
},
{
"epoch": 0.564441771898176,
"grad_norm": 0.32181909680366516,
"learning_rate": 4.526021737278538e-05,
"loss": 0.9774,
"step": 88
},
{
"epoch": 0.5708558829424735,
"grad_norm": 0.3461693227291107,
"learning_rate": 4.430413484532222e-05,
"loss": 0.9442,
"step": 89
},
{
"epoch": 0.5772699939867709,
"grad_norm": 0.39385172724723816,
"learning_rate": 4.3353142970386564e-05,
"loss": 0.946,
"step": 90
},
{
"epoch": 0.5836841050310684,
"grad_norm": 0.35734453797340393,
"learning_rate": 4.240769436860537e-05,
"loss": 0.8235,
"step": 91
},
{
"epoch": 0.5900982160753658,
"grad_norm": 0.3919447362422943,
"learning_rate": 4.146823902230772e-05,
"loss": 0.7648,
"step": 92
},
{
"epoch": 0.5965123271196633,
"grad_norm": 0.3950856328010559,
"learning_rate": 4.053522406135775e-05,
"loss": 0.8024,
"step": 93
},
{
"epoch": 0.6029264381639607,
"grad_norm": 0.40364179015159607,
"learning_rate": 3.960909355034491e-05,
"loss": 0.9059,
"step": 94
},
{
"epoch": 0.6093405492082582,
"grad_norm": 0.4102337062358856,
"learning_rate": 3.8690288277233435e-05,
"loss": 0.8092,
"step": 95
},
{
"epoch": 0.6157546602525557,
"grad_norm": 0.4184516668319702,
"learning_rate": 3.777924554357096e-05,
"loss": 0.8231,
"step": 96
},
{
"epoch": 0.6221687712968531,
"grad_norm": 0.47536927461624146,
"learning_rate": 3.687639895635684e-05,
"loss": 0.8002,
"step": 97
},
{
"epoch": 0.6285828823411506,
"grad_norm": 0.4901541769504547,
"learning_rate": 3.598217822166854e-05,
"loss": 0.8575,
"step": 98
},
{
"epoch": 0.634996993385448,
"grad_norm": 0.5419043302536011,
"learning_rate": 3.509700894014496e-05,
"loss": 0.8291,
"step": 99
},
{
"epoch": 0.6414111044297455,
"grad_norm": 0.6231566667556763,
"learning_rate": 3.422131240442349e-05,
"loss": 0.8435,
"step": 100
},
{
"epoch": 0.6414111044297455,
"eval_loss": 0.8542066812515259,
"eval_runtime": 1.0284,
"eval_samples_per_second": 48.618,
"eval_steps_per_second": 12.641,
"step": 100
},
{
"epoch": 0.6478252154740429,
"grad_norm": 0.3350529968738556,
"learning_rate": 3.3355505398627566e-05,
"loss": 0.9972,
"step": 101
},
{
"epoch": 0.6542393265183404,
"grad_norm": 0.3845514953136444,
"learning_rate": 3.250000000000001e-05,
"loss": 0.8938,
"step": 102
},
{
"epoch": 0.6606534375626378,
"grad_norm": 0.39235520362854004,
"learning_rate": 3.165520338277653e-05,
"loss": 0.8996,
"step": 103
},
{
"epoch": 0.6670675486069353,
"grad_norm": 0.38063177466392517,
"learning_rate": 3.082151762439293e-05,
"loss": 0.8417,
"step": 104
},
{
"epoch": 0.6734816596512327,
"grad_norm": 0.39507997035980225,
"learning_rate": 2.9999339514117912e-05,
"loss": 0.7897,
"step": 105
},
{
"epoch": 0.6798957706955302,
"grad_norm": 0.37488269805908203,
"learning_rate": 2.9189060364202943e-05,
"loss": 0.8193,
"step": 106
},
{
"epoch": 0.6863098817398277,
"grad_norm": 0.45661434531211853,
"learning_rate": 2.8391065823638806e-05,
"loss": 0.8756,
"step": 107
},
{
"epoch": 0.6927239927841251,
"grad_norm": 0.4090655744075775,
"learning_rate": 2.760573569460757e-05,
"loss": 0.811,
"step": 108
},
{
"epoch": 0.6991381038284226,
"grad_norm": 0.4572974443435669,
"learning_rate": 2.6833443751717347e-05,
"loss": 0.8338,
"step": 109
},
{
"epoch": 0.70555221487272,
"grad_norm": 0.4468700587749481,
"learning_rate": 2.6074557564105727e-05,
"loss": 0.7846,
"step": 110
},
{
"epoch": 0.7119663259170175,
"grad_norm": 0.47316402196884155,
"learning_rate": 2.53294383204969e-05,
"loss": 0.7582,
"step": 111
},
{
"epoch": 0.7183804369613149,
"grad_norm": 0.5286366939544678,
"learning_rate": 2.459844065729529e-05,
"loss": 0.7915,
"step": 112
},
{
"epoch": 0.7247945480056124,
"grad_norm": 0.32154494524002075,
"learning_rate": 2.3881912489797885e-05,
"loss": 1.0302,
"step": 113
},
{
"epoch": 0.7312086590499098,
"grad_norm": 0.36848995089530945,
"learning_rate": 2.3180194846605367e-05,
"loss": 0.9236,
"step": 114
},
{
"epoch": 0.7376227700942073,
"grad_norm": 0.3812990188598633,
"learning_rate": 2.2493621707311002e-05,
"loss": 0.8507,
"step": 115
},
{
"epoch": 0.7440368811385047,
"grad_norm": 0.37178778648376465,
"learning_rate": 2.1822519843544424e-05,
"loss": 0.9149,
"step": 116
},
{
"epoch": 0.7504509921828022,
"grad_norm": 0.36262521147727966,
"learning_rate": 2.1167208663446025e-05,
"loss": 0.8503,
"step": 117
},
{
"epoch": 0.7568651032270997,
"grad_norm": 0.3932304084300995,
"learning_rate": 2.0528000059645997e-05,
"loss": 0.8351,
"step": 118
},
{
"epoch": 0.7632792142713971,
"grad_norm": 0.4190768301486969,
"learning_rate": 1.9905198260820328e-05,
"loss": 0.8448,
"step": 119
},
{
"epoch": 0.7696933253156946,
"grad_norm": 0.40936529636383057,
"learning_rate": 1.9299099686894423e-05,
"loss": 0.7507,
"step": 120
},
{
"epoch": 0.776107436359992,
"grad_norm": 0.43789592385292053,
"learning_rate": 1.8709992807963285e-05,
"loss": 0.8046,
"step": 121
},
{
"epoch": 0.7825215474042895,
"grad_norm": 0.44734787940979004,
"learning_rate": 1.8138158006995364e-05,
"loss": 0.8344,
"step": 122
},
{
"epoch": 0.7889356584485869,
"grad_norm": 0.48436063528060913,
"learning_rate": 1.758386744638546e-05,
"loss": 0.8474,
"step": 123
},
{
"epoch": 0.7953497694928844,
"grad_norm": 0.5177507996559143,
"learning_rate": 1.7047384938420154e-05,
"loss": 0.7806,
"step": 124
},
{
"epoch": 0.8017638805371818,
"grad_norm": 0.6930881142616272,
"learning_rate": 1.6528965819717413e-05,
"loss": 0.8634,
"step": 125
},
{
"epoch": 0.8017638805371818,
"eval_loss": 0.8382258415222168,
"eval_runtime": 1.018,
"eval_samples_per_second": 49.116,
"eval_steps_per_second": 12.77,
"step": 125
},
{
"epoch": 0.8081779915814793,
"grad_norm": 0.31197455525398254,
"learning_rate": 1.602885682970026e-05,
"loss": 0.9636,
"step": 126
},
{
"epoch": 0.8145921026257767,
"grad_norm": 0.3908165693283081,
"learning_rate": 1.5547295993162156e-05,
"loss": 0.95,
"step": 127
},
{
"epoch": 0.8210062136700742,
"grad_norm": 0.3665495812892914,
"learning_rate": 1.5084512506980026e-05,
"loss": 0.8052,
"step": 128
},
{
"epoch": 0.8274203247143717,
"grad_norm": 0.37569713592529297,
"learning_rate": 1.464072663102903e-05,
"loss": 0.8625,
"step": 129
},
{
"epoch": 0.8338344357586691,
"grad_norm": 0.40901851654052734,
"learning_rate": 1.4216149583350754e-05,
"loss": 0.7515,
"step": 130
},
{
"epoch": 0.8402485468029666,
"grad_norm": 0.42621657252311707,
"learning_rate": 1.3810983439624881e-05,
"loss": 0.7998,
"step": 131
},
{
"epoch": 0.846662657847264,
"grad_norm": 0.44142094254493713,
"learning_rate": 1.3425421036992098e-05,
"loss": 0.7929,
"step": 132
},
{
"epoch": 0.8530767688915615,
"grad_norm": 0.43068927526474,
"learning_rate": 1.305964588227407e-05,
"loss": 0.7788,
"step": 133
},
{
"epoch": 0.8594908799358589,
"grad_norm": 0.4872435927391052,
"learning_rate": 1.2713832064634126e-05,
"loss": 0.8294,
"step": 134
},
{
"epoch": 0.8659049909801564,
"grad_norm": 0.4502367675304413,
"learning_rate": 1.2388144172720251e-05,
"loss": 0.7533,
"step": 135
},
{
"epoch": 0.8723191020244538,
"grad_norm": 0.4974122941493988,
"learning_rate": 1.2082737216329794e-05,
"loss": 0.7485,
"step": 136
},
{
"epoch": 0.8787332130687513,
"grad_norm": 0.5290459394454956,
"learning_rate": 1.1797756552633215e-05,
"loss": 0.8153,
"step": 137
},
{
"epoch": 0.8851473241130488,
"grad_norm": 0.37030237913131714,
"learning_rate": 1.1533337816991932e-05,
"loss": 1.0218,
"step": 138
},
{
"epoch": 0.8915614351573462,
"grad_norm": 0.34467270970344543,
"learning_rate": 1.1289606858403237e-05,
"loss": 0.9029,
"step": 139
},
{
"epoch": 0.8979755462016437,
"grad_norm": 0.36066851019859314,
"learning_rate": 1.1066679679603e-05,
"loss": 0.8987,
"step": 140
},
{
"epoch": 0.9043896572459411,
"grad_norm": 0.3829849362373352,
"learning_rate": 1.0864662381854632e-05,
"loss": 0.8635,
"step": 141
},
{
"epoch": 0.9108037682902386,
"grad_norm": 0.3921374976634979,
"learning_rate": 1.0683651114450641e-05,
"loss": 0.842,
"step": 142
},
{
"epoch": 0.917217879334536,
"grad_norm": 0.41558754444122314,
"learning_rate": 1.0523732028950771e-05,
"loss": 0.7996,
"step": 143
},
{
"epoch": 0.9236319903788335,
"grad_norm": 0.4474291205406189,
"learning_rate": 1.0384981238178534e-05,
"loss": 0.7812,
"step": 144
},
{
"epoch": 0.9300461014231309,
"grad_norm": 0.4382871687412262,
"learning_rate": 1.0267464779995617e-05,
"loss": 0.7382,
"step": 145
},
{
"epoch": 0.9364602124674284,
"grad_norm": 0.43462201952934265,
"learning_rate": 1.017123858587145e-05,
"loss": 0.7305,
"step": 146
},
{
"epoch": 0.9428743235117258,
"grad_norm": 0.43509241938591003,
"learning_rate": 1.0096348454262845e-05,
"loss": 0.693,
"step": 147
},
{
"epoch": 0.9492884345560233,
"grad_norm": 0.4872788190841675,
"learning_rate": 1.00428300288164e-05,
"loss": 0.8178,
"step": 148
},
{
"epoch": 0.9557025456003208,
"grad_norm": 0.5364053845405579,
"learning_rate": 1.001070878140409e-05,
"loss": 0.7784,
"step": 149
},
{
"epoch": 0.9621166566446182,
"grad_norm": 0.6880702972412109,
"learning_rate": 1e-05,
"loss": 0.8225,
"step": 150
},
{
"epoch": 0.9621166566446182,
"eval_loss": 0.8291622996330261,
"eval_runtime": 1.0213,
"eval_samples_per_second": 48.958,
"eval_steps_per_second": 12.729,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 150,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.500379396968284e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}