{
  "best_metric": 0.8291622996330261,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.9621166566446182,
  "eval_steps": 25,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006414111044297455,
      "grad_norm": 10.31315803527832,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.8656,
      "step": 1
    },
    {
      "epoch": 0.006414111044297455,
      "eval_loss": 2.4902782440185547,
      "eval_runtime": 1.0287,
      "eval_samples_per_second": 48.606,
      "eval_steps_per_second": 12.637,
      "step": 1
    },
    {
      "epoch": 0.01282822208859491,
      "grad_norm": 12.256026268005371,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.0725,
      "step": 2
    },
    {
      "epoch": 0.019242333132892364,
      "grad_norm": 10.072068214416504,
      "learning_rate": 5e-05,
      "loss": 2.1456,
      "step": 3
    },
    {
      "epoch": 0.02565644417718982,
      "grad_norm": 3.304625988006592,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.1228,
      "step": 4
    },
    {
      "epoch": 0.032070555221487274,
      "grad_norm": 3.110682487487793,
      "learning_rate": 8.333333333333334e-05,
      "loss": 2.1082,
      "step": 5
    },
    {
      "epoch": 0.03848466626578473,
      "grad_norm": 3.0085599422454834,
      "learning_rate": 0.0001,
      "loss": 1.9762,
      "step": 6
    },
    {
      "epoch": 0.04489877731008218,
      "grad_norm": 2.9738759994506836,
      "learning_rate": 9.998929121859592e-05,
      "loss": 1.8855,
      "step": 7
    },
    {
      "epoch": 0.05131288835437964,
      "grad_norm": 2.297053813934326,
      "learning_rate": 9.99571699711836e-05,
      "loss": 1.7019,
      "step": 8
    },
    {
      "epoch": 0.05772699939867709,
      "grad_norm": 1.9924105405807495,
      "learning_rate": 9.990365154573717e-05,
      "loss": 1.5766,
      "step": 9
    },
    {
      "epoch": 0.06414111044297455,
      "grad_norm": 2.017209529876709,
      "learning_rate": 9.982876141412856e-05,
      "loss": 1.5321,
      "step": 10
    },
    {
      "epoch": 0.070555221487272,
      "grad_norm": 2.017620086669922,
      "learning_rate": 9.973253522000438e-05,
      "loss": 1.4972,
      "step": 11
    },
    {
      "epoch": 0.07696933253156946,
      "grad_norm": 2.0020158290863037,
      "learning_rate": 9.961501876182148e-05,
      "loss": 1.447,
      "step": 12
    },
    {
      "epoch": 0.08338344357586691,
      "grad_norm": 1.0016213655471802,
      "learning_rate": 9.947626797104925e-05,
      "loss": 1.2698,
      "step": 13
    },
    {
      "epoch": 0.08979755462016437,
      "grad_norm": 1.1300530433654785,
      "learning_rate": 9.931634888554937e-05,
      "loss": 1.2114,
      "step": 14
    },
    {
      "epoch": 0.09621166566446182,
      "grad_norm": 0.8754634261131287,
      "learning_rate": 9.913533761814537e-05,
      "loss": 1.1015,
      "step": 15
    },
    {
      "epoch": 0.10262577670875928,
      "grad_norm": 0.9171552658081055,
      "learning_rate": 9.893332032039701e-05,
      "loss": 1.1322,
      "step": 16
    },
    {
      "epoch": 0.10903988775305673,
      "grad_norm": 0.8763850927352905,
      "learning_rate": 9.871039314159677e-05,
      "loss": 1.0756,
      "step": 17
    },
    {
      "epoch": 0.11545399879735418,
      "grad_norm": 0.8467594385147095,
      "learning_rate": 9.846666218300807e-05,
      "loss": 1.0607,
      "step": 18
    },
    {
      "epoch": 0.12186810984165164,
      "grad_norm": 0.9706906676292419,
      "learning_rate": 9.82022434473668e-05,
      "loss": 1.039,
      "step": 19
    },
    {
      "epoch": 0.1282822208859491,
      "grad_norm": 0.7961729168891907,
      "learning_rate": 9.791726278367022e-05,
      "loss": 1.0063,
      "step": 20
    },
    {
      "epoch": 0.13469633193024655,
      "grad_norm": 0.889162540435791,
      "learning_rate": 9.761185582727977e-05,
      "loss": 1.0522,
      "step": 21
    },
    {
      "epoch": 0.141110442974544,
      "grad_norm": 0.7932081818580627,
      "learning_rate": 9.728616793536588e-05,
      "loss": 0.9918,
      "step": 22
    },
    {
      "epoch": 0.14752455401884146,
      "grad_norm": 0.8014475703239441,
      "learning_rate": 9.694035411772594e-05,
      "loss": 1.0652,
      "step": 23
    },
    {
      "epoch": 0.1539386650631389,
      "grad_norm": 1.1154183149337769,
      "learning_rate": 9.657457896300791e-05,
      "loss": 1.0376,
      "step": 24
    },
    {
      "epoch": 0.16035277610743637,
      "grad_norm": 1.1352952718734741,
      "learning_rate": 9.618901656037514e-05,
      "loss": 1.1077,
      "step": 25
    },
    {
      "epoch": 0.16035277610743637,
      "eval_loss": 1.0208630561828613,
      "eval_runtime": 1.0294,
      "eval_samples_per_second": 48.573,
      "eval_steps_per_second": 12.629,
      "step": 25
    },
    {
      "epoch": 0.16676688715173382,
      "grad_norm": 0.5933877229690552,
      "learning_rate": 9.578385041664925e-05,
      "loss": 1.1207,
      "step": 26
    },
    {
      "epoch": 0.17318099819603128,
      "grad_norm": 0.7073403000831604,
      "learning_rate": 9.535927336897098e-05,
      "loss": 1.0747,
      "step": 27
    },
    {
      "epoch": 0.17959510924032873,
      "grad_norm": 0.6395047903060913,
      "learning_rate": 9.491548749301997e-05,
      "loss": 1.0038,
      "step": 28
    },
    {
      "epoch": 0.1860092202846262,
      "grad_norm": 0.5215311050415039,
      "learning_rate": 9.445270400683786e-05,
      "loss": 0.9655,
      "step": 29
    },
    {
      "epoch": 0.19242333132892364,
      "grad_norm": 0.5276326537132263,
      "learning_rate": 9.397114317029975e-05,
      "loss": 0.9261,
      "step": 30
    },
    {
      "epoch": 0.1988374423732211,
      "grad_norm": 0.45551949739456177,
      "learning_rate": 9.34710341802826e-05,
      "loss": 0.9266,
      "step": 31
    },
    {
      "epoch": 0.20525155341751855,
      "grad_norm": 0.5097278356552124,
      "learning_rate": 9.295261506157986e-05,
      "loss": 0.9474,
      "step": 32
    },
    {
      "epoch": 0.211665664461816,
      "grad_norm": 0.4889874756336212,
      "learning_rate": 9.241613255361455e-05,
      "loss": 0.9348,
      "step": 33
    },
    {
      "epoch": 0.21807977550611346,
      "grad_norm": 0.6000301837921143,
      "learning_rate": 9.186184199300464e-05,
      "loss": 0.9084,
      "step": 34
    },
    {
      "epoch": 0.22449388655041091,
      "grad_norm": 0.6574684977531433,
      "learning_rate": 9.129000719203672e-05,
      "loss": 0.9656,
      "step": 35
    },
    {
      "epoch": 0.23090799759470837,
      "grad_norm": 0.6883850693702698,
      "learning_rate": 9.070090031310558e-05,
      "loss": 0.894,
      "step": 36
    },
    {
      "epoch": 0.23732210863900582,
      "grad_norm": 0.7173681855201721,
      "learning_rate": 9.009480173917968e-05,
      "loss": 1.0018,
      "step": 37
    },
    {
      "epoch": 0.24373621968330328,
      "grad_norm": 0.3584338128566742,
      "learning_rate": 8.947199994035401e-05,
      "loss": 1.0744,
      "step": 38
    },
    {
      "epoch": 0.25015033072760073,
      "grad_norm": 0.4074675440788269,
      "learning_rate": 8.883279133655399e-05,
      "loss": 0.9454,
      "step": 39
    },
    {
      "epoch": 0.2565644417718982,
      "grad_norm": 0.5095922350883484,
      "learning_rate": 8.817748015645558e-05,
      "loss": 0.9923,
      "step": 40
    },
    {
      "epoch": 0.26297855281619564,
      "grad_norm": 0.511969268321991,
      "learning_rate": 8.7506378292689e-05,
      "loss": 0.9529,
      "step": 41
    },
    {
      "epoch": 0.2693926638604931,
      "grad_norm": 0.5053486824035645,
      "learning_rate": 8.681980515339464e-05,
      "loss": 0.9017,
      "step": 42
    },
    {
      "epoch": 0.27580677490479055,
      "grad_norm": 0.547535240650177,
      "learning_rate": 8.611808751020213e-05,
      "loss": 0.9219,
      "step": 43
    },
    {
      "epoch": 0.282220885949088,
      "grad_norm": 0.47144246101379395,
      "learning_rate": 8.540155934270471e-05,
      "loss": 0.9067,
      "step": 44
    },
    {
      "epoch": 0.28863499699338546,
      "grad_norm": 0.49577343463897705,
      "learning_rate": 8.467056167950311e-05,
      "loss": 0.9403,
      "step": 45
    },
    {
      "epoch": 0.2950491080376829,
      "grad_norm": 0.5017397403717041,
      "learning_rate": 8.392544243589427e-05,
      "loss": 0.9151,
      "step": 46
    },
    {
      "epoch": 0.30146321908198037,
      "grad_norm": 0.49621936678886414,
      "learning_rate": 8.316655624828267e-05,
      "loss": 0.8998,
      "step": 47
    },
    {
      "epoch": 0.3078773301262778,
      "grad_norm": 0.4927474856376648,
      "learning_rate": 8.239426430539243e-05,
      "loss": 0.9294,
      "step": 48
    },
    {
      "epoch": 0.3142914411705753,
      "grad_norm": 0.7246368527412415,
      "learning_rate": 8.160893417636122e-05,
      "loss": 0.8953,
      "step": 49
    },
    {
      "epoch": 0.32070555221487274,
      "grad_norm": 0.7401767373085022,
      "learning_rate": 8.081093963579707e-05,
      "loss": 0.9852,
      "step": 50
    },
    {
      "epoch": 0.32070555221487274,
      "eval_loss": 0.9231343865394592,
      "eval_runtime": 1.0157,
      "eval_samples_per_second": 49.226,
      "eval_steps_per_second": 12.799,
      "step": 50
    },
    {
      "epoch": 0.3271196632591702,
      "grad_norm": 0.3813113868236542,
      "learning_rate": 8.000066048588211e-05,
      "loss": 0.942,
      "step": 51
    },
    {
      "epoch": 0.33353377430346764,
      "grad_norm": 0.3614146411418915,
      "learning_rate": 7.917848237560709e-05,
      "loss": 0.8877,
      "step": 52
    },
    {
      "epoch": 0.3399478853477651,
      "grad_norm": 0.3518458306789398,
      "learning_rate": 7.834479661722347e-05,
      "loss": 0.9331,
      "step": 53
    },
    {
      "epoch": 0.34636199639206255,
      "grad_norm": 0.36760586500167847,
      "learning_rate": 7.75e-05,
      "loss": 0.8761,
      "step": 54
    },
    {
      "epoch": 0.35277610743636,
      "grad_norm": 0.4043155908584595,
      "learning_rate": 7.664449460137245e-05,
      "loss": 0.8931,
      "step": 55
    },
    {
      "epoch": 0.35919021848065746,
      "grad_norm": 0.397081196308136,
      "learning_rate": 7.577868759557654e-05,
      "loss": 0.8639,
      "step": 56
    },
    {
      "epoch": 0.3656043295249549,
      "grad_norm": 0.3925102651119232,
      "learning_rate": 7.490299105985507e-05,
      "loss": 0.8433,
      "step": 57
    },
    {
      "epoch": 0.3720184405692524,
      "grad_norm": 0.40960493683815,
      "learning_rate": 7.401782177833148e-05,
      "loss": 0.8634,
      "step": 58
    },
    {
      "epoch": 0.37843255161354983,
      "grad_norm": 0.42561712861061096,
      "learning_rate": 7.312360104364318e-05,
      "loss": 0.7908,
      "step": 59
    },
    {
      "epoch": 0.3848466626578473,
      "grad_norm": 0.4647367298603058,
      "learning_rate": 7.222075445642904e-05,
      "loss": 0.8468,
      "step": 60
    },
    {
      "epoch": 0.39126077370214474,
      "grad_norm": 0.4602052867412567,
      "learning_rate": 7.130971172276657e-05,
      "loss": 0.8855,
      "step": 61
    },
    {
      "epoch": 0.3976748847464422,
      "grad_norm": 0.5147804617881775,
      "learning_rate": 7.03909064496551e-05,
      "loss": 0.9345,
      "step": 62
    },
    {
      "epoch": 0.40408899579073965,
      "grad_norm": 0.3348015248775482,
      "learning_rate": 6.946477593864228e-05,
      "loss": 1.0632,
      "step": 63
    },
    {
      "epoch": 0.4105031068350371,
      "grad_norm": 0.3764553368091583,
      "learning_rate": 6.853176097769229e-05,
      "loss": 0.9961,
      "step": 64
    },
    {
      "epoch": 0.41691721787933456,
      "grad_norm": 0.41312095522880554,
      "learning_rate": 6.759230563139466e-05,
      "loss": 0.964,
      "step": 65
    },
    {
      "epoch": 0.423331328923632,
      "grad_norm": 0.4216284453868866,
      "learning_rate": 6.664685702961344e-05,
      "loss": 0.8904,
      "step": 66
    },
    {
      "epoch": 0.42974543996792947,
      "grad_norm": 0.3816414773464203,
      "learning_rate": 6.56958651546778e-05,
      "loss": 0.868,
      "step": 67
    },
    {
      "epoch": 0.4361595510122269,
      "grad_norm": 0.35876405239105225,
      "learning_rate": 6.473978262721463e-05,
      "loss": 0.8476,
      "step": 68
    },
    {
      "epoch": 0.4425736620565244,
      "grad_norm": 0.3981570601463318,
      "learning_rate": 6.377906449072578e-05,
      "loss": 0.8487,
      "step": 69
    },
    {
      "epoch": 0.44898777310082183,
      "grad_norm": 0.44419988989830017,
      "learning_rate": 6.281416799501188e-05,
      "loss": 0.8494,
      "step": 70
    },
    {
      "epoch": 0.4554018841451193,
      "grad_norm": 0.41967856884002686,
      "learning_rate": 6.184555237854625e-05,
      "loss": 0.8249,
      "step": 71
    },
    {
      "epoch": 0.46181599518941674,
      "grad_norm": 0.460245817899704,
      "learning_rate": 6.087367864990233e-05,
      "loss": 0.8508,
      "step": 72
    },
    {
      "epoch": 0.4682301062337142,
      "grad_norm": 0.5052414536476135,
      "learning_rate": 5.989900936833841e-05,
      "loss": 0.849,
      "step": 73
    },
    {
      "epoch": 0.47464421727801165,
      "grad_norm": 0.6499659419059753,
      "learning_rate": 5.8922008423644624e-05,
      "loss": 0.881,
      "step": 74
    },
    {
      "epoch": 0.4810583283223091,
      "grad_norm": 0.6127673983573914,
      "learning_rate": 5.794314081535644e-05,
      "loss": 0.9338,
      "step": 75
    },
    {
      "epoch": 0.4810583283223091,
      "eval_loss": 0.880484402179718,
      "eval_runtime": 1.0107,
      "eval_samples_per_second": 49.469,
      "eval_steps_per_second": 12.862,
      "step": 75
    },
    {
      "epoch": 0.48747243936660656,
      "grad_norm": 0.34252408146858215,
      "learning_rate": 5.696287243144013e-05,
      "loss": 0.9852,
      "step": 76
    },
    {
      "epoch": 0.493886550410904,
      "grad_norm": 0.4102064073085785,
      "learning_rate": 5.598166982655526e-05,
      "loss": 0.9529,
      "step": 77
    },
    {
      "epoch": 0.5003006614552015,
      "grad_norm": 0.4815739095211029,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.918,
      "step": 78
    },
    {
      "epoch": 0.5067147724994989,
      "grad_norm": 0.4241974651813507,
      "learning_rate": 5.4018330173444754e-05,
      "loss": 0.8912,
      "step": 79
    },
    {
      "epoch": 0.5131288835437964,
      "grad_norm": 0.44345757365226746,
      "learning_rate": 5.303712756855988e-05,
      "loss": 0.9079,
      "step": 80
    },
    {
      "epoch": 0.5195429945880938,
      "grad_norm": 0.48125022649765015,
      "learning_rate": 5.205685918464356e-05,
      "loss": 0.8695,
      "step": 81
    },
    {
      "epoch": 0.5259571056323913,
      "grad_norm": 0.41340336203575134,
      "learning_rate": 5.107799157635538e-05,
      "loss": 0.7949,
      "step": 82
    },
    {
      "epoch": 0.5323712166766887,
      "grad_norm": 0.4205497205257416,
      "learning_rate": 5.0100990631661606e-05,
      "loss": 0.7923,
      "step": 83
    },
    {
      "epoch": 0.5387853277209862,
      "grad_norm": 0.4334363639354706,
      "learning_rate": 4.912632135009769e-05,
      "loss": 0.7829,
      "step": 84
    },
    {
      "epoch": 0.5451994387652837,
      "grad_norm": 0.4538995623588562,
      "learning_rate": 4.8154447621453744e-05,
      "loss": 0.8175,
      "step": 85
    },
    {
      "epoch": 0.5516135498095811,
      "grad_norm": 0.5367742776870728,
      "learning_rate": 4.718583200498814e-05,
      "loss": 0.8247,
      "step": 86
    },
    {
      "epoch": 0.5580276608538786,
      "grad_norm": 0.6540936231613159,
      "learning_rate": 4.6220935509274235e-05,
      "loss": 0.8939,
      "step": 87
    },
    {
      "epoch": 0.564441771898176,
      "grad_norm": 0.32181909680366516,
      "learning_rate": 4.526021737278538e-05,
      "loss": 0.9774,
      "step": 88
    },
    {
      "epoch": 0.5708558829424735,
      "grad_norm": 0.3461693227291107,
      "learning_rate": 4.430413484532222e-05,
      "loss": 0.9442,
      "step": 89
    },
    {
      "epoch": 0.5772699939867709,
      "grad_norm": 0.39385172724723816,
      "learning_rate": 4.3353142970386564e-05,
      "loss": 0.946,
      "step": 90
    },
    {
      "epoch": 0.5836841050310684,
      "grad_norm": 0.35734453797340393,
      "learning_rate": 4.240769436860537e-05,
      "loss": 0.8235,
      "step": 91
    },
    {
      "epoch": 0.5900982160753658,
      "grad_norm": 0.3919447362422943,
      "learning_rate": 4.146823902230772e-05,
      "loss": 0.7648,
      "step": 92
    },
    {
      "epoch": 0.5965123271196633,
      "grad_norm": 0.3950856328010559,
      "learning_rate": 4.053522406135775e-05,
      "loss": 0.8024,
      "step": 93
    },
    {
      "epoch": 0.6029264381639607,
      "grad_norm": 0.40364179015159607,
      "learning_rate": 3.960909355034491e-05,
      "loss": 0.9059,
      "step": 94
    },
    {
      "epoch": 0.6093405492082582,
      "grad_norm": 0.4102337062358856,
      "learning_rate": 3.8690288277233435e-05,
      "loss": 0.8092,
      "step": 95
    },
    {
      "epoch": 0.6157546602525557,
      "grad_norm": 0.4184516668319702,
      "learning_rate": 3.777924554357096e-05,
      "loss": 0.8231,
      "step": 96
    },
    {
      "epoch": 0.6221687712968531,
      "grad_norm": 0.47536927461624146,
      "learning_rate": 3.687639895635684e-05,
      "loss": 0.8002,
      "step": 97
    },
    {
      "epoch": 0.6285828823411506,
      "grad_norm": 0.4901541769504547,
      "learning_rate": 3.598217822166854e-05,
      "loss": 0.8575,
      "step": 98
    },
    {
      "epoch": 0.634996993385448,
      "grad_norm": 0.5419043302536011,
      "learning_rate": 3.509700894014496e-05,
      "loss": 0.8291,
      "step": 99
    },
    {
      "epoch": 0.6414111044297455,
      "grad_norm": 0.6231566667556763,
      "learning_rate": 3.422131240442349e-05,
      "loss": 0.8435,
      "step": 100
    },
    {
      "epoch": 0.6414111044297455,
      "eval_loss": 0.8542066812515259,
      "eval_runtime": 1.0284,
      "eval_samples_per_second": 48.618,
      "eval_steps_per_second": 12.641,
      "step": 100
    },
    {
      "epoch": 0.6478252154740429,
      "grad_norm": 0.3350529968738556,
      "learning_rate": 3.3355505398627566e-05,
      "loss": 0.9972,
      "step": 101
    },
    {
      "epoch": 0.6542393265183404,
      "grad_norm": 0.3845514953136444,
      "learning_rate": 3.250000000000001e-05,
      "loss": 0.8938,
      "step": 102
    },
    {
      "epoch": 0.6606534375626378,
      "grad_norm": 0.39235520362854004,
      "learning_rate": 3.165520338277653e-05,
      "loss": 0.8996,
      "step": 103
    },
    {
      "epoch": 0.6670675486069353,
      "grad_norm": 0.38063177466392517,
      "learning_rate": 3.082151762439293e-05,
      "loss": 0.8417,
      "step": 104
    },
    {
      "epoch": 0.6734816596512327,
      "grad_norm": 0.39507997035980225,
      "learning_rate": 2.9999339514117912e-05,
      "loss": 0.7897,
      "step": 105
    },
    {
      "epoch": 0.6798957706955302,
      "grad_norm": 0.37488269805908203,
      "learning_rate": 2.9189060364202943e-05,
      "loss": 0.8193,
      "step": 106
    },
    {
      "epoch": 0.6863098817398277,
      "grad_norm": 0.45661434531211853,
      "learning_rate": 2.8391065823638806e-05,
      "loss": 0.8756,
      "step": 107
    },
    {
      "epoch": 0.6927239927841251,
      "grad_norm": 0.4090655744075775,
      "learning_rate": 2.760573569460757e-05,
      "loss": 0.811,
      "step": 108
    },
    {
      "epoch": 0.6991381038284226,
      "grad_norm": 0.4572974443435669,
      "learning_rate": 2.6833443751717347e-05,
      "loss": 0.8338,
      "step": 109
    },
    {
      "epoch": 0.70555221487272,
      "grad_norm": 0.4468700587749481,
      "learning_rate": 2.6074557564105727e-05,
      "loss": 0.7846,
      "step": 110
    },
    {
      "epoch": 0.7119663259170175,
      "grad_norm": 0.47316402196884155,
      "learning_rate": 2.53294383204969e-05,
      "loss": 0.7582,
      "step": 111
    },
    {
      "epoch": 0.7183804369613149,
      "grad_norm": 0.5286366939544678,
      "learning_rate": 2.459844065729529e-05,
      "loss": 0.7915,
      "step": 112
    },
    {
      "epoch": 0.7247945480056124,
      "grad_norm": 0.32154494524002075,
      "learning_rate": 2.3881912489797885e-05,
      "loss": 1.0302,
      "step": 113
    },
    {
      "epoch": 0.7312086590499098,
      "grad_norm": 0.36848995089530945,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 0.9236,
      "step": 114
    },
    {
      "epoch": 0.7376227700942073,
      "grad_norm": 0.3812990188598633,
      "learning_rate": 2.2493621707311002e-05,
      "loss": 0.8507,
      "step": 115
    },
    {
      "epoch": 0.7440368811385047,
      "grad_norm": 0.37178778648376465,
      "learning_rate": 2.1822519843544424e-05,
      "loss": 0.9149,
      "step": 116
    },
    {
      "epoch": 0.7504509921828022,
      "grad_norm": 0.36262521147727966,
      "learning_rate": 2.1167208663446025e-05,
      "loss": 0.8503,
      "step": 117
    },
    {
      "epoch": 0.7568651032270997,
      "grad_norm": 0.3932304084300995,
      "learning_rate": 2.0528000059645997e-05,
      "loss": 0.8351,
      "step": 118
    },
    {
      "epoch": 0.7632792142713971,
      "grad_norm": 0.4190768301486969,
      "learning_rate": 1.9905198260820328e-05,
      "loss": 0.8448,
      "step": 119
    },
    {
      "epoch": 0.7696933253156946,
      "grad_norm": 0.40936529636383057,
      "learning_rate": 1.9299099686894423e-05,
      "loss": 0.7507,
      "step": 120
    },
    {
      "epoch": 0.776107436359992,
      "grad_norm": 0.43789592385292053,
      "learning_rate": 1.8709992807963285e-05,
      "loss": 0.8046,
      "step": 121
    },
    {
      "epoch": 0.7825215474042895,
      "grad_norm": 0.44734787940979004,
      "learning_rate": 1.8138158006995364e-05,
      "loss": 0.8344,
      "step": 122
    },
    {
      "epoch": 0.7889356584485869,
      "grad_norm": 0.48436063528060913,
      "learning_rate": 1.758386744638546e-05,
      "loss": 0.8474,
      "step": 123
    },
    {
      "epoch": 0.7953497694928844,
      "grad_norm": 0.5177507996559143,
      "learning_rate": 1.7047384938420154e-05,
      "loss": 0.7806,
      "step": 124
    },
    {
      "epoch": 0.8017638805371818,
      "grad_norm": 0.6930881142616272,
      "learning_rate": 1.6528965819717413e-05,
      "loss": 0.8634,
      "step": 125
    },
    {
      "epoch": 0.8017638805371818,
      "eval_loss": 0.8382258415222168,
      "eval_runtime": 1.018,
      "eval_samples_per_second": 49.116,
      "eval_steps_per_second": 12.77,
      "step": 125
    },
    {
      "epoch": 0.8081779915814793,
      "grad_norm": 0.31197455525398254,
      "learning_rate": 1.602885682970026e-05,
      "loss": 0.9636,
      "step": 126
    },
    {
      "epoch": 0.8145921026257767,
      "grad_norm": 0.3908165693283081,
      "learning_rate": 1.5547295993162156e-05,
      "loss": 0.95,
      "step": 127
    },
    {
      "epoch": 0.8210062136700742,
      "grad_norm": 0.3665495812892914,
      "learning_rate": 1.5084512506980026e-05,
      "loss": 0.8052,
      "step": 128
    },
    {
      "epoch": 0.8274203247143717,
      "grad_norm": 0.37569713592529297,
      "learning_rate": 1.464072663102903e-05,
      "loss": 0.8625,
      "step": 129
    },
    {
      "epoch": 0.8338344357586691,
      "grad_norm": 0.40901851654052734,
      "learning_rate": 1.4216149583350754e-05,
      "loss": 0.7515,
      "step": 130
    },
    {
      "epoch": 0.8402485468029666,
      "grad_norm": 0.42621657252311707,
      "learning_rate": 1.3810983439624881e-05,
      "loss": 0.7998,
      "step": 131
    },
    {
      "epoch": 0.846662657847264,
      "grad_norm": 0.44142094254493713,
      "learning_rate": 1.3425421036992098e-05,
      "loss": 0.7929,
      "step": 132
    },
    {
      "epoch": 0.8530767688915615,
      "grad_norm": 0.43068927526474,
      "learning_rate": 1.305964588227407e-05,
      "loss": 0.7788,
      "step": 133
    },
    {
      "epoch": 0.8594908799358589,
      "grad_norm": 0.4872435927391052,
      "learning_rate": 1.2713832064634126e-05,
      "loss": 0.8294,
      "step": 134
    },
    {
      "epoch": 0.8659049909801564,
      "grad_norm": 0.4502367675304413,
      "learning_rate": 1.2388144172720251e-05,
      "loss": 0.7533,
      "step": 135
    },
    {
      "epoch": 0.8723191020244538,
      "grad_norm": 0.4974122941493988,
      "learning_rate": 1.2082737216329794e-05,
      "loss": 0.7485,
      "step": 136
    },
    {
      "epoch": 0.8787332130687513,
      "grad_norm": 0.5290459394454956,
      "learning_rate": 1.1797756552633215e-05,
      "loss": 0.8153,
      "step": 137
    },
    {
      "epoch": 0.8851473241130488,
      "grad_norm": 0.37030237913131714,
      "learning_rate": 1.1533337816991932e-05,
      "loss": 1.0218,
      "step": 138
    },
    {
      "epoch": 0.8915614351573462,
      "grad_norm": 0.34467270970344543,
      "learning_rate": 1.1289606858403237e-05,
      "loss": 0.9029,
      "step": 139
    },
    {
      "epoch": 0.8979755462016437,
      "grad_norm": 0.36066851019859314,
      "learning_rate": 1.1066679679603e-05,
      "loss": 0.8987,
      "step": 140
    },
    {
      "epoch": 0.9043896572459411,
      "grad_norm": 0.3829849362373352,
      "learning_rate": 1.0864662381854632e-05,
      "loss": 0.8635,
      "step": 141
    },
    {
      "epoch": 0.9108037682902386,
      "grad_norm": 0.3921374976634979,
      "learning_rate": 1.0683651114450641e-05,
      "loss": 0.842,
      "step": 142
    },
    {
      "epoch": 0.917217879334536,
      "grad_norm": 0.41558754444122314,
      "learning_rate": 1.0523732028950771e-05,
      "loss": 0.7996,
      "step": 143
    },
    {
      "epoch": 0.9236319903788335,
      "grad_norm": 0.4474291205406189,
      "learning_rate": 1.0384981238178534e-05,
      "loss": 0.7812,
      "step": 144
    },
    {
      "epoch": 0.9300461014231309,
      "grad_norm": 0.4382871687412262,
      "learning_rate": 1.0267464779995617e-05,
      "loss": 0.7382,
      "step": 145
    },
    {
      "epoch": 0.9364602124674284,
      "grad_norm": 0.43462201952934265,
      "learning_rate": 1.017123858587145e-05,
      "loss": 0.7305,
      "step": 146
    },
    {
      "epoch": 0.9428743235117258,
      "grad_norm": 0.43509241938591003,
      "learning_rate": 1.0096348454262845e-05,
      "loss": 0.693,
      "step": 147
    },
    {
      "epoch": 0.9492884345560233,
      "grad_norm": 0.4872788190841675,
      "learning_rate": 1.00428300288164e-05,
      "loss": 0.8178,
      "step": 148
    },
    {
      "epoch": 0.9557025456003208,
      "grad_norm": 0.5364053845405579,
      "learning_rate": 1.001070878140409e-05,
      "loss": 0.7784,
      "step": 149
    },
    {
      "epoch": 0.9621166566446182,
      "grad_norm": 0.6880702972412109,
      "learning_rate": 1e-05,
      "loss": 0.8225,
      "step": 150
    },
    {
      "epoch": 0.9621166566446182,
      "eval_loss": 0.8291622996330261,
      "eval_runtime": 1.0213,
      "eval_samples_per_second": 48.958,
      "eval_steps_per_second": 12.729,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.500379396968284e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}