{
  "best_metric": 0.8291622996330261,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.9621166566446182,
  "eval_steps": 25,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.006414111044297455, "grad_norm": 10.31315803527832, "learning_rate": 1.6666666666666667e-05, "loss": 1.8656, "step": 1 },
    { "epoch": 0.006414111044297455, "eval_loss": 2.4902782440185547, "eval_runtime": 1.0287, "eval_samples_per_second": 48.606, "eval_steps_per_second": 12.637, "step": 1 },
    { "epoch": 0.01282822208859491, "grad_norm": 12.256026268005371, "learning_rate": 3.3333333333333335e-05, "loss": 2.0725, "step": 2 },
    { "epoch": 0.019242333132892364, "grad_norm": 10.072068214416504, "learning_rate": 5e-05, "loss": 2.1456, "step": 3 },
    { "epoch": 0.02565644417718982, "grad_norm": 3.304625988006592, "learning_rate": 6.666666666666667e-05, "loss": 2.1228, "step": 4 },
    { "epoch": 0.032070555221487274, "grad_norm": 3.110682487487793, "learning_rate": 8.333333333333334e-05, "loss": 2.1082, "step": 5 },
    { "epoch": 0.03848466626578473, "grad_norm": 3.0085599422454834, "learning_rate": 0.0001, "loss": 1.9762, "step": 6 },
    { "epoch": 0.04489877731008218, "grad_norm": 2.9738759994506836, "learning_rate": 9.998929121859592e-05, "loss": 1.8855, "step": 7 },
    { "epoch": 0.05131288835437964, "grad_norm": 2.297053813934326, "learning_rate": 9.99571699711836e-05, "loss": 1.7019, "step": 8 },
    { "epoch": 0.05772699939867709, "grad_norm": 1.9924105405807495, "learning_rate": 9.990365154573717e-05, "loss": 1.5766, "step": 9 },
    { "epoch": 0.06414111044297455, "grad_norm": 2.017209529876709, "learning_rate": 9.982876141412856e-05, "loss": 1.5321, "step": 10 },
    { "epoch": 0.070555221487272, "grad_norm": 2.017620086669922, "learning_rate": 9.973253522000438e-05, "loss": 1.4972, "step": 11 },
    { "epoch": 0.07696933253156946, "grad_norm": 2.0020158290863037, "learning_rate": 9.961501876182148e-05, "loss": 1.447, "step": 12 },
    { "epoch": 0.08338344357586691, "grad_norm": 1.0016213655471802, "learning_rate": 9.947626797104925e-05, "loss": 1.2698, "step": 13 },
    { "epoch": 0.08979755462016437, "grad_norm": 1.1300530433654785, "learning_rate": 9.931634888554937e-05, "loss": 1.2114, "step": 14 },
    { "epoch": 0.09621166566446182, "grad_norm": 0.8754634261131287, "learning_rate": 9.913533761814537e-05, "loss": 1.1015, "step": 15 },
    { "epoch": 0.10262577670875928, "grad_norm": 0.9171552658081055, "learning_rate": 9.893332032039701e-05, "loss": 1.1322, "step": 16 },
    { "epoch": 0.10903988775305673, "grad_norm": 0.8763850927352905, "learning_rate": 9.871039314159677e-05, "loss": 1.0756, "step": 17 },
    { "epoch": 0.11545399879735418, "grad_norm": 0.8467594385147095, "learning_rate": 9.846666218300807e-05, "loss": 1.0607, "step": 18 },
    { "epoch": 0.12186810984165164, "grad_norm": 0.9706906676292419, "learning_rate": 9.82022434473668e-05, "loss": 1.039, "step": 19 },
    { "epoch": 0.1282822208859491, "grad_norm": 0.7961729168891907, "learning_rate": 9.791726278367022e-05, "loss": 1.0063, "step": 20 },
    { "epoch": 0.13469633193024655, "grad_norm": 0.889162540435791, "learning_rate": 9.761185582727977e-05, "loss": 1.0522, "step": 21 },
    { "epoch": 0.141110442974544, "grad_norm": 0.7932081818580627, "learning_rate": 9.728616793536588e-05, "loss": 0.9918, "step": 22 },
    { "epoch": 0.14752455401884146, "grad_norm": 0.8014475703239441, "learning_rate": 9.694035411772594e-05, "loss": 1.0652, "step": 23 },
    { "epoch": 0.1539386650631389, "grad_norm": 1.1154183149337769, "learning_rate": 9.657457896300791e-05, "loss": 1.0376, "step": 24 },
    { "epoch": 0.16035277610743637, "grad_norm": 1.1352952718734741, "learning_rate": 9.618901656037514e-05, "loss": 1.1077, "step": 25 },
    { "epoch": 0.16035277610743637, "eval_loss": 1.0208630561828613, "eval_runtime": 1.0294, "eval_samples_per_second": 48.573, "eval_steps_per_second": 12.629, "step": 25 },
    { "epoch": 0.16676688715173382, "grad_norm": 0.5933877229690552, "learning_rate": 9.578385041664925e-05, "loss": 1.1207, "step": 26 },
    { "epoch": 0.17318099819603128, "grad_norm": 0.7073403000831604, "learning_rate": 9.535927336897098e-05, "loss": 1.0747, "step": 27 },
    { "epoch": 0.17959510924032873, "grad_norm": 0.6395047903060913, "learning_rate": 9.491548749301997e-05, "loss": 1.0038, "step": 28 },
    { "epoch": 0.1860092202846262, "grad_norm": 0.5215311050415039, "learning_rate": 9.445270400683786e-05, "loss": 0.9655, "step": 29 },
    { "epoch": 0.19242333132892364, "grad_norm": 0.5276326537132263, "learning_rate": 9.397114317029975e-05, "loss": 0.9261, "step": 30 },
    { "epoch": 0.1988374423732211, "grad_norm": 0.45551949739456177, "learning_rate": 9.34710341802826e-05, "loss": 0.9266, "step": 31 },
    { "epoch": 0.20525155341751855, "grad_norm": 0.5097278356552124, "learning_rate": 9.295261506157986e-05, "loss": 0.9474, "step": 32 },
    { "epoch": 0.211665664461816, "grad_norm": 0.4889874756336212, "learning_rate": 9.241613255361455e-05, "loss": 0.9348, "step": 33 },
    { "epoch": 0.21807977550611346, "grad_norm": 0.6000301837921143, "learning_rate": 9.186184199300464e-05, "loss": 0.9084, "step": 34 },
    { "epoch": 0.22449388655041091, "grad_norm": 0.6574684977531433, "learning_rate": 9.129000719203672e-05, "loss": 0.9656, "step": 35 },
    { "epoch": 0.23090799759470837, "grad_norm": 0.6883850693702698, "learning_rate": 9.070090031310558e-05, "loss": 0.894, "step": 36 },
    { "epoch": 0.23732210863900582, "grad_norm": 0.7173681855201721, "learning_rate": 9.009480173917968e-05, "loss": 1.0018, "step": 37 },
    { "epoch": 0.24373621968330328, "grad_norm": 0.3584338128566742, "learning_rate": 8.947199994035401e-05, "loss": 1.0744, "step": 38 },
    { "epoch": 0.25015033072760073, "grad_norm": 0.4074675440788269, "learning_rate": 8.883279133655399e-05, "loss": 0.9454, "step": 39 },
    { "epoch": 0.2565644417718982, "grad_norm": 0.5095922350883484, "learning_rate": 8.817748015645558e-05, "loss": 0.9923, "step": 40 },
    { "epoch": 0.26297855281619564, "grad_norm": 0.511969268321991, "learning_rate": 8.7506378292689e-05, "loss": 0.9529, "step": 41 },
    { "epoch": 0.2693926638604931, "grad_norm": 0.5053486824035645, "learning_rate": 8.681980515339464e-05, "loss": 0.9017, "step": 42 },
    { "epoch": 0.27580677490479055, "grad_norm": 0.547535240650177, "learning_rate": 8.611808751020213e-05, "loss": 0.9219, "step": 43 },
    { "epoch": 0.282220885949088, "grad_norm": 0.47144246101379395, "learning_rate": 8.540155934270471e-05, "loss": 0.9067, "step": 44 },
    { "epoch": 0.28863499699338546, "grad_norm": 0.49577343463897705, "learning_rate": 8.467056167950311e-05, "loss": 0.9403, "step": 45 },
    { "epoch": 0.2950491080376829, "grad_norm": 0.5017397403717041, "learning_rate": 8.392544243589427e-05, "loss": 0.9151, "step": 46 },
    { "epoch": 0.30146321908198037, "grad_norm": 0.49621936678886414, "learning_rate": 8.316655624828267e-05, "loss": 0.8998, "step": 47 },
    { "epoch": 0.3078773301262778, "grad_norm": 0.4927474856376648, "learning_rate": 8.239426430539243e-05, "loss": 0.9294, "step": 48 },
    { "epoch": 0.3142914411705753, "grad_norm": 0.7246368527412415, "learning_rate": 8.160893417636122e-05, "loss": 0.8953, "step": 49 },
    { "epoch": 0.32070555221487274, "grad_norm": 0.7401767373085022, "learning_rate": 8.081093963579707e-05, "loss": 0.9852, "step": 50 },
    { "epoch": 0.32070555221487274, "eval_loss": 0.9231343865394592, "eval_runtime": 1.0157, "eval_samples_per_second": 49.226, "eval_steps_per_second": 12.799, "step": 50 },
    { "epoch": 0.3271196632591702, "grad_norm": 0.3813113868236542, "learning_rate": 8.000066048588211e-05, "loss": 0.942, "step": 51 },
    { "epoch": 0.33353377430346764, "grad_norm": 0.3614146411418915, "learning_rate": 7.917848237560709e-05, "loss": 0.8877, "step": 52 },
    { "epoch": 0.3399478853477651, "grad_norm": 0.3518458306789398, "learning_rate": 7.834479661722347e-05, "loss": 0.9331, "step": 53 },
    { "epoch": 0.34636199639206255, "grad_norm": 0.36760586500167847, "learning_rate": 7.75e-05, "loss": 0.8761, "step": 54 },
    { "epoch": 0.35277610743636, "grad_norm": 0.4043155908584595, "learning_rate": 7.664449460137245e-05, "loss": 0.8931, "step": 55 },
    { "epoch": 0.35919021848065746, "grad_norm": 0.397081196308136, "learning_rate": 7.577868759557654e-05, "loss": 0.8639, "step": 56 },
    { "epoch": 0.3656043295249549, "grad_norm": 0.3925102651119232, "learning_rate": 7.490299105985507e-05, "loss": 0.8433, "step": 57 },
    { "epoch": 0.3720184405692524, "grad_norm": 0.40960493683815, "learning_rate": 7.401782177833148e-05, "loss": 0.8634, "step": 58 },
    { "epoch": 0.37843255161354983, "grad_norm": 0.42561712861061096, "learning_rate": 7.312360104364318e-05, "loss": 0.7908, "step": 59 },
    { "epoch": 0.3848466626578473, "grad_norm": 0.4647367298603058, "learning_rate": 7.222075445642904e-05, "loss": 0.8468, "step": 60 },
    { "epoch": 0.39126077370214474, "grad_norm": 0.4602052867412567, "learning_rate": 7.130971172276657e-05, "loss": 0.8855, "step": 61 },
    { "epoch": 0.3976748847464422, "grad_norm": 0.5147804617881775, "learning_rate": 7.03909064496551e-05, "loss": 0.9345, "step": 62 },
    { "epoch": 0.40408899579073965, "grad_norm": 0.3348015248775482, "learning_rate": 6.946477593864228e-05, "loss": 1.0632, "step": 63 },
    { "epoch": 0.4105031068350371, "grad_norm": 0.3764553368091583, "learning_rate": 6.853176097769229e-05, "loss": 0.9961, "step": 64 },
    { "epoch": 0.41691721787933456, "grad_norm": 0.41312095522880554, "learning_rate": 6.759230563139466e-05, "loss": 0.964, "step": 65 },
    { "epoch": 0.423331328923632, "grad_norm": 0.4216284453868866, "learning_rate": 6.664685702961344e-05, "loss": 0.8904, "step": 66 },
    { "epoch": 0.42974543996792947, "grad_norm": 0.3816414773464203, "learning_rate": 6.56958651546778e-05, "loss": 0.868, "step": 67 },
    { "epoch": 0.4361595510122269, "grad_norm": 0.35876405239105225, "learning_rate": 6.473978262721463e-05, "loss": 0.8476, "step": 68 },
    { "epoch": 0.4425736620565244, "grad_norm": 0.3981570601463318, "learning_rate": 6.377906449072578e-05, "loss": 0.8487, "step": 69 },
    { "epoch": 0.44898777310082183, "grad_norm": 0.44419988989830017, "learning_rate": 6.281416799501188e-05, "loss": 0.8494, "step": 70 },
    { "epoch": 0.4554018841451193, "grad_norm": 0.41967856884002686, "learning_rate": 6.184555237854625e-05, "loss": 0.8249, "step": 71 },
    { "epoch": 0.46181599518941674, "grad_norm": 0.460245817899704, "learning_rate": 6.087367864990233e-05, "loss": 0.8508, "step": 72 },
    { "epoch": 0.4682301062337142, "grad_norm": 0.5052414536476135, "learning_rate": 5.989900936833841e-05, "loss": 0.849, "step": 73 },
    { "epoch": 0.47464421727801165, "grad_norm": 0.6499659419059753, "learning_rate": 5.8922008423644624e-05, "loss": 0.881, "step": 74 },
    { "epoch": 0.4810583283223091, "grad_norm": 0.6127673983573914, "learning_rate": 5.794314081535644e-05, "loss": 0.9338, "step": 75 },
    { "epoch": 0.4810583283223091, "eval_loss": 0.880484402179718, "eval_runtime": 1.0107, "eval_samples_per_second": 49.469, "eval_steps_per_second": 12.862, "step": 75 },
    { "epoch": 0.48747243936660656, "grad_norm": 0.34252408146858215, "learning_rate": 5.696287243144013e-05, "loss": 0.9852, "step": 76 },
    { "epoch": 0.493886550410904, "grad_norm": 0.4102064073085785, "learning_rate": 5.598166982655526e-05, "loss": 0.9529, "step": 77 },
    { "epoch": 0.5003006614552015, "grad_norm": 0.4815739095211029, "learning_rate": 5.500000000000001e-05, "loss": 0.918, "step": 78 },
    { "epoch": 0.5067147724994989, "grad_norm": 0.4241974651813507, "learning_rate": 5.4018330173444754e-05, "loss": 0.8912, "step": 79 },
    { "epoch": 0.5131288835437964, "grad_norm": 0.44345757365226746, "learning_rate": 5.303712756855988e-05, "loss": 0.9079, "step": 80 },
    { "epoch": 0.5195429945880938, "grad_norm": 0.48125022649765015, "learning_rate": 5.205685918464356e-05, "loss": 0.8695, "step": 81 },
    { "epoch": 0.5259571056323913, "grad_norm": 0.41340336203575134, "learning_rate": 5.107799157635538e-05, "loss": 0.7949, "step": 82 },
    { "epoch": 0.5323712166766887, "grad_norm": 0.4205497205257416, "learning_rate": 5.0100990631661606e-05, "loss": 0.7923, "step": 83 },
    { "epoch": 0.5387853277209862, "grad_norm": 0.4334363639354706, "learning_rate": 4.912632135009769e-05, "loss": 0.7829, "step": 84 },
    { "epoch": 0.5451994387652837, "grad_norm": 0.4538995623588562, "learning_rate": 4.8154447621453744e-05, "loss": 0.8175, "step": 85 },
    { "epoch": 0.5516135498095811, "grad_norm": 0.5367742776870728, "learning_rate": 4.718583200498814e-05, "loss": 0.8247, "step": 86 },
    { "epoch": 0.5580276608538786, "grad_norm": 0.6540936231613159, "learning_rate": 4.6220935509274235e-05, "loss": 0.8939, "step": 87 },
    { "epoch": 0.564441771898176, "grad_norm": 0.32181909680366516, "learning_rate": 4.526021737278538e-05, "loss": 0.9774, "step": 88 },
    { "epoch": 0.5708558829424735, "grad_norm": 0.3461693227291107, "learning_rate": 4.430413484532222e-05, "loss": 0.9442, "step": 89 },
    { "epoch": 0.5772699939867709, "grad_norm": 0.39385172724723816, "learning_rate": 4.3353142970386564e-05, "loss": 0.946, "step": 90 },
    { "epoch": 0.5836841050310684, "grad_norm": 0.35734453797340393, "learning_rate": 4.240769436860537e-05, "loss": 0.8235, "step": 91 },
    { "epoch": 0.5900982160753658, "grad_norm": 0.3919447362422943, "learning_rate": 4.146823902230772e-05, "loss": 0.7648, "step": 92 },
    { "epoch": 0.5965123271196633, "grad_norm": 0.3950856328010559, "learning_rate": 4.053522406135775e-05, "loss": 0.8024, "step": 93 },
    { "epoch": 0.6029264381639607, "grad_norm": 0.40364179015159607, "learning_rate": 3.960909355034491e-05, "loss": 0.9059, "step": 94 },
    { "epoch": 0.6093405492082582, "grad_norm": 0.4102337062358856, "learning_rate": 3.8690288277233435e-05, "loss": 0.8092, "step": 95 },
    { "epoch": 0.6157546602525557, "grad_norm": 0.4184516668319702, "learning_rate": 3.777924554357096e-05, "loss": 0.8231, "step": 96 },
    { "epoch": 0.6221687712968531, "grad_norm": 0.47536927461624146, "learning_rate": 3.687639895635684e-05, "loss": 0.8002, "step": 97 },
    { "epoch": 0.6285828823411506, "grad_norm": 0.4901541769504547, "learning_rate": 3.598217822166854e-05, "loss": 0.8575, "step": 98 },
    { "epoch": 0.634996993385448, "grad_norm": 0.5419043302536011, "learning_rate": 3.509700894014496e-05, "loss": 0.8291, "step": 99 },
    { "epoch": 0.6414111044297455, "grad_norm": 0.6231566667556763, "learning_rate": 3.422131240442349e-05, "loss": 0.8435, "step": 100 },
    { "epoch": 0.6414111044297455, "eval_loss": 0.8542066812515259, "eval_runtime": 1.0284, "eval_samples_per_second": 48.618, "eval_steps_per_second": 12.641, "step": 100 },
    { "epoch": 0.6478252154740429, "grad_norm": 0.3350529968738556, "learning_rate": 3.3355505398627566e-05, "loss": 0.9972, "step": 101 },
    { "epoch": 0.6542393265183404, "grad_norm": 0.3845514953136444, "learning_rate": 3.250000000000001e-05, "loss": 0.8938, "step": 102 },
    { "epoch": 0.6606534375626378, "grad_norm": 0.39235520362854004, "learning_rate": 3.165520338277653e-05, "loss": 0.8996, "step": 103 },
    { "epoch": 0.6670675486069353, "grad_norm": 0.38063177466392517, "learning_rate": 3.082151762439293e-05, "loss": 0.8417, "step": 104 },
    { "epoch": 0.6734816596512327, "grad_norm": 0.39507997035980225, "learning_rate": 2.9999339514117912e-05, "loss": 0.7897, "step": 105 },
    { "epoch": 0.6798957706955302, "grad_norm": 0.37488269805908203, "learning_rate": 2.9189060364202943e-05, "loss": 0.8193, "step": 106 },
    { "epoch": 0.6863098817398277, "grad_norm": 0.45661434531211853, "learning_rate": 2.8391065823638806e-05, "loss": 0.8756, "step": 107 },
    { "epoch": 0.6927239927841251, "grad_norm": 0.4090655744075775, "learning_rate": 2.760573569460757e-05, "loss": 0.811, "step": 108 },
    { "epoch": 0.6991381038284226, "grad_norm": 0.4572974443435669, "learning_rate": 2.6833443751717347e-05, "loss": 0.8338, "step": 109 },
    { "epoch": 0.70555221487272, "grad_norm": 0.4468700587749481, "learning_rate": 2.6074557564105727e-05, "loss": 0.7846, "step": 110 },
    { "epoch": 0.7119663259170175, "grad_norm": 0.47316402196884155, "learning_rate": 2.53294383204969e-05, "loss": 0.7582, "step": 111 },
    { "epoch": 0.7183804369613149, "grad_norm": 0.5286366939544678, "learning_rate": 2.459844065729529e-05, "loss": 0.7915, "step": 112 },
    { "epoch": 0.7247945480056124, "grad_norm": 0.32154494524002075, "learning_rate": 2.3881912489797885e-05, "loss": 1.0302, "step": 113 },
    { "epoch": 0.7312086590499098, "grad_norm": 0.36848995089530945, "learning_rate": 2.3180194846605367e-05, "loss": 0.9236, "step": 114 },
    { "epoch": 0.7376227700942073, "grad_norm": 0.3812990188598633, "learning_rate": 2.2493621707311002e-05, "loss": 0.8507, "step": 115 },
    { "epoch": 0.7440368811385047, "grad_norm": 0.37178778648376465, "learning_rate": 2.1822519843544424e-05, "loss": 0.9149, "step": 116 },
    { "epoch": 0.7504509921828022, "grad_norm": 0.36262521147727966, "learning_rate": 2.1167208663446025e-05, "loss": 0.8503, "step": 117 },
    { "epoch": 0.7568651032270997, "grad_norm": 0.3932304084300995, "learning_rate": 2.0528000059645997e-05, "loss": 0.8351, "step": 118 },
    { "epoch": 0.7632792142713971, "grad_norm": 0.4190768301486969, "learning_rate": 1.9905198260820328e-05, "loss": 0.8448, "step": 119 },
    { "epoch": 0.7696933253156946, "grad_norm": 0.40936529636383057, "learning_rate": 1.9299099686894423e-05, "loss": 0.7507, "step": 120 },
    { "epoch": 0.776107436359992, "grad_norm": 0.43789592385292053, "learning_rate": 1.8709992807963285e-05, "loss": 0.8046, "step": 121 },
    { "epoch": 0.7825215474042895, "grad_norm": 0.44734787940979004, "learning_rate": 1.8138158006995364e-05, "loss": 0.8344, "step": 122 },
    { "epoch": 0.7889356584485869, "grad_norm": 0.48436063528060913, "learning_rate": 1.758386744638546e-05, "loss": 0.8474, "step": 123 },
    { "epoch": 0.7953497694928844, "grad_norm": 0.5177507996559143, "learning_rate": 1.7047384938420154e-05, "loss": 0.7806, "step": 124 },
    { "epoch": 0.8017638805371818, "grad_norm": 0.6930881142616272, "learning_rate": 1.6528965819717413e-05, "loss": 0.8634, "step": 125 },
    { "epoch": 0.8017638805371818, "eval_loss": 0.8382258415222168, "eval_runtime": 1.018, "eval_samples_per_second": 49.116, "eval_steps_per_second": 12.77, "step": 125 },
    { "epoch": 0.8081779915814793, "grad_norm": 0.31197455525398254, "learning_rate": 1.602885682970026e-05, "loss": 0.9636, "step": 126 },
    { "epoch": 0.8145921026257767, "grad_norm": 0.3908165693283081, "learning_rate": 1.5547295993162156e-05, "loss": 0.95, "step": 127 },
    { "epoch": 0.8210062136700742, "grad_norm": 0.3665495812892914, "learning_rate": 1.5084512506980026e-05, "loss": 0.8052, "step": 128 },
    { "epoch": 0.8274203247143717, "grad_norm": 0.37569713592529297, "learning_rate": 1.464072663102903e-05, "loss": 0.8625, "step": 129 },
    { "epoch": 0.8338344357586691, "grad_norm": 0.40901851654052734, "learning_rate": 1.4216149583350754e-05, "loss": 0.7515, "step": 130 },
    { "epoch": 0.8402485468029666, "grad_norm": 0.42621657252311707, "learning_rate": 1.3810983439624881e-05, "loss": 0.7998, "step": 131 },
    { "epoch": 0.846662657847264, "grad_norm": 0.44142094254493713, "learning_rate": 1.3425421036992098e-05, "loss": 0.7929, "step": 132 },
    { "epoch": 0.8530767688915615, "grad_norm": 0.43068927526474, "learning_rate": 1.305964588227407e-05, "loss": 0.7788, "step": 133 },
    { "epoch": 0.8594908799358589, "grad_norm": 0.4872435927391052, "learning_rate": 1.2713832064634126e-05, "loss": 0.8294, "step": 134 },
    { "epoch": 0.8659049909801564, "grad_norm": 0.4502367675304413, "learning_rate": 1.2388144172720251e-05, "loss": 0.7533, "step": 135 },
    { "epoch": 0.8723191020244538, "grad_norm": 0.4974122941493988, "learning_rate": 1.2082737216329794e-05, "loss": 0.7485, "step": 136 },
    { "epoch": 0.8787332130687513, "grad_norm": 0.5290459394454956, "learning_rate": 1.1797756552633215e-05, "loss": 0.8153, "step": 137 },
    { "epoch": 0.8851473241130488, "grad_norm": 0.37030237913131714, "learning_rate": 1.1533337816991932e-05, "loss": 1.0218, "step": 138 },
    { "epoch": 0.8915614351573462, "grad_norm": 0.34467270970344543, "learning_rate": 1.1289606858403237e-05, "loss": 0.9029, "step": 139 },
    { "epoch": 0.8979755462016437, "grad_norm": 0.36066851019859314, "learning_rate": 1.1066679679603e-05, "loss": 0.8987, "step": 140 },
    { "epoch": 0.9043896572459411, "grad_norm": 0.3829849362373352, "learning_rate": 1.0864662381854632e-05, "loss": 0.8635, "step": 141 },
    { "epoch": 0.9108037682902386, "grad_norm": 0.3921374976634979, "learning_rate": 1.0683651114450641e-05, "loss": 0.842, "step": 142 },
    { "epoch": 0.917217879334536, "grad_norm": 0.41558754444122314, "learning_rate": 1.0523732028950771e-05, "loss": 0.7996, "step": 143 },
    { "epoch": 0.9236319903788335, "grad_norm": 0.4474291205406189, "learning_rate": 1.0384981238178534e-05, "loss": 0.7812, "step": 144 },
    { "epoch": 0.9300461014231309, "grad_norm": 0.4382871687412262, "learning_rate": 1.0267464779995617e-05, "loss": 0.7382, "step": 145 },
    { "epoch": 0.9364602124674284, "grad_norm": 0.43462201952934265, "learning_rate": 1.017123858587145e-05, "loss": 0.7305, "step": 146 },
    { "epoch": 0.9428743235117258, "grad_norm": 0.43509241938591003, "learning_rate": 1.0096348454262845e-05, "loss": 0.693, "step": 147 },
    { "epoch": 0.9492884345560233, "grad_norm": 0.4872788190841675, "learning_rate": 1.00428300288164e-05, "loss": 0.8178, "step": 148 },
    { "epoch": 0.9557025456003208, "grad_norm": 0.5364053845405579, "learning_rate": 1.001070878140409e-05, "loss": 0.7784, "step": 149 },
    { "epoch": 0.9621166566446182, "grad_norm": 0.6880702972412109, "learning_rate": 1e-05, "loss": 0.8225, "step": 150 },
    { "epoch": 0.9621166566446182, "eval_loss": 0.8291622996330261, "eval_runtime": 1.0213, "eval_samples_per_second": 48.958, "eval_steps_per_second": 12.729, "step": 150 }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.500379396968284e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}