{ "best_metric": 1.1887668371200562, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 1.7218186709712133, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008609093354856066, "grad_norm": 3.315861463546753, "learning_rate": 4.347826086956522e-06, "loss": 4.8785, "step": 1 }, { "epoch": 0.008609093354856066, "eval_loss": 5.110793590545654, "eval_runtime": 1.5529, "eval_samples_per_second": 32.198, "eval_steps_per_second": 8.371, "step": 1 }, { "epoch": 0.017218186709712133, "grad_norm": 2.995750665664673, "learning_rate": 8.695652173913044e-06, "loss": 4.678, "step": 2 }, { "epoch": 0.0258272800645682, "grad_norm": 3.4421045780181885, "learning_rate": 1.3043478260869566e-05, "loss": 4.4056, "step": 3 }, { "epoch": 0.034436373419424265, "grad_norm": 2.8293936252593994, "learning_rate": 1.739130434782609e-05, "loss": 4.5363, "step": 4 }, { "epoch": 0.04304546677428033, "grad_norm": 3.2340526580810547, "learning_rate": 2.173913043478261e-05, "loss": 4.763, "step": 5 }, { "epoch": 0.0516545601291364, "grad_norm": 3.372311592102051, "learning_rate": 2.608695652173913e-05, "loss": 4.5034, "step": 6 }, { "epoch": 0.060263653483992465, "grad_norm": 3.8153085708618164, "learning_rate": 3.0434782608695656e-05, "loss": 4.2477, "step": 7 }, { "epoch": 0.06887274683884853, "grad_norm": 4.190725326538086, "learning_rate": 3.478260869565218e-05, "loss": 4.3524, "step": 8 }, { "epoch": 0.0774818401937046, "grad_norm": 5.624767303466797, "learning_rate": 3.91304347826087e-05, "loss": 3.945, "step": 9 }, { "epoch": 0.08609093354856066, "grad_norm": 6.327235698699951, "learning_rate": 4.347826086956522e-05, "loss": 3.976, "step": 10 }, { "epoch": 0.09470002690341674, "grad_norm": 7.578474998474121, "learning_rate": 4.782608695652174e-05, "loss": 4.0276, "step": 11 }, { "epoch": 0.1033091202582728, "grad_norm": 7.572633266448975, "learning_rate": 5.217391304347826e-05, "loss": 4.2388, "step": 12 }, { "epoch": 0.11191821361312887, "grad_norm": 8.599056243896484, "learning_rate": 5.652173913043478e-05, "loss": 4.6677, "step": 13 }, { "epoch": 0.12052730696798493, "grad_norm": 7.896859169006348, "learning_rate": 6.086956521739131e-05, "loss": 3.7006, "step": 14 }, { "epoch": 0.129136400322841, "grad_norm": 7.382732391357422, "learning_rate": 6.521739130434783e-05, "loss": 3.2896, "step": 15 }, { "epoch": 0.13774549367769706, "grad_norm": 6.985969066619873, "learning_rate": 6.956521739130436e-05, "loss": 2.8868, "step": 16 }, { "epoch": 0.14635458703255314, "grad_norm": 7.612574577331543, "learning_rate": 7.391304347826086e-05, "loss": 2.4229, "step": 17 }, { "epoch": 0.1549636803874092, "grad_norm": 7.9593353271484375, "learning_rate": 7.82608695652174e-05, "loss": 1.9384, "step": 18 }, { "epoch": 0.16357277374226525, "grad_norm": 7.119268417358398, "learning_rate": 8.260869565217392e-05, "loss": 1.7058, "step": 19 }, { "epoch": 0.17218186709712133, "grad_norm": 5.961546897888184, "learning_rate": 8.695652173913044e-05, "loss": 1.5168, "step": 20 }, { "epoch": 0.1807909604519774, "grad_norm": 4.6091108322143555, "learning_rate": 9.130434782608696e-05, "loss": 1.5859, "step": 21 }, { "epoch": 0.18940005380683347, "grad_norm": 3.547252655029297, "learning_rate": 9.565217391304348e-05, "loss": 1.6594, "step": 22 }, { "epoch": 0.19800914716168955, "grad_norm": 2.6677463054656982, "learning_rate": 0.0001, "loss": 1.562, "step": 23 }, { "epoch": 0.2066182405165456, "grad_norm": 2.7151801586151123, "learning_rate": 9.999886332607777e-05, "loss": 1.6305, "step": 24 }, { "epoch": 0.21522733387140167, "grad_norm": 3.942925453186035, "learning_rate": 9.999545336173447e-05, "loss": 2.4538, "step": 25 }, { "epoch": 0.21522733387140167, "eval_loss": 1.5649906396865845, "eval_runtime": 1.5525, "eval_samples_per_second": 32.207, "eval_steps_per_second": 8.374, "step": 25 }, { "epoch": 0.22383642722625774, "grad_norm": 2.9639642238616943, "learning_rate": 9.99897702792376e-05, "loss": 1.7593, "step": 26 }, { "epoch": 0.2324455205811138, "grad_norm": 1.685149073600769, "learning_rate": 9.998181436568988e-05, "loss": 1.6018, "step": 27 }, { "epoch": 0.24105461393596986, "grad_norm": 1.0844831466674805, "learning_rate": 9.997158602301483e-05, "loss": 1.4079, "step": 28 }, { "epoch": 0.24966370729082593, "grad_norm": 1.7795896530151367, "learning_rate": 9.995908576793646e-05, "loss": 1.4898, "step": 29 }, { "epoch": 0.258272800645682, "grad_norm": 2.1920080184936523, "learning_rate": 9.994431423195322e-05, "loss": 1.4451, "step": 30 }, { "epoch": 0.2668818940005381, "grad_norm": 1.2790383100509644, "learning_rate": 9.992727216130594e-05, "loss": 1.2006, "step": 31 }, { "epoch": 0.2754909873553941, "grad_norm": 0.9912707209587097, "learning_rate": 9.990796041694028e-05, "loss": 1.0584, "step": 32 }, { "epoch": 0.2841000807102502, "grad_norm": 0.9738203883171082, "learning_rate": 9.98863799744632e-05, "loss": 1.2063, "step": 33 }, { "epoch": 0.29270917406510627, "grad_norm": 0.9909037947654724, "learning_rate": 9.986253192409363e-05, "loss": 1.1359, "step": 34 }, { "epoch": 0.3013182674199623, "grad_norm": 1.0872948169708252, "learning_rate": 9.983641747060745e-05, "loss": 1.3116, "step": 35 }, { "epoch": 0.3099273607748184, "grad_norm": 1.0629243850708008, "learning_rate": 9.980803793327656e-05, "loss": 1.3898, "step": 36 }, { "epoch": 0.31853645412967446, "grad_norm": 1.2320526838302612, "learning_rate": 9.97773947458023e-05, "loss": 1.3583, "step": 37 }, { "epoch": 0.3271455474845305, "grad_norm": 2.0738534927368164, "learning_rate": 9.9744489456243e-05, "loss": 1.8781, "step": 38 }, { "epoch": 0.3357546408393866, "grad_norm": 1.07503080368042, "learning_rate": 9.970932372693575e-05, "loss": 1.4376, "step": 39 }, { "epoch": 0.34436373419424265, "grad_norm": 1.5182719230651855, "learning_rate": 9.967189933441243e-05, "loss": 1.3076, "step": 40 }, { "epoch": 0.35297282754909876, "grad_norm": 1.0016855001449585, "learning_rate": 9.963221816930997e-05, "loss": 1.4264, "step": 41 }, { "epoch": 0.3615819209039548, "grad_norm": 0.7020559906959534, "learning_rate": 9.959028223627485e-05, "loss": 1.2238, "step": 42 }, { "epoch": 0.37019101425881085, "grad_norm": 0.811683177947998, "learning_rate": 9.954609365386179e-05, "loss": 1.2275, "step": 43 }, { "epoch": 0.37880010761366695, "grad_norm": 0.6848633885383606, "learning_rate": 9.949965465442677e-05, "loss": 1.0887, "step": 44 }, { "epoch": 0.387409200968523, "grad_norm": 0.8840026259422302, "learning_rate": 9.94509675840142e-05, "loss": 1.0464, "step": 45 }, { "epoch": 0.3960182943233791, "grad_norm": 1.0079902410507202, "learning_rate": 9.940003490223849e-05, "loss": 1.0644, "step": 46 }, { "epoch": 0.40462738767823514, "grad_norm": 1.0744574069976807, "learning_rate": 9.934685918215964e-05, "loss": 1.1534, "step": 47 }, { "epoch": 0.4132364810330912, "grad_norm": 0.7194790840148926, "learning_rate": 9.929144311015344e-05, "loss": 1.2402, "step": 48 }, { "epoch": 0.4218455743879473, "grad_norm": 0.9537507891654968, "learning_rate": 9.923378948577559e-05, "loss": 1.1124, "step": 49 }, { "epoch": 0.43045466774280333, "grad_norm": 1.8321129083633423, "learning_rate": 9.917390122162037e-05, "loss": 1.7722, "step": 50 }, { "epoch": 0.43045466774280333, "eval_loss": 1.3280543088912964, "eval_runtime": 1.5594, "eval_samples_per_second": 32.064, "eval_steps_per_second": 8.337, "step": 50 }, { "epoch": 0.4390637610976594, "grad_norm": 0.9677706956863403, "learning_rate": 9.911178134317352e-05, "loss": 1.5514, "step": 51 }, { "epoch": 0.4476728544525155, "grad_norm": 1.2125188112258911, "learning_rate": 9.904743298865924e-05, "loss": 1.3899, "step": 52 }, { "epoch": 0.4562819478073715, "grad_norm": 1.2174468040466309, "learning_rate": 9.898085940888186e-05, "loss": 1.2112, "step": 53 }, { "epoch": 0.4648910411622276, "grad_norm": 0.8030359148979187, "learning_rate": 9.891206396706147e-05, "loss": 1.1958, "step": 54 }, { "epoch": 0.47350013451708367, "grad_norm": 0.7176796197891235, "learning_rate": 9.884105013866402e-05, "loss": 1.1852, "step": 55 }, { "epoch": 0.4821092278719397, "grad_norm": 0.692497193813324, "learning_rate": 9.876782151122585e-05, "loss": 1.0353, "step": 56 }, { "epoch": 0.4907183212267958, "grad_norm": 0.5701990723609924, "learning_rate": 9.869238178417235e-05, "loss": 1.0309, "step": 57 }, { "epoch": 0.49932741458165186, "grad_norm": 0.5832139253616333, "learning_rate": 9.861473476863107e-05, "loss": 0.9311, "step": 58 }, { "epoch": 0.5079365079365079, "grad_norm": 0.6962529420852661, "learning_rate": 9.853488438723925e-05, "loss": 0.9799, "step": 59 }, { "epoch": 0.516545601291364, "grad_norm": 0.6478574872016907, "learning_rate": 9.845283467394561e-05, "loss": 1.0114, "step": 60 }, { "epoch": 0.5251546946462201, "grad_norm": 0.8088128566741943, "learning_rate": 9.836858977380658e-05, "loss": 1.4307, "step": 61 }, { "epoch": 0.5337637880010762, "grad_norm": 1.0166041851043701, "learning_rate": 9.828215394277687e-05, "loss": 1.0745, "step": 62 }, { "epoch": 0.5423728813559322, "grad_norm": 1.8822686672210693, "learning_rate": 9.819353154749447e-05, "loss": 1.7973, "step": 63 }, { "epoch": 0.5509819747107882, "grad_norm": 0.7597546577453613, "learning_rate": 9.810272706506007e-05, "loss": 1.3296, "step": 64 }, { "epoch": 0.5595910680656443, "grad_norm": 0.9122503995895386, "learning_rate": 9.800974508281092e-05, "loss": 1.3314, "step": 65 }, { "epoch": 0.5682001614205004, "grad_norm": 0.8029553294181824, "learning_rate": 9.791459029808896e-05, "loss": 1.2401, "step": 66 }, { "epoch": 0.5768092547753565, "grad_norm": 0.8992607593536377, "learning_rate": 9.781726751800364e-05, "loss": 1.1981, "step": 67 }, { "epoch": 0.5854183481302125, "grad_norm": 0.6793228387832642, "learning_rate": 9.771778165918901e-05, "loss": 1.2453, "step": 68 }, { "epoch": 0.5940274414850686, "grad_norm": 0.7238474488258362, "learning_rate": 9.761613774755535e-05, "loss": 1.0891, "step": 69 }, { "epoch": 0.6026365348399246, "grad_norm": 0.6399270296096802, "learning_rate": 9.751234091803527e-05, "loss": 1.0015, "step": 70 }, { "epoch": 0.6112456281947808, "grad_norm": 0.6577760577201843, "learning_rate": 9.740639641432431e-05, "loss": 1.0185, "step": 71 }, { "epoch": 0.6198547215496368, "grad_norm": 0.6468656659126282, "learning_rate": 9.729830958861598e-05, "loss": 1.0839, "step": 72 }, { "epoch": 0.6284638149044929, "grad_norm": 0.8381094932556152, "learning_rate": 9.718808590133146e-05, "loss": 1.2058, "step": 73 }, { "epoch": 0.6370729082593489, "grad_norm": 0.9239258170127869, "learning_rate": 9.707573092084368e-05, "loss": 0.9228, "step": 74 }, { "epoch": 0.645682001614205, "grad_norm": 1.5042378902435303, "learning_rate": 9.6961250323196e-05, "loss": 1.8408, "step": 75 }, { "epoch": 0.645682001614205, "eval_loss": 1.2648259401321411, "eval_runtime": 1.2738, "eval_samples_per_second": 39.254, "eval_steps_per_second": 10.206, "step": 75 }, { "epoch": 0.654291094969061, "grad_norm": 0.7898931503295898, "learning_rate": 9.68446498918156e-05, "loss": 1.3101, "step": 76 }, { "epoch": 0.6629001883239172, "grad_norm": 0.9163423180580139, "learning_rate": 9.672593551722107e-05, "loss": 1.3338, "step": 77 }, { "epoch": 0.6715092816787732, "grad_norm": 0.6937429904937744, "learning_rate": 9.660511319672505e-05, "loss": 1.2016, "step": 78 }, { "epoch": 0.6801183750336293, "grad_norm": 0.9783784747123718, "learning_rate": 9.648218903413114e-05, "loss": 1.1252, "step": 79 }, { "epoch": 0.6887274683884853, "grad_norm": 0.6456884145736694, "learning_rate": 9.635716923942553e-05, "loss": 1.0339, "step": 80 }, { "epoch": 0.6973365617433414, "grad_norm": 0.682026207447052, "learning_rate": 9.623006012846337e-05, "loss": 1.1875, "step": 81 }, { "epoch": 0.7059456550981975, "grad_norm": 0.8329160213470459, "learning_rate": 9.610086812264966e-05, "loss": 1.06, "step": 82 }, { "epoch": 0.7145547484530536, "grad_norm": 0.8541434407234192, "learning_rate": 9.596959974861475e-05, "loss": 1.0262, "step": 83 }, { "epoch": 0.7231638418079096, "grad_norm": 0.6775283217430115, "learning_rate": 9.583626163788476e-05, "loss": 1.0738, "step": 84 }, { "epoch": 0.7317729351627656, "grad_norm": 0.6453855633735657, "learning_rate": 9.570086052654653e-05, "loss": 1.0279, "step": 85 }, { "epoch": 0.7403820285176217, "grad_norm": 0.7698174118995667, "learning_rate": 9.556340325490726e-05, "loss": 1.1105, "step": 86 }, { "epoch": 0.7489911218724778, "grad_norm": 0.9050216674804688, "learning_rate": 9.5423896767149e-05, "loss": 1.1907, "step": 87 }, { "epoch": 0.7576002152273339, "grad_norm": 1.7032601833343506, "learning_rate": 9.528234811097782e-05, "loss": 1.7483, "step": 88 }, { "epoch": 0.7662093085821899, "grad_norm": 0.9380318522453308, "learning_rate": 9.513876443726775e-05, "loss": 1.2805, "step": 89 }, { "epoch": 0.774818401937046, "grad_norm": 0.8037649989128113, "learning_rate": 9.499315299969962e-05, "loss": 1.2735, "step": 90 }, { "epoch": 0.783427495291902, "grad_norm": 1.0178877115249634, "learning_rate": 9.484552115439445e-05, "loss": 1.0856, "step": 91 }, { "epoch": 0.7920365886467582, "grad_norm": 1.3507095575332642, "learning_rate": 9.469587635954198e-05, "loss": 1.0897, "step": 92 }, { "epoch": 0.8006456820016142, "grad_norm": 0.8943363428115845, "learning_rate": 9.454422617502379e-05, "loss": 1.061, "step": 93 }, { "epoch": 0.8092547753564703, "grad_norm": 0.6829045414924622, "learning_rate": 9.439057826203145e-05, "loss": 1.0609, "step": 94 }, { "epoch": 0.8178638687113263, "grad_norm": 0.6709832549095154, "learning_rate": 9.423494038267945e-05, "loss": 1.0547, "step": 95 }, { "epoch": 0.8264729620661824, "grad_norm": 0.7212761044502258, "learning_rate": 9.407732039961304e-05, "loss": 0.9759, "step": 96 }, { "epoch": 0.8350820554210385, "grad_norm": 0.8272529244422913, "learning_rate": 9.391772627561112e-05, "loss": 1.0781, "step": 97 }, { "epoch": 0.8436911487758946, "grad_norm": 0.725784957408905, "learning_rate": 9.375616607318381e-05, "loss": 1.1727, "step": 98 }, { "epoch": 0.8523002421307506, "grad_norm": 0.7475800514221191, "learning_rate": 9.359264795416536e-05, "loss": 0.9373, "step": 99 }, { "epoch": 0.8609093354856067, "grad_norm": 1.408607840538025, "learning_rate": 9.342718017930156e-05, "loss": 1.6735, "step": 100 }, { "epoch": 0.8609093354856067, "eval_loss": 1.2356939315795898, "eval_runtime": 1.271, "eval_samples_per_second": 39.34, "eval_steps_per_second": 10.228, "step": 100 }, { "epoch": 0.8695184288404627, "grad_norm": 0.9433324933052063, "learning_rate": 9.325977110783264e-05, "loss": 1.4261, "step": 101 }, { "epoch": 0.8781275221953188, "grad_norm": 0.8093460202217102, "learning_rate": 9.309042919707086e-05, "loss": 1.2666, "step": 102 }, { "epoch": 0.8867366155501749, "grad_norm": 0.8018280863761902, "learning_rate": 9.29191630019733e-05, "loss": 1.1215, "step": 103 }, { "epoch": 0.895345708905031, "grad_norm": 0.8065201640129089, "learning_rate": 9.274598117470962e-05, "loss": 1.2179, "step": 104 }, { "epoch": 0.903954802259887, "grad_norm": 1.065794587135315, "learning_rate": 9.2570892464225e-05, "loss": 1.1027, "step": 105 }, { "epoch": 0.912563895614743, "grad_norm": 0.7889910936355591, "learning_rate": 9.239390571579819e-05, "loss": 1.1171, "step": 106 }, { "epoch": 0.9211729889695991, "grad_norm": 0.6680665016174316, "learning_rate": 9.221502987059459e-05, "loss": 1.0264, "step": 107 }, { "epoch": 0.9297820823244553, "grad_norm": 0.605944812297821, "learning_rate": 9.203427396521454e-05, "loss": 0.9694, "step": 108 }, { "epoch": 0.9383911756793113, "grad_norm": 0.6106491684913635, "learning_rate": 9.185164713123693e-05, "loss": 0.945, "step": 109 }, { "epoch": 0.9470002690341673, "grad_norm": 0.7006432414054871, "learning_rate": 9.166715859475773e-05, "loss": 1.0542, "step": 110 }, { "epoch": 0.9556093623890234, "grad_norm": 0.8687889575958252, "learning_rate": 9.148081767592397e-05, "loss": 1.0499, "step": 111 }, { "epoch": 0.9642184557438794, "grad_norm": 0.8556933999061584, "learning_rate": 9.129263378846291e-05, "loss": 0.9872, "step": 112 }, { "epoch": 0.9728275490987356, "grad_norm": 1.57082200050354, "learning_rate": 9.110261643920643e-05, "loss": 1.623, "step": 113 }, { "epoch": 0.9814366424535916, "grad_norm": 1.2634196281433105, "learning_rate": 9.091077522761079e-05, "loss": 1.182, "step": 114 }, { "epoch": 0.9900457358084477, "grad_norm": 0.616948127746582, "learning_rate": 9.071711984527162e-05, "loss": 0.9644, "step": 115 }, { "epoch": 0.9986548291633037, "grad_norm": 0.7694154977798462, "learning_rate": 9.052166007543444e-05, "loss": 1.023, "step": 116 }, { "epoch": 1.0072639225181599, "grad_norm": 0.8316962122917175, "learning_rate": 9.032440579250027e-05, "loss": 1.3752, "step": 117 }, { "epoch": 1.0158730158730158, "grad_norm": 0.6713112592697144, "learning_rate": 9.012536696152682e-05, "loss": 1.3118, "step": 118 }, { "epoch": 1.024482109227872, "grad_norm": 0.7449465990066528, "learning_rate": 8.99245536377252e-05, "loss": 1.0325, "step": 119 }, { "epoch": 1.033091202582728, "grad_norm": 0.7751779556274414, "learning_rate": 8.972197596595175e-05, "loss": 1.1722, "step": 120 }, { "epoch": 1.041700295937584, "grad_norm": 0.7328508496284485, "learning_rate": 8.951764418019565e-05, "loss": 1.0416, "step": 121 }, { "epoch": 1.0503093892924402, "grad_norm": 0.62940514087677, "learning_rate": 8.931156860306192e-05, "loss": 1.0258, "step": 122 }, { "epoch": 1.0589184826472962, "grad_norm": 0.6017870306968689, "learning_rate": 8.910375964524987e-05, "loss": 0.8907, "step": 123 }, { "epoch": 1.0675275760021523, "grad_norm": 0.6720871925354004, "learning_rate": 8.889422780502717e-05, "loss": 0.9037, "step": 124 }, { "epoch": 1.0761366693570082, "grad_norm": 0.730586051940918, "learning_rate": 8.868298366769954e-05, "loss": 0.8049, "step": 125 }, { "epoch": 1.0761366693570082, "eval_loss": 1.2327312231063843, "eval_runtime": 1.2749, "eval_samples_per_second": 39.219, "eval_steps_per_second": 10.197, "step": 125 }, { "epoch": 1.0847457627118644, "grad_norm": 0.6370446681976318, "learning_rate": 8.847003790507602e-05, "loss": 0.919, "step": 126 }, { "epoch": 1.0933548560667206, "grad_norm": 0.7401428818702698, "learning_rate": 8.825540127492967e-05, "loss": 0.9626, "step": 127 }, { "epoch": 1.1019639494215765, "grad_norm": 0.8133999109268188, "learning_rate": 8.80390846204543e-05, "loss": 0.8725, "step": 128 }, { "epoch": 1.1105730427764327, "grad_norm": 1.1162930727005005, "learning_rate": 8.782109886971657e-05, "loss": 1.5733, "step": 129 }, { "epoch": 1.1191821361312886, "grad_norm": 0.8581916689872742, "learning_rate": 8.760145503510397e-05, "loss": 1.1653, "step": 130 }, { "epoch": 1.1277912294861447, "grad_norm": 0.7115218639373779, "learning_rate": 8.738016421276847e-05, "loss": 1.0982, "step": 131 }, { "epoch": 1.136400322841001, "grad_norm": 0.6496354341506958, "learning_rate": 8.715723758206591e-05, "loss": 1.0461, "step": 132 }, { "epoch": 1.1450094161958568, "grad_norm": 0.6220307350158691, "learning_rate": 8.693268640499132e-05, "loss": 0.996, "step": 133 }, { "epoch": 1.153618509550713, "grad_norm": 0.60688716173172, "learning_rate": 8.670652202560987e-05, "loss": 0.9926, "step": 134 }, { "epoch": 1.162227602905569, "grad_norm": 0.682790994644165, "learning_rate": 8.647875586948391e-05, "loss": 1.0279, "step": 135 }, { "epoch": 1.170836696260425, "grad_norm": 0.7627474665641785, "learning_rate": 8.624939944309569e-05, "loss": 0.9441, "step": 136 }, { "epoch": 1.1794457896152812, "grad_norm": 0.7125784158706665, "learning_rate": 8.601846433326599e-05, "loss": 0.8664, "step": 137 }, { "epoch": 1.1880548829701372, "grad_norm": 0.7226782441139221, "learning_rate": 8.578596220656893e-05, "loss": 0.9258, "step": 138 }, { "epoch": 1.1966639763249933, "grad_norm": 0.7997826337814331, "learning_rate": 8.555190480874245e-05, "loss": 1.0336, "step": 139 }, { "epoch": 1.2052730696798493, "grad_norm": 0.7626352310180664, "learning_rate": 8.531630396409507e-05, "loss": 0.8054, "step": 140 }, { "epoch": 1.2138821630347054, "grad_norm": 1.2883024215698242, "learning_rate": 8.507917157490836e-05, "loss": 1.1503, "step": 141 }, { "epoch": 1.2224912563895614, "grad_norm": 1.5344481468200684, "learning_rate": 8.484051962083579e-05, "loss": 1.373, "step": 142 }, { "epoch": 1.2311003497444175, "grad_norm": 0.9424051642417908, "learning_rate": 8.460036015829747e-05, "loss": 1.098, "step": 143 }, { "epoch": 1.2397094430992737, "grad_norm": 0.7515062689781189, "learning_rate": 8.43587053198711e-05, "loss": 1.0671, "step": 144 }, { "epoch": 1.2483185364541296, "grad_norm": 0.661628782749176, "learning_rate": 8.411556731367904e-05, "loss": 1.0489, "step": 145 }, { "epoch": 1.2569276298089858, "grad_norm": 0.8060951232910156, "learning_rate": 8.387095842277159e-05, "loss": 1.1073, "step": 146 }, { "epoch": 1.2655367231638417, "grad_norm": 0.9029113054275513, "learning_rate": 8.362489100450638e-05, "loss": 1.054, "step": 147 }, { "epoch": 1.2741458165186978, "grad_norm": 0.821918785572052, "learning_rate": 8.337737748992419e-05, "loss": 0.9042, "step": 148 }, { "epoch": 1.282754909873554, "grad_norm": 0.6375196576118469, "learning_rate": 8.312843038312093e-05, "loss": 0.9154, "step": 149 }, { "epoch": 1.29136400322841, "grad_norm": 0.6537846326828003, "learning_rate": 8.287806226061587e-05, "loss": 0.8683, "step": 150 }, { "epoch": 1.29136400322841, "eval_loss": 1.2015514373779297, "eval_runtime": 1.2706, "eval_samples_per_second": 39.35, "eval_steps_per_second": 10.231, "step": 150 }, { "epoch": 1.299973096583266, "grad_norm": 0.7098111510276794, "learning_rate": 8.262628577071638e-05, "loss": 0.9429, "step": 151 }, { "epoch": 1.308582189938122, "grad_norm": 0.705226719379425, "learning_rate": 8.237311363287896e-05, "loss": 0.9092, "step": 152 }, { "epoch": 1.3171912832929782, "grad_norm": 1.0044783353805542, "learning_rate": 8.211855863706654e-05, "loss": 1.0479, "step": 153 }, { "epoch": 1.3258003766478343, "grad_norm": 1.1548088788986206, "learning_rate": 8.18626336431025e-05, "loss": 1.5881, "step": 154 }, { "epoch": 1.3344094700026903, "grad_norm": 0.8297902941703796, "learning_rate": 8.160535158002092e-05, "loss": 1.1529, "step": 155 }, { "epoch": 1.3430185633575464, "grad_norm": 0.9534130096435547, "learning_rate": 8.13467254454134e-05, "loss": 1.1669, "step": 156 }, { "epoch": 1.3516276567124024, "grad_norm": 0.8651900291442871, "learning_rate": 8.108676830477255e-05, "loss": 1.0837, "step": 157 }, { "epoch": 1.3602367500672585, "grad_norm": 0.7334545254707336, "learning_rate": 8.082549329083179e-05, "loss": 0.9547, "step": 158 }, { "epoch": 1.3688458434221147, "grad_norm": 0.7457680702209473, "learning_rate": 8.056291360290201e-05, "loss": 1.0481, "step": 159 }, { "epoch": 1.3774549367769706, "grad_norm": 0.6921712756156921, "learning_rate": 8.029904250620473e-05, "loss": 0.8894, "step": 160 }, { "epoch": 1.3860640301318268, "grad_norm": 0.7528761625289917, "learning_rate": 8.003389333120192e-05, "loss": 0.9376, "step": 161 }, { "epoch": 1.3946731234866827, "grad_norm": 0.8016011714935303, "learning_rate": 7.976747947292258e-05, "loss": 0.867, "step": 162 }, { "epoch": 1.4032822168415389, "grad_norm": 0.8211402893066406, "learning_rate": 7.949981439028605e-05, "loss": 0.8881, "step": 163 }, { "epoch": 1.411891310196395, "grad_norm": 0.8662030696868896, "learning_rate": 7.923091160542212e-05, "loss": 1.0727, "step": 164 }, { "epoch": 1.420500403551251, "grad_norm": 0.9291104078292847, "learning_rate": 7.896078470298774e-05, "loss": 0.9085, "step": 165 }, { "epoch": 1.4291094969061071, "grad_norm": 0.9801819324493408, "learning_rate": 7.868944732948101e-05, "loss": 1.1554, "step": 166 }, { "epoch": 1.437718590260963, "grad_norm": 1.1721428632736206, "learning_rate": 7.841691319255154e-05, "loss": 1.363, "step": 167 }, { "epoch": 1.4463276836158192, "grad_norm": 1.0181959867477417, "learning_rate": 7.814319606030803e-05, "loss": 1.135, "step": 168 }, { "epoch": 1.4549367769706754, "grad_norm": 0.757427453994751, "learning_rate": 7.78683097606228e-05, "loss": 0.9332, "step": 169 }, { "epoch": 1.4635458703255313, "grad_norm": 0.8897256255149841, "learning_rate": 7.759226818043309e-05, "loss": 1.0629, "step": 170 }, { "epoch": 1.4721549636803875, "grad_norm": 1.0402635335922241, "learning_rate": 7.73150852650396e-05, "loss": 0.9096, "step": 171 }, { "epoch": 1.4807640570352434, "grad_norm": 0.6742547750473022, "learning_rate": 7.703677501740194e-05, "loss": 0.9271, "step": 172 }, { "epoch": 1.4893731503900995, "grad_norm": 0.8658159375190735, "learning_rate": 7.675735149743131e-05, "loss": 0.7919, "step": 173 }, { "epoch": 1.4979822437449557, "grad_norm": 0.8262009024620056, "learning_rate": 7.647682882128002e-05, "loss": 0.9107, "step": 174 }, { "epoch": 1.5065913370998116, "grad_norm": 0.6974323391914368, "learning_rate": 7.619522116062857e-05, "loss": 0.8795, "step": 175 }, { "epoch": 1.5065913370998116, "eval_loss": 1.192854642868042, "eval_runtime": 1.2757, "eval_samples_per_second": 39.194, "eval_steps_per_second": 10.19, "step": 175 }, { "epoch": 1.5152004304546678, "grad_norm": 0.8274783492088318, "learning_rate": 7.591254274196959e-05, "loss": 0.9152, "step": 176 }, { "epoch": 1.5238095238095237, "grad_norm": 1.0990360975265503, "learning_rate": 7.562880784588916e-05, "loss": 0.8394, "step": 177 }, { "epoch": 1.5324186171643799, "grad_norm": 0.8963367342948914, "learning_rate": 7.534403080634538e-05, "loss": 0.7276, "step": 178 }, { "epoch": 1.541027710519236, "grad_norm": 1.506883978843689, "learning_rate": 7.505822600994424e-05, "loss": 1.4906, "step": 179 }, { "epoch": 1.549636803874092, "grad_norm": 1.6557178497314453, "learning_rate": 7.477140789521276e-05, "loss": 1.1603, "step": 180 }, { "epoch": 1.5582458972289481, "grad_norm": 0.9448726773262024, "learning_rate": 7.448359095186973e-05, "loss": 1.1379, "step": 181 }, { "epoch": 1.566854990583804, "grad_norm": 0.754417359828949, "learning_rate": 7.419478972009348e-05, "loss": 1.0106, "step": 182 }, { "epoch": 1.5754640839386602, "grad_norm": 0.844013512134552, "learning_rate": 7.390501878978759e-05, "loss": 0.938, "step": 183 }, { "epoch": 1.5840731772935164, "grad_norm": 0.8120993971824646, "learning_rate": 7.361429279984355e-05, "loss": 0.9858, "step": 184 }, { "epoch": 1.5926822706483723, "grad_norm": 0.8605924844741821, "learning_rate": 7.332262643740144e-05, "loss": 0.9036, "step": 185 }, { "epoch": 1.6012913640032282, "grad_norm": 0.8876140117645264, "learning_rate": 7.303003443710784e-05, "loss": 0.8838, "step": 186 }, { "epoch": 1.6099004573580844, "grad_norm": 0.9637414216995239, "learning_rate": 7.273653158037151e-05, "loss": 0.828, "step": 187 }, { "epoch": 1.6185095507129406, "grad_norm": 0.8308393955230713, "learning_rate": 7.244213269461656e-05, "loss": 0.9496, "step": 188 }, { "epoch": 1.6271186440677967, "grad_norm": 0.8119847178459167, "learning_rate": 7.214685265253351e-05, "loss": 0.9974, "step": 189 }, { "epoch": 1.6357277374226527, "grad_norm": 0.8133478760719299, "learning_rate": 7.185070637132787e-05, "loss": 0.7914, "step": 190 }, { "epoch": 1.6443368307775086, "grad_norm": 1.075223445892334, "learning_rate": 7.15537088119665e-05, "loss": 1.095, "step": 191 }, { "epoch": 1.6529459241323647, "grad_norm": 1.0103572607040405, "learning_rate": 7.12558749784219e-05, "loss": 1.2195, "step": 192 }, { "epoch": 1.661555017487221, "grad_norm": 1.1441484689712524, "learning_rate": 7.095721991691411e-05, "loss": 1.0929, "step": 193 }, { "epoch": 1.670164110842077, "grad_norm": 1.2061221599578857, "learning_rate": 7.065775871515072e-05, "loss": 1.1353, "step": 194 }, { "epoch": 1.678773204196933, "grad_norm": 1.0299855470657349, "learning_rate": 7.035750650156458e-05, "loss": 1.0423, "step": 195 }, { "epoch": 1.687382297551789, "grad_norm": 0.7968188524246216, "learning_rate": 7.005647844454949e-05, "loss": 1.0405, "step": 196 }, { "epoch": 1.695991390906645, "grad_norm": 0.7397557497024536, "learning_rate": 6.975468975169402e-05, "loss": 0.9024, "step": 197 }, { "epoch": 1.7046004842615012, "grad_norm": 0.7559933662414551, "learning_rate": 6.945215566901315e-05, "loss": 0.8593, "step": 198 }, { "epoch": 1.7132095776163574, "grad_norm": 0.7246858477592468, "learning_rate": 6.914889148017809e-05, "loss": 0.9244, "step": 199 }, { "epoch": 1.7218186709712133, "grad_norm": 0.6950869560241699, "learning_rate": 6.884491250574415e-05, "loss": 0.797, "step": 200 }, { "epoch": 1.7218186709712133, "eval_loss": 1.1887668371200562, "eval_runtime": 1.2738, "eval_samples_per_second": 39.253, "eval_steps_per_second": 10.206, "step": 200 } ], "logging_steps": 1, "max_steps": 465, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.248409672001782e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }