{ "best_metric": 0.4293454885482788, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.21344717182497333, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001422981145499822, "grad_norm": 0.8191930651664734, "learning_rate": 2.9999999999999997e-05, "loss": 2.8579, "step": 1 }, { "epoch": 0.001422981145499822, "eval_loss": 2.6855461597442627, "eval_runtime": 2.0671, "eval_samples_per_second": 24.188, "eval_steps_per_second": 3.386, "step": 1 }, { "epoch": 0.002845962290999644, "grad_norm": 0.9729381799697876, "learning_rate": 5.9999999999999995e-05, "loss": 3.0766, "step": 2 }, { "epoch": 0.004268943436499467, "grad_norm": 0.8968315124511719, "learning_rate": 8.999999999999999e-05, "loss": 2.5414, "step": 3 }, { "epoch": 0.005691924581999288, "grad_norm": 0.882707417011261, "learning_rate": 0.00011999999999999999, "loss": 2.5339, "step": 4 }, { "epoch": 0.007114905727499111, "grad_norm": 0.916093647480011, "learning_rate": 0.00015, "loss": 2.5088, "step": 5 }, { "epoch": 0.008537886872998933, "grad_norm": 0.9488129615783691, "learning_rate": 0.00017999999999999998, "loss": 2.385, "step": 6 }, { "epoch": 0.009960868018498754, "grad_norm": 1.007516860961914, "learning_rate": 0.00020999999999999998, "loss": 2.2796, "step": 7 }, { "epoch": 0.011383849163998577, "grad_norm": 1.0053644180297852, "learning_rate": 0.00023999999999999998, "loss": 2.0712, "step": 8 }, { "epoch": 0.012806830309498399, "grad_norm": 0.915839433670044, "learning_rate": 0.00027, "loss": 1.998, "step": 9 }, { "epoch": 0.014229811454998222, "grad_norm": 0.7682165503501892, "learning_rate": 0.0003, "loss": 1.8872, "step": 10 }, { "epoch": 0.015652792600498042, "grad_norm": 0.8641461133956909, "learning_rate": 0.0002999794957488703, "loss": 1.7376, "step": 11 }, { "epoch": 0.017075773745997867, "grad_norm": 0.8075277805328369, "learning_rate": 0.0002999179886011389, "loss": 1.632, "step": 12 }, { "epoch": 0.018498754891497687, "grad_norm": 0.762394905090332, "learning_rate": 0.0002998154953722457, "loss": 1.347, "step": 13 }, { "epoch": 0.019921736036997508, "grad_norm": 0.7908850312232971, "learning_rate": 0.00029967204408281613, "loss": 1.1928, "step": 14 }, { "epoch": 0.021344717182497332, "grad_norm": 0.8413186073303223, "learning_rate": 0.00029948767395100045, "loss": 1.0229, "step": 15 }, { "epoch": 0.022767698327997153, "grad_norm": 0.8821457028388977, "learning_rate": 0.0002992624353817517, "loss": 1.019, "step": 16 }, { "epoch": 0.024190679473496977, "grad_norm": 0.9173251986503601, "learning_rate": 0.0002989963899530457, "loss": 1.0191, "step": 17 }, { "epoch": 0.025613660618996798, "grad_norm": 0.7435721755027771, "learning_rate": 0.00029868961039904624, "loss": 0.9497, "step": 18 }, { "epoch": 0.02703664176449662, "grad_norm": 0.7625381350517273, "learning_rate": 0.00029834218059022024, "loss": 0.8573, "step": 19 }, { "epoch": 0.028459622909996443, "grad_norm": 0.8063183426856995, "learning_rate": 0.00029795419551040833, "loss": 0.9115, "step": 20 }, { "epoch": 0.029882604055496264, "grad_norm": 0.6584380269050598, "learning_rate": 0.00029752576123085736, "loss": 0.7548, "step": 21 }, { "epoch": 0.031305585200996085, "grad_norm": 0.6741005182266235, "learning_rate": 0.0002970569948812214, "loss": 0.8572, "step": 22 }, { "epoch": 0.03272856634649591, "grad_norm": 0.6349349617958069, "learning_rate": 0.0002965480246175399, "loss": 0.7942, "step": 23 }, { "epoch": 0.03415154749199573, "grad_norm": 0.5690104961395264, "learning_rate": 0.0002959989895872009, "loss": 0.7952, "step": 24 }, { "epoch": 0.03557452863749555, "grad_norm": 0.5119650363922119, "learning_rate": 0.0002954100398908995, "loss": 0.8102, "step": 25 }, { "epoch": 0.03557452863749555, "eval_loss": 0.8686224222183228, "eval_runtime": 1.5871, "eval_samples_per_second": 31.505, "eval_steps_per_second": 4.411, "step": 25 }, { "epoch": 0.036997509782995375, "grad_norm": 0.6452613472938538, "learning_rate": 0.0002947813365416023, "loss": 0.9172, "step": 26 }, { "epoch": 0.0384204909284952, "grad_norm": 0.5750677585601807, "learning_rate": 0.0002941130514205272, "loss": 0.8096, "step": 27 }, { "epoch": 0.039843472073995016, "grad_norm": 0.493135005235672, "learning_rate": 0.0002934053672301536, "loss": 0.6532, "step": 28 }, { "epoch": 0.04126645321949484, "grad_norm": 0.566339373588562, "learning_rate": 0.00029265847744427303, "loss": 0.7725, "step": 29 }, { "epoch": 0.042689434364994665, "grad_norm": 0.4760490655899048, "learning_rate": 0.00029187258625509513, "loss": 0.8217, "step": 30 }, { "epoch": 0.04411241551049449, "grad_norm": 0.4571889340877533, "learning_rate": 0.00029104790851742417, "loss": 0.746, "step": 31 }, { "epoch": 0.045535396655994306, "grad_norm": 0.4935093820095062, "learning_rate": 0.0002901846696899191, "loss": 0.7695, "step": 32 }, { "epoch": 0.04695837780149413, "grad_norm": 0.4788101315498352, "learning_rate": 0.00028928310577345606, "loss": 0.7196, "step": 33 }, { "epoch": 0.048381358946993955, "grad_norm": 0.4659596383571625, "learning_rate": 0.0002883434632466077, "loss": 0.6524, "step": 34 }, { "epoch": 0.04980434009249377, "grad_norm": 0.4702892005443573, "learning_rate": 0.00028736599899825856, "loss": 0.7084, "step": 35 }, { "epoch": 0.051227321237993596, "grad_norm": 0.43953150510787964, "learning_rate": 0.00028635098025737434, "loss": 0.6571, "step": 36 }, { "epoch": 0.05265030238349342, "grad_norm": 0.48017436265945435, "learning_rate": 0.00028529868451994384, "loss": 0.6499, "step": 37 }, { "epoch": 0.05407328352899324, "grad_norm": 0.4220162034034729, "learning_rate": 0.0002842093994731145, "loss": 0.6256, "step": 38 }, { "epoch": 0.05549626467449306, "grad_norm": 0.45973941683769226, "learning_rate": 0.00028308342291654174, "loss": 0.6496, "step": 39 }, { "epoch": 0.056919245819992886, "grad_norm": 0.49031051993370056, "learning_rate": 0.00028192106268097334, "loss": 0.7017, "step": 40 }, { "epoch": 0.05834222696549271, "grad_norm": 0.50996333360672, "learning_rate": 0.00028072263654409154, "loss": 0.646, "step": 41 }, { "epoch": 0.05976520811099253, "grad_norm": 0.49370792508125305, "learning_rate": 0.0002794884721436361, "loss": 0.6681, "step": 42 }, { "epoch": 0.06118818925649235, "grad_norm": 0.6224830746650696, "learning_rate": 0.00027821890688783083, "loss": 0.7153, "step": 43 }, { "epoch": 0.06261117040199217, "grad_norm": 0.47643178701400757, "learning_rate": 0.0002769142878631403, "loss": 0.7022, "step": 44 }, { "epoch": 0.064034151547492, "grad_norm": 0.5734248757362366, "learning_rate": 0.00027557497173937923, "loss": 0.7246, "step": 45 }, { "epoch": 0.06545713269299182, "grad_norm": 0.6396843194961548, "learning_rate": 0.000274201324672203, "loss": 0.6551, "step": 46 }, { "epoch": 0.06688011383849164, "grad_norm": 0.7682932019233704, "learning_rate": 0.00027279372220300385, "loss": 0.7881, "step": 47 }, { "epoch": 0.06830309498399147, "grad_norm": 0.5934016704559326, "learning_rate": 0.0002713525491562421, "loss": 0.6506, "step": 48 }, { "epoch": 0.06972607612949129, "grad_norm": 1.0871713161468506, "learning_rate": 0.00026987819953423867, "loss": 0.8281, "step": 49 }, { "epoch": 0.0711490572749911, "grad_norm": 0.8507772088050842, "learning_rate": 0.00026837107640945905, "loss": 0.8056, "step": 50 }, { "epoch": 0.0711490572749911, "eval_loss": 0.6373523473739624, "eval_runtime": 1.5884, "eval_samples_per_second": 31.477, "eval_steps_per_second": 4.407, "step": 50 }, { "epoch": 0.07257203842049093, "grad_norm": 1.044948697090149, "learning_rate": 0.0002668315918143169, "loss": 0.8342, "step": 51 }, { "epoch": 0.07399501956599075, "grad_norm": 1.0066823959350586, "learning_rate": 0.00026526016662852886, "loss": 1.0176, "step": 52 }, { "epoch": 0.07541800071149057, "grad_norm": 0.8357412815093994, "learning_rate": 0.00026365723046405023, "loss": 0.5818, "step": 53 }, { "epoch": 0.0768409818569904, "grad_norm": 0.5974358320236206, "learning_rate": 0.0002620232215476231, "loss": 0.5231, "step": 54 }, { "epoch": 0.07826396300249022, "grad_norm": 0.4712371826171875, "learning_rate": 0.0002603585866009697, "loss": 0.5346, "step": 55 }, { "epoch": 0.07968694414799003, "grad_norm": 0.48890915513038635, "learning_rate": 0.00025866378071866334, "loss": 0.6215, "step": 56 }, { "epoch": 0.08110992529348986, "grad_norm": 0.5396881699562073, "learning_rate": 0.00025693926724370956, "loss": 0.57, "step": 57 }, { "epoch": 0.08253290643898968, "grad_norm": 0.5231046080589294, "learning_rate": 0.00025518551764087326, "loss": 0.525, "step": 58 }, { "epoch": 0.0839558875844895, "grad_norm": 0.5166378617286682, "learning_rate": 0.00025340301136778483, "loss": 0.535, "step": 59 }, { "epoch": 0.08537886872998933, "grad_norm": 0.4543461203575134, "learning_rate": 0.00025159223574386114, "loss": 0.5265, "step": 60 }, { "epoch": 0.08680184987548915, "grad_norm": 0.4526726305484772, "learning_rate": 0.0002497536858170772, "loss": 0.5463, "step": 61 }, { "epoch": 0.08822483102098898, "grad_norm": 0.4510972201824188, "learning_rate": 0.00024788786422862526, "loss": 0.5163, "step": 62 }, { "epoch": 0.08964781216648879, "grad_norm": 0.3307866156101227, "learning_rate": 0.00024599528107549745, "loss": 0.4868, "step": 63 }, { "epoch": 0.09107079331198861, "grad_norm": 0.47421735525131226, "learning_rate": 0.00024407645377103054, "loss": 0.4633, "step": 64 }, { "epoch": 0.09249377445748844, "grad_norm": 0.37198832631111145, "learning_rate": 0.00024213190690345018, "loss": 0.484, "step": 65 }, { "epoch": 0.09391675560298826, "grad_norm": 0.492297500371933, "learning_rate": 0.00024016217209245374, "loss": 0.5477, "step": 66 }, { "epoch": 0.09533973674848809, "grad_norm": 0.46712058782577515, "learning_rate": 0.00023816778784387094, "loss": 0.566, "step": 67 }, { "epoch": 0.09676271789398791, "grad_norm": 0.395457923412323, "learning_rate": 0.0002361492994024415, "loss": 0.4635, "step": 68 }, { "epoch": 0.09818569903948772, "grad_norm": 0.49724629521369934, "learning_rate": 0.0002341072586027509, "loss": 0.501, "step": 69 }, { "epoch": 0.09960868018498754, "grad_norm": 0.4556506872177124, "learning_rate": 0.00023204222371836405, "loss": 0.4758, "step": 70 }, { "epoch": 0.10103166133048737, "grad_norm": 0.488097220659256, "learning_rate": 0.00022995475930919905, "loss": 0.5382, "step": 71 }, { "epoch": 0.10245464247598719, "grad_norm": 0.5412923693656921, "learning_rate": 0.00022784543606718227, "loss": 0.6215, "step": 72 }, { "epoch": 0.10387762362148702, "grad_norm": 0.431452214717865, "learning_rate": 0.00022571483066022657, "loss": 0.5064, "step": 73 }, { "epoch": 0.10530060476698684, "grad_norm": 0.6141003966331482, "learning_rate": 0.0002235635255745762, "loss": 0.4909, "step": 74 }, { "epoch": 0.10672358591248667, "grad_norm": 0.46321696043014526, "learning_rate": 0.00022139210895556104, "loss": 0.4664, "step": 75 }, { "epoch": 0.10672358591248667, "eval_loss": 0.5455062389373779, "eval_runtime": 1.5894, "eval_samples_per_second": 31.459, "eval_steps_per_second": 4.404, "step": 75 }, { "epoch": 0.10814656705798648, "grad_norm": 0.5080518126487732, "learning_rate": 0.00021920117444680317, "loss": 0.525, "step": 76 }, { "epoch": 0.1095695482034863, "grad_norm": 0.3996676802635193, "learning_rate": 0.00021699132102792097, "loss": 0.4702, "step": 77 }, { "epoch": 0.11099252934898612, "grad_norm": 0.4271460175514221, "learning_rate": 0.0002147631528507739, "loss": 0.5, "step": 78 }, { "epoch": 0.11241551049448595, "grad_norm": 0.46124765276908875, "learning_rate": 0.00021251727907429355, "loss": 0.4972, "step": 79 }, { "epoch": 0.11383849163998577, "grad_norm": 0.4041891098022461, "learning_rate": 0.0002102543136979454, "loss": 0.5017, "step": 80 }, { "epoch": 0.1152614727854856, "grad_norm": 0.43278566002845764, "learning_rate": 0.0002079748753938678, "loss": 0.4823, "step": 81 }, { "epoch": 0.11668445393098542, "grad_norm": 0.49185311794281006, "learning_rate": 0.0002056795873377331, "loss": 0.4747, "step": 82 }, { "epoch": 0.11810743507648523, "grad_norm": 0.4809925854206085, "learning_rate": 0.00020336907703837748, "loss": 0.5292, "step": 83 }, { "epoch": 0.11953041622198506, "grad_norm": 0.4222284257411957, "learning_rate": 0.00020104397616624645, "loss": 0.5157, "step": 84 }, { "epoch": 0.12095339736748488, "grad_norm": 0.46804237365722656, "learning_rate": 0.00019870492038070252, "loss": 0.5094, "step": 85 }, { "epoch": 0.1223763785129847, "grad_norm": 0.4556009769439697, "learning_rate": 0.0001963525491562421, "loss": 0.5129, "step": 86 }, { "epoch": 0.12379935965848453, "grad_norm": 0.458716481924057, "learning_rate": 0.0001939875056076697, "loss": 0.4831, "step": 87 }, { "epoch": 0.12522234080398434, "grad_norm": 0.4597315788269043, "learning_rate": 0.00019161043631427666, "loss": 0.4613, "step": 88 }, { "epoch": 0.12664532194948416, "grad_norm": 0.5253174901008606, "learning_rate": 0.00018922199114307294, "loss": 0.5142, "step": 89 }, { "epoch": 0.128068303094984, "grad_norm": 0.5549522638320923, "learning_rate": 0.00018682282307111987, "loss": 0.5332, "step": 90 }, { "epoch": 0.1294912842404838, "grad_norm": 0.4502198100090027, "learning_rate": 0.00018441358800701273, "loss": 0.4298, "step": 91 }, { "epoch": 0.13091426538598364, "grad_norm": 0.38060614466667175, "learning_rate": 0.00018199494461156203, "loss": 0.4558, "step": 92 }, { "epoch": 0.13233724653148346, "grad_norm": 0.52518630027771, "learning_rate": 0.000179567554117722, "loss": 0.4687, "step": 93 }, { "epoch": 0.13376022767698328, "grad_norm": 0.5193655490875244, "learning_rate": 0.00017713208014981648, "loss": 0.4809, "step": 94 }, { "epoch": 0.1351832088224831, "grad_norm": 0.5254992842674255, "learning_rate": 0.00017468918854211007, "loss": 0.4535, "step": 95 }, { "epoch": 0.13660618996798293, "grad_norm": 0.46708381175994873, "learning_rate": 0.00017223954715677627, "loss": 0.5143, "step": 96 }, { "epoch": 0.13802917111348276, "grad_norm": 0.5062940716743469, "learning_rate": 0.00016978382570131034, "loss": 0.5421, "step": 97 }, { "epoch": 0.13945215225898258, "grad_norm": 0.5772467255592346, "learning_rate": 0.00016732269554543794, "loss": 0.5691, "step": 98 }, { "epoch": 0.14087513340448238, "grad_norm": 0.7562189102172852, "learning_rate": 0.00016485682953756942, "loss": 0.5696, "step": 99 }, { "epoch": 0.1422981145499822, "grad_norm": 0.7072889804840088, "learning_rate": 0.00016238690182084986, "loss": 0.6114, "step": 100 }, { "epoch": 0.1422981145499822, "eval_loss": 0.48794320225715637, "eval_runtime": 1.5895, "eval_samples_per_second": 31.457, "eval_steps_per_second": 4.404, "step": 100 }, { "epoch": 0.14372109569548203, "grad_norm": 0.5918603539466858, "learning_rate": 0.0001599135876488549, "loss": 0.6081, "step": 101 }, { "epoch": 0.14514407684098185, "grad_norm": 0.814773440361023, "learning_rate": 0.00015743756320098332, "loss": 0.7945, "step": 102 }, { "epoch": 0.14656705798648167, "grad_norm": 0.5480428338050842, "learning_rate": 0.0001549595053975962, "loss": 0.4602, "step": 103 }, { "epoch": 0.1479900391319815, "grad_norm": 0.4311855435371399, "learning_rate": 0.00015248009171495378, "loss": 0.3792, "step": 104 }, { "epoch": 0.14941302027748132, "grad_norm": 0.5230007171630859, "learning_rate": 0.00015, "loss": 0.4123, "step": 105 }, { "epoch": 0.15083600142298115, "grad_norm": 0.4562471807003021, "learning_rate": 0.00014751990828504622, "loss": 0.4171, "step": 106 }, { "epoch": 0.15225898256848097, "grad_norm": 0.464626282453537, "learning_rate": 0.00014504049460240375, "loss": 0.3999, "step": 107 }, { "epoch": 0.1536819637139808, "grad_norm": 0.4413895606994629, "learning_rate": 0.00014256243679901663, "loss": 0.4036, "step": 108 }, { "epoch": 0.15510494485948062, "grad_norm": 0.4271002411842346, "learning_rate": 0.00014008641235114508, "loss": 0.4165, "step": 109 }, { "epoch": 0.15652792600498044, "grad_norm": 0.4437054395675659, "learning_rate": 0.00013761309817915014, "loss": 0.385, "step": 110 }, { "epoch": 0.15795090715048027, "grad_norm": 0.6215757727622986, "learning_rate": 0.00013514317046243058, "loss": 0.4807, "step": 111 }, { "epoch": 0.15937388829598007, "grad_norm": 0.6023930311203003, "learning_rate": 0.00013267730445456208, "loss": 0.4458, "step": 112 }, { "epoch": 0.1607968694414799, "grad_norm": 0.5075235366821289, "learning_rate": 0.00013021617429868963, "loss": 0.4103, "step": 113 }, { "epoch": 0.1622198505869797, "grad_norm": 0.4037129580974579, "learning_rate": 0.00012776045284322368, "loss": 0.4037, "step": 114 }, { "epoch": 0.16364283173247954, "grad_norm": 0.4262774884700775, "learning_rate": 0.00012531081145788987, "loss": 0.3738, "step": 115 }, { "epoch": 0.16506581287797936, "grad_norm": 0.38689273595809937, "learning_rate": 0.00012286791985018355, "loss": 0.4016, "step": 116 }, { "epoch": 0.1664887940234792, "grad_norm": 0.45144420862197876, "learning_rate": 0.00012043244588227796, "loss": 0.4268, "step": 117 }, { "epoch": 0.167911775168979, "grad_norm": 0.35227862000465393, "learning_rate": 0.00011800505538843798, "loss": 0.3679, "step": 118 }, { "epoch": 0.16933475631447883, "grad_norm": 0.4991707503795624, "learning_rate": 0.00011558641199298727, "loss": 0.4614, "step": 119 }, { "epoch": 0.17075773745997866, "grad_norm": 0.37295737862586975, "learning_rate": 0.00011317717692888012, "loss": 0.4173, "step": 120 }, { "epoch": 0.17218071860547848, "grad_norm": 0.3991025686264038, "learning_rate": 0.00011077800885692702, "loss": 0.4005, "step": 121 }, { "epoch": 0.1736036997509783, "grad_norm": 0.444135844707489, "learning_rate": 0.00010838956368572334, "loss": 0.3766, "step": 122 }, { "epoch": 0.17502668089647813, "grad_norm": 0.45515337586402893, "learning_rate": 0.0001060124943923303, "loss": 0.3954, "step": 123 }, { "epoch": 0.17644966204197796, "grad_norm": 0.5047637820243835, "learning_rate": 0.0001036474508437579, "loss": 0.427, "step": 124 }, { "epoch": 0.17787264318747775, "grad_norm": 0.4614162743091583, "learning_rate": 0.00010129507961929748, "loss": 0.4422, "step": 125 }, { "epoch": 0.17787264318747775, "eval_loss": 0.4491093158721924, "eval_runtime": 1.5932, "eval_samples_per_second": 31.383, "eval_steps_per_second": 4.394, "step": 125 }, { "epoch": 0.17929562433297758, "grad_norm": 0.39532631635665894, "learning_rate": 9.895602383375353e-05, "loss": 0.3954, "step": 126 }, { "epoch": 0.1807186054784774, "grad_norm": 0.4130837321281433, "learning_rate": 9.663092296162251e-05, "loss": 0.3957, "step": 127 }, { "epoch": 0.18214158662397723, "grad_norm": 0.4563869833946228, "learning_rate": 9.432041266226686e-05, "loss": 0.4121, "step": 128 }, { "epoch": 0.18356456776947705, "grad_norm": 0.4387674629688263, "learning_rate": 9.202512460613219e-05, "loss": 0.3886, "step": 129 }, { "epoch": 0.18498754891497687, "grad_norm": 0.49771103262901306, "learning_rate": 8.97456863020546e-05, "loss": 0.3902, "step": 130 }, { "epoch": 0.1864105300604767, "grad_norm": 0.5475918650627136, "learning_rate": 8.748272092570646e-05, "loss": 0.4373, "step": 131 }, { "epoch": 0.18783351120597652, "grad_norm": 0.49682438373565674, "learning_rate": 8.523684714922608e-05, "loss": 0.4013, "step": 132 }, { "epoch": 0.18925649235147635, "grad_norm": 0.4471288025379181, "learning_rate": 8.300867897207903e-05, "loss": 0.4139, "step": 133 }, { "epoch": 0.19067947349697617, "grad_norm": 0.41703081130981445, "learning_rate": 8.079882555319684e-05, "loss": 0.4089, "step": 134 }, { "epoch": 0.192102454642476, "grad_norm": 0.5421717166900635, "learning_rate": 7.860789104443896e-05, "loss": 0.4213, "step": 135 }, { "epoch": 0.19352543578797582, "grad_norm": 0.3587832450866699, "learning_rate": 7.643647442542382e-05, "loss": 0.3714, "step": 136 }, { "epoch": 0.19494841693347564, "grad_norm": 0.42268967628479004, "learning_rate": 7.428516933977347e-05, "loss": 0.391, "step": 137 }, { "epoch": 0.19637139807897544, "grad_norm": 0.45539185404777527, "learning_rate": 7.215456393281776e-05, "loss": 0.3907, "step": 138 }, { "epoch": 0.19779437922447526, "grad_norm": 0.5193498134613037, "learning_rate": 7.004524069080096e-05, "loss": 0.4293, "step": 139 }, { "epoch": 0.1992173603699751, "grad_norm": 0.49450474977493286, "learning_rate": 6.795777628163599e-05, "loss": 0.3975, "step": 140 }, { "epoch": 0.2006403415154749, "grad_norm": 0.5141414403915405, "learning_rate": 6.58927413972491e-05, "loss": 0.3932, "step": 141 }, { "epoch": 0.20206332266097474, "grad_norm": 0.5565205216407776, "learning_rate": 6.385070059755846e-05, "loss": 0.4097, "step": 142 }, { "epoch": 0.20348630380647456, "grad_norm": 0.5086135864257812, "learning_rate": 6.183221215612904e-05, "loss": 0.4553, "step": 143 }, { "epoch": 0.20490928495197439, "grad_norm": 0.43909886479377747, "learning_rate": 5.983782790754623e-05, "loss": 0.4142, "step": 144 }, { "epoch": 0.2063322660974742, "grad_norm": 0.4421076774597168, "learning_rate": 5.786809309654982e-05, "loss": 0.3916, "step": 145 }, { "epoch": 0.20775524724297403, "grad_norm": 0.5867587327957153, "learning_rate": 5.592354622896944e-05, "loss": 0.4844, "step": 146 }, { "epoch": 0.20917822838847386, "grad_norm": 0.4880177080631256, "learning_rate": 5.40047189245025e-05, "loss": 0.4324, "step": 147 }, { "epoch": 0.21060120953397368, "grad_norm": 0.5578730702400208, "learning_rate": 5.211213577137469e-05, "loss": 0.4769, "step": 148 }, { "epoch": 0.2120241906794735, "grad_norm": 0.5823155045509338, "learning_rate": 5.024631418292274e-05, "loss": 0.5011, "step": 149 }, { "epoch": 0.21344717182497333, "grad_norm": 1.0556669235229492, "learning_rate": 4.840776425613886e-05, "loss": 0.6444, "step": 150 }, { "epoch": 0.21344717182497333, "eval_loss": 0.4293454885482788, "eval_runtime": 1.5889, "eval_samples_per_second": 31.468, "eval_steps_per_second": 4.406, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.292050004063027e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }