|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 80.0,
  "eval_steps": 500,
  "global_step": 540640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "grad_norm": 0.11938641220331192,
      "learning_rate": 3.122109906777153e-05,
      "loss": 0.7845,
      "step": 500
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.11220108717679977,
      "learning_rate": 3.119219813554306e-05,
      "loss": 0.7681,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.20039337873458862,
      "learning_rate": 3.116329720331459e-05,
      "loss": 0.7627,
      "step": 1500
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20968303084373474,
      "learning_rate": 3.113439627108612e-05,
      "loss": 0.751,
      "step": 2000
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.46042799949645996,
      "learning_rate": 3.1105495338857653e-05,
      "loss": 0.7395,
      "step": 2500
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5682156682014465,
      "learning_rate": 3.107659440662918e-05,
      "loss": 0.7169,
      "step": 3000
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.446135938167572,
      "learning_rate": 3.104769347440071e-05,
      "loss": 0.7037,
      "step": 3500
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5436543822288513,
      "learning_rate": 3.1018792542172244e-05,
      "loss": 0.6854,
      "step": 4000
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5623897314071655,
      "learning_rate": 3.098989160994377e-05,
      "loss": 0.6661,
      "step": 4500
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7239806652069092,
      "learning_rate": 3.09609906777153e-05,
      "loss": 0.6477,
      "step": 5000
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6363663077354431,
      "learning_rate": 3.0932089745486834e-05,
      "loss": 0.6302,
      "step": 5500
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8511515855789185,
      "learning_rate": 3.090318881325836e-05,
      "loss": 0.6156,
      "step": 6000
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7209456562995911,
      "learning_rate": 3.087428788102989e-05,
      "loss": 0.6002,
      "step": 6500
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.7105280756950378,
      "learning_rate": 3.0845386948801424e-05,
      "loss": 0.5852,
      "step": 7000
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.7035876512527466,
      "learning_rate": 3.081648601657295e-05,
      "loss": 0.5734,
      "step": 7500
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.6755463480949402,
      "learning_rate": 3.078758508434448e-05,
      "loss": 0.5619,
      "step": 8000
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.6636064648628235,
      "learning_rate": 3.0758684152116015e-05,
      "loss": 0.5527,
      "step": 8500
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.7909913063049316,
      "learning_rate": 3.072978321988754e-05,
      "loss": 0.545,
      "step": 9000
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.7935764789581299,
      "learning_rate": 3.070088228765907e-05,
      "loss": 0.5342,
      "step": 9500
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.7649631500244141,
      "learning_rate": 3.06719813554306e-05,
      "loss": 0.5264,
      "step": 10000
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.7262706160545349,
      "learning_rate": 3.064308042320213e-05,
      "loss": 0.5194,
      "step": 10500
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.7068478465080261,
      "learning_rate": 3.061417949097366e-05,
      "loss": 0.5137,
      "step": 11000
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.6415815353393555,
      "learning_rate": 3.058527855874519e-05,
      "loss": 0.51,
      "step": 11500
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.7167455554008484,
      "learning_rate": 3.055637762651672e-05,
      "loss": 0.5024,
      "step": 12000
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.6563605666160583,
      "learning_rate": 3.052747669428825e-05,
      "loss": 0.4985,
      "step": 12500
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.7427666783332825,
      "learning_rate": 3.049857576205978e-05,
      "loss": 0.4939,
      "step": 13000
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6371767520904541,
      "learning_rate": 3.046967482983131e-05,
      "loss": 0.4923,
      "step": 13500
    },
    {
      "epoch": 2.07,
      "grad_norm": 0.7062104940414429,
      "learning_rate": 3.044077389760284e-05,
      "loss": 0.4894,
      "step": 14000
    },
    {
      "epoch": 2.15,
      "grad_norm": 0.7556993365287781,
      "learning_rate": 3.041187296537437e-05,
      "loss": 0.4846,
      "step": 14500
    },
    {
      "epoch": 2.22,
      "grad_norm": 0.6561410427093506,
      "learning_rate": 3.03829720331459e-05,
      "loss": 0.4831,
      "step": 15000
    },
    {
      "epoch": 2.29,
      "grad_norm": 0.6414974331855774,
      "learning_rate": 3.0354071100917432e-05,
      "loss": 0.4807,
      "step": 15500
    },
    {
      "epoch": 2.37,
      "grad_norm": 0.6632120609283447,
      "learning_rate": 3.032517016868896e-05,
      "loss": 0.472,
      "step": 16000
    },
    {
      "epoch": 2.44,
      "grad_norm": 0.6413108706474304,
      "learning_rate": 3.029626923646049e-05,
      "loss": 0.4723,
      "step": 16500
    },
    {
      "epoch": 2.52,
      "grad_norm": 0.6478744149208069,
      "learning_rate": 3.0267368304232022e-05,
      "loss": 0.4692,
      "step": 17000
    },
    {
      "epoch": 2.59,
      "grad_norm": 0.5901973247528076,
      "learning_rate": 3.023846737200355e-05,
      "loss": 0.4672,
      "step": 17500
    },
    {
      "epoch": 2.66,
      "grad_norm": 0.5960791707038879,
      "learning_rate": 3.020956643977508e-05,
      "loss": 0.4649,
      "step": 18000
    },
    {
      "epoch": 2.74,
      "grad_norm": 0.6265193819999695,
      "learning_rate": 3.0180665507546612e-05,
      "loss": 0.4616,
      "step": 18500
    },
    {
      "epoch": 2.81,
      "grad_norm": 0.6381145119667053,
      "learning_rate": 3.0151764575318144e-05,
      "loss": 0.4592,
      "step": 19000
    },
    {
      "epoch": 2.89,
      "grad_norm": 0.6370628476142883,
      "learning_rate": 3.012286364308967e-05,
      "loss": 0.4594,
      "step": 19500
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.5658203363418579,
      "learning_rate": 3.0093962710861203e-05,
      "loss": 0.4542,
      "step": 20000
    },
    {
      "epoch": 3.03,
      "grad_norm": 0.5123589038848877,
      "learning_rate": 3.0065061778632734e-05,
      "loss": 0.4555,
      "step": 20500
    },
    {
      "epoch": 3.11,
      "grad_norm": 0.5034360289573669,
      "learning_rate": 3.0036160846404262e-05,
      "loss": 0.4501,
      "step": 21000
    },
    {
      "epoch": 3.18,
      "grad_norm": 0.5025657415390015,
      "learning_rate": 3.0007259914175793e-05,
      "loss": 0.4501,
      "step": 21500
    },
    {
      "epoch": 3.26,
      "grad_norm": 0.5448479056358337,
      "learning_rate": 2.9978358981947324e-05,
      "loss": 0.4481,
      "step": 22000
    },
    {
      "epoch": 3.33,
      "grad_norm": 0.5894014239311218,
      "learning_rate": 2.9949458049718852e-05,
      "loss": 0.4438,
      "step": 22500
    },
    {
      "epoch": 3.4,
      "grad_norm": 0.653883159160614,
      "learning_rate": 2.9920557117490383e-05,
      "loss": 0.444,
      "step": 23000
    },
    {
      "epoch": 3.48,
      "grad_norm": 0.4382980167865753,
      "learning_rate": 2.9891656185261915e-05,
      "loss": 0.4437,
      "step": 23500
    },
    {
      "epoch": 3.55,
      "grad_norm": 0.4639624357223511,
      "learning_rate": 2.9862755253033443e-05,
      "loss": 0.4398,
      "step": 24000
    },
    {
      "epoch": 3.63,
      "grad_norm": 0.527728796005249,
      "learning_rate": 2.9833854320804974e-05,
      "loss": 0.4386,
      "step": 24500
    },
    {
      "epoch": 3.7,
      "grad_norm": 0.543736457824707,
      "learning_rate": 2.9804953388576505e-05,
      "loss": 0.4392,
      "step": 25000
    },
    {
      "epoch": 3.77,
      "grad_norm": 0.5280329585075378,
      "learning_rate": 2.9776052456348033e-05,
      "loss": 0.4383,
      "step": 25500
    },
    {
      "epoch": 3.85,
      "grad_norm": 0.4563904106616974,
      "learning_rate": 2.9747151524119564e-05,
      "loss": 0.4371,
      "step": 26000
    },
    {
      "epoch": 3.92,
      "grad_norm": 0.5162687301635742,
      "learning_rate": 2.9718250591891095e-05,
      "loss": 0.4367,
      "step": 26500
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.4838933050632477,
      "learning_rate": 2.9689349659662623e-05,
      "loss": 0.4352,
      "step": 27000
    },
    {
      "epoch": 4.07,
      "grad_norm": 0.5301242470741272,
      "learning_rate": 2.9660448727434154e-05,
      "loss": 0.4319,
      "step": 27500
    },
    {
      "epoch": 4.14,
      "grad_norm": 0.5619557499885559,
      "learning_rate": 2.9631547795205686e-05,
      "loss": 0.4303,
      "step": 28000
    },
    {
      "epoch": 4.22,
      "grad_norm": 0.4900205433368683,
      "learning_rate": 2.9602646862977214e-05,
      "loss": 0.4312,
      "step": 28500
    },
    {
      "epoch": 4.29,
      "grad_norm": 0.46870502829551697,
      "learning_rate": 2.9573745930748745e-05,
      "loss": 0.4302,
      "step": 29000
    },
    {
      "epoch": 4.37,
      "grad_norm": 0.47382786870002747,
      "learning_rate": 2.9544844998520273e-05,
      "loss": 0.4287,
      "step": 29500
    },
    {
      "epoch": 4.44,
      "grad_norm": 0.5594569444656372,
      "learning_rate": 2.95159440662918e-05,
      "loss": 0.4284,
      "step": 30000
    },
    {
      "epoch": 4.51,
      "grad_norm": 0.511375367641449,
      "learning_rate": 2.9487043134063332e-05,
      "loss": 0.4262,
      "step": 30500
    },
    {
      "epoch": 4.59,
      "grad_norm": 0.5069934725761414,
      "learning_rate": 2.9458142201834863e-05,
      "loss": 0.4247,
      "step": 31000
    },
    {
      "epoch": 4.66,
      "grad_norm": 0.5310338139533997,
      "learning_rate": 2.942924126960639e-05,
      "loss": 0.4249,
      "step": 31500
    },
    {
      "epoch": 4.74,
      "grad_norm": 0.4728649854660034,
      "learning_rate": 2.9400340337377922e-05,
      "loss": 0.4225,
      "step": 32000
    },
    {
      "epoch": 4.81,
      "grad_norm": 0.45557233691215515,
      "learning_rate": 2.9371439405149453e-05,
      "loss": 0.4241,
      "step": 32500
    },
    {
      "epoch": 4.88,
      "grad_norm": 0.4630686938762665,
      "learning_rate": 2.934253847292098e-05,
      "loss": 0.4212,
      "step": 33000
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.509099543094635,
      "learning_rate": 2.9313637540692512e-05,
      "loss": 0.4215,
      "step": 33500
    },
    {
      "epoch": 5.03,
      "grad_norm": 0.4747762084007263,
      "learning_rate": 2.9284736608464044e-05,
      "loss": 0.4203,
      "step": 34000
    },
    {
      "epoch": 5.11,
      "grad_norm": 0.43625542521476746,
      "learning_rate": 2.925583567623557e-05,
      "loss": 0.4211,
      "step": 34500
    },
    {
      "epoch": 5.18,
      "grad_norm": 0.44176748394966125,
      "learning_rate": 2.9226934744007103e-05,
      "loss": 0.4209,
      "step": 35000
    },
    {
      "epoch": 5.25,
      "grad_norm": 0.5236085653305054,
      "learning_rate": 2.9198033811778634e-05,
      "loss": 0.422,
      "step": 35500
    },
    {
      "epoch": 5.33,
      "grad_norm": 0.4237843453884125,
      "learning_rate": 2.9169132879550162e-05,
      "loss": 0.4163,
      "step": 36000
    },
    {
      "epoch": 5.4,
      "grad_norm": 0.44581139087677,
      "learning_rate": 2.9140231947321693e-05,
      "loss": 0.4152,
      "step": 36500
    },
    {
      "epoch": 5.47,
      "grad_norm": 0.4488186836242676,
      "learning_rate": 2.9111331015093224e-05,
      "loss": 0.4175,
      "step": 37000
    },
    {
      "epoch": 5.55,
      "grad_norm": 0.5051326751708984,
      "learning_rate": 2.9082430082864752e-05,
      "loss": 0.4149,
      "step": 37500
    },
    {
      "epoch": 5.62,
      "grad_norm": 0.4836309850215912,
      "learning_rate": 2.9053529150636283e-05,
      "loss": 0.4138,
      "step": 38000
    },
    {
      "epoch": 5.7,
      "grad_norm": 0.46710771322250366,
      "learning_rate": 2.9024628218407815e-05,
      "loss": 0.4125,
      "step": 38500
    },
    {
      "epoch": 5.77,
      "grad_norm": 0.39740118384361267,
      "learning_rate": 2.8995727286179342e-05,
      "loss": 0.4169,
      "step": 39000
    },
    {
      "epoch": 5.84,
      "grad_norm": 0.4491262435913086,
      "learning_rate": 2.8966826353950874e-05,
      "loss": 0.4136,
      "step": 39500
    },
    {
      "epoch": 5.92,
      "grad_norm": 0.4240283966064453,
      "learning_rate": 2.8937925421722405e-05,
      "loss": 0.4143,
      "step": 40000
    },
    {
      "epoch": 5.99,
      "grad_norm": 0.43018123507499695,
      "learning_rate": 2.8909024489493933e-05,
      "loss": 0.41,
      "step": 40500
    },
    {
      "epoch": 6.07,
      "grad_norm": 0.49115487933158875,
      "learning_rate": 2.8880123557265464e-05,
      "loss": 0.4086,
      "step": 41000
    },
    {
      "epoch": 6.14,
      "grad_norm": 0.4617484211921692,
      "learning_rate": 2.8851222625036995e-05,
      "loss": 0.4111,
      "step": 41500
    },
    {
      "epoch": 6.21,
      "grad_norm": 0.4269873797893524,
      "learning_rate": 2.8822321692808523e-05,
      "loss": 0.4068,
      "step": 42000
    },
    {
      "epoch": 6.29,
      "grad_norm": 0.45183584094047546,
      "learning_rate": 2.8793420760580054e-05,
      "loss": 0.4104,
      "step": 42500
    },
    {
      "epoch": 6.36,
      "grad_norm": 0.3999849557876587,
      "learning_rate": 2.8764519828351586e-05,
      "loss": 0.4074,
      "step": 43000
    },
    {
      "epoch": 6.44,
      "grad_norm": 0.3897479772567749,
      "learning_rate": 2.8735618896123113e-05,
      "loss": 0.4113,
      "step": 43500
    },
    {
      "epoch": 6.51,
      "grad_norm": 0.36687174439430237,
      "learning_rate": 2.8706717963894645e-05,
      "loss": 0.409,
      "step": 44000
    },
    {
      "epoch": 6.58,
      "grad_norm": 0.41888511180877686,
      "learning_rate": 2.8677817031666176e-05,
      "loss": 0.4072,
      "step": 44500
    },
    {
      "epoch": 6.66,
      "grad_norm": 0.4102098047733307,
      "learning_rate": 2.8648916099437704e-05,
      "loss": 0.4081,
      "step": 45000
    },
    {
      "epoch": 6.73,
      "grad_norm": 0.42067912220954895,
      "learning_rate": 2.8620015167209235e-05,
      "loss": 0.4093,
      "step": 45500
    },
    {
      "epoch": 6.81,
      "grad_norm": 0.45427748560905457,
      "learning_rate": 2.8591114234980766e-05,
      "loss": 0.4076,
      "step": 46000
    },
    {
      "epoch": 6.88,
      "grad_norm": 0.394954115152359,
      "learning_rate": 2.8562213302752294e-05,
      "loss": 0.4067,
      "step": 46500
    },
    {
      "epoch": 6.95,
      "grad_norm": 0.42659953236579895,
      "learning_rate": 2.8533312370523825e-05,
      "loss": 0.4062,
      "step": 47000
    },
    {
      "epoch": 7.03,
      "grad_norm": 0.38056984543800354,
      "learning_rate": 2.8504411438295357e-05,
      "loss": 0.4061,
      "step": 47500
    },
    {
      "epoch": 7.1,
      "grad_norm": 0.368455708026886,
      "learning_rate": 2.8475510506066884e-05,
      "loss": 0.4032,
      "step": 48000
    },
    {
      "epoch": 7.18,
      "grad_norm": 0.44540271162986755,
      "learning_rate": 2.8446609573838416e-05,
      "loss": 0.4054,
      "step": 48500
    },
    {
      "epoch": 7.25,
      "grad_norm": 0.3926877975463867,
      "learning_rate": 2.8417708641609943e-05,
      "loss": 0.4024,
      "step": 49000
    },
    {
      "epoch": 7.32,
      "grad_norm": 0.4288729727268219,
      "learning_rate": 2.838880770938147e-05,
      "loss": 0.4013,
      "step": 49500
    },
    {
      "epoch": 7.4,
      "grad_norm": 0.4729566276073456,
      "learning_rate": 2.8359906777153003e-05,
      "loss": 0.4019,
      "step": 50000
    },
    {
      "epoch": 7.47,
      "grad_norm": 0.46875321865081787,
      "learning_rate": 2.8331005844924534e-05,
      "loss": 0.4,
      "step": 50500
    },
    {
      "epoch": 7.55,
      "grad_norm": 0.63325035572052,
      "learning_rate": 2.830210491269606e-05,
      "loss": 0.4008,
      "step": 51000
    },
    {
      "epoch": 7.62,
      "grad_norm": 0.4186055064201355,
      "learning_rate": 2.8273203980467593e-05,
      "loss": 0.4026,
      "step": 51500
    },
    {
      "epoch": 7.69,
      "grad_norm": 0.3860541880130768,
      "learning_rate": 2.8244303048239124e-05,
      "loss": 0.4022,
      "step": 52000
    },
    {
      "epoch": 7.77,
      "grad_norm": 0.4552393853664398,
      "learning_rate": 2.8215402116010652e-05,
      "loss": 0.3979,
      "step": 52500
    },
    {
      "epoch": 7.84,
      "grad_norm": 0.4990374743938446,
      "learning_rate": 2.8186501183782183e-05,
      "loss": 0.4001,
      "step": 53000
    },
    {
      "epoch": 7.92,
      "grad_norm": 0.46718060970306396,
      "learning_rate": 2.8157600251553714e-05,
      "loss": 0.4,
      "step": 53500
    },
    {
      "epoch": 7.99,
      "grad_norm": 0.45432960987091064,
      "learning_rate": 2.8128699319325242e-05,
      "loss": 0.398,
      "step": 54000
    },
    {
      "epoch": 8.06,
      "grad_norm": 0.40666621923446655,
      "learning_rate": 2.8099798387096774e-05,
      "loss": 0.3996,
      "step": 54500
    },
    {
      "epoch": 8.14,
      "grad_norm": 0.402972936630249,
      "learning_rate": 2.8070897454868305e-05,
      "loss": 0.3985,
      "step": 55000
    },
    {
      "epoch": 8.21,
      "grad_norm": 0.3767193853855133,
      "learning_rate": 2.8041996522639836e-05,
      "loss": 0.4,
      "step": 55500
    },
    {
      "epoch": 8.29,
      "grad_norm": 0.40102022886276245,
      "learning_rate": 2.8013095590411364e-05,
      "loss": 0.3987,
      "step": 56000
    },
    {
      "epoch": 8.36,
      "grad_norm": 0.4435707926750183,
      "learning_rate": 2.7984194658182895e-05,
      "loss": 0.3976,
      "step": 56500
    },
    {
      "epoch": 8.43,
      "grad_norm": 0.39804941415786743,
      "learning_rate": 2.7955293725954426e-05,
      "loss": 0.395,
      "step": 57000
    },
    {
      "epoch": 8.51,
      "grad_norm": 0.41703784465789795,
      "learning_rate": 2.7926392793725954e-05,
      "loss": 0.395,
      "step": 57500
    },
    {
      "epoch": 8.58,
      "grad_norm": 0.4349576234817505,
      "learning_rate": 2.7897491861497485e-05,
      "loss": 0.3946,
      "step": 58000
    },
    {
      "epoch": 8.66,
      "grad_norm": 0.37204691767692566,
      "learning_rate": 2.7868590929269017e-05,
      "loss": 0.394,
      "step": 58500
    },
    {
      "epoch": 8.73,
      "grad_norm": 0.42759761214256287,
      "learning_rate": 2.7839689997040545e-05,
      "loss": 0.3949,
      "step": 59000
    },
    {
      "epoch": 8.8,
      "grad_norm": 0.37754470109939575,
      "learning_rate": 2.7810789064812076e-05,
      "loss": 0.3939,
      "step": 59500
    },
    {
      "epoch": 8.88,
      "grad_norm": 0.3639107346534729,
      "learning_rate": 2.7781888132583607e-05,
      "loss": 0.3932,
      "step": 60000
    },
    {
      "epoch": 8.95,
      "grad_norm": 0.37291327118873596,
      "learning_rate": 2.7752987200355135e-05,
      "loss": 0.394,
      "step": 60500
    },
    {
      "epoch": 9.03,
      "grad_norm": 0.3964773416519165,
      "learning_rate": 2.7724086268126666e-05,
      "loss": 0.3959,
      "step": 61000
    },
    {
      "epoch": 9.1,
      "grad_norm": 0.4025065004825592,
      "learning_rate": 2.7695185335898197e-05,
      "loss": 0.3922,
      "step": 61500
    },
    {
      "epoch": 9.17,
      "grad_norm": 0.5499910116195679,
      "learning_rate": 2.7666284403669725e-05,
      "loss": 0.3893,
      "step": 62000
    },
    {
      "epoch": 9.25,
      "grad_norm": 0.43492835760116577,
      "learning_rate": 2.7637383471441256e-05,
      "loss": 0.3942,
      "step": 62500
    },
    {
      "epoch": 9.32,
      "grad_norm": 0.38981184363365173,
      "learning_rate": 2.7608482539212788e-05,
      "loss": 0.3941,
      "step": 63000
    },
    {
      "epoch": 9.4,
      "grad_norm": 0.4508809745311737,
      "learning_rate": 2.7579581606984316e-05,
      "loss": 0.3922,
      "step": 63500
    },
    {
      "epoch": 9.47,
      "grad_norm": 0.37447696924209595,
      "learning_rate": 2.7550680674755847e-05,
      "loss": 0.3905,
      "step": 64000
    },
    {
      "epoch": 9.54,
      "grad_norm": 0.40094566345214844,
      "learning_rate": 2.7521779742527378e-05,
      "loss": 0.3938,
      "step": 64500
    },
    {
      "epoch": 9.62,
      "grad_norm": 0.46564099192619324,
      "learning_rate": 2.7492878810298906e-05,
      "loss": 0.3914,
      "step": 65000
    },
    {
      "epoch": 9.69,
      "grad_norm": 0.37548139691352844,
      "learning_rate": 2.7463977878070437e-05,
      "loss": 0.3923,
      "step": 65500
    },
    {
      "epoch": 9.77,
      "grad_norm": 0.39845481514930725,
      "learning_rate": 2.743507694584197e-05,
      "loss": 0.3904,
      "step": 66000
    },
    {
      "epoch": 9.84,
      "grad_norm": 0.46478548645973206,
      "learning_rate": 2.7406176013613496e-05,
      "loss": 0.3897,
      "step": 66500
    },
    {
      "epoch": 9.91,
      "grad_norm": 0.5512229204177856,
      "learning_rate": 2.7377275081385027e-05,
      "loss": 0.3898,
      "step": 67000
    },
    {
      "epoch": 9.99,
      "grad_norm": 0.34783828258514404,
      "learning_rate": 2.734837414915656e-05,
      "loss": 0.3901,
      "step": 67500
    },
    {
      "epoch": 10.06,
      "grad_norm": 0.4403396546840668,
      "learning_rate": 2.7319473216928083e-05,
      "loss": 0.388,
      "step": 68000
    },
    {
      "epoch": 10.14,
      "grad_norm": 0.37262195348739624,
      "learning_rate": 2.7290572284699614e-05,
      "loss": 0.3869,
      "step": 68500
    },
    {
      "epoch": 10.21,
      "grad_norm": 0.38222742080688477,
      "learning_rate": 2.7261671352471146e-05,
      "loss": 0.3901,
      "step": 69000
    },
    {
      "epoch": 10.28,
      "grad_norm": 0.614713191986084,
      "learning_rate": 2.7232770420242673e-05,
      "loss": 0.3886,
      "step": 69500
    },
    {
      "epoch": 10.36,
      "grad_norm": 0.4252707362174988,
      "learning_rate": 2.7203869488014205e-05,
      "loss": 0.3874,
      "step": 70000
    },
    {
      "epoch": 10.43,
      "grad_norm": 0.3792737126350403,
      "learning_rate": 2.7174968555785736e-05,
      "loss": 0.3855,
      "step": 70500
    },
    {
      "epoch": 10.51,
      "grad_norm": 0.40962672233581543,
      "learning_rate": 2.7146067623557264e-05,
      "loss": 0.3875,
      "step": 71000
    },
    {
      "epoch": 10.58,
      "grad_norm": 0.38987433910369873,
      "learning_rate": 2.7117166691328795e-05,
      "loss": 0.3855,
      "step": 71500
    },
    {
      "epoch": 10.65,
      "grad_norm": 0.407105028629303,
      "learning_rate": 2.7088265759100326e-05,
      "loss": 0.3857,
      "step": 72000
    },
    {
      "epoch": 10.73,
      "grad_norm": 0.3527330160140991,
      "learning_rate": 2.7059364826871854e-05,
      "loss": 0.3869,
      "step": 72500
    },
    {
      "epoch": 10.8,
      "grad_norm": 0.3859241306781769,
      "learning_rate": 2.7030463894643385e-05,
      "loss": 0.3872,
      "step": 73000
    },
    {
      "epoch": 10.88,
      "grad_norm": 0.3989656865596771,
      "learning_rate": 2.7001562962414917e-05,
      "loss": 0.3855,
      "step": 73500
    },
    {
      "epoch": 10.95,
      "grad_norm": 0.4163249731063843,
      "learning_rate": 2.6972662030186444e-05,
      "loss": 0.3855,
      "step": 74000
    },
    {
      "epoch": 11.02,
      "grad_norm": 0.39577716588974,
      "learning_rate": 2.6943761097957976e-05,
      "loss": 0.3848,
      "step": 74500
    },
    {
      "epoch": 11.1,
      "grad_norm": 0.3792816400527954,
      "learning_rate": 2.6914860165729507e-05,
      "loss": 0.3864,
      "step": 75000
    },
    {
      "epoch": 11.17,
      "grad_norm": 0.3979376554489136,
      "learning_rate": 2.6885959233501035e-05,
      "loss": 0.3847,
      "step": 75500
    },
    {
      "epoch": 11.25,
      "grad_norm": 0.44446560740470886,
      "learning_rate": 2.6857058301272566e-05,
      "loss": 0.3851,
      "step": 76000
    },
    {
      "epoch": 11.32,
      "grad_norm": 0.3826451599597931,
      "learning_rate": 2.6828157369044097e-05,
      "loss": 0.385,
      "step": 76500
    },
    {
      "epoch": 11.39,
      "grad_norm": 0.38595423102378845,
      "learning_rate": 2.6799256436815625e-05,
      "loss": 0.3848,
      "step": 77000
    },
    {
      "epoch": 11.47,
      "grad_norm": 0.4047674238681793,
      "learning_rate": 2.6770355504587156e-05,
      "loss": 0.384,
      "step": 77500
    },
    {
      "epoch": 11.54,
      "grad_norm": 0.3704206347465515,
      "learning_rate": 2.6741454572358688e-05,
      "loss": 0.3846,
      "step": 78000
    },
    {
      "epoch": 11.62,
      "grad_norm": 0.42468497157096863,
      "learning_rate": 2.6712553640130215e-05,
      "loss": 0.3847,
      "step": 78500
    },
    {
      "epoch": 11.69,
      "grad_norm": 0.43300580978393555,
      "learning_rate": 2.6683652707901747e-05,
      "loss": 0.3783,
      "step": 79000
    },
    {
      "epoch": 11.76,
      "grad_norm": 0.3680213689804077,
      "learning_rate": 2.6654751775673278e-05,
      "loss": 0.3811,
      "step": 79500
    },
    {
      "epoch": 11.84,
      "grad_norm": 0.3971569240093231,
      "learning_rate": 2.6625850843444806e-05,
      "loss": 0.3816,
      "step": 80000
    },
    {
      "epoch": 11.91,
      "grad_norm": 0.3454134464263916,
      "learning_rate": 2.6596949911216337e-05,
      "loss": 0.38,
      "step": 80500
    },
    {
      "epoch": 11.99,
      "grad_norm": 0.38850536942481995,
      "learning_rate": 2.6568048978987868e-05,
      "loss": 0.3806,
      "step": 81000
    },
    {
      "epoch": 12.06,
      "grad_norm": 0.41783374547958374,
      "learning_rate": 2.6539148046759396e-05,
      "loss": 0.3805,
      "step": 81500
    },
    {
      "epoch": 12.13,
      "grad_norm": 0.37427714467048645,
      "learning_rate": 2.6510247114530927e-05,
      "loss": 0.3844,
      "step": 82000
    },
    {
      "epoch": 12.21,
      "grad_norm": 0.3917700946331024,
      "learning_rate": 2.648134618230246e-05,
      "loss": 0.3818,
      "step": 82500
    },
    {
      "epoch": 12.28,
      "grad_norm": 0.36409491300582886,
      "learning_rate": 2.6452445250073986e-05,
      "loss": 0.3815,
      "step": 83000
    },
    {
      "epoch": 12.36,
      "grad_norm": 0.4320700764656067,
      "learning_rate": 2.6423544317845518e-05,
      "loss": 0.3813,
      "step": 83500
    },
    {
      "epoch": 12.43,
      "grad_norm": 0.3927746117115021,
      "learning_rate": 2.639464338561705e-05,
      "loss": 0.3827,
      "step": 84000
    },
    {
      "epoch": 12.5,
      "grad_norm": 0.3693206310272217,
      "learning_rate": 2.6365742453388577e-05,
      "loss": 0.3803,
      "step": 84500
    },
    {
      "epoch": 12.58,
      "grad_norm": 0.4206922948360443,
      "learning_rate": 2.6336841521160108e-05,
      "loss": 0.3794,
      "step": 85000
    },
    {
      "epoch": 12.65,
      "grad_norm": 0.35786914825439453,
      "learning_rate": 2.630794058893164e-05,
      "loss": 0.3823,
      "step": 85500
    },
    {
      "epoch": 12.73,
      "grad_norm": 0.4055446982383728,
      "learning_rate": 2.6279039656703167e-05,
      "loss": 0.3797,
      "step": 86000
    },
    {
      "epoch": 12.8,
      "grad_norm": 0.473630428314209,
      "learning_rate": 2.62501387244747e-05,
      "loss": 0.3776,
      "step": 86500
    },
    {
      "epoch": 12.87,
      "grad_norm": 0.36061763763427734,
      "learning_rate": 2.622123779224623e-05,
      "loss": 0.3781,
      "step": 87000
    },
    {
      "epoch": 12.95,
      "grad_norm": 0.4378969371318817,
      "learning_rate": 2.6192336860017754e-05,
      "loss": 0.3778,
      "step": 87500
    },
    {
      "epoch": 13.02,
      "grad_norm": 0.3772602379322052,
      "learning_rate": 2.6163435927789285e-05,
      "loss": 0.3776,
      "step": 88000
    },
    {
      "epoch": 13.1,
      "grad_norm": 0.42682790756225586,
      "learning_rate": 2.6134534995560817e-05,
      "loss": 0.3802,
      "step": 88500
    },
    {
      "epoch": 13.17,
      "grad_norm": 0.38328275084495544,
      "learning_rate": 2.6105634063332344e-05,
      "loss": 0.3813,
      "step": 89000
    },
    {
      "epoch": 13.24,
      "grad_norm": 0.4136464595794678,
      "learning_rate": 2.6076733131103876e-05,
      "loss": 0.3773,
      "step": 89500
    },
    {
      "epoch": 13.32,
      "grad_norm": 0.39002037048339844,
      "learning_rate": 2.6047832198875407e-05,
      "loss": 0.3767,
      "step": 90000
    },
    {
      "epoch": 13.39,
      "grad_norm": 0.4823383092880249,
      "learning_rate": 2.6018931266646935e-05,
      "loss": 0.3789,
      "step": 90500
    },
    {
      "epoch": 13.47,
      "grad_norm": 0.3532434403896332,
      "learning_rate": 2.5990030334418466e-05,
      "loss": 0.3755,
      "step": 91000
    },
    {
      "epoch": 13.54,
      "grad_norm": 0.3406650424003601,
      "learning_rate": 2.5961129402189997e-05,
      "loss": 0.3782,
      "step": 91500
    },
    {
      "epoch": 13.61,
      "grad_norm": 0.42174699902534485,
      "learning_rate": 2.5932228469961525e-05,
      "loss": 0.3792,
      "step": 92000
    },
    {
      "epoch": 13.69,
      "grad_norm": 0.4112718999385834,
      "learning_rate": 2.5903327537733056e-05,
      "loss": 0.3758,
      "step": 92500
    },
    {
      "epoch": 13.76,
      "grad_norm": 0.39170435070991516,
      "learning_rate": 2.5874426605504588e-05,
      "loss": 0.3772,
      "step": 93000
    },
    {
      "epoch": 13.84,
      "grad_norm": 0.35669615864753723,
      "learning_rate": 2.584552567327612e-05,
      "loss": 0.376,
      "step": 93500
    },
    {
      "epoch": 13.91,
      "grad_norm": 0.36161208152770996,
      "learning_rate": 2.5816624741047647e-05,
      "loss": 0.3759,
      "step": 94000
    },
    {
      "epoch": 13.98,
      "grad_norm": 0.3548930883407593,
      "learning_rate": 2.5787723808819178e-05,
      "loss": 0.3772,
      "step": 94500
    },
    {
      "epoch": 14.06,
      "grad_norm": 0.3934749662876129,
      "learning_rate": 2.575882287659071e-05,
      "loss": 0.3774,
      "step": 95000
    },
    {
      "epoch": 14.13,
      "grad_norm": 0.3642575442790985,
      "learning_rate": 2.5729921944362237e-05,
      "loss": 0.3756,
      "step": 95500
    },
    {
      "epoch": 14.21,
      "grad_norm": 0.34236952662467957,
      "learning_rate": 2.5701021012133768e-05,
      "loss": 0.3758,
      "step": 96000
    },
    {
      "epoch": 14.28,
      "grad_norm": 0.40388983488082886,
      "learning_rate": 2.56721200799053e-05,
      "loss": 0.3749,
      "step": 96500
    },
    {
      "epoch": 14.35,
      "grad_norm": 0.432076632976532,
      "learning_rate": 2.5643219147676827e-05,
      "loss": 0.3756,
      "step": 97000
    },
    {
      "epoch": 14.43,
      "grad_norm": 0.3947947025299072,
      "learning_rate": 2.561431821544836e-05,
      "loss": 0.3792,
      "step": 97500
    },
    {
      "epoch": 14.5,
      "grad_norm": 0.3974926173686981,
      "learning_rate": 2.558541728321989e-05,
      "loss": 0.3741,
      "step": 98000
    },
    {
      "epoch": 14.58,
      "grad_norm": 0.3732788860797882,
      "learning_rate": 2.5556516350991418e-05,
      "loss": 0.3741,
      "step": 98500
    },
    {
      "epoch": 14.65,
      "grad_norm": 0.35293856263160706,
      "learning_rate": 2.552761541876295e-05,
      "loss": 0.375,
      "step": 99000
    },
    {
      "epoch": 14.72,
      "grad_norm": 0.38685211539268494,
      "learning_rate": 2.549871448653448e-05,
      "loss": 0.3711,
      "step": 99500
    },
    {
      "epoch": 14.8,
      "grad_norm": 0.40676021575927734,
      "learning_rate": 2.5469813554306008e-05,
      "loss": 0.3786,
      "step": 100000
    },
    {
      "epoch": 14.87,
      "grad_norm": 0.40946874022483826,
      "learning_rate": 2.544091262207754e-05,
      "loss": 0.3749,
      "step": 100500
    },
    {
      "epoch": 14.95,
      "grad_norm": 0.35838282108306885,
      "learning_rate": 2.541201168984907e-05,
      "loss": 0.3733,
      "step": 101000
    },
    {
      "epoch": 15.02,
      "grad_norm": 0.4110182821750641,
      "learning_rate": 2.5383110757620598e-05,
      "loss": 0.3712,
      "step": 101500
    },
    {
      "epoch": 15.09,
      "grad_norm": 0.35349026322364807,
      "learning_rate": 2.535420982539213e-05,
      "loss": 0.3719,
      "step": 102000
    },
    {
      "epoch": 15.17,
      "grad_norm": 0.38222944736480713,
      "learning_rate": 2.532530889316366e-05,
      "loss": 0.3737,
      "step": 102500
    },
    {
      "epoch": 15.24,
      "grad_norm": 0.47066986560821533,
      "learning_rate": 2.529640796093519e-05,
      "loss": 0.3728,
      "step": 103000
    },
    {
      "epoch": 15.32,
      "grad_norm": 0.3949437439441681,
      "learning_rate": 2.526750702870672e-05,
      "loss": 0.3752,
      "step": 103500
    },
    {
      "epoch": 15.39,
      "grad_norm": 0.42243218421936035,
      "learning_rate": 2.523860609647825e-05,
      "loss": 0.3729,
      "step": 104000
    },
    {
      "epoch": 15.46,
      "grad_norm": 0.4031197726726532,
      "learning_rate": 2.520970516424978e-05,
      "loss": 0.3704,
      "step": 104500
    },
    {
      "epoch": 15.54,
      "grad_norm": 0.4034245014190674,
      "learning_rate": 2.518080423202131e-05,
      "loss": 0.3713,
      "step": 105000
    },
    {
      "epoch": 15.61,
      "grad_norm": 0.4237426817417145,
      "learning_rate": 2.515190329979284e-05,
      "loss": 0.3719,
      "step": 105500
    },
    {
      "epoch": 15.69,
      "grad_norm": 0.41453346610069275,
      "learning_rate": 2.512300236756437e-05,
      "loss": 0.3742,
      "step": 106000
    },
    {
      "epoch": 15.76,
      "grad_norm": 0.4238516390323639,
      "learning_rate": 2.50941014353359e-05,
      "loss": 0.3734,
      "step": 106500
    },
    {
      "epoch": 15.83,
      "grad_norm": 0.3762485384941101,
      "learning_rate": 2.506520050310743e-05,
      "loss": 0.3736,
      "step": 107000
    },
    {
      "epoch": 15.91,
      "grad_norm": 0.36851537227630615,
      "learning_rate": 2.5036299570878956e-05,
      "loss": 0.3736,
      "step": 107500
    },
    {
      "epoch": 15.98,
      "grad_norm": 0.36127322912216187,
      "learning_rate": 2.5007398638650487e-05,
      "loss": 0.3716,
      "step": 108000
    },
    {
      "epoch": 16.06,
      "grad_norm": 0.4159682095050812,
      "learning_rate": 2.497849770642202e-05,
      "loss": 0.3695,
      "step": 108500
    },
    {
      "epoch": 16.13,
      "grad_norm": 0.40441277623176575,
      "learning_rate": 2.4949596774193547e-05,
      "loss": 0.3702,
      "step": 109000
    },
    {
      "epoch": 16.2,
      "grad_norm": 0.3531157076358795,
      "learning_rate": 2.4920695841965078e-05,
      "loss": 0.3751,
      "step": 109500
    },
    {
      "epoch": 16.28,
      "grad_norm": 0.40636512637138367,
      "learning_rate": 2.489179490973661e-05,
      "loss": 0.3692,
      "step": 110000
    },
    {
      "epoch": 16.35,
      "grad_norm": 0.3990442156791687,
      "learning_rate": 2.4862893977508137e-05,
      "loss": 0.3697,
      "step": 110500
    },
    {
      "epoch": 16.42,
      "grad_norm": 0.39944297075271606,
      "learning_rate": 2.4833993045279668e-05,
      "loss": 0.3683,
      "step": 111000
    },
    {
      "epoch": 16.5,
      "grad_norm": 0.3601832985877991,
      "learning_rate": 2.48050921130512e-05,
      "loss": 0.3699,
      "step": 111500
    },
    {
      "epoch": 16.57,
      "grad_norm": 0.40571844577789307,
      "learning_rate": 2.4776191180822727e-05,
      "loss": 0.3689,
      "step": 112000
    },
    {
      "epoch": 16.65,
      "grad_norm": 0.40363049507141113,
      "learning_rate": 2.474729024859426e-05,
      "loss": 0.3708,
      "step": 112500
    },
    {
      "epoch": 16.72,
      "grad_norm": 0.3990092873573303,
      "learning_rate": 2.471838931636579e-05,
      "loss": 0.3706,
      "step": 113000
    },
    {
      "epoch": 16.79,
      "grad_norm": 0.36532795429229736,
      "learning_rate": 2.4689488384137317e-05,
      "loss": 0.3721,
      "step": 113500
    },
    {
      "epoch": 16.87,
      "grad_norm": 0.44025781750679016,
      "learning_rate": 2.466058745190885e-05,
      "loss": 0.3733,
      "step": 114000
    },
    {
      "epoch": 16.94,
      "grad_norm": 0.3850398659706116,
      "learning_rate": 2.463168651968038e-05,
      "loss": 0.3693,
      "step": 114500
    },
    {
      "epoch": 17.02,
      "grad_norm": 0.38132092356681824,
      "learning_rate": 2.4602785587451908e-05,
      "loss": 0.3703,
      "step": 115000
    },
    {
      "epoch": 17.09,
      "grad_norm": 0.39702102541923523,
      "learning_rate": 2.457388465522344e-05,
      "loss": 0.3706,
      "step": 115500
    },
    {
      "epoch": 17.16,
      "grad_norm": 0.35070690512657166,
      "learning_rate": 2.454498372299497e-05,
      "loss": 0.3706,
      "step": 116000
    },
    {
      "epoch": 17.24,
      "grad_norm": 0.42235612869262695,
      "learning_rate": 2.4516082790766498e-05,
      "loss": 0.3703,
      "step": 116500
    },
    {
      "epoch": 17.31,
      "grad_norm": 0.35915622115135193,
      "learning_rate": 2.448718185853803e-05,
      "loss": 0.3693,
      "step": 117000
    },
    {
      "epoch": 17.39,
      "grad_norm": 0.34371674060821533,
      "learning_rate": 2.445828092630956e-05,
      "loss": 0.3686,
      "step": 117500
    },
    {
      "epoch": 17.46,
      "grad_norm": 0.48452600836753845,
      "learning_rate": 2.442937999408109e-05,
      "loss": 0.3696,
      "step": 118000
    },
    {
      "epoch": 17.53,
      "grad_norm": 0.39582401514053345,
      "learning_rate": 2.440047906185262e-05,
      "loss": 0.3698,
      "step": 118500
    },
    {
      "epoch": 17.61,
      "grad_norm": 0.43614286184310913,
      "learning_rate": 2.437157812962415e-05,
      "loss": 0.3688,
      "step": 119000
    },
    {
      "epoch": 17.68,
      "grad_norm": 0.3942769765853882,
      "learning_rate": 2.434267719739568e-05,
      "loss": 0.368,
      "step": 119500
    },
    {
      "epoch": 17.76,
      "grad_norm": 0.34341031312942505,
      "learning_rate": 2.431377626516721e-05,
      "loss": 0.3665,
      "step": 120000
    },
    {
      "epoch": 17.83,
      "grad_norm": 0.3661987781524658,
      "learning_rate": 2.428487533293874e-05,
      "loss": 0.3664,
      "step": 120500
    },
    {
      "epoch": 17.9,
      "grad_norm": 0.32992053031921387,
      "learning_rate": 2.425597440071027e-05,
      "loss": 0.3683,
      "step": 121000
    },
    {
      "epoch": 17.98,
      "grad_norm": 0.40151405334472656,
      "learning_rate": 2.42270734684818e-05,
      "loss": 0.3677,
      "step": 121500
    },
    {
      "epoch": 18.05,
      "grad_norm": 0.3343447148799896,
      "learning_rate": 2.419817253625333e-05,
      "loss": 0.3679,
      "step": 122000
    },
    {
      "epoch": 18.13,
      "grad_norm": 0.3798489272594452,
      "learning_rate": 2.416927160402486e-05,
      "loss": 0.3687,
      "step": 122500
    },
    {
      "epoch": 18.2,
      "grad_norm": 0.3244016766548157,
      "learning_rate": 2.414037067179639e-05,
      "loss": 0.3643,
      "step": 123000
    },
    {
      "epoch": 18.27,
      "grad_norm": 0.4036329984664917,
      "learning_rate": 2.4111469739567922e-05,
      "loss": 0.3703,
      "step": 123500
    },
    {
      "epoch": 18.35,
      "grad_norm": 0.3875889778137207,
      "learning_rate": 2.408256880733945e-05,
      "loss": 0.3661,
      "step": 124000
    },
    {
      "epoch": 18.42,
      "grad_norm": 0.3607046902179718,
      "learning_rate": 2.405366787511098e-05,
      "loss": 0.3646,
      "step": 124500
    },
    {
      "epoch": 18.5,
      "grad_norm": 0.33054810762405396,
      "learning_rate": 2.4024766942882512e-05,
      "loss": 0.3675,
      "step": 125000
    },
    {
      "epoch": 18.57,
      "grad_norm": 0.4091895520687103,
      "learning_rate": 2.399586601065404e-05,
      "loss": 0.3652,
      "step": 125500
    },
    {
      "epoch": 18.64,
      "grad_norm": 0.37667781114578247,
      "learning_rate": 2.396696507842557e-05,
      "loss": 0.3691,
      "step": 126000
    },
    {
      "epoch": 18.72,
      "grad_norm": 0.3683590888977051,
      "learning_rate": 2.39380641461971e-05,
      "loss": 0.3675,
      "step": 126500
    },
    {
      "epoch": 18.79,
      "grad_norm": 0.397073894739151,
      "learning_rate": 2.3909163213968627e-05,
      "loss": 0.3637,
      "step": 127000
    },
    {
      "epoch": 18.87,
      "grad_norm": 0.3522073030471802,
      "learning_rate": 2.3880262281740158e-05,
      "loss": 0.3676,
      "step": 127500
    },
    {
      "epoch": 18.94,
      "grad_norm": 0.3389582633972168,
      "learning_rate": 2.385136134951169e-05,
      "loss": 0.3676,
      "step": 128000
    },
    {
      "epoch": 19.01,
      "grad_norm": 0.3726537823677063,
      "learning_rate": 2.3822460417283217e-05,
      "loss": 0.3674,
      "step": 128500
    },
    {
      "epoch": 19.09,
      "grad_norm": 0.3774533271789551,
      "learning_rate": 2.379355948505475e-05,
      "loss": 0.3669,
      "step": 129000
    },
    {
      "epoch": 19.16,
      "grad_norm": 0.45427048206329346,
      "learning_rate": 2.376465855282628e-05,
      "loss": 0.3652,
      "step": 129500
    },
    {
      "epoch": 19.24,
      "grad_norm": 0.39148712158203125,
      "learning_rate": 2.373575762059781e-05,
      "loss": 0.3632,
      "step": 130000
    },
    {
      "epoch": 19.31,
      "grad_norm": 0.3727419078350067,
      "learning_rate": 2.370685668836934e-05,
      "loss": 0.3674,
      "step": 130500
    },
    {
      "epoch": 19.38,
      "grad_norm": 0.3490041196346283,
      "learning_rate": 2.367795575614087e-05,
      "loss": 0.3685,
      "step": 131000
    },
    {
      "epoch": 19.46,
      "grad_norm": 0.33863797783851624,
      "learning_rate": 2.36490548239124e-05,
      "loss": 0.3659,
      "step": 131500
    },
    {
      "epoch": 19.53,
      "grad_norm": 0.41820865869522095,
      "learning_rate": 2.362015389168393e-05,
      "loss": 0.3647,
      "step": 132000
    },
    {
      "epoch": 19.61,
      "grad_norm": 0.31935831904411316,
      "learning_rate": 2.359125295945546e-05,
      "loss": 0.3657,
      "step": 132500
    },
    {
      "epoch": 19.68,
      "grad_norm": 0.39523938298225403,
      "learning_rate": 2.3562352027226992e-05,
      "loss": 0.3643,
      "step": 133000
    },
    {
      "epoch": 19.75,
      "grad_norm": 0.3851146996021271,
      "learning_rate": 2.353345109499852e-05,
      "loss": 0.3624,
      "step": 133500
    },
    {
      "epoch": 19.83,
      "grad_norm": 0.3778953552246094,
      "learning_rate": 2.350455016277005e-05,
      "loss": 0.3658,
      "step": 134000
    },
    {
      "epoch": 19.9,
      "grad_norm": 0.3673354387283325,
      "learning_rate": 2.3475649230541582e-05,
      "loss": 0.3645,
      "step": 134500
    },
    {
      "epoch": 19.98,
      "grad_norm": 0.40675076842308044,
      "learning_rate": 2.344674829831311e-05,
      "loss": 0.3624,
      "step": 135000
    },
    {
      "epoch": 20.05,
      "grad_norm": 0.32396331429481506,
      "learning_rate": 2.341784736608464e-05,
      "loss": 0.3608,
      "step": 135500
    },
    {
      "epoch": 20.12,
      "grad_norm": 0.4665846526622772,
      "learning_rate": 2.3388946433856172e-05,
      "loss": 0.3654,
      "step": 136000
    },
    {
      "epoch": 20.2,
      "grad_norm": 0.3753814697265625,
      "learning_rate": 2.33600455016277e-05,
      "loss": 0.3611,
      "step": 136500
    },
    {
      "epoch": 20.27,
      "grad_norm": 0.39572277665138245,
      "learning_rate": 2.333114456939923e-05,
      "loss": 0.363,
      "step": 137000
    },
    {
      "epoch": 20.35,
      "grad_norm": 0.36638927459716797,
      "learning_rate": 2.3302243637170763e-05,
      "loss": 0.3625,
      "step": 137500
    },
    {
      "epoch": 20.42,
      "grad_norm": 0.40173882246017456,
      "learning_rate": 2.327334270494229e-05,
      "loss": 0.3645,
      "step": 138000
    },
    {
      "epoch": 20.49,
      "grad_norm": 0.34684666991233826,
      "learning_rate": 2.3244441772713822e-05,
      "loss": 0.3636,
      "step": 138500
    },
    {
      "epoch": 20.57,
      "grad_norm": 0.3533775806427002,
      "learning_rate": 2.3215540840485353e-05,
      "loss": 0.3624,
      "step": 139000
    },
    {
      "epoch": 20.64,
      "grad_norm": 0.36431315541267395,
      "learning_rate": 2.318663990825688e-05,
      "loss": 0.3649,
      "step": 139500
    },
    {
      "epoch": 20.72,
      "grad_norm": 0.3629516363143921,
      "learning_rate": 2.3157738976028412e-05,
      "loss": 0.3646,
      "step": 140000
    },
    {
      "epoch": 20.79,
      "grad_norm": 0.383987158536911,
      "learning_rate": 2.3128838043799943e-05,
      "loss": 0.3623,
      "step": 140500
    },
    {
      "epoch": 20.86,
      "grad_norm": 0.38170096278190613,
      "learning_rate": 2.309993711157147e-05,
      "loss": 0.3629,
      "step": 141000
    },
    {
      "epoch": 20.94,
      "grad_norm": 0.36627018451690674,
      "learning_rate": 2.3071036179343003e-05,
      "loss": 0.362,
      "step": 141500
    },
    {
      "epoch": 21.01,
      "grad_norm": 0.37587088346481323,
      "learning_rate": 2.3042135247114534e-05,
      "loss": 0.3624,
      "step": 142000
    },
    {
      "epoch": 21.09,
      "grad_norm": 0.3648183047771454,
      "learning_rate": 2.301323431488606e-05,
      "loss": 0.3633,
      "step": 142500
    },
    {
      "epoch": 21.16,
      "grad_norm": 0.3818926513195038,
      "learning_rate": 2.2984333382657593e-05,
      "loss": 0.3627,
      "step": 143000
    },
    {
      "epoch": 21.23,
      "grad_norm": 0.38963159918785095,
      "learning_rate": 2.2955432450429124e-05,
      "loss": 0.3619,
      "step": 143500
    },
    {
      "epoch": 21.31,
      "grad_norm": 0.4655527174472809,
      "learning_rate": 2.2926531518200652e-05,
      "loss": 0.3616,
      "step": 144000
    },
    {
      "epoch": 21.38,
      "grad_norm": 0.36112433671951294,
      "learning_rate": 2.2897630585972183e-05,
      "loss": 0.3597,
      "step": 144500
    },
    {
      "epoch": 21.46,
      "grad_norm": 0.3661918044090271,
      "learning_rate": 2.2868729653743714e-05,
      "loss": 0.3626,
      "step": 145000
    },
    {
      "epoch": 21.53,
      "grad_norm": 0.340862512588501,
      "learning_rate": 2.2839828721515242e-05,
      "loss": 0.3637,
      "step": 145500
    },
    {
      "epoch": 21.6,
      "grad_norm": 0.3376435339450836,
      "learning_rate": 2.281092778928677e-05,
      "loss": 0.3622,
      "step": 146000
    },
    {
      "epoch": 21.68,
      "grad_norm": 0.4211704730987549,
      "learning_rate": 2.27820268570583e-05,
      "loss": 0.3593,
      "step": 146500
    },
    {
      "epoch": 21.75,
      "grad_norm": 0.3886992037296295,
      "learning_rate": 2.275312592482983e-05,
      "loss": 0.3614,
      "step": 147000
    },
    {
      "epoch": 21.83,
      "grad_norm": 0.34686926007270813,
      "learning_rate": 2.272422499260136e-05,
      "loss": 0.3615,
      "step": 147500
    },
    {
      "epoch": 21.9,
      "grad_norm": 0.4066803455352783,
      "learning_rate": 2.269532406037289e-05,
      "loss": 0.3597,
      "step": 148000
    },
    {
      "epoch": 21.97,
      "grad_norm": 0.37257149815559387,
      "learning_rate": 2.266642312814442e-05,
      "loss": 0.364,
      "step": 148500
    },
    {
      "epoch": 22.05,
      "grad_norm": 0.3770715892314911,
      "learning_rate": 2.263752219591595e-05,
      "loss": 0.359,
      "step": 149000
    },
    {
      "epoch": 22.12,
      "grad_norm": 0.3486000597476959,
      "learning_rate": 2.2608621263687482e-05,
      "loss": 0.3616,
      "step": 149500
    },
    {
      "epoch": 22.2,
      "grad_norm": 0.4026411771774292,
      "learning_rate": 2.257972033145901e-05,
      "loss": 0.3615,
      "step": 150000
    },
    {
      "epoch": 22.27,
      "grad_norm": 0.4740307629108429,
      "learning_rate": 2.255081939923054e-05,
      "loss": 0.359,
      "step": 150500
    },
    {
      "epoch": 22.34,
      "grad_norm": 0.5692958235740662,
      "learning_rate": 2.2521918467002072e-05,
      "loss": 0.3614,
      "step": 151000
    },
    {
      "epoch": 22.42,
      "grad_norm": 0.4546482264995575,
      "learning_rate": 2.24930175347736e-05,
      "loss": 0.3609,
      "step": 151500
    },
    {
      "epoch": 22.49,
      "grad_norm": 0.3762848675251007,
      "learning_rate": 2.246411660254513e-05,
      "loss": 0.3612,
      "step": 152000
    },
    {
      "epoch": 22.57,
      "grad_norm": 0.3631458282470703,
      "learning_rate": 2.2435215670316663e-05,
      "loss": 0.3613,
      "step": 152500
    },
    {
      "epoch": 22.64,
      "grad_norm": 0.3560335040092468,
      "learning_rate": 2.240631473808819e-05,
      "loss": 0.3601,
      "step": 153000
    },
    {
      "epoch": 22.71,
      "grad_norm": 0.3739969730377197,
      "learning_rate": 2.2377413805859722e-05,
      "loss": 0.3586,
      "step": 153500
    },
    {
      "epoch": 22.79,
      "grad_norm": 0.3538212478160858,
      "learning_rate": 2.2348512873631253e-05,
      "loss": 0.3619,
      "step": 154000
    },
    {
      "epoch": 22.86,
      "grad_norm": 0.33162832260131836,
      "learning_rate": 2.231961194140278e-05,
      "loss": 0.3566,
      "step": 154500
    },
    {
      "epoch": 22.94,
      "grad_norm": 0.3731963634490967,
      "learning_rate": 2.2290711009174312e-05,
      "loss": 0.3617,
      "step": 155000
    },
    {
      "epoch": 23.01,
      "grad_norm": 0.3658241033554077,
      "learning_rate": 2.2261810076945843e-05,
      "loss": 0.3621,
      "step": 155500
    },
    {
      "epoch": 23.08,
      "grad_norm": 0.3295973837375641,
      "learning_rate": 2.223290914471737e-05,
      "loss": 0.3592,
      "step": 156000
    },
    {
      "epoch": 23.16,
      "grad_norm": 0.3476080894470215,
      "learning_rate": 2.2204008212488902e-05,
      "loss": 0.358,
      "step": 156500
    },
    {
      "epoch": 23.23,
      "grad_norm": 0.4091341197490692,
      "learning_rate": 2.2175107280260434e-05,
      "loss": 0.3588,
      "step": 157000
    },
    {
      "epoch": 23.31,
      "grad_norm": 0.4243708848953247,
      "learning_rate": 2.214620634803196e-05,
      "loss": 0.3581,
      "step": 157500
    },
    {
      "epoch": 23.38,
      "grad_norm": 0.4200844168663025,
      "learning_rate": 2.2117305415803493e-05,
      "loss": 0.3598,
      "step": 158000
    },
    {
      "epoch": 23.45,
      "grad_norm": 0.4001297652721405,
      "learning_rate": 2.2088404483575024e-05,
      "loss": 0.3608,
      "step": 158500
    },
    {
      "epoch": 23.53,
      "grad_norm": 0.44927239418029785,
      "learning_rate": 2.2059503551346552e-05,
      "loss": 0.3572,
      "step": 159000
    },
    {
      "epoch": 23.6,
      "grad_norm": 0.3438055217266083,
      "learning_rate": 2.2030602619118083e-05,
      "loss": 0.3603,
      "step": 159500
    },
    {
      "epoch": 23.68,
      "grad_norm": 0.395907461643219,
      "learning_rate": 2.2001701686889614e-05,
      "loss": 0.3583,
      "step": 160000
    },
    {
      "epoch": 23.75,
      "grad_norm": 0.3705403506755829,
      "learning_rate": 2.1972800754661142e-05,
      "loss": 0.3606,
      "step": 160500
    },
    {
      "epoch": 23.82,
      "grad_norm": 0.34676727652549744,
      "learning_rate": 2.1943899822432673e-05,
      "loss": 0.3589,
      "step": 161000
    },
    {
      "epoch": 23.9,
      "grad_norm": 0.3712906837463379,
      "learning_rate": 2.1914998890204205e-05,
      "loss": 0.3597,
      "step": 161500
    },
    {
      "epoch": 23.97,
      "grad_norm": 0.3136620819568634,
      "learning_rate": 2.1886097957975732e-05,
      "loss": 0.3571,
      "step": 162000
    },
    {
      "epoch": 24.05,
      "grad_norm": 0.3915017545223236,
      "learning_rate": 2.1857197025747264e-05,
      "loss": 0.3595,
      "step": 162500
    },
    {
      "epoch": 24.12,
      "grad_norm": 0.3403623700141907,
      "learning_rate": 2.1828296093518795e-05,
      "loss": 0.3608,
      "step": 163000
    },
    {
      "epoch": 24.19,
      "grad_norm": 0.3841993510723114,
      "learning_rate": 2.1799395161290323e-05,
      "loss": 0.3596,
      "step": 163500
    },
    {
      "epoch": 24.27,
      "grad_norm": 0.42894381284713745,
      "learning_rate": 2.1770494229061854e-05,
      "loss": 0.3571,
      "step": 164000
    },
    {
      "epoch": 24.34,
      "grad_norm": 0.4211946129798889,
      "learning_rate": 2.1741593296833385e-05,
      "loss": 0.3552,
      "step": 164500
    },
    {
      "epoch": 24.42,
      "grad_norm": 0.35293659567832947,
      "learning_rate": 2.1712692364604917e-05,
      "loss": 0.359,
      "step": 165000
    },
    {
      "epoch": 24.49,
      "grad_norm": 0.3743543326854706,
      "learning_rate": 2.168379143237644e-05,
      "loss": 0.3556,
      "step": 165500
    },
    {
      "epoch": 24.56,
      "grad_norm": 0.4101512134075165,
      "learning_rate": 2.1654890500147972e-05,
      "loss": 0.3561,
      "step": 166000
    },
    {
      "epoch": 24.64,
      "grad_norm": 0.34685665369033813,
      "learning_rate": 2.1625989567919503e-05,
      "loss": 0.3596,
      "step": 166500
    },
    {
      "epoch": 24.71,
      "grad_norm": 0.3677741289138794,
      "learning_rate": 2.159708863569103e-05,
      "loss": 0.3572,
      "step": 167000
    },
    {
      "epoch": 24.79,
      "grad_norm": 0.3836405575275421,
      "learning_rate": 2.1568187703462563e-05,
      "loss": 0.36,
      "step": 167500
    },
    {
      "epoch": 24.86,
      "grad_norm": 0.3649022579193115,
      "learning_rate": 2.1539286771234094e-05,
      "loss": 0.3558,
      "step": 168000
    },
    {
      "epoch": 24.93,
      "grad_norm": 0.3566337525844574,
      "learning_rate": 2.151038583900562e-05,
      "loss": 0.3567,
      "step": 168500
    },
    {
      "epoch": 25.01,
      "grad_norm": 0.4024845361709595,
      "learning_rate": 2.1481484906777153e-05,
      "loss": 0.3555,
      "step": 169000
    },
    {
      "epoch": 25.08,
      "grad_norm": 0.3865801692008972,
      "learning_rate": 2.1452583974548684e-05,
      "loss": 0.3587,
      "step": 169500
    },
    {
      "epoch": 25.16,
      "grad_norm": 0.3753124475479126,
      "learning_rate": 2.1423683042320212e-05,
      "loss": 0.358,
      "step": 170000
    },
    {
      "epoch": 25.23,
      "grad_norm": 0.3889290690422058,
      "learning_rate": 2.1394782110091743e-05,
      "loss": 0.3573,
      "step": 170500
    },
    {
      "epoch": 25.3,
      "grad_norm": 0.425574392080307,
      "learning_rate": 2.1365881177863274e-05,
      "loss": 0.3583,
      "step": 171000
    },
    {
      "epoch": 25.38,
      "grad_norm": 0.35915040969848633,
      "learning_rate": 2.1336980245634802e-05,
      "loss": 0.355,
      "step": 171500
    },
    {
      "epoch": 25.45,
      "grad_norm": 0.3714876174926758,
      "learning_rate": 2.1308079313406334e-05,
      "loss": 0.3558,
      "step": 172000
    },
    {
      "epoch": 25.53,
      "grad_norm": 0.3659971356391907,
      "learning_rate": 2.1279178381177865e-05,
      "loss": 0.3526,
      "step": 172500
    },
    {
      "epoch": 25.6,
      "grad_norm": 0.35083669424057007,
      "learning_rate": 2.1250277448949393e-05,
      "loss": 0.3582,
      "step": 173000
    },
    {
      "epoch": 25.67,
      "grad_norm": 0.3540023863315582,
      "learning_rate": 2.1221376516720924e-05,
      "loss": 0.3555,
      "step": 173500
    },
    {
      "epoch": 25.75,
      "grad_norm": 0.3811222016811371,
      "learning_rate": 2.1192475584492455e-05,
      "loss": 0.3557,
      "step": 174000
    },
    {
      "epoch": 25.82,
      "grad_norm": 0.37513551115989685,
      "learning_rate": 2.1163574652263983e-05,
      "loss": 0.3563,
      "step": 174500
    },
    {
      "epoch": 25.9,
      "grad_norm": 0.4036356508731842,
      "learning_rate": 2.1134673720035514e-05,
      "loss": 0.3548,
      "step": 175000
    },
    {
      "epoch": 25.97,
      "grad_norm": 0.3446299135684967,
      "learning_rate": 2.1105772787807045e-05,
      "loss": 0.3573,
      "step": 175500
    },
    {
      "epoch": 26.04,
      "grad_norm": 0.4351588487625122,
      "learning_rate": 2.1076871855578573e-05,
      "loss": 0.3556,
      "step": 176000
    },
    {
      "epoch": 26.12,
      "grad_norm": 0.38238152861595154,
      "learning_rate": 2.1047970923350105e-05,
      "loss": 0.3566,
      "step": 176500
    },
    {
      "epoch": 26.19,
      "grad_norm": 0.3972441554069519,
      "learning_rate": 2.1019069991121636e-05,
      "loss": 0.3533,
      "step": 177000
    },
    {
      "epoch": 26.27,
      "grad_norm": 0.40132614970207214,
      "learning_rate": 2.0990169058893164e-05,
      "loss": 0.3548,
      "step": 177500
    },
    {
      "epoch": 26.34,
      "grad_norm": 0.3178902864456177,
      "learning_rate": 2.0961268126664695e-05,
      "loss": 0.355,
      "step": 178000
    },
    {
      "epoch": 26.41,
      "grad_norm": 0.4328124225139618,
      "learning_rate": 2.0932367194436226e-05,
      "loss": 0.3554,
      "step": 178500
    },
    {
      "epoch": 26.49,
      "grad_norm": 0.3971725404262543,
      "learning_rate": 2.0903466262207754e-05,
      "loss": 0.3549,
      "step": 179000
    },
    {
      "epoch": 26.56,
      "grad_norm": 0.3241216540336609,
      "learning_rate": 2.0874565329979285e-05,
      "loss": 0.3534,
      "step": 179500
    },
    {
      "epoch": 26.64,
      "grad_norm": 0.3448522984981537,
      "learning_rate": 2.0845664397750816e-05,
      "loss": 0.3559,
      "step": 180000
    },
    {
      "epoch": 26.71,
      "grad_norm": 0.34117060899734497,
      "learning_rate": 2.0816763465522344e-05,
      "loss": 0.3555,
      "step": 180500
    },
    {
      "epoch": 26.78,
      "grad_norm": 0.39051172137260437,
      "learning_rate": 2.0787862533293876e-05,
      "loss": 0.3558,
      "step": 181000
    },
    {
      "epoch": 26.86,
      "grad_norm": 0.3349858820438385,
      "learning_rate": 2.0758961601065407e-05,
      "loss": 0.3546,
      "step": 181500
    },
    {
      "epoch": 26.93,
      "grad_norm": 0.4579429030418396,
      "learning_rate": 2.0730060668836935e-05,
      "loss": 0.3537,
      "step": 182000
    },
    {
      "epoch": 27.01,
      "grad_norm": 0.3789091110229492,
      "learning_rate": 2.0701159736608466e-05,
      "loss": 0.3527,
      "step": 182500
    },
    {
      "epoch": 27.08,
      "grad_norm": 0.43690434098243713,
      "learning_rate": 2.0672258804379997e-05,
      "loss": 0.3563,
      "step": 183000
    },
    {
      "epoch": 27.15,
      "grad_norm": 0.3886288106441498,
      "learning_rate": 2.0643357872151525e-05,
      "loss": 0.3543,
      "step": 183500
    },
    {
      "epoch": 27.23,
      "grad_norm": 0.40548428893089294,
      "learning_rate": 2.0614456939923056e-05,
      "loss": 0.353,
      "step": 184000
    },
    {
      "epoch": 27.3,
      "grad_norm": 0.4054431915283203,
      "learning_rate": 2.0585556007694584e-05,
      "loss": 0.3575,
      "step": 184500
    },
    {
      "epoch": 27.37,
      "grad_norm": 0.3319009840488434,
      "learning_rate": 2.0556655075466112e-05,
      "loss": 0.3524,
      "step": 185000
    },
    {
      "epoch": 27.45,
      "grad_norm": 0.36432087421417236,
      "learning_rate": 2.0527754143237643e-05,
      "loss": 0.3556,
      "step": 185500
    },
    {
      "epoch": 27.52,
      "grad_norm": 0.3561677038669586,
      "learning_rate": 2.0498853211009174e-05,
      "loss": 0.3539,
      "step": 186000
    },
    {
      "epoch": 27.6,
      "grad_norm": 0.41498541831970215,
      "learning_rate": 2.0469952278780702e-05,
      "loss": 0.3561,
      "step": 186500
    },
    {
      "epoch": 27.67,
      "grad_norm": 0.3646217882633209,
      "learning_rate": 2.0441051346552233e-05,
      "loss": 0.3519,
      "step": 187000
    },
    {
      "epoch": 27.74,
      "grad_norm": 0.34534063935279846,
      "learning_rate": 2.0412150414323765e-05,
      "loss": 0.3539,
      "step": 187500
    },
    {
      "epoch": 27.82,
      "grad_norm": 0.4323655962944031,
      "learning_rate": 2.0383249482095293e-05,
      "loss": 0.3559,
      "step": 188000
    },
    {
      "epoch": 27.89,
      "grad_norm": 0.3833807408809662,
      "learning_rate": 2.0354348549866824e-05,
      "loss": 0.3532,
      "step": 188500
    },
    {
      "epoch": 27.97,
      "grad_norm": 0.37557268142700195,
      "learning_rate": 2.0325447617638355e-05,
      "loss": 0.3523,
      "step": 189000
    },
    {
      "epoch": 28.04,
      "grad_norm": 0.37144702672958374,
      "learning_rate": 2.0296546685409883e-05,
      "loss": 0.3536,
      "step": 189500
    },
    {
      "epoch": 28.11,
      "grad_norm": 0.40455296635627747,
      "learning_rate": 2.0267645753181414e-05,
      "loss": 0.3534,
      "step": 190000
    },
    {
      "epoch": 28.19,
      "grad_norm": 0.3639744818210602,
      "learning_rate": 2.0238744820952945e-05,
      "loss": 0.3536,
      "step": 190500
    },
    {
      "epoch": 28.26,
      "grad_norm": 0.38016533851623535,
      "learning_rate": 2.0209843888724473e-05,
      "loss": 0.3569,
      "step": 191000
    },
    {
      "epoch": 28.34,
      "grad_norm": 0.35611262917518616,
      "learning_rate": 2.0180942956496004e-05,
      "loss": 0.3539,
      "step": 191500
    },
    {
      "epoch": 28.41,
      "grad_norm": 0.3586650490760803,
      "learning_rate": 2.0152042024267536e-05,
      "loss": 0.3516,
      "step": 192000
    },
    {
      "epoch": 28.48,
      "grad_norm": 0.3105120062828064,
      "learning_rate": 2.0123141092039064e-05,
      "loss": 0.3518,
      "step": 192500
    },
    {
      "epoch": 28.56,
      "grad_norm": 0.37972891330718994,
      "learning_rate": 2.0094240159810595e-05,
      "loss": 0.3558,
      "step": 193000
    },
    {
      "epoch": 28.63,
      "grad_norm": 0.35530367493629456,
      "learning_rate": 2.0065339227582126e-05,
      "loss": 0.3505,
      "step": 193500
    },
    {
      "epoch": 28.71,
      "grad_norm": 0.42136579751968384,
      "learning_rate": 2.0036438295353654e-05,
      "loss": 0.3537,
      "step": 194000
    },
    {
      "epoch": 28.78,
      "grad_norm": 0.37874168157577515,
      "learning_rate": 2.0007537363125185e-05,
      "loss": 0.3505,
      "step": 194500
    },
    {
      "epoch": 28.85,
      "grad_norm": 0.33442074060440063,
      "learning_rate": 1.9978636430896716e-05,
      "loss": 0.3536,
      "step": 195000
    },
    {
      "epoch": 28.93,
      "grad_norm": 0.37098708748817444,
      "learning_rate": 1.9949735498668244e-05,
      "loss": 0.3553,
      "step": 195500
    },
    {
      "epoch": 29.0,
      "grad_norm": 0.33478862047195435,
      "learning_rate": 1.9920834566439775e-05,
      "loss": 0.3527,
      "step": 196000
    },
    {
      "epoch": 29.08,
      "grad_norm": 0.3783182203769684,
      "learning_rate": 1.9891933634211307e-05,
      "loss": 0.3523,
      "step": 196500
    },
    {
      "epoch": 29.15,
      "grad_norm": 0.32911786437034607,
      "learning_rate": 1.9863032701982835e-05,
      "loss": 0.3526,
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 29.22, |
|
"grad_norm": 0.33882907032966614, |
|
"learning_rate": 1.9834131769754366e-05, |
|
"loss": 0.3507, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"grad_norm": 0.4318142235279083, |
|
"learning_rate": 1.9805230837525897e-05, |
|
"loss": 0.3504, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 29.37, |
|
"grad_norm": 0.33973386883735657, |
|
"learning_rate": 1.9776329905297425e-05, |
|
"loss": 0.3526, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 29.45, |
|
"grad_norm": 0.3557802736759186, |
|
"learning_rate": 1.9747428973068956e-05, |
|
"loss": 0.3511, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 29.52, |
|
"grad_norm": 0.4430686831474304, |
|
"learning_rate": 1.9718528040840487e-05, |
|
"loss": 0.3503, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"grad_norm": 0.33132269978523254, |
|
"learning_rate": 1.9689627108612015e-05, |
|
"loss": 0.3531, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"grad_norm": 0.362075537443161, |
|
"learning_rate": 1.9660726176383546e-05, |
|
"loss": 0.3517, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 29.74, |
|
"grad_norm": 0.3604036867618561, |
|
"learning_rate": 1.9631825244155078e-05, |
|
"loss": 0.3558, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 29.82, |
|
"grad_norm": 0.39711400866508484, |
|
"learning_rate": 1.960292431192661e-05, |
|
"loss": 0.3515, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 29.89, |
|
"grad_norm": 0.30394095182418823, |
|
"learning_rate": 1.9574023379698137e-05, |
|
"loss": 0.352, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"grad_norm": 0.3903627097606659, |
|
"learning_rate": 1.9545122447469668e-05, |
|
"loss": 0.3496, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 30.04, |
|
"grad_norm": 0.3124367296695709, |
|
"learning_rate": 1.95162215152412e-05, |
|
"loss": 0.3517, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 30.11, |
|
"grad_norm": 0.4038899540901184, |
|
"learning_rate": 1.9487320583012727e-05, |
|
"loss": 0.3518, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"grad_norm": 0.32454368472099304, |
|
"learning_rate": 1.9458419650784255e-05, |
|
"loss": 0.3517, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 30.26, |
|
"grad_norm": 0.35400575399398804, |
|
"learning_rate": 1.9429518718555786e-05, |
|
"loss": 0.3479, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 30.33, |
|
"grad_norm": 0.424834281206131, |
|
"learning_rate": 1.9400617786327314e-05, |
|
"loss": 0.3502, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"grad_norm": 0.4748223125934601, |
|
"learning_rate": 1.9371716854098845e-05, |
|
"loss": 0.349, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"grad_norm": 0.3032289445400238, |
|
"learning_rate": 1.9342815921870376e-05, |
|
"loss": 0.3543, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"grad_norm": 0.4162702262401581, |
|
"learning_rate": 1.9313914989641904e-05, |
|
"loss": 0.3518, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 30.63, |
|
"grad_norm": 0.3512803912162781, |
|
"learning_rate": 1.9285014057413436e-05, |
|
"loss": 0.3505, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 30.7, |
|
"grad_norm": 0.3622516989707947, |
|
"learning_rate": 1.9256113125184967e-05, |
|
"loss": 0.3538, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"grad_norm": 0.3330516517162323, |
|
"learning_rate": 1.9227212192956495e-05, |
|
"loss": 0.3489, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 30.85, |
|
"grad_norm": 0.3457803726196289, |
|
"learning_rate": 1.9198311260728026e-05, |
|
"loss": 0.352, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"grad_norm": 0.3154030442237854, |
|
"learning_rate": 1.9169410328499557e-05, |
|
"loss": 0.3491, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 0.46131783723831177, |
|
"learning_rate": 1.9140509396271085e-05, |
|
"loss": 0.3509, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 31.07, |
|
"grad_norm": 0.400088369846344, |
|
"learning_rate": 1.9111608464042616e-05, |
|
"loss": 0.3509, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"grad_norm": 0.3647451400756836, |
|
"learning_rate": 1.9082707531814147e-05, |
|
"loss": 0.35, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"grad_norm": 0.4007732570171356, |
|
"learning_rate": 1.9053806599585675e-05, |
|
"loss": 0.3504, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"grad_norm": 0.3861900269985199, |
|
"learning_rate": 1.9024905667357207e-05, |
|
"loss": 0.3484, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"grad_norm": 0.411627858877182, |
|
"learning_rate": 1.8996004735128738e-05, |
|
"loss": 0.3507, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 31.44, |
|
"grad_norm": 0.35766077041625977, |
|
"learning_rate": 1.8967103802900266e-05, |
|
"loss": 0.3485, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 31.52, |
|
"grad_norm": 0.3537013530731201, |
|
"learning_rate": 1.8938202870671797e-05, |
|
"loss": 0.3517, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 31.59, |
|
"grad_norm": 0.3919309675693512, |
|
"learning_rate": 1.8909301938443328e-05, |
|
"loss": 0.3489, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"grad_norm": 0.3441930115222931, |
|
"learning_rate": 1.8880401006214856e-05, |
|
"loss": 0.351, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 31.74, |
|
"grad_norm": 0.38138172030448914, |
|
"learning_rate": 1.8851500073986387e-05, |
|
"loss": 0.3495, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 31.81, |
|
"grad_norm": 0.4080500304698944, |
|
"learning_rate": 1.882259914175792e-05, |
|
"loss": 0.3497, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 31.89, |
|
"grad_norm": 0.3864932358264923, |
|
"learning_rate": 1.8793698209529446e-05, |
|
"loss": 0.3525, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 31.96, |
|
"grad_norm": 0.4017949104309082, |
|
"learning_rate": 1.8764797277300978e-05, |
|
"loss": 0.3528, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 32.04, |
|
"grad_norm": 0.3484615087509155, |
|
"learning_rate": 1.873589634507251e-05, |
|
"loss": 0.3505, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 32.11, |
|
"grad_norm": 0.34500235319137573, |
|
"learning_rate": 1.8706995412844037e-05, |
|
"loss": 0.3498, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 32.18, |
|
"grad_norm": 0.32486996054649353, |
|
"learning_rate": 1.8678094480615568e-05, |
|
"loss": 0.3501, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"grad_norm": 0.3440997302532196, |
|
"learning_rate": 1.86491935483871e-05, |
|
"loss": 0.3504, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"grad_norm": 0.359846293926239, |
|
"learning_rate": 1.8620292616158627e-05, |
|
"loss": 0.3491, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"grad_norm": 0.36168062686920166, |
|
"learning_rate": 1.8591391683930158e-05, |
|
"loss": 0.3489, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"grad_norm": 0.43606841564178467, |
|
"learning_rate": 1.856249075170169e-05, |
|
"loss": 0.3497, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"grad_norm": 0.3898315727710724, |
|
"learning_rate": 1.8533589819473217e-05, |
|
"loss": 0.3487, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 32.63, |
|
"grad_norm": 0.381244421005249, |
|
"learning_rate": 1.850468888724475e-05, |
|
"loss": 0.3475, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 32.7, |
|
"grad_norm": 0.41321712732315063, |
|
"learning_rate": 1.847578795501628e-05, |
|
"loss": 0.3484, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"grad_norm": 0.3538101017475128, |
|
"learning_rate": 1.8446887022787808e-05, |
|
"loss": 0.3484, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 32.85, |
|
"grad_norm": 0.38104715943336487, |
|
"learning_rate": 1.841798609055934e-05, |
|
"loss": 0.3482, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 32.92, |
|
"grad_norm": 0.37761756777763367, |
|
"learning_rate": 1.838908515833087e-05, |
|
"loss": 0.3474, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 0.3524073362350464, |
|
"learning_rate": 1.8360184226102398e-05, |
|
"loss": 0.3518, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"grad_norm": 0.3452695608139038, |
|
"learning_rate": 1.8331283293873926e-05, |
|
"loss": 0.3509, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 33.15, |
|
"grad_norm": 0.4063817262649536, |
|
"learning_rate": 1.8302382361645457e-05, |
|
"loss": 0.3496, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 33.22, |
|
"grad_norm": 0.41099056601524353, |
|
"learning_rate": 1.8273481429416985e-05, |
|
"loss": 0.3489, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 33.29, |
|
"grad_norm": 0.3691389560699463, |
|
"learning_rate": 1.8244580497188516e-05, |
|
"loss": 0.3507, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 33.37, |
|
"grad_norm": 0.36765575408935547, |
|
"learning_rate": 1.8215679564960047e-05, |
|
"loss": 0.3464, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"grad_norm": 0.39067772030830383, |
|
"learning_rate": 1.8186778632731575e-05, |
|
"loss": 0.3479, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 33.52, |
|
"grad_norm": 0.3263433873653412, |
|
"learning_rate": 1.8157877700503106e-05, |
|
"loss": 0.3457, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 33.59, |
|
"grad_norm": 0.45672136545181274, |
|
"learning_rate": 1.8128976768274638e-05, |
|
"loss": 0.3488, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 33.66, |
|
"grad_norm": 0.42077481746673584, |
|
"learning_rate": 1.8100075836046166e-05, |
|
"loss": 0.3447, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 33.74, |
|
"grad_norm": 0.3391963243484497, |
|
"learning_rate": 1.8071174903817697e-05, |
|
"loss": 0.3471, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 33.81, |
|
"grad_norm": 0.3776566684246063, |
|
"learning_rate": 1.8042273971589228e-05, |
|
"loss": 0.3498, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 33.89, |
|
"grad_norm": 0.3776949942111969, |
|
"learning_rate": 1.8013373039360756e-05, |
|
"loss": 0.3463, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"grad_norm": 0.3695808947086334, |
|
"learning_rate": 1.7984472107132287e-05, |
|
"loss": 0.3492, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 34.03, |
|
"grad_norm": 0.36413583159446716, |
|
"learning_rate": 1.795557117490382e-05, |
|
"loss": 0.3472, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 34.11, |
|
"grad_norm": 0.39360344409942627, |
|
"learning_rate": 1.7926670242675346e-05, |
|
"loss": 0.3505, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 34.18, |
|
"grad_norm": 0.3416905999183655, |
|
"learning_rate": 1.7897769310446877e-05, |
|
"loss": 0.3472, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"grad_norm": 0.40496984124183655, |
|
"learning_rate": 1.786886837821841e-05, |
|
"loss": 0.3477, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 34.33, |
|
"grad_norm": 0.3441724479198456, |
|
"learning_rate": 1.7839967445989937e-05, |
|
"loss": 0.3479, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"grad_norm": 0.37928706407546997, |
|
"learning_rate": 1.7811066513761468e-05, |
|
"loss": 0.3486, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"grad_norm": 0.3675363063812256, |
|
"learning_rate": 1.7782165581533e-05, |
|
"loss": 0.3458, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"grad_norm": 0.40871405601501465, |
|
"learning_rate": 1.7753264649304527e-05, |
|
"loss": 0.3493, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 34.63, |
|
"grad_norm": 0.417258620262146, |
|
"learning_rate": 1.7724363717076058e-05, |
|
"loss": 0.3453, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 34.7, |
|
"grad_norm": 0.3210906386375427, |
|
"learning_rate": 1.769546278484759e-05, |
|
"loss": 0.3457, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"grad_norm": 0.3412734270095825, |
|
"learning_rate": 1.7666561852619117e-05, |
|
"loss": 0.3475, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 34.85, |
|
"grad_norm": 0.39695098996162415, |
|
"learning_rate": 1.763766092039065e-05, |
|
"loss": 0.3509, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 34.92, |
|
"grad_norm": 0.36834970116615295, |
|
"learning_rate": 1.760875998816218e-05, |
|
"loss": 0.3485, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.3971041738986969, |
|
"learning_rate": 1.7579859055933708e-05, |
|
"loss": 0.3468, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 35.07, |
|
"grad_norm": 0.3513786196708679, |
|
"learning_rate": 1.755095812370524e-05, |
|
"loss": 0.3478, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 35.14, |
|
"grad_norm": 0.35932984948158264, |
|
"learning_rate": 1.752205719147677e-05, |
|
"loss": 0.348, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 35.22, |
|
"grad_norm": 0.40685245394706726, |
|
"learning_rate": 1.7493156259248298e-05, |
|
"loss": 0.3447, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"grad_norm": 0.37929338216781616, |
|
"learning_rate": 1.746425532701983e-05, |
|
"loss": 0.3465, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"grad_norm": 0.40910473465919495, |
|
"learning_rate": 1.743535439479136e-05, |
|
"loss": 0.3465, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 35.44, |
|
"grad_norm": 0.34920281171798706, |
|
"learning_rate": 1.740645346256289e-05, |
|
"loss": 0.3463, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 35.51, |
|
"grad_norm": 0.37716421484947205, |
|
"learning_rate": 1.737755253033442e-05, |
|
"loss": 0.3487, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 35.59, |
|
"grad_norm": 0.4301624596118927, |
|
"learning_rate": 1.734865159810595e-05, |
|
"loss": 0.3449, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 35.66, |
|
"grad_norm": 0.37206390500068665, |
|
"learning_rate": 1.7319750665877482e-05, |
|
"loss": 0.3453, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 35.74, |
|
"grad_norm": 0.38183438777923584, |
|
"learning_rate": 1.729084973364901e-05, |
|
"loss": 0.3493, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"grad_norm": 0.3732428252696991, |
|
"learning_rate": 1.726194880142054e-05, |
|
"loss": 0.349, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 35.88, |
|
"grad_norm": 0.3665318787097931, |
|
"learning_rate": 1.7233047869192072e-05, |
|
"loss": 0.3482, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 35.96, |
|
"grad_norm": 0.3693431317806244, |
|
"learning_rate": 1.7204146936963597e-05, |
|
"loss": 0.3445, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 36.03, |
|
"grad_norm": 0.3580050766468048, |
|
"learning_rate": 1.7175246004735128e-05, |
|
"loss": 0.3469, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"grad_norm": 0.3874383568763733, |
|
"learning_rate": 1.714634507250666e-05, |
|
"loss": 0.3483, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 36.18, |
|
"grad_norm": 0.39490264654159546, |
|
"learning_rate": 1.7117444140278187e-05, |
|
"loss": 0.3479, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 36.25, |
|
"grad_norm": 0.38647031784057617, |
|
"learning_rate": 1.7088543208049718e-05, |
|
"loss": 0.3458, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 36.33, |
|
"grad_norm": 0.3899756073951721, |
|
"learning_rate": 1.705964227582125e-05, |
|
"loss": 0.3448, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"grad_norm": 0.4022426903247833, |
|
"learning_rate": 1.7030741343592777e-05, |
|
"loss": 0.3463, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 36.48, |
|
"grad_norm": 0.37726154923439026, |
|
"learning_rate": 1.700184041136431e-05, |
|
"loss": 0.3469, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"grad_norm": 0.4022297263145447, |
|
"learning_rate": 1.697293947913584e-05, |
|
"loss": 0.3457, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"grad_norm": 0.43506869673728943, |
|
"learning_rate": 1.6944038546907368e-05, |
|
"loss": 0.3467, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 36.7, |
|
"grad_norm": 0.4157175123691559, |
|
"learning_rate": 1.69151376146789e-05, |
|
"loss": 0.3443, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 36.77, |
|
"grad_norm": 0.4038371443748474, |
|
"learning_rate": 1.688623668245043e-05, |
|
"loss": 0.3408, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 36.85, |
|
"grad_norm": 0.3598155081272125, |
|
"learning_rate": 1.6857335750221958e-05, |
|
"loss": 0.3455, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"grad_norm": 0.3888005018234253, |
|
"learning_rate": 1.682843481799349e-05, |
|
"loss": 0.3465, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"grad_norm": 0.3933923840522766, |
|
"learning_rate": 1.679953388576502e-05, |
|
"loss": 0.3488, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 37.07, |
|
"grad_norm": 0.36610084772109985, |
|
"learning_rate": 1.677063295353655e-05, |
|
"loss": 0.3448, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 37.14, |
|
"grad_norm": 0.3755366802215576, |
|
"learning_rate": 1.674173202130808e-05, |
|
"loss": 0.3475, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 37.22, |
|
"grad_norm": 0.3687468469142914, |
|
"learning_rate": 1.671283108907961e-05, |
|
"loss": 0.3441, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"grad_norm": 0.3150022327899933, |
|
"learning_rate": 1.668393015685114e-05, |
|
"loss": 0.3439, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 37.36, |
|
"grad_norm": 0.37440210580825806, |
|
"learning_rate": 1.665502922462267e-05, |
|
"loss": 0.3474, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 37.44, |
|
"grad_norm": 0.3700256943702698, |
|
"learning_rate": 1.66261282923942e-05, |
|
"loss": 0.3453, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 37.51, |
|
"grad_norm": 0.40626585483551025, |
|
"learning_rate": 1.659722736016573e-05, |
|
"loss": 0.345, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 37.59, |
|
"grad_norm": 0.39384424686431885, |
|
"learning_rate": 1.656832642793726e-05, |
|
"loss": 0.3465, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 37.66, |
|
"grad_norm": 0.38323110342025757, |
|
"learning_rate": 1.653942549570879e-05, |
|
"loss": 0.3458, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"grad_norm": 0.3620944619178772, |
|
"learning_rate": 1.651052456348032e-05, |
|
"loss": 0.3457, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"grad_norm": 0.3920278251171112, |
|
"learning_rate": 1.648162363125185e-05, |
|
"loss": 0.3433, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 37.88, |
|
"grad_norm": 0.3547744154930115, |
|
"learning_rate": 1.6452722699023382e-05, |
|
"loss": 0.3427, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 37.96, |
|
"grad_norm": 0.3048088252544403, |
|
"learning_rate": 1.642382176679491e-05, |
|
"loss": 0.3462, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"grad_norm": 0.3744346499443054, |
|
"learning_rate": 1.639492083456644e-05, |
|
"loss": 0.3451, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"grad_norm": 0.3640407621860504, |
|
"learning_rate": 1.6366019902337972e-05, |
|
"loss": 0.3485, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 38.18, |
|
"grad_norm": 0.37335264682769775, |
|
"learning_rate": 1.63371189701095e-05, |
|
"loss": 0.3466, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"grad_norm": 0.3745587170124054, |
|
"learning_rate": 1.630821803788103e-05, |
|
"loss": 0.344, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"grad_norm": 0.462406724691391, |
|
"learning_rate": 1.6279317105652562e-05, |
|
"loss": 0.3432, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"grad_norm": 0.3359210193157196, |
|
"learning_rate": 1.625041617342409e-05, |
|
"loss": 0.3469, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 38.47, |
|
"grad_norm": 0.3449317514896393, |
|
"learning_rate": 1.622151524119562e-05, |
|
"loss": 0.3435, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 38.55, |
|
"grad_norm": 0.4265647232532501, |
|
"learning_rate": 1.6192614308967153e-05, |
|
"loss": 0.3458, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 38.62, |
|
"grad_norm": 0.40118905901908875, |
|
"learning_rate": 1.616371337673868e-05, |
|
"loss": 0.3461, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 38.69, |
|
"grad_norm": 0.36499762535095215, |
|
"learning_rate": 1.6134812444510212e-05, |
|
"loss": 0.3457, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"grad_norm": 0.37067875266075134, |
|
"learning_rate": 1.6105911512281743e-05, |
|
"loss": 0.3426, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 38.84, |
|
"grad_norm": 0.402778685092926, |
|
"learning_rate": 1.6077010580053268e-05, |
|
"loss": 0.3432, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 38.92, |
|
"grad_norm": 0.37418636679649353, |
|
"learning_rate": 1.60481096478248e-05, |
|
"loss": 0.347, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"grad_norm": 0.4147396981716156, |
|
"learning_rate": 1.601920871559633e-05, |
|
"loss": 0.3456, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 39.06, |
|
"grad_norm": 0.42823702096939087, |
|
"learning_rate": 1.5990307783367858e-05, |
|
"loss": 0.342, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 39.14, |
|
"grad_norm": 0.40999341011047363, |
|
"learning_rate": 1.596140685113939e-05, |
|
"loss": 0.3413, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 39.21, |
|
"grad_norm": 0.32551825046539307, |
|
"learning_rate": 1.593250591891092e-05, |
|
"loss": 0.3433, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"grad_norm": 0.3688596487045288, |
|
"learning_rate": 1.5903604986682448e-05, |
|
"loss": 0.3457, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"grad_norm": 0.39799386262893677, |
|
"learning_rate": 1.587470405445398e-05, |
|
"loss": 0.343, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 39.43, |
|
"grad_norm": 0.34967321157455444, |
|
"learning_rate": 1.584580312222551e-05, |
|
"loss": 0.3418, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 39.51, |
|
"grad_norm": 0.36091017723083496, |
|
"learning_rate": 1.581690218999704e-05, |
|
"loss": 0.3435, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 39.58, |
|
"grad_norm": 0.3361178934574127, |
|
"learning_rate": 1.578800125776857e-05, |
|
"loss": 0.3471, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"grad_norm": 0.36311858892440796, |
|
"learning_rate": 1.57591003255401e-05, |
|
"loss": 0.3442, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 39.73, |
|
"grad_norm": 0.37522685527801514, |
|
"learning_rate": 1.573019939331163e-05, |
|
"loss": 0.3432, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"grad_norm": 0.42775389552116394, |
|
"learning_rate": 1.570129846108316e-05, |
|
"loss": 0.344, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 39.88, |
|
"grad_norm": 0.40960633754730225, |
|
"learning_rate": 1.567239752885469e-05, |
|
"loss": 0.3428, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"grad_norm": 0.35652443766593933, |
|
"learning_rate": 1.564349659662622e-05, |
|
"loss": 0.3435, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 40.03, |
|
"grad_norm": 0.41139841079711914, |
|
"learning_rate": 1.561459566439775e-05, |
|
"loss": 0.3436, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 40.1, |
|
"grad_norm": 0.3651288151741028, |
|
"learning_rate": 1.5585694732169282e-05, |
|
"loss": 0.3447, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 40.17, |
|
"grad_norm": 0.37484046816825867, |
|
"learning_rate": 1.555679379994081e-05, |
|
"loss": 0.3456, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 40.25, |
|
"grad_norm": 0.3306860625743866, |
|
"learning_rate": 1.552789286771234e-05, |
|
"loss": 0.3428, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"grad_norm": 0.37026843428611755, |
|
"learning_rate": 1.5498991935483872e-05, |
|
"loss": 0.3433, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"grad_norm": 0.3959224224090576, |
|
"learning_rate": 1.54700910032554e-05, |
|
"loss": 0.3453, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 40.47, |
|
"grad_norm": 0.3823704719543457, |
|
"learning_rate": 1.544119007102693e-05, |
|
"loss": 0.3413, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"grad_norm": 0.33115535974502563, |
|
"learning_rate": 1.5412289138798462e-05, |
|
"loss": 0.3442, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"grad_norm": 0.5036399364471436, |
|
"learning_rate": 1.538338820656999e-05, |
|
"loss": 0.3417, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 40.69, |
|
"grad_norm": 0.3805595934391022, |
|
"learning_rate": 1.535448727434152e-05, |
|
"loss": 0.3453, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 40.77, |
|
"grad_norm": 0.4390459656715393, |
|
"learning_rate": 1.5325586342113053e-05, |
|
"loss": 0.343, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 40.84, |
|
"grad_norm": 0.3673398792743683, |
|
"learning_rate": 1.5296685409884584e-05, |
|
"loss": 0.3402, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"grad_norm": 0.36677980422973633, |
|
"learning_rate": 1.5267784477656112e-05, |
|
"loss": 0.3418, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"grad_norm": 0.37628763914108276, |
|
"learning_rate": 1.5238883545427641e-05, |
|
"loss": 0.3423, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 41.06, |
|
"grad_norm": 0.3959880769252777, |
|
"learning_rate": 1.5209982613199171e-05, |
|
"loss": 0.3413, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"grad_norm": 0.35615137219429016, |
|
"learning_rate": 1.51810816809707e-05, |
|
"loss": 0.3438, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"grad_norm": 0.4133353531360626, |
|
"learning_rate": 1.5152180748742232e-05, |
|
"loss": 0.3429, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"grad_norm": 0.35143953561782837, |
|
"learning_rate": 1.5123279816513761e-05, |
|
"loss": 0.3437, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 41.36, |
|
"grad_norm": 0.37390509247779846, |
|
"learning_rate": 1.509437888428529e-05, |
|
"loss": 0.341, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"grad_norm": 0.39959460496902466, |
|
"learning_rate": 1.5065477952056822e-05, |
|
"loss": 0.3423, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"grad_norm": 0.3992210030555725, |
|
"learning_rate": 1.5036577019828352e-05, |
|
"loss": 0.3462, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 41.58, |
|
"grad_norm": 0.3677886724472046, |
|
"learning_rate": 1.5007676087599881e-05, |
|
"loss": 0.3427, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 41.65, |
|
"grad_norm": 0.3906817138195038, |
|
"learning_rate": 1.4978775155371412e-05, |
|
"loss": 0.3415, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 41.73, |
|
"grad_norm": 0.39412081241607666, |
|
"learning_rate": 1.4949874223142942e-05, |
|
"loss": 0.3434, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"grad_norm": 0.4120485782623291, |
|
"learning_rate": 1.4920973290914471e-05, |
|
"loss": 0.344, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 41.88, |
|
"grad_norm": 0.4464050829410553, |
|
"learning_rate": 1.4892072358686003e-05, |
|
"loss": 0.34, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 41.95, |
|
"grad_norm": 0.3044414520263672, |
|
"learning_rate": 1.4863171426457532e-05, |
|
"loss": 0.3417, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"grad_norm": 0.32554033398628235, |
|
"learning_rate": 1.4834270494229062e-05, |
|
"loss": 0.3417, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 42.1, |
|
"grad_norm": 0.3944820463657379, |
|
"learning_rate": 1.4805369562000593e-05, |
|
"loss": 0.3436, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 42.17, |
|
"grad_norm": 0.37802961468696594, |
|
"learning_rate": 1.4776468629772123e-05, |
|
"loss": 0.3439, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"grad_norm": 0.4089604616165161, |
|
"learning_rate": 1.4747567697543654e-05, |
|
"loss": 0.3408, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 42.32, |
|
"grad_norm": 0.37038755416870117, |
|
"learning_rate": 1.4718666765315183e-05, |
|
"loss": 0.3426, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 42.39, |
|
"grad_norm": 0.35514524579048157, |
|
"learning_rate": 1.4689765833086711e-05, |
|
"loss": 0.3451, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 42.47, |
|
"grad_norm": 0.4710882902145386, |
|
"learning_rate": 1.4660864900858242e-05, |
|
"loss": 0.3429, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 42.54, |
|
"grad_norm": 0.34002232551574707, |
|
"learning_rate": 1.4631963968629772e-05, |
|
"loss": 0.3418, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"grad_norm": 0.4424833059310913, |
|
"learning_rate": 1.4603063036401302e-05, |
|
"loss": 0.3425, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 42.69, |
|
"grad_norm": 0.4154003858566284, |
|
"learning_rate": 1.4574162104172833e-05, |
|
"loss": 0.3428, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 42.76, |
|
"grad_norm": 0.40983182191848755, |
|
"learning_rate": 1.4545261171944362e-05, |
|
"loss": 0.343, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 42.84, |
|
"grad_norm": 0.3568328320980072, |
|
"learning_rate": 1.4516360239715892e-05, |
|
"loss": 0.3387, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 42.91, |
|
"grad_norm": 0.3948083221912384, |
|
"learning_rate": 1.4487459307487423e-05, |
|
"loss": 0.3388, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"grad_norm": 0.3891525864601135, |
|
"learning_rate": 1.4458558375258953e-05, |
|
"loss": 0.3415, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 43.06, |
|
"grad_norm": 0.3312503695487976, |
|
"learning_rate": 1.4429657443030482e-05, |
|
"loss": 0.3407, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 43.13, |
|
"grad_norm": 0.37773850560188293, |
|
"learning_rate": 1.4400756510802013e-05, |
|
"loss": 0.3411, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 43.21, |
|
"grad_norm": 0.29978179931640625, |
|
"learning_rate": 1.4371855578573543e-05, |
|
"loss": 0.3423, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 43.28, |
|
"grad_norm": 0.4314406216144562, |
|
"learning_rate": 1.4342954646345072e-05, |
|
"loss": 0.3373, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"grad_norm": 0.3975353538990021, |
|
"learning_rate": 1.4314053714116604e-05, |
|
"loss": 0.3405, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"grad_norm": 0.35734960436820984, |
|
"learning_rate": 1.4285152781888133e-05, |
|
"loss": 0.3416, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 43.5, |
|
"grad_norm": 0.44908031821250916, |
|
"learning_rate": 1.4256251849659663e-05, |
|
"loss": 0.3392, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 43.58, |
|
"grad_norm": 0.3516298532485962, |
|
"learning_rate": 1.4227350917431194e-05, |
|
"loss": 0.3385, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 43.65, |
|
"grad_norm": 0.3821066915988922, |
|
"learning_rate": 1.4198449985202724e-05, |
|
"loss": 0.3448, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"grad_norm": 0.3824633061885834, |
|
"learning_rate": 1.4169549052974253e-05, |
|
"loss": 0.3415, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"grad_norm": 0.3336328864097595, |
|
"learning_rate": 1.4140648120745784e-05, |
|
"loss": 0.3403, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"grad_norm": 0.41100433468818665, |
|
"learning_rate": 1.4111747188517312e-05, |
|
"loss": 0.3428, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"grad_norm": 0.38574787974357605, |
|
"learning_rate": 1.4082846256288842e-05, |
|
"loss": 0.3388, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"grad_norm": 0.3482591509819031, |
|
"learning_rate": 1.4053945324060373e-05, |
|
"loss": 0.3392, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 44.1, |
|
"grad_norm": 0.39932170510292053, |
|
"learning_rate": 1.4025044391831903e-05, |
|
"loss": 0.3385, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 44.17, |
|
"grad_norm": 0.3750057518482208, |
|
"learning_rate": 1.3996143459603432e-05, |
|
"loss": 0.3419, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 44.24, |
|
"grad_norm": 0.3343985974788666, |
|
"learning_rate": 1.3967242527374963e-05, |
|
"loss": 0.3398, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"grad_norm": 0.32805758714675903, |
|
"learning_rate": 1.3938341595146493e-05, |
|
"loss": 0.339, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 44.39, |
|
"grad_norm": 0.376280814409256, |
|
"learning_rate": 1.3909440662918022e-05, |
|
"loss": 0.3407, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"grad_norm": 0.27181968092918396, |
|
"learning_rate": 1.3880539730689554e-05, |
|
"loss": 0.3392, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 44.54, |
|
"grad_norm": 0.3351672887802124, |
|
"learning_rate": 1.3851638798461083e-05, |
|
"loss": 0.3416, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"grad_norm": 0.3780210614204407, |
|
"learning_rate": 1.3822737866232613e-05, |
|
"loss": 0.3429, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 44.69, |
|
"grad_norm": 0.3951726257801056, |
|
"learning_rate": 1.3793836934004144e-05, |
|
"loss": 0.3407, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 44.76, |
|
"grad_norm": 0.3675825893878937, |
|
"learning_rate": 1.3764936001775674e-05, |
|
"loss": 0.339, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 44.84, |
|
"grad_norm": 0.3629719913005829, |
|
"learning_rate": 1.3736035069547205e-05, |
|
"loss": 0.3431, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"grad_norm": 0.4170212745666504, |
|
"learning_rate": 1.3707134137318734e-05, |
|
"loss": 0.3394, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 44.98, |
|
"grad_norm": 0.3839627206325531, |
|
"learning_rate": 1.3678233205090264e-05, |
|
"loss": 0.3422, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 45.06, |
|
"grad_norm": 0.4395899176597595, |
|
"learning_rate": 1.3649332272861795e-05, |
|
"loss": 0.3402, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"grad_norm": 0.37044864892959595, |
|
"learning_rate": 1.3620431340633325e-05, |
|
"loss": 0.342, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 45.21, |
|
"grad_norm": 0.4869653582572937, |
|
"learning_rate": 1.3591530408404854e-05, |
|
"loss": 0.3391, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"grad_norm": 0.416748046875, |
|
"learning_rate": 1.3562629476176384e-05, |
|
"loss": 0.3394, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 45.35, |
|
"grad_norm": 0.37101954221725464, |
|
"learning_rate": 1.3533728543947913e-05, |
|
"loss": 0.3425, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 45.43, |
|
"grad_norm": 0.3808073103427887, |
|
"learning_rate": 1.3504827611719443e-05, |
|
"loss": 0.3399, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 45.5, |
|
"grad_norm": 0.3832837641239166, |
|
"learning_rate": 1.3475926679490974e-05, |
|
"loss": 0.3413, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 45.58, |
|
"grad_norm": 0.3216901421546936, |
|
"learning_rate": 1.3447025747262504e-05, |
|
"loss": 0.3403, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"grad_norm": 0.36098387837409973, |
|
"learning_rate": 1.3418124815034033e-05, |
|
"loss": 0.3426, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 45.72, |
|
"grad_norm": 0.5177834033966064, |
|
"learning_rate": 1.3389223882805564e-05, |
|
"loss": 0.3411, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"grad_norm": 0.41095811128616333, |
|
"learning_rate": 1.3360322950577094e-05, |
|
"loss": 0.3381, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"grad_norm": 0.38759657740592957, |
|
"learning_rate": 1.3331422018348624e-05, |
|
"loss": 0.3385, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"grad_norm": 0.34995037317276, |
|
"learning_rate": 1.3302521086120155e-05, |
|
"loss": 0.3387, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"grad_norm": 0.40866127610206604, |
|
"learning_rate": 1.3273620153891684e-05, |
|
"loss": 0.3406, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 46.09, |
|
"grad_norm": 0.40558964014053345, |
|
"learning_rate": 1.3244719221663214e-05, |
|
"loss": 0.34, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 46.17, |
|
"grad_norm": 0.3268815875053406, |
|
"learning_rate": 1.3215818289434745e-05, |
|
"loss": 0.3428, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 46.24, |
|
"grad_norm": 0.4113875925540924, |
|
"learning_rate": 1.3186917357206275e-05, |
|
"loss": 0.3397, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 46.32, |
|
"grad_norm": 0.3797847032546997, |
|
"learning_rate": 1.3158016424977804e-05, |
|
"loss": 0.3404, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 46.39, |
|
"grad_norm": 0.3348693251609802, |
|
"learning_rate": 1.3129115492749335e-05, |
|
"loss": 0.34, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 46.46, |
|
"grad_norm": 0.3879573941230774, |
|
"learning_rate": 1.3100214560520865e-05, |
|
"loss": 0.341, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 46.54, |
|
"grad_norm": 0.40568268299102783, |
|
"learning_rate": 1.3071313628292394e-05, |
|
"loss": 0.3387, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 46.61, |
|
"grad_norm": 0.4025332033634186, |
|
"learning_rate": 1.3042412696063926e-05, |
|
"loss": 0.3415, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 46.69, |
|
"grad_norm": 0.38457706570625305, |
|
"learning_rate": 1.3013511763835455e-05, |
|
"loss": 0.3372, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 46.76, |
|
"grad_norm": 0.36499732732772827, |
|
"learning_rate": 1.2984610831606983e-05, |
|
"loss": 0.3402, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 46.83, |
|
"grad_norm": 0.5976847410202026, |
|
"learning_rate": 1.2955709899378514e-05, |
|
"loss": 0.3402, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 46.91, |
|
"grad_norm": 0.38978490233421326, |
|
"learning_rate": 1.2926808967150044e-05, |
|
"loss": 0.3378, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 46.98, |
|
"grad_norm": 0.4110495448112488, |
|
"learning_rate": 1.2897908034921573e-05, |
|
"loss": 0.339, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"grad_norm": 0.37475350499153137, |
|
"learning_rate": 1.2869007102693105e-05, |
|
"loss": 0.3402, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"grad_norm": 0.34179574251174927, |
|
"learning_rate": 1.2840106170464634e-05, |
|
"loss": 0.3397, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"grad_norm": 0.3006201982498169, |
|
"learning_rate": 1.2811205238236164e-05, |
|
"loss": 0.342, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 47.28, |
|
"grad_norm": 0.38113319873809814, |
|
"learning_rate": 1.2782304306007695e-05, |
|
"loss": 0.341, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 47.35, |
|
"grad_norm": 0.48638325929641724, |
|
"learning_rate": 1.2753403373779225e-05, |
|
"loss": 0.3403, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 47.43, |
|
"grad_norm": 0.38058334589004517, |
|
"learning_rate": 1.2724502441550754e-05, |
|
"loss": 0.3401, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"grad_norm": 0.3784169554710388, |
|
"learning_rate": 1.2695601509322285e-05, |
|
"loss": 0.3378, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 47.57, |
|
"grad_norm": 0.3679274916648865, |
|
"learning_rate": 1.2666700577093815e-05, |
|
"loss": 0.3388, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 47.65, |
|
"grad_norm": 0.4005269706249237, |
|
"learning_rate": 1.2637799644865346e-05, |
|
"loss": 0.3382, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 47.72, |
|
"grad_norm": 0.36187744140625, |
|
"learning_rate": 1.2608898712636876e-05, |
|
"loss": 0.3372, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"grad_norm": 0.4188271164894104, |
|
"learning_rate": 1.2579997780408405e-05, |
|
"loss": 0.339, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 47.87, |
|
"grad_norm": 0.40314343571662903, |
|
"learning_rate": 1.2551096848179936e-05, |
|
"loss": 0.3404, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"grad_norm": 0.4233045279979706, |
|
"learning_rate": 1.2522195915951466e-05, |
|
"loss": 0.3398, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"grad_norm": 0.3816595673561096, |
|
"learning_rate": 1.2493294983722996e-05, |
|
"loss": 0.3382, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 48.09, |
|
"grad_norm": 0.4000893831253052, |
|
"learning_rate": 1.2464394051494527e-05, |
|
"loss": 0.3402, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"grad_norm": 0.4130527377128601, |
|
"learning_rate": 1.2435493119266055e-05, |
|
"loss": 0.3394, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 48.24, |
|
"grad_norm": 0.38113564252853394, |
|
"learning_rate": 1.2406592187037584e-05, |
|
"loss": 0.3365, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 48.31, |
|
"grad_norm": 0.346966415643692, |
|
"learning_rate": 1.2377691254809115e-05, |
|
"loss": 0.3395, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"grad_norm": 0.4276494085788727, |
|
"learning_rate": 1.2348790322580645e-05, |
|
"loss": 0.3362, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 48.46, |
|
"grad_norm": 0.39347225427627563, |
|
"learning_rate": 1.2319889390352175e-05, |
|
"loss": 0.3422, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 48.54, |
|
"grad_norm": 0.3483811616897583, |
|
"learning_rate": 1.2290988458123706e-05, |
|
"loss": 0.3395, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 48.61, |
|
"grad_norm": 0.36153608560562134, |
|
"learning_rate": 1.2262087525895235e-05, |
|
"loss": 0.3365, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 48.68, |
|
"grad_norm": 0.39289888739585876, |
|
"learning_rate": 1.2233186593666765e-05, |
|
"loss": 0.3421, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 48.76, |
|
"grad_norm": 0.4176575839519501, |
|
"learning_rate": 1.2204285661438296e-05, |
|
"loss": 0.3364, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 48.83, |
|
"grad_norm": 0.3840237855911255, |
|
"learning_rate": 1.2175384729209826e-05, |
|
"loss": 0.3357, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"grad_norm": 0.44171571731567383, |
|
"learning_rate": 1.2146483796981355e-05, |
|
"loss": 0.3413, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"grad_norm": 0.42055392265319824, |
|
"learning_rate": 1.2117582864752886e-05, |
|
"loss": 0.335, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 49.05, |
|
"grad_norm": 0.44252675771713257, |
|
"learning_rate": 1.2088681932524416e-05, |
|
"loss": 0.3385, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 49.13, |
|
"grad_norm": 0.378095805644989, |
|
"learning_rate": 1.2059781000295946e-05, |
|
"loss": 0.3413, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"grad_norm": 0.3892216980457306, |
|
"learning_rate": 1.2030880068067477e-05, |
|
"loss": 0.3374, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 49.27, |
|
"grad_norm": 0.3788653612136841, |
|
"learning_rate": 1.2001979135839006e-05, |
|
"loss": 0.3398, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 49.35, |
|
"grad_norm": 0.38030126690864563, |
|
"learning_rate": 1.1973078203610536e-05, |
|
"loss": 0.3375, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 49.42, |
|
"grad_norm": 0.4031144082546234, |
|
"learning_rate": 1.1944177271382067e-05, |
|
"loss": 0.3398, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 49.5, |
|
"grad_norm": 0.3956519663333893, |
|
"learning_rate": 1.1915276339153597e-05, |
|
"loss": 0.3374, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"grad_norm": 0.3961743414402008, |
|
"learning_rate": 1.1886375406925124e-05, |
|
"loss": 0.3349, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 49.64, |
|
"grad_norm": 0.3616986572742462, |
|
"learning_rate": 1.1857474474696656e-05, |
|
"loss": 0.3374, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 49.72, |
|
"grad_norm": 0.36143624782562256, |
|
"learning_rate": 1.1828573542468185e-05, |
|
"loss": 0.3361, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 49.79, |
|
"grad_norm": 0.389981746673584, |
|
"learning_rate": 1.1799672610239715e-05, |
|
"loss": 0.3378, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 49.87, |
|
"grad_norm": 0.4078088104724884, |
|
"learning_rate": 1.1770771678011246e-05, |
|
"loss": 0.3382, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 49.94, |
|
"grad_norm": 0.3802012801170349, |
|
"learning_rate": 1.1741870745782776e-05, |
|
"loss": 0.3356, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"grad_norm": 0.46680396795272827, |
|
"learning_rate": 1.1712969813554305e-05, |
|
"loss": 0.339, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 50.09, |
|
"grad_norm": 0.45273512601852417, |
|
"learning_rate": 1.1684068881325836e-05, |
|
"loss": 0.3381, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 50.16, |
|
"grad_norm": 0.3563522398471832, |
|
"learning_rate": 1.1655167949097366e-05, |
|
"loss": 0.3371, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 50.24, |
|
"grad_norm": 0.43655216693878174, |
|
"learning_rate": 1.1626267016868895e-05, |
|
"loss": 0.3354, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 50.31, |
|
"grad_norm": 0.3371317982673645, |
|
"learning_rate": 1.1597366084640427e-05, |
|
"loss": 0.3374, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 50.38, |
|
"grad_norm": 0.39056339859962463, |
|
"learning_rate": 1.1568465152411956e-05, |
|
"loss": 0.3376, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 50.46, |
|
"grad_norm": 0.40476441383361816, |
|
"learning_rate": 1.1539564220183487e-05, |
|
"loss": 0.3381, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"grad_norm": 0.3706866502761841, |
|
"learning_rate": 1.1510663287955017e-05, |
|
"loss": 0.3355, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 50.61, |
|
"grad_norm": 0.43677544593811035, |
|
"learning_rate": 1.1481762355726547e-05, |
|
"loss": 0.3352, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 50.68, |
|
"grad_norm": 0.3938286602497101, |
|
"learning_rate": 1.1452861423498078e-05, |
|
"loss": 0.3375, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 50.75, |
|
"grad_norm": 0.3463038504123688, |
|
"learning_rate": 1.1423960491269607e-05, |
|
"loss": 0.3379, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 50.83, |
|
"grad_norm": 0.3810366988182068, |
|
"learning_rate": 1.1395059559041137e-05, |
|
"loss": 0.3367, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 50.9, |
|
"grad_norm": 0.3845095932483673, |
|
"learning_rate": 1.1366158626812668e-05, |
|
"loss": 0.3366, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"grad_norm": 0.5161909461021423, |
|
"learning_rate": 1.1337257694584198e-05, |
|
"loss": 0.3382, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 51.05, |
|
"grad_norm": 0.4319625794887543, |
|
"learning_rate": 1.1308356762355726e-05, |
|
"loss": 0.3359, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 51.12, |
|
"grad_norm": 0.34908732771873474, |
|
"learning_rate": 1.1279455830127257e-05, |
|
"loss": 0.3353, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"grad_norm": 0.38367515802383423, |
|
"learning_rate": 1.1250554897898786e-05, |
|
"loss": 0.3367, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 51.27, |
|
"grad_norm": 0.3939116597175598, |
|
"learning_rate": 1.1221653965670316e-05, |
|
"loss": 0.3374, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 51.35, |
|
"grad_norm": 0.44843488931655884, |
|
"learning_rate": 1.1192753033441847e-05, |
|
"loss": 0.3376, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 51.42, |
|
"grad_norm": 0.4169764816761017, |
|
"learning_rate": 1.1163852101213377e-05, |
|
"loss": 0.3385, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 51.49, |
|
"grad_norm": 0.3487055003643036, |
|
"learning_rate": 1.1134951168984906e-05, |
|
"loss": 0.3372, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 51.57, |
|
"grad_norm": 0.3876706063747406, |
|
"learning_rate": 1.1106050236756437e-05, |
|
"loss": 0.3379, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 51.64, |
|
"grad_norm": 0.33344700932502747, |
|
"learning_rate": 1.1077149304527967e-05, |
|
"loss": 0.3389, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 51.72, |
|
"grad_norm": 0.41183948516845703, |
|
"learning_rate": 1.1048248372299497e-05, |
|
"loss": 0.3363, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 51.79, |
|
"grad_norm": 0.3549967110157013, |
|
"learning_rate": 1.1019347440071028e-05, |
|
"loss": 0.3374, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 51.86, |
|
"grad_norm": 0.4144490659236908, |
|
"learning_rate": 1.0990446507842557e-05, |
|
"loss": 0.3347, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 51.94, |
|
"grad_norm": 0.3781343400478363, |
|
"learning_rate": 1.0961545575614087e-05, |
|
"loss": 0.3365, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"grad_norm": 0.4050437808036804, |
|
"learning_rate": 1.0932644643385618e-05, |
|
"loss": 0.3384, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 52.09, |
|
"grad_norm": 0.3758808374404907, |
|
"learning_rate": 1.0903743711157148e-05, |
|
"loss": 0.3382, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 52.16, |
|
"grad_norm": 0.456534206867218, |
|
"learning_rate": 1.0874842778928677e-05, |
|
"loss": 0.3392, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 52.23, |
|
"grad_norm": 0.38857728242874146, |
|
"learning_rate": 1.0845941846700208e-05, |
|
"loss": 0.3374, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 52.31, |
|
"grad_norm": 0.39419788122177124, |
|
"learning_rate": 1.0817040914471738e-05, |
|
"loss": 0.3374, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 52.38, |
|
"grad_norm": 0.41852855682373047, |
|
"learning_rate": 1.0788139982243268e-05, |
|
"loss": 0.3335, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 52.46, |
|
"grad_norm": 0.3561359941959381, |
|
"learning_rate": 1.0759239050014797e-05, |
|
"loss": 0.3359, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 52.53, |
|
"grad_norm": 0.3975025713443756, |
|
"learning_rate": 1.0730338117786327e-05, |
|
"loss": 0.336, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 52.6, |
|
"grad_norm": 0.39150169491767883, |
|
"learning_rate": 1.0701437185557856e-05, |
|
"loss": 0.337, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 52.68, |
|
"grad_norm": 0.404354453086853, |
|
"learning_rate": 1.0672536253329387e-05, |
|
"loss": 0.3378, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 52.75, |
|
"grad_norm": 0.3414269685745239, |
|
"learning_rate": 1.0643635321100917e-05, |
|
"loss": 0.3338, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"grad_norm": 0.4378945827484131, |
|
"learning_rate": 1.0614734388872446e-05, |
|
"loss": 0.3369, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"grad_norm": 0.5136425495147705, |
|
"learning_rate": 1.0585833456643978e-05, |
|
"loss": 0.3348, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 52.97, |
|
"grad_norm": 0.3793259263038635, |
|
"learning_rate": 1.0556932524415507e-05, |
|
"loss": 0.3354, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 53.05, |
|
"grad_norm": 0.3828275203704834, |
|
"learning_rate": 1.0528031592187039e-05, |
|
"loss": 0.3348, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"grad_norm": 0.380776971578598, |
|
"learning_rate": 1.0499130659958568e-05, |
|
"loss": 0.3375, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"grad_norm": 0.40259137749671936, |
|
"learning_rate": 1.0470229727730098e-05, |
|
"loss": 0.3366, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 53.27, |
|
"grad_norm": 0.3794288635253906, |
|
"learning_rate": 1.0441328795501629e-05, |
|
"loss": 0.3343, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 53.34, |
|
"grad_norm": 0.44558051228523254, |
|
"learning_rate": 1.0412427863273158e-05, |
|
"loss": 0.3355, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 53.42, |
|
"grad_norm": 0.42926931381225586, |
|
"learning_rate": 1.0383526931044688e-05, |
|
"loss": 0.3368, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 53.49, |
|
"grad_norm": 0.3846406936645508, |
|
"learning_rate": 1.035462599881622e-05, |
|
"loss": 0.3363, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"grad_norm": 0.43000903725624084, |
|
"learning_rate": 1.0325725066587749e-05, |
|
"loss": 0.3338, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 53.64, |
|
"grad_norm": 0.42310836911201477, |
|
"learning_rate": 1.0296824134359278e-05, |
|
"loss": 0.336, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 53.71, |
|
"grad_norm": 0.3451327681541443, |
|
"learning_rate": 1.026792320213081e-05, |
|
"loss": 0.3384, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 53.79, |
|
"grad_norm": 0.4068630337715149, |
|
"learning_rate": 1.0239022269902339e-05, |
|
"loss": 0.3389, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 53.86, |
|
"grad_norm": 0.36988091468811035, |
|
"learning_rate": 1.0210121337673869e-05, |
|
"loss": 0.3368, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 53.94, |
|
"grad_norm": 0.37670448422431946, |
|
"learning_rate": 1.0181220405445398e-05, |
|
"loss": 0.3361, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"grad_norm": 0.4235304296016693, |
|
"learning_rate": 1.0152319473216928e-05, |
|
"loss": 0.3339, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 54.08, |
|
"grad_norm": 0.4179520606994629, |
|
"learning_rate": 1.0123418540988457e-05, |
|
"loss": 0.3372, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 54.16, |
|
"grad_norm": 0.3763734996318817, |
|
"learning_rate": 1.0094517608759988e-05, |
|
"loss": 0.3368, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 54.23, |
|
"grad_norm": 0.4098796844482422, |
|
"learning_rate": 1.0065616676531518e-05, |
|
"loss": 0.3326, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 54.31, |
|
"grad_norm": 0.41570228338241577, |
|
"learning_rate": 1.0036715744303048e-05, |
|
"loss": 0.3366, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 54.38, |
|
"grad_norm": 0.38217049837112427, |
|
"learning_rate": 1.0007814812074579e-05, |
|
"loss": 0.3338, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 54.45, |
|
"grad_norm": 0.36770564317703247, |
|
"learning_rate": 9.978913879846108e-06, |
|
"loss": 0.3323, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"grad_norm": 0.43568935990333557, |
|
"learning_rate": 9.950012947617638e-06, |
|
"loss": 0.3361, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"grad_norm": 0.47602441906929016, |
|
"learning_rate": 9.921112015389169e-06, |
|
"loss": 0.3349, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 54.68, |
|
"grad_norm": 0.4022866487503052, |
|
"learning_rate": 9.892211083160699e-06, |
|
"loss": 0.3347, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 54.75, |
|
"grad_norm": 0.3981685936450958, |
|
"learning_rate": 9.863310150932228e-06, |
|
"loss": 0.3351, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 54.82, |
|
"grad_norm": 0.3706594705581665, |
|
"learning_rate": 9.83440921870376e-06, |
|
"loss": 0.3342, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 54.9, |
|
"grad_norm": 0.36316171288490295, |
|
"learning_rate": 9.805508286475289e-06, |
|
"loss": 0.337, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 54.97, |
|
"grad_norm": 0.3705138564109802, |
|
"learning_rate": 9.776607354246819e-06, |
|
"loss": 0.3358, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 55.05, |
|
"grad_norm": 0.4264328181743622, |
|
"learning_rate": 9.74770642201835e-06, |
|
"loss": 0.3349, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 55.12, |
|
"grad_norm": 0.39624592661857605, |
|
"learning_rate": 9.71880548978988e-06, |
|
"loss": 0.3327, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 55.19, |
|
"grad_norm": 0.41520076990127563, |
|
"learning_rate": 9.689904557561409e-06, |
|
"loss": 0.3363, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 55.27, |
|
"grad_norm": 0.37249574065208435, |
|
"learning_rate": 9.66100362533294e-06, |
|
"loss": 0.335, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 55.34, |
|
"grad_norm": 0.42657721042633057, |
|
"learning_rate": 9.632102693104468e-06, |
|
"loss": 0.3353, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 55.42, |
|
"grad_norm": 0.3780669569969177, |
|
"learning_rate": 9.603201760875998e-06, |
|
"loss": 0.337, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 55.49, |
|
"grad_norm": 0.3783871829509735, |
|
"learning_rate": 9.574300828647529e-06, |
|
"loss": 0.3348, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"grad_norm": 0.4328089952468872, |
|
"learning_rate": 9.545399896419058e-06, |
|
"loss": 0.3366, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 55.64, |
|
"grad_norm": 0.3957238793373108, |
|
"learning_rate": 9.516498964190588e-06, |
|
"loss": 0.3344, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 55.71, |
|
"grad_norm": 0.3606773614883423, |
|
"learning_rate": 9.487598031962119e-06, |
|
"loss": 0.3342, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 55.79, |
|
"grad_norm": 0.4170531928539276, |
|
"learning_rate": 9.458697099733649e-06, |
|
"loss": 0.3349, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 55.86, |
|
"grad_norm": 0.3830915093421936, |
|
"learning_rate": 9.42979616750518e-06, |
|
"loss": 0.3371, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 55.93, |
|
"grad_norm": 0.4350239634513855, |
|
"learning_rate": 9.40089523527671e-06, |
|
"loss": 0.3377, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"grad_norm": 0.37382885813713074, |
|
"learning_rate": 9.371994303048239e-06, |
|
"loss": 0.3362, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 56.08, |
|
"grad_norm": 0.3806856870651245, |
|
"learning_rate": 9.34309337081977e-06, |
|
"loss": 0.3347, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 56.16, |
|
"grad_norm": 0.3189554214477539, |
|
"learning_rate": 9.3141924385913e-06, |
|
"loss": 0.3363, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 56.23, |
|
"grad_norm": 0.33894240856170654, |
|
"learning_rate": 9.28529150636283e-06, |
|
"loss": 0.3362, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 56.3, |
|
"grad_norm": 0.4565516710281372, |
|
"learning_rate": 9.25639057413436e-06, |
|
"loss": 0.3331, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 56.38, |
|
"grad_norm": 0.4101388156414032, |
|
"learning_rate": 9.22748964190589e-06, |
|
"loss": 0.335, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 56.45, |
|
"grad_norm": 0.40449845790863037, |
|
"learning_rate": 9.19858870967742e-06, |
|
"loss": 0.3337, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 56.53, |
|
"grad_norm": 0.47349539399147034, |
|
"learning_rate": 9.16968777744895e-06, |
|
"loss": 0.3328, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"grad_norm": 0.42848438024520874, |
|
"learning_rate": 9.14078684522048e-06, |
|
"loss": 0.334, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 56.67, |
|
"grad_norm": 0.3625510334968567, |
|
"learning_rate": 9.11188591299201e-06, |
|
"loss": 0.3321, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 56.75, |
|
"grad_norm": 0.3561297357082367, |
|
"learning_rate": 9.082984980763541e-06, |
|
"loss": 0.3349, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"grad_norm": 0.3738841414451599, |
|
"learning_rate": 9.054084048535069e-06, |
|
"loss": 0.3366, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 56.9, |
|
"grad_norm": 0.33738991618156433, |
|
"learning_rate": 9.025183116306599e-06, |
|
"loss": 0.3327, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 56.97, |
|
"grad_norm": 0.42749759554862976, |
|
"learning_rate": 8.99628218407813e-06, |
|
"loss": 0.336, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 57.04, |
|
"grad_norm": 0.4089387059211731, |
|
"learning_rate": 8.96738125184966e-06, |
|
"loss": 0.3334, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 57.12, |
|
"grad_norm": 0.3684140145778656, |
|
"learning_rate": 8.938480319621189e-06, |
|
"loss": 0.3345, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 57.19, |
|
"grad_norm": 0.3694292902946472, |
|
"learning_rate": 8.90957938739272e-06, |
|
"loss": 0.3333, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 57.27, |
|
"grad_norm": 0.31505081057548523, |
|
"learning_rate": 8.88067845516425e-06, |
|
"loss": 0.3339, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 57.34, |
|
"grad_norm": 0.4051445722579956, |
|
"learning_rate": 8.85177752293578e-06, |
|
"loss": 0.3348, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 57.41, |
|
"grad_norm": 0.426145076751709, |
|
"learning_rate": 8.82287659070731e-06, |
|
"loss": 0.3307, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 57.49, |
|
"grad_norm": 0.4356764256954193, |
|
"learning_rate": 8.79397565847884e-06, |
|
"loss": 0.3336, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 57.56, |
|
"grad_norm": 0.39635592699050903, |
|
"learning_rate": 8.76507472625037e-06, |
|
"loss": 0.3355, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 57.64, |
|
"grad_norm": 0.4467043876647949, |
|
"learning_rate": 8.7361737940219e-06, |
|
"loss": 0.3369, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 57.71, |
|
"grad_norm": 0.5042401552200317, |
|
"learning_rate": 8.70727286179343e-06, |
|
"loss": 0.3352, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 57.78, |
|
"grad_norm": 0.38742733001708984, |
|
"learning_rate": 8.67837192956496e-06, |
|
"loss": 0.3349, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 57.86, |
|
"grad_norm": 0.35748493671417236, |
|
"learning_rate": 8.649470997336491e-06, |
|
"loss": 0.3331, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 57.93, |
|
"grad_norm": 0.406547486782074, |
|
"learning_rate": 8.62057006510802e-06, |
|
"loss": 0.3345, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"grad_norm": 0.37016528844833374, |
|
"learning_rate": 8.59166913287955e-06, |
|
"loss": 0.3338, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 58.08, |
|
"grad_norm": 0.39589524269104004, |
|
"learning_rate": 8.562768200651081e-06, |
|
"loss": 0.3334, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 58.15, |
|
"grad_norm": 0.42654627561569214, |
|
"learning_rate": 8.533867268422611e-06, |
|
"loss": 0.3336, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 58.23, |
|
"grad_norm": 0.4174553453922272, |
|
"learning_rate": 8.504966336194139e-06, |
|
"loss": 0.3339, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 58.3, |
|
"grad_norm": 0.43379977345466614, |
|
"learning_rate": 8.47606540396567e-06, |
|
"loss": 0.3329, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 58.38, |
|
"grad_norm": 0.3706502914428711, |
|
"learning_rate": 8.4471644717372e-06, |
|
"loss": 0.332, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 58.45, |
|
"grad_norm": 0.4529905319213867, |
|
"learning_rate": 8.41826353950873e-06, |
|
"loss": 0.3342, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 58.52, |
|
"grad_norm": 0.4060870110988617, |
|
"learning_rate": 8.38936260728026e-06, |
|
"loss": 0.3331, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 58.6, |
|
"grad_norm": 0.4102860689163208, |
|
"learning_rate": 8.36046167505179e-06, |
|
"loss": 0.3339, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 58.67, |
|
"grad_norm": 0.38025009632110596, |
|
"learning_rate": 8.331560742823321e-06, |
|
"loss": 0.3334, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 58.75, |
|
"grad_norm": 0.3559959828853607, |
|
"learning_rate": 8.30265981059485e-06, |
|
"loss": 0.334, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"grad_norm": 0.48199519515037537, |
|
"learning_rate": 8.27375887836638e-06, |
|
"loss": 0.3328, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 58.89, |
|
"grad_norm": 0.40932905673980713, |
|
"learning_rate": 8.244857946137912e-06, |
|
"loss": 0.3314, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"grad_norm": 0.4070405960083008, |
|
"learning_rate": 8.215957013909441e-06, |
|
"loss": 0.3354, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"grad_norm": 0.392281711101532, |
|
"learning_rate": 8.18705608168097e-06, |
|
"loss": 0.3324, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 59.12, |
|
"grad_norm": 0.38242244720458984, |
|
"learning_rate": 8.158155149452502e-06, |
|
"loss": 0.3313, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 59.19, |
|
"grad_norm": 0.4169810712337494, |
|
"learning_rate": 8.129254217224031e-06, |
|
"loss": 0.3354, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 59.26, |
|
"grad_norm": 0.335362046957016, |
|
"learning_rate": 8.100353284995561e-06, |
|
"loss": 0.3312, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 59.34, |
|
"grad_norm": 0.41095077991485596, |
|
"learning_rate": 8.071452352767092e-06, |
|
"loss": 0.3331, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 59.41, |
|
"grad_norm": 0.39492741227149963, |
|
"learning_rate": 8.042551420538622e-06, |
|
"loss": 0.3314, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 59.49, |
|
"grad_norm": 0.42789730429649353, |
|
"learning_rate": 8.013650488310151e-06, |
|
"loss": 0.333, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 59.56, |
|
"grad_norm": 0.35511842370033264, |
|
"learning_rate": 7.984749556081683e-06, |
|
"loss": 0.3346, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 59.63, |
|
"grad_norm": 0.36928626894950867, |
|
"learning_rate": 7.95584862385321e-06, |
|
"loss": 0.335, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 59.71, |
|
"grad_norm": 0.4076744318008423, |
|
"learning_rate": 7.92694769162474e-06, |
|
"loss": 0.3294, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 59.78, |
|
"grad_norm": 0.35494473576545715, |
|
"learning_rate": 7.898046759396271e-06, |
|
"loss": 0.3336, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 59.85, |
|
"grad_norm": 0.3991703689098358, |
|
"learning_rate": 7.8691458271678e-06, |
|
"loss": 0.3294, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 59.93, |
|
"grad_norm": 0.3891808092594147, |
|
"learning_rate": 7.84024489493933e-06, |
|
"loss": 0.3349, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 0.5921450257301331, |
|
"learning_rate": 7.811343962710861e-06, |
|
"loss": 0.3331, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 60.08, |
|
"grad_norm": 0.387185275554657, |
|
"learning_rate": 7.782443030482391e-06, |
|
"loss": 0.3326, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 60.15, |
|
"grad_norm": 0.5411362648010254, |
|
"learning_rate": 7.75354209825392e-06, |
|
"loss": 0.3303, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"grad_norm": 0.35113802552223206, |
|
"learning_rate": 7.724641166025452e-06, |
|
"loss": 0.3343, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 60.3, |
|
"grad_norm": 0.3711684048175812, |
|
"learning_rate": 7.695740233796981e-06, |
|
"loss": 0.3316, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 60.37, |
|
"grad_norm": 0.40576910972595215, |
|
"learning_rate": 7.666839301568511e-06, |
|
"loss": 0.3344, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 60.45, |
|
"grad_norm": 0.4487907588481903, |
|
"learning_rate": 7.637938369340042e-06, |
|
"loss": 0.3337, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 60.52, |
|
"grad_norm": 0.4065958857536316, |
|
"learning_rate": 7.609037437111572e-06, |
|
"loss": 0.3314, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 60.59, |
|
"grad_norm": 0.4283113479614258, |
|
"learning_rate": 7.580136504883102e-06, |
|
"loss": 0.3337, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 60.67, |
|
"grad_norm": 0.4433044493198395, |
|
"learning_rate": 7.5512355726546325e-06, |
|
"loss": 0.3317, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 60.74, |
|
"grad_norm": 0.38607364892959595, |
|
"learning_rate": 7.522334640426161e-06, |
|
"loss": 0.333, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 60.82, |
|
"grad_norm": 0.45367687940597534, |
|
"learning_rate": 7.4934337081976916e-06, |
|
"loss": 0.3298, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 60.89, |
|
"grad_norm": 0.4054895043373108, |
|
"learning_rate": 7.464532775969222e-06, |
|
"loss": 0.3318, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"grad_norm": 0.41600409150123596, |
|
"learning_rate": 7.4356318437407515e-06, |
|
"loss": 0.3313, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 61.04, |
|
"grad_norm": 0.4171212911605835, |
|
"learning_rate": 7.406730911512282e-06, |
|
"loss": 0.3318, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"grad_norm": 0.40264466404914856, |
|
"learning_rate": 7.377829979283812e-06, |
|
"loss": 0.3335, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 61.19, |
|
"grad_norm": 0.37919875979423523, |
|
"learning_rate": 7.348929047055342e-06, |
|
"loss": 0.3324, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 61.26, |
|
"grad_norm": 0.47246700525283813, |
|
"learning_rate": 7.320028114826872e-06, |
|
"loss": 0.3341, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"grad_norm": 0.4305689036846161, |
|
"learning_rate": 7.291127182598403e-06, |
|
"loss": 0.3335, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 61.41, |
|
"grad_norm": 0.38494426012039185, |
|
"learning_rate": 7.262226250369932e-06, |
|
"loss": 0.3337, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 61.48, |
|
"grad_norm": 0.45139452815055847, |
|
"learning_rate": 7.233325318141462e-06, |
|
"loss": 0.3322, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 61.56, |
|
"grad_norm": 0.4199995994567871, |
|
"learning_rate": 7.204424385912992e-06, |
|
"loss": 0.3302, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 61.63, |
|
"grad_norm": 0.3823252022266388, |
|
"learning_rate": 7.175523453684522e-06, |
|
"loss": 0.333, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 61.7, |
|
"grad_norm": 0.38762542605400085, |
|
"learning_rate": 7.146622521456052e-06, |
|
"loss": 0.3338, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 61.78, |
|
"grad_norm": 0.3889346718788147, |
|
"learning_rate": 7.117721589227582e-06, |
|
"loss": 0.3333, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 61.85, |
|
"grad_norm": 0.43703803420066833, |
|
"learning_rate": 7.088820656999113e-06, |
|
"loss": 0.3313, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 61.93, |
|
"grad_norm": 0.37083032727241516, |
|
"learning_rate": 7.059919724770642e-06, |
|
"loss": 0.3327, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"grad_norm": 0.431436687707901, |
|
"learning_rate": 7.031018792542173e-06, |
|
"loss": 0.3275, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 62.07, |
|
"grad_norm": 0.38710957765579224, |
|
"learning_rate": 7.002117860313703e-06, |
|
"loss": 0.3315, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 62.15, |
|
"grad_norm": 0.4548743963241577, |
|
"learning_rate": 6.973216928085232e-06, |
|
"loss": 0.3314, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 62.22, |
|
"grad_norm": 0.4413709342479706, |
|
"learning_rate": 6.944315995856762e-06, |
|
"loss": 0.3317, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"grad_norm": 0.42544716596603394, |
|
"learning_rate": 6.915415063628293e-06, |
|
"loss": 0.3327, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 62.37, |
|
"grad_norm": 0.4307864010334015, |
|
"learning_rate": 6.886514131399822e-06, |
|
"loss": 0.3335, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 62.44, |
|
"grad_norm": 0.4296441376209259, |
|
"learning_rate": 6.8576131991713526e-06, |
|
"loss": 0.3317, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 62.52, |
|
"grad_norm": 0.3624299466609955, |
|
"learning_rate": 6.828712266942883e-06, |
|
"loss": 0.3307, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 62.59, |
|
"grad_norm": 0.4123700261116028, |
|
"learning_rate": 6.7998113347144125e-06, |
|
"loss": 0.3317, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 62.67, |
|
"grad_norm": 0.4546355903148651, |
|
"learning_rate": 6.770910402485943e-06, |
|
"loss": 0.3288, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 62.74, |
|
"grad_norm": 0.4328787922859192, |
|
"learning_rate": 6.742009470257473e-06, |
|
"loss": 0.3321, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 62.81, |
|
"grad_norm": 0.39879125356674194, |
|
"learning_rate": 6.713108538029003e-06, |
|
"loss": 0.334, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 62.89, |
|
"grad_norm": 0.42407459020614624, |
|
"learning_rate": 6.684207605800532e-06, |
|
"loss": 0.3312, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"grad_norm": 0.5664127469062805, |
|
"learning_rate": 6.655306673572063e-06, |
|
"loss": 0.3323, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 63.04, |
|
"grad_norm": 0.47169846296310425, |
|
"learning_rate": 6.626405741343592e-06, |
|
"loss": 0.3309, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 63.11, |
|
"grad_norm": 0.3552204668521881, |
|
"learning_rate": 6.597504809115123e-06, |
|
"loss": 0.33, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 63.18, |
|
"grad_norm": 0.44585150480270386, |
|
"learning_rate": 6.568603876886653e-06, |
|
"loss": 0.3306, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 63.26, |
|
"grad_norm": 0.4512608051300049, |
|
"learning_rate": 6.5397029446581835e-06, |
|
"loss": 0.3308, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"grad_norm": 0.40121740102767944, |
|
"learning_rate": 6.510802012429713e-06, |
|
"loss": 0.3302, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 63.41, |
|
"grad_norm": 0.4354041516780853, |
|
"learning_rate": 6.481901080201243e-06, |
|
"loss": 0.3327, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 63.48, |
|
"grad_norm": 0.4612290561199188, |
|
"learning_rate": 6.453000147972774e-06, |
|
"loss": 0.3311, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 63.55, |
|
"grad_norm": 0.4508548676967621, |
|
"learning_rate": 6.424099215744303e-06, |
|
"loss": 0.3312, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 63.63, |
|
"grad_norm": 0.4045092761516571, |
|
"learning_rate": 6.395198283515833e-06, |
|
"loss": 0.3313, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 63.7, |
|
"grad_norm": 0.4180326759815216, |
|
"learning_rate": 6.366297351287363e-06, |
|
"loss": 0.3324, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 63.78, |
|
"grad_norm": 0.3800413906574249, |
|
"learning_rate": 6.337396419058893e-06, |
|
"loss": 0.3357, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 63.85, |
|
"grad_norm": 0.4264669716358185, |
|
"learning_rate": 6.308495486830423e-06, |
|
"loss": 0.3314, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 63.92, |
|
"grad_norm": 0.4021168351173401, |
|
"learning_rate": 6.279594554601954e-06, |
|
"loss": 0.3301, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 0.4635623097419739, |
|
"learning_rate": 6.250693622373483e-06, |
|
"loss": 0.3304, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 64.07, |
|
"grad_norm": 0.4012512266635895, |
|
"learning_rate": 6.2217926901450136e-06, |
|
"loss": 0.3322, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"grad_norm": 0.4430687725543976, |
|
"learning_rate": 6.192891757916544e-06, |
|
"loss": 0.3302, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 64.22, |
|
"grad_norm": 0.43903249502182007, |
|
"learning_rate": 6.1639908256880735e-06, |
|
"loss": 0.3326, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"grad_norm": 0.5228444337844849, |
|
"learning_rate": 6.135089893459604e-06, |
|
"loss": 0.3298, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 64.37, |
|
"grad_norm": 0.43113288283348083, |
|
"learning_rate": 6.1061889612311334e-06, |
|
"loss": 0.3291, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 64.44, |
|
"grad_norm": 0.47652667760849, |
|
"learning_rate": 6.077288029002663e-06, |
|
"loss": 0.3299, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"grad_norm": 0.4017566442489624, |
|
"learning_rate": 6.048387096774193e-06, |
|
"loss": 0.3312, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 64.59, |
|
"grad_norm": 0.4369170069694519, |
|
"learning_rate": 6.019486164545724e-06, |
|
"loss": 0.3339, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 64.66, |
|
"grad_norm": 0.36806294322013855, |
|
"learning_rate": 5.990585232317254e-06, |
|
"loss": 0.3317, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 64.74, |
|
"grad_norm": 0.42576882243156433, |
|
"learning_rate": 5.961684300088784e-06, |
|
"loss": 0.3309, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 64.81, |
|
"grad_norm": 0.4077777564525604, |
|
"learning_rate": 5.932783367860314e-06, |
|
"loss": 0.3319, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 64.89, |
|
"grad_norm": 0.4394007921218872, |
|
"learning_rate": 5.9038824356318445e-06, |
|
"loss": 0.3327, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"grad_norm": 0.32965216040611267, |
|
"learning_rate": 5.874981503403374e-06, |
|
"loss": 0.3277, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 65.03, |
|
"grad_norm": 0.4312441945075989, |
|
"learning_rate": 5.846080571174904e-06, |
|
"loss": 0.3291, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 65.11, |
|
"grad_norm": 0.3752184808254242, |
|
"learning_rate": 5.817179638946434e-06, |
|
"loss": 0.3319, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 65.18, |
|
"grad_norm": 0.4169740080833435, |
|
"learning_rate": 5.7882787067179635e-06, |
|
"loss": 0.331, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 65.26, |
|
"grad_norm": 0.43580740690231323, |
|
"learning_rate": 5.759377774489494e-06, |
|
"loss": 0.3301, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 65.33, |
|
"grad_norm": 0.46015655994415283, |
|
"learning_rate": 5.730476842261024e-06, |
|
"loss": 0.3326, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"grad_norm": 0.4646316468715668, |
|
"learning_rate": 5.701575910032554e-06, |
|
"loss": 0.3307, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 65.48, |
|
"grad_norm": 0.4371485114097595, |
|
"learning_rate": 5.672674977804084e-06, |
|
"loss": 0.3303, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 65.55, |
|
"grad_norm": 0.443768173456192, |
|
"learning_rate": 5.643774045575615e-06, |
|
"loss": 0.3315, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 65.63, |
|
"grad_norm": 0.44002553820610046, |
|
"learning_rate": 5.614873113347144e-06, |
|
"loss": 0.3305, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 65.7, |
|
"grad_norm": 0.39671292901039124, |
|
"learning_rate": 5.5859721811186746e-06, |
|
"loss": 0.3312, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 65.77, |
|
"grad_norm": 0.4188387393951416, |
|
"learning_rate": 5.557071248890204e-06, |
|
"loss": 0.3302, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 65.85, |
|
"grad_norm": 0.44623398780822754, |
|
"learning_rate": 5.528170316661734e-06, |
|
"loss": 0.3308, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 65.92, |
|
"grad_norm": 0.36335235834121704, |
|
"learning_rate": 5.499269384433264e-06, |
|
"loss": 0.3293, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"grad_norm": 0.41810572147369385, |
|
"learning_rate": 5.4703684522047944e-06, |
|
"loss": 0.329, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 66.07, |
|
"grad_norm": 0.4002617299556732, |
|
"learning_rate": 5.441467519976325e-06, |
|
"loss": 0.3278, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 66.14, |
|
"grad_norm": 0.45273175835609436, |
|
"learning_rate": 5.412566587747854e-06, |
|
"loss": 0.3303, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 66.22, |
|
"grad_norm": 0.48169875144958496, |
|
"learning_rate": 5.383665655519385e-06, |
|
"loss": 0.332, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 66.29, |
|
"grad_norm": 0.39927640557289124, |
|
"learning_rate": 5.354764723290915e-06, |
|
"loss": 0.3296, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"grad_norm": 0.42319226264953613, |
|
"learning_rate": 5.325863791062445e-06, |
|
"loss": 0.3309, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 66.44, |
|
"grad_norm": 0.4284779131412506, |
|
"learning_rate": 5.296962858833975e-06, |
|
"loss": 0.3321, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 66.51, |
|
"grad_norm": 0.5179397463798523, |
|
"learning_rate": 5.268061926605505e-06, |
|
"loss": 0.33, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 66.59, |
|
"grad_norm": 0.44250035285949707, |
|
"learning_rate": 5.239160994377034e-06, |
|
"loss": 0.3295, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"grad_norm": 0.46015605330467224, |
|
"learning_rate": 5.210260062148565e-06, |
|
"loss": 0.3313, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 66.74, |
|
"grad_norm": 0.5012817978858948, |
|
"learning_rate": 5.181359129920095e-06, |
|
"loss": 0.3302, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 66.81, |
|
"grad_norm": 0.403338223695755, |
|
"learning_rate": 5.1524581976916245e-06, |
|
"loss": 0.3306, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 66.88, |
|
"grad_norm": 0.4086831212043762, |
|
"learning_rate": 5.123557265463155e-06, |
|
"loss": 0.3286, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"grad_norm": 0.3715237081050873, |
|
"learning_rate": 5.094656333234685e-06, |
|
"loss": 0.3301, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 67.03, |
|
"grad_norm": 0.46829870343208313, |
|
"learning_rate": 5.065755401006215e-06, |
|
"loss": 0.3307, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 67.11, |
|
"grad_norm": 0.4667709767818451, |
|
"learning_rate": 5.036854468777745e-06, |
|
"loss": 0.3298, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 67.18, |
|
"grad_norm": 0.4758981466293335, |
|
"learning_rate": 5.007953536549275e-06, |
|
"loss": 0.3272, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 67.25, |
|
"grad_norm": 0.48276805877685547, |
|
"learning_rate": 4.979052604320804e-06, |
|
"loss": 0.3288, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 67.33, |
|
"grad_norm": 0.400806725025177, |
|
"learning_rate": 4.950151672092335e-06, |
|
"loss": 0.3258, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"grad_norm": 0.40156251192092896, |
|
"learning_rate": 4.921250739863865e-06, |
|
"loss": 0.3351, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 67.48, |
|
"grad_norm": 0.5024535655975342, |
|
"learning_rate": 4.8923498076353955e-06, |
|
"loss": 0.3306, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 67.55, |
|
"grad_norm": 0.52587890625, |
|
"learning_rate": 4.863448875406925e-06, |
|
"loss": 0.3331, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 67.62, |
|
"grad_norm": 0.41265735030174255, |
|
"learning_rate": 4.8345479431784554e-06, |
|
"loss": 0.3328, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 67.7, |
|
"grad_norm": 0.34202754497528076, |
|
"learning_rate": 4.805647010949986e-06, |
|
"loss": 0.3321, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 67.77, |
|
"grad_norm": 0.4898373484611511, |
|
"learning_rate": 4.776746078721515e-06, |
|
"loss": 0.331, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 67.85, |
|
"grad_norm": 0.52295982837677, |
|
"learning_rate": 4.747845146493046e-06, |
|
"loss": 0.3306, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"grad_norm": 0.46750620007514954, |
|
"learning_rate": 4.718944214264575e-06, |
|
"loss": 0.3315, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 67.99, |
|
"grad_norm": 0.35533860325813293, |
|
"learning_rate": 4.690043282036105e-06, |
|
"loss": 0.3315, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 68.07, |
|
"grad_norm": 0.41508856415748596, |
|
"learning_rate": 4.661142349807635e-06, |
|
"loss": 0.3291, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 68.14, |
|
"grad_norm": 0.4271659851074219, |
|
"learning_rate": 4.632241417579166e-06, |
|
"loss": 0.3286, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 68.22, |
|
"grad_norm": 0.44648808240890503, |
|
"learning_rate": 4.603340485350695e-06, |
|
"loss": 0.3299, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 68.29, |
|
"grad_norm": 0.4843562841415405, |
|
"learning_rate": 4.574439553122226e-06, |
|
"loss": 0.3282, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 68.36, |
|
"grad_norm": 0.41266024112701416, |
|
"learning_rate": 4.545538620893756e-06, |
|
"loss": 0.3299, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 68.44, |
|
"grad_norm": 0.4088280200958252, |
|
"learning_rate": 4.5166376886652855e-06, |
|
"loss": 0.332, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 68.51, |
|
"grad_norm": 0.48477041721343994, |
|
"learning_rate": 4.487736756436816e-06, |
|
"loss": 0.3312, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 68.59, |
|
"grad_norm": 0.42487454414367676, |
|
"learning_rate": 4.458835824208346e-06, |
|
"loss": 0.3296, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 68.66, |
|
"grad_norm": 0.4671236276626587, |
|
"learning_rate": 4.429934891979876e-06, |
|
"loss": 0.3288, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 68.73, |
|
"grad_norm": 0.4430939257144928, |
|
"learning_rate": 4.401033959751405e-06, |
|
"loss": 0.3297, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 68.81, |
|
"grad_norm": 0.4080400764942169, |
|
"learning_rate": 4.372133027522936e-06, |
|
"loss": 0.3305, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 68.88, |
|
"grad_norm": 0.3743002712726593, |
|
"learning_rate": 4.343232095294466e-06, |
|
"loss": 0.3294, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"grad_norm": 0.3991639316082001, |
|
"learning_rate": 4.314331163065996e-06, |
|
"loss": 0.3293, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 69.03, |
|
"grad_norm": 0.40404531359672546, |
|
"learning_rate": 4.285430230837526e-06, |
|
"loss": 0.3307, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 69.1, |
|
"grad_norm": 0.4253464639186859, |
|
"learning_rate": 4.2565292986090565e-06, |
|
"loss": 0.3278, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 69.18, |
|
"grad_norm": 0.43970435857772827, |
|
"learning_rate": 4.227628366380586e-06, |
|
"loss": 0.3284, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 69.25, |
|
"grad_norm": 0.42423635721206665, |
|
"learning_rate": 4.1987274341521164e-06, |
|
"loss": 0.3337, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 69.33, |
|
"grad_norm": 0.4581485092639923, |
|
"learning_rate": 4.169826501923646e-06, |
|
"loss": 0.3273, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"grad_norm": 0.4594268500804901, |
|
"learning_rate": 4.1409255696951755e-06, |
|
"loss": 0.3295, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 69.47, |
|
"grad_norm": 0.49994996190071106, |
|
"learning_rate": 4.112024637466706e-06, |
|
"loss": 0.3267, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 69.55, |
|
"grad_norm": 0.4062737822532654, |
|
"learning_rate": 4.083123705238236e-06, |
|
"loss": 0.3283, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 69.62, |
|
"grad_norm": 0.4764838218688965, |
|
"learning_rate": 4.054222773009766e-06, |
|
"loss": 0.3318, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 69.7, |
|
"grad_norm": 0.42747876048088074, |
|
"learning_rate": 4.025321840781296e-06, |
|
"loss": 0.3311, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 69.77, |
|
"grad_norm": 0.45434367656707764, |
|
"learning_rate": 3.996420908552827e-06, |
|
"loss": 0.3293, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 69.84, |
|
"grad_norm": 0.387123703956604, |
|
"learning_rate": 3.967519976324356e-06, |
|
"loss": 0.3311, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 69.92, |
|
"grad_norm": 0.412826806306839, |
|
"learning_rate": 3.938619044095887e-06, |
|
"loss": 0.329, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"grad_norm": 0.532727062702179, |
|
"learning_rate": 3.909718111867417e-06, |
|
"loss": 0.3266, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 70.07, |
|
"grad_norm": 0.4674714505672455, |
|
"learning_rate": 3.8808171796389465e-06, |
|
"loss": 0.3257, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 70.14, |
|
"grad_norm": 0.3989239037036896, |
|
"learning_rate": 3.851916247410477e-06, |
|
"loss": 0.3288, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 70.21, |
|
"grad_norm": 0.5390828251838684, |
|
"learning_rate": 3.8230153151820065e-06, |
|
"loss": 0.3316, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 70.29, |
|
"grad_norm": 0.4232146143913269, |
|
"learning_rate": 3.7941143829535364e-06, |
|
"loss": 0.3297, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 70.36, |
|
"grad_norm": 0.4476439654827118, |
|
"learning_rate": 3.765213450725067e-06, |
|
"loss": 0.3308, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 70.44, |
|
"grad_norm": 0.46341538429260254, |
|
"learning_rate": 3.7363125184965968e-06, |
|
"loss": 0.3241, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 70.51, |
|
"grad_norm": 0.3792473077774048, |
|
"learning_rate": 3.7074115862681268e-06, |
|
"loss": 0.3282, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 70.58, |
|
"grad_norm": 0.42449694871902466, |
|
"learning_rate": 3.6785106540396567e-06, |
|
"loss": 0.3282, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 70.66, |
|
"grad_norm": 0.389700323343277, |
|
"learning_rate": 3.6496097218111867e-06, |
|
"loss": 0.3332, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 70.73, |
|
"grad_norm": 0.4011322855949402, |
|
"learning_rate": 3.6207087895827167e-06, |
|
"loss": 0.3295, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"grad_norm": 0.485365092754364, |
|
"learning_rate": 3.591807857354247e-06, |
|
"loss": 0.33, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 70.88, |
|
"grad_norm": 0.39829009771347046, |
|
"learning_rate": 3.562906925125777e-06, |
|
"loss": 0.3291, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 70.95, |
|
"grad_norm": 0.46039626002311707, |
|
"learning_rate": 3.5340059928973066e-06, |
|
"loss": 0.331, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 71.03, |
|
"grad_norm": 0.40389734506607056, |
|
"learning_rate": 3.505105060668837e-06, |
|
"loss": 0.3286, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 71.1, |
|
"grad_norm": 0.4375256299972534, |
|
"learning_rate": 3.476204128440367e-06, |
|
"loss": 0.3298, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 71.17, |
|
"grad_norm": 0.42462676763534546, |
|
"learning_rate": 3.4473031962118973e-06, |
|
"loss": 0.3302, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 71.25, |
|
"grad_norm": 0.3216535747051239, |
|
"learning_rate": 3.4184022639834273e-06, |
|
"loss": 0.3283, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 71.32, |
|
"grad_norm": 0.45945799350738525, |
|
"learning_rate": 3.389501331754957e-06, |
|
"loss": 0.3278, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"grad_norm": 0.4495971202850342, |
|
"learning_rate": 3.3606003995264872e-06, |
|
"loss": 0.3265, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 71.47, |
|
"grad_norm": 0.4159165322780609, |
|
"learning_rate": 3.331699467298017e-06, |
|
"loss": 0.3292, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 71.54, |
|
"grad_norm": 0.410427063703537, |
|
"learning_rate": 3.302798535069547e-06, |
|
"loss": 0.3299, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 71.62, |
|
"grad_norm": 0.5130240321159363, |
|
"learning_rate": 3.2738976028410776e-06, |
|
"loss": 0.3316, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 71.69, |
|
"grad_norm": 0.4405277669429779, |
|
"learning_rate": 3.244996670612607e-06, |
|
"loss": 0.33, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 71.77, |
|
"grad_norm": 0.575674295425415, |
|
"learning_rate": 3.2160957383841375e-06, |
|
"loss": 0.3298, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 71.84, |
|
"grad_norm": 0.4434616267681122, |
|
"learning_rate": 3.1871948061556675e-06, |
|
"loss": 0.3298, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 71.91, |
|
"grad_norm": 0.3960082530975342, |
|
"learning_rate": 3.1582938739271974e-06, |
|
"loss": 0.3282, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 71.99, |
|
"grad_norm": 0.42698296904563904, |
|
"learning_rate": 3.129392941698728e-06, |
|
"loss": 0.3296, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 72.06, |
|
"grad_norm": 0.5218748450279236, |
|
"learning_rate": 3.1004920094702574e-06, |
|
"loss": 0.3296, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 72.14, |
|
"grad_norm": 0.46763402223587036, |
|
"learning_rate": 3.0715910772417873e-06, |
|
"loss": 0.3262, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 72.21, |
|
"grad_norm": 0.42345327138900757, |
|
"learning_rate": 3.0426901450133177e-06, |
|
"loss": 0.33, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 72.28, |
|
"grad_norm": 0.4526881277561188, |
|
"learning_rate": 3.0137892127848477e-06, |
|
"loss": 0.3304, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 72.36, |
|
"grad_norm": 0.42106348276138306, |
|
"learning_rate": 2.984888280556378e-06, |
|
"loss": 0.3292, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 72.43, |
|
"grad_norm": 0.5022510886192322, |
|
"learning_rate": 2.9559873483279076e-06, |
|
"loss": 0.3266, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 72.51, |
|
"grad_norm": 0.4436812996864319, |
|
"learning_rate": 2.9270864160994376e-06, |
|
"loss": 0.3269, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 72.58, |
|
"grad_norm": 0.42252251505851746, |
|
"learning_rate": 2.898185483870968e-06, |
|
"loss": 0.3281, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 72.65, |
|
"grad_norm": 0.5339802503585815, |
|
"learning_rate": 2.869284551642498e-06, |
|
"loss": 0.3289, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"grad_norm": 0.3937510550022125, |
|
"learning_rate": 2.840383619414028e-06, |
|
"loss": 0.328, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"grad_norm": 0.3894229829311371, |
|
"learning_rate": 2.811482687185558e-06, |
|
"loss": 0.3282, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 72.88, |
|
"grad_norm": 0.4481090307235718, |
|
"learning_rate": 2.782581754957088e-06, |
|
"loss": 0.3301, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 72.95, |
|
"grad_norm": 0.45495444536209106, |
|
"learning_rate": 2.753680822728618e-06, |
|
"loss": 0.3278, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 73.02, |
|
"grad_norm": 0.49259716272354126, |
|
"learning_rate": 2.7247798905001482e-06, |
|
"loss": 0.3295, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 73.1, |
|
"grad_norm": 0.4257282018661499, |
|
"learning_rate": 2.6958789582716778e-06, |
|
"loss": 0.3269, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 73.17, |
|
"grad_norm": 0.43159300088882446, |
|
"learning_rate": 2.666978026043208e-06, |
|
"loss": 0.3276, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 73.25, |
|
"grad_norm": 0.4048108458518982, |
|
"learning_rate": 2.638077093814738e-06, |
|
"loss": 0.3333, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 73.32, |
|
"grad_norm": 0.4666566252708435, |
|
"learning_rate": 2.609176161586268e-06, |
|
"loss": 0.3275, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 73.39, |
|
"grad_norm": 0.3985891342163086, |
|
"learning_rate": 2.5802752293577985e-06, |
|
"loss": 0.3279, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 73.47, |
|
"grad_norm": 0.5439868569374084, |
|
"learning_rate": 2.551374297129328e-06, |
|
"loss": 0.3275, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 73.54, |
|
"grad_norm": 0.45784690976142883, |
|
"learning_rate": 2.522473364900858e-06, |
|
"loss": 0.3309, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 73.62, |
|
"grad_norm": 0.4779771864414215, |
|
"learning_rate": 2.4935724326723884e-06, |
|
"loss": 0.3288, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 73.69, |
|
"grad_norm": 0.47680574655532837, |
|
"learning_rate": 2.4646715004439184e-06, |
|
"loss": 0.3285, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 73.76, |
|
"grad_norm": 0.3629719913005829, |
|
"learning_rate": 2.4357705682154488e-06, |
|
"loss": 0.3284, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 73.84, |
|
"grad_norm": 0.46253129839897156, |
|
"learning_rate": 2.4068696359869783e-06, |
|
"loss": 0.3284, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 73.91, |
|
"grad_norm": 0.44531476497650146, |
|
"learning_rate": 2.3779687037585083e-06, |
|
"loss": 0.3281, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"grad_norm": 0.39289695024490356, |
|
"learning_rate": 2.3490677715300387e-06, |
|
"loss": 0.326, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 74.06, |
|
"grad_norm": 0.48103997111320496, |
|
"learning_rate": 2.3201668393015686e-06, |
|
"loss": 0.3272, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 74.13, |
|
"grad_norm": 0.4336768388748169, |
|
"learning_rate": 2.2912659070730986e-06, |
|
"loss": 0.3265, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 74.21, |
|
"grad_norm": 0.4040307402610779, |
|
"learning_rate": 2.2623649748446286e-06, |
|
"loss": 0.3271, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 74.28, |
|
"grad_norm": 0.49081218242645264, |
|
"learning_rate": 2.2334640426161585e-06, |
|
"loss": 0.328, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 74.36, |
|
"grad_norm": 0.44683390855789185, |
|
"learning_rate": 2.2045631103876885e-06, |
|
"loss": 0.3266, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 74.43, |
|
"grad_norm": 0.4362635612487793, |
|
"learning_rate": 2.175662178159219e-06, |
|
"loss": 0.3293, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 74.5, |
|
"grad_norm": 0.4326813220977783, |
|
"learning_rate": 2.146761245930749e-06, |
|
"loss": 0.3302, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 74.58, |
|
"grad_norm": 0.5289288759231567, |
|
"learning_rate": 2.117860313702279e-06, |
|
"loss": 0.3288, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 74.65, |
|
"grad_norm": 0.5708897709846497, |
|
"learning_rate": 2.088959381473809e-06, |
|
"loss": 0.3271, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 74.73, |
|
"grad_norm": 0.38460394740104675, |
|
"learning_rate": 2.0600584492453388e-06, |
|
"loss": 0.3262, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"grad_norm": 0.4401102066040039, |
|
"learning_rate": 2.031157517016869e-06, |
|
"loss": 0.3285, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 74.87, |
|
"grad_norm": 0.4699185788631439, |
|
"learning_rate": 2.002256584788399e-06, |
|
"loss": 0.3292, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 74.95, |
|
"grad_norm": 0.43969598412513733, |
|
"learning_rate": 1.9733556525599287e-06, |
|
"loss": 0.3282, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 75.02, |
|
"grad_norm": 0.5226773619651794, |
|
"learning_rate": 1.944454720331459e-06, |
|
"loss": 0.3275, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 75.1, |
|
"grad_norm": 0.42381104826927185, |
|
"learning_rate": 1.915553788102989e-06, |
|
"loss": 0.3287, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 75.17, |
|
"grad_norm": 0.47836771607398987, |
|
"learning_rate": 1.886652855874519e-06, |
|
"loss": 0.3248, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 75.24, |
|
"grad_norm": 0.4760962128639221, |
|
"learning_rate": 1.8577519236460492e-06, |
|
"loss": 0.3255, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 75.32, |
|
"grad_norm": 0.4954340159893036, |
|
"learning_rate": 1.8288509914175794e-06, |
|
"loss": 0.3274, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 75.39, |
|
"grad_norm": 0.3998168110847473, |
|
"learning_rate": 1.7999500591891091e-06, |
|
"loss": 0.328, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"grad_norm": 0.3899104595184326, |
|
"learning_rate": 1.7710491269606393e-06, |
|
"loss": 0.3291, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 75.54, |
|
"grad_norm": 0.4677903652191162, |
|
"learning_rate": 1.7421481947321693e-06, |
|
"loss": 0.3261, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 75.61, |
|
"grad_norm": 0.41607698798179626, |
|
"learning_rate": 1.7132472625036995e-06, |
|
"loss": 0.3291, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 75.69, |
|
"grad_norm": 0.44930896162986755, |
|
"learning_rate": 1.6843463302752294e-06, |
|
"loss": 0.3307, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 75.76, |
|
"grad_norm": 0.48138096928596497, |
|
"learning_rate": 1.6554453980467594e-06, |
|
"loss": 0.3256, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 75.84, |
|
"grad_norm": 0.44024384021759033, |
|
"learning_rate": 1.6265444658182896e-06, |
|
"loss": 0.3278, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 75.91, |
|
"grad_norm": 0.42400872707366943, |
|
"learning_rate": 1.5976435335898193e-06, |
|
"loss": 0.3295, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 75.98, |
|
"grad_norm": 0.4450230896472931, |
|
"learning_rate": 1.5687426013613495e-06, |
|
"loss": 0.3293, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 76.06, |
|
"grad_norm": 0.4113300144672394, |
|
"learning_rate": 1.5398416691328797e-06, |
|
"loss": 0.3277, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 76.13, |
|
"grad_norm": 0.4838961064815521, |
|
"learning_rate": 1.5109407369044097e-06, |
|
"loss": 0.3257, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 76.21, |
|
"grad_norm": 0.45890524983406067, |
|
"learning_rate": 1.4820398046759396e-06, |
|
"loss": 0.3263, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 76.28, |
|
"grad_norm": 0.4421687424182892, |
|
"learning_rate": 1.4531388724474696e-06, |
|
"loss": 0.3287, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 76.35, |
|
"grad_norm": 0.4234231114387512, |
|
"learning_rate": 1.4242379402189998e-06, |
|
"loss": 0.3308, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 76.43, |
|
"grad_norm": 0.4239380657672882, |
|
"learning_rate": 1.3953370079905297e-06, |
|
"loss": 0.3291, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"grad_norm": 0.4606933891773224, |
|
"learning_rate": 1.3664360757620597e-06, |
|
"loss": 0.3288, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 76.58, |
|
"grad_norm": 0.4070008099079132, |
|
"learning_rate": 1.33753514353359e-06, |
|
"loss": 0.3268, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 76.65, |
|
"grad_norm": 0.5978463888168335, |
|
"learning_rate": 1.3086342113051199e-06, |
|
"loss": 0.3272, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 76.72, |
|
"grad_norm": 0.43075379729270935, |
|
"learning_rate": 1.27973327907665e-06, |
|
"loss": 0.3287, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"grad_norm": 0.46790510416030884, |
|
"learning_rate": 1.2508323468481798e-06, |
|
"loss": 0.3266, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 76.87, |
|
"grad_norm": 0.45541101694107056, |
|
"learning_rate": 1.22193141461971e-06, |
|
"loss": 0.3254, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 76.95, |
|
"grad_norm": 0.44363468885421753, |
|
"learning_rate": 1.1930304823912402e-06, |
|
"loss": 0.3248, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 77.02, |
|
"grad_norm": 0.5055235624313354, |
|
"learning_rate": 1.1641295501627701e-06, |
|
"loss": 0.3265, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 77.09, |
|
"grad_norm": 0.3572923541069031, |
|
"learning_rate": 1.1352286179343e-06, |
|
"loss": 0.324, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 77.17, |
|
"grad_norm": 0.40502551198005676, |
|
"learning_rate": 1.10632768570583e-06, |
|
"loss": 0.3254, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 77.24, |
|
"grad_norm": 0.45639294385910034, |
|
"learning_rate": 1.0774267534773602e-06, |
|
"loss": 0.328, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 77.32, |
|
"grad_norm": 0.4580610990524292, |
|
"learning_rate": 1.0485258212488904e-06, |
|
"loss": 0.3278, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 77.39, |
|
"grad_norm": 0.4812680184841156, |
|
"learning_rate": 1.0196248890204202e-06, |
|
"loss": 0.3274, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 77.46, |
|
"grad_norm": 0.416979044675827, |
|
"learning_rate": 9.907239567919504e-07, |
|
"loss": 0.3261, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 77.54, |
|
"grad_norm": 0.39473670721054077, |
|
"learning_rate": 9.618230245634803e-07, |
|
"loss": 0.328, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 77.61, |
|
"grad_norm": 0.4831089675426483, |
|
"learning_rate": 9.329220923350104e-07, |
|
"loss": 0.328, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 77.69, |
|
"grad_norm": 0.4752112627029419, |
|
"learning_rate": 9.040211601065404e-07, |
|
"loss": 0.3249, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 77.76, |
|
"grad_norm": 0.4114755690097809, |
|
"learning_rate": 8.751202278780705e-07, |
|
"loss": 0.3282, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 77.83, |
|
"grad_norm": 0.539284348487854, |
|
"learning_rate": 8.462192956496004e-07, |
|
"loss": 0.327, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 77.91, |
|
"grad_norm": 0.4160568118095398, |
|
"learning_rate": 8.173183634211305e-07, |
|
"loss": 0.3278, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 77.98, |
|
"grad_norm": 0.42335689067840576, |
|
"learning_rate": 7.884174311926606e-07, |
|
"loss": 0.3291, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 78.06, |
|
"grad_norm": 0.4964425563812256, |
|
"learning_rate": 7.595164989641906e-07, |
|
"loss": 0.3269, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 78.13, |
|
"grad_norm": 0.5482224822044373, |
|
"learning_rate": 7.306155667357206e-07, |
|
"loss": 0.3257, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 78.2, |
|
"grad_norm": 0.4845934808254242, |
|
"learning_rate": 7.017146345072507e-07, |
|
"loss": 0.3257, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 78.28, |
|
"grad_norm": 0.44311293959617615, |
|
"learning_rate": 6.728137022787807e-07, |
|
"loss": 0.3267, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 78.35, |
|
"grad_norm": 0.49295201897621155, |
|
"learning_rate": 6.439127700503107e-07, |
|
"loss": 0.3262, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 78.43, |
|
"grad_norm": 0.45838817954063416, |
|
"learning_rate": 6.150118378218408e-07, |
|
"loss": 0.3272, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 78.5, |
|
"grad_norm": 0.4277520477771759, |
|
"learning_rate": 5.861109055933709e-07, |
|
"loss": 0.3275, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 78.57, |
|
"grad_norm": 0.49568185210227966, |
|
"learning_rate": 5.572099733649008e-07, |
|
"loss": 0.3238, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 78.65, |
|
"grad_norm": 0.3964736759662628, |
|
"learning_rate": 5.283090411364309e-07, |
|
"loss": 0.3265, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 78.72, |
|
"grad_norm": 0.38991761207580566, |
|
"learning_rate": 4.994081089079609e-07, |
|
"loss": 0.3297, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 78.8, |
|
"grad_norm": 0.514043390750885, |
|
"learning_rate": 4.7050717667949096e-07, |
|
"loss": 0.3274, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 78.87, |
|
"grad_norm": 0.44372057914733887, |
|
"learning_rate": 4.41606244451021e-07, |
|
"loss": 0.3265, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 78.94, |
|
"grad_norm": 0.40556496381759644, |
|
"learning_rate": 4.1270531222255106e-07, |
|
"loss": 0.3268, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 79.02, |
|
"grad_norm": 0.37113696336746216, |
|
"learning_rate": 3.838043799940811e-07, |
|
"loss": 0.331, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 79.09, |
|
"grad_norm": 0.42463332414627075, |
|
"learning_rate": 3.549034477656111e-07, |
|
"loss": 0.3269, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 79.17, |
|
"grad_norm": 0.456259548664093, |
|
"learning_rate": 3.260025155371412e-07, |
|
"loss": 0.3296, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 79.24, |
|
"grad_norm": 0.39561837911605835, |
|
"learning_rate": 2.971015833086712e-07, |
|
"loss": 0.3272, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 79.31, |
|
"grad_norm": 0.42246511578559875, |
|
"learning_rate": 2.6820065108020127e-07, |
|
"loss": 0.3274, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 79.39, |
|
"grad_norm": 0.42932552099227905, |
|
"learning_rate": 2.392997188517313e-07, |
|
"loss": 0.3259, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 79.46, |
|
"grad_norm": 0.4081755578517914, |
|
"learning_rate": 2.1039878662326132e-07, |
|
"loss": 0.3254, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 79.54, |
|
"grad_norm": 0.43017107248306274, |
|
"learning_rate": 1.8149785439479136e-07, |
|
"loss": 0.3294, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 79.61, |
|
"grad_norm": 0.3940086364746094, |
|
"learning_rate": 1.525969221663214e-07, |
|
"loss": 0.3284, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 79.68, |
|
"grad_norm": 0.37287628650665283, |
|
"learning_rate": 1.2369598993785146e-07, |
|
"loss": 0.3269, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 79.76, |
|
"grad_norm": 0.451742559671402, |
|
"learning_rate": 9.479505770938148e-08, |
|
"loss": 0.3259, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 79.83, |
|
"grad_norm": 0.4438938796520233, |
|
"learning_rate": 6.589412548091152e-08, |
|
"loss": 0.3238, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 79.91, |
|
"grad_norm": 0.4649119973182678, |
|
"learning_rate": 3.699319325244155e-08, |
|
"loss": 0.3257, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 79.98, |
|
"grad_norm": 0.4151638150215149, |
|
"learning_rate": 8.09226102397159e-09, |
|
"loss": 0.3303, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 540640, |
|
"total_flos": 4.3712245507093955e+20, |
|
"train_loss": 0.35890252478696355, |
|
"train_runtime": 56544.5558, |
|
"train_samples_per_second": 76.486, |
|
"train_steps_per_second": 9.561 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 540640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 80, |
|
"save_steps": 1000000000, |
|
"total_flos": 4.3712245507093955e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|