|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3472222222222222, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006944444444444445, |
|
"grad_norm": 0.38245001435279846, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 1.5265, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001388888888888889, |
|
"grad_norm": 0.36873823404312134, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 1.5149, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0020833333333333333, |
|
"grad_norm": 0.31270918250083923, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 1.478, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.002777777777777778, |
|
"grad_norm": 0.21180707216262817, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 1.4297, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003472222222222222, |
|
"grad_norm": 0.10717298090457916, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 1.3724, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004166666666666667, |
|
"grad_norm": 0.12929688394069672, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 1.3816, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004861111111111111, |
|
"grad_norm": 0.10623333603143692, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 1.3061, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.005555555555555556, |
|
"grad_norm": 0.07657431811094284, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 1.2736, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00625, |
|
"grad_norm": 0.05140658840537071, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 1.2767, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.006944444444444444, |
|
"grad_norm": 0.04951579496264458, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.2659, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007638888888888889, |
|
"grad_norm": 0.035326242446899414, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1883, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.008333333333333333, |
|
"grad_norm": 0.03807157278060913, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 1.2452, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.009027777777777777, |
|
"grad_norm": 0.03424696624279022, |
|
"learning_rate": 1.181818181818182e-05, |
|
"loss": 1.2394, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.009722222222222222, |
|
"grad_norm": 0.026057492941617966, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 1.157, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 0.024454891681671143, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 1.1901, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.011111111111111112, |
|
"grad_norm": 0.02439827099442482, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 1.1669, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.011805555555555555, |
|
"grad_norm": 0.023934854194521904, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 1.1833, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0125, |
|
"grad_norm": 0.023052480071783066, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 1.1777, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.013194444444444444, |
|
"grad_norm": 0.0216195210814476, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 1.1541, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.013888888888888888, |
|
"grad_norm": 0.021930918097496033, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 1.1519, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014583333333333334, |
|
"grad_norm": 0.01956241950392723, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 1.1289, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.015277777777777777, |
|
"grad_norm": 0.019718188792467117, |
|
"learning_rate": 2e-05, |
|
"loss": 1.12, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01597222222222222, |
|
"grad_norm": 0.018330449238419533, |
|
"learning_rate": 2.090909090909091e-05, |
|
"loss": 1.151, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.016666666666666666, |
|
"grad_norm": 0.019966386258602142, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 1.1717, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.017361111111111112, |
|
"grad_norm": 0.019648971036076546, |
|
"learning_rate": 2.2727272727272733e-05, |
|
"loss": 1.1188, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.018055555555555554, |
|
"grad_norm": 0.018150752410292625, |
|
"learning_rate": 2.363636363636364e-05, |
|
"loss": 1.0927, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01875, |
|
"grad_norm": 0.02000141702592373, |
|
"learning_rate": 2.454545454545455e-05, |
|
"loss": 1.1205, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.019444444444444445, |
|
"grad_norm": 0.02152254991233349, |
|
"learning_rate": 2.5454545454545457e-05, |
|
"loss": 1.1136, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02013888888888889, |
|
"grad_norm": 0.016942821443080902, |
|
"learning_rate": 2.6363636363636365e-05, |
|
"loss": 1.1073, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 0.018401362001895905, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 1.0745, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.021527777777777778, |
|
"grad_norm": 0.01856987737119198, |
|
"learning_rate": 2.8181818181818185e-05, |
|
"loss": 1.0699, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.022222222222222223, |
|
"grad_norm": 0.014833790250122547, |
|
"learning_rate": 2.9090909090909093e-05, |
|
"loss": 1.0226, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.022916666666666665, |
|
"grad_norm": 0.01955495961010456, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 1.1012, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02361111111111111, |
|
"grad_norm": 0.01699119061231613, |
|
"learning_rate": 3.090909090909091e-05, |
|
"loss": 1.0457, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.024305555555555556, |
|
"grad_norm": 0.020263031125068665, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 1.0581, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 0.015922358259558678, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 1.0958, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.025694444444444443, |
|
"grad_norm": 0.016595132648944855, |
|
"learning_rate": 3.363636363636364e-05, |
|
"loss": 1.0425, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02638888888888889, |
|
"grad_norm": 0.018398325890302658, |
|
"learning_rate": 3.454545454545455e-05, |
|
"loss": 1.0893, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.027083333333333334, |
|
"grad_norm": 0.017498090863227844, |
|
"learning_rate": 3.545454545454546e-05, |
|
"loss": 1.0487, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.027777777777777776, |
|
"grad_norm": 0.016576213762164116, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 1.0567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02847222222222222, |
|
"grad_norm": 0.013903766870498657, |
|
"learning_rate": 3.7272727272727276e-05, |
|
"loss": 1.023, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.029166666666666667, |
|
"grad_norm": 0.01546618901193142, |
|
"learning_rate": 3.818181818181819e-05, |
|
"loss": 1.0456, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.029861111111111113, |
|
"grad_norm": 0.014900722540915012, |
|
"learning_rate": 3.909090909090909e-05, |
|
"loss": 1.0365, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.030555555555555555, |
|
"grad_norm": 0.015999475494027138, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0486, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 0.01696275919675827, |
|
"learning_rate": 3.999994935591541e-05, |
|
"loss": 1.0602, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03194444444444444, |
|
"grad_norm": 0.016145754605531693, |
|
"learning_rate": 3.999979742391812e-05, |
|
"loss": 1.0709, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03263888888888889, |
|
"grad_norm": 0.015007151290774345, |
|
"learning_rate": 3.999954420477757e-05, |
|
"loss": 1.0273, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03333333333333333, |
|
"grad_norm": 0.016809619963169098, |
|
"learning_rate": 3.9999189699776166e-05, |
|
"loss": 1.062, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.034027777777777775, |
|
"grad_norm": 0.017114873975515366, |
|
"learning_rate": 3.9998733910709277e-05, |
|
"loss": 1.0624, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.034722222222222224, |
|
"grad_norm": 0.014565072022378445, |
|
"learning_rate": 3.9998176839885196e-05, |
|
"loss": 1.0487, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.035416666666666666, |
|
"grad_norm": 0.014314558357000351, |
|
"learning_rate": 3.9997518490125166e-05, |
|
"loss": 1.0673, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03611111111111111, |
|
"grad_norm": 0.01658693701028824, |
|
"learning_rate": 3.999675886476332e-05, |
|
"loss": 1.0328, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03680555555555556, |
|
"grad_norm": 0.014420751482248306, |
|
"learning_rate": 3.9995897967646725e-05, |
|
"loss": 0.9782, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0375, |
|
"grad_norm": 0.017560573294758797, |
|
"learning_rate": 3.999493580313532e-05, |
|
"loss": 1.0367, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03819444444444445, |
|
"grad_norm": 0.017693940550088882, |
|
"learning_rate": 3.9993872376101894e-05, |
|
"loss": 1.0518, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03888888888888889, |
|
"grad_norm": 0.014515440911054611, |
|
"learning_rate": 3.9992707691932067e-05, |
|
"loss": 1.0322, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03958333333333333, |
|
"grad_norm": 0.015049392357468605, |
|
"learning_rate": 3.999144175652428e-05, |
|
"loss": 1.0323, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04027777777777778, |
|
"grad_norm": 0.017379263415932655, |
|
"learning_rate": 3.999007457628976e-05, |
|
"loss": 1.0391, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04097222222222222, |
|
"grad_norm": 0.01676369458436966, |
|
"learning_rate": 3.998860615815246e-05, |
|
"loss": 1.0637, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 0.020871426910161972, |
|
"learning_rate": 3.9987036509549034e-05, |
|
"loss": 1.0831, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04236111111111111, |
|
"grad_norm": 0.01579645834863186, |
|
"learning_rate": 3.998536563842884e-05, |
|
"loss": 1.0439, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.043055555555555555, |
|
"grad_norm": 0.021290535107254982, |
|
"learning_rate": 3.998359355325384e-05, |
|
"loss": 1.0859, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04375, |
|
"grad_norm": 0.016678057610988617, |
|
"learning_rate": 3.998172026299861e-05, |
|
"loss": 1.0521, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 0.016436951234936714, |
|
"learning_rate": 3.997974577715026e-05, |
|
"loss": 1.0156, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.04513888888888889, |
|
"grad_norm": 0.016367288306355476, |
|
"learning_rate": 3.9977670105708377e-05, |
|
"loss": 1.0365, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04583333333333333, |
|
"grad_norm": 0.017032478004693985, |
|
"learning_rate": 3.997549325918501e-05, |
|
"loss": 1.068, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04652777777777778, |
|
"grad_norm": 0.015650460496544838, |
|
"learning_rate": 3.997321524860461e-05, |
|
"loss": 1.0361, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.04722222222222222, |
|
"grad_norm": 0.014867709018290043, |
|
"learning_rate": 3.997083608550395e-05, |
|
"loss": 1.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04791666666666667, |
|
"grad_norm": 0.015197121538221836, |
|
"learning_rate": 3.996835578193208e-05, |
|
"loss": 1.0488, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.04861111111111111, |
|
"grad_norm": 0.015954626724123955, |
|
"learning_rate": 3.996577435045027e-05, |
|
"loss": 1.0356, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.049305555555555554, |
|
"grad_norm": 0.01822635903954506, |
|
"learning_rate": 3.996309180413195e-05, |
|
"loss": 1.0493, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.014613306149840355, |
|
"learning_rate": 3.996030815656262e-05, |
|
"loss": 0.9882, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.050694444444444445, |
|
"grad_norm": 0.015572323463857174, |
|
"learning_rate": 3.995742342183982e-05, |
|
"loss": 1.0162, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05138888888888889, |
|
"grad_norm": 0.01604314148426056, |
|
"learning_rate": 3.9954437614573015e-05, |
|
"loss": 1.0424, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.052083333333333336, |
|
"grad_norm": 0.015765074640512466, |
|
"learning_rate": 3.9951350749883555e-05, |
|
"loss": 1.0607, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05277777777777778, |
|
"grad_norm": 0.014984416775405407, |
|
"learning_rate": 3.994816284340459e-05, |
|
"loss": 1.0443, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05347222222222222, |
|
"grad_norm": 0.016170239076018333, |
|
"learning_rate": 3.9944873911280976e-05, |
|
"loss": 1.0275, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05416666666666667, |
|
"grad_norm": 0.014334661886096, |
|
"learning_rate": 3.99414839701692e-05, |
|
"loss": 1.0175, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.05486111111111111, |
|
"grad_norm": 0.014810354448854923, |
|
"learning_rate": 3.993799303723733e-05, |
|
"loss": 1.0015, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 0.014517512172460556, |
|
"learning_rate": 3.993440113016485e-05, |
|
"loss": 1.0137, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05625, |
|
"grad_norm": 0.01477818377315998, |
|
"learning_rate": 3.993070826714267e-05, |
|
"loss": 0.9852, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05694444444444444, |
|
"grad_norm": 0.014672035351395607, |
|
"learning_rate": 3.9926914466872936e-05, |
|
"loss": 1.0099, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05763888888888889, |
|
"grad_norm": 0.015323956497013569, |
|
"learning_rate": 3.9923019748569015e-05, |
|
"loss": 1.0345, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.058333333333333334, |
|
"grad_norm": 0.014713946729898453, |
|
"learning_rate": 3.991902413195535e-05, |
|
"loss": 1.0318, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.059027777777777776, |
|
"grad_norm": 0.01328709814697504, |
|
"learning_rate": 3.9914927637267366e-05, |
|
"loss": 1.0084, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.059722222222222225, |
|
"grad_norm": 0.017166944220662117, |
|
"learning_rate": 3.991073028525139e-05, |
|
"loss": 1.0036, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06041666666666667, |
|
"grad_norm": 0.016073813661932945, |
|
"learning_rate": 3.990643209716454e-05, |
|
"loss": 1.0329, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06111111111111111, |
|
"grad_norm": 0.01435924507677555, |
|
"learning_rate": 3.990203309477457e-05, |
|
"loss": 1.0535, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06180555555555556, |
|
"grad_norm": 0.014585713855922222, |
|
"learning_rate": 3.989753330035985e-05, |
|
"loss": 1.0016, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 0.014899005182087421, |
|
"learning_rate": 3.989293273670916e-05, |
|
"loss": 1.0386, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06319444444444444, |
|
"grad_norm": 0.015017863363027573, |
|
"learning_rate": 3.988823142712165e-05, |
|
"loss": 1.0217, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06388888888888888, |
|
"grad_norm": 0.014558105729520321, |
|
"learning_rate": 3.9883429395406666e-05, |
|
"loss": 1.0046, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06458333333333334, |
|
"grad_norm": 0.014639356173574924, |
|
"learning_rate": 3.987852666588364e-05, |
|
"loss": 1.0226, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.06527777777777778, |
|
"grad_norm": 0.015396458096802235, |
|
"learning_rate": 3.9873523263382015e-05, |
|
"loss": 1.0374, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.06597222222222222, |
|
"grad_norm": 0.014208097010850906, |
|
"learning_rate": 3.9868419213241064e-05, |
|
"loss": 1.0058, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 0.013101368211209774, |
|
"learning_rate": 3.986321454130978e-05, |
|
"loss": 0.9642, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06736111111111111, |
|
"grad_norm": 0.014994910918176174, |
|
"learning_rate": 3.9857909273946747e-05, |
|
"loss": 0.995, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.06805555555555555, |
|
"grad_norm": 0.01322688814252615, |
|
"learning_rate": 3.985250343802e-05, |
|
"loss": 1.0124, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.06875, |
|
"grad_norm": 0.014946015551686287, |
|
"learning_rate": 3.984699706090691e-05, |
|
"loss": 0.9908, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.06944444444444445, |
|
"grad_norm": 0.014106931164860725, |
|
"learning_rate": 3.9841390170494024e-05, |
|
"loss": 0.9985, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07013888888888889, |
|
"grad_norm": 0.014384300448000431, |
|
"learning_rate": 3.9835682795176905e-05, |
|
"loss": 0.9727, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07083333333333333, |
|
"grad_norm": 0.014217739924788475, |
|
"learning_rate": 3.982987496386004e-05, |
|
"loss": 0.9839, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07152777777777777, |
|
"grad_norm": 0.015070730820298195, |
|
"learning_rate": 3.982396670595668e-05, |
|
"loss": 1.0052, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07222222222222222, |
|
"grad_norm": 0.01753557287156582, |
|
"learning_rate": 3.9817958051388624e-05, |
|
"loss": 1.0035, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07291666666666667, |
|
"grad_norm": 0.015090793371200562, |
|
"learning_rate": 3.981184903058618e-05, |
|
"loss": 1.0413, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07361111111111111, |
|
"grad_norm": 0.017897464334964752, |
|
"learning_rate": 3.980563967448791e-05, |
|
"loss": 0.9938, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07430555555555556, |
|
"grad_norm": 0.015342462807893753, |
|
"learning_rate": 3.979933001454053e-05, |
|
"loss": 1.003, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 0.01586705632507801, |
|
"learning_rate": 3.979292008269874e-05, |
|
"loss": 1.0052, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07569444444444444, |
|
"grad_norm": 0.013492800295352936, |
|
"learning_rate": 3.978640991142505e-05, |
|
"loss": 1.0415, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0763888888888889, |
|
"grad_norm": 0.017706014215946198, |
|
"learning_rate": 3.9779799533689634e-05, |
|
"loss": 0.9927, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07708333333333334, |
|
"grad_norm": 0.015358942560851574, |
|
"learning_rate": 3.9773088982970135e-05, |
|
"loss": 1.0201, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.07777777777777778, |
|
"grad_norm": 0.01901993341743946, |
|
"learning_rate": 3.9766278293251526e-05, |
|
"loss": 1.0191, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.07847222222222222, |
|
"grad_norm": 0.015617494471371174, |
|
"learning_rate": 3.9759367499025924e-05, |
|
"loss": 1.0256, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.07916666666666666, |
|
"grad_norm": 0.014908060431480408, |
|
"learning_rate": 3.9752356635292405e-05, |
|
"loss": 0.9898, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0798611111111111, |
|
"grad_norm": 0.013952597044408321, |
|
"learning_rate": 3.974524573755686e-05, |
|
"loss": 0.9862, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08055555555555556, |
|
"grad_norm": 0.013434095308184624, |
|
"learning_rate": 3.9738034841831776e-05, |
|
"loss": 0.9593, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08125, |
|
"grad_norm": 0.014166664332151413, |
|
"learning_rate": 3.9730723984636064e-05, |
|
"loss": 1.0377, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08194444444444444, |
|
"grad_norm": 0.014752853661775589, |
|
"learning_rate": 3.9723313202994904e-05, |
|
"loss": 1.0139, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08263888888888889, |
|
"grad_norm": 0.014744486659765244, |
|
"learning_rate": 3.971580253443951e-05, |
|
"loss": 1.012, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.01442610565572977, |
|
"learning_rate": 3.970819201700699e-05, |
|
"loss": 1.0334, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08402777777777778, |
|
"grad_norm": 0.015393883921205997, |
|
"learning_rate": 3.970048168924009e-05, |
|
"loss": 0.9252, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.08472222222222223, |
|
"grad_norm": 0.01524246297776699, |
|
"learning_rate": 3.9692671590187093e-05, |
|
"loss": 1.0209, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.08541666666666667, |
|
"grad_norm": 0.014700529165565968, |
|
"learning_rate": 3.96847617594015e-05, |
|
"loss": 0.9943, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.08611111111111111, |
|
"grad_norm": 0.014305293560028076, |
|
"learning_rate": 3.967675223694193e-05, |
|
"loss": 0.966, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.08680555555555555, |
|
"grad_norm": 0.017354033887386322, |
|
"learning_rate": 3.966864306337189e-05, |
|
"loss": 0.9938, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0875, |
|
"grad_norm": 0.014640097506344318, |
|
"learning_rate": 3.9660434279759536e-05, |
|
"loss": 1.0317, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.08819444444444445, |
|
"grad_norm": 0.014592519961297512, |
|
"learning_rate": 3.965212592767751e-05, |
|
"loss": 1.002, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 0.01544480212032795, |
|
"learning_rate": 3.964371804920269e-05, |
|
"loss": 0.9818, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.08958333333333333, |
|
"grad_norm": 0.016679253429174423, |
|
"learning_rate": 3.9635210686916016e-05, |
|
"loss": 0.9963, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09027777777777778, |
|
"grad_norm": 0.015196239575743675, |
|
"learning_rate": 3.962660388390224e-05, |
|
"loss": 1.017, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09097222222222222, |
|
"grad_norm": 0.01522803958505392, |
|
"learning_rate": 3.9617897683749726e-05, |
|
"loss": 1.0141, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09166666666666666, |
|
"grad_norm": 0.014725332148373127, |
|
"learning_rate": 3.960909213055023e-05, |
|
"loss": 1.0029, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09236111111111112, |
|
"grad_norm": 0.015328467823565006, |
|
"learning_rate": 3.960018726889868e-05, |
|
"loss": 1.0228, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09305555555555556, |
|
"grad_norm": 0.014145839028060436, |
|
"learning_rate": 3.959118314389291e-05, |
|
"loss": 0.9709, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 0.015266870148479939, |
|
"learning_rate": 3.958207980113351e-05, |
|
"loss": 0.9938, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09444444444444444, |
|
"grad_norm": 0.014660796150565147, |
|
"learning_rate": 3.957287728672352e-05, |
|
"loss": 0.9958, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09513888888888888, |
|
"grad_norm": 0.015213273465633392, |
|
"learning_rate": 3.956357564726822e-05, |
|
"loss": 1.011, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.09583333333333334, |
|
"grad_norm": 0.015523552894592285, |
|
"learning_rate": 3.955417492987493e-05, |
|
"loss": 1.0297, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.09652777777777778, |
|
"grad_norm": 0.013940747827291489, |
|
"learning_rate": 3.95446751821527e-05, |
|
"loss": 1.0099, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.09722222222222222, |
|
"grad_norm": 0.015171276405453682, |
|
"learning_rate": 3.9535076452212156e-05, |
|
"loss": 0.979, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09791666666666667, |
|
"grad_norm": 0.014734701253473759, |
|
"learning_rate": 3.952537878866517e-05, |
|
"loss": 1.0203, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.09861111111111111, |
|
"grad_norm": 0.014895283617079258, |
|
"learning_rate": 3.951558224062469e-05, |
|
"loss": 0.9803, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.09930555555555555, |
|
"grad_norm": 0.015903517603874207, |
|
"learning_rate": 3.9505686857704416e-05, |
|
"loss": 1.0109, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.018048502504825592, |
|
"learning_rate": 3.949569269001861e-05, |
|
"loss": 1.0263, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.10069444444444445, |
|
"grad_norm": 0.014940551482141018, |
|
"learning_rate": 3.948559978818184e-05, |
|
"loss": 1.0167, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10138888888888889, |
|
"grad_norm": 0.018485594540834427, |
|
"learning_rate": 3.947540820330867e-05, |
|
"loss": 1.021, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.10208333333333333, |
|
"grad_norm": 0.015115809626877308, |
|
"learning_rate": 3.9465117987013445e-05, |
|
"loss": 1.0245, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.10277777777777777, |
|
"grad_norm": 0.016594298183918, |
|
"learning_rate": 3.9454729191410025e-05, |
|
"loss": 0.9967, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10347222222222222, |
|
"grad_norm": 0.015151984058320522, |
|
"learning_rate": 3.944424186911152e-05, |
|
"loss": 0.9781, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 0.016983836889266968, |
|
"learning_rate": 3.943365607323001e-05, |
|
"loss": 0.975, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10486111111111111, |
|
"grad_norm": 0.015473946928977966, |
|
"learning_rate": 3.9422971857376296e-05, |
|
"loss": 1.0259, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.10555555555555556, |
|
"grad_norm": 0.014182898215949535, |
|
"learning_rate": 3.941218927565959e-05, |
|
"loss": 1.0058, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.10625, |
|
"grad_norm": 0.014923619106411934, |
|
"learning_rate": 3.9401308382687324e-05, |
|
"loss": 0.9821, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.10694444444444444, |
|
"grad_norm": 0.015497357584536076, |
|
"learning_rate": 3.9390329233564755e-05, |
|
"loss": 1.0599, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1076388888888889, |
|
"grad_norm": 0.01431487686932087, |
|
"learning_rate": 3.9379251883894795e-05, |
|
"loss": 1.0193, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.10833333333333334, |
|
"grad_norm": 0.014660484157502651, |
|
"learning_rate": 3.9368076389777655e-05, |
|
"loss": 0.9527, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.10902777777777778, |
|
"grad_norm": 0.01643138937652111, |
|
"learning_rate": 3.9356802807810613e-05, |
|
"loss": 1.0134, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.10972222222222222, |
|
"grad_norm": 0.016061700880527496, |
|
"learning_rate": 3.934543119508769e-05, |
|
"loss": 1.0064, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11041666666666666, |
|
"grad_norm": 0.01461365632712841, |
|
"learning_rate": 3.933396160919938e-05, |
|
"loss": 0.9953, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 0.01574275828897953, |
|
"learning_rate": 3.932239410823233e-05, |
|
"loss": 1.0223, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11180555555555556, |
|
"grad_norm": 0.015788385644555092, |
|
"learning_rate": 3.931072875076912e-05, |
|
"loss": 0.9739, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.1125, |
|
"grad_norm": 0.014799153432250023, |
|
"learning_rate": 3.929896559588786e-05, |
|
"loss": 1.0028, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.11319444444444444, |
|
"grad_norm": 0.013463880866765976, |
|
"learning_rate": 3.9287104703162005e-05, |
|
"loss": 0.9771, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11388888888888889, |
|
"grad_norm": 0.014836735092103481, |
|
"learning_rate": 3.927514613265992e-05, |
|
"loss": 1.0159, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11458333333333333, |
|
"grad_norm": 0.013634877279400826, |
|
"learning_rate": 3.9263089944944715e-05, |
|
"loss": 1.0431, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11527777777777778, |
|
"grad_norm": 0.013801947236061096, |
|
"learning_rate": 3.925093620107384e-05, |
|
"loss": 0.9892, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.11597222222222223, |
|
"grad_norm": 0.015041469596326351, |
|
"learning_rate": 3.923868496259882e-05, |
|
"loss": 1.018, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.11666666666666667, |
|
"grad_norm": 0.013934548944234848, |
|
"learning_rate": 3.9226336291564935e-05, |
|
"loss": 0.9945, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.11736111111111111, |
|
"grad_norm": 0.014555184170603752, |
|
"learning_rate": 3.92138902505109e-05, |
|
"loss": 1.0101, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.11805555555555555, |
|
"grad_norm": 0.014088758267462254, |
|
"learning_rate": 3.920134690246854e-05, |
|
"loss": 0.9859, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11875, |
|
"grad_norm": 0.013208562508225441, |
|
"learning_rate": 3.91887063109625e-05, |
|
"loss": 0.9752, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.11944444444444445, |
|
"grad_norm": 0.015783201903104782, |
|
"learning_rate": 3.91759685400099e-05, |
|
"loss": 1.0432, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12013888888888889, |
|
"grad_norm": 0.014122221618890762, |
|
"learning_rate": 3.916313365412002e-05, |
|
"loss": 1.0079, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.12083333333333333, |
|
"grad_norm": 0.014722524210810661, |
|
"learning_rate": 3.915020171829395e-05, |
|
"loss": 1.0195, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.12152777777777778, |
|
"grad_norm": 0.016117624938488007, |
|
"learning_rate": 3.9137172798024304e-05, |
|
"loss": 0.9712, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12222222222222222, |
|
"grad_norm": 0.015223621390759945, |
|
"learning_rate": 3.912404695929486e-05, |
|
"loss": 1.0116, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.12291666666666666, |
|
"grad_norm": 0.013804764486849308, |
|
"learning_rate": 3.9110824268580206e-05, |
|
"loss": 0.929, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.12361111111111112, |
|
"grad_norm": 0.014369015581905842, |
|
"learning_rate": 3.909750479284548e-05, |
|
"loss": 0.983, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.12430555555555556, |
|
"grad_norm": 0.015310019254684448, |
|
"learning_rate": 3.908408859954593e-05, |
|
"loss": 0.9962, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.015282726846635342, |
|
"learning_rate": 3.907057575662663e-05, |
|
"loss": 1.0367, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12569444444444444, |
|
"grad_norm": 0.013963599689304829, |
|
"learning_rate": 3.905696633252216e-05, |
|
"loss": 0.9506, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.12638888888888888, |
|
"grad_norm": 0.015924159437417984, |
|
"learning_rate": 3.904326039615618e-05, |
|
"loss": 0.9647, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.12708333333333333, |
|
"grad_norm": 0.01227070577442646, |
|
"learning_rate": 3.902945801694117e-05, |
|
"loss": 0.9613, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.12777777777777777, |
|
"grad_norm": 0.014005225151777267, |
|
"learning_rate": 3.9015559264777994e-05, |
|
"loss": 0.9774, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.1284722222222222, |
|
"grad_norm": 0.014159591868519783, |
|
"learning_rate": 3.9001564210055624e-05, |
|
"loss": 0.9962, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12916666666666668, |
|
"grad_norm": 0.013430262915790081, |
|
"learning_rate": 3.898747292365073e-05, |
|
"loss": 0.9928, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.12986111111111112, |
|
"grad_norm": 0.012756455689668655, |
|
"learning_rate": 3.897328547692735e-05, |
|
"loss": 0.9652, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.13055555555555556, |
|
"grad_norm": 0.015474308282136917, |
|
"learning_rate": 3.89590019417365e-05, |
|
"loss": 1.0134, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.13125, |
|
"grad_norm": 0.013962026685476303, |
|
"learning_rate": 3.8944622390415835e-05, |
|
"loss": 1.0008, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.13194444444444445, |
|
"grad_norm": 0.013678076677024364, |
|
"learning_rate": 3.893014689578928e-05, |
|
"loss": 0.9865, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1326388888888889, |
|
"grad_norm": 0.01606924645602703, |
|
"learning_rate": 3.891557553116665e-05, |
|
"loss": 1.0097, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.014233720488846302, |
|
"learning_rate": 3.89009083703433e-05, |
|
"loss": 1.0165, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13402777777777777, |
|
"grad_norm": 0.019174130633473396, |
|
"learning_rate": 3.888614548759971e-05, |
|
"loss": 1.0029, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.13472222222222222, |
|
"grad_norm": 0.014648901298642159, |
|
"learning_rate": 3.8871286957701146e-05, |
|
"loss": 0.972, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.13541666666666666, |
|
"grad_norm": 0.015734290704131126, |
|
"learning_rate": 3.8856332855897286e-05, |
|
"loss": 0.9562, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1361111111111111, |
|
"grad_norm": 0.014715912751853466, |
|
"learning_rate": 3.8841283257921794e-05, |
|
"loss": 0.9801, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.13680555555555557, |
|
"grad_norm": 0.0156076829880476, |
|
"learning_rate": 3.882613823999199e-05, |
|
"loss": 1.0438, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1375, |
|
"grad_norm": 0.014719419181346893, |
|
"learning_rate": 3.881089787880843e-05, |
|
"loss": 1.0208, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.13819444444444445, |
|
"grad_norm": 0.015471681021153927, |
|
"learning_rate": 3.879556225155453e-05, |
|
"loss": 0.9728, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 0.013889987021684647, |
|
"learning_rate": 3.878013143589617e-05, |
|
"loss": 0.9525, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13958333333333334, |
|
"grad_norm": 0.014045029878616333, |
|
"learning_rate": 3.87646055099813e-05, |
|
"loss": 0.9568, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.14027777777777778, |
|
"grad_norm": 0.015043736435472965, |
|
"learning_rate": 3.874898455243955e-05, |
|
"loss": 0.9852, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.14097222222222222, |
|
"grad_norm": 0.014233953319489956, |
|
"learning_rate": 3.873326864238183e-05, |
|
"loss": 1.0112, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.14166666666666666, |
|
"grad_norm": 0.014034410007297993, |
|
"learning_rate": 3.871745785939993e-05, |
|
"loss": 0.959, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1423611111111111, |
|
"grad_norm": 0.013695859350264072, |
|
"learning_rate": 3.870155228356611e-05, |
|
"loss": 0.9657, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14305555555555555, |
|
"grad_norm": 0.01533182617276907, |
|
"learning_rate": 3.8685551995432713e-05, |
|
"loss": 1.0051, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14375, |
|
"grad_norm": 0.014852087013423443, |
|
"learning_rate": 3.866945707603172e-05, |
|
"loss": 1.0207, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.14444444444444443, |
|
"grad_norm": 0.014271554537117481, |
|
"learning_rate": 3.8653267606874395e-05, |
|
"loss": 1.0129, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1451388888888889, |
|
"grad_norm": 0.013892865739762783, |
|
"learning_rate": 3.863698366995079e-05, |
|
"loss": 1.0039, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 0.01436774991452694, |
|
"learning_rate": 3.862060534772945e-05, |
|
"loss": 0.9811, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14652777777777778, |
|
"grad_norm": 0.014014553278684616, |
|
"learning_rate": 3.8604132723156865e-05, |
|
"loss": 0.9575, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.14722222222222223, |
|
"grad_norm": 0.012928716838359833, |
|
"learning_rate": 3.858756587965714e-05, |
|
"loss": 0.9725, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.14791666666666667, |
|
"grad_norm": 0.014141298830509186, |
|
"learning_rate": 3.8570904901131544e-05, |
|
"loss": 0.9553, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.1486111111111111, |
|
"grad_norm": 0.014526846818625927, |
|
"learning_rate": 3.855414987195807e-05, |
|
"loss": 0.9812, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.14930555555555555, |
|
"grad_norm": 0.012949762865900993, |
|
"learning_rate": 3.853730087699103e-05, |
|
"loss": 0.9676, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.013820677995681763, |
|
"learning_rate": 3.852035800156062e-05, |
|
"loss": 1.0086, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.15069444444444444, |
|
"grad_norm": 0.013385163620114326, |
|
"learning_rate": 3.850332133147248e-05, |
|
"loss": 0.9818, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.15138888888888888, |
|
"grad_norm": 0.012220976874232292, |
|
"learning_rate": 3.848619095300726e-05, |
|
"loss": 0.96, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15208333333333332, |
|
"grad_norm": 0.01417449489235878, |
|
"learning_rate": 3.8468966952920205e-05, |
|
"loss": 1.0152, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.1527777777777778, |
|
"grad_norm": 0.013405581004917622, |
|
"learning_rate": 3.845164941844068e-05, |
|
"loss": 1.0077, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15347222222222223, |
|
"grad_norm": 0.01379991602152586, |
|
"learning_rate": 3.843423843727176e-05, |
|
"loss": 0.9917, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.15416666666666667, |
|
"grad_norm": 0.014063477516174316, |
|
"learning_rate": 3.8416734097589756e-05, |
|
"loss": 0.9679, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.15486111111111112, |
|
"grad_norm": 0.012324227020144463, |
|
"learning_rate": 3.83991364880438e-05, |
|
"loss": 0.9574, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.15555555555555556, |
|
"grad_norm": 0.01431061141192913, |
|
"learning_rate": 3.8381445697755365e-05, |
|
"loss": 0.9967, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.014298739843070507, |
|
"learning_rate": 3.836366181631785e-05, |
|
"loss": 0.9815, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15694444444444444, |
|
"grad_norm": 0.012113712728023529, |
|
"learning_rate": 3.8345784933796095e-05, |
|
"loss": 0.9337, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.15763888888888888, |
|
"grad_norm": 0.015284246765077114, |
|
"learning_rate": 3.832781514072593e-05, |
|
"loss": 1.0051, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.15833333333333333, |
|
"grad_norm": 0.012948422692716122, |
|
"learning_rate": 3.8309752528113725e-05, |
|
"loss": 0.9554, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.15902777777777777, |
|
"grad_norm": 0.014634348452091217, |
|
"learning_rate": 3.8291597187435926e-05, |
|
"loss": 0.9814, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.1597222222222222, |
|
"grad_norm": 0.014931918121874332, |
|
"learning_rate": 3.827334921063861e-05, |
|
"loss": 1.0053, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16041666666666668, |
|
"grad_norm": 0.012998196296393871, |
|
"learning_rate": 3.825500869013697e-05, |
|
"loss": 0.9781, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.16111111111111112, |
|
"grad_norm": 0.015014944598078728, |
|
"learning_rate": 3.82365757188149e-05, |
|
"loss": 1.014, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.16180555555555556, |
|
"grad_norm": 0.015584563836455345, |
|
"learning_rate": 3.821805039002449e-05, |
|
"loss": 0.9788, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.1625, |
|
"grad_norm": 0.013651632703840733, |
|
"learning_rate": 3.8199432797585576e-05, |
|
"loss": 0.9905, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16319444444444445, |
|
"grad_norm": 0.015139803290367126, |
|
"learning_rate": 3.8180723035785246e-05, |
|
"loss": 1.0362, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1638888888888889, |
|
"grad_norm": 0.014169261790812016, |
|
"learning_rate": 3.816192119937738e-05, |
|
"loss": 0.9688, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.16458333333333333, |
|
"grad_norm": 0.013432355597615242, |
|
"learning_rate": 3.814302738358216e-05, |
|
"loss": 0.955, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.16527777777777777, |
|
"grad_norm": 0.012756401672959328, |
|
"learning_rate": 3.812404168408558e-05, |
|
"loss": 0.9817, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.16597222222222222, |
|
"grad_norm": 0.012755703181028366, |
|
"learning_rate": 3.810496419703898e-05, |
|
"loss": 0.9785, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.014174016192555428, |
|
"learning_rate": 3.808579501905856e-05, |
|
"loss": 0.9704, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1673611111111111, |
|
"grad_norm": 0.013185903429985046, |
|
"learning_rate": 3.806653424722484e-05, |
|
"loss": 0.9805, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.16805555555555557, |
|
"grad_norm": 0.014006533659994602, |
|
"learning_rate": 3.804718197908225e-05, |
|
"loss": 0.9846, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.16875, |
|
"grad_norm": 0.01497966144233942, |
|
"learning_rate": 3.802773831263859e-05, |
|
"loss": 0.9697, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.16944444444444445, |
|
"grad_norm": 0.0131217110902071, |
|
"learning_rate": 3.800820334636452e-05, |
|
"loss": 1.0037, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.1701388888888889, |
|
"grad_norm": 0.012915054336190224, |
|
"learning_rate": 3.798857717919308e-05, |
|
"loss": 0.9489, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17083333333333334, |
|
"grad_norm": 0.013334513641893864, |
|
"learning_rate": 3.7968859910519215e-05, |
|
"loss": 0.9891, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.17152777777777778, |
|
"grad_norm": 0.014228935353457928, |
|
"learning_rate": 3.7949051640199216e-05, |
|
"loss": 1.0013, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.17222222222222222, |
|
"grad_norm": 0.013772614300251007, |
|
"learning_rate": 3.7929152468550245e-05, |
|
"loss": 1.0042, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.17291666666666666, |
|
"grad_norm": 0.01312232669442892, |
|
"learning_rate": 3.790916249634986e-05, |
|
"loss": 0.9374, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.1736111111111111, |
|
"grad_norm": 0.014259965158998966, |
|
"learning_rate": 3.788908182483542e-05, |
|
"loss": 0.9432, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17430555555555555, |
|
"grad_norm": 0.014950071461498737, |
|
"learning_rate": 3.786891055570367e-05, |
|
"loss": 0.9674, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 0.014557240530848503, |
|
"learning_rate": 3.784864879111013e-05, |
|
"loss": 0.9811, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.17569444444444443, |
|
"grad_norm": 0.011923140846192837, |
|
"learning_rate": 3.782829663366868e-05, |
|
"loss": 0.9773, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1763888888888889, |
|
"grad_norm": 0.012792705558240414, |
|
"learning_rate": 3.780785418645095e-05, |
|
"loss": 0.9635, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.17708333333333334, |
|
"grad_norm": 0.01456011924892664, |
|
"learning_rate": 3.7787321552985826e-05, |
|
"loss": 0.9271, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.014588495716452599, |
|
"learning_rate": 3.776669883725897e-05, |
|
"loss": 0.9818, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.17847222222222223, |
|
"grad_norm": 0.013889756053686142, |
|
"learning_rate": 3.774598614371223e-05, |
|
"loss": 0.9894, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.17916666666666667, |
|
"grad_norm": 0.014536023139953613, |
|
"learning_rate": 3.7725183577243144e-05, |
|
"loss": 0.9675, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.1798611111111111, |
|
"grad_norm": 0.01632644794881344, |
|
"learning_rate": 3.7704291243204416e-05, |
|
"loss": 1.0065, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.18055555555555555, |
|
"grad_norm": 0.013067888095974922, |
|
"learning_rate": 3.7683309247403345e-05, |
|
"loss": 1.0002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18125, |
|
"grad_norm": 0.01566268689930439, |
|
"learning_rate": 3.766223769610134e-05, |
|
"loss": 0.9552, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.18194444444444444, |
|
"grad_norm": 0.01552093680948019, |
|
"learning_rate": 3.764107669601333e-05, |
|
"loss": 1.0219, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.18263888888888888, |
|
"grad_norm": 0.013710713014006615, |
|
"learning_rate": 3.761982635430729e-05, |
|
"loss": 0.9559, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.18333333333333332, |
|
"grad_norm": 0.013827464543282986, |
|
"learning_rate": 3.759848677860359e-05, |
|
"loss": 0.9741, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.1840277777777778, |
|
"grad_norm": 0.01393877249211073, |
|
"learning_rate": 3.7577058076974595e-05, |
|
"loss": 0.9841, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.18472222222222223, |
|
"grad_norm": 0.014325949363410473, |
|
"learning_rate": 3.755554035794398e-05, |
|
"loss": 1.0068, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.18541666666666667, |
|
"grad_norm": 0.013421372510492802, |
|
"learning_rate": 3.7533933730486276e-05, |
|
"loss": 0.9409, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.18611111111111112, |
|
"grad_norm": 0.015207280404865742, |
|
"learning_rate": 3.751223830402627e-05, |
|
"loss": 1.0078, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.18680555555555556, |
|
"grad_norm": 0.015149595215916634, |
|
"learning_rate": 3.749045418843844e-05, |
|
"loss": 0.9989, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 0.012723335064947605, |
|
"learning_rate": 3.746858149404648e-05, |
|
"loss": 0.9656, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18819444444444444, |
|
"grad_norm": 0.01434493437409401, |
|
"learning_rate": 3.744662033162262e-05, |
|
"loss": 0.9725, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.18888888888888888, |
|
"grad_norm": 0.01342215109616518, |
|
"learning_rate": 3.742457081238717e-05, |
|
"loss": 0.9555, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.18958333333333333, |
|
"grad_norm": 0.012995844706892967, |
|
"learning_rate": 3.740243304800791e-05, |
|
"loss": 0.9775, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.19027777777777777, |
|
"grad_norm": 0.01189506333321333, |
|
"learning_rate": 3.738020715059951e-05, |
|
"loss": 0.9443, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.1909722222222222, |
|
"grad_norm": 0.013390977866947651, |
|
"learning_rate": 3.7357893232722984e-05, |
|
"loss": 0.9675, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19166666666666668, |
|
"grad_norm": 0.01335330493748188, |
|
"learning_rate": 3.733549140738514e-05, |
|
"loss": 0.9596, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.19236111111111112, |
|
"grad_norm": 0.012211819179356098, |
|
"learning_rate": 3.731300178803797e-05, |
|
"loss": 0.9413, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.19305555555555556, |
|
"grad_norm": 0.013362012803554535, |
|
"learning_rate": 3.7290424488578094e-05, |
|
"loss": 0.9622, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.19375, |
|
"grad_norm": 0.013488608412444592, |
|
"learning_rate": 3.726775962334617e-05, |
|
"loss": 1.0322, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.19444444444444445, |
|
"grad_norm": 0.013000058010220528, |
|
"learning_rate": 3.7245007307126346e-05, |
|
"loss": 0.9966, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1951388888888889, |
|
"grad_norm": 0.014398259110748768, |
|
"learning_rate": 3.7222167655145636e-05, |
|
"loss": 0.9775, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.19583333333333333, |
|
"grad_norm": 0.012063449248671532, |
|
"learning_rate": 3.7199240783073365e-05, |
|
"loss": 0.9369, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.19652777777777777, |
|
"grad_norm": 0.012797530740499496, |
|
"learning_rate": 3.717622680702058e-05, |
|
"loss": 0.9591, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.19722222222222222, |
|
"grad_norm": 0.012506108731031418, |
|
"learning_rate": 3.7153125843539455e-05, |
|
"loss": 0.9781, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.19791666666666666, |
|
"grad_norm": 0.013660159893333912, |
|
"learning_rate": 3.712993800962271e-05, |
|
"loss": 0.9994, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.1986111111111111, |
|
"grad_norm": 0.013593221083283424, |
|
"learning_rate": 3.7106663422703004e-05, |
|
"loss": 0.9945, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.19930555555555557, |
|
"grad_norm": 0.013533521443605423, |
|
"learning_rate": 3.708330220065235e-05, |
|
"loss": 1.0033, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.015442555770277977, |
|
"learning_rate": 3.7059854461781514e-05, |
|
"loss": 0.9829, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.20069444444444445, |
|
"grad_norm": 0.014286418445408344, |
|
"learning_rate": 3.7036320324839444e-05, |
|
"loss": 0.9674, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2013888888888889, |
|
"grad_norm": 0.013105183839797974, |
|
"learning_rate": 3.70126999090126e-05, |
|
"loss": 0.9562, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.20208333333333334, |
|
"grad_norm": 0.012748828157782555, |
|
"learning_rate": 3.698899333392442e-05, |
|
"loss": 0.9385, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.20277777777777778, |
|
"grad_norm": 0.013904962688684464, |
|
"learning_rate": 3.696520071963469e-05, |
|
"loss": 0.9769, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.20347222222222222, |
|
"grad_norm": 0.013301237486302853, |
|
"learning_rate": 3.6941322186638924e-05, |
|
"loss": 0.9582, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.20416666666666666, |
|
"grad_norm": 0.013818389736115932, |
|
"learning_rate": 3.691735785586777e-05, |
|
"loss": 0.9992, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2048611111111111, |
|
"grad_norm": 0.018090086057782173, |
|
"learning_rate": 3.6893307848686376e-05, |
|
"loss": 1.0265, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.20555555555555555, |
|
"grad_norm": 0.011723886243999004, |
|
"learning_rate": 3.686917228689382e-05, |
|
"loss": 0.9447, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.20625, |
|
"grad_norm": 0.013388545252382755, |
|
"learning_rate": 3.684495129272242e-05, |
|
"loss": 0.9725, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.20694444444444443, |
|
"grad_norm": 0.013546598143875599, |
|
"learning_rate": 3.682064498883721e-05, |
|
"loss": 0.9867, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2076388888888889, |
|
"grad_norm": 0.01278660912066698, |
|
"learning_rate": 3.679625349833523e-05, |
|
"loss": 0.9533, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 0.013434414751827717, |
|
"learning_rate": 3.677177694474495e-05, |
|
"loss": 0.9587, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20902777777777778, |
|
"grad_norm": 0.013443008065223694, |
|
"learning_rate": 3.674721545202563e-05, |
|
"loss": 0.9758, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.20972222222222223, |
|
"grad_norm": 0.012249941006302834, |
|
"learning_rate": 3.672256914456671e-05, |
|
"loss": 0.9391, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.21041666666666667, |
|
"grad_norm": 0.013929427601397038, |
|
"learning_rate": 3.669783814718716e-05, |
|
"loss": 0.966, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2111111111111111, |
|
"grad_norm": 0.020106054842472076, |
|
"learning_rate": 3.667302258513484e-05, |
|
"loss": 0.9916, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.21180555555555555, |
|
"grad_norm": 0.017338188365101814, |
|
"learning_rate": 3.6648122584085907e-05, |
|
"loss": 0.9905, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2125, |
|
"grad_norm": 0.016092827543616295, |
|
"learning_rate": 3.662313827014412e-05, |
|
"loss": 0.9422, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.21319444444444444, |
|
"grad_norm": 0.014228146523237228, |
|
"learning_rate": 3.659806976984026e-05, |
|
"loss": 1.0033, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.21388888888888888, |
|
"grad_norm": 0.015558873303234577, |
|
"learning_rate": 3.657291721013146e-05, |
|
"loss": 0.9775, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.21458333333333332, |
|
"grad_norm": 0.016239987686276436, |
|
"learning_rate": 3.6547680718400546e-05, |
|
"loss": 0.9863, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2152777777777778, |
|
"grad_norm": 0.014311805367469788, |
|
"learning_rate": 3.652236042245542e-05, |
|
"loss": 1.0078, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.21597222222222223, |
|
"grad_norm": 0.014693599194288254, |
|
"learning_rate": 3.64969564505284e-05, |
|
"loss": 0.9743, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.21666666666666667, |
|
"grad_norm": 0.014121411368250847, |
|
"learning_rate": 3.647146893127559e-05, |
|
"loss": 0.9712, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.21736111111111112, |
|
"grad_norm": 0.013323220424354076, |
|
"learning_rate": 3.644589799377618e-05, |
|
"loss": 0.9912, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.21805555555555556, |
|
"grad_norm": 0.014195873402059078, |
|
"learning_rate": 3.642024376753186e-05, |
|
"loss": 0.9733, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 0.01346920058131218, |
|
"learning_rate": 3.639450638246611e-05, |
|
"loss": 0.9562, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.21944444444444444, |
|
"grad_norm": 0.013232593424618244, |
|
"learning_rate": 3.636868596892353e-05, |
|
"loss": 0.9724, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.22013888888888888, |
|
"grad_norm": 0.01408706046640873, |
|
"learning_rate": 3.634278265766928e-05, |
|
"loss": 1.009, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.22083333333333333, |
|
"grad_norm": 0.013279229402542114, |
|
"learning_rate": 3.631679657988829e-05, |
|
"loss": 0.9757, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.22152777777777777, |
|
"grad_norm": 0.015104196034371853, |
|
"learning_rate": 3.6290727867184676e-05, |
|
"loss": 1.001, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.012325095012784004, |
|
"learning_rate": 3.6264576651581036e-05, |
|
"loss": 0.9758, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22291666666666668, |
|
"grad_norm": 0.013523522764444351, |
|
"learning_rate": 3.623834306551782e-05, |
|
"loss": 0.9482, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.22361111111111112, |
|
"grad_norm": 0.01278225053101778, |
|
"learning_rate": 3.6212027241852625e-05, |
|
"loss": 0.9631, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.22430555555555556, |
|
"grad_norm": 0.012586407363414764, |
|
"learning_rate": 3.618562931385952e-05, |
|
"loss": 1.0226, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 0.012502876110374928, |
|
"learning_rate": 3.6159149415228403e-05, |
|
"loss": 0.9229, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.22569444444444445, |
|
"grad_norm": 0.012781070545315742, |
|
"learning_rate": 3.613258768006429e-05, |
|
"loss": 0.9532, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2263888888888889, |
|
"grad_norm": 0.013412564061582088, |
|
"learning_rate": 3.6105944242886674e-05, |
|
"loss": 1.0037, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.22708333333333333, |
|
"grad_norm": 0.013194269500672817, |
|
"learning_rate": 3.607921923862878e-05, |
|
"loss": 0.9241, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.22777777777777777, |
|
"grad_norm": 0.013584061525762081, |
|
"learning_rate": 3.605241280263696e-05, |
|
"loss": 0.9482, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.22847222222222222, |
|
"grad_norm": 0.013534624129533768, |
|
"learning_rate": 3.6025525070669955e-05, |
|
"loss": 0.9653, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 0.015827780589461327, |
|
"learning_rate": 3.599855617889822e-05, |
|
"loss": 0.9511, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2298611111111111, |
|
"grad_norm": 0.013115919195115566, |
|
"learning_rate": 3.5971506263903234e-05, |
|
"loss": 0.9516, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.23055555555555557, |
|
"grad_norm": 0.015311370603740215, |
|
"learning_rate": 3.594437546267682e-05, |
|
"loss": 0.9543, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.23125, |
|
"grad_norm": 0.014090127311646938, |
|
"learning_rate": 3.591716391262044e-05, |
|
"loss": 0.9914, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.23194444444444445, |
|
"grad_norm": 0.013163258321583271, |
|
"learning_rate": 3.588987175154449e-05, |
|
"loss": 0.9722, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2326388888888889, |
|
"grad_norm": 0.012093394063413143, |
|
"learning_rate": 3.586249911766763e-05, |
|
"loss": 0.9758, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.23333333333333334, |
|
"grad_norm": 0.014174525626003742, |
|
"learning_rate": 3.583504614961605e-05, |
|
"loss": 0.9683, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.23402777777777778, |
|
"grad_norm": 0.014316610060632229, |
|
"learning_rate": 3.58075129864228e-05, |
|
"loss": 0.9814, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.23472222222222222, |
|
"grad_norm": 0.013283569365739822, |
|
"learning_rate": 3.5779899767527064e-05, |
|
"loss": 1.0012, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.23541666666666666, |
|
"grad_norm": 0.013545655645430088, |
|
"learning_rate": 3.575220663277346e-05, |
|
"loss": 0.9426, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.2361111111111111, |
|
"grad_norm": 0.01365567371249199, |
|
"learning_rate": 3.572443372241134e-05, |
|
"loss": 0.957, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.23680555555555555, |
|
"grad_norm": 0.012532095424830914, |
|
"learning_rate": 3.569658117709406e-05, |
|
"loss": 0.9439, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2375, |
|
"grad_norm": 0.012797664850950241, |
|
"learning_rate": 3.5668649137878275e-05, |
|
"loss": 0.9805, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.23819444444444443, |
|
"grad_norm": 0.013005426153540611, |
|
"learning_rate": 3.5640637746223253e-05, |
|
"loss": 0.9515, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2388888888888889, |
|
"grad_norm": 0.013586447574198246, |
|
"learning_rate": 3.561254714399013e-05, |
|
"loss": 0.9731, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.23958333333333334, |
|
"grad_norm": 0.013848516158759594, |
|
"learning_rate": 3.5584377473441187e-05, |
|
"loss": 0.9731, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.24027777777777778, |
|
"grad_norm": 0.014170484617352486, |
|
"learning_rate": 3.5556128877239125e-05, |
|
"loss": 0.9817, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.24097222222222223, |
|
"grad_norm": 0.014107048511505127, |
|
"learning_rate": 3.552780149844639e-05, |
|
"loss": 0.9733, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.24166666666666667, |
|
"grad_norm": 0.013129886239767075, |
|
"learning_rate": 3.5499395480524394e-05, |
|
"loss": 0.9803, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2423611111111111, |
|
"grad_norm": 0.014618804678320885, |
|
"learning_rate": 3.5470910967332815e-05, |
|
"loss": 0.9941, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.24305555555555555, |
|
"grad_norm": 0.013775044120848179, |
|
"learning_rate": 3.544234810312886e-05, |
|
"loss": 0.9978, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24375, |
|
"grad_norm": 0.013130244798958302, |
|
"learning_rate": 3.541370703256654e-05, |
|
"loss": 0.9978, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.24444444444444444, |
|
"grad_norm": 0.013368207029998302, |
|
"learning_rate": 3.538498790069594e-05, |
|
"loss": 0.9875, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.24513888888888888, |
|
"grad_norm": 0.014766732230782509, |
|
"learning_rate": 3.5356190852962474e-05, |
|
"loss": 0.9446, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.24583333333333332, |
|
"grad_norm": 0.012403284199535847, |
|
"learning_rate": 3.5327316035206145e-05, |
|
"loss": 0.9305, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2465277777777778, |
|
"grad_norm": 0.013556853868067265, |
|
"learning_rate": 3.5298363593660835e-05, |
|
"loss": 1.0034, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.24722222222222223, |
|
"grad_norm": 0.013330447487533092, |
|
"learning_rate": 3.526933367495353e-05, |
|
"loss": 0.9721, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.24791666666666667, |
|
"grad_norm": 0.01214715838432312, |
|
"learning_rate": 3.5240226426103596e-05, |
|
"loss": 0.9462, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.24861111111111112, |
|
"grad_norm": 0.013268118724226952, |
|
"learning_rate": 3.521104199452203e-05, |
|
"loss": 0.9796, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.24930555555555556, |
|
"grad_norm": 0.012496302835643291, |
|
"learning_rate": 3.5181780528010715e-05, |
|
"loss": 0.9427, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.013434799388051033, |
|
"learning_rate": 3.515244217476166e-05, |
|
"loss": 0.9519, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25069444444444444, |
|
"grad_norm": 0.012152577750384808, |
|
"learning_rate": 3.5123027083356285e-05, |
|
"loss": 0.9707, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2513888888888889, |
|
"grad_norm": 0.013102750293910503, |
|
"learning_rate": 3.509353540276462e-05, |
|
"loss": 0.9594, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2520833333333333, |
|
"grad_norm": 0.01402961928397417, |
|
"learning_rate": 3.506396728234459e-05, |
|
"loss": 0.9629, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.25277777777777777, |
|
"grad_norm": 0.013971925713121891, |
|
"learning_rate": 3.503432287184121e-05, |
|
"loss": 0.9559, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2534722222222222, |
|
"grad_norm": 0.013591865077614784, |
|
"learning_rate": 3.500460232138591e-05, |
|
"loss": 0.9919, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.25416666666666665, |
|
"grad_norm": 0.013570788316428661, |
|
"learning_rate": 3.497480578149569e-05, |
|
"loss": 0.9537, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2548611111111111, |
|
"grad_norm": 0.013885971158742905, |
|
"learning_rate": 3.4944933403072387e-05, |
|
"loss": 0.9678, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.25555555555555554, |
|
"grad_norm": 0.013096191920340061, |
|
"learning_rate": 3.491498533740193e-05, |
|
"loss": 0.9505, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.25625, |
|
"grad_norm": 0.013115983456373215, |
|
"learning_rate": 3.488496173615358e-05, |
|
"loss": 0.9467, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2569444444444444, |
|
"grad_norm": 0.01301599945873022, |
|
"learning_rate": 3.485486275137909e-05, |
|
"loss": 0.9501, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.25763888888888886, |
|
"grad_norm": 0.012697902508080006, |
|
"learning_rate": 3.482468853551202e-05, |
|
"loss": 0.9749, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.25833333333333336, |
|
"grad_norm": 0.01309168990701437, |
|
"learning_rate": 3.479443924136693e-05, |
|
"loss": 0.9791, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2590277777777778, |
|
"grad_norm": 0.012592338025569916, |
|
"learning_rate": 3.47641150221386e-05, |
|
"loss": 0.9726, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.25972222222222224, |
|
"grad_norm": 0.012716379016637802, |
|
"learning_rate": 3.473371603140125e-05, |
|
"loss": 0.9423, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2604166666666667, |
|
"grad_norm": 0.01352920476347208, |
|
"learning_rate": 3.47032424231078e-05, |
|
"loss": 0.9829, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2611111111111111, |
|
"grad_norm": 0.012491214089095592, |
|
"learning_rate": 3.4672694351589046e-05, |
|
"loss": 0.9633, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.26180555555555557, |
|
"grad_norm": 0.01319537591189146, |
|
"learning_rate": 3.46420719715529e-05, |
|
"loss": 0.9221, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2625, |
|
"grad_norm": 0.013664872385561466, |
|
"learning_rate": 3.46113754380836e-05, |
|
"loss": 0.9621, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.26319444444444445, |
|
"grad_norm": 0.01348687894642353, |
|
"learning_rate": 3.4580604906640936e-05, |
|
"loss": 0.9634, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2638888888888889, |
|
"grad_norm": 0.011980963870882988, |
|
"learning_rate": 3.454976053305943e-05, |
|
"loss": 0.9288, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.26458333333333334, |
|
"grad_norm": 0.01295421365648508, |
|
"learning_rate": 3.4518842473547614e-05, |
|
"loss": 0.9216, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2652777777777778, |
|
"grad_norm": 0.014002040959894657, |
|
"learning_rate": 3.4487850884687155e-05, |
|
"loss": 0.9781, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2659722222222222, |
|
"grad_norm": 0.012479842640459538, |
|
"learning_rate": 3.445678592343212e-05, |
|
"loss": 0.9305, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.013758037239313126, |
|
"learning_rate": 3.442564774710816e-05, |
|
"loss": 0.9732, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.2673611111111111, |
|
"grad_norm": 0.01285907905548811, |
|
"learning_rate": 3.439443651341172e-05, |
|
"loss": 0.9494, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.26805555555555555, |
|
"grad_norm": 0.014414280652999878, |
|
"learning_rate": 3.436315238040924e-05, |
|
"loss": 0.9495, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.26875, |
|
"grad_norm": 0.013502138666808605, |
|
"learning_rate": 3.4331795506536336e-05, |
|
"loss": 0.924, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.26944444444444443, |
|
"grad_norm": 0.01338834036141634, |
|
"learning_rate": 3.430036605059704e-05, |
|
"loss": 0.9467, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2701388888888889, |
|
"grad_norm": 0.014044429175555706, |
|
"learning_rate": 3.426886417176294e-05, |
|
"loss": 0.9976, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 0.013052555732429028, |
|
"learning_rate": 3.423729002957244e-05, |
|
"loss": 0.9586, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.27152777777777776, |
|
"grad_norm": 0.012639213353395462, |
|
"learning_rate": 3.4205643783929875e-05, |
|
"loss": 0.9498, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.2722222222222222, |
|
"grad_norm": 0.012982922606170177, |
|
"learning_rate": 3.417392559510475e-05, |
|
"loss": 0.9844, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.27291666666666664, |
|
"grad_norm": 0.012655354104936123, |
|
"learning_rate": 3.4142135623730954e-05, |
|
"loss": 0.948, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.27361111111111114, |
|
"grad_norm": 0.013443528674542904, |
|
"learning_rate": 3.411027403080587e-05, |
|
"loss": 0.9682, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2743055555555556, |
|
"grad_norm": 0.013121607713401318, |
|
"learning_rate": 3.407834097768962e-05, |
|
"loss": 1.0032, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 0.011974303051829338, |
|
"learning_rate": 3.4046336626104235e-05, |
|
"loss": 0.9615, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.27569444444444446, |
|
"grad_norm": 0.013200311921536922, |
|
"learning_rate": 3.401426113813282e-05, |
|
"loss": 0.9612, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.2763888888888889, |
|
"grad_norm": 0.011763915419578552, |
|
"learning_rate": 3.398211467621875e-05, |
|
"loss": 0.9523, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.27708333333333335, |
|
"grad_norm": 0.012212570756673813, |
|
"learning_rate": 3.394989740316484e-05, |
|
"loss": 0.9482, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 0.012741866521537304, |
|
"learning_rate": 3.391760948213251e-05, |
|
"loss": 0.9787, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.27847222222222223, |
|
"grad_norm": 0.013035683892667294, |
|
"learning_rate": 3.388525107664099e-05, |
|
"loss": 0.9398, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.2791666666666667, |
|
"grad_norm": 0.013569427654147148, |
|
"learning_rate": 3.3852822350566455e-05, |
|
"loss": 0.9492, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2798611111111111, |
|
"grad_norm": 0.012654770165681839, |
|
"learning_rate": 3.382032346814123e-05, |
|
"loss": 0.9762, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.28055555555555556, |
|
"grad_norm": 0.013426728546619415, |
|
"learning_rate": 3.378775459395292e-05, |
|
"loss": 0.9348, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 0.01277459692209959, |
|
"learning_rate": 3.3755115892943616e-05, |
|
"loss": 0.9698, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.28194444444444444, |
|
"grad_norm": 0.013655406422913074, |
|
"learning_rate": 3.372240753040902e-05, |
|
"loss": 0.9792, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.2826388888888889, |
|
"grad_norm": 0.01355676632374525, |
|
"learning_rate": 3.368962967199765e-05, |
|
"loss": 0.9584, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2833333333333333, |
|
"grad_norm": 0.013229291886091232, |
|
"learning_rate": 3.3656782483709966e-05, |
|
"loss": 0.9581, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.28402777777777777, |
|
"grad_norm": 0.01382211409509182, |
|
"learning_rate": 3.3623866131897554e-05, |
|
"loss": 0.9953, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2847222222222222, |
|
"grad_norm": 0.012580876238644123, |
|
"learning_rate": 3.3590880783262245e-05, |
|
"loss": 0.9491, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.28541666666666665, |
|
"grad_norm": 0.014332804828882217, |
|
"learning_rate": 3.3557826604855335e-05, |
|
"loss": 0.9777, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.2861111111111111, |
|
"grad_norm": 0.014996136538684368, |
|
"learning_rate": 3.3524703764076684e-05, |
|
"loss": 0.949, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.28680555555555554, |
|
"grad_norm": 0.014224053360521793, |
|
"learning_rate": 3.3491512428673877e-05, |
|
"loss": 0.983, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2875, |
|
"grad_norm": 0.014253957197070122, |
|
"learning_rate": 3.345825276674139e-05, |
|
"loss": 0.955, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.2881944444444444, |
|
"grad_norm": 0.0160871259868145, |
|
"learning_rate": 3.342492494671976e-05, |
|
"loss": 0.9732, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.28888888888888886, |
|
"grad_norm": 0.012912842445075512, |
|
"learning_rate": 3.339152913739466e-05, |
|
"loss": 0.9695, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.28958333333333336, |
|
"grad_norm": 0.013580222614109516, |
|
"learning_rate": 3.335806550789611e-05, |
|
"loss": 0.9974, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.2902777777777778, |
|
"grad_norm": 0.013716046698391438, |
|
"learning_rate": 3.332453422769762e-05, |
|
"loss": 0.9577, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.29097222222222224, |
|
"grad_norm": 0.013095886446535587, |
|
"learning_rate": 3.329093546661526e-05, |
|
"loss": 0.9624, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 0.013227414339780807, |
|
"learning_rate": 3.3257269394806894e-05, |
|
"loss": 0.9823, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2923611111111111, |
|
"grad_norm": 0.012513620778918266, |
|
"learning_rate": 3.322353618277126e-05, |
|
"loss": 0.9446, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.29305555555555557, |
|
"grad_norm": 0.01236687507480383, |
|
"learning_rate": 3.3189736001347125e-05, |
|
"loss": 0.9639, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.29375, |
|
"grad_norm": 0.013223248533904552, |
|
"learning_rate": 3.315586902171241e-05, |
|
"loss": 0.9436, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.29444444444444445, |
|
"grad_norm": 0.01373869925737381, |
|
"learning_rate": 3.3121935415383325e-05, |
|
"loss": 0.9751, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.2951388888888889, |
|
"grad_norm": 0.012828570790588856, |
|
"learning_rate": 3.308793535421353e-05, |
|
"loss": 0.98, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.29583333333333334, |
|
"grad_norm": 0.011885512620210648, |
|
"learning_rate": 3.305386901039321e-05, |
|
"loss": 0.9612, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.2965277777777778, |
|
"grad_norm": 0.011671421118080616, |
|
"learning_rate": 3.301973655644825e-05, |
|
"loss": 0.9269, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.2972222222222222, |
|
"grad_norm": 0.013502037152647972, |
|
"learning_rate": 3.298553816523934e-05, |
|
"loss": 0.9404, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.29791666666666666, |
|
"grad_norm": 0.013123046606779099, |
|
"learning_rate": 3.2951274009961094e-05, |
|
"loss": 0.9812, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.2986111111111111, |
|
"grad_norm": 0.012360905297100544, |
|
"learning_rate": 3.29169442641412e-05, |
|
"loss": 0.9355, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.29930555555555555, |
|
"grad_norm": 0.01276017352938652, |
|
"learning_rate": 3.288254910163951e-05, |
|
"loss": 0.9689, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.013017778284847736, |
|
"learning_rate": 3.284808869664716e-05, |
|
"loss": 0.9716, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.30069444444444443, |
|
"grad_norm": 0.011824820190668106, |
|
"learning_rate": 3.281356322368575e-05, |
|
"loss": 0.9633, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3013888888888889, |
|
"grad_norm": 0.012792828492820263, |
|
"learning_rate": 3.277897285760635e-05, |
|
"loss": 0.9841, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3020833333333333, |
|
"grad_norm": 0.012392980977892876, |
|
"learning_rate": 3.2744317773588696e-05, |
|
"loss": 0.9172, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.30277777777777776, |
|
"grad_norm": 0.013595100492238998, |
|
"learning_rate": 3.270959814714032e-05, |
|
"loss": 0.9827, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3034722222222222, |
|
"grad_norm": 0.01175049040466547, |
|
"learning_rate": 3.267481415409557e-05, |
|
"loss": 0.9782, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.30416666666666664, |
|
"grad_norm": 0.013577048666775227, |
|
"learning_rate": 3.26399659706148e-05, |
|
"loss": 1.0069, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.30486111111111114, |
|
"grad_norm": 0.012351465411484241, |
|
"learning_rate": 3.260505377318344e-05, |
|
"loss": 0.9585, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3055555555555556, |
|
"grad_norm": 0.013745117001235485, |
|
"learning_rate": 3.257007773861113e-05, |
|
"loss": 0.9942, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.30625, |
|
"grad_norm": 0.013106070458889008, |
|
"learning_rate": 3.253503804403079e-05, |
|
"loss": 0.9419, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.30694444444444446, |
|
"grad_norm": 0.013440150767564774, |
|
"learning_rate": 3.249993486689774e-05, |
|
"loss": 0.9428, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3076388888888889, |
|
"grad_norm": 0.014590987004339695, |
|
"learning_rate": 3.246476838498881e-05, |
|
"loss": 0.9874, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.30833333333333335, |
|
"grad_norm": 0.013203262351453304, |
|
"learning_rate": 3.242953877640142e-05, |
|
"loss": 0.947, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3090277777777778, |
|
"grad_norm": 0.013087602332234383, |
|
"learning_rate": 3.2394246219552724e-05, |
|
"loss": 0.9897, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.30972222222222223, |
|
"grad_norm": 0.012205411680042744, |
|
"learning_rate": 3.2358890893178617e-05, |
|
"loss": 0.9489, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3104166666666667, |
|
"grad_norm": 0.012712490744888783, |
|
"learning_rate": 3.2323472976332926e-05, |
|
"loss": 0.9363, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3111111111111111, |
|
"grad_norm": 0.012987968511879444, |
|
"learning_rate": 3.228799264838645e-05, |
|
"loss": 0.9568, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.31180555555555556, |
|
"grad_norm": 0.014164082705974579, |
|
"learning_rate": 3.225245008902606e-05, |
|
"loss": 0.9515, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.014080497436225414, |
|
"learning_rate": 3.221684547825379e-05, |
|
"loss": 0.9715, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31319444444444444, |
|
"grad_norm": 0.01266495417803526, |
|
"learning_rate": 3.218117899638594e-05, |
|
"loss": 0.9961, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3138888888888889, |
|
"grad_norm": 0.012783526442945004, |
|
"learning_rate": 3.214545082405213e-05, |
|
"loss": 0.9667, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3145833333333333, |
|
"grad_norm": 0.012183685787022114, |
|
"learning_rate": 3.210966114219444e-05, |
|
"loss": 0.9561, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.31527777777777777, |
|
"grad_norm": 0.014331048354506493, |
|
"learning_rate": 3.2073810132066414e-05, |
|
"loss": 1.0032, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3159722222222222, |
|
"grad_norm": 0.012980937026441097, |
|
"learning_rate": 3.2037897975232216e-05, |
|
"loss": 0.9567, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.31666666666666665, |
|
"grad_norm": 0.01176460087299347, |
|
"learning_rate": 3.200192485356569e-05, |
|
"loss": 0.9396, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3173611111111111, |
|
"grad_norm": 0.012090741656720638, |
|
"learning_rate": 3.1965890949249405e-05, |
|
"loss": 0.9494, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.31805555555555554, |
|
"grad_norm": 0.013988793827593327, |
|
"learning_rate": 3.192979644477378e-05, |
|
"loss": 1.0022, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.31875, |
|
"grad_norm": 0.012773574329912663, |
|
"learning_rate": 3.189364152293612e-05, |
|
"loss": 1.022, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3194444444444444, |
|
"grad_norm": 0.013159438967704773, |
|
"learning_rate": 3.185742636683972e-05, |
|
"loss": 0.9754, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.32013888888888886, |
|
"grad_norm": 0.015010225586593151, |
|
"learning_rate": 3.1821151159892924e-05, |
|
"loss": 0.9525, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.32083333333333336, |
|
"grad_norm": 0.012587225064635277, |
|
"learning_rate": 3.1784816085808196e-05, |
|
"loss": 0.9651, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3215277777777778, |
|
"grad_norm": 0.01335595827549696, |
|
"learning_rate": 3.17484213286012e-05, |
|
"loss": 0.9691, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.32222222222222224, |
|
"grad_norm": 0.012998990714550018, |
|
"learning_rate": 3.171196707258984e-05, |
|
"loss": 0.936, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3229166666666667, |
|
"grad_norm": 0.013085835613310337, |
|
"learning_rate": 3.167545350239336e-05, |
|
"loss": 0.9294, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3236111111111111, |
|
"grad_norm": 0.012886827811598778, |
|
"learning_rate": 3.16388808029314e-05, |
|
"loss": 0.9685, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.32430555555555557, |
|
"grad_norm": 0.014822276309132576, |
|
"learning_rate": 3.1602249159423054e-05, |
|
"loss": 0.9545, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 0.013084998354315758, |
|
"learning_rate": 3.1565558757385914e-05, |
|
"loss": 0.9638, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.32569444444444445, |
|
"grad_norm": 0.012661305256187916, |
|
"learning_rate": 3.152880978263517e-05, |
|
"loss": 0.9033, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3263888888888889, |
|
"grad_norm": 0.0137302465736866, |
|
"learning_rate": 3.149200242128263e-05, |
|
"loss": 0.9576, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.32708333333333334, |
|
"grad_norm": 0.01353361364454031, |
|
"learning_rate": 3.145513685973583e-05, |
|
"loss": 0.9555, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3277777777777778, |
|
"grad_norm": 0.01424756832420826, |
|
"learning_rate": 3.1418213284697e-05, |
|
"loss": 0.9583, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3284722222222222, |
|
"grad_norm": 0.013587539084255695, |
|
"learning_rate": 3.138123188316224e-05, |
|
"loss": 0.9665, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.32916666666666666, |
|
"grad_norm": 0.013397585600614548, |
|
"learning_rate": 3.1344192842420435e-05, |
|
"loss": 0.951, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.3298611111111111, |
|
"grad_norm": 0.012381638400256634, |
|
"learning_rate": 3.130709635005245e-05, |
|
"loss": 0.96, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.33055555555555555, |
|
"grad_norm": 0.014178499579429626, |
|
"learning_rate": 3.1269942593930055e-05, |
|
"loss": 0.9891, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.33125, |
|
"grad_norm": 0.013224642723798752, |
|
"learning_rate": 3.123273176221506e-05, |
|
"loss": 0.9719, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.33194444444444443, |
|
"grad_norm": 0.012466056272387505, |
|
"learning_rate": 3.119546404335831e-05, |
|
"loss": 0.968, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3326388888888889, |
|
"grad_norm": 0.01483081839978695, |
|
"learning_rate": 3.115813962609874e-05, |
|
"loss": 0.9601, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.013933224603533745, |
|
"learning_rate": 3.112075869946247e-05, |
|
"loss": 0.9802, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33402777777777776, |
|
"grad_norm": 0.013764635659754276, |
|
"learning_rate": 3.108332145276177e-05, |
|
"loss": 0.9646, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3347222222222222, |
|
"grad_norm": 0.014757770113646984, |
|
"learning_rate": 3.104582807559414e-05, |
|
"loss": 0.9592, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.33541666666666664, |
|
"grad_norm": 0.012920022010803223, |
|
"learning_rate": 3.100827875784138e-05, |
|
"loss": 0.9674, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.33611111111111114, |
|
"grad_norm": 0.013609576970338821, |
|
"learning_rate": 3.097067368966855e-05, |
|
"loss": 0.9456, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3368055555555556, |
|
"grad_norm": 0.013191001489758492, |
|
"learning_rate": 3.093301306152308e-05, |
|
"loss": 0.9568, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3375, |
|
"grad_norm": 0.013639998622238636, |
|
"learning_rate": 3.089529706413378e-05, |
|
"loss": 0.9746, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.33819444444444446, |
|
"grad_norm": 0.013085294514894485, |
|
"learning_rate": 3.085752588850986e-05, |
|
"loss": 0.9391, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3388888888888889, |
|
"grad_norm": 0.01391910295933485, |
|
"learning_rate": 3.081969972593999e-05, |
|
"loss": 0.9885, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.33958333333333335, |
|
"grad_norm": 0.013547900132834911, |
|
"learning_rate": 3.0781818767991295e-05, |
|
"loss": 0.9481, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3402777777777778, |
|
"grad_norm": 0.012131288647651672, |
|
"learning_rate": 3.074388320650843e-05, |
|
"loss": 0.9667, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34097222222222223, |
|
"grad_norm": 0.013132079504430294, |
|
"learning_rate": 3.070589323361257e-05, |
|
"loss": 0.9915, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3416666666666667, |
|
"grad_norm": 0.014800147153437138, |
|
"learning_rate": 3.0667849041700454e-05, |
|
"loss": 0.9737, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3423611111111111, |
|
"grad_norm": 0.01190117746591568, |
|
"learning_rate": 3.062975082344341e-05, |
|
"loss": 0.9413, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.34305555555555556, |
|
"grad_norm": 0.011400418356060982, |
|
"learning_rate": 3.059159877178638e-05, |
|
"loss": 0.9453, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 0.011752789840102196, |
|
"learning_rate": 3.055339307994693e-05, |
|
"loss": 0.9832, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.34444444444444444, |
|
"grad_norm": 0.01378630194813013, |
|
"learning_rate": 3.0515133941414294e-05, |
|
"loss": 0.9243, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3451388888888889, |
|
"grad_norm": 0.012208312749862671, |
|
"learning_rate": 3.0476821549948376e-05, |
|
"loss": 0.9798, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3458333333333333, |
|
"grad_norm": 0.01983562856912613, |
|
"learning_rate": 3.0438456099578775e-05, |
|
"loss": 0.9998, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.34652777777777777, |
|
"grad_norm": 0.013442217372357845, |
|
"learning_rate": 3.0400037784603805e-05, |
|
"loss": 0.9304, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3472222222222222, |
|
"grad_norm": 0.014221866615116596, |
|
"learning_rate": 3.0361566799589498e-05, |
|
"loss": 0.9938, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1598246092800000.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|