{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9959072305593453,
  "eval_steps": 500,
  "global_step": 1098,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.002728512960436562, "grad_norm": 74.70013427734375, "learning_rate": 1.8181818181818183e-07, "loss": 9.0455, "step": 1 },
    { "epoch": 0.005457025920873124, "grad_norm": 72.38750457763672, "learning_rate": 3.6363636363636366e-07, "loss": 9.1792, "step": 2 },
    { "epoch": 0.008185538881309686, "grad_norm": 72.11083984375, "learning_rate": 5.454545454545455e-07, "loss": 9.189, "step": 3 },
    { "epoch": 0.010914051841746248, "grad_norm": 70.28816986083984, "learning_rate": 7.272727272727273e-07, "loss": 9.1017, "step": 4 },
    { "epoch": 0.013642564802182811, "grad_norm": 64.50697326660156, "learning_rate": 9.090909090909091e-07, "loss": 8.9227, "step": 5 },
    { "epoch": 0.01637107776261937, "grad_norm": 63.397464752197266, "learning_rate": 1.090909090909091e-06, "loss": 8.9681, "step": 6 },
    { "epoch": 0.019099590723055934, "grad_norm": 52.970638275146484, "learning_rate": 1.2727272727272728e-06, "loss": 8.4263, "step": 7 },
    { "epoch": 0.021828103683492497, "grad_norm": 53.22758483886719, "learning_rate": 1.4545454545454546e-06, "loss": 8.2943, "step": 8 },
    { "epoch": 0.02455661664392906, "grad_norm": 42.63364791870117, "learning_rate": 1.6363636363636365e-06, "loss": 7.8594, "step": 9 },
    { "epoch": 0.027285129604365622, "grad_norm": 44.645076751708984, "learning_rate": 1.8181818181818183e-06, "loss": 8.0175, "step": 10 },
    { "epoch": 0.030013642564802184, "grad_norm": 76.23380279541016, "learning_rate": 2.0000000000000003e-06, "loss": 7.6523, "step": 11 },
    { "epoch": 0.03274215552523874, "grad_norm": 62.988075256347656, "learning_rate": 2.181818181818182e-06, "loss": 7.4509, "step": 12 },
    { "epoch": 0.03547066848567531, "grad_norm": 62.151546478271484, "learning_rate": 2.363636363636364e-06, "loss": 7.4096, "step": 13 },
    { "epoch": 0.03819918144611187, "grad_norm": 66.8625717163086, "learning_rate": 2.5454545454545456e-06, "loss": 7.2394, "step": 14 },
    { "epoch": 0.040927694406548434, "grad_norm": 84.82785034179688, "learning_rate": 2.7272727272727272e-06, "loss": 6.6904, "step": 15 },
    { "epoch": 0.04365620736698499, "grad_norm": 96.20341491699219, "learning_rate": 2.9090909090909093e-06, "loss": 6.0718, "step": 16 },
    { "epoch": 0.04638472032742155, "grad_norm": 104.0381088256836, "learning_rate": 3.090909090909091e-06, "loss": 5.4693, "step": 17 },
    { "epoch": 0.04911323328785812, "grad_norm": 163.81932067871094, "learning_rate": 3.272727272727273e-06, "loss": 4.8046, "step": 18 },
    { "epoch": 0.05184174624829468, "grad_norm": 100.85552215576172, "learning_rate": 3.454545454545455e-06, "loss": 3.8863, "step": 19 },
    { "epoch": 0.054570259208731244, "grad_norm": 94.65142822265625, "learning_rate": 3.6363636363636366e-06, "loss": 3.2934, "step": 20 },
    { "epoch": 0.0572987721691678, "grad_norm": 86.24637603759766, "learning_rate": 3.818181818181819e-06, "loss": 2.7249, "step": 21 },
    { "epoch": 0.06002728512960437, "grad_norm": 69.26914978027344, "learning_rate": 4.000000000000001e-06, "loss": 1.9717, "step": 22 },
    { "epoch": 0.06275579809004093, "grad_norm": 58.77188491821289, "learning_rate": 4.181818181818182e-06, "loss": 1.4078, "step": 23 },
    { "epoch": 0.06548431105047749, "grad_norm": 39.297298431396484, "learning_rate": 4.363636363636364e-06, "loss": 0.9179, "step": 24 },
    { "epoch": 0.06821282401091405, "grad_norm": 9.521720886230469, "learning_rate": 4.5454545454545455e-06, "loss": 0.8258, "step": 25 },
    { "epoch": 0.07094133697135062, "grad_norm": 7.111854553222656, "learning_rate": 4.727272727272728e-06, "loss": 0.7448, "step": 26 },
    { "epoch": 0.07366984993178717, "grad_norm": 5.886301517486572, "learning_rate": 4.90909090909091e-06, "loss": 0.7132, "step": 27 },
    { "epoch": 0.07639836289222374, "grad_norm": 6.183082103729248, "learning_rate": 5.090909090909091e-06, "loss": 0.6925, "step": 28 },
    { "epoch": 0.0791268758526603, "grad_norm": 3.947871446609497, "learning_rate": 5.272727272727273e-06, "loss": 0.6724, "step": 29 },
    { "epoch": 0.08185538881309687, "grad_norm": 3.117551565170288, "learning_rate": 5.4545454545454545e-06, "loss": 0.6551, "step": 30 },
    { "epoch": 0.08458390177353342, "grad_norm": 2.5701775550842285, "learning_rate": 5.636363636363636e-06, "loss": 0.6278, "step": 31 },
    { "epoch": 0.08731241473396999, "grad_norm": 2.8870959281921387, "learning_rate": 5.8181818181818185e-06, "loss": 0.6113, "step": 32 },
    { "epoch": 0.09004092769440655, "grad_norm": 4.249965667724609, "learning_rate": 6e-06, "loss": 0.6114, "step": 33 },
    { "epoch": 0.0927694406548431, "grad_norm": 2.521073341369629, "learning_rate": 6.181818181818182e-06, "loss": 0.5882, "step": 34 },
    { "epoch": 0.09549795361527967, "grad_norm": 2.4046638011932373, "learning_rate": 6.363636363636364e-06, "loss": 0.5772, "step": 35 },
    { "epoch": 0.09822646657571624, "grad_norm": 2.6150600910186768, "learning_rate": 6.545454545454546e-06, "loss": 0.5681, "step": 36 },
    { "epoch": 0.1009549795361528, "grad_norm": 3.809873580932617, "learning_rate": 6.7272727272727275e-06, "loss": 0.5616, "step": 37 },
    { "epoch": 0.10368349249658936, "grad_norm": 2.455195665359497, "learning_rate": 6.90909090909091e-06, "loss": 0.5595, "step": 38 },
    { "epoch": 0.10641200545702592, "grad_norm": 1.6467875242233276, "learning_rate": 7.0909090909090916e-06, "loss": 0.5632, "step": 39 },
    { "epoch": 0.10914051841746249, "grad_norm": 1.7783292531967163, "learning_rate": 7.272727272727273e-06, "loss": 0.5377, "step": 40 },
    { "epoch": 0.11186903137789904, "grad_norm": 2.9395012855529785, "learning_rate": 7.454545454545456e-06, "loss": 0.5424, "step": 41 },
    { "epoch": 0.1145975443383356, "grad_norm": 4.087269306182861, "learning_rate": 7.636363636363638e-06, "loss": 0.5374, "step": 42 },
    { "epoch": 0.11732605729877217, "grad_norm": 1.7641241550445557, "learning_rate": 7.81818181818182e-06, "loss": 0.5343, "step": 43 },
    { "epoch": 0.12005457025920874, "grad_norm": 1.872262954711914, "learning_rate": 8.000000000000001e-06, "loss": 0.522, "step": 44 },
    { "epoch": 0.12278308321964529, "grad_norm": 2.4888625144958496, "learning_rate": 8.181818181818183e-06, "loss": 0.516, "step": 45 },
    { "epoch": 0.12551159618008187, "grad_norm": 2.895923614501953, "learning_rate": 8.363636363636365e-06, "loss": 0.5191, "step": 46 },
    { "epoch": 0.12824010914051842, "grad_norm": 2.1216979026794434, "learning_rate": 8.545454545454546e-06, "loss": 0.4997, "step": 47 },
    { "epoch": 0.13096862210095497, "grad_norm": 4.029631614685059, "learning_rate": 8.727272727272728e-06, "loss": 0.5045, "step": 48 },
    { "epoch": 0.13369713506139155, "grad_norm": 2.3348028659820557, "learning_rate": 8.90909090909091e-06, "loss": 0.5104, "step": 49 },
    { "epoch": 0.1364256480218281, "grad_norm": 2.498213291168213, "learning_rate": 9.090909090909091e-06, "loss": 0.4862, "step": 50 },
    { "epoch": 0.13915416098226466, "grad_norm": 3.932359218597412, "learning_rate": 9.272727272727273e-06, "loss": 0.4811, "step": 51 },
    { "epoch": 0.14188267394270124, "grad_norm": 2.6025686264038086, "learning_rate": 9.454545454545456e-06, "loss": 0.4873, "step": 52 },
    { "epoch": 0.1446111869031378, "grad_norm": 4.504275321960449, "learning_rate": 9.636363636363638e-06, "loss": 0.4705, "step": 53 },
    { "epoch": 0.14733969986357434, "grad_norm": 2.8336246013641357, "learning_rate": 9.81818181818182e-06, "loss": 0.4734, "step": 54 },
    { "epoch": 0.15006821282401092, "grad_norm": 3.5705838203430176, "learning_rate": 1e-05, "loss": 0.464, "step": 55 },
    { "epoch": 0.15279672578444747, "grad_norm": 4.8748931884765625, "learning_rate": 1.0181818181818182e-05, "loss": 0.4677, "step": 56 },
    { "epoch": 0.15552523874488403, "grad_norm": 3.273179292678833, "learning_rate": 1.0363636363636364e-05, "loss": 0.4663, "step": 57 },
    { "epoch": 0.1582537517053206, "grad_norm": 2.9470977783203125, "learning_rate": 1.0545454545454546e-05, "loss": 0.4544, "step": 58 },
    { "epoch": 0.16098226466575716, "grad_norm": 5.8234171867370605, "learning_rate": 1.0727272727272729e-05, "loss": 0.4569, "step": 59 },
    { "epoch": 0.16371077762619374, "grad_norm": 2.682898759841919, "learning_rate": 1.0909090909090909e-05, "loss": 0.4523, "step": 60 },
    { "epoch": 0.1664392905866303, "grad_norm": 9.162665367126465, "learning_rate": 1.1090909090909092e-05, "loss": 0.4428, "step": 61 },
    { "epoch": 0.16916780354706684, "grad_norm": 8.336901664733887, "learning_rate": 1.1272727272727272e-05, "loss": 0.4493, "step": 62 },
    { "epoch": 0.17189631650750342, "grad_norm": 3.5765390396118164, "learning_rate": 1.1454545454545455e-05, "loss": 0.4405, "step": 63 },
    { "epoch": 0.17462482946793997, "grad_norm": 6.3450026512146, "learning_rate": 1.1636363636363637e-05, "loss": 0.4349, "step": 64 },
    { "epoch": 0.17735334242837653, "grad_norm": 3.6071555614471436, "learning_rate": 1.181818181818182e-05, "loss": 0.4272, "step": 65 },
    { "epoch": 0.1800818553888131, "grad_norm": 3.7100143432617188, "learning_rate": 1.2e-05, "loss": 0.4161, "step": 66 },
    { "epoch": 0.18281036834924966, "grad_norm": 3.773010492324829, "learning_rate": 1.2181818181818184e-05, "loss": 0.4142, "step": 67 },
    { "epoch": 0.1855388813096862, "grad_norm": 2.8710012435913086, "learning_rate": 1.2363636363636364e-05, "loss": 0.4193, "step": 68 },
    { "epoch": 0.1882673942701228, "grad_norm": 1.9093347787857056, "learning_rate": 1.2545454545454547e-05, "loss": 0.417, "step": 69 },
    { "epoch": 0.19099590723055934, "grad_norm": 2.574664831161499, "learning_rate": 1.2727272727272728e-05, "loss": 0.4023, "step": 70 },
    { "epoch": 0.1937244201909959, "grad_norm": 2.875770330429077, "learning_rate": 1.2909090909090912e-05, "loss": 0.4032, "step": 71 },
    { "epoch": 0.19645293315143247, "grad_norm": 3.4898297786712646, "learning_rate": 1.3090909090909092e-05, "loss": 0.3789, "step": 72 },
    { "epoch": 0.19918144611186903, "grad_norm": 2.9872593879699707, "learning_rate": 1.3272727272727275e-05, "loss": 0.3955, "step": 73 },
    { "epoch": 0.2019099590723056, "grad_norm": 2.160285472869873, "learning_rate": 1.3454545454545455e-05, "loss": 0.3881, "step": 74 },
    { "epoch": 0.20463847203274216, "grad_norm": 2.384871006011963, "learning_rate": 1.3636363636363637e-05, "loss": 0.3921, "step": 75 },
    { "epoch": 0.2073669849931787, "grad_norm": 3.565929412841797, "learning_rate": 1.381818181818182e-05, "loss": 0.377, "step": 76 },
    { "epoch": 0.2100954979536153, "grad_norm": 2.4952895641326904, "learning_rate": 1.4e-05, "loss": 0.3795, "step": 77 },
    { "epoch": 0.21282401091405184, "grad_norm": 4.278308868408203, "learning_rate": 1.4181818181818183e-05, "loss": 0.3807, "step": 78 },
    { "epoch": 0.2155525238744884, "grad_norm": 2.777308464050293, "learning_rate": 1.4363636363636365e-05, "loss": 0.378, "step": 79 },
    { "epoch": 0.21828103683492497, "grad_norm": 2.3112456798553467, "learning_rate": 1.4545454545454546e-05, "loss": 0.3726, "step": 80 },
    { "epoch": 0.22100954979536153, "grad_norm": 3.0835683345794678, "learning_rate": 1.4727272727272728e-05, "loss": 0.3741, "step": 81 },
    { "epoch": 0.22373806275579808, "grad_norm": 3.618603467941284, "learning_rate": 1.4909090909090911e-05, "loss": 0.3631, "step": 82 },
    { "epoch": 0.22646657571623466, "grad_norm": 4.083821773529053, "learning_rate": 1.5090909090909091e-05, "loss": 0.3708, "step": 83 },
    { "epoch": 0.2291950886766712, "grad_norm": 1.9423736333847046, "learning_rate": 1.5272727272727276e-05, "loss": 0.3627, "step": 84 },
    { "epoch": 0.23192360163710776, "grad_norm": 4.401960372924805, "learning_rate": 1.5454545454545454e-05, "loss": 0.362, "step": 85 },
    { "epoch": 0.23465211459754434, "grad_norm": 3.1123781204223633, "learning_rate": 1.563636363636364e-05, "loss": 0.3578, "step": 86 },
    { "epoch": 0.2373806275579809, "grad_norm": 2.0257530212402344, "learning_rate": 1.5818181818181818e-05, "loss": 0.356, "step": 87 },
    { "epoch": 0.24010914051841747, "grad_norm": 3.943086624145508, "learning_rate": 1.6000000000000003e-05, "loss": 0.3503, "step": 88 },
    { "epoch": 0.24283765347885403, "grad_norm": 4.66082763671875, "learning_rate": 1.6181818181818184e-05, "loss": 0.347, "step": 89 },
    { "epoch": 0.24556616643929058, "grad_norm": 2.2836050987243652, "learning_rate": 1.6363636363636366e-05, "loss": 0.3516, "step": 90 },
    { "epoch": 0.24829467939972716, "grad_norm": 6.001435279846191, "learning_rate": 1.6545454545454548e-05, "loss": 0.3421, "step": 91 },
    { "epoch": 0.25102319236016374, "grad_norm": 3.366344451904297, "learning_rate": 1.672727272727273e-05, "loss": 0.3506, "step": 92 },
    { "epoch": 0.25375170532060026, "grad_norm": 6.4907379150390625, "learning_rate": 1.690909090909091e-05, "loss": 0.3514, "step": 93 },
    { "epoch": 0.25648021828103684, "grad_norm": 3.615786075592041, "learning_rate": 1.7090909090909092e-05, "loss": 0.3589, "step": 94 },
    { "epoch": 0.2592087312414734, "grad_norm": 9.323090553283691, "learning_rate": 1.7272727272727274e-05, "loss": 0.3548, "step": 95 },
    { "epoch": 0.26193724420190995, "grad_norm": 5.748571395874023, "learning_rate": 1.7454545454545456e-05, "loss": 0.345, "step": 96 },
    { "epoch": 0.2646657571623465, "grad_norm": 6.471901893615723, "learning_rate": 1.7636363636363637e-05, "loss": 0.3495, "step": 97 },
    { "epoch": 0.2673942701227831, "grad_norm": 6.451484680175781, "learning_rate": 1.781818181818182e-05, "loss": 0.3305, "step": 98 },
    { "epoch": 0.27012278308321963, "grad_norm": 5.596473693847656, "learning_rate": 1.8e-05, "loss": 0.3377, "step": 99 },
    { "epoch": 0.2728512960436562, "grad_norm": 4.3230695724487305, "learning_rate": 1.8181818181818182e-05, "loss": 0.3288, "step": 100 },
    { "epoch": 0.2755798090040928, "grad_norm": 5.818665981292725, "learning_rate": 1.8363636363636367e-05, "loss": 0.3232, "step": 101 },
    { "epoch": 0.2783083219645293, "grad_norm": 4.472134113311768, "learning_rate": 1.8545454545454545e-05, "loss": 0.3373, "step": 102 },
    { "epoch": 0.2810368349249659, "grad_norm": 7.769753932952881, "learning_rate": 1.872727272727273e-05, "loss": 0.3182, "step": 103 },
    { "epoch": 0.2837653478854025, "grad_norm": 7.661220550537109, "learning_rate": 1.8909090909090912e-05, "loss": 0.3243, "step": 104 },
    { "epoch": 0.286493860845839, "grad_norm": 3.2343485355377197, "learning_rate": 1.9090909090909094e-05, "loss": 0.3192, "step": 105 },
    { "epoch": 0.2892223738062756, "grad_norm": 3.323853015899658, "learning_rate": 1.9272727272727275e-05, "loss": 0.3075, "step": 106 },
    { "epoch": 0.29195088676671216, "grad_norm": 5.918493270874023, "learning_rate": 1.9454545454545457e-05, "loss": 0.3103, "step": 107 },
    { "epoch": 0.2946793997271487, "grad_norm": 3.1720244884490967, "learning_rate": 1.963636363636364e-05, "loss": 0.305, "step": 108 },
    { "epoch": 0.29740791268758526, "grad_norm": 6.2536821365356445, "learning_rate": 1.981818181818182e-05, "loss": 0.3066, "step": 109 },
    { "epoch": 0.30013642564802184, "grad_norm": 5.35992431640625, "learning_rate": 2e-05, "loss": 0.3027, "step": 110 },
    { "epoch": 0.30286493860845837, "grad_norm": 5.9867262840271, "learning_rate": 1.9999949446003432e-05, "loss": 0.3101, "step": 111 },
    { "epoch": 0.30559345156889495, "grad_norm": 5.217441082000732, "learning_rate": 1.9999797784524866e-05, "loss": 0.2974, "step": 112 },
    { "epoch": 0.3083219645293315, "grad_norm": 5.573215484619141, "learning_rate": 1.9999545017097726e-05, "loss": 0.3021, "step": 113 },
    { "epoch": 0.31105047748976805, "grad_norm": 5.682995796203613, "learning_rate": 1.999919114627769e-05, "loss": 0.305, "step": 114 },
    { "epoch": 0.31377899045020463, "grad_norm": 5.977804660797119, "learning_rate": 1.9998736175642674e-05, "loss": 0.3014, "step": 115 },
    { "epoch": 0.3165075034106412, "grad_norm": 5.056419372558594, "learning_rate": 1.9998180109792793e-05, "loss": 0.2918, "step": 116 },
    { "epoch": 0.31923601637107774, "grad_norm": 4.191486835479736, "learning_rate": 1.999752295435032e-05, "loss": 0.2859, "step": 117 },
    { "epoch": 0.3219645293315143, "grad_norm": 3.9454257488250732, "learning_rate": 1.999676471595962e-05, "loss": 0.2853, "step": 118 },
    { "epoch": 0.3246930422919509, "grad_norm": 6.6568922996521, "learning_rate": 1.9995905402287094e-05, "loss": 0.2906, "step": 119 },
    { "epoch": 0.3274215552523875, "grad_norm": 6.2630133628845215, "learning_rate": 1.9994945022021085e-05, "loss": 0.2945, "step": 120 },
    { "epoch": 0.330150068212824, "grad_norm": 3.6698672771453857, "learning_rate": 1.9993883584871807e-05, "loss": 0.2856, "step": 121 },
    { "epoch": 0.3328785811732606, "grad_norm": 4.083488464355469, "learning_rate": 1.9992721101571238e-05, "loss": 0.2897, "step": 122 },
    { "epoch": 0.33560709413369716, "grad_norm": 5.033275127410889, "learning_rate": 1.999145758387301e-05, "loss": 0.2853, "step": 123 },
    { "epoch": 0.3383356070941337, "grad_norm": 3.358428955078125, "learning_rate": 1.9990093044552304e-05, "loss": 0.2848, "step": 124 },
    { "epoch": 0.34106412005457026, "grad_norm": 6.593973159790039, "learning_rate": 1.9988627497405696e-05, "loss": 0.2868, "step": 125 },
    { "epoch": 0.34379263301500684, "grad_norm": 5.836681842803955, "learning_rate": 1.9987060957251047e-05, "loss": 0.2785, "step": 126 },
    { "epoch": 0.34652114597544337, "grad_norm": 2.7933859825134277, "learning_rate": 1.9985393439927325e-05, "loss": 0.273, "step": 127 },
    { "epoch": 0.34924965893587995, "grad_norm": 3.4475436210632324, "learning_rate": 1.998362496229446e-05, "loss": 0.2745, "step": 128 },
    { "epoch": 0.3519781718963165, "grad_norm": 4.551499366760254, "learning_rate": 1.9981755542233175e-05, "loss": 0.2792, "step": 129 },
    { "epoch": 0.35470668485675305, "grad_norm": 3.666813611984253, "learning_rate": 1.997978519864481e-05, "loss": 0.2711, "step": 130 },
    { "epoch": 0.35743519781718963, "grad_norm": 5.33425235748291, "learning_rate": 1.9977713951451102e-05, "loss": 0.27, "step": 131 },
    { "epoch": 0.3601637107776262, "grad_norm": 4.913830280303955, "learning_rate": 1.9975541821594028e-05, "loss": 0.2732, "step": 132 },
    { "epoch": 0.36289222373806274, "grad_norm": 3.9749391078948975, "learning_rate": 1.9973268831035547e-05, "loss": 0.2693, "step": 133 },
    { "epoch": 0.3656207366984993, "grad_norm": 3.055846691131592, "learning_rate": 1.9970895002757413e-05, "loss": 0.2692, "step": 134 },
    { "epoch": 0.3683492496589359, "grad_norm": 6.07874059677124, "learning_rate": 1.996842036076093e-05, "loss": 0.273, "step": 135 },
    { "epoch": 0.3710777626193724, "grad_norm": 4.657766819000244, "learning_rate": 1.99658449300667e-05, "loss": 0.2819, "step": 136 },
    { "epoch": 0.373806275579809, "grad_norm": 4.3276801109313965, "learning_rate": 1.9963168736714395e-05, "loss": 0.2737, "step": 137 },
    { "epoch": 0.3765347885402456, "grad_norm": 4.3239569664001465, "learning_rate": 1.9960391807762462e-05, "loss": 0.268, "step": 138 },
    { "epoch": 0.3792633015006821, "grad_norm": 3.855254888534546, "learning_rate": 1.9957514171287875e-05, "loss": 0.2607, "step": 139 },
    { "epoch": 0.3819918144611187, "grad_norm": 3.0584146976470947, "learning_rate": 1.995453585638584e-05, "loss": 0.2652, "step": 140 },
    { "epoch": 0.38472032742155526, "grad_norm": 4.666026592254639, "learning_rate": 1.9951456893169497e-05, "loss": 0.2614, "step": 141 },
    { "epoch": 0.3874488403819918, "grad_norm": 3.672959566116333, "learning_rate": 1.994827731276963e-05, "loss": 0.2725, "step": 142 },
    { "epoch": 0.39017735334242837, "grad_norm": 5.299169063568115, "learning_rate": 1.994499714733434e-05, "loss": 0.2629, "step": 143 },
    { "epoch": 0.39290586630286495, "grad_norm": 4.301833629608154, "learning_rate": 1.9941616430028713e-05, "loss": 0.2597, "step": 144 },
    { "epoch": 0.3956343792633015, "grad_norm": 4.388802528381348, "learning_rate": 1.993813519503451e-05, "loss": 0.2702, "step": 145 },
    { "epoch": 0.39836289222373805, "grad_norm": 3.4200570583343506, "learning_rate": 1.9934553477549795e-05, "loss": 0.2608, "step": 146 },
    { "epoch": 0.40109140518417463, "grad_norm": 6.084020614624023, "learning_rate": 1.99308713137886e-05, "loss": 0.2665, "step": 147 },
    { "epoch": 0.4038199181446112, "grad_norm": 4.747646331787109, "learning_rate": 1.992708874098054e-05, "loss": 0.2605, "step": 148 },
    { "epoch": 0.40654843110504774, "grad_norm": 3.8556838035583496, "learning_rate": 1.992320579737045e-05, "loss": 0.2562, "step": 149 },
    { "epoch": 0.4092769440654843, "grad_norm": 3.480994462966919, "learning_rate": 1.9919222522217998e-05, "loss": 0.2614, "step": 150 },
    { "epoch": 0.4120054570259209, "grad_norm": 3.6695926189422607, "learning_rate": 1.9915138955797272e-05, "loss": 0.2593, "step": 151 },
    { "epoch": 0.4147339699863574, "grad_norm": 2.5881447792053223, "learning_rate": 1.9910955139396395e-05, "loss": 0.2598, "step": 152 },
    { "epoch": 0.417462482946794, "grad_norm": 5.744887351989746, "learning_rate": 1.99066711153171e-05, "loss": 0.2553, "step": 153 },
    { "epoch": 0.4201909959072306, "grad_norm": 5.279797554016113, "learning_rate": 1.990228692687429e-05, "loss": 0.2616, "step": 154 },
    { "epoch": 0.4229195088676671, "grad_norm": 4.670532703399658, "learning_rate": 1.9897802618395614e-05, "loss": 0.2636, "step": 155 },
    { "epoch": 0.4256480218281037, "grad_norm": 4.182051658630371, "learning_rate": 1.9893218235221016e-05, "loss": 0.2604, "step": 156 },
    { "epoch": 0.42837653478854026, "grad_norm": 3.6281673908233643, "learning_rate": 1.988853382370228e-05, "loss": 0.2571, "step": 157 },
    { "epoch": 0.4311050477489768, "grad_norm": 3.440207004547119, "learning_rate": 1.988374943120254e-05, "loss": 0.2569, "step": 158 },
    { "epoch": 0.43383356070941337, "grad_norm": 4.212845802307129, "learning_rate": 1.9878865106095838e-05, "loss": 0.2482, "step": 159 },
    { "epoch": 0.43656207366984995, "grad_norm": 4.241349220275879, "learning_rate": 1.9873880897766597e-05, "loss": 0.2549, "step": 160 },
    { "epoch": 0.4392905866302865, "grad_norm": 3.784503221511841, "learning_rate": 1.9868796856609154e-05, "loss": 0.2641, "step": 161 },
    { "epoch": 0.44201909959072305, "grad_norm": 2.801694631576538, "learning_rate": 1.9863613034027224e-05, "loss": 0.2501, "step": 162 },
    { "epoch": 0.44474761255115963, "grad_norm": 5.349376678466797, "learning_rate": 1.9858329482433404e-05, "loss": 0.2552, "step": 163 },
    { "epoch": 0.44747612551159616, "grad_norm": 4.352358341217041, "learning_rate": 1.985294625524861e-05, "loss": 0.2534, "step": 164 },
    { "epoch": 0.45020463847203274, "grad_norm": 4.777315616607666, "learning_rate": 1.984746340690159e-05, "loss": 0.2555, "step": 165 },
    { "epoch": 0.4529331514324693, "grad_norm": 4.782495498657227, "learning_rate": 1.9841880992828306e-05, "loss": 0.2544, "step": 166 },
    { "epoch": 0.45566166439290584, "grad_norm": 2.6630942821502686, "learning_rate": 1.983619906947144e-05, "loss": 0.2542, "step": 167 },
    { "epoch": 0.4583901773533424, "grad_norm": 2.5042455196380615, "learning_rate": 1.9830417694279766e-05, "loss": 0.2511, "step": 168 },
    { "epoch": 0.461118690313779, "grad_norm": 4.9226579666137695, "learning_rate": 1.9824536925707622e-05, "loss": 0.2561, "step": 169 },
    { "epoch": 0.4638472032742155, "grad_norm": 4.228565216064453, "learning_rate": 1.981855682321427e-05, "loss": 0.2497, "step": 170 },
    { "epoch": 0.4665757162346521, "grad_norm": 4.8918280601501465, "learning_rate": 1.9812477447263324e-05, "loss": 0.2509, "step": 171 },
    { "epoch": 0.4693042291950887, "grad_norm": 4.446721076965332, "learning_rate": 1.9806298859322143e-05, "loss": 0.2519, "step": 172 },
    { "epoch": 0.47203274215552526, "grad_norm": 3.0315163135528564, "learning_rate": 1.980002112186118e-05, "loss": 0.247, "step": 173 },
    { "epoch": 0.4747612551159618, "grad_norm": 3.33701229095459, "learning_rate": 1.979364429835339e-05, "loss": 0.2516, "step": 174 },
    { "epoch": 0.47748976807639837, "grad_norm": 5.234941482543945, "learning_rate": 1.9787168453273546e-05, "loss": 0.2538, "step": 175 },
    { "epoch": 0.48021828103683495, "grad_norm": 3.7416799068450928, "learning_rate": 1.978059365209762e-05, "loss": 0.2578, "step": 176 },
    { "epoch": 0.4829467939972715, "grad_norm": 4.0507330894470215, "learning_rate": 1.9773919961302113e-05, "loss": 0.2515, "step": 177 },
    { "epoch": 0.48567530695770805, "grad_norm": 4.139606952667236, "learning_rate": 1.9767147448363366e-05, "loss": 0.2502, "step": 178 },
    { "epoch": 0.48840381991814463, "grad_norm": 3.8178112506866455, "learning_rate": 1.9760276181756905e-05, "loss": 0.2508, "step": 179 },
    { "epoch": 0.49113233287858116, "grad_norm": 3.926447629928589, "learning_rate": 1.975330623095672e-05, "loss": 0.2475, "step": 180 },
    { "epoch": 0.49386084583901774, "grad_norm": 3.354318141937256, "learning_rate": 1.9746237666434588e-05, "loss": 0.2502, "step": 181 },
    { "epoch": 0.4965893587994543, "grad_norm": 3.2970614433288574, "learning_rate": 1.9739070559659347e-05, "loss": 0.2472, "step": 182 },
    { "epoch": 0.49931787175989084, "grad_norm": 4.805551052093506, "learning_rate": 1.973180498309618e-05, "loss": 0.2427, "step": 183 },
    { "epoch": 0.5020463847203275, "grad_norm": 3.856684446334839, "learning_rate": 1.9724441010205865e-05, "loss": 0.2441, "step": 184 },
    { "epoch": 0.504774897680764, "grad_norm": 3.8979854583740234, "learning_rate": 1.9716978715444056e-05, "loss": 0.2463, "step": 185 },
    { "epoch": 0.5075034106412005, "grad_norm": 3.709199905395508, "learning_rate": 1.9709418174260523e-05, "loss": 0.246, "step": 186 },
    { "epoch": 0.5102319236016372, "grad_norm": 4.20219612121582, "learning_rate": 1.9701759463098377e-05, "loss": 0.2512, "step": 187 },
    { "epoch": 0.5129604365620737, "grad_norm": 3.796937942504883, "learning_rate": 1.9694002659393306e-05, "loss": 0.2482, "step": 188 },
    { "epoch": 0.5156889495225102, "grad_norm": 3.9981391429901123, "learning_rate": 1.9686147841572803e-05, "loss": 0.2359, "step": 189 },
    { "epoch": 0.5184174624829468, "grad_norm": 3.802412986755371, "learning_rate": 1.9678195089055347e-05, "loss": 0.2417, "step": 190 },
    { "epoch": 0.5211459754433834, "grad_norm": 4.142465114593506, "learning_rate": 1.967014448224963e-05, "loss": 0.2392, "step": 191 },
    { "epoch": 0.5238744884038199, "grad_norm": 3.554514169692993, "learning_rate": 1.9661996102553716e-05, "loss": 0.2399, "step": 192 },
    { "epoch": 0.5266030013642565, "grad_norm": 3.4873900413513184, "learning_rate": 1.965375003235424e-05, "loss": 0.2481, "step": 193 },
    { "epoch": 0.529331514324693, "grad_norm": 3.5542309284210205, "learning_rate": 1.9645406355025565e-05, "loss": 0.2423, "step": 194 },
    { "epoch": 0.5320600272851296, "grad_norm": 3.76362943649292, "learning_rate": 1.9636965154928932e-05, "loss": 0.2383, "step": 195 },
    { "epoch": 0.5347885402455662, "grad_norm": 2.883169651031494, "learning_rate": 1.9628426517411625e-05, "loss": 0.2383, "step": 196 },
    { "epoch": 0.5375170532060027, "grad_norm": 3.5655651092529297, "learning_rate": 1.9619790528806092e-05, "loss": 0.2387, "step": 197 },
    { "epoch": 0.5402455661664393, "grad_norm": 3.3741378784179688, "learning_rate": 1.9611057276429085e-05, "loss": 0.2444, "step": 198 },
    { "epoch": 0.5429740791268759, "grad_norm": 4.598501682281494, "learning_rate": 1.9602226848580762e-05, "loss": 0.2406, "step": 199 },
    { "epoch": 0.5457025920873124, "grad_norm": 4.26082181930542, "learning_rate": 1.959329933454381e-05, "loss": 0.2508, "step": 200 },
    { "epoch": 0.548431105047749, "grad_norm": 3.1739697456359863, "learning_rate": 1.958427482458253e-05, "loss": 0.242, "step": 201 },
    { "epoch": 0.5511596180081856, "grad_norm": 2.8693127632141113, "learning_rate": 1.957515340994193e-05, "loss": 0.2367, "step": 202 },
    { "epoch": 0.5538881309686221, "grad_norm": 4.578694820404053, "learning_rate": 1.95659351828468e-05, "loss": 0.2378, "step": 203 },
    { "epoch": 0.5566166439290586, "grad_norm": 3.6820926666259766, "learning_rate": 1.9556620236500794e-05, "loss": 0.2413, "step": 204 },
    { "epoch": 0.5593451568894953, "grad_norm": 4.010608196258545, "learning_rate": 1.954720866508546e-05, "loss": 0.2477, "step": 205 },
    { "epoch": 0.5620736698499318, "grad_norm": 3.723583221435547, "learning_rate": 1.9537700563759303e-05, "loss": 0.2376, "step": 206 },
    { "epoch": 0.5648021828103683, "grad_norm": 3.185204029083252, "learning_rate": 1.9528096028656835e-05, "loss": 0.2402, "step": 207 },
    { "epoch": 0.567530695770805, "grad_norm": 3.1054224967956543, "learning_rate": 1.9518395156887574e-05, "loss": 0.2401, "step": 208 },
    { "epoch": 0.5702592087312415, "grad_norm": 4.654784202575684, "learning_rate": 1.9508598046535095e-05, "loss": 0.2515, "step": 209 },
    { "epoch": 0.572987721691678, "grad_norm": 4.25405216217041, "learning_rate": 1.949870479665602e-05, "loss": 0.2442, "step": 210 },
    { "epoch": 0.5757162346521146, "grad_norm": 3.365250587463379, "learning_rate": 1.9488715507279e-05, "loss": 0.2379, "step": 211 },
    { "epoch": 0.5784447476125512, "grad_norm": 2.708874464035034, "learning_rate": 1.9478630279403737e-05, "loss": 0.2289, "step": 212 },
    { "epoch": 0.5811732605729877, "grad_norm": 4.695353031158447, "learning_rate": 1.9468449214999956e-05, "loss": 0.2449, "step": 213 },
    { "epoch": 0.5839017735334243, "grad_norm": 3.746852159500122, "learning_rate": 1.9458172417006347e-05, "loss": 0.2339, "step": 214 },
    { "epoch": 0.5866302864938608, "grad_norm": 3.6476454734802246, "learning_rate": 1.9447799989329557e-05, "loss": 0.2382, "step": 215 },
    { "epoch": 0.5893587994542974, "grad_norm": 3.3392837047576904, "learning_rate": 1.943733203684312e-05, "loss": 0.2406, "step": 216 },
    { "epoch": 0.592087312414734, "grad_norm": 3.441448211669922, "learning_rate": 1.9426768665386397e-05, "loss": 0.2446, "step": 217 },
    { "epoch": 0.5948158253751705, "grad_norm": 2.790771722793579, "learning_rate": 1.9416109981763526e-05, "loss": 0.2325, "step": 218 },
    { "epoch": 0.597544338335607, "grad_norm": 5.028257369995117, "learning_rate": 1.9405356093742314e-05, "loss": 0.2312, "step": 219 },
    { "epoch": 0.6002728512960437, "grad_norm": 4.487533092498779, "learning_rate": 1.939450711005316e-05, "loss": 0.2465, "step": 220 },
    { "epoch": 0.6030013642564802, "grad_norm": 3.343076467514038, "learning_rate": 1.9383563140387966e-05, "loss": 0.234, "step": 221 },
    { "epoch": 0.6057298772169167, "grad_norm": 3.0385632514953613, "learning_rate": 1.9372524295399014e-05, "loss": 0.233, "step": 222 },
    { "epoch": 0.6084583901773534, "grad_norm": 3.32714581489563, "learning_rate": 1.9361390686697847e-05, "loss": 0.2385, "step": 223 },
    { "epoch": 0.6111869031377899, "grad_norm": 2.715806245803833, "learning_rate": 1.9350162426854152e-05, "loss": 0.2317, "step": 224 },
    { "epoch": 0.6139154160982264, "grad_norm": 4.670190334320068, "learning_rate": 1.9338839629394606e-05, "loss": 0.2349, "step": 225 },
    { "epoch": 0.616643929058663, "grad_norm": 3.5198147296905518, "learning_rate": 1.9327422408801744e-05, "loss": 0.2249, "step": 226 },
    { "epoch": 0.6193724420190996, "grad_norm": 3.903116464614868, "learning_rate": 1.9315910880512792e-05, "loss": 0.2383, "step": 227 },
    { "epoch": 0.6221009549795361, "grad_norm": 3.9901745319366455, "learning_rate": 1.93043051609185e-05, "loss": 0.2412, "step": 228 },
    { "epoch": 0.6248294679399727, "grad_norm": 2.8431413173675537, "learning_rate": 1.929260536736198e-05, "loss": 0.2393, "step": 229 },
    { "epoch": 0.6275579809004093, "grad_norm": 3.0009732246398926, "learning_rate": 1.9280811618137486e-05, "loss": 0.2332, "step": 230 },
    { "epoch": 0.6302864938608458, "grad_norm": 4.375698566436768, "learning_rate": 1.926892403248925e-05, "loss": 0.2394, "step": 231 },
    { "epoch": 0.6330150068212824, "grad_norm": 3.7204086780548096, "learning_rate": 1.9256942730610268e-05, "loss": 0.2366, "step": 232 },
    { "epoch": 0.635743519781719, "grad_norm": 3.1521670818328857, "learning_rate": 1.9244867833641078e-05, "loss": 0.2355, "step": 233 },
    { "epoch": 0.6384720327421555, "grad_norm": 2.801316499710083, "learning_rate": 1.9232699463668543e-05, "loss": 0.2345, "step": 234 },
    { "epoch": 0.6412005457025921, "grad_norm": 4.499333381652832, "learning_rate": 1.9220437743724605e-05, "loss": 0.2311, "step": 235 },
    { "epoch": 0.6439290586630286, "grad_norm": 3.6295053958892822, "learning_rate": 1.9208082797785057e-05, "loss": 0.2304, "step": 236 },
    { "epoch": 0.6466575716234653, "grad_norm": 3.377734899520874, "learning_rate": 1.9195634750768276e-05, "loss": 0.2304, "step": 237 },
    { "epoch": 0.6493860845839018, "grad_norm": 3.2624452114105225, "learning_rate": 1.9183093728533966e-05, "loss": 0.2275, "step": 238 },
    { "epoch": 0.6521145975443383, "grad_norm": 3.3896477222442627, "learning_rate": 1.9170459857881888e-05, "loss": 0.2292, "step": 239 },
    { "epoch": 0.654843110504775, "grad_norm": 2.768524646759033, "learning_rate": 1.9157733266550577e-05, "loss": 0.2371, "step": 240 },
    { "epoch": 0.6575716234652115, "grad_norm": 3.741811513900757, "learning_rate": 1.9144914083216036e-05, "loss": 0.2302, "step": 241 },
    { "epoch": 0.660300136425648, "grad_norm": 3.910012722015381, "learning_rate": 1.913200243749046e-05, "loss": 0.2306, "step": 242 },
    { "epoch": 0.6630286493860846, "grad_norm": 3.4035255908966064, "learning_rate": 1.91189984599209e-05, "loss": 0.2302, "step": 243 },
    { "epoch": 0.6657571623465212, "grad_norm": 2.929786205291748, "learning_rate": 1.910590228198798e-05, "loss": 0.2316, "step": 244 },
    { "epoch": 0.6684856753069577, "grad_norm": 3.5022189617156982, "learning_rate": 1.9092714036104508e-05, "loss": 0.2387, "step": 245 },
    { "epoch": 0.6712141882673943, "grad_norm": 2.599740982055664, "learning_rate": 1.9079433855614203e-05, "loss": 0.2284, "step": 246 },
    { "epoch": 0.6739427012278308, "grad_norm": 4.416684627532959, "learning_rate": 1.9066061874790302e-05, "loss": 0.2323, "step": 247 },
    { "epoch": 0.6766712141882674, "grad_norm": 4.048118591308594, "learning_rate": 1.9052598228834217e-05, "loss": 0.2318, "step": 248 },
    { "epoch": 0.679399727148704, "grad_norm": 3.1929619312286377, "learning_rate": 1.9039043053874175e-05, "loss": 0.2375, "step": 249 },
    { "epoch": 0.6821282401091405, "grad_norm": 2.838665246963501, "learning_rate": 1.9025396486963827e-05, "loss": 0.2309, "step": 250 },
    { "epoch": 0.684856753069577, "grad_norm": 3.510965585708618, "learning_rate": 1.9011658666080873e-05, "loss": 0.226, "step": 251 },
    { "epoch": 0.6875852660300137, "grad_norm": 3.3126542568206787, "learning_rate": 1.8997829730125662e-05, "loss": 0.2255, "step": 252 },
    { "epoch": 0.6903137789904502, "grad_norm": 2.888857126235962, "learning_rate": 1.898390981891979e-05, "loss": 0.2244, "step": 253 },
    { "epoch": 0.6930422919508867, "grad_norm": 3.039818286895752, "learning_rate": 1.8969899073204687e-05, "loss": 0.219, "step": 254 },
    { "epoch": 0.6957708049113234, "grad_norm": 3.580467939376831, "learning_rate": 1.895579763464019e-05, "loss": 0.2317, "step": 255 },
    { "epoch": 0.6984993178717599, "grad_norm": 2.6989052295684814, "learning_rate": 1.8941605645803115e-05, "loss": 0.2276, "step": 256 },
    { "epoch": 0.7012278308321964, "grad_norm": 4.139655590057373, "learning_rate": 1.8927323250185815e-05, "loss": 0.2352, "step": 257 },
    { "epoch": 0.703956343792633, "grad_norm": 3.3270514011383057, "learning_rate": 1.891295059219472e-05, "loss": 0.2325, "step": 258 },
    { "epoch": 0.7066848567530696, "grad_norm": 3.4230055809020996, "learning_rate": 1.88984878171489e-05, "loss": 0.2363, "step": 259 },
    { "epoch": 0.7094133697135061, "grad_norm": 3.38492751121521, "learning_rate": 1.888393507127856e-05, "loss": 0.2186, "step": 260 },
    { "epoch": 0.7121418826739427, "grad_norm": 3.371267080307007, "learning_rate": 1.8869292501723602e-05, "loss": 0.2287, "step": 261 },
    { "epoch": 0.7148703956343793, "grad_norm": 3.399115800857544, "learning_rate": 1.8854560256532098e-05, "loss": 0.2267, "step": 262 },
    { "epoch": 0.7175989085948158, "grad_norm": 3.724271774291992, "learning_rate": 1.8839738484658835e-05, "loss": 0.2264, "step": 263 },
    { "epoch": 0.7203274215552524, "grad_norm": 3.2149219512939453, "learning_rate": 1.8824827335963767e-05, "loss": 0.2331, "step": 264 },
    { "epoch": 0.723055934515689, "grad_norm": 3.3156564235687256, "learning_rate": 1.8809826961210527e-05, "loss": 0.2304, "step": 265 },
    { "epoch": 0.7257844474761255, "grad_norm": 2.95145845413208, "learning_rate": 1.879473751206489e-05, "loss": 0.228, "step": 266 },
    { "epoch": 0.7285129604365621, "grad_norm": 4.918845176696777, "learning_rate": 1.8779559141093256e-05, "loss": 0.2338, "step": 267 },
    { "epoch": 0.7312414733969986, "grad_norm": 4.510799884796143, "learning_rate": 1.876429200176108e-05, "loss": 0.2257, "step": 268 },
    { "epoch": 0.7339699863574352, "grad_norm": 2.117758274078369, "learning_rate": 1.8748936248431353e-05, "loss": 0.2303, "step": 269 },
    { "epoch": 0.7366984993178718, "grad_norm": 2.579289197921753, "learning_rate": 1.8733492036363007e-05, "loss": 0.2362, "step": 270 },
    { "epoch": 0.7394270122783083, "grad_norm": 2.8581790924072266, "learning_rate": 1.871795952170937e-05, "loss": 0.2297, "step": 271 },
    { "epoch": 0.7421555252387448, "grad_norm": 1.994896650314331, "learning_rate": 1.8702338861516587e-05, "loss": 0.2326, "step": 272 },
    { "epoch": 0.7448840381991815, "grad_norm": 3.054471492767334, "learning_rate": 1.8686630213722015e-05, "loss": 0.2257, "step": 273 },
    { "epoch": 0.747612551159618, "grad_norm": 2.0378928184509277, "learning_rate": 1.867083373715264e-05, "loss": 0.2278, "step": 274 },
    { "epoch": 0.7503410641200545, "grad_norm": 4.272954940795898, "learning_rate": 1.8654949591523467e-05, "loss": 0.2295, "step": 275 },
    { "epoch": 0.7530695770804912, "grad_norm": 3.0166015625, "learning_rate": 1.86389779374359e-05, "loss": 0.2276, "step": 276 },
    { "epoch": 0.7557980900409277, "grad_norm": 2.997028350830078, "learning_rate": 1.8622918936376133e-05, "loss": 0.227, "step": 277 },
    { "epoch": 0.7585266030013642, "grad_norm": 3.245464563369751, "learning_rate": 1.8606772750713503e-05, "loss": 0.2324, "step": 278 },
    { "epoch": 0.7612551159618008, "grad_norm": 3.697631597518921, "learning_rate": 1.8590539543698852e-05, "loss": 0.2284, "step": 279 },
    { "epoch": 0.7639836289222374, "grad_norm": 3.2018115520477295, "learning_rate": 1.857421947946288e-05, "loss": 0.2317, "step": 280 },
    { "epoch": 0.7667121418826739, "grad_norm": 2.5883381366729736, "learning_rate": 1.8557812723014476e-05, "loss": 0.2286, "step": 281 },
    { "epoch": 0.7694406548431105, "grad_norm": 2.716522216796875, "learning_rate": 1.8541319440239066e-05, "loss": 0.2318, "step": 282 },
    { "epoch": 0.772169167803547, "grad_norm": 1.4050514698028564, "learning_rate": 1.8524739797896924e-05, "loss": 0.2274, "step": 283 },
    { "epoch": 0.7748976807639836, "grad_norm": 3.013352394104004, "learning_rate": 1.8508073963621482e-05, "loss": 0.2234, "step": 284 },
    { "epoch": 0.7776261937244202, "grad_norm": 1.8967962265014648, "learning_rate": 1.8491322105917645e-05, "loss": 0.2283, "step": 285 },
    { "epoch": 0.7803547066848567, "grad_norm": 5.349843978881836, "learning_rate": 1.847448439416009e-05, "loss": 0.2304, "step": 286 },
    { "epoch": 0.7830832196452933, "grad_norm": 3.769728422164917, "learning_rate": 1.845756099859154e-05, "loss": 0.235, "step": 287 },
    { "epoch": 0.7858117326057299, "grad_norm": 2.657892942428589, "learning_rate": 1.8440552090321047e-05, "loss": 0.2328, "step": 288 },
    { "epoch": 0.7885402455661664, "grad_norm": 2.9830169677734375, "learning_rate": 1.842345784132227e-05, "loss": 0.2344, "step": 289 },
    { "epoch": 0.791268758526603, "grad_norm": 3.0973758697509766, "learning_rate": 1.8406278424431737e-05, "loss": 0.2361, "step": 290 },
    { "epoch": 0.7939972714870396, "grad_norm": 2.921966791152954, "learning_rate": 1.838901401334708e-05, "loss": 0.236, "step": 291 },
    { "epoch": 0.7967257844474761, "grad_norm": 2.3755767345428467, "learning_rate": 1.8371664782625287e-05, "loss": 0.232, "step": 292 },
    { "epoch": 0.7994542974079127, "grad_norm": 2.2962570190429688, "learning_rate": 1.835423090768096e-05, "loss": 0.2279, "step": 293 },
    { "epoch": 0.8021828103683493, "grad_norm": 2.22548246383667, "learning_rate": 1.8336712564784506e-05, "loss": 0.2356, "step": 294 },
    { "epoch": 0.8049113233287858, "grad_norm": 2.092400074005127, "learning_rate": 1.8319109931060367e-05, "loss": 0.2333, "step": 295 },
    { "epoch": 0.8076398362892224, "grad_norm": 1.5649902820587158, "learning_rate": 1.8301423184485253e-05, "loss": 0.2291, "step": 296 },
    { "epoch": 0.810368349249659, "grad_norm": 3.5106163024902344, "learning_rate": 1.82836525038863e-05, "loss": 0.2312, "step": 297 },
    { "epoch": 0.8130968622100955, "grad_norm": 2.0999836921691895, "learning_rate": 1.8265798068939295e-05, "loss": 0.2274, "step": 298 },
    { "epoch": 0.8158253751705321, "grad_norm": 2.878789186477661, "learning_rate": 1.824786006016685e-05, "loss": 0.2202, "step": 299 },
    { "epoch": 0.8185538881309686, "grad_norm": 2.4010396003723145, "learning_rate": 1.8229838658936566e-05, "loss": 0.2348, "step": 300 },
    { "epoch": 0.8212824010914052, "grad_norm": 3.518007278442383, "learning_rate": 1.821173404745922e-05, "loss": 0.2347, "step": 301 },
    { "epoch": 0.8240109140518418, "grad_norm": 2.7652671337127686, "learning_rate": 1.81935464087869e-05, "loss": 0.2309, "step": 302 },
    { "epoch": 0.8267394270122783, "grad_norm": 2.9426369667053223, "learning_rate": 1.8175275926811173e-05, "loss": 0.2313, "step": 303 },
    { "epoch": 0.8294679399727148, "grad_norm": 2.348344326019287, "learning_rate": 1.815692278626122e-05, "loss": 0.2321, "step": 304 },
    { "epoch": 0.8321964529331515, "grad_norm": 2.599593162536621, "learning_rate": 1.813848717270195e-05, "loss": 0.2312, "step": 305 },
    { "epoch": 0.834924965893588, "grad_norm": 1.9701220989227295, "learning_rate": 1.8119969272532164e-05, "loss": 0.2301, "step": 306 },
    { "epoch": 0.8376534788540245, "grad_norm": 1.4868361949920654, "learning_rate": 1.8101369272982633e-05, "loss": 0.2269, "step": 307 },
    { "epoch": 0.8403819918144612, "grad_norm": 2.4426674842834473, "learning_rate": 1.808268736211421e-05, "loss": 0.2313, "step": 308 },
    { "epoch": 0.8431105047748977, "grad_norm": 1.3095004558563232, "learning_rate": 1.806392372881596e-05, "loss": 0.2309, "step": 309 },
    { "epoch": 0.8458390177353342, "grad_norm": 2.470839738845825, "learning_rate": 1.8045078562803203e-05, "loss": 0.2347, "step": 310 },
    { "epoch": 0.8485675306957708, "grad_norm": 2.4233086109161377, "learning_rate": 1.8026152054615633e-05, "loss": 0.2305, "step": 311 },
    { "epoch": 0.8512960436562074, "grad_norm": 1.4290592670440674, "learning_rate": 1.800714439561538e-05, "loss": 0.2353, "step": 312 },
    { "epoch": 0.8540245566166439, "grad_norm": 1.4730360507965088, "learning_rate": 1.7988055777985066e-05, "loss": 0.2284, "step": 313 },
    { "epoch": 0.8567530695770805, "grad_norm": 1.795615315437317, "learning_rate": 1.7968886394725876e-05, "loss": 0.2258, "step": 314 },
    { "epoch": 0.859481582537517, "grad_norm": 1.8772125244140625, "learning_rate": 1.7949636439655592e-05, "loss": 0.2246, "step": 315 },
    { "epoch": 0.8622100954979536, "grad_norm": 1.611531376838684, "learning_rate": 1.793030610740665e-05, "loss": 0.2355, "step": 316 },
    { "epoch": 0.8649386084583902, "grad_norm": 1.9092748165130615, "learning_rate": 1.7910895593424166e-05, "loss": 0.2287, "step": 317 },
    { "epoch": 0.8676671214188267, "grad_norm": 1.8194537162780762, "learning_rate": 1.789140509396394e-05, "loss": 0.2302, "step": 318 },
    { "epoch": 0.8703956343792633, "grad_norm": 3.0680384635925293, "learning_rate": 1.7871834806090502e-05, "loss": 0.2357, "step": 319 },
    { "epoch": 0.8731241473396999, "grad_norm": 1.7811588048934937, "learning_rate": 1.7852184927675113e-05, "loss": 0.2322, "step": 320 },
    { "epoch": 0.8758526603001364, "grad_norm": 2.6121044158935547, "learning_rate": 1.7832455657393745e-05, "loss": 0.2408, "step": 321 },
    { "epoch": 0.878581173260573, "grad_norm": 2.011620044708252, "learning_rate": 1.7812647194725093e-05, "loss": 0.2383, "step": 322 },
    { "epoch": 0.8813096862210096, "grad_norm": 2.1114919185638428, "learning_rate": 1.7792759739948546e-05, "loss": 0.2349, "step": 323 },
    { "epoch": 0.8840381991814461, "grad_norm": 2.098647117614746, "learning_rate": 1.777279349414217e-05, "loss": 0.2359, "step": 324 },
    { "epoch": 0.8867667121418826, "grad_norm": 2.1539065837860107, "learning_rate": 1.7752748659180662e-05, "loss": 0.2375, "step": 325 },
    { "epoch": 0.8894952251023193, "grad_norm": 1.4913593530654907, "learning_rate": 1.7732625437733338e-05, "loss": 0.23, "step": 326 },
    { "epoch": 0.8922237380627558, "grad_norm": 2.4666330814361572, "learning_rate": 1.771242403326204e-05, "loss": 0.2284, "step": 327 },
    { "epoch": 0.8949522510231923, "grad_norm": 2.010523796081543, "learning_rate": 1.7692144650019125e-05, "loss": 0.2322, "step": 328 },
    { "epoch": 0.897680763983629, "grad_norm": 2.6557087898254395, "learning_rate": 1.767178749304536e-05, "loss": 0.2346, "step": 329 },
    { "epoch": 0.9004092769440655, "grad_norm": 2.222409725189209, "learning_rate": 1.765135276816787e-05, "loss": 0.2422, "step": 330 },
    { "epoch": 0.903137789904502, "grad_norm": 2.119662284851074, "learning_rate": 1.7630840681998068e-05, "loss": 0.2309, "step": 331 },
    { "epoch": 0.9058663028649386, "grad_norm": 2.8383290767669678, "learning_rate": 1.7610251441929532e-05, "loss": 0.2387, "step": 332 },
    { "epoch": 0.9085948158253752, "grad_norm": 1.5416494607925415, "learning_rate": 1.758958525613594e-05, "loss": 0.2316, "step": 333 },
    { "epoch": 0.9113233287858117, "grad_norm": 3.2457337379455566, "learning_rate": 1.7568842333568952e-05, "loss": 0.2318, "step": 334 },
    { "epoch": 0.9140518417462483, "grad_norm": 2.664722442626953, "learning_rate": 1.754802288395609e-05, "loss": 0.2369, "step": 335 },
    { "epoch": 0.9167803547066848, "grad_norm": 3.379852294921875, "learning_rate": 1.7527127117798635e-05, "loss": 0.2386, "step": 336 },
    { "epoch": 0.9195088676671214, "grad_norm": 2.4600136280059814, "learning_rate": 1.750615524636948e-05, "loss": 0.242, "step": 337 },
    { "epoch": 0.922237380627558, "grad_norm": 3.1573469638824463, "learning_rate": 1.7485107481711014e-05, "loss": 0.2283, "step": 338 },
    { "epoch": 0.9249658935879945, "grad_norm": 2.4754116535186768, "learning_rate": 1.7463984036632956e-05, "loss": 0.2298, "step": 339 },
    { "epoch": 0.927694406548431, "grad_norm": 4.697962760925293, "learning_rate": 1.7442785124710227e-05, "loss": 0.2349, "step": 340 },
    { "epoch": 0.9304229195088677, "grad_norm": 4.781267166137695, "learning_rate": 1.742151096028076e-05, "loss": 0.2323, "step": 341 },
    { "epoch": 0.9331514324693042, "grad_norm": 1.7259125709533691, "learning_rate": 1.7400161758443377e-05, "loss": 0.2335, "step": 342 },
    { "epoch": 0.9358799454297408, "grad_norm": 3.0204477310180664, "learning_rate": 1.7378737735055562e-05, "loss": 0.2313, "step": 343 },
    { "epoch": 0.9386084583901774, "grad_norm": 2.2070934772491455, "learning_rate": 1.735723910673132e-05, "loss": 0.2284, "step": 344 },
    { "epoch": 0.9413369713506139, "grad_norm": 1.3308905363082886, "learning_rate": 1.7335666090838965e-05, "loss": 0.2295, "step": 345 },
    { "epoch": 0.9440654843110505, "grad_norm": 2.530228853225708, "learning_rate": 1.7314018905498932e-05, "loss": 0.2233, "step": 346 },
    { "epoch": 0.946793997271487, "grad_norm": 1.7885509729385376, "learning_rate": 1.729229776958157e-05, "loss": 0.2296, "step": 347 },
    { "epoch": 0.9495225102319236, "grad_norm": 3.9306282997131348, "learning_rate": 1.7270502902704925e-05, "loss": 0.2308, "step": 348 },
    { "epoch": 0.9522510231923602, "grad_norm": 2.548013210296631, "learning_rate": 1.7248634525232523e-05, "loss": 0.2327, "step": 349 },
    { "epoch": 0.9549795361527967, "grad_norm": 2.808361291885376, "learning_rate": 1.7226692858271133e-05, "loss": 0.2297, "step": 350 },
    { "epoch": 0.9577080491132333, "grad_norm": 2.629054069519043, "learning_rate": 1.7204678123668556e-05, "loss": 0.2215, "step": 351 },
    { "epoch": 0.9604365620736699, "grad_norm": 3.761340379714966, "learning_rate": 1.718259054401135e-05, "loss": 0.2293, "step": 352 },
    { "epoch": 0.9631650750341064, "grad_norm": 3.36629581451416, "learning_rate": 1.71604303426226e-05, "loss": 0.2269, "step": 353 },
    { "epoch": 0.965893587994543, "grad_norm": 3.0692992210388184, "learning_rate": 1.7138197743559656e-05, "loss": 0.2276, "step": 354 },
    { "epoch": 0.9686221009549796, "grad_norm": 2.2692136764526367, "learning_rate": 1.7115892971611864e-05, "loss": 0.2264, "step": 355 },
    { "epoch": 0.9713506139154161, "grad_norm": 3.0690739154815674, "learning_rate": 1.7093516252298296e-05, "loss": 0.2242, "step": 356 },
    { "epoch": 0.9740791268758526, "grad_norm": 2.2217397689819336, "learning_rate": 1.7071067811865477e-05, "loss": 0.2202, "step": 357 },
    { "epoch": 0.9768076398362893, "grad_norm": 3.9510974884033203, "learning_rate": 1.7048547877285078e-05, "loss": 0.2206, "step": 358 },
    { "epoch": 0.9795361527967258, "grad_norm": 4.3253254890441895, "learning_rate": 1.7025956676251636e-05, "loss": 0.2294, "step": 359 },
    { "epoch": 0.9822646657571623, "grad_norm": 2.0930545330047607, "learning_rate": 1.7003294437180254e-05, "loss": 0.2228, "step": 360 },
    { "epoch": 0.984993178717599, "grad_norm": 2.5518953800201416, "learning_rate": 1.6980561389204285e-05, "loss": 0.2288, "step": 361 },
    { "epoch": 0.9877216916780355, "grad_norm": 1.6454943418502808, "learning_rate": 1.695775776217301e-05, "loss": 0.2165, "step": 362 },
    { "epoch": 0.990450204638472, "grad_norm": 1.9209353923797607, "learning_rate": 1.6934883786649333e-05, "loss": 0.2144, "step": 363 },
    { "epoch": 0.9931787175989086, "grad_norm": 2.2938175201416016, "learning_rate": 1.6911939693907422e-05, "loss": 0.2274, "step": 364 },
    { "epoch": 0.9959072305593452, "grad_norm": 2.0856289863586426, "learning_rate": 1.6888925715930396e-05, "loss": 0.2185, "step": 365 },
    { "epoch": 0.9986357435197817, "grad_norm": 3.4801177978515625, "learning_rate": 1.686584208540797e-05, "loss": 0.2224, "step": 366 },
    { "epoch": 1.0013642564802183, "grad_norm": 2.19926118850708, "learning_rate": 1.68426890357341e-05, "loss": 0.2037, "step": 367 },
    { "epoch": 1.004092769440655, "grad_norm": 2.824136972427368, "learning_rate": 1.6819466801004622e-05, "loss": 0.1922, "step": 368 },
    { "epoch": 1.0068212824010914, "grad_norm": 1.9411588907241821, "learning_rate": 1.6796175616014894e-05, "loss": 0.19, "step": 369 },
    { "epoch": 1.009549795361528, "grad_norm": 2.6989634037017822, "learning_rate": 1.6772815716257414e-05, "loss": 0.1906, "step": 370 },
|
{ |
|
"epoch": 1.0122783083219646, |
|
"grad_norm": 2.432159662246704, |
|
"learning_rate": 1.6749387337919434e-05, |
|
"loss": 0.1814, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.015006821282401, |
|
"grad_norm": 1.962867259979248, |
|
"learning_rate": 1.672589071788059e-05, |
|
"loss": 0.1855, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.0177353342428377, |
|
"grad_norm": 2.1539595127105713, |
|
"learning_rate": 1.6702326093710493e-05, |
|
"loss": 0.1831, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.0204638472032743, |
|
"grad_norm": 2.6385738849639893, |
|
"learning_rate": 1.6678693703666327e-05, |
|
"loss": 0.1873, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.0231923601637107, |
|
"grad_norm": 2.0655276775360107, |
|
"learning_rate": 1.6654993786690445e-05, |
|
"loss": 0.1843, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0259208731241474, |
|
"grad_norm": 2.4418952465057373, |
|
"learning_rate": 1.6631226582407954e-05, |
|
"loss": 0.1855, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.028649386084584, |
|
"grad_norm": 2.3700835704803467, |
|
"learning_rate": 1.6607392331124282e-05, |
|
"loss": 0.1784, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.0313778990450204, |
|
"grad_norm": 2.478952646255493, |
|
"learning_rate": 1.6583491273822763e-05, |
|
"loss": 0.1858, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.034106412005457, |
|
"grad_norm": 2.238147020339966, |
|
"learning_rate": 1.6559523652162192e-05, |
|
"loss": 0.185, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.0368349249658937, |
|
"grad_norm": 2.578995943069458, |
|
"learning_rate": 1.653548970847438e-05, |
|
"loss": 0.1854, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.03956343792633, |
|
"grad_norm": 2.7153942584991455, |
|
"learning_rate": 1.651138968576171e-05, |
|
"loss": 0.1859, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.0422919508867667, |
|
"grad_norm": 1.802567720413208, |
|
"learning_rate": 1.6487223827694673e-05, |
|
"loss": 0.1844, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0450204638472034, |
|
"grad_norm": 1.6883435249328613, |
|
"learning_rate": 1.646299237860941e-05, |
|
"loss": 0.1858, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0477489768076398, |
|
"grad_norm": 2.673168897628784, |
|
"learning_rate": 1.643869558350524e-05, |
|
"loss": 0.1855, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.0504774897680764, |
|
"grad_norm": 2.126051187515259, |
|
"learning_rate": 1.6414333688042186e-05, |
|
"loss": 0.1875, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.053206002728513, |
|
"grad_norm": 2.813554048538208, |
|
"learning_rate": 1.638990693853848e-05, |
|
"loss": 0.1842, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0559345156889495, |
|
"grad_norm": 2.9675509929656982, |
|
"learning_rate": 1.6365415581968086e-05, |
|
"loss": 0.1899, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.058663028649386, |
|
"grad_norm": 1.7976346015930176, |
|
"learning_rate": 1.6340859865958193e-05, |
|
"loss": 0.1865, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0613915416098227, |
|
"grad_norm": 2.076399326324463, |
|
"learning_rate": 1.631624003878672e-05, |
|
"loss": 0.1848, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0641200545702592, |
|
"grad_norm": 2.5090761184692383, |
|
"learning_rate": 1.6291556349379794e-05, |
|
"loss": 0.1858, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0668485675306958, |
|
"grad_norm": 1.3025147914886475, |
|
"learning_rate": 1.6266809047309253e-05, |
|
"loss": 0.1876, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.0695770804911324, |
|
"grad_norm": 2.8731820583343506, |
|
"learning_rate": 1.6241998382790095e-05, |
|
"loss": 0.1841, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0723055934515688, |
|
"grad_norm": 2.52224063873291, |
|
"learning_rate": 1.6217124606677973e-05, |
|
"loss": 0.1817, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0750341064120055, |
|
"grad_norm": 2.2980759143829346, |
|
"learning_rate": 1.6192187970466646e-05, |
|
"loss": 0.1783, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.077762619372442, |
|
"grad_norm": 2.4828219413757324, |
|
"learning_rate": 1.6167188726285433e-05, |
|
"loss": 0.1835, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0804911323328785, |
|
"grad_norm": 1.777030348777771, |
|
"learning_rate": 1.6142127126896682e-05, |
|
"loss": 0.1876, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.0832196452933152, |
|
"grad_norm": 1.924090027809143, |
|
"learning_rate": 1.611700342569319e-05, |
|
"loss": 0.1844, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.0859481582537518, |
|
"grad_norm": 2.529407501220703, |
|
"learning_rate": 1.6091817876695655e-05, |
|
"loss": 0.184, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.0886766712141882, |
|
"grad_norm": 2.5993223190307617, |
|
"learning_rate": 1.606657073455012e-05, |
|
"loss": 0.1854, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0914051841746248, |
|
"grad_norm": 2.47813081741333, |
|
"learning_rate": 1.6041262254525362e-05, |
|
"loss": 0.1913, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0941336971350615, |
|
"grad_norm": 1.9955610036849976, |
|
"learning_rate": 1.601589269251035e-05, |
|
"loss": 0.1836, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.096862210095498, |
|
"grad_norm": 2.6094555854797363, |
|
"learning_rate": 1.599046230501163e-05, |
|
"loss": 0.1886, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.0995907230559345, |
|
"grad_norm": 1.9604135751724243, |
|
"learning_rate": 1.5964971349150746e-05, |
|
"loss": 0.1851, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1023192360163712, |
|
"grad_norm": 2.7384026050567627, |
|
"learning_rate": 1.593942008266164e-05, |
|
"loss": 0.1861, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.1050477489768076, |
|
"grad_norm": 3.150024890899658, |
|
"learning_rate": 1.591380876388804e-05, |
|
"loss": 0.1898, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.1077762619372442, |
|
"grad_norm": 1.3892546892166138, |
|
"learning_rate": 1.5888137651780847e-05, |
|
"loss": 0.1837, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.1105047748976808, |
|
"grad_norm": 2.159607410430908, |
|
"learning_rate": 1.5862407005895524e-05, |
|
"loss": 0.1866, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.1132332878581173, |
|
"grad_norm": 1.919472336769104, |
|
"learning_rate": 1.583661708638947e-05, |
|
"loss": 0.1885, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.115961800818554, |
|
"grad_norm": 1.2415019273757935, |
|
"learning_rate": 1.5810768154019386e-05, |
|
"loss": 0.1855, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.1186903137789905, |
|
"grad_norm": 1.8879867792129517, |
|
"learning_rate": 1.5784860470138633e-05, |
|
"loss": 0.1873, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.121418826739427, |
|
"grad_norm": 1.4997961521148682, |
|
"learning_rate": 1.5758894296694614e-05, |
|
"loss": 0.1871, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.1241473396998636, |
|
"grad_norm": 2.5156009197235107, |
|
"learning_rate": 1.573286989622609e-05, |
|
"loss": 0.183, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.1268758526603002, |
|
"grad_norm": 1.9973162412643433, |
|
"learning_rate": 1.5706787531860557e-05, |
|
"loss": 0.1795, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.1296043656207366, |
|
"grad_norm": 2.4125733375549316, |
|
"learning_rate": 1.568064746731156e-05, |
|
"loss": 0.1903, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.1323328785811733, |
|
"grad_norm": 3.0142152309417725, |
|
"learning_rate": 1.565444996687605e-05, |
|
"loss": 0.1818, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.13506139154161, |
|
"grad_norm": 1.368192434310913, |
|
"learning_rate": 1.5628195295431696e-05, |
|
"loss": 0.1858, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.1377899045020463, |
|
"grad_norm": 2.020052433013916, |
|
"learning_rate": 1.5601883718434207e-05, |
|
"loss": 0.1912, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.140518417462483, |
|
"grad_norm": 1.513268232345581, |
|
"learning_rate": 1.557551550191467e-05, |
|
"loss": 0.1843, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.1432469304229196, |
|
"grad_norm": 0.989016592502594, |
|
"learning_rate": 1.554909091247682e-05, |
|
"loss": 0.1802, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.145975443383356, |
|
"grad_norm": 1.345131754875183, |
|
"learning_rate": 1.5522610217294377e-05, |
|
"loss": 0.187, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1487039563437926, |
|
"grad_norm": 1.432015061378479, |
|
"learning_rate": 1.549607368410834e-05, |
|
"loss": 0.1799, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.1514324693042293, |
|
"grad_norm": 1.2019281387329102, |
|
"learning_rate": 1.5469481581224274e-05, |
|
"loss": 0.1881, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.1541609822646657, |
|
"grad_norm": 1.5989677906036377, |
|
"learning_rate": 1.544283417750958e-05, |
|
"loss": 0.1875, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.1568894952251023, |
|
"grad_norm": 1.4773874282836914, |
|
"learning_rate": 1.5416131742390827e-05, |
|
"loss": 0.1861, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.159618008185539, |
|
"grad_norm": 1.039644479751587, |
|
"learning_rate": 1.5389374545850973e-05, |
|
"loss": 0.191, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1623465211459754, |
|
"grad_norm": 1.808279275894165, |
|
"learning_rate": 1.5362562858426655e-05, |
|
"loss": 0.1894, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.165075034106412, |
|
"grad_norm": 1.3765541315078735, |
|
"learning_rate": 1.533569695120547e-05, |
|
"loss": 0.1967, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1678035470668486, |
|
"grad_norm": 1.5166419744491577, |
|
"learning_rate": 1.530877709582321e-05, |
|
"loss": 0.1916, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.170532060027285, |
|
"grad_norm": 2.17692232131958, |
|
"learning_rate": 1.5281803564461135e-05, |
|
"loss": 0.1911, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1732605729877217, |
|
"grad_norm": 1.4779309034347534, |
|
"learning_rate": 1.5254776629843204e-05, |
|
"loss": 0.192, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1759890859481583, |
|
"grad_norm": 2.124163866043091, |
|
"learning_rate": 1.522769656523333e-05, |
|
"loss": 0.1899, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.1787175989085947, |
|
"grad_norm": 1.3838238716125488, |
|
"learning_rate": 1.5200563644432614e-05, |
|
"loss": 0.1896, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.1814461118690314, |
|
"grad_norm": 1.1780169010162354, |
|
"learning_rate": 1.5173378141776569e-05, |
|
"loss": 0.1963, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.184174624829468, |
|
"grad_norm": 0.9128435850143433, |
|
"learning_rate": 1.5146140332132359e-05, |
|
"loss": 0.1831, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.1869031377899044, |
|
"grad_norm": 1.093888521194458, |
|
"learning_rate": 1.5118850490896012e-05, |
|
"loss": 0.1902, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.189631650750341, |
|
"grad_norm": 1.3495734930038452, |
|
"learning_rate": 1.5091508893989633e-05, |
|
"loss": 0.1886, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.1923601637107777, |
|
"grad_norm": 2.244813919067383, |
|
"learning_rate": 1.5064115817858622e-05, |
|
"loss": 0.195, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.195088676671214, |
|
"grad_norm": 1.4227724075317383, |
|
"learning_rate": 1.5036671539468879e-05, |
|
"loss": 0.1931, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.1978171896316507, |
|
"grad_norm": 2.971980333328247, |
|
"learning_rate": 1.5009176336303987e-05, |
|
"loss": 0.1945, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.2005457025920874, |
|
"grad_norm": 2.9744460582733154, |
|
"learning_rate": 1.4981630486362435e-05, |
|
"loss": 0.1886, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2032742155525238, |
|
"grad_norm": 2.5313005447387695, |
|
"learning_rate": 1.4954034268154777e-05, |
|
"loss": 0.1961, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.2060027285129604, |
|
"grad_norm": 2.4875779151916504, |
|
"learning_rate": 1.4926387960700843e-05, |
|
"loss": 0.1964, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.208731241473397, |
|
"grad_norm": 1.7058812379837036, |
|
"learning_rate": 1.4898691843526897e-05, |
|
"loss": 0.1937, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.2114597544338335, |
|
"grad_norm": 2.4829766750335693, |
|
"learning_rate": 1.4870946196662822e-05, |
|
"loss": 0.1988, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.21418826739427, |
|
"grad_norm": 1.777729868888855, |
|
"learning_rate": 1.4843151300639282e-05, |
|
"loss": 0.2021, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.2169167803547067, |
|
"grad_norm": 1.580891728401184, |
|
"learning_rate": 1.4815307436484898e-05, |
|
"loss": 0.197, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.2196452933151432, |
|
"grad_norm": 1.710062861442566, |
|
"learning_rate": 1.4787414885723386e-05, |
|
"loss": 0.1934, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.2223738062755798, |
|
"grad_norm": 1.6073284149169922, |
|
"learning_rate": 1.4759473930370738e-05, |
|
"loss": 0.1927, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.2251023192360164, |
|
"grad_norm": 1.4678465127944946, |
|
"learning_rate": 1.4731484852932338e-05, |
|
"loss": 0.1946, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.2278308321964528, |
|
"grad_norm": 1.3270896673202515, |
|
"learning_rate": 1.4703447936400135e-05, |
|
"loss": 0.1877, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2305593451568895, |
|
"grad_norm": 2.5790812969207764, |
|
"learning_rate": 1.4675363464249763e-05, |
|
"loss": 0.195, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.233287858117326, |
|
"grad_norm": 1.3712650537490845, |
|
"learning_rate": 1.4647231720437687e-05, |
|
"loss": 0.194, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.2360163710777625, |
|
"grad_norm": 3.243920087814331, |
|
"learning_rate": 1.461905298939832e-05, |
|
"loss": 0.1874, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.2387448840381992, |
|
"grad_norm": 2.803710460662842, |
|
"learning_rate": 1.4590827556041158e-05, |
|
"loss": 0.1948, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.2414733969986358, |
|
"grad_norm": 2.0171213150024414, |
|
"learning_rate": 1.4562555705747894e-05, |
|
"loss": 0.1946, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.2442019099590724, |
|
"grad_norm": 1.85854172706604, |
|
"learning_rate": 1.4534237724369534e-05, |
|
"loss": 0.192, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.2469304229195088, |
|
"grad_norm": 1.7220263481140137, |
|
"learning_rate": 1.4505873898223498e-05, |
|
"loss": 0.1926, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.2496589358799455, |
|
"grad_norm": 2.4301505088806152, |
|
"learning_rate": 1.4477464514090745e-05, |
|
"loss": 0.1874, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.252387448840382, |
|
"grad_norm": 1.6035195589065552, |
|
"learning_rate": 1.4449009859212857e-05, |
|
"loss": 0.1961, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.2551159618008185, |
|
"grad_norm": 1.4066811800003052, |
|
"learning_rate": 1.4420510221289137e-05, |
|
"loss": 0.1905, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2578444747612552, |
|
"grad_norm": 1.3481764793395996, |
|
"learning_rate": 1.4391965888473705e-05, |
|
"loss": 0.1869, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.2605729877216918, |
|
"grad_norm": 1.235824704170227, |
|
"learning_rate": 1.4363377149372584e-05, |
|
"loss": 0.1894, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.2633015006821282, |
|
"grad_norm": 2.259446144104004, |
|
"learning_rate": 1.4334744293040773e-05, |
|
"loss": 0.1935, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.2660300136425648, |
|
"grad_norm": 1.5957938432693481, |
|
"learning_rate": 1.430606760897934e-05, |
|
"loss": 0.1947, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.2687585266030013, |
|
"grad_norm": 1.1070665121078491, |
|
"learning_rate": 1.4277347387132482e-05, |
|
"loss": 0.1793, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.271487039563438, |
|
"grad_norm": 1.6437128782272339, |
|
"learning_rate": 1.4248583917884595e-05, |
|
"loss": 0.1883, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.2742155525238745, |
|
"grad_norm": 1.2046825885772705, |
|
"learning_rate": 1.4219777492057349e-05, |
|
"loss": 0.1862, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.2769440654843112, |
|
"grad_norm": 1.8802250623703003, |
|
"learning_rate": 1.4190928400906731e-05, |
|
"loss": 0.1845, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.2796725784447476, |
|
"grad_norm": 1.2976617813110352, |
|
"learning_rate": 1.4162036936120115e-05, |
|
"loss": 0.1942, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.2824010914051842, |
|
"grad_norm": 2.1956803798675537, |
|
"learning_rate": 1.4133103389813302e-05, |
|
"loss": 0.1935, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2851296043656206, |
|
"grad_norm": 1.6689682006835938, |
|
"learning_rate": 1.410412805452757e-05, |
|
"loss": 0.191, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.2878581173260573, |
|
"grad_norm": 2.384645938873291, |
|
"learning_rate": 1.4075111223226721e-05, |
|
"loss": 0.1898, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.290586630286494, |
|
"grad_norm": 1.9162484407424927, |
|
"learning_rate": 1.4046053189294114e-05, |
|
"loss": 0.1881, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.2933151432469305, |
|
"grad_norm": 2.4748995304107666, |
|
"learning_rate": 1.4016954246529697e-05, |
|
"loss": 0.1876, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.296043656207367, |
|
"grad_norm": 1.9592957496643066, |
|
"learning_rate": 1.3987814689147041e-05, |
|
"loss": 0.1958, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2987721691678036, |
|
"grad_norm": 2.6773533821105957, |
|
"learning_rate": 1.3958634811770361e-05, |
|
"loss": 0.1967, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.30150068212824, |
|
"grad_norm": 2.139191150665283, |
|
"learning_rate": 1.3929414909431544e-05, |
|
"loss": 0.1917, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.3042291950886766, |
|
"grad_norm": 1.8918670415878296, |
|
"learning_rate": 1.3900155277567157e-05, |
|
"loss": 0.1923, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.3069577080491133, |
|
"grad_norm": 1.8260009288787842, |
|
"learning_rate": 1.3870856212015468e-05, |
|
"loss": 0.1912, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.30968622100955, |
|
"grad_norm": 1.858114242553711, |
|
"learning_rate": 1.3841518009013446e-05, |
|
"loss": 0.1912, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3124147339699863, |
|
"grad_norm": 1.8641937971115112, |
|
"learning_rate": 1.3812140965193775e-05, |
|
"loss": 0.1906, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.315143246930423, |
|
"grad_norm": 2.4870729446411133, |
|
"learning_rate": 1.378272537758185e-05, |
|
"loss": 0.1901, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.3178717598908594, |
|
"grad_norm": 1.7273850440979004, |
|
"learning_rate": 1.3753271543592772e-05, |
|
"loss": 0.1898, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.320600272851296, |
|
"grad_norm": 1.6276838779449463, |
|
"learning_rate": 1.3723779761028349e-05, |
|
"loss": 0.1851, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.3233287858117326, |
|
"grad_norm": 1.5349172353744507, |
|
"learning_rate": 1.3694250328074072e-05, |
|
"loss": 0.19, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.3260572987721693, |
|
"grad_norm": 1.9674981832504272, |
|
"learning_rate": 1.3664683543296114e-05, |
|
"loss": 0.1858, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.3287858117326057, |
|
"grad_norm": 1.7096377611160278, |
|
"learning_rate": 1.3635079705638298e-05, |
|
"loss": 0.1853, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.3315143246930423, |
|
"grad_norm": 2.1643528938293457, |
|
"learning_rate": 1.3605439114419095e-05, |
|
"loss": 0.1803, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.3342428376534787, |
|
"grad_norm": 2.1609997749328613, |
|
"learning_rate": 1.3575762069328567e-05, |
|
"loss": 0.1888, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.3369713506139154, |
|
"grad_norm": 2.171285390853882, |
|
"learning_rate": 1.3546048870425356e-05, |
|
"loss": 0.1887, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.339699863574352, |
|
"grad_norm": 1.9047596454620361, |
|
"learning_rate": 1.3516299818133664e-05, |
|
"loss": 0.1844, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.3424283765347886, |
|
"grad_norm": 1.5691167116165161, |
|
"learning_rate": 1.3486515213240188e-05, |
|
"loss": 0.1889, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.345156889495225, |
|
"grad_norm": 1.809037208557129, |
|
"learning_rate": 1.3456695356891079e-05, |
|
"loss": 0.1868, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.3478854024556617, |
|
"grad_norm": 1.375952959060669, |
|
"learning_rate": 1.3426840550588933e-05, |
|
"loss": 0.1857, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.350613915416098, |
|
"grad_norm": 1.5163205862045288, |
|
"learning_rate": 1.33969510961897e-05, |
|
"loss": 0.1853, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.3533424283765347, |
|
"grad_norm": 1.8282253742218018, |
|
"learning_rate": 1.3367027295899652e-05, |
|
"loss": 0.1852, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.3560709413369714, |
|
"grad_norm": 1.6411131620407104, |
|
"learning_rate": 1.3337069452272332e-05, |
|
"loss": 0.185, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.358799454297408, |
|
"grad_norm": 1.8116145133972168, |
|
"learning_rate": 1.3307077868205487e-05, |
|
"loss": 0.1888, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.3615279672578444, |
|
"grad_norm": 1.7081019878387451, |
|
"learning_rate": 1.3277052846937997e-05, |
|
"loss": 0.1922, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.364256480218281, |
|
"grad_norm": 1.9334781169891357, |
|
"learning_rate": 1.3246994692046837e-05, |
|
"loss": 0.1837, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3669849931787175, |
|
"grad_norm": 1.9487545490264893, |
|
"learning_rate": 1.321690370744397e-05, |
|
"loss": 0.183, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.369713506139154, |
|
"grad_norm": 1.4426416158676147, |
|
"learning_rate": 1.3186780197373306e-05, |
|
"loss": 0.1847, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.3724420190995907, |
|
"grad_norm": 1.153779149055481, |
|
"learning_rate": 1.3156624466407607e-05, |
|
"loss": 0.1818, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.3751705320600274, |
|
"grad_norm": 1.7035014629364014, |
|
"learning_rate": 1.3126436819445423e-05, |
|
"loss": 0.1889, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.3778990450204638, |
|
"grad_norm": 0.9309306740760803, |
|
"learning_rate": 1.309621756170799e-05, |
|
"loss": 0.1839, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.3806275579809004, |
|
"grad_norm": 1.7719669342041016, |
|
"learning_rate": 1.3065966998736155e-05, |
|
"loss": 0.1803, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.3833560709413368, |
|
"grad_norm": 1.2695763111114502, |
|
"learning_rate": 1.3035685436387297e-05, |
|
"loss": 0.1872, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.3860845839017735, |
|
"grad_norm": 1.6255990266799927, |
|
"learning_rate": 1.300537318083221e-05, |
|
"loss": 0.1815, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.38881309686221, |
|
"grad_norm": 1.109044075012207, |
|
"learning_rate": 1.297503053855203e-05, |
|
"loss": 0.1844, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.3915416098226467, |
|
"grad_norm": 1.4328869581222534, |
|
"learning_rate": 1.2944657816335124e-05, |
|
"loss": 0.1866, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3942701227830832, |
|
"grad_norm": 1.5600212812423706, |
|
"learning_rate": 1.2914255321273987e-05, |
|
"loss": 0.1883, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.3969986357435198, |
|
"grad_norm": 1.095321774482727, |
|
"learning_rate": 1.2883823360762149e-05, |
|
"loss": 0.1875, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.3997271487039564, |
|
"grad_norm": 1.7407549619674683, |
|
"learning_rate": 1.2853362242491054e-05, |
|
"loss": 0.1819, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.4024556616643928, |
|
"grad_norm": 1.4621182680130005, |
|
"learning_rate": 1.2822872274446958e-05, |
|
"loss": 0.1869, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.4051841746248295, |
|
"grad_norm": 1.9369522333145142, |
|
"learning_rate": 1.2792353764907803e-05, |
|
"loss": 0.1879, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.407912687585266, |
|
"grad_norm": 1.6838245391845703, |
|
"learning_rate": 1.276180702244012e-05, |
|
"loss": 0.1916, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.4106412005457025, |
|
"grad_norm": 1.968902349472046, |
|
"learning_rate": 1.273123235589589e-05, |
|
"loss": 0.1865, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.4133697135061392, |
|
"grad_norm": 1.254606008529663, |
|
"learning_rate": 1.2700630074409427e-05, |
|
"loss": 0.1813, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.4160982264665758, |
|
"grad_norm": 1.8816652297973633, |
|
"learning_rate": 1.2670000487394268e-05, |
|
"loss": 0.1835, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.4188267394270122, |
|
"grad_norm": 1.258792757987976, |
|
"learning_rate": 1.2639343904540008e-05, |
|
"loss": 0.1821, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.4215552523874488, |
|
"grad_norm": 1.609707236289978, |
|
"learning_rate": 1.260866063580921e-05, |
|
"loss": 0.1875, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.4242837653478855, |
|
"grad_norm": 1.041623830795288, |
|
"learning_rate": 1.2577950991434249e-05, |
|
"loss": 0.1843, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.427012278308322, |
|
"grad_norm": 1.3459815979003906, |
|
"learning_rate": 1.254721528191417e-05, |
|
"loss": 0.1862, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.4297407912687585, |
|
"grad_norm": 1.082248330116272, |
|
"learning_rate": 1.2516453818011567e-05, |
|
"loss": 0.1862, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.4324693042291952, |
|
"grad_norm": 1.1448614597320557, |
|
"learning_rate": 1.2485666910749427e-05, |
|
"loss": 0.1854, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4351978171896316, |
|
"grad_norm": 0.9976285099983215, |
|
"learning_rate": 1.2454854871407993e-05, |
|
"loss": 0.1905, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.4379263301500682, |
|
"grad_norm": 1.1316357851028442, |
|
"learning_rate": 1.242401801152161e-05, |
|
"loss": 0.1874, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.4406548431105048, |
|
"grad_norm": 1.0879647731781006, |
|
"learning_rate": 1.2393156642875579e-05, |
|
"loss": 0.188, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.4433833560709413, |
|
"grad_norm": 1.6636865139007568, |
|
"learning_rate": 1.2362271077503007e-05, |
|
"loss": 0.1877, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.446111869031378, |
|
"grad_norm": 1.0785863399505615, |
|
"learning_rate": 1.2331361627681645e-05, |
|
"loss": 0.1805, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4488403819918145, |
|
"grad_norm": 1.1955676078796387, |
|
"learning_rate": 1.2300428605930736e-05, |
|
"loss": 0.186, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.451568894952251, |
|
"grad_norm": 1.0076512098312378, |
|
"learning_rate": 1.2269472325007858e-05, |
|
"loss": 0.1843, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.4542974079126876, |
|
"grad_norm": 0.9785951375961304, |
|
"learning_rate": 1.2238493097905754e-05, |
|
"loss": 0.1865, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.4570259208731242, |
|
"grad_norm": 1.2281177043914795, |
|
"learning_rate": 1.2207491237849174e-05, |
|
"loss": 0.1817, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.4597544338335606, |
|
"grad_norm": 1.263206958770752, |
|
"learning_rate": 1.2176467058291699e-05, |
|
"loss": 0.1816, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.4624829467939973, |
|
"grad_norm": 1.1939092874526978, |
|
"learning_rate": 1.2145420872912586e-05, |
|
"loss": 0.1842, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.465211459754434, |
|
"grad_norm": 1.162329912185669, |
|
"learning_rate": 1.2114352995613582e-05, |
|
"loss": 0.1878, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.4679399727148703, |
|
"grad_norm": 0.8624812960624695, |
|
"learning_rate": 1.2083263740515764e-05, |
|
"loss": 0.1839, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.470668485675307, |
|
"grad_norm": 1.0390948057174683, |
|
"learning_rate": 1.2052153421956343e-05, |
|
"loss": 0.189, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.4733969986357436, |
|
"grad_norm": 1.2229454517364502, |
|
"learning_rate": 1.2021022354485514e-05, |
|
"loss": 0.1896, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.4761255115961802, |
|
"grad_norm": 1.5394783020019531, |
|
"learning_rate": 1.1989870852863254e-05, |
|
"loss": 0.1839, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.4788540245566166, |
|
"grad_norm": 0.9282181859016418, |
|
"learning_rate": 1.1958699232056135e-05, |
|
"loss": 0.1847, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.4815825375170533, |
|
"grad_norm": 1.620680809020996, |
|
"learning_rate": 1.1927507807234169e-05, |
|
"loss": 0.1839, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.4843110504774897, |
|
"grad_norm": 1.1609814167022705, |
|
"learning_rate": 1.1896296893767588e-05, |
|
"loss": 0.187, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.4870395634379263, |
|
"grad_norm": 1.349696159362793, |
|
"learning_rate": 1.186506680722367e-05, |
|
"loss": 0.1873, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.489768076398363, |
|
"grad_norm": 0.9885646104812622, |
|
"learning_rate": 1.1833817863363563e-05, |
|
"loss": 0.1831, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.4924965893587996, |
|
"grad_norm": 1.7727011442184448, |
|
"learning_rate": 1.180255037813906e-05, |
|
"loss": 0.1911, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.495225102319236, |
|
"grad_norm": 1.5409637689590454, |
|
"learning_rate": 1.1771264667689428e-05, |
|
"loss": 0.1861, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.4979536152796726, |
|
"grad_norm": 1.6905938386917114, |
|
"learning_rate": 1.1739961048338213e-05, |
|
"loss": 0.1902, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.500682128240109, |
|
"grad_norm": 1.2861829996109009, |
|
"learning_rate": 1.1708639836590024e-05, |
|
"loss": 0.1886, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5034106412005457, |
|
"grad_norm": 1.4801191091537476, |
|
"learning_rate": 1.1677301349127349e-05, |
|
"loss": 0.1868, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.5061391541609823, |
|
"grad_norm": 1.0299503803253174, |
|
"learning_rate": 1.164594590280734e-05, |
|
"loss": 0.193, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.508867667121419, |
|
"grad_norm": 1.705196738243103, |
|
"learning_rate": 1.161457381465863e-05, |
|
"loss": 0.19, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.5115961800818554, |
|
"grad_norm": 1.2943288087844849, |
|
"learning_rate": 1.15831854018781e-05, |
|
"loss": 0.1955, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.514324693042292, |
|
"grad_norm": 1.1207996606826782, |
|
"learning_rate": 1.1551780981827699e-05, |
|
"loss": 0.1867, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.5170532060027284, |
|
"grad_norm": 1.675868034362793, |
|
"learning_rate": 1.1520360872031208e-05, |
|
"loss": 0.1821, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.519781718963165, |
|
"grad_norm": 1.2020845413208008, |
|
"learning_rate": 1.148892539017106e-05, |
|
"loss": 0.1909, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.5225102319236017, |
|
"grad_norm": 0.9670946002006531, |
|
"learning_rate": 1.1457474854085095e-05, |
|
"loss": 0.1868, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.5252387448840383, |
|
"grad_norm": 1.1065406799316406, |
|
"learning_rate": 1.1426009581763377e-05, |
|
"loss": 0.1904, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.5279672578444747, |
|
"grad_norm": 1.2990299463272095, |
|
"learning_rate": 1.139452989134496e-05, |
|
"loss": 0.1812, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.5306957708049114, |
|
"grad_norm": 1.2565958499908447, |
|
"learning_rate": 1.1363036101114671e-05, |
|
"loss": 0.1896, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.5334242837653478, |
|
"grad_norm": 1.460721492767334, |
|
"learning_rate": 1.1331528529499909e-05, |
|
"loss": 0.1857, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.5361527967257844, |
|
"grad_norm": 1.0245671272277832, |
|
"learning_rate": 1.1300007495067403e-05, |
|
"loss": 0.1831, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.538881309686221, |
|
"grad_norm": 1.3558366298675537, |
|
"learning_rate": 1.1268473316520007e-05, |
|
"loss": 0.1869, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.5416098226466577, |
|
"grad_norm": 1.1182175874710083, |
|
"learning_rate": 1.123692631269348e-05, |
|
"loss": 0.1857, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.544338335607094, |
|
"grad_norm": 2.172612428665161, |
|
"learning_rate": 1.1205366802553231e-05, |
|
"loss": 0.1857, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.5470668485675307, |
|
"grad_norm": 1.591204285621643, |
|
"learning_rate": 1.1173795105191146e-05, |
|
"loss": 0.1849, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.5497953615279672, |
|
"grad_norm": 2.5178825855255127, |
|
"learning_rate": 1.1142211539822318e-05, |
|
"loss": 0.1847, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.5525238744884038, |
|
"grad_norm": 2.313584089279175, |
|
"learning_rate": 1.1110616425781833e-05, |
|
"loss": 0.1818, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.5552523874488404, |
|
"grad_norm": 1.1800787448883057, |
|
"learning_rate": 1.1079010082521557e-05, |
|
"loss": 0.1827, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.557980900409277, |
|
"grad_norm": 1.6123071908950806, |
|
"learning_rate": 1.1047392829606876e-05, |
|
"loss": 0.195, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.5607094133697135, |
|
"grad_norm": 0.9764700531959534, |
|
"learning_rate": 1.101576498671349e-05, |
|
"loss": 0.1837, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.56343792633015, |
|
"grad_norm": 1.6063830852508545, |
|
"learning_rate": 1.098412687362418e-05, |
|
"loss": 0.1856, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.5661664392905865, |
|
"grad_norm": 1.6164367198944092, |
|
"learning_rate": 1.095247881022555e-05, |
|
"loss": 0.1791, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.5688949522510232, |
|
"grad_norm": 1.005224585533142, |
|
"learning_rate": 1.0920821116504816e-05, |
|
"loss": 0.1865, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.5716234652114598, |
|
"grad_norm": 1.6949632167816162, |
|
"learning_rate": 1.0889154112546569e-05, |
|
"loss": 0.1831, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.5743519781718964, |
|
"grad_norm": 1.3134765625, |
|
"learning_rate": 1.0857478118529534e-05, |
|
"loss": 0.1853, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.5770804911323328, |
|
"grad_norm": 1.852800726890564, |
|
"learning_rate": 1.0825793454723325e-05, |
|
"loss": 0.1826, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.5798090040927695, |
|
"grad_norm": 1.9858014583587646, |
|
"learning_rate": 1.079410044148522e-05, |
|
"loss": 0.1843, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.5825375170532059, |
|
"grad_norm": 1.0838700532913208, |
|
"learning_rate": 1.0762399399256917e-05, |
|
"loss": 0.1847, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.5852660300136425, |
|
"grad_norm": 1.2184253931045532, |
|
"learning_rate": 1.0730690648561293e-05, |
|
"loss": 0.1848, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.5879945429740792, |
|
"grad_norm": 0.9322428703308105, |
|
"learning_rate": 1.0698974509999159e-05, |
|
"loss": 0.184, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.5907230559345158, |
|
"grad_norm": 2.5623462200164795, |
|
"learning_rate": 1.0667251304246028e-05, |
|
"loss": 0.1823, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.5934515688949522, |
|
"grad_norm": 1.9705911874771118, |
|
"learning_rate": 1.0635521352048873e-05, |
|
"loss": 0.1815, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.5961800818553888, |
|
"grad_norm": 2.024606704711914, |
|
"learning_rate": 1.0603784974222862e-05, |
|
"loss": 0.1909, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.5989085948158253, |
|
"grad_norm": 2.2946503162384033, |
|
"learning_rate": 1.057204249164815e-05, |
|
"loss": 0.1842, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.601637107776262, |
|
"grad_norm": 1.239753007888794, |
|
"learning_rate": 1.0540294225266608e-05, |
|
"loss": 0.1827, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.6043656207366985, |
|
"grad_norm": 1.7404361963272095, |
|
"learning_rate": 1.0508540496078582e-05, |
|
"loss": 0.1798, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.6070941336971352, |
|
"grad_norm": 1.5583375692367554, |
|
"learning_rate": 1.0476781625139655e-05, |
|
"loss": 0.1836, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.6098226466575716, |
|
"grad_norm": 1.1195285320281982, |
|
"learning_rate": 1.0445017933557404e-05, |
|
"loss": 0.1843, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.6125511596180082, |
|
"grad_norm": 1.6010850667953491, |
|
"learning_rate": 1.0413249742488132e-05, |
|
"loss": 0.1833, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.6152796725784446, |
|
"grad_norm": 1.3162715435028076, |
|
"learning_rate": 1.0381477373133652e-05, |
|
"loss": 0.184, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.6180081855388813, |
|
"grad_norm": 1.436637282371521, |
|
"learning_rate": 1.0349701146738007e-05, |
|
"loss": 0.1826, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.620736698499318, |
|
"grad_norm": 1.3779467344284058, |
|
"learning_rate": 1.0317921384584245e-05, |
|
"loss": 0.1809, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.6234652114597545, |
|
"grad_norm": 1.9165902137756348, |
|
"learning_rate": 1.0286138407991171e-05, |
|
"loss": 0.1835, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.626193724420191, |
|
"grad_norm": 1.925999402999878, |
|
"learning_rate": 1.0254352538310075e-05, |
|
"loss": 0.1799, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.6289222373806276, |
|
"grad_norm": 1.4837428331375122, |
|
"learning_rate": 1.0222564096921505e-05, |
|
"loss": 0.182, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.631650750341064, |
|
"grad_norm": 1.3574309349060059, |
|
"learning_rate": 1.0190773405232024e-05, |
|
"loss": 0.1796, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.6343792633015006, |
|
"grad_norm": 1.8260838985443115, |
|
"learning_rate": 1.0158980784670927e-05, |
|
"loss": 0.1811, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.6371077762619373, |
|
"grad_norm": 1.1332685947418213, |
|
"learning_rate": 1.012718655668702e-05, |
|
"loss": 0.1799, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.639836289222374, |
|
"grad_norm": 1.505661964416504, |
|
"learning_rate": 1.0095391042745362e-05, |
|
"loss": 0.1805, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.6425648021828103, |
|
"grad_norm": 1.5634633302688599, |
|
"learning_rate": 1.0063594564324014e-05, |
|
"loss": 0.1813, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.645293315143247, |
|
"grad_norm": 0.9850923418998718, |
|
"learning_rate": 1.0031797442910788e-05, |
|
"loss": 0.1742, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.6480218281036834, |
|
"grad_norm": 1.3787219524383545, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1871, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.65075034106412, |
|
"grad_norm": 1.1535123586654663, |
|
"learning_rate": 9.968202557089213e-06, |
|
"loss": 0.1826, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.6534788540245566, |
|
"grad_norm": 0.977457582950592, |
|
"learning_rate": 9.936405435675991e-06, |
|
"loss": 0.181, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.6562073669849933, |
|
"grad_norm": 0.9296724200248718, |
|
"learning_rate": 9.904608957254643e-06, |
|
"loss": 0.1772, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.65893587994543, |
|
"grad_norm": 1.0147004127502441, |
|
"learning_rate": 9.872813443312984e-06, |
|
"loss": 0.178, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.6616643929058663, |
|
"grad_norm": 0.9305097460746765, |
|
"learning_rate": 9.84101921532908e-06, |
|
"loss": 0.1777, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.6643929058663027, |
|
"grad_norm": 1.123794674873352, |
|
"learning_rate": 9.809226594767979e-06, |
|
"loss": 0.1821, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.6671214188267394, |
|
"grad_norm": 1.1139179468154907, |
|
"learning_rate": 9.777435903078493e-06, |
|
"loss": 0.1794, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.669849931787176, |
|
"grad_norm": 0.873371422290802, |
|
"learning_rate": 9.745647461689932e-06, |
|
"loss": 0.1728, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.6725784447476126, |
|
"grad_norm": 0.8382684588432312, |
|
"learning_rate": 9.713861592008834e-06, |
|
"loss": 0.1741, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.6753069577080493, |
|
"grad_norm": 1.2288588285446167, |
|
"learning_rate": 9.682078615415755e-06, |
|
"loss": 0.1769, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.6780354706684857, |
|
"grad_norm": 1.0390663146972656, |
|
"learning_rate": 9.650298853261998e-06, |
|
"loss": 0.1747, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.680763983628922, |
|
"grad_norm": 1.0253381729125977, |
|
"learning_rate": 9.618522626866351e-06, |
|
"loss": 0.1835, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.6834924965893587, |
|
"grad_norm": 0.8118390440940857, |
|
"learning_rate": 9.586750257511868e-06, |
|
"loss": 0.178, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.6862210095497954, |
|
"grad_norm": 0.9968405365943909, |
|
"learning_rate": 9.554982066442601e-06, |
|
"loss": 0.1779, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.688949522510232, |
|
"grad_norm": 0.9809510111808777, |
|
"learning_rate": 9.523218374860348e-06, |
|
"loss": 0.1806, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.6916780354706686, |
|
"grad_norm": 0.9609003663063049, |
|
"learning_rate": 9.49145950392142e-06, |
|
"loss": 0.1825, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.694406548431105, |
|
"grad_norm": 1.0416550636291504, |
|
"learning_rate": 9.459705774733397e-06, |
|
"loss": 0.1845, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.6971350613915415, |
|
"grad_norm": 1.0884149074554443, |
|
"learning_rate": 9.427957508351852e-06, |
|
"loss": 0.1826, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.699863574351978, |
|
"grad_norm": 0.936427652835846, |
|
"learning_rate": 9.39621502577714e-06, |
|
"loss": 0.1814, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.7025920873124147, |
|
"grad_norm": 0.9558141827583313, |
|
"learning_rate": 9.364478647951132e-06, |
|
"loss": 0.187, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.7053206002728514, |
|
"grad_norm": 0.986827552318573, |
|
"learning_rate": 9.332748695753973e-06, |
|
"loss": 0.1817, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.708049113233288, |
|
"grad_norm": 0.9488497376441956, |
|
"learning_rate": 9.301025490000843e-06, |
|
"loss": 0.1807, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.7107776261937244, |
|
"grad_norm": 1.1289781332015991, |
|
"learning_rate": 9.26930935143871e-06, |
|
"loss": 0.1816, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.7135061391541608, |
|
"grad_norm": 1.031248688697815, |
|
"learning_rate": 9.237600600743086e-06, |
|
"loss": 0.1816, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.7162346521145975, |
|
"grad_norm": 0.9620054960250854, |
|
"learning_rate": 9.20589955851478e-06, |
|
"loss": 0.178, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.718963165075034, |
|
"grad_norm": 1.0059623718261719, |
|
"learning_rate": 9.174206545276678e-06, |
|
"loss": 0.1821, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.7216916780354707, |
|
"grad_norm": 0.8320233821868896, |
|
"learning_rate": 9.14252188147047e-06, |
|
"loss": 0.1787, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.7244201909959074, |
|
"grad_norm": 0.8600996732711792, |
|
"learning_rate": 9.11084588745343e-06, |
|
"loss": 0.1775, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.7271487039563438, |
|
"grad_norm": 0.7816293835639954, |
|
"learning_rate": 9.07917888349519e-06, |
|
"loss": 0.1716, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.7298772169167802, |
|
"grad_norm": 1.091169834136963, |
|
"learning_rate": 9.047521189774456e-06, |
|
"loss": 0.1793, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.7326057298772168, |
|
"grad_norm": 1.5603679418563843, |
|
"learning_rate": 9.015873126375822e-06, |
|
"loss": 0.1756, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.7353342428376535, |
|
"grad_norm": 1.0447919368743896, |
|
"learning_rate": 8.984235013286512e-06, |
|
"loss": 0.1813, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.73806275579809, |
|
"grad_norm": 1.329048991203308, |
|
"learning_rate": 8.952607170393126e-06, |
|
"loss": 0.1786, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.7407912687585267, |
|
"grad_norm": 1.0863256454467773, |
|
"learning_rate": 8.920989917478446e-06, |
|
"loss": 0.1763, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.7435197817189632, |
|
"grad_norm": 1.4967482089996338, |
|
"learning_rate": 8.88938357421817e-06, |
|
"loss": 0.1748, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.7462482946793996, |
|
"grad_norm": 1.3216819763183594, |
|
"learning_rate": 8.857788460177685e-06, |
|
"loss": 0.1829, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.7489768076398362, |
|
"grad_norm": 1.3818433284759521, |
|
"learning_rate": 8.826204894808856e-06, |
|
"loss": 0.1824, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.7517053206002728, |
|
"grad_norm": 1.4184951782226562, |
|
"learning_rate": 8.79463319744677e-06, |
|
"loss": 0.1778, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.7544338335607095, |
|
"grad_norm": 1.4973526000976562, |
|
"learning_rate": 8.763073687306523e-06, |
|
"loss": 0.1775, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.7571623465211461, |
|
"grad_norm": 1.6445238590240479, |
|
"learning_rate": 8.731526683479991e-06, |
|
"loss": 0.1803, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.7598908594815825, |
|
"grad_norm": 1.743328332901001, |
|
"learning_rate": 8.699992504932599e-06, |
|
"loss": 0.1777, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.762619372442019, |
|
"grad_norm": 1.3567850589752197, |
|
"learning_rate": 8.668471470500094e-06, |
|
"loss": 0.1781, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.7653478854024556, |
|
"grad_norm": 1.3435090780258179, |
|
"learning_rate": 8.63696389888533e-06, |
|
"loss": 0.1791, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.7680763983628922, |
|
"grad_norm": 0.8996968269348145, |
|
"learning_rate": 8.605470108655046e-06, |
|
"loss": 0.1745, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.7708049113233288, |
|
"grad_norm": 0.9321063160896301, |
|
"learning_rate": 8.573990418236626e-06, |
|
"loss": 0.1757, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.7735334242837655, |
|
"grad_norm": 0.7093449234962463, |
|
"learning_rate": 8.542525145914907e-06, |
|
"loss": 0.1749, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.776261937244202, |
|
"grad_norm": 1.155967354774475, |
|
"learning_rate": 8.511074609828944e-06, |
|
"loss": 0.1735, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.7789904502046383, |
|
"grad_norm": 0.8285070657730103, |
|
"learning_rate": 8.479639127968793e-06, |
|
"loss": 0.1787, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.781718963165075, |
|
"grad_norm": 1.2278356552124023, |
|
"learning_rate": 8.448219018172303e-06, |
|
"loss": 0.1804, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.7844474761255116, |
|
"grad_norm": 1.3046550750732422, |
|
"learning_rate": 8.416814598121901e-06, |
|
"loss": 0.1767, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.7871759890859482, |
|
"grad_norm": 0.9383738040924072, |
|
"learning_rate": 8.385426185341374e-06, |
|
"loss": 0.1777, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.7899045020463848, |
|
"grad_norm": 1.2242978811264038, |
|
"learning_rate": 8.35405409719266e-06, |
|
"loss": 0.1807, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.7926330150068213, |
|
"grad_norm": 0.9162175059318542, |
|
"learning_rate": 8.322698650872656e-06, |
|
"loss": 0.1736, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.795361527967258, |
|
"grad_norm": 1.3758541345596313, |
|
"learning_rate": 8.291360163409978e-06, |
|
"loss": 0.1747, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.7980900409276943, |
|
"grad_norm": 1.1687759160995483, |
|
"learning_rate": 8.260038951661787e-06, |
|
"loss": 0.1767, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.800818553888131, |
|
"grad_norm": 1.366279125213623, |
|
"learning_rate": 8.228735332310575e-06, |
|
"loss": 0.1828, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.8035470668485676, |
|
"grad_norm": 1.273398756980896, |
|
"learning_rate": 8.197449621860944e-06, |
|
"loss": 0.1755, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.8062755798090042, |
|
"grad_norm": 1.1922065019607544, |
|
"learning_rate": 8.16618213663644e-06, |
|
"loss": 0.1781, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.8090040927694406, |
|
"grad_norm": 0.9690226912498474, |
|
"learning_rate": 8.134933192776333e-06, |
|
"loss": 0.1778, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.8117326057298773, |
|
"grad_norm": 1.3045841455459595, |
|
"learning_rate": 8.103703106232416e-06, |
|
"loss": 0.183, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.8144611186903137, |
|
"grad_norm": 0.8391909599304199, |
|
"learning_rate": 8.072492192765833e-06, |
|
"loss": 0.1744, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.8171896316507503, |
|
"grad_norm": 0.9879373908042908, |
|
"learning_rate": 8.041300767943867e-06, |
|
"loss": 0.176, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.819918144611187, |
|
"grad_norm": 0.8668816685676575, |
|
"learning_rate": 8.010129147136749e-06, |
|
"loss": 0.1771, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.8226466575716236, |
|
"grad_norm": 0.8918569684028625, |
|
"learning_rate": 7.978977645514488e-06, |
|
"loss": 0.1782, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.82537517053206, |
|
"grad_norm": 1.0004130601882935, |
|
"learning_rate": 7.947846578043658e-06, |
|
"loss": 0.1797, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.8281036834924966, |
|
"grad_norm": 1.0414166450500488, |
|
"learning_rate": 7.916736259484239e-06, |
|
"loss": 0.1763, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.830832196452933, |
|
"grad_norm": 1.5945677757263184, |
|
"learning_rate": 7.885647004386421e-06, |
|
"loss": 0.1727, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.8335607094133697, |
|
"grad_norm": 1.3431050777435303, |
|
"learning_rate": 7.854579127087418e-06, |
|
"loss": 0.1763, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.8362892223738063, |
|
"grad_norm": 1.1084147691726685, |
|
"learning_rate": 7.823532941708305e-06, |
|
"loss": 0.1765, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.839017735334243, |
|
"grad_norm": 1.1193599700927734, |
|
"learning_rate": 7.792508762150833e-06, |
|
"loss": 0.1766, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.8417462482946794, |
|
"grad_norm": 1.06298828125, |
|
"learning_rate": 7.761506902094248e-06, |
|
"loss": 0.1738, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.844474761255116, |
|
"grad_norm": 1.2637038230895996, |
|
"learning_rate": 7.730527674992143e-06, |
|
"loss": 0.1798, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.8472032742155524, |
|
"grad_norm": 1.1762093305587769, |
|
"learning_rate": 7.699571394069269e-06, |
|
"loss": 0.1769, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.849931787175989, |
|
"grad_norm": 0.9763647317886353, |
|
"learning_rate": 7.668638372318359e-06, |
|
"loss": 0.1767, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.8526603001364257, |
|
"grad_norm": 0.976860523223877, |
|
"learning_rate": 7.637728922496996e-06, |
|
"loss": 0.1731, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.8553888130968623, |
|
"grad_norm": 0.8932291269302368, |
|
"learning_rate": 7.606843357124426e-06, |
|
"loss": 0.1734, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.8581173260572987, |
|
"grad_norm": 0.978801429271698, |
|
"learning_rate": 7.575981988478393e-06, |
|
"loss": 0.1758, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.8608458390177354, |
|
"grad_norm": 0.9244241714477539, |
|
"learning_rate": 7.545145128592009e-06, |
|
"loss": 0.171, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.8635743519781718, |
|
"grad_norm": 0.8267676830291748, |
|
"learning_rate": 7.514333089250577e-06, |
|
"loss": 0.1705, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.8663028649386084, |
|
"grad_norm": 1.031118392944336, |
|
"learning_rate": 7.483546181988437e-06, |
|
"loss": 0.1767, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.869031377899045, |
|
"grad_norm": 1.0337843894958496, |
|
"learning_rate": 7.452784718085834e-06, |
|
"loss": 0.1707, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.8717598908594817, |
|
"grad_norm": 1.016801118850708, |
|
"learning_rate": 7.422049008565757e-06, |
|
"loss": 0.1757, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.874488403819918, |
|
"grad_norm": 1.1047598123550415, |
|
"learning_rate": 7.391339364190794e-06, |
|
"loss": 0.1762, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.8772169167803547, |
|
"grad_norm": 1.1827868223190308, |
|
"learning_rate": 7.360656095459995e-06, |
|
"loss": 0.1702, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.8799454297407912, |
|
"grad_norm": 1.0567187070846558, |
|
"learning_rate": 7.329999512605738e-06, |
|
"loss": 0.1759, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.8826739427012278, |
|
"grad_norm": 1.480316400527954, |
|
"learning_rate": 7.299369925590575e-06, |
|
"loss": 0.1731, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.8854024556616644, |
|
"grad_norm": 0.9149882793426514, |
|
"learning_rate": 7.268767644104114e-06, |
|
"loss": 0.1786, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.888130968622101, |
|
"grad_norm": 1.175398826599121, |
|
"learning_rate": 7.2381929775598835e-06, |
|
"loss": 0.1743, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.8908594815825375, |
|
"grad_norm": 1.0491394996643066, |
|
"learning_rate": 7.207646235092201e-06, |
|
"loss": 0.1704, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.893587994542974, |
|
"grad_norm": 1.049817681312561, |
|
"learning_rate": 7.1771277255530456e-06, |
|
"loss": 0.175, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.8963165075034105, |
|
"grad_norm": 0.8883402347564697, |
|
"learning_rate": 7.14663775750895e-06, |
|
"loss": 0.1766, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.8990450204638472, |
|
"grad_norm": 0.8892170190811157, |
|
"learning_rate": 7.116176639237853e-06, |
|
"loss": 0.1764, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.9017735334242838, |
|
"grad_norm": 0.8051910400390625, |
|
"learning_rate": 7.085744678726013e-06, |
|
"loss": 0.1755, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.9045020463847204, |
|
"grad_norm": 1.0221798419952393, |
|
"learning_rate": 7.05534218366488e-06, |
|
"loss": 0.172, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.9072305593451568, |
|
"grad_norm": 0.8057647347450256, |
|
"learning_rate": 7.024969461447973e-06, |
|
"loss": 0.1781, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.9099590723055935, |
|
"grad_norm": 1.0149929523468018, |
|
"learning_rate": 6.994626819167789e-06, |
|
"loss": 0.1717, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9126875852660299, |
|
"grad_norm": 0.9378892779350281, |
|
"learning_rate": 6.964314563612709e-06, |
|
"loss": 0.1746, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.9154160982264665, |
|
"grad_norm": 0.8830491900444031, |
|
"learning_rate": 6.934033001263847e-06, |
|
"loss": 0.1751, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.9181446111869032, |
|
"grad_norm": 1.0347460508346558, |
|
"learning_rate": 6.9037824382920145e-06, |
|
"loss": 0.1744, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.9208731241473398, |
|
"grad_norm": 0.8412283062934875, |
|
"learning_rate": 6.873563180554583e-06, |
|
"loss": 0.1702, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.9236016371077762, |
|
"grad_norm": 1.1008716821670532, |
|
"learning_rate": 6.843375533592395e-06, |
|
"loss": 0.1721, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.9263301500682128, |
|
"grad_norm": 1.1027634143829346, |
|
"learning_rate": 6.813219802626698e-06, |
|
"loss": 0.1697, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.9290586630286493, |
|
"grad_norm": 0.9036797285079956, |
|
"learning_rate": 6.783096292556035e-06, |
|
"loss": 0.1742, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.931787175989086, |
|
"grad_norm": 0.9932321310043335, |
|
"learning_rate": 6.7530053079531664e-06, |
|
"loss": 0.1695, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.9345156889495225, |
|
"grad_norm": 0.9580210447311401, |
|
"learning_rate": 6.722947153062003e-06, |
|
"loss": 0.1732, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.9372442019099592, |
|
"grad_norm": 0.7242920994758606, |
|
"learning_rate": 6.692922131794517e-06, |
|
"loss": 0.1676, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.9399727148703958, |
|
"grad_norm": 1.1400161981582642, |
|
"learning_rate": 6.662930547727668e-06, |
|
"loss": 0.17, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.9427012278308322, |
|
"grad_norm": 0.9288698434829712, |
|
"learning_rate": 6.632972704100349e-06, |
|
"loss": 0.1699, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.9454297407912686, |
|
"grad_norm": 1.0455366373062134, |
|
"learning_rate": 6.603048903810305e-06, |
|
"loss": 0.1691, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.9481582537517053, |
|
"grad_norm": 0.7870394587516785, |
|
"learning_rate": 6.573159449411071e-06, |
|
"loss": 0.1693, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.950886766712142, |
|
"grad_norm": 0.9377486705780029, |
|
"learning_rate": 6.5433046431089205e-06, |
|
"loss": 0.1717, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.9536152796725785, |
|
"grad_norm": 0.8685896992683411, |
|
"learning_rate": 6.513484786759818e-06, |
|
"loss": 0.1751, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.9563437926330152, |
|
"grad_norm": 1.2274606227874756, |
|
"learning_rate": 6.483700181866337e-06, |
|
"loss": 0.1725, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.9590723055934516, |
|
"grad_norm": 1.0122281312942505, |
|
"learning_rate": 6.453951129574644e-06, |
|
"loss": 0.1689, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.961800818553888, |
|
"grad_norm": 1.0082857608795166, |
|
"learning_rate": 6.42423793067144e-06, |
|
"loss": 0.174, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.9645293315143246, |
|
"grad_norm": 1.1493691205978394, |
|
"learning_rate": 6.39456088558091e-06, |
|
"loss": 0.1703, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9672578444747613, |
|
"grad_norm": 0.7770411968231201, |
|
"learning_rate": 6.364920294361701e-06, |
|
"loss": 0.171, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.969986357435198, |
|
"grad_norm": 1.0410341024398804, |
|
"learning_rate": 6.335316456703891e-06, |
|
"loss": 0.1684, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.9727148703956345, |
|
"grad_norm": 1.0169074535369873, |
|
"learning_rate": 6.3057496719259314e-06, |
|
"loss": 0.172, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.975443383356071, |
|
"grad_norm": 1.2108582258224487, |
|
"learning_rate": 6.276220238971653e-06, |
|
"loss": 0.1687, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.9781718963165074, |
|
"grad_norm": 1.1328767538070679, |
|
"learning_rate": 6.2467284564072294e-06, |
|
"loss": 0.1709, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.980900409276944, |
|
"grad_norm": 0.8491936326026917, |
|
"learning_rate": 6.2172746224181524e-06, |
|
"loss": 0.1739, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.9836289222373806, |
|
"grad_norm": 0.977837860584259, |
|
"learning_rate": 6.187859034806225e-06, |
|
"loss": 0.1678, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.9863574351978173, |
|
"grad_norm": 1.0643810033798218, |
|
"learning_rate": 6.158481990986558e-06, |
|
"loss": 0.1698, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.989085948158254, |
|
"grad_norm": 1.1140060424804688, |
|
"learning_rate": 6.1291437879845335e-06, |
|
"loss": 0.1725, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.9918144611186903, |
|
"grad_norm": 0.7833942174911499, |
|
"learning_rate": 6.099844722432844e-06, |
|
"loss": 0.1718, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.9945429740791267, |
|
"grad_norm": 0.7607527375221252, |
|
"learning_rate": 6.07058509056846e-06, |
|
"loss": 0.1696, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.9972714870395634, |
|
"grad_norm": 0.6727914810180664, |
|
"learning_rate": 6.041365188229641e-06, |
|
"loss": 0.1733, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.7081233263015747, |
|
"learning_rate": 6.012185310852962e-06, |
|
"loss": 0.1684, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.0027285129604366, |
|
"grad_norm": 0.8626097440719604, |
|
"learning_rate": 5.983045753470308e-06, |
|
"loss": 0.133, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.0054570259208733, |
|
"grad_norm": 0.8325755596160889, |
|
"learning_rate": 5.9539468107058885e-06, |
|
"loss": 0.1346, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.00818553888131, |
|
"grad_norm": 0.6211928129196167, |
|
"learning_rate": 5.924888776773281e-06, |
|
"loss": 0.13, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.010914051841746, |
|
"grad_norm": 0.877323567867279, |
|
"learning_rate": 5.895871945472434e-06, |
|
"loss": 0.1327, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.0136425648021827, |
|
"grad_norm": 1.7426798343658447, |
|
"learning_rate": 5.866896610186701e-06, |
|
"loss": 0.1304, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.0163710777626194, |
|
"grad_norm": 1.0055882930755615, |
|
"learning_rate": 5.8379630638798845e-06, |
|
"loss": 0.1334, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.019099590723056, |
|
"grad_norm": 0.7875477075576782, |
|
"learning_rate": 5.809071599093272e-06, |
|
"loss": 0.1319, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.0218281036834926, |
|
"grad_norm": 0.6837515234947205, |
|
"learning_rate": 5.780222507942654e-06, |
|
"loss": 0.1277, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.0245566166439293, |
|
"grad_norm": 0.8274489045143127, |
|
"learning_rate": 5.7514160821154085e-06, |
|
"loss": 0.126, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.0272851296043655, |
|
"grad_norm": 0.6948771476745605, |
|
"learning_rate": 5.7226526128675234e-06, |
|
"loss": 0.1255, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.030013642564802, |
|
"grad_norm": 0.7969790101051331, |
|
"learning_rate": 5.693932391020664e-06, |
|
"loss": 0.1269, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.0327421555252387, |
|
"grad_norm": 0.7251871824264526, |
|
"learning_rate": 5.665255706959231e-06, |
|
"loss": 0.1268, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.0354706684856754, |
|
"grad_norm": 0.635795533657074, |
|
"learning_rate": 5.63662285062742e-06, |
|
"loss": 0.1328, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.038199181446112, |
|
"grad_norm": 1.0244261026382446, |
|
"learning_rate": 5.608034111526298e-06, |
|
"loss": 0.1286, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.0409276944065486, |
|
"grad_norm": 0.754211962223053, |
|
"learning_rate": 5.579489778710867e-06, |
|
"loss": 0.1251, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.043656207366985, |
|
"grad_norm": 0.7149548530578613, |
|
"learning_rate": 5.550990140787147e-06, |
|
"loss": 0.1281, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.0463847203274215, |
|
"grad_norm": 0.8640093803405762, |
|
"learning_rate": 5.522535485909258e-06, |
|
"loss": 0.1252, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.049113233287858, |
|
"grad_norm": 0.7243211269378662, |
|
"learning_rate": 5.494126101776505e-06, |
|
"loss": 0.1271, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.0518417462482947, |
|
"grad_norm": 1.0630416870117188, |
|
"learning_rate": 5.465762275630471e-06, |
|
"loss": 0.1262, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.0545702592087314, |
|
"grad_norm": 0.6877619028091431, |
|
"learning_rate": 5.437444294252108e-06, |
|
"loss": 0.1278, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.057298772169168, |
|
"grad_norm": 0.7790495157241821, |
|
"learning_rate": 5.409172443958844e-06, |
|
"loss": 0.126, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.060027285129604, |
|
"grad_norm": 1.3545631170272827, |
|
"learning_rate": 5.380947010601681e-06, |
|
"loss": 0.1275, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.062755798090041, |
|
"grad_norm": 0.723747193813324, |
|
"learning_rate": 5.352768279562315e-06, |
|
"loss": 0.1293, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.0654843110504775, |
|
"grad_norm": 0.9228985905647278, |
|
"learning_rate": 5.324636535750238e-06, |
|
"loss": 0.1239, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.068212824010914, |
|
"grad_norm": 0.779529869556427, |
|
"learning_rate": 5.2965520635998676e-06, |
|
"loss": 0.1266, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.0709413369713507, |
|
"grad_norm": 0.6386115550994873, |
|
"learning_rate": 5.268515147067666e-06, |
|
"loss": 0.1251, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.0736698499317874, |
|
"grad_norm": 0.7511923909187317, |
|
"learning_rate": 5.240526069629265e-06, |
|
"loss": 0.1263, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0763983628922236, |
|
"grad_norm": 0.6733880639076233, |
|
"learning_rate": 5.212585114276614e-06, |
|
"loss": 0.1275, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.07912687585266, |
|
"grad_norm": 0.6625434160232544, |
|
"learning_rate": 5.184692563515104e-06, |
|
"loss": 0.1284, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.081855388813097, |
|
"grad_norm": 0.6756749153137207, |
|
"learning_rate": 5.156848699360719e-06, |
|
"loss": 0.1256, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.0845839017735335, |
|
"grad_norm": 0.8829818964004517, |
|
"learning_rate": 5.129053803337181e-06, |
|
"loss": 0.1252, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.08731241473397, |
|
"grad_norm": 0.6200577020645142, |
|
"learning_rate": 5.101308156473104e-06, |
|
"loss": 0.1275, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.0900409276944067, |
|
"grad_norm": 1.5605920553207397, |
|
"learning_rate": 5.073612039299157e-06, |
|
"loss": 0.1253, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.092769440654843, |
|
"grad_norm": 0.7764895558357239, |
|
"learning_rate": 5.045965731845223e-06, |
|
"loss": 0.1324, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.0954979536152796, |
|
"grad_norm": 0.6887750029563904, |
|
"learning_rate": 5.018369513637567e-06, |
|
"loss": 0.1315, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.098226466575716, |
|
"grad_norm": 1.163913607597351, |
|
"learning_rate": 4.990823663696013e-06, |
|
"loss": 0.1267, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.100954979536153, |
|
"grad_norm": 0.9275516271591187, |
|
"learning_rate": 4.963328460531127e-06, |
|
"loss": 0.1247, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.1036834924965895, |
|
"grad_norm": 0.8884099721908569, |
|
"learning_rate": 4.9358841821413775e-06, |
|
"loss": 0.1282, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.106412005457026, |
|
"grad_norm": 0.791497528553009, |
|
"learning_rate": 4.908491106010368e-06, |
|
"loss": 0.1258, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.1091405184174623, |
|
"grad_norm": 0.7595200538635254, |
|
"learning_rate": 4.881149509103993e-06, |
|
"loss": 0.13, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.111869031377899, |
|
"grad_norm": 0.7046754956245422, |
|
"learning_rate": 4.853859667867641e-06, |
|
"loss": 0.1247, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.1145975443383356, |
|
"grad_norm": 0.8449010848999023, |
|
"learning_rate": 4.826621858223431e-06, |
|
"loss": 0.127, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.117326057298772, |
|
"grad_norm": 0.6766776442527771, |
|
"learning_rate": 4.799436355567391e-06, |
|
"loss": 0.1286, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.120054570259209, |
|
"grad_norm": 0.8445289134979248, |
|
"learning_rate": 4.772303434766669e-06, |
|
"loss": 0.1266, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.1227830832196455, |
|
"grad_norm": 0.6168569326400757, |
|
"learning_rate": 4.745223370156797e-06, |
|
"loss": 0.1261, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.1255115961800817, |
|
"grad_norm": 0.8796712756156921, |
|
"learning_rate": 4.7181964355388695e-06, |
|
"loss": 0.1304, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.1282401091405183, |
|
"grad_norm": 0.7083600759506226, |
|
"learning_rate": 4.691222904176791e-06, |
|
"loss": 0.1264, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.130968622100955, |
|
"grad_norm": 0.8051674962043762, |
|
"learning_rate": 4.664303048794533e-06, |
|
"loss": 0.1262, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.1336971350613916, |
|
"grad_norm": 0.6285784840583801, |
|
"learning_rate": 4.63743714157335e-06, |
|
"loss": 0.1274, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.136425648021828, |
|
"grad_norm": 0.9793753027915955, |
|
"learning_rate": 4.610625454149033e-06, |
|
"loss": 0.1244, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.139154160982265, |
|
"grad_norm": 0.7283981442451477, |
|
"learning_rate": 4.583868257609171e-06, |
|
"loss": 0.1256, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.141882673942701, |
|
"grad_norm": 0.8791332244873047, |
|
"learning_rate": 4.55716582249042e-06, |
|
"loss": 0.1239, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.1446111869031377, |
|
"grad_norm": 0.7714548110961914, |
|
"learning_rate": 4.530518418775734e-06, |
|
"loss": 0.1269, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.1473396998635743, |
|
"grad_norm": 0.7238229513168335, |
|
"learning_rate": 4.50392631589166e-06, |
|
"loss": 0.1304, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.150068212824011, |
|
"grad_norm": 0.7066569924354553, |
|
"learning_rate": 4.477389782705628e-06, |
|
"loss": 0.128, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.1527967257844476, |
|
"grad_norm": 0.7357354760169983, |
|
"learning_rate": 4.4509090875231865e-06, |
|
"loss": 0.1242, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.155525238744884, |
|
"grad_norm": 0.6796067357063293, |
|
"learning_rate": 4.424484498085335e-06, |
|
"loss": 0.1262, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.1582537517053204, |
|
"grad_norm": 0.6906223893165588, |
|
"learning_rate": 4.398116281565794e-06, |
|
"loss": 0.1253, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.160982264665757, |
|
"grad_norm": 0.7198973894119263, |
|
"learning_rate": 4.371804704568309e-06, |
|
"loss": 0.1269, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.1637107776261937, |
|
"grad_norm": 0.6786914467811584, |
|
"learning_rate": 4.345550033123954e-06, |
|
"loss": 0.1252, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.1664392905866303, |
|
"grad_norm": 0.6092426776885986, |
|
"learning_rate": 4.319352532688444e-06, |
|
"loss": 0.125, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.169167803547067, |
|
"grad_norm": 0.6962175369262695, |
|
"learning_rate": 4.293212468139447e-06, |
|
"loss": 0.1279, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.1718963165075036, |
|
"grad_norm": 0.5801219344139099, |
|
"learning_rate": 4.267130103773911e-06, |
|
"loss": 0.1253, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.17462482946794, |
|
"grad_norm": 0.7044116258621216, |
|
"learning_rate": 4.241105703305388e-06, |
|
"loss": 0.1269, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.1773533424283764, |
|
"grad_norm": 0.8102765083312988, |
|
"learning_rate": 4.2151395298613675e-06, |
|
"loss": 0.1262, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.180081855388813, |
|
"grad_norm": 0.6070725321769714, |
|
"learning_rate": 4.189231845980618e-06, |
|
"loss": 0.1231, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.1828103683492497, |
|
"grad_norm": 0.7759428024291992, |
|
"learning_rate": 4.163382913610533e-06, |
|
"loss": 0.1281, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1855388813096863, |
|
"grad_norm": 0.8558132648468018, |
|
"learning_rate": 4.137592994104479e-06, |
|
"loss": 0.1263, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.188267394270123, |
|
"grad_norm": 0.6967403888702393, |
|
"learning_rate": 4.111862348219158e-06, |
|
"loss": 0.1261, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.190995907230559, |
|
"grad_norm": 1.0783143043518066, |
|
"learning_rate": 4.086191236111964e-06, |
|
"loss": 0.1218, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.193724420190996, |
|
"grad_norm": 1.0875624418258667, |
|
"learning_rate": 4.060579917338362e-06, |
|
"loss": 0.1274, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.1964529331514324, |
|
"grad_norm": 0.8018985390663147, |
|
"learning_rate": 4.035028650849255e-06, |
|
"loss": 0.1224, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.199181446111869, |
|
"grad_norm": 1.0255545377731323, |
|
"learning_rate": 4.009537694988372e-06, |
|
"loss": 0.1262, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.2019099590723057, |
|
"grad_norm": 0.9814453721046448, |
|
"learning_rate": 3.984107307489652e-06, |
|
"loss": 0.1288, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.2046384720327423, |
|
"grad_norm": 0.7587682008743286, |
|
"learning_rate": 3.958737745474638e-06, |
|
"loss": 0.1234, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.2073669849931785, |
|
"grad_norm": 0.8585322499275208, |
|
"learning_rate": 3.933429265449882e-06, |
|
"loss": 0.1239, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.210095497953615, |
|
"grad_norm": 0.9962207674980164, |
|
"learning_rate": 3.908182123304344e-06, |
|
"loss": 0.1284, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.212824010914052, |
|
"grad_norm": 0.7775647044181824, |
|
"learning_rate": 3.882996574306818e-06, |
|
"loss": 0.1263, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.2155525238744884, |
|
"grad_norm": 1.0317277908325195, |
|
"learning_rate": 3.857872873103322e-06, |
|
"loss": 0.1285, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.218281036834925, |
|
"grad_norm": 0.9878216981887817, |
|
"learning_rate": 3.832811273714569e-06, |
|
"loss": 0.1263, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.2210095497953617, |
|
"grad_norm": 0.7432425022125244, |
|
"learning_rate": 3.807812029533362e-06, |
|
"loss": 0.1255, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.223738062755798, |
|
"grad_norm": 0.7971869111061096, |
|
"learning_rate": 3.78287539332203e-06, |
|
"loss": 0.1254, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.2264665757162345, |
|
"grad_norm": 1.0156506299972534, |
|
"learning_rate": 3.7580016172099067e-06, |
|
"loss": 0.1259, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.229195088676671, |
|
"grad_norm": 0.7717655897140503, |
|
"learning_rate": 3.7331909526907527e-06, |
|
"loss": 0.1221, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.231923601637108, |
|
"grad_norm": 0.6119164824485779, |
|
"learning_rate": 3.708443650620206e-06, |
|
"loss": 0.1256, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.2346521145975444, |
|
"grad_norm": 1.200944423675537, |
|
"learning_rate": 3.6837599612132826e-06, |
|
"loss": 0.1287, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.237380627557981, |
|
"grad_norm": 1.009369969367981, |
|
"learning_rate": 3.659140134041812e-06, |
|
"loss": 0.1249, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.2401091405184177, |
|
"grad_norm": 0.6488488912582397, |
|
"learning_rate": 3.6345844180319157e-06, |
|
"loss": 0.1231, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.242837653478854, |
|
"grad_norm": 0.8771604299545288, |
|
"learning_rate": 3.6100930614615204e-06, |
|
"loss": 0.1248, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.2455661664392905, |
|
"grad_norm": 1.1320050954818726, |
|
"learning_rate": 3.5856663119578174e-06, |
|
"loss": 0.1251, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.248294679399727, |
|
"grad_norm": 1.1242350339889526, |
|
"learning_rate": 3.5613044164947617e-06, |
|
"loss": 0.1255, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.251023192360164, |
|
"grad_norm": 0.7258925437927246, |
|
"learning_rate": 3.5370076213905904e-06, |
|
"loss": 0.1252, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.2537517053206004, |
|
"grad_norm": 0.844993531703949, |
|
"learning_rate": 3.5127761723053313e-06, |
|
"loss": 0.1238, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.2564802182810366, |
|
"grad_norm": 1.045020341873169, |
|
"learning_rate": 3.4886103142382944e-06, |
|
"loss": 0.1237, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.2592087312414733, |
|
"grad_norm": 0.6890770196914673, |
|
"learning_rate": 3.46451029152562e-06, |
|
"loss": 0.1273, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.26193724420191, |
|
"grad_norm": 0.7367672324180603, |
|
"learning_rate": 3.440476347837811e-06, |
|
"loss": 0.1264, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.2646657571623465, |
|
"grad_norm": 1.0174280405044556, |
|
"learning_rate": 3.41650872617724e-06, |
|
"loss": 0.1255, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.267394270122783, |
|
"grad_norm": 0.8533175587654114, |
|
"learning_rate": 3.392607668875718e-06, |
|
"loss": 0.1269, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.27012278308322, |
|
"grad_norm": 0.7625150680541992, |
|
"learning_rate": 3.3687734175920505e-06, |
|
"loss": 0.1249, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.2728512960436564, |
|
"grad_norm": 0.735122799873352, |
|
"learning_rate": 3.3450062133095572e-06, |
|
"loss": 0.1243, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.2755798090040926, |
|
"grad_norm": 0.7749839425086975, |
|
"learning_rate": 3.321306296333673e-06, |
|
"loss": 0.1246, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.2783083219645293, |
|
"grad_norm": 0.7999988794326782, |
|
"learning_rate": 3.29767390628951e-06, |
|
"loss": 0.1253, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.281036834924966, |
|
"grad_norm": 0.8466626405715942, |
|
"learning_rate": 3.274109282119413e-06, |
|
"loss": 0.1256, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.2837653478854025, |
|
"grad_norm": 0.7036960124969482, |
|
"learning_rate": 3.2506126620805666e-06, |
|
"loss": 0.1256, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.286493860845839, |
|
"grad_norm": 0.7315667271614075, |
|
"learning_rate": 3.2271842837425917e-06, |
|
"loss": 0.124, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.2892223738062754, |
|
"grad_norm": 0.7803946733474731, |
|
"learning_rate": 3.203824383985108e-06, |
|
"loss": 0.1247, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.291950886766712, |
|
"grad_norm": 0.6711841821670532, |
|
"learning_rate": 3.180533198995379e-06, |
|
"loss": 0.1247, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.2946793997271486, |
|
"grad_norm": 0.6884995698928833, |
|
"learning_rate": 3.157310964265903e-06, |
|
"loss": 0.1248, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.2974079126875853, |
|
"grad_norm": 0.6317690014839172, |
|
"learning_rate": 3.134157914592032e-06, |
|
"loss": 0.1233, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.300136425648022, |
|
"grad_norm": 0.7348897457122803, |
|
"learning_rate": 3.1110742840696063e-06, |
|
"loss": 0.1223, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.3028649386084585, |
|
"grad_norm": 0.8744590282440186, |
|
"learning_rate": 3.088060306092582e-06, |
|
"loss": 0.1242, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.305593451568895, |
|
"grad_norm": 0.6979570984840393, |
|
"learning_rate": 3.0651162133506707e-06, |
|
"loss": 0.1229, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.3083219645293314, |
|
"grad_norm": 0.6471583843231201, |
|
"learning_rate": 3.042242237826991e-06, |
|
"loss": 0.1249, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.311050477489768, |
|
"grad_norm": 0.8014260530471802, |
|
"learning_rate": 3.0194386107957175e-06, |
|
"loss": 0.1248, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.3137789904502046, |
|
"grad_norm": 0.676422119140625, |
|
"learning_rate": 2.996705562819747e-06, |
|
"loss": 0.1243, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.3165075034106413, |
|
"grad_norm": 0.707206666469574, |
|
"learning_rate": 2.9740433237483667e-06, |
|
"loss": 0.1249, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.319236016371078, |
|
"grad_norm": 0.6779247522354126, |
|
"learning_rate": 2.951452122714926e-06, |
|
"loss": 0.1235, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.321964529331514, |
|
"grad_norm": 0.6838175654411316, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.1257, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.3246930422919507, |
|
"grad_norm": 0.894402027130127, |
|
"learning_rate": 2.906483747701705e-06, |
|
"loss": 0.1229, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.3274215552523874, |
|
"grad_norm": 0.6708566546440125, |
|
"learning_rate": 2.88410702838814e-06, |
|
"loss": 0.1202, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.330150068212824, |
|
"grad_norm": 0.7092727422714233, |
|
"learning_rate": 2.861802256440348e-06, |
|
"loss": 0.1242, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.3328785811732606, |
|
"grad_norm": 0.9429033994674683, |
|
"learning_rate": 2.8395696573774034e-06, |
|
"loss": 0.1258, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.3356070941336973, |
|
"grad_norm": 0.799321711063385, |
|
"learning_rate": 2.8174094559886535e-06, |
|
"loss": 0.1222, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.338335607094134, |
|
"grad_norm": 0.5677041411399841, |
|
"learning_rate": 2.795321876331446e-06, |
|
"loss": 0.1237, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.34106412005457, |
|
"grad_norm": 0.7222699522972107, |
|
"learning_rate": 2.773307141728867e-06, |
|
"loss": 0.1215, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.3437926330150067, |
|
"grad_norm": 0.7362144589424133, |
|
"learning_rate": 2.751365474767479e-06, |
|
"loss": 0.1243, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.3465211459754434, |
|
"grad_norm": 0.7244157791137695, |
|
"learning_rate": 2.729497097295075e-06, |
|
"loss": 0.1214, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.34924965893588, |
|
"grad_norm": 0.6958690881729126, |
|
"learning_rate": 2.70770223041843e-06, |
|
"loss": 0.1239, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.3519781718963166, |
|
"grad_norm": 0.6821141839027405, |
|
"learning_rate": 2.6859810945010687e-06, |
|
"loss": 0.1255, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.354706684856753, |
|
"grad_norm": 0.5896955132484436, |
|
"learning_rate": 2.6643339091610376e-06, |
|
"loss": 0.1184, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.3574351978171895, |
|
"grad_norm": 0.6613571047782898, |
|
"learning_rate": 2.642760893268684e-06, |
|
"loss": 0.1226, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.360163710777626, |
|
"grad_norm": 0.6776670813560486, |
|
"learning_rate": 2.621262264944444e-06, |
|
"loss": 0.1229, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.3628922237380627, |
|
"grad_norm": 0.6587377786636353, |
|
"learning_rate": 2.5998382415566258e-06, |
|
"loss": 0.1234, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.3656207366984994, |
|
"grad_norm": 0.7616446614265442, |
|
"learning_rate": 2.5784890397192395e-06, |
|
"loss": 0.1235, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.368349249658936, |
|
"grad_norm": 0.6983022093772888, |
|
"learning_rate": 2.55721487528978e-06, |
|
"loss": 0.1304, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.3710777626193726, |
|
"grad_norm": 0.5825099349021912, |
|
"learning_rate": 2.5360159633670456e-06, |
|
"loss": 0.1211, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.373806275579809, |
|
"grad_norm": 0.7340635657310486, |
|
"learning_rate": 2.514892518288988e-06, |
|
"loss": 0.123, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.3765347885402455, |
|
"grad_norm": 0.6723161339759827, |
|
"learning_rate": 2.4938447536305243e-06, |
|
"loss": 0.1257, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.379263301500682, |
|
"grad_norm": 0.6565150618553162, |
|
"learning_rate": 2.4728728822013683e-06, |
|
"loss": 0.1219, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.3819918144611187, |
|
"grad_norm": 0.609505295753479, |
|
"learning_rate": 2.451977116043911e-06, |
|
"loss": 0.1239, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.3847203274215554, |
|
"grad_norm": 0.6170854568481445, |
|
"learning_rate": 2.431157666431052e-06, |
|
"loss": 0.1265, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.3874488403819916, |
|
"grad_norm": 0.6223445534706116, |
|
"learning_rate": 2.410414743864059e-06, |
|
"loss": 0.1235, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.390177353342428, |
|
"grad_norm": 0.6010127663612366, |
|
"learning_rate": 2.3897485580704684e-06, |
|
"loss": 0.122, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.392905866302865, |
|
"grad_norm": 0.6026211380958557, |
|
"learning_rate": 2.369159318001937e-06, |
|
"loss": 0.1219, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.3956343792633015, |
|
"grad_norm": 0.6825677752494812, |
|
"learning_rate": 2.348647231832131e-06, |
|
"loss": 0.1213, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.398362892223738, |
|
"grad_norm": 0.6073306202888489, |
|
"learning_rate": 2.3282125069546437e-06, |
|
"loss": 0.1245, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.4010914051841747, |
|
"grad_norm": 0.8288139700889587, |
|
"learning_rate": 2.30785534998088e-06, |
|
"loss": 0.1229, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.4038199181446114, |
|
"grad_norm": 0.6051532626152039, |
|
"learning_rate": 2.2875759667379616e-06, |
|
"loss": 0.1225, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.4065484311050476, |
|
"grad_norm": 0.6381723284721375, |
|
"learning_rate": 2.267374562266662e-06, |
|
"loss": 0.1237, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.409276944065484, |
|
"grad_norm": 0.6321126222610474, |
|
"learning_rate": 2.2472513408193385e-06, |
|
"loss": 0.124, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.412005457025921, |
|
"grad_norm": 0.793342113494873, |
|
"learning_rate": 2.227206505857834e-06, |
|
"loss": 0.1217, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.4147339699863575, |
|
"grad_norm": 0.6574937105178833, |
|
"learning_rate": 2.207240260051453e-06, |
|
"loss": 0.1217, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.417462482946794, |
|
"grad_norm": 0.6402091979980469, |
|
"learning_rate": 2.1873528052749094e-06, |
|
"loss": 0.1197, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.4201909959072307, |
|
"grad_norm": 0.5669599771499634, |
|
"learning_rate": 2.167544342606256e-06, |
|
"loss": 0.1234, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.422919508867667, |
|
"grad_norm": 0.6423009037971497, |
|
"learning_rate": 2.147815072324886e-06, |
|
"loss": 0.122, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.4256480218281036, |
|
"grad_norm": 0.9944397807121277, |
|
"learning_rate": 2.1281651939094996e-06, |
|
"loss": 0.1221, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.42837653478854, |
|
"grad_norm": 0.7023611068725586, |
|
"learning_rate": 2.1085949060360654e-06, |
|
"loss": 0.1221, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.431105047748977, |
|
"grad_norm": 0.6616829037666321, |
|
"learning_rate": 2.089104406575837e-06, |
|
"loss": 0.121, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.4338335607094135, |
|
"grad_norm": 0.6649833917617798, |
|
"learning_rate": 2.0696938925933505e-06, |
|
"loss": 0.1244, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.43656207366985, |
|
"grad_norm": 0.9376471042633057, |
|
"learning_rate": 2.0503635603444094e-06, |
|
"loss": 0.1228, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.4392905866302863, |
|
"grad_norm": 0.6371198892593384, |
|
"learning_rate": 2.0311136052741274e-06, |
|
"loss": 0.1211, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.442019099590723, |
|
"grad_norm": 0.6163613796234131, |
|
"learning_rate": 2.0119442220149356e-06, |
|
"loss": 0.1236, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.4447476125511596, |
|
"grad_norm": 0.6523067951202393, |
|
"learning_rate": 1.9928556043846215e-06, |
|
"loss": 0.1244, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.447476125511596, |
|
"grad_norm": 0.7866846919059753, |
|
"learning_rate": 1.9738479453843685e-06, |
|
"loss": 0.1234, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.450204638472033, |
|
"grad_norm": 0.7902270555496216, |
|
"learning_rate": 1.9549214371968008e-06, |
|
"loss": 0.1235, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.4529331514324695, |
|
"grad_norm": 0.8805884718894958, |
|
"learning_rate": 1.936076271184044e-06, |
|
"loss": 0.1234, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.4556616643929057, |
|
"grad_norm": 0.6005100011825562, |
|
"learning_rate": 1.917312637885791e-06, |
|
"loss": 0.1221, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4583901773533423, |
|
"grad_norm": 0.7518947720527649, |
|
"learning_rate": 1.898630727017371e-06, |
|
"loss": 0.1211, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.461118690313779, |
|
"grad_norm": 1.2557408809661865, |
|
"learning_rate": 1.8800307274678364e-06, |
|
"loss": 0.1203, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.4638472032742156, |
|
"grad_norm": 0.7856685519218445, |
|
"learning_rate": 1.861512827298051e-06, |
|
"loss": 0.1246, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.466575716234652, |
|
"grad_norm": 0.7623482346534729, |
|
"learning_rate": 1.8430772137387853e-06, |
|
"loss": 0.1231, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.469304229195089, |
|
"grad_norm": 0.6743024587631226, |
|
"learning_rate": 1.8247240731888293e-06, |
|
"loss": 0.1211, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.472032742155525, |
|
"grad_norm": 0.8061301112174988, |
|
"learning_rate": 1.8064535912131032e-06, |
|
"loss": 0.12, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.4747612551159617, |
|
"grad_norm": 0.8119410872459412, |
|
"learning_rate": 1.7882659525407842e-06, |
|
"loss": 0.1208, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.4774897680763983, |
|
"grad_norm": 0.6645972728729248, |
|
"learning_rate": 1.7701613410634367e-06, |
|
"loss": 0.1204, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.480218281036835, |
|
"grad_norm": 0.6861996650695801, |
|
"learning_rate": 1.752139939833154e-06, |
|
"loss": 0.1228, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.4829467939972716, |
|
"grad_norm": 0.6746291518211365, |
|
"learning_rate": 1.7342019310607062e-06, |
|
"loss": 0.1232, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.485675306957708, |
|
"grad_norm": 0.7837573289871216, |
|
"learning_rate": 1.7163474961137029e-06, |
|
"loss": 0.1226, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.488403819918145, |
|
"grad_norm": 0.7806301712989807, |
|
"learning_rate": 1.6985768155147498e-06, |
|
"loss": 0.1196, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.491132332878581, |
|
"grad_norm": 0.5961025953292847, |
|
"learning_rate": 1.6808900689396334e-06, |
|
"loss": 0.1207, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.4938608458390177, |
|
"grad_norm": 0.5994763374328613, |
|
"learning_rate": 1.6632874352154982e-06, |
|
"loss": 0.1221, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.4965893587994543, |
|
"grad_norm": 0.7007748484611511, |
|
"learning_rate": 1.645769092319045e-06, |
|
"loss": 0.1219, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.499317871759891, |
|
"grad_norm": 0.6721235513687134, |
|
"learning_rate": 1.6283352173747148e-06, |
|
"loss": 0.1207, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.5020463847203276, |
|
"grad_norm": 0.648476779460907, |
|
"learning_rate": 1.6109859866529253e-06, |
|
"loss": 0.1216, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.504774897680764, |
|
"grad_norm": 0.6167169213294983, |
|
"learning_rate": 1.5937215755682667e-06, |
|
"loss": 0.1221, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.5075034106412004, |
|
"grad_norm": 0.6264228820800781, |
|
"learning_rate": 1.5765421586777285e-06, |
|
"loss": 0.1197, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.510231923601637, |
|
"grad_norm": 0.6109753847122192, |
|
"learning_rate": 1.559447909678954e-06, |
|
"loss": 0.1212, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.5129604365620737, |
|
"grad_norm": 0.7094171643257141, |
|
"learning_rate": 1.5424390014084644e-06, |
|
"loss": 0.1216, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.5156889495225103, |
|
"grad_norm": 0.9089038968086243, |
|
"learning_rate": 1.5255156058399124e-06, |
|
"loss": 0.1206, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.518417462482947, |
|
"grad_norm": 0.6656555533409119, |
|
"learning_rate": 1.5086778940823544e-06, |
|
"loss": 0.1211, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.5211459754433836, |
|
"grad_norm": 0.6644884943962097, |
|
"learning_rate": 1.4919260363785215e-06, |
|
"loss": 0.1239, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.52387448840382, |
|
"grad_norm": 0.5716986060142517, |
|
"learning_rate": 1.4752602021030794e-06, |
|
"loss": 0.1204, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5266030013642564, |
|
"grad_norm": 0.9487411975860596, |
|
"learning_rate": 1.4586805597609333e-06, |
|
"loss": 0.1214, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.529331514324693, |
|
"grad_norm": 0.8672340512275696, |
|
"learning_rate": 1.4421872769855262e-06, |
|
"loss": 0.1228, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.5320600272851297, |
|
"grad_norm": 0.6485180854797363, |
|
"learning_rate": 1.4257805205371233e-06, |
|
"loss": 0.123, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.5347885402455663, |
|
"grad_norm": 0.593885600566864, |
|
"learning_rate": 1.409460456301147e-06, |
|
"loss": 0.1214, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.5375170532060025, |
|
"grad_norm": 0.5803414583206177, |
|
"learning_rate": 1.3932272492864984e-06, |
|
"loss": 0.1229, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.540245566166439, |
|
"grad_norm": 0.6213256120681763, |
|
"learning_rate": 1.3770810636238685e-06, |
|
"loss": 0.1199, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.542974079126876, |
|
"grad_norm": 0.8107399344444275, |
|
"learning_rate": 1.3610220625641002e-06, |
|
"loss": 0.1205, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.5457025920873124, |
|
"grad_norm": 0.5576758980751038, |
|
"learning_rate": 1.3450504084765381e-06, |
|
"loss": 0.122, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.548431105047749, |
|
"grad_norm": 0.6477549076080322, |
|
"learning_rate": 1.3291662628473634e-06, |
|
"loss": 0.1225, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.5511596180081857, |
|
"grad_norm": 0.5818179249763489, |
|
"learning_rate": 1.313369786277987e-06, |
|
"loss": 0.1191, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.5538881309686223, |
|
"grad_norm": 0.7482567429542542, |
|
"learning_rate": 1.2976611384834148e-06, |
|
"loss": 0.1205, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.5566166439290585, |
|
"grad_norm": 0.6153425574302673, |
|
"learning_rate": 1.2820404782906315e-06, |
|
"loss": 0.1218, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.559345156889495, |
|
"grad_norm": 0.6472384333610535, |
|
"learning_rate": 1.266507963636997e-06, |
|
"loss": 0.1216, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.562073669849932, |
|
"grad_norm": 0.6243811845779419, |
|
"learning_rate": 1.2510637515686497e-06, |
|
"loss": 0.1187, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.5648021828103684, |
|
"grad_norm": 0.643061637878418, |
|
"learning_rate": 1.2357079982389197e-06, |
|
"loss": 0.1197, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.567530695770805, |
|
"grad_norm": 0.592107892036438, |
|
"learning_rate": 1.2204408589067462e-06, |
|
"loss": 0.1231, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.5702592087312413, |
|
"grad_norm": 0.5936471819877625, |
|
"learning_rate": 1.2052624879351105e-06, |
|
"loss": 0.1199, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.572987721691678, |
|
"grad_norm": 0.6120553612709045, |
|
"learning_rate": 1.190173038789476e-06, |
|
"loss": 0.1229, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.5757162346521145, |
|
"grad_norm": 0.5819773077964783, |
|
"learning_rate": 1.175172664036235e-06, |
|
"loss": 0.1205, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.578444747612551, |
|
"grad_norm": 0.5850197076797485, |
|
"learning_rate": 1.1602615153411666e-06, |
|
"loss": 0.1188, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.581173260572988, |
|
"grad_norm": 0.5791446566581726, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"loss": 0.1202, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.5839017735334244, |
|
"grad_norm": 0.6168528199195862, |
|
"learning_rate": 1.1307074982764022e-06, |
|
"loss": 0.1233, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.586630286493861, |
|
"grad_norm": 0.6764042973518372, |
|
"learning_rate": 1.116064928721442e-06, |
|
"loss": 0.1218, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.5893587994542973, |
|
"grad_norm": 0.6462754011154175, |
|
"learning_rate": 1.1015121828511033e-06, |
|
"loss": 0.1189, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.592087312414734, |
|
"grad_norm": 0.557019054889679, |
|
"learning_rate": 1.0870494078052796e-06, |
|
"loss": 0.1194, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.5948158253751705, |
|
"grad_norm": 0.6844297647476196, |
|
"learning_rate": 1.0726767498141877e-06, |
|
"loss": 0.1252, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.597544338335607, |
|
"grad_norm": 0.6181948781013489, |
|
"learning_rate": 1.0583943541968856e-06, |
|
"loss": 0.1204, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.600272851296044, |
|
"grad_norm": 0.6468029022216797, |
|
"learning_rate": 1.044202365359811e-06, |
|
"loss": 0.1213, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.60300136425648, |
|
"grad_norm": 0.607456386089325, |
|
"learning_rate": 1.0301009267953145e-06, |
|
"loss": 0.119, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.6057298772169166, |
|
"grad_norm": 0.6066553592681885, |
|
"learning_rate": 1.0160901810802114e-06, |
|
"loss": 0.1223, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.6084583901773533, |
|
"grad_norm": 0.6003243923187256, |
|
"learning_rate": 1.0021702698743408e-06, |
|
"loss": 0.1239, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.61118690313779, |
|
"grad_norm": 0.5733383297920227, |
|
"learning_rate": 9.883413339191295e-07, |
|
"loss": 0.1206, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.6139154160982265, |
|
"grad_norm": 0.6340444087982178, |
|
"learning_rate": 9.746035130361741e-07, |
|
"loss": 0.1162, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.616643929058663, |
|
"grad_norm": 0.5557238459587097, |
|
"learning_rate": 9.609569461258262e-07, |
|
"loss": 0.1197, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.6193724420191, |
|
"grad_norm": 0.590971827507019, |
|
"learning_rate": 9.474017711657835e-07, |
|
"loss": 0.1203, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.622100954979536, |
|
"grad_norm": 0.6239796280860901, |
|
"learning_rate": 9.339381252097001e-07, |
|
"loss": 0.1248, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.6248294679399726, |
|
"grad_norm": 0.6050336360931396, |
|
"learning_rate": 9.205661443857994e-07, |
|
"loss": 0.1213, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.6275579809004093, |
|
"grad_norm": 0.564030647277832, |
|
"learning_rate": 9.072859638954956e-07, |
|
"loss": 0.1189, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.630286493860846, |
|
"grad_norm": 0.5844472646713257, |
|
"learning_rate": 8.940977180120247e-07, |
|
"loss": 0.121, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.6330150068212825, |
|
"grad_norm": 0.6036733984947205, |
|
"learning_rate": 8.810015400790994e-07, |
|
"loss": 0.1203, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.6357435197817187, |
|
"grad_norm": 0.619272768497467, |
|
"learning_rate": 8.67997562509546e-07, |
|
"loss": 0.12, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.6384720327421554, |
|
"grad_norm": 0.5766704678535461, |
|
"learning_rate": 8.550859167839665e-07, |
|
"loss": 0.1192, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.641200545702592, |
|
"grad_norm": 0.6223350763320923, |
|
"learning_rate": 8.42266733449425e-07, |
|
"loss": 0.1218, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.6439290586630286, |
|
"grad_norm": 0.569657564163208, |
|
"learning_rate": 8.295401421181126e-07, |
|
"loss": 0.1239, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.6466575716234653, |
|
"grad_norm": 0.6090306639671326, |
|
"learning_rate": 8.169062714660347e-07, |
|
"loss": 0.1213, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.649386084583902, |
|
"grad_norm": 0.6015664935112, |
|
"learning_rate": 8.043652492317256e-07, |
|
"loss": 0.122, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.6521145975443385, |
|
"grad_norm": 0.5519795417785645, |
|
"learning_rate": 7.919172022149458e-07, |
|
"loss": 0.1204, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.6548431105047747, |
|
"grad_norm": 0.6051272749900818, |
|
"learning_rate": 7.795622562753957e-07, |
|
"loss": 0.1175, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.6575716234652114, |
|
"grad_norm": 0.5712152123451233, |
|
"learning_rate": 7.673005363314578e-07, |
|
"loss": 0.1234, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.660300136425648, |
|
"grad_norm": 0.5520964860916138, |
|
"learning_rate": 7.551321663589229e-07, |
|
"loss": 0.1198, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6630286493860846, |
|
"grad_norm": 0.711789608001709, |
|
"learning_rate": 7.430572693897342e-07, |
|
"loss": 0.1188, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.6657571623465213, |
|
"grad_norm": 0.6316415667533875, |
|
"learning_rate": 7.310759675107515e-07, |
|
"loss": 0.1197, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.6684856753069575, |
|
"grad_norm": 0.6260297298431396, |
|
"learning_rate": 7.19188381862519e-07, |
|
"loss": 0.1195, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.6712141882673945, |
|
"grad_norm": 0.5559263229370117, |
|
"learning_rate": 7.073946326380243e-07, |
|
"loss": 0.1211, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.6739427012278307, |
|
"grad_norm": 0.5541836619377136, |
|
"learning_rate": 6.956948390814977e-07, |
|
"loss": 0.1206, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.6766712141882674, |
|
"grad_norm": 0.5544542074203491, |
|
"learning_rate": 6.840891194872112e-07, |
|
"loss": 0.1212, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.679399727148704, |
|
"grad_norm": 0.5722479224205017, |
|
"learning_rate": 6.725775911982602e-07, |
|
"loss": 0.1162, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.6821282401091406, |
|
"grad_norm": 0.58185213804245, |
|
"learning_rate": 6.61160370605397e-07, |
|
"loss": 0.1204, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.6848567530695773, |
|
"grad_norm": 0.5623180270195007, |
|
"learning_rate": 6.498375731458529e-07, |
|
"loss": 0.1197, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.6875852660300135, |
|
"grad_norm": 0.7158675193786621, |
|
"learning_rate": 6.386093133021554e-07, |
|
"loss": 0.12, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.69031377899045, |
|
"grad_norm": 0.5737314224243164, |
|
"learning_rate": 6.274757046009871e-07, |
|
"loss": 0.12, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.6930422919508867, |
|
"grad_norm": 0.5828775763511658, |
|
"learning_rate": 6.164368596120351e-07, |
|
"loss": 0.1187, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.6957708049113234, |
|
"grad_norm": 0.6204085946083069, |
|
"learning_rate": 6.054928899468427e-07, |
|
"loss": 0.1183, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.69849931787176, |
|
"grad_norm": 0.6081199645996094, |
|
"learning_rate": 5.946439062576903e-07, |
|
"loss": 0.1198, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.701227830832196, |
|
"grad_norm": 0.5731498599052429, |
|
"learning_rate": 5.83890018236476e-07, |
|
"loss": 0.1194, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.7039563437926333, |
|
"grad_norm": 0.5686942934989929, |
|
"learning_rate": 5.732313346136032e-07, |
|
"loss": 0.1209, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.7066848567530695, |
|
"grad_norm": 0.7291159629821777, |
|
"learning_rate": 5.626679631568832e-07, |
|
"loss": 0.1197, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.709413369713506, |
|
"grad_norm": 0.609981119632721, |
|
"learning_rate": 5.52200010670444e-07, |
|
"loss": 0.1171, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.7121418826739427, |
|
"grad_norm": 0.5696244239807129, |
|
"learning_rate": 5.418275829936537e-07, |
|
"loss": 0.1223, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.7148703956343794, |
|
"grad_norm": 0.5775014162063599, |
|
"learning_rate": 5.315507850000456e-07, |
|
"loss": 0.1208, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.717598908594816, |
|
"grad_norm": 0.5481632947921753, |
|
"learning_rate": 5.213697205962631e-07, |
|
"loss": 0.1217, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.720327421555252, |
|
"grad_norm": 0.6667028665542603, |
|
"learning_rate": 5.112844927210048e-07, |
|
"loss": 0.1161, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.723055934515689, |
|
"grad_norm": 0.5523772835731506, |
|
"learning_rate": 5.012952033439844e-07, |
|
"loss": 0.118, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.7257844474761255, |
|
"grad_norm": 0.5393189787864685, |
|
"learning_rate": 4.914019534649039e-07, |
|
"loss": 0.1234, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.728512960436562, |
|
"grad_norm": 0.5639728903770447, |
|
"learning_rate": 4.816048431124265e-07, |
|
"loss": 0.1194, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7312414733969987, |
|
"grad_norm": 0.6052538156509399, |
|
"learning_rate": 4.7190397134316946e-07, |
|
"loss": 0.122, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.733969986357435, |
|
"grad_norm": 0.5383861660957336, |
|
"learning_rate": 4.6229943624069963e-07, |
|
"loss": 0.1183, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.736698499317872, |
|
"grad_norm": 0.5372768640518188, |
|
"learning_rate": 4.5279133491454406e-07, |
|
"loss": 0.1174, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.739427012278308, |
|
"grad_norm": 0.5531887412071228, |
|
"learning_rate": 4.4337976349920763e-07, |
|
"loss": 0.1198, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.742155525238745, |
|
"grad_norm": 0.622898519039154, |
|
"learning_rate": 4.3406481715319916e-07, |
|
"loss": 0.1183, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.7448840381991815, |
|
"grad_norm": 0.5865097045898438, |
|
"learning_rate": 4.248465900580734e-07, |
|
"loss": 0.1235, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.747612551159618, |
|
"grad_norm": 0.5709042549133301, |
|
"learning_rate": 4.1572517541747294e-07, |
|
"loss": 0.1184, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.7503410641200547, |
|
"grad_norm": 0.5316457152366638, |
|
"learning_rate": 4.0670066545619224e-07, |
|
"loss": 0.1208, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.753069577080491, |
|
"grad_norm": 0.5730975866317749, |
|
"learning_rate": 3.9777315141923847e-07, |
|
"loss": 0.1203, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.7557980900409276, |
|
"grad_norm": 0.5913178324699402, |
|
"learning_rate": 3.889427235709153e-07, |
|
"loss": 0.1166, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.758526603001364, |
|
"grad_norm": 0.5924515724182129, |
|
"learning_rate": 3.802094711939075e-07, |
|
"loss": 0.1225, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.761255115961801, |
|
"grad_norm": 0.5406906604766846, |
|
"learning_rate": 3.715734825883766e-07, |
|
"loss": 0.1203, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.7639836289222375, |
|
"grad_norm": 0.5417434573173523, |
|
"learning_rate": 3.6303484507106965e-07, |
|
"loss": 0.1225, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.7667121418826737, |
|
"grad_norm": 0.5310923457145691, |
|
"learning_rate": 3.5459364497443696e-07, |
|
"loss": 0.1223, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.7694406548431107, |
|
"grad_norm": 0.60945725440979, |
|
"learning_rate": 3.462499676457598e-07, |
|
"loss": 0.1172, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.772169167803547, |
|
"grad_norm": 0.5810585021972656, |
|
"learning_rate": 3.38003897446284e-07, |
|
"loss": 0.1197, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.7748976807639836, |
|
"grad_norm": 0.5767588019371033, |
|
"learning_rate": 3.298555177503726e-07, |
|
"loss": 0.1206, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.77762619372442, |
|
"grad_norm": 0.5493221282958984, |
|
"learning_rate": 3.2180491094465414e-07, |
|
"loss": 0.1209, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.780354706684857, |
|
"grad_norm": 0.5434983968734741, |
|
"learning_rate": 3.138521584272003e-07, |
|
"loss": 0.1233, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.7830832196452935, |
|
"grad_norm": 0.6788725256919861, |
|
"learning_rate": 3.059973406066963e-07, |
|
"loss": 0.121, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.7858117326057297, |
|
"grad_norm": 0.5693413615226746, |
|
"learning_rate": 2.982405369016272e-07, |
|
"loss": 0.1204, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.7885402455661663, |
|
"grad_norm": 0.5602209568023682, |
|
"learning_rate": 2.905818257394799e-07, |
|
"loss": 0.1185, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.791268758526603, |
|
"grad_norm": 0.5665656328201294, |
|
"learning_rate": 2.830212845559466e-07, |
|
"loss": 0.1225, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.7939972714870396, |
|
"grad_norm": 0.5789375305175781, |
|
"learning_rate": 2.7555898979413796e-07, |
|
"loss": 0.1199, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.796725784447476, |
|
"grad_norm": 0.5932232141494751, |
|
"learning_rate": 2.6819501690382275e-07, |
|
"loss": 0.1193, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.799454297407913, |
|
"grad_norm": 0.6497074961662292, |
|
"learning_rate": 2.609294403406537e-07, |
|
"loss": 0.1217, |
|
"step": 1026 |
|
}, |
|
{ |
      "epoch": 2.8021828103683495,
      "grad_norm": 0.5614838004112244,
      "learning_rate": 2.537623335654127e-07,
      "loss": 0.1187,
      "step": 1027
    },
    {
      "epoch": 2.8049113233287857,
      "grad_norm": 0.5890393257141113,
      "learning_rate": 2.4669376904328244e-07,
      "loss": 0.1208,
      "step": 1028
    },
    {
      "epoch": 2.8076398362892223,
      "grad_norm": 0.5420589447021484,
      "learning_rate": 2.397238182430994e-07,
      "loss": 0.1171,
      "step": 1029
    },
    {
      "epoch": 2.810368349249659,
      "grad_norm": 0.5810146331787109,
      "learning_rate": 2.3285255163663535e-07,
      "loss": 0.1207,
      "step": 1030
    },
    {
      "epoch": 2.8130968622100956,
      "grad_norm": 0.6615646481513977,
      "learning_rate": 2.2608003869788786e-07,
      "loss": 0.1209,
      "step": 1031
    },
    {
      "epoch": 2.815825375170532,
      "grad_norm": 0.5754259824752808,
      "learning_rate": 2.1940634790238003e-07,
      "loss": 0.1208,
      "step": 1032
    },
    {
      "epoch": 2.8185538881309684,
      "grad_norm": 0.642939567565918,
      "learning_rate": 2.1283154672645522e-07,
      "loss": 0.1205,
      "step": 1033
    },
    {
      "epoch": 2.821282401091405,
      "grad_norm": 0.5582534074783325,
      "learning_rate": 2.063557016466111e-07,
      "loss": 0.1188,
      "step": 1034
    },
    {
      "epoch": 2.8240109140518417,
      "grad_norm": 0.5194653272628784,
      "learning_rate": 1.999788781388201e-07,
      "loss": 0.1202,
      "step": 1035
    },
    {
      "epoch": 2.8267394270122783,
      "grad_norm": 0.5354530215263367,
      "learning_rate": 1.9370114067785995e-07,
      "loss": 0.1196,
      "step": 1036
    },
    {
      "epoch": 2.829467939972715,
      "grad_norm": 0.5607388615608215,
      "learning_rate": 1.8752255273667752e-07,
      "loss": 0.1166,
      "step": 1037
    },
    {
      "epoch": 2.8321964529331516,
      "grad_norm": 0.5641687512397766,
      "learning_rate": 1.8144317678573497e-07,
      "loss": 0.1181,
      "step": 1038
    },
    {
      "epoch": 2.8349249658935882,
      "grad_norm": 0.5822216272354126,
      "learning_rate": 1.7546307429238129e-07,
      "loss": 0.1195,
      "step": 1039
    },
    {
      "epoch": 2.8376534788540244,
      "grad_norm": 0.547010600566864,
      "learning_rate": 1.6958230572023504e-07,
      "loss": 0.1192,
      "step": 1040
    },
    {
      "epoch": 2.840381991814461,
      "grad_norm": 0.6075326204299927,
      "learning_rate": 1.6380093052856482e-07,
      "loss": 0.1186,
      "step": 1041
    },
    {
      "epoch": 2.8431105047748977,
      "grad_norm": 0.5427007675170898,
      "learning_rate": 1.5811900717169537e-07,
      "loss": 0.122,
      "step": 1042
    },
    {
      "epoch": 2.8458390177353343,
      "grad_norm": 0.5267951488494873,
      "learning_rate": 1.5253659309841463e-07,
      "loss": 0.1196,
      "step": 1043
    },
    {
      "epoch": 2.848567530695771,
      "grad_norm": 0.5560564398765564,
      "learning_rate": 1.4705374475138978e-07,
      "loss": 0.121,
      "step": 1044
    },
    {
      "epoch": 2.851296043656207,
      "grad_norm": 0.7646809220314026,
      "learning_rate": 1.416705175666e-07,
      "loss": 0.12,
      "step": 1045
    },
    {
      "epoch": 2.854024556616644,
      "grad_norm": 0.5174649953842163,
      "learning_rate": 1.3638696597277678e-07,
      "loss": 0.1195,
      "step": 1046
    },
    {
      "epoch": 2.8567530695770804,
      "grad_norm": 0.549839198589325,
      "learning_rate": 1.3120314339084782e-07,
      "loss": 0.1179,
      "step": 1047
    },
    {
      "epoch": 2.859481582537517,
      "grad_norm": 0.6386695504188538,
      "learning_rate": 1.2611910223340408e-07,
      "loss": 0.1194,
      "step": 1048
    },
    {
      "epoch": 2.8622100954979537,
      "grad_norm": 0.5545169115066528,
      "learning_rate": 1.2113489390416565e-07,
      "loss": 0.1204,
      "step": 1049
    },
    {
      "epoch": 2.8649386084583903,
      "grad_norm": 0.5403316617012024,
      "learning_rate": 1.1625056879746133e-07,
      "loss": 0.1181,
      "step": 1050
    },
    {
      "epoch": 2.867667121418827,
      "grad_norm": 0.6179401278495789,
      "learning_rate": 1.1146617629772316e-07,
      "loss": 0.1206,
      "step": 1051
    },
    {
      "epoch": 2.870395634379263,
      "grad_norm": 0.5648332834243774,
      "learning_rate": 1.0678176477898372e-07,
      "loss": 0.1189,
      "step": 1052
    },
    {
      "epoch": 2.8731241473397,
      "grad_norm": 0.6361631751060486,
      "learning_rate": 1.0219738160438753e-07,
      "loss": 0.1212,
      "step": 1053
    },
    {
      "epoch": 2.8758526603001364,
      "grad_norm": 0.5742694735527039,
      "learning_rate": 9.771307312571254e-08,
      "loss": 0.1185,
      "step": 1054
    },
    {
      "epoch": 2.878581173260573,
      "grad_norm": 0.5447854995727539,
      "learning_rate": 9.332888468290168e-08,
      "loss": 0.1192,
      "step": 1055
    },
    {
      "epoch": 2.8813096862210097,
      "grad_norm": 0.5452158451080322,
      "learning_rate": 8.90448606036054e-08,
      "loss": 0.1182,
      "step": 1056
    },
    {
      "epoch": 2.884038199181446,
      "grad_norm": 0.5986908674240112,
      "learning_rate": 8.486104420272979e-08,
      "loss": 0.1213,
      "step": 1057
    },
    {
      "epoch": 2.8867667121418825,
      "grad_norm": 0.5244796276092529,
      "learning_rate": 8.077747778200474e-08,
      "loss": 0.1218,
      "step": 1058
    },
    {
      "epoch": 2.889495225102319,
      "grad_norm": 0.5394319295883179,
      "learning_rate": 7.679420262954984e-08,
      "loss": 0.1195,
      "step": 1059
    },
    {
      "epoch": 2.892223738062756,
      "grad_norm": 0.6105839014053345,
      "learning_rate": 7.291125901946027e-08,
      "loss": 0.1209,
      "step": 1060
    },
    {
      "epoch": 2.8949522510231924,
      "grad_norm": 0.5574126839637756,
      "learning_rate": 6.912868621140045e-08,
      "loss": 0.1206,
      "step": 1061
    },
    {
      "epoch": 2.897680763983629,
      "grad_norm": 0.5388451218605042,
      "learning_rate": 6.544652245020433e-08,
      "loss": 0.1211,
      "step": 1062
    },
    {
      "epoch": 2.9004092769440657,
      "grad_norm": 0.5984753370285034,
      "learning_rate": 6.18648049654913e-08,
      "loss": 0.1201,
      "step": 1063
    },
    {
      "epoch": 2.903137789904502,
      "grad_norm": 0.5538267493247986,
      "learning_rate": 5.838356997128869e-08,
      "loss": 0.1198,
      "step": 1064
    },
    {
      "epoch": 2.9058663028649385,
      "grad_norm": 0.6062244772911072,
      "learning_rate": 5.500285266566319e-08,
      "loss": 0.1192,
      "step": 1065
    },
    {
      "epoch": 2.908594815825375,
      "grad_norm": 0.5678040385246277,
      "learning_rate": 5.1722687230369995e-08,
      "loss": 0.1214,
      "step": 1066
    },
    {
      "epoch": 2.911323328785812,
      "grad_norm": 0.5567272901535034,
      "learning_rate": 4.854310683050312e-08,
      "loss": 0.1152,
      "step": 1067
    },
    {
      "epoch": 2.9140518417462484,
      "grad_norm": 0.551325798034668,
      "learning_rate": 4.5464143614162294e-08,
      "loss": 0.1211,
      "step": 1068
    },
    {
      "epoch": 2.9167803547066846,
      "grad_norm": 0.5160740613937378,
      "learning_rate": 4.2485828712126584e-08,
      "loss": 0.1174,
      "step": 1069
    },
    {
      "epoch": 2.9195088676671213,
      "grad_norm": 0.5358514189720154,
      "learning_rate": 3.96081922375402e-08,
      "loss": 0.1187,
      "step": 1070
    },
    {
      "epoch": 2.922237380627558,
      "grad_norm": 0.5362658500671387,
      "learning_rate": 3.683126328560826e-08,
      "loss": 0.1174,
      "step": 1071
    },
    {
      "epoch": 2.9249658935879945,
      "grad_norm": 0.6440585851669312,
      "learning_rate": 3.4155069933301535e-08,
      "loss": 0.1143,
      "step": 1072
    },
    {
      "epoch": 2.927694406548431,
      "grad_norm": 0.5408025979995728,
      "learning_rate": 3.1579639239074364e-08,
      "loss": 0.119,
      "step": 1073
    },
    {
      "epoch": 2.930422919508868,
      "grad_norm": 0.5744211673736572,
      "learning_rate": 2.9104997242590528e-08,
      "loss": 0.1212,
      "step": 1074
    },
    {
      "epoch": 2.9331514324693044,
      "grad_norm": 0.5802960991859436,
      "learning_rate": 2.673116896445671e-08,
      "loss": 0.1225,
      "step": 1075
    },
    {
      "epoch": 2.9358799454297406,
      "grad_norm": 0.6244254112243652,
      "learning_rate": 2.4458178405974974e-08,
      "loss": 0.121,
      "step": 1076
    },
    {
      "epoch": 2.9386084583901773,
      "grad_norm": 0.5513128638267517,
      "learning_rate": 2.2286048548897378e-08,
      "loss": 0.121,
      "step": 1077
    },
    {
      "epoch": 2.941336971350614,
      "grad_norm": 0.5372868776321411,
      "learning_rate": 2.0214801355192826e-08,
      "loss": 0.1208,
      "step": 1078
    },
    {
      "epoch": 2.9440654843110505,
      "grad_norm": 0.550727903842926,
      "learning_rate": 1.824445776682504e-08,
      "loss": 0.1193,
      "step": 1079
    },
    {
      "epoch": 2.946793997271487,
      "grad_norm": 0.5490522384643555,
      "learning_rate": 1.6375037705543827e-08,
      "loss": 0.1197,
      "step": 1080
    },
    {
      "epoch": 2.9495225102319234,
      "grad_norm": 0.5467248558998108,
      "learning_rate": 1.4606560072679687e-08,
      "loss": 0.1187,
      "step": 1081
    },
    {
      "epoch": 2.9522510231923604,
      "grad_norm": 0.5518107414245605,
      "learning_rate": 1.2939042748955078e-08,
      "loss": 0.1192,
      "step": 1082
    },
    {
      "epoch": 2.9549795361527966,
      "grad_norm": 0.5449725985527039,
      "learning_rate": 1.1372502594303448e-08,
      "loss": 0.1187,
      "step": 1083
    },
    {
      "epoch": 2.9577080491132333,
      "grad_norm": 0.546035885810852,
      "learning_rate": 9.906955447697153e-09,
      "loss": 0.1199,
      "step": 1084
    },
    {
      "epoch": 2.96043656207367,
      "grad_norm": 0.6331919431686401,
      "learning_rate": 8.542416126989805e-09,
      "loss": 0.116,
      "step": 1085
    },
    {
      "epoch": 2.9631650750341065,
      "grad_norm": 0.5184534192085266,
      "learning_rate": 7.278898428764169e-09,
      "loss": 0.1202,
      "step": 1086
    },
    {
      "epoch": 2.965893587994543,
      "grad_norm": 0.5950097441673279,
      "learning_rate": 6.1164151281944974e-09,
      "loss": 0.1163,
      "step": 1087
    },
    {
      "epoch": 2.9686221009549794,
      "grad_norm": 0.5502780675888062,
      "learning_rate": 5.054977978916631e-09,
      "loss": 0.1175,
      "step": 1088
    },
    {
      "epoch": 2.971350613915416,
      "grad_norm": 0.5208196640014648,
      "learning_rate": 4.094597712908099e-09,
      "loss": 0.1206,
      "step": 1089
    },
    {
      "epoch": 2.9740791268758526,
      "grad_norm": 0.5595605969429016,
      "learning_rate": 3.2352840403804264e-09,
      "loss": 0.1185,
      "step": 1090
    },
    {
      "epoch": 2.9768076398362893,
      "grad_norm": 0.6191928386688232,
      "learning_rate": 2.477045649681431e-09,
      "loss": 0.1181,
      "step": 1091
    },
    {
      "epoch": 2.979536152796726,
      "grad_norm": 0.5491606593132019,
      "learning_rate": 1.8198902072097402e-09,
      "loss": 0.1194,
      "step": 1092
    },
    {
      "epoch": 2.982264665757162,
      "grad_norm": 0.5842803120613098,
      "learning_rate": 1.2638243573293019e-09,
      "loss": 0.1201,
      "step": 1093
    },
    {
      "epoch": 2.984993178717599,
      "grad_norm": 0.5514943599700928,
      "learning_rate": 8.088537223116533e-10,
      "loss": 0.117,
      "step": 1094
    },
    {
      "epoch": 2.9877216916780354,
      "grad_norm": 0.603424072265625,
      "learning_rate": 4.549829022748586e-10,
      "loss": 0.1182,
      "step": 1095
    },
    {
      "epoch": 2.990450204638472,
      "grad_norm": 0.59501051902771,
      "learning_rate": 2.02215475132439e-10,
      "loss": 0.1163,
      "step": 1096
    },
    {
      "epoch": 2.9931787175989086,
      "grad_norm": 0.6325620412826538,
      "learning_rate": 5.0553996568947216e-11,
      "loss": 0.1229,
      "step": 1097
    },
    {
      "epoch": 2.9959072305593453,
      "grad_norm": 0.536891520023346,
      "learning_rate": 0.0,
      "loss": 0.1192,
      "step": 1098
    },
    {
      "epoch": 2.9959072305593453,
      "step": 1098,
      "total_flos": 1.338644246774258e+19,
      "train_loss": 0.3336014450614244,
      "train_runtime": 19884.0933,
      "train_samples_per_second": 7.076,
      "train_steps_per_second": 0.055
    }
  ],
  "logging_steps": 1,
  "max_steps": 1098,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 999999,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.338644246774258e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}