{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997951239500102, "eval_steps": 500, "global_step": 3660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.540920734405518, "learning_rate": 1.818181818181818e-08, "loss": 0.1451, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.235923767089844, "learning_rate": 3.636363636363636e-08, "loss": 0.1932, "step": 2 }, { "epoch": 0.0, "grad_norm": 6.495096683502197, "learning_rate": 5.454545454545454e-08, "loss": 0.2138, "step": 3 }, { "epoch": 0.0, "grad_norm": 5.446401119232178, "learning_rate": 7.272727272727273e-08, "loss": 0.1645, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.433992862701416, "learning_rate": 9.09090909090909e-08, "loss": 0.1969, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.203385353088379, "learning_rate": 1.0909090909090908e-07, "loss": 0.2056, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.300374984741211, "learning_rate": 1.2727272727272726e-07, "loss": 0.138, "step": 7 }, { "epoch": 0.0, "grad_norm": 5.246260643005371, "learning_rate": 1.4545454545454545e-07, "loss": 0.1673, "step": 8 }, { "epoch": 0.0, "grad_norm": 5.447892665863037, "learning_rate": 1.6363636363636364e-07, "loss": 0.1739, "step": 9 }, { "epoch": 0.0, "grad_norm": 5.377528190612793, "learning_rate": 1.818181818181818e-07, "loss": 0.168, "step": 10 }, { "epoch": 0.0, "grad_norm": 5.494555950164795, "learning_rate": 2e-07, "loss": 0.173, "step": 11 }, { "epoch": 0.0, "grad_norm": 5.380703926086426, "learning_rate": 2.1818181818181815e-07, "loss": 0.1594, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.685830116271973, "learning_rate": 2.3636363636363634e-07, "loss": 0.1892, "step": 13 }, { "epoch": 0.0, "grad_norm": 4.251393795013428, "learning_rate": 2.5454545454545453e-07, "loss": 0.123, "step": 14 }, { "epoch": 0.0, "grad_norm": 4.709303379058838, "learning_rate": 2.727272727272727e-07, "loss": 0.1555, "step": 15 }, { "epoch": 0.0, "grad_norm": 5.874042987823486, "learning_rate": 2.909090909090909e-07, "loss": 0.1811, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.070192337036133, "learning_rate": 3.0909090909090907e-07, "loss": 0.1657, "step": 17 }, { "epoch": 0.0, "grad_norm": 4.039353370666504, "learning_rate": 3.272727272727273e-07, "loss": 0.1373, "step": 18 }, { "epoch": 0.01, "grad_norm": 5.17448091506958, "learning_rate": 3.4545454545454544e-07, "loss": 0.1782, "step": 19 }, { "epoch": 0.01, "grad_norm": 5.469040393829346, "learning_rate": 3.636363636363636e-07, "loss": 0.1865, "step": 20 }, { "epoch": 0.01, "grad_norm": 4.591649532318115, "learning_rate": 3.818181818181818e-07, "loss": 0.1772, "step": 21 }, { "epoch": 0.01, "grad_norm": 3.8730628490448, "learning_rate": 4e-07, "loss": 0.1458, "step": 22 }, { "epoch": 0.01, "grad_norm": 3.1940574645996094, "learning_rate": 4.1818181818181814e-07, "loss": 0.117, "step": 23 }, { "epoch": 0.01, "grad_norm": 3.8417506217956543, "learning_rate": 4.363636363636363e-07, "loss": 0.1477, "step": 24 }, { "epoch": 0.01, "grad_norm": 3.924102306365967, "learning_rate": 4.545454545454545e-07, "loss": 0.1496, "step": 25 }, { "epoch": 0.01, "grad_norm": 4.038068771362305, "learning_rate": 4.727272727272727e-07, "loss": 0.1745, "step": 26 }, { "epoch": 0.01, "grad_norm": 3.5735890865325928, "learning_rate": 4.909090909090909e-07, "loss": 0.1278, "step": 27 }, { "epoch": 0.01, "grad_norm": 4.1404571533203125, "learning_rate": 5.090909090909091e-07, "loss": 0.1648, "step": 28 }, { "epoch": 0.01, "grad_norm": 4.566656112670898, "learning_rate": 5.272727272727272e-07, "loss": 0.1759, "step": 29 }, { "epoch": 0.01, "grad_norm": 3.562042713165283, "learning_rate": 5.454545454545454e-07, "loss": 0.1438, "step": 30 }, { "epoch": 0.01, "grad_norm": 3.4260830879211426, "learning_rate": 5.636363636363635e-07, "loss": 0.1328, "step": 31 }, { "epoch": 0.01, "grad_norm": 3.2388596534729004, "learning_rate": 5.818181818181818e-07, "loss": 0.1379, "step": 32 }, { "epoch": 0.01, "grad_norm": 3.5923690795898438, "learning_rate": 6e-07, "loss": 0.143, "step": 33 }, { "epoch": 0.01, "grad_norm": 3.057919979095459, "learning_rate": 6.181818181818181e-07, "loss": 0.1169, "step": 34 }, { "epoch": 0.01, "grad_norm": 3.3284711837768555, "learning_rate": 6.363636363636363e-07, "loss": 0.125, "step": 35 }, { "epoch": 0.01, "grad_norm": 3.4238357543945312, "learning_rate": 6.545454545454546e-07, "loss": 0.1247, "step": 36 }, { "epoch": 0.01, "grad_norm": 3.1603212356567383, "learning_rate": 6.727272727272727e-07, "loss": 0.1399, "step": 37 }, { "epoch": 0.01, "grad_norm": 3.2443954944610596, "learning_rate": 6.909090909090909e-07, "loss": 0.1494, "step": 38 }, { "epoch": 0.01, "grad_norm": 3.3392746448516846, "learning_rate": 7.09090909090909e-07, "loss": 0.1325, "step": 39 }, { "epoch": 0.01, "grad_norm": 3.302252769470215, "learning_rate": 7.272727272727272e-07, "loss": 0.1234, "step": 40 }, { "epoch": 0.01, "grad_norm": 3.4786174297332764, "learning_rate": 7.454545454545455e-07, "loss": 0.1372, "step": 41 }, { "epoch": 0.01, "grad_norm": 3.567997694015503, "learning_rate": 7.636363636363636e-07, "loss": 0.1225, "step": 42 }, { "epoch": 0.01, "grad_norm": 3.4705660343170166, "learning_rate": 7.818181818181818e-07, "loss": 0.123, "step": 43 }, { "epoch": 0.01, "grad_norm": 3.383411169052124, "learning_rate": 8e-07, "loss": 0.1274, "step": 44 }, { "epoch": 0.01, "grad_norm": 3.456897258758545, "learning_rate": 8.181818181818182e-07, "loss": 0.1047, "step": 45 }, { "epoch": 0.01, "grad_norm": 3.369755268096924, "learning_rate": 8.363636363636363e-07, "loss": 0.1195, "step": 46 }, { "epoch": 0.01, "grad_norm": 3.9524924755096436, "learning_rate": 8.545454545454544e-07, "loss": 0.1294, "step": 47 }, { "epoch": 0.01, "grad_norm": 3.9460361003875732, "learning_rate": 8.727272727272726e-07, "loss": 0.1534, "step": 48 }, { "epoch": 0.01, "grad_norm": 3.7565395832061768, "learning_rate": 8.909090909090909e-07, "loss": 0.1297, "step": 49 }, { "epoch": 0.01, "grad_norm": 3.749035596847534, "learning_rate": 9.09090909090909e-07, "loss": 0.1435, "step": 50 }, { "epoch": 0.01, "grad_norm": 3.6407580375671387, "learning_rate": 9.272727272727272e-07, "loss": 0.1163, "step": 51 }, { "epoch": 0.01, "grad_norm": 3.7361981868743896, "learning_rate": 9.454545454545454e-07, "loss": 0.1302, "step": 52 }, { "epoch": 0.01, "grad_norm": 3.205831527709961, "learning_rate": 9.636363636363636e-07, "loss": 0.0986, "step": 53 }, { "epoch": 0.01, "grad_norm": 3.8467040061950684, "learning_rate": 9.818181818181818e-07, "loss": 0.1389, "step": 54 }, { "epoch": 0.02, "grad_norm": 3.386436700820923, "learning_rate": 1e-06, "loss": 0.1114, "step": 55 }, { "epoch": 0.02, "grad_norm": 3.471832752227783, "learning_rate": 1.0181818181818181e-06, "loss": 0.1278, "step": 56 }, { "epoch": 0.02, "grad_norm": 3.7745180130004883, "learning_rate": 1.0363636363636363e-06, "loss": 0.1184, "step": 57 }, { "epoch": 0.02, "grad_norm": 3.520988702774048, "learning_rate": 1.0545454545454544e-06, "loss": 0.1025, "step": 58 }, { "epoch": 0.02, "grad_norm": 3.45341157913208, "learning_rate": 1.0727272727272726e-06, "loss": 0.1162, "step": 59 }, { "epoch": 0.02, "grad_norm": 3.98226261138916, "learning_rate": 1.0909090909090908e-06, "loss": 0.1496, "step": 60 }, { "epoch": 0.02, "grad_norm": 3.621644973754883, "learning_rate": 1.1090909090909091e-06, "loss": 0.1232, "step": 61 }, { "epoch": 0.02, "grad_norm": 3.586500406265259, "learning_rate": 1.127272727272727e-06, "loss": 0.1345, "step": 62 }, { "epoch": 0.02, "grad_norm": 3.3112382888793945, "learning_rate": 1.1454545454545455e-06, "loss": 0.1423, "step": 63 }, { "epoch": 0.02, "grad_norm": 3.1299896240234375, "learning_rate": 1.1636363636363636e-06, "loss": 0.1267, "step": 64 }, { "epoch": 0.02, "grad_norm": 3.3250696659088135, "learning_rate": 1.1818181818181818e-06, "loss": 0.1108, "step": 65 }, { "epoch": 0.02, "grad_norm": 3.5346617698669434, "learning_rate": 1.2e-06, "loss": 0.1243, "step": 66 }, { "epoch": 0.02, "grad_norm": 3.2549474239349365, "learning_rate": 1.2181818181818181e-06, "loss": 0.1323, "step": 67 }, { "epoch": 0.02, "grad_norm": 3.8255019187927246, "learning_rate": 1.2363636363636363e-06, "loss": 0.1358, "step": 68 }, { "epoch": 0.02, "grad_norm": 3.330427646636963, "learning_rate": 1.2545454545454546e-06, "loss": 0.1232, "step": 69 }, { "epoch": 0.02, "grad_norm": 3.0509235858917236, "learning_rate": 1.2727272727272726e-06, "loss": 0.103, "step": 70 }, { "epoch": 0.02, "grad_norm": 3.553762912750244, "learning_rate": 1.290909090909091e-06, "loss": 0.1228, "step": 71 }, { "epoch": 0.02, "grad_norm": 3.2095208168029785, "learning_rate": 1.3090909090909091e-06, "loss": 0.1181, "step": 72 }, { "epoch": 0.02, "grad_norm": 3.5218029022216797, "learning_rate": 1.3272727272727273e-06, "loss": 0.117, "step": 73 }, { "epoch": 0.02, "grad_norm": 3.9620566368103027, "learning_rate": 1.3454545454545455e-06, "loss": 0.1481, "step": 74 }, { "epoch": 0.02, "grad_norm": 3.876711368560791, "learning_rate": 1.3636363636363634e-06, "loss": 0.1501, "step": 75 }, { "epoch": 0.02, "grad_norm": 4.166055202484131, "learning_rate": 1.3818181818181818e-06, "loss": 0.1201, "step": 76 }, { "epoch": 0.02, "grad_norm": 3.5768558979034424, "learning_rate": 1.4e-06, "loss": 0.1096, "step": 77 }, { "epoch": 0.02, "grad_norm": 3.830570936203003, "learning_rate": 1.418181818181818e-06, "loss": 0.1181, "step": 78 }, { "epoch": 0.02, "grad_norm": 3.5578572750091553, "learning_rate": 1.4363636363636363e-06, "loss": 0.1111, "step": 79 }, { "epoch": 0.02, "grad_norm": 3.674180030822754, "learning_rate": 1.4545454545454544e-06, "loss": 0.1187, "step": 80 }, { "epoch": 0.02, "grad_norm": 3.9807794094085693, "learning_rate": 1.4727272727272726e-06, "loss": 0.1532, "step": 81 }, { "epoch": 0.02, "grad_norm": 3.374263048171997, "learning_rate": 1.490909090909091e-06, "loss": 0.1135, "step": 82 }, { "epoch": 0.02, "grad_norm": 3.739839553833008, "learning_rate": 1.509090909090909e-06, "loss": 0.1328, "step": 83 }, { "epoch": 0.02, "grad_norm": 3.470029354095459, "learning_rate": 1.5272727272727273e-06, "loss": 0.1078, "step": 84 }, { "epoch": 0.02, "grad_norm": 3.3445234298706055, "learning_rate": 1.5454545454545454e-06, "loss": 0.1257, "step": 85 }, { "epoch": 0.02, "grad_norm": 3.2428488731384277, "learning_rate": 1.5636363636363636e-06, "loss": 0.1207, "step": 86 }, { "epoch": 0.02, "grad_norm": 3.345752477645874, "learning_rate": 1.5818181818181818e-06, "loss": 0.124, "step": 87 }, { "epoch": 0.02, "grad_norm": 3.6470723152160645, "learning_rate": 1.6e-06, "loss": 0.1366, "step": 88 }, { "epoch": 0.02, "grad_norm": 3.7567741870880127, "learning_rate": 1.618181818181818e-06, "loss": 0.1384, "step": 89 }, { "epoch": 0.02, "grad_norm": 3.5427284240722656, "learning_rate": 1.6363636363636365e-06, "loss": 0.1439, "step": 90 }, { "epoch": 0.02, "grad_norm": 3.694549560546875, "learning_rate": 1.6545454545454544e-06, "loss": 0.128, "step": 91 }, { "epoch": 0.03, "grad_norm": 3.5761115550994873, "learning_rate": 1.6727272727272726e-06, "loss": 0.1247, "step": 92 }, { "epoch": 0.03, "grad_norm": 3.4093759059906006, "learning_rate": 1.6909090909090907e-06, "loss": 0.1429, "step": 93 }, { "epoch": 0.03, "grad_norm": 3.4228475093841553, "learning_rate": 1.709090909090909e-06, "loss": 0.1295, "step": 94 }, { "epoch": 0.03, "grad_norm": 4.1292195320129395, "learning_rate": 1.7272727272727273e-06, "loss": 0.1133, "step": 95 }, { "epoch": 0.03, "grad_norm": 3.841623306274414, "learning_rate": 1.7454545454545452e-06, "loss": 0.1356, "step": 96 }, { "epoch": 0.03, "grad_norm": 3.5247111320495605, "learning_rate": 1.7636363636363636e-06, "loss": 0.1217, "step": 97 }, { "epoch": 0.03, "grad_norm": 3.483203172683716, "learning_rate": 1.7818181818181818e-06, "loss": 0.1324, "step": 98 }, { "epoch": 0.03, "grad_norm": 3.9401931762695312, "learning_rate": 1.8e-06, "loss": 0.1559, "step": 99 }, { "epoch": 0.03, "grad_norm": 3.1736230850219727, "learning_rate": 1.818181818181818e-06, "loss": 0.1124, "step": 100 }, { "epoch": 0.03, "grad_norm": 3.3248181343078613, "learning_rate": 1.8363636363636362e-06, "loss": 0.1061, "step": 101 }, { "epoch": 0.03, "grad_norm": 3.954529285430908, "learning_rate": 1.8545454545454544e-06, "loss": 0.1396, "step": 102 }, { "epoch": 0.03, "grad_norm": 3.5198869705200195, "learning_rate": 1.8727272727272728e-06, "loss": 0.1246, "step": 103 }, { "epoch": 0.03, "grad_norm": 3.517188310623169, "learning_rate": 1.8909090909090907e-06, "loss": 0.1191, "step": 104 }, { "epoch": 0.03, "grad_norm": 3.422008991241455, "learning_rate": 1.909090909090909e-06, "loss": 0.1402, "step": 105 }, { "epoch": 0.03, "grad_norm": 3.771535634994507, "learning_rate": 1.9272727272727273e-06, "loss": 0.1457, "step": 106 }, { "epoch": 0.03, "grad_norm": 4.059760093688965, "learning_rate": 1.9454545454545454e-06, "loss": 0.1344, "step": 107 }, { "epoch": 0.03, "grad_norm": 3.780738592147827, "learning_rate": 1.9636363636363636e-06, "loss": 0.1361, "step": 108 }, { "epoch": 0.03, "grad_norm": 4.163651943206787, "learning_rate": 1.9818181818181817e-06, "loss": 0.1372, "step": 109 }, { "epoch": 0.03, "grad_norm": 3.797593355178833, "learning_rate": 2e-06, "loss": 0.137, "step": 110 }, { "epoch": 0.03, "grad_norm": 3.6726772785186768, "learning_rate": 1.99999960842675e-06, "loss": 0.1179, "step": 111 }, { "epoch": 0.03, "grad_norm": 3.261941909790039, "learning_rate": 1.9999984337073077e-06, "loss": 0.1152, "step": 112 }, { "epoch": 0.03, "grad_norm": 3.536269187927246, "learning_rate": 1.999996475842593e-06, "loss": 0.1425, "step": 113 }, { "epoch": 0.03, "grad_norm": 3.9828357696533203, "learning_rate": 1.9999937348341392e-06, "loss": 0.1322, "step": 114 }, { "epoch": 0.03, "grad_norm": 3.628679037094116, "learning_rate": 1.999990210684092e-06, "loss": 0.1106, "step": 115 }, { "epoch": 0.03, "grad_norm": 3.5205116271972656, "learning_rate": 1.9999859033952126e-06, "loss": 0.1336, "step": 116 }, { "epoch": 0.03, "grad_norm": 3.6973910331726074, "learning_rate": 1.999980812970873e-06, "loss": 0.1272, "step": 117 }, { "epoch": 0.03, "grad_norm": 3.650892734527588, "learning_rate": 1.9999749394150607e-06, "loss": 0.1363, "step": 118 }, { "epoch": 0.03, "grad_norm": 3.479020833969116, "learning_rate": 1.9999682827323754e-06, "loss": 0.1367, "step": 119 }, { "epoch": 0.03, "grad_norm": 3.836937189102173, "learning_rate": 1.99996084292803e-06, "loss": 0.1392, "step": 120 }, { "epoch": 0.03, "grad_norm": 3.656867027282715, "learning_rate": 1.9999526200078507e-06, "loss": 0.1285, "step": 121 }, { "epoch": 0.03, "grad_norm": 3.851539373397827, "learning_rate": 1.999943613978278e-06, "loss": 0.1475, "step": 122 }, { "epoch": 0.03, "grad_norm": 3.374127149581909, "learning_rate": 1.9999338248463646e-06, "loss": 0.1198, "step": 123 }, { "epoch": 0.03, "grad_norm": 3.398015260696411, "learning_rate": 1.9999232526197767e-06, "loss": 0.1155, "step": 124 }, { "epoch": 0.03, "grad_norm": 3.809936761856079, "learning_rate": 1.999911897306794e-06, "loss": 0.1338, "step": 125 }, { "epoch": 0.03, "grad_norm": 3.7471814155578613, "learning_rate": 1.9998997589163095e-06, "loss": 0.1581, "step": 126 }, { "epoch": 0.03, "grad_norm": 3.1297061443328857, "learning_rate": 1.9998868374578286e-06, "loss": 0.1184, "step": 127 }, { "epoch": 0.03, "grad_norm": 3.4692442417144775, "learning_rate": 1.999873132941472e-06, "loss": 0.1152, "step": 128 }, { "epoch": 0.04, "grad_norm": 3.715061902999878, "learning_rate": 1.999858645377971e-06, "loss": 0.148, "step": 129 }, { "epoch": 0.04, "grad_norm": 3.395211935043335, "learning_rate": 1.9998433747786726e-06, "loss": 0.1261, "step": 130 }, { "epoch": 0.04, "grad_norm": 3.7628817558288574, "learning_rate": 1.9998273211555354e-06, "loss": 0.1354, "step": 131 }, { "epoch": 0.04, "grad_norm": 3.3477649688720703, "learning_rate": 1.9998104845211313e-06, "loss": 0.13, "step": 132 }, { "epoch": 0.04, "grad_norm": 3.556675434112549, "learning_rate": 1.9997928648886467e-06, "loss": 0.1297, "step": 133 }, { "epoch": 0.04, "grad_norm": 3.8331122398376465, "learning_rate": 1.9997744622718796e-06, "loss": 0.1407, "step": 134 }, { "epoch": 0.04, "grad_norm": 3.4685864448547363, "learning_rate": 1.999755276685243e-06, "loss": 0.1145, "step": 135 }, { "epoch": 0.04, "grad_norm": 3.602905750274658, "learning_rate": 1.999735308143761e-06, "loss": 0.1325, "step": 136 }, { "epoch": 0.04, "grad_norm": 3.2092838287353516, "learning_rate": 1.999714556663072e-06, "loss": 0.1233, "step": 137 }, { "epoch": 0.04, "grad_norm": 3.2409422397613525, "learning_rate": 1.999693022259428e-06, "loss": 0.1339, "step": 138 }, { "epoch": 0.04, "grad_norm": 3.6716156005859375, "learning_rate": 1.999670704949693e-06, "loss": 0.1235, "step": 139 }, { "epoch": 0.04, "grad_norm": 3.52691388130188, "learning_rate": 1.999647604751345e-06, "loss": 0.136, "step": 140 }, { "epoch": 0.04, "grad_norm": 3.946507453918457, "learning_rate": 1.999623721682475e-06, "loss": 0.1521, "step": 141 }, { "epoch": 0.04, "grad_norm": 3.8056299686431885, "learning_rate": 1.999599055761787e-06, "loss": 0.1221, "step": 142 }, { "epoch": 0.04, "grad_norm": 3.361619710922241, "learning_rate": 1.9995736070085978e-06, "loss": 0.1337, "step": 143 }, { "epoch": 0.04, "grad_norm": 3.38295578956604, "learning_rate": 1.999547375442837e-06, "loss": 0.1297, "step": 144 }, { "epoch": 0.04, "grad_norm": 3.585200786590576, "learning_rate": 1.999520361085049e-06, "loss": 0.1276, "step": 145 }, { "epoch": 0.04, "grad_norm": 3.449899673461914, "learning_rate": 1.9994925639563886e-06, "loss": 0.1283, "step": 146 }, { "epoch": 0.04, "grad_norm": 3.813476324081421, "learning_rate": 1.999463984078626e-06, "loss": 0.1475, "step": 147 }, { "epoch": 0.04, "grad_norm": 3.5906283855438232, "learning_rate": 1.999434621474143e-06, "loss": 0.1287, "step": 148 }, { "epoch": 0.04, "grad_norm": 4.171735763549805, "learning_rate": 1.999404476165935e-06, "loss": 0.1531, "step": 149 }, { "epoch": 0.04, "grad_norm": 3.4671435356140137, "learning_rate": 1.99937354817761e-06, "loss": 0.1396, "step": 150 }, { "epoch": 0.04, "grad_norm": 3.950822114944458, "learning_rate": 1.99934183753339e-06, "loss": 0.14, "step": 151 }, { "epoch": 0.04, "grad_norm": 3.534167528152466, "learning_rate": 1.9993093442581075e-06, "loss": 0.1262, "step": 152 }, { "epoch": 0.04, "grad_norm": 3.732804298400879, "learning_rate": 1.999276068377211e-06, "loss": 0.1449, "step": 153 }, { "epoch": 0.04, "grad_norm": 3.422449827194214, "learning_rate": 1.999242009916759e-06, "loss": 0.1539, "step": 154 }, { "epoch": 0.04, "grad_norm": 3.5805094242095947, "learning_rate": 1.9992071689034255e-06, "loss": 0.153, "step": 155 }, { "epoch": 0.04, "grad_norm": 3.6584551334381104, "learning_rate": 1.999171545364496e-06, "loss": 0.1491, "step": 156 }, { "epoch": 0.04, "grad_norm": 3.4344966411590576, "learning_rate": 1.999135139327868e-06, "loss": 0.1421, "step": 157 }, { "epoch": 0.04, "grad_norm": 3.415125846862793, "learning_rate": 1.9990979508220536e-06, "loss": 0.1391, "step": 158 }, { "epoch": 0.04, "grad_norm": 3.371690034866333, "learning_rate": 1.9990599798761766e-06, "loss": 0.1394, "step": 159 }, { "epoch": 0.04, "grad_norm": 3.242325782775879, "learning_rate": 1.9990212265199736e-06, "loss": 0.1355, "step": 160 }, { "epoch": 0.04, "grad_norm": 3.316002368927002, "learning_rate": 1.9989816907837944e-06, "loss": 0.1423, "step": 161 }, { "epoch": 0.04, "grad_norm": 3.9251341819763184, "learning_rate": 1.998941372698601e-06, "loss": 0.1408, "step": 162 }, { "epoch": 0.04, "grad_norm": 3.750389814376831, "learning_rate": 1.998900272295969e-06, "loss": 0.154, "step": 163 }, { "epoch": 0.04, "grad_norm": 3.681218147277832, "learning_rate": 1.9988583896080856e-06, "loss": 0.151, "step": 164 }, { "epoch": 0.05, "grad_norm": 3.407083511352539, "learning_rate": 1.9988157246677513e-06, "loss": 0.1354, "step": 165 }, { "epoch": 0.05, "grad_norm": 3.2584445476531982, "learning_rate": 1.9987722775083785e-06, "loss": 0.1321, "step": 166 }, { "epoch": 0.05, "grad_norm": 3.265322685241699, "learning_rate": 1.998728048163993e-06, "loss": 0.1146, "step": 167 }, { "epoch": 0.05, "grad_norm": 3.724404811859131, "learning_rate": 1.998683036669233e-06, "loss": 0.1424, "step": 168 }, { "epoch": 0.05, "grad_norm": 4.1619486808776855, "learning_rate": 1.998637243059349e-06, "loss": 0.1428, "step": 169 }, { "epoch": 0.05, "grad_norm": 3.1609082221984863, "learning_rate": 1.998590667370204e-06, "loss": 0.1218, "step": 170 }, { "epoch": 0.05, "grad_norm": 3.0653064250946045, "learning_rate": 1.9985433096382735e-06, "loss": 0.1122, "step": 171 }, { "epoch": 0.05, "grad_norm": 3.4336159229278564, "learning_rate": 1.998495169900646e-06, "loss": 0.1296, "step": 172 }, { "epoch": 0.05, "grad_norm": 3.300739049911499, "learning_rate": 1.998446248195021e-06, "loss": 0.128, "step": 173 }, { "epoch": 0.05, "grad_norm": 3.1947007179260254, "learning_rate": 1.998396544559713e-06, "loss": 0.1192, "step": 174 }, { "epoch": 0.05, "grad_norm": 3.543092966079712, "learning_rate": 1.9983460590336457e-06, "loss": 0.1405, "step": 175 }, { "epoch": 0.05, "grad_norm": 3.321154832839966, "learning_rate": 1.998294791656357e-06, "loss": 0.1222, "step": 176 }, { "epoch": 0.05, "grad_norm": 3.4130699634552, "learning_rate": 1.9982427424679976e-06, "loss": 0.1494, "step": 177 }, { "epoch": 0.05, "grad_norm": 3.2330543994903564, "learning_rate": 1.9981899115093287e-06, "loss": 0.1447, "step": 178 }, { "epoch": 0.05, "grad_norm": 3.6075711250305176, "learning_rate": 1.9981362988217246e-06, "loss": 0.1532, "step": 179 }, { "epoch": 0.05, "grad_norm": 3.4685580730438232, "learning_rate": 1.998081904447173e-06, "loss": 0.1178, "step": 180 }, { "epoch": 0.05, "grad_norm": 3.286449909210205, "learning_rate": 1.9980267284282714e-06, "loss": 0.1435, "step": 181 }, { "epoch": 0.05, "grad_norm": 3.7313830852508545, "learning_rate": 1.9979707708082315e-06, "loss": 0.1509, "step": 182 }, { "epoch": 0.05, "grad_norm": 3.748180627822876, "learning_rate": 1.9979140316308762e-06, "loss": 0.1365, "step": 183 }, { "epoch": 0.05, "grad_norm": 3.4357450008392334, "learning_rate": 1.9978565109406402e-06, "loss": 0.1301, "step": 184 }, { "epoch": 0.05, "grad_norm": 3.5195868015289307, "learning_rate": 1.9977982087825712e-06, "loss": 0.141, "step": 185 }, { "epoch": 0.05, "grad_norm": 3.6003425121307373, "learning_rate": 1.9977391252023277e-06, "loss": 0.1346, "step": 186 }, { "epoch": 0.05, "grad_norm": 3.1029670238494873, "learning_rate": 1.9976792602461813e-06, "loss": 0.1241, "step": 187 }, { "epoch": 0.05, "grad_norm": 3.2697935104370117, "learning_rate": 1.9976186139610146e-06, "loss": 0.132, "step": 188 }, { "epoch": 0.05, "grad_norm": 4.49591064453125, "learning_rate": 1.997557186394323e-06, "loss": 0.1508, "step": 189 }, { "epoch": 0.05, "grad_norm": 3.4977004528045654, "learning_rate": 1.9974949775942133e-06, "loss": 0.1464, "step": 190 }, { "epoch": 0.05, "grad_norm": 3.6156258583068848, "learning_rate": 1.997431987609403e-06, "loss": 0.1564, "step": 191 }, { "epoch": 0.05, "grad_norm": 3.6873908042907715, "learning_rate": 1.9973682164892242e-06, "loss": 0.1439, "step": 192 }, { "epoch": 0.05, "grad_norm": 3.5300700664520264, "learning_rate": 1.997303664283618e-06, "loss": 0.1521, "step": 193 }, { "epoch": 0.05, "grad_norm": 3.2912561893463135, "learning_rate": 1.997238331043138e-06, "loss": 0.1269, "step": 194 }, { "epoch": 0.05, "grad_norm": 3.3216238021850586, "learning_rate": 1.9971722168189506e-06, "loss": 0.1286, "step": 195 }, { "epoch": 0.05, "grad_norm": 3.44690203666687, "learning_rate": 1.997105321662832e-06, "loss": 0.1423, "step": 196 }, { "epoch": 0.05, "grad_norm": 3.240175247192383, "learning_rate": 1.9970376456271718e-06, "loss": 0.122, "step": 197 }, { "epoch": 0.05, "grad_norm": 3.19724440574646, "learning_rate": 1.9969691887649696e-06, "loss": 0.1327, "step": 198 }, { "epoch": 0.05, "grad_norm": 3.4572486877441406, "learning_rate": 1.9968999511298373e-06, "loss": 0.1373, "step": 199 }, { "epoch": 0.05, "grad_norm": 3.4547650814056396, "learning_rate": 1.9968299327759985e-06, "loss": 0.1327, "step": 200 }, { "epoch": 0.05, "grad_norm": 3.5186169147491455, "learning_rate": 1.996759133758287e-06, "loss": 0.144, "step": 201 }, { "epoch": 0.06, "grad_norm": 3.53615665435791, "learning_rate": 1.9966875541321497e-06, "loss": 0.1261, "step": 202 }, { "epoch": 0.06, "grad_norm": 3.302018404006958, "learning_rate": 1.996615193953643e-06, "loss": 0.139, "step": 203 }, { "epoch": 0.06, "grad_norm": 3.4731132984161377, "learning_rate": 1.9965420532794364e-06, "loss": 0.1453, "step": 204 }, { "epoch": 0.06, "grad_norm": 3.6803643703460693, "learning_rate": 1.9964681321668095e-06, "loss": 0.1512, "step": 205 }, { "epoch": 0.06, "grad_norm": 3.640664577484131, "learning_rate": 1.996393430673653e-06, "loss": 0.1257, "step": 206 }, { "epoch": 0.06, "grad_norm": 3.283768653869629, "learning_rate": 1.9963179488584697e-06, "loss": 0.1347, "step": 207 }, { "epoch": 0.06, "grad_norm": 3.3293819427490234, "learning_rate": 1.9962416867803726e-06, "loss": 0.139, "step": 208 }, { "epoch": 0.06, "grad_norm": 3.2843964099884033, "learning_rate": 1.9961646444990855e-06, "loss": 0.1399, "step": 209 }, { "epoch": 0.06, "grad_norm": 3.1588640213012695, "learning_rate": 1.9960868220749447e-06, "loss": 0.1338, "step": 210 }, { "epoch": 0.06, "grad_norm": 3.0526747703552246, "learning_rate": 1.9960082195688964e-06, "loss": 0.1225, "step": 211 }, { "epoch": 0.06, "grad_norm": 3.466364622116089, "learning_rate": 1.9959288370424975e-06, "loss": 0.1609, "step": 212 }, { "epoch": 0.06, "grad_norm": 3.3527302742004395, "learning_rate": 1.9958486745579162e-06, "loss": 0.1312, "step": 213 }, { "epoch": 0.06, "grad_norm": 3.9912383556365967, "learning_rate": 1.995767732177932e-06, "loss": 0.1449, "step": 214 }, { "epoch": 0.06, "grad_norm": 3.5762126445770264, "learning_rate": 1.995686009965934e-06, "loss": 0.1323, "step": 215 }, { "epoch": 0.06, "grad_norm": 3.335552215576172, "learning_rate": 1.995603507985923e-06, "loss": 0.1179, "step": 216 }, { "epoch": 0.06, "grad_norm": 3.964589834213257, "learning_rate": 1.9955202263025103e-06, "loss": 0.1593, "step": 217 }, { "epoch": 0.06, "grad_norm": 3.3723831176757812, "learning_rate": 1.995436164980917e-06, "loss": 0.1316, "step": 218 }, { "epoch": 0.06, "grad_norm": 3.1864852905273438, "learning_rate": 1.9953513240869763e-06, "loss": 0.1237, "step": 219 }, { "epoch": 0.06, "grad_norm": 3.535829782485962, "learning_rate": 1.9952657036871305e-06, "loss": 0.139, "step": 220 }, { "epoch": 0.06, "grad_norm": 3.485399007797241, "learning_rate": 1.9951793038484326e-06, "loss": 0.1551, "step": 221 }, { "epoch": 0.06, "grad_norm": 3.34142804145813, "learning_rate": 1.995092124638547e-06, "loss": 0.1322, "step": 222 }, { "epoch": 0.06, "grad_norm": 3.582733631134033, "learning_rate": 1.995004166125748e-06, "loss": 0.1296, "step": 223 }, { "epoch": 0.06, "grad_norm": 3.348628044128418, "learning_rate": 1.994915428378919e-06, "loss": 0.1287, "step": 224 }, { "epoch": 0.06, "grad_norm": 3.2857093811035156, "learning_rate": 1.994825911467555e-06, "loss": 0.1184, "step": 225 }, { "epoch": 0.06, "grad_norm": 3.5528059005737305, "learning_rate": 1.994735615461762e-06, "loss": 0.1434, "step": 226 }, { "epoch": 0.06, "grad_norm": 3.3763883113861084, "learning_rate": 1.9946445404322533e-06, "loss": 0.1324, "step": 227 }, { "epoch": 0.06, "grad_norm": 3.357837438583374, "learning_rate": 1.9945526864503547e-06, "loss": 0.1291, "step": 228 }, { "epoch": 0.06, "grad_norm": 3.7160394191741943, "learning_rate": 1.9944600535880018e-06, "loss": 0.1464, "step": 229 }, { "epoch": 0.06, "grad_norm": 3.718874931335449, "learning_rate": 1.994366641917739e-06, "loss": 0.1195, "step": 230 }, { "epoch": 0.06, "grad_norm": 3.4990196228027344, "learning_rate": 1.9942724515127216e-06, "loss": 0.1474, "step": 231 }, { "epoch": 0.06, "grad_norm": 3.5642192363739014, "learning_rate": 1.9941774824467148e-06, "loss": 0.1436, "step": 232 }, { "epoch": 0.06, "grad_norm": 3.294753313064575, "learning_rate": 1.9940817347940927e-06, "loss": 0.1169, "step": 233 }, { "epoch": 0.06, "grad_norm": 3.635255813598633, "learning_rate": 1.9939852086298397e-06, "loss": 0.1528, "step": 234 }, { "epoch": 0.06, "grad_norm": 3.3293750286102295, "learning_rate": 1.9938879040295507e-06, "loss": 0.1367, "step": 235 }, { "epoch": 0.06, "grad_norm": 3.4504055976867676, "learning_rate": 1.993789821069429e-06, "loss": 0.1424, "step": 236 }, { "epoch": 0.06, "grad_norm": 3.2252650260925293, "learning_rate": 1.993690959826288e-06, "loss": 0.1381, "step": 237 }, { "epoch": 0.07, "grad_norm": 3.5592312812805176, "learning_rate": 1.99359132037755e-06, "loss": 0.1304, "step": 238 }, { "epoch": 0.07, "grad_norm": 3.446719169616699, "learning_rate": 1.9934909028012477e-06, "loss": 0.1378, "step": 239 }, { "epoch": 0.07, "grad_norm": 3.1963205337524414, "learning_rate": 1.9933897071760235e-06, "loss": 0.1279, "step": 240 }, { "epoch": 0.07, "grad_norm": 3.2924935817718506, "learning_rate": 1.993287733581127e-06, "loss": 0.13, "step": 241 }, { "epoch": 0.07, "grad_norm": 3.186282157897949, "learning_rate": 1.9931849820964196e-06, "loss": 0.1148, "step": 242 }, { "epoch": 0.07, "grad_norm": 3.7735495567321777, "learning_rate": 1.9930814528023703e-06, "loss": 0.1398, "step": 243 }, { "epoch": 0.07, "grad_norm": 3.284437656402588, "learning_rate": 1.992977145780058e-06, "loss": 0.13, "step": 244 }, { "epoch": 0.07, "grad_norm": 3.665106773376465, "learning_rate": 1.9928720611111695e-06, "loss": 0.1325, "step": 245 }, { "epoch": 0.07, "grad_norm": 3.378911018371582, "learning_rate": 1.9927661988780024e-06, "loss": 0.1286, "step": 246 }, { "epoch": 0.07, "grad_norm": 3.492617130279541, "learning_rate": 1.9926595591634625e-06, "loss": 0.1539, "step": 247 }, { "epoch": 0.07, "grad_norm": 3.062282085418701, "learning_rate": 1.992552142051063e-06, "loss": 0.1134, "step": 248 }, { "epoch": 0.07, "grad_norm": 3.0996599197387695, "learning_rate": 1.9924439476249287e-06, "loss": 0.1115, "step": 249 }, { "epoch": 0.07, "grad_norm": 3.804471969604492, "learning_rate": 1.992334975969791e-06, "loss": 0.1418, "step": 250 }, { "epoch": 0.07, "grad_norm": 3.4804577827453613, "learning_rate": 1.9922252271709913e-06, "loss": 0.1405, "step": 251 }, { "epoch": 0.07, "grad_norm": 3.598825693130493, "learning_rate": 1.9921147013144777e-06, "loss": 0.1347, "step": 252 }, { "epoch": 0.07, "grad_norm": 3.65277361869812, "learning_rate": 1.9920033984868093e-06, "loss": 0.1333, "step": 253 }, { "epoch": 0.07, "grad_norm": 3.512441873550415, "learning_rate": 1.9918913187751516e-06, "loss": 0.1644, "step": 254 }, { "epoch": 0.07, "grad_norm": 3.4071273803710938, "learning_rate": 1.9917784622672805e-06, "loss": 0.1446, "step": 255 }, { "epoch": 0.07, "grad_norm": 3.3086330890655518, "learning_rate": 1.9916648290515785e-06, "loss": 0.1265, "step": 256 }, { "epoch": 0.07, "grad_norm": 3.5405070781707764, "learning_rate": 1.9915504192170373e-06, "loss": 0.1339, "step": 257 }, { "epoch": 0.07, "grad_norm": 3.329610586166382, "learning_rate": 1.991435232853256e-06, "loss": 0.1289, "step": 258 }, { "epoch": 0.07, "grad_norm": 3.483816385269165, "learning_rate": 1.9913192700504435e-06, "loss": 0.1314, "step": 259 }, { "epoch": 0.07, "grad_norm": 3.278635025024414, "learning_rate": 1.9912025308994145e-06, "loss": 0.1329, "step": 260 }, { "epoch": 0.07, "grad_norm": 3.243156671524048, "learning_rate": 1.9910850154915936e-06, "loss": 0.138, "step": 261 }, { "epoch": 0.07, "grad_norm": 3.3385517597198486, "learning_rate": 1.9909667239190123e-06, "loss": 0.144, "step": 262 }, { "epoch": 0.07, "grad_norm": 3.1529195308685303, "learning_rate": 1.99084765627431e-06, "loss": 0.1347, "step": 263 }, { "epoch": 0.07, "grad_norm": 3.207628011703491, "learning_rate": 1.9907278126507347e-06, "loss": 0.1571, "step": 264 }, { "epoch": 0.07, "grad_norm": 3.169715404510498, "learning_rate": 1.9906071931421412e-06, "loss": 0.1397, "step": 265 }, { "epoch": 0.07, "grad_norm": 3.044822931289673, "learning_rate": 1.990485797842992e-06, "loss": 0.1111, "step": 266 }, { "epoch": 0.07, "grad_norm": 3.2648305892944336, "learning_rate": 1.9903636268483577e-06, "loss": 0.1504, "step": 267 }, { "epoch": 0.07, "grad_norm": 3.314117431640625, "learning_rate": 1.990240680253916e-06, "loss": 0.1539, "step": 268 }, { "epoch": 0.07, "grad_norm": 3.51577091217041, "learning_rate": 1.990116958155953e-06, "loss": 0.1374, "step": 269 }, { "epoch": 0.07, "grad_norm": 3.2009201049804688, "learning_rate": 1.989992460651359e-06, "loss": 0.1344, "step": 270 }, { "epoch": 0.07, "grad_norm": 3.7722017765045166, "learning_rate": 1.9898671878376363e-06, "loss": 0.162, "step": 271 }, { "epoch": 0.07, "grad_norm": 3.3505313396453857, "learning_rate": 1.98974113981289e-06, "loss": 0.1588, "step": 272 }, { "epoch": 0.07, "grad_norm": 3.020047426223755, "learning_rate": 1.989614316675835e-06, "loss": 0.1215, "step": 273 }, { "epoch": 0.07, "grad_norm": 3.8189098834991455, "learning_rate": 1.9894867185257924e-06, "loss": 0.1685, "step": 274 }, { "epoch": 0.08, "grad_norm": 3.6801252365112305, "learning_rate": 1.98935834546269e-06, "loss": 0.139, "step": 275 }, { "epoch": 0.08, "grad_norm": 3.7327663898468018, "learning_rate": 1.989229197587063e-06, "loss": 0.154, "step": 276 }, { "epoch": 0.08, "grad_norm": 3.297316789627075, "learning_rate": 1.9890992750000527e-06, "loss": 0.1504, "step": 277 }, { "epoch": 0.08, "grad_norm": 3.2879798412323, "learning_rate": 1.988968577803408e-06, "loss": 0.1384, "step": 278 }, { "epoch": 0.08, "grad_norm": 3.343733787536621, "learning_rate": 1.9888371060994836e-06, "loss": 0.1463, "step": 279 }, { "epoch": 0.08, "grad_norm": 3.16746187210083, "learning_rate": 1.9887048599912412e-06, "loss": 0.1187, "step": 280 }, { "epoch": 0.08, "grad_norm": 3.205296516418457, "learning_rate": 1.9885718395822487e-06, "loss": 0.1305, "step": 281 }, { "epoch": 0.08, "grad_norm": 3.2150719165802, "learning_rate": 1.988438044976681e-06, "loss": 0.1242, "step": 282 }, { "epoch": 0.08, "grad_norm": 3.5434811115264893, "learning_rate": 1.988303476279319e-06, "loss": 0.1469, "step": 283 }, { "epoch": 0.08, "grad_norm": 3.290196418762207, "learning_rate": 1.9881681335955487e-06, "loss": 0.144, "step": 284 }, { "epoch": 0.08, "grad_norm": 3.230937957763672, "learning_rate": 1.9880320170313638e-06, "loss": 0.1345, "step": 285 }, { "epoch": 0.08, "grad_norm": 3.4872334003448486, "learning_rate": 1.987895126693364e-06, "loss": 0.1481, "step": 286 }, { "epoch": 0.08, "grad_norm": 3.31950306892395, "learning_rate": 1.987757462688754e-06, "loss": 0.1334, "step": 287 }, { "epoch": 0.08, "grad_norm": 3.483234405517578, "learning_rate": 1.987619025125345e-06, "loss": 0.148, "step": 288 }, { "epoch": 0.08, "grad_norm": 3.3412797451019287, "learning_rate": 1.987479814111554e-06, "loss": 0.1506, "step": 289 }, { "epoch": 0.08, "grad_norm": 3.706984758377075, "learning_rate": 1.9873398297564034e-06, "loss": 0.1575, "step": 290 }, { "epoch": 0.08, "grad_norm": 3.5726327896118164, "learning_rate": 1.987199072169521e-06, "loss": 0.1498, "step": 291 }, { "epoch": 0.08, "grad_norm": 3.05886173248291, "learning_rate": 1.987057541461142e-06, "loss": 0.1301, "step": 292 }, { "epoch": 0.08, "grad_norm": 3.118166446685791, "learning_rate": 1.9869152377421047e-06, "loss": 0.1351, "step": 293 }, { "epoch": 0.08, "grad_norm": 3.4826500415802, "learning_rate": 1.9867721611238535e-06, "loss": 0.1481, "step": 294 }, { "epoch": 0.08, "grad_norm": 3.4809482097625732, "learning_rate": 1.986628311718439e-06, "loss": 0.1407, "step": 295 }, { "epoch": 0.08, "grad_norm": 3.4283668994903564, "learning_rate": 1.986483689638516e-06, "loss": 0.1426, "step": 296 }, { "epoch": 0.08, "grad_norm": 3.3706417083740234, "learning_rate": 1.986338294997345e-06, "loss": 0.1154, "step": 297 }, { "epoch": 0.08, "grad_norm": 3.6070687770843506, "learning_rate": 1.986192127908791e-06, "loss": 0.1531, "step": 298 }, { "epoch": 0.08, "grad_norm": 3.358705759048462, "learning_rate": 1.9860451884873245e-06, "loss": 0.1445, "step": 299 }, { "epoch": 0.08, "grad_norm": 3.1420834064483643, "learning_rate": 1.9858974768480202e-06, "loss": 0.1328, "step": 300 }, { "epoch": 0.08, "grad_norm": 3.0843026638031006, "learning_rate": 1.985748993106559e-06, "loss": 0.121, "step": 301 }, { "epoch": 0.08, "grad_norm": 3.4602112770080566, "learning_rate": 1.9855997373792237e-06, "loss": 0.125, "step": 302 }, { "epoch": 0.08, "grad_norm": 3.8304550647735596, "learning_rate": 1.9854497097829052e-06, "loss": 0.163, "step": 303 }, { "epoch": 0.08, "grad_norm": 3.5888307094573975, "learning_rate": 1.985298910435096e-06, "loss": 0.1431, "step": 304 }, { "epoch": 0.08, "grad_norm": 3.4069719314575195, "learning_rate": 1.9851473394538946e-06, "loss": 0.1382, "step": 305 }, { "epoch": 0.08, "grad_norm": 3.813002347946167, "learning_rate": 1.984994996958003e-06, "loss": 0.1319, "step": 306 }, { "epoch": 0.08, "grad_norm": 3.3693482875823975, "learning_rate": 1.9848418830667276e-06, "loss": 0.1368, "step": 307 }, { "epoch": 0.08, "grad_norm": 3.2808310985565186, "learning_rate": 1.984687997899979e-06, "loss": 0.1448, "step": 308 }, { "epoch": 0.08, "grad_norm": 3.611994743347168, "learning_rate": 1.9845333415782723e-06, "loss": 0.1405, "step": 309 }, { "epoch": 0.08, "grad_norm": 3.4369349479675293, "learning_rate": 1.9843779142227253e-06, "loss": 0.1732, "step": 310 }, { "epoch": 0.08, "grad_norm": 3.140673875808716, "learning_rate": 1.984221715955061e-06, "loss": 0.1309, "step": 311 }, { "epoch": 0.09, "grad_norm": 3.4380476474761963, "learning_rate": 1.984064746897606e-06, "loss": 0.1307, "step": 312 }, { "epoch": 0.09, "grad_norm": 3.550687789916992, "learning_rate": 1.983907007173289e-06, "loss": 0.1362, "step": 313 }, { "epoch": 0.09, "grad_norm": 3.2162210941314697, "learning_rate": 1.9837484969056433e-06, "loss": 0.1298, "step": 314 }, { "epoch": 0.09, "grad_norm": 3.265251398086548, "learning_rate": 1.983589216218806e-06, "loss": 0.1308, "step": 315 }, { "epoch": 0.09, "grad_norm": 3.379631280899048, "learning_rate": 1.983429165237518e-06, "loss": 0.1592, "step": 316 }, { "epoch": 0.09, "grad_norm": 3.2787411212921143, "learning_rate": 1.9832683440871217e-06, "loss": 0.1356, "step": 317 }, { "epoch": 0.09, "grad_norm": 3.5051069259643555, "learning_rate": 1.9831067528935635e-06, "loss": 0.1701, "step": 318 }, { "epoch": 0.09, "grad_norm": 3.3926029205322266, "learning_rate": 1.982944391783394e-06, "loss": 0.1298, "step": 319 }, { "epoch": 0.09, "grad_norm": 3.2148022651672363, "learning_rate": 1.982781260883765e-06, "loss": 0.1296, "step": 320 }, { "epoch": 0.09, "grad_norm": 3.4440078735351562, "learning_rate": 1.9826173603224317e-06, "loss": 0.1449, "step": 321 }, { "epoch": 0.09, "grad_norm": 3.4502224922180176, "learning_rate": 1.9824526902277525e-06, "loss": 0.1521, "step": 322 }, { "epoch": 0.09, "grad_norm": 3.1308059692382812, "learning_rate": 1.9822872507286887e-06, "loss": 0.1293, "step": 323 }, { "epoch": 0.09, "grad_norm": 3.0398175716400146, "learning_rate": 1.982121041954803e-06, "loss": 0.1165, "step": 324 }, { "epoch": 0.09, "grad_norm": 3.3087451457977295, "learning_rate": 1.981954064036261e-06, "loss": 0.1424, "step": 325 }, { "epoch": 0.09, "grad_norm": 3.4104042053222656, "learning_rate": 1.981786317103832e-06, "loss": 0.1446, "step": 326 }, { "epoch": 0.09, "grad_norm": 3.2183303833007812, "learning_rate": 1.981617801288885e-06, "loss": 0.1324, "step": 327 }, { "epoch": 0.09, "grad_norm": 3.486673593521118, "learning_rate": 1.981448516723394e-06, "loss": 0.1461, "step": 328 }, { "epoch": 0.09, "grad_norm": 3.439990997314453, "learning_rate": 1.9812784635399326e-06, "loss": 0.1511, "step": 329 }, { "epoch": 0.09, "grad_norm": 3.738765239715576, "learning_rate": 1.981107641871678e-06, "loss": 0.1323, "step": 330 }, { "epoch": 0.09, "grad_norm": 3.526998519897461, "learning_rate": 1.9809360518524078e-06, "loss": 0.1542, "step": 331 }, { "epoch": 0.09, "grad_norm": 3.3102715015411377, "learning_rate": 1.980763693616503e-06, "loss": 0.142, "step": 332 }, { "epoch": 0.09, "grad_norm": 3.672074317932129, "learning_rate": 1.9805905672989445e-06, "loss": 0.1476, "step": 333 }, { "epoch": 0.09, "grad_norm": 3.2519290447235107, "learning_rate": 1.980416673035316e-06, "loss": 0.1518, "step": 334 }, { "epoch": 0.09, "grad_norm": 3.2809934616088867, "learning_rate": 1.9802420109618028e-06, "loss": 0.1261, "step": 335 }, { "epoch": 0.09, "grad_norm": 3.447441577911377, "learning_rate": 1.98006658121519e-06, "loss": 0.1478, "step": 336 }, { "epoch": 0.09, "grad_norm": 2.9944467544555664, "learning_rate": 1.9798903839328647e-06, "loss": 0.1254, "step": 337 }, { "epoch": 0.09, "grad_norm": 3.4824864864349365, "learning_rate": 1.979713419252816e-06, "loss": 0.146, "step": 338 }, { "epoch": 0.09, "grad_norm": 3.4047210216522217, "learning_rate": 1.9795356873136324e-06, "loss": 0.144, "step": 339 }, { "epoch": 0.09, "grad_norm": 3.480583906173706, "learning_rate": 1.9793571882545048e-06, "loss": 0.1608, "step": 340 }, { "epoch": 0.09, "grad_norm": 3.3297982215881348, "learning_rate": 1.9791779222152232e-06, "loss": 0.1301, "step": 341 }, { "epoch": 0.09, "grad_norm": 3.241499662399292, "learning_rate": 1.97899788933618e-06, "loss": 0.1421, "step": 342 }, { "epoch": 0.09, "grad_norm": 3.4256677627563477, "learning_rate": 1.978817089758367e-06, "loss": 0.1504, "step": 343 }, { "epoch": 0.09, "grad_norm": 3.3894009590148926, "learning_rate": 1.9786355236233767e-06, "loss": 0.1473, "step": 344 }, { "epoch": 0.09, "grad_norm": 3.277958631515503, "learning_rate": 1.978453191073402e-06, "loss": 0.1345, "step": 345 }, { "epoch": 0.09, "grad_norm": 3.110243558883667, "learning_rate": 1.9782700922512356e-06, "loss": 0.1271, "step": 346 }, { "epoch": 0.09, "grad_norm": 3.212660551071167, "learning_rate": 1.9780862273002718e-06, "loss": 0.1419, "step": 347 }, { "epoch": 0.1, "grad_norm": 3.4038431644439697, "learning_rate": 1.977901596364503e-06, "loss": 0.1476, "step": 348 }, { "epoch": 0.1, "grad_norm": 3.4911129474639893, "learning_rate": 1.9777161995885216e-06, "loss": 0.1649, "step": 349 }, { "epoch": 0.1, "grad_norm": 3.139709711074829, "learning_rate": 1.977530037117522e-06, "loss": 0.1388, "step": 350 }, { "epoch": 0.1, "grad_norm": 3.3062047958374023, "learning_rate": 1.977343109097296e-06, "loss": 0.1431, "step": 351 }, { "epoch": 0.1, "grad_norm": 3.1570358276367188, "learning_rate": 1.977155415674235e-06, "loss": 0.1324, "step": 352 }, { "epoch": 0.1, "grad_norm": 3.35162091255188, "learning_rate": 1.976966956995331e-06, "loss": 0.14, "step": 353 }, { "epoch": 0.1, "grad_norm": 3.1200473308563232, "learning_rate": 1.976777733208175e-06, "loss": 0.1448, "step": 354 }, { "epoch": 0.1, "grad_norm": 3.0821895599365234, "learning_rate": 1.9765877444609565e-06, "loss": 0.1233, "step": 355 }, { "epoch": 0.1, "grad_norm": 3.0552830696105957, "learning_rate": 1.976396990902465e-06, "loss": 0.1182, "step": 356 }, { "epoch": 0.1, "grad_norm": 3.555692195892334, "learning_rate": 1.976205472682088e-06, "loss": 0.1527, "step": 357 }, { "epoch": 0.1, "grad_norm": 3.5902371406555176, "learning_rate": 1.9760131899498125e-06, "loss": 0.152, "step": 358 }, { "epoch": 0.1, "grad_norm": 3.108504056930542, "learning_rate": 1.975820142856224e-06, "loss": 0.1257, "step": 359 }, { "epoch": 0.1, "grad_norm": 3.2323732376098633, "learning_rate": 1.975626331552507e-06, "loss": 0.1446, "step": 360 }, { "epoch": 0.1, "grad_norm": 3.161468267440796, "learning_rate": 1.9754317561904433e-06, "loss": 0.1334, "step": 361 }, { "epoch": 0.1, "grad_norm": 3.21755051612854, "learning_rate": 1.9752364169224148e-06, "loss": 0.1433, "step": 362 }, { "epoch": 0.1, "grad_norm": 3.455519437789917, "learning_rate": 1.9750403139014003e-06, "loss": 0.1634, "step": 363 }, { "epoch": 0.1, "grad_norm": 3.1519174575805664, "learning_rate": 1.9748434472809776e-06, "loss": 0.1327, "step": 364 }, { "epoch": 0.1, "grad_norm": 3.293189287185669, "learning_rate": 1.974645817215322e-06, "loss": 0.1276, "step": 365 }, { "epoch": 0.1, "grad_norm": 3.327479362487793, "learning_rate": 1.974447423859206e-06, "loss": 0.1424, "step": 366 }, { "epoch": 0.1, "grad_norm": 3.365499973297119, "learning_rate": 1.9742482673680015e-06, "loss": 0.1553, "step": 367 }, { "epoch": 0.1, "grad_norm": 3.1190264225006104, "learning_rate": 1.974048347897677e-06, "loss": 0.1255, "step": 368 }, { "epoch": 0.1, "grad_norm": 3.285921096801758, "learning_rate": 1.973847665604799e-06, "loss": 0.131, "step": 369 }, { "epoch": 0.1, "grad_norm": 3.1432747840881348, "learning_rate": 1.973646220646531e-06, "loss": 0.1287, "step": 370 }, { "epoch": 0.1, "grad_norm": 3.507528066635132, "learning_rate": 1.973444013180633e-06, "loss": 0.1615, "step": 371 }, { "epoch": 0.1, "grad_norm": 3.055715322494507, "learning_rate": 1.973241043365464e-06, "loss": 0.1417, "step": 372 }, { "epoch": 0.1, "grad_norm": 3.1101040840148926, "learning_rate": 1.9730373113599796e-06, "loss": 0.1285, "step": 373 }, { "epoch": 0.1, "grad_norm": 3.195751905441284, "learning_rate": 1.972832817323731e-06, "loss": 0.1537, "step": 374 }, { "epoch": 0.1, "grad_norm": 3.517488956451416, "learning_rate": 1.9726275614168667e-06, "loss": 0.1525, "step": 375 }, { "epoch": 0.1, "grad_norm": 3.0122179985046387, "learning_rate": 1.972421543800133e-06, "loss": 0.1225, "step": 376 }, { "epoch": 0.1, "grad_norm": 2.995920181274414, "learning_rate": 1.9722147646348712e-06, "loss": 0.1355, "step": 377 }, { "epoch": 0.1, "grad_norm": 3.0849361419677734, "learning_rate": 1.97200722408302e-06, "loss": 0.1286, "step": 378 }, { "epoch": 0.1, "grad_norm": 2.914358615875244, "learning_rate": 1.9717989223071143e-06, "loss": 0.1265, "step": 379 }, { "epoch": 0.1, "grad_norm": 3.264975070953369, "learning_rate": 1.971589859470284e-06, "loss": 0.1448, "step": 380 }, { "epoch": 0.1, "grad_norm": 3.29422926902771, "learning_rate": 1.971380035736257e-06, "loss": 0.143, "step": 381 }, { "epoch": 0.1, "grad_norm": 3.24412202835083, "learning_rate": 1.9711694512693557e-06, "loss": 0.1493, "step": 382 }, { "epoch": 0.1, "grad_norm": 3.6297881603240967, "learning_rate": 1.970958106234498e-06, "loss": 0.1577, "step": 383 }, { "epoch": 0.1, "grad_norm": 3.3670804500579834, "learning_rate": 1.9707460007971986e-06, "loss": 0.1528, "step": 384 }, { "epoch": 0.11, "grad_norm": 3.3906354904174805, "learning_rate": 1.9705331351235673e-06, "loss": 0.166, "step": 385 }, { "epoch": 0.11, "grad_norm": 3.1111812591552734, "learning_rate": 1.9703195093803084e-06, "loss": 0.1404, "step": 386 }, { "epoch": 0.11, "grad_norm": 3.115222454071045, "learning_rate": 1.9701051237347228e-06, "loss": 0.1351, "step": 387 }, { "epoch": 0.11, "grad_norm": 3.2881312370300293, "learning_rate": 1.9698899783547055e-06, "loss": 0.1387, "step": 388 }, { "epoch": 0.11, "grad_norm": 3.433166742324829, "learning_rate": 1.969674073408747e-06, "loss": 0.136, "step": 389 }, { "epoch": 0.11, "grad_norm": 3.301527261734009, "learning_rate": 1.969457409065933e-06, "loss": 0.1417, "step": 390 }, { "epoch": 0.11, "grad_norm": 3.092268705368042, "learning_rate": 1.9692399854959423e-06, "loss": 0.1297, "step": 391 }, { "epoch": 0.11, "grad_norm": 3.169426441192627, "learning_rate": 1.96902180286905e-06, "loss": 0.1402, "step": 392 }, { "epoch": 0.11, "grad_norm": 3.187683343887329, "learning_rate": 1.968802861356125e-06, "loss": 0.1463, "step": 393 }, { "epoch": 0.11, "grad_norm": 3.421762704849243, "learning_rate": 1.968583161128631e-06, "loss": 0.1452, "step": 394 }, { "epoch": 0.11, "grad_norm": 3.173125743865967, "learning_rate": 1.968362702358625e-06, "loss": 0.1479, "step": 395 }, { "epoch": 0.11, "grad_norm": 3.2461776733398438, "learning_rate": 1.9681414852187584e-06, "loss": 0.1326, "step": 396 }, { "epoch": 0.11, "grad_norm": 3.0317840576171875, "learning_rate": 1.9679195098822773e-06, "loss": 0.1202, "step": 397 }, { "epoch": 0.11, "grad_norm": 3.3286936283111572, "learning_rate": 1.96769677652302e-06, "loss": 0.1455, "step": 398 }, { "epoch": 0.11, "grad_norm": 3.585869312286377, "learning_rate": 1.9674732853154204e-06, "loss": 0.1587, "step": 399 }, { "epoch": 0.11, "grad_norm": 3.1691548824310303, "learning_rate": 1.9672490364345037e-06, "loss": 0.1374, "step": 400 }, { "epoch": 0.11, "grad_norm": 3.390744924545288, "learning_rate": 1.9670240300558903e-06, "loss": 0.1359, "step": 401 }, { "epoch": 0.11, "grad_norm": 3.236043930053711, "learning_rate": 1.9667982663557935e-06, "loss": 0.1424, "step": 402 }, { "epoch": 0.11, "grad_norm": 3.2566139698028564, "learning_rate": 1.9665717455110186e-06, "loss": 0.1354, "step": 403 }, { "epoch": 0.11, "grad_norm": 3.3723509311676025, "learning_rate": 1.966344467698965e-06, "loss": 0.1277, "step": 404 }, { "epoch": 0.11, "grad_norm": 3.2254810333251953, "learning_rate": 1.9661164330976243e-06, "loss": 0.1184, "step": 405 }, { "epoch": 0.11, "grad_norm": 3.084118127822876, "learning_rate": 1.965887641885581e-06, "loss": 0.1313, "step": 406 }, { "epoch": 0.11, "grad_norm": 3.266711711883545, "learning_rate": 1.965658094242013e-06, "loss": 0.1366, "step": 407 }, { "epoch": 0.11, "grad_norm": 3.0545456409454346, "learning_rate": 1.965427790346688e-06, "loss": 0.1258, "step": 408 }, { "epoch": 0.11, "grad_norm": 3.190474510192871, "learning_rate": 1.965196730379969e-06, "loss": 0.1327, "step": 409 }, { "epoch": 0.11, "grad_norm": 3.1471309661865234, "learning_rate": 1.96496491452281e-06, "loss": 0.1278, "step": 410 }, { "epoch": 0.11, "grad_norm": 3.120482921600342, "learning_rate": 1.964732342956756e-06, "loss": 0.1311, "step": 411 }, { "epoch": 0.11, "grad_norm": 3.269605875015259, "learning_rate": 1.9644990158639447e-06, "loss": 0.1376, "step": 412 }, { "epoch": 0.11, "grad_norm": 2.940713405609131, "learning_rate": 1.964264933427106e-06, "loss": 0.1248, "step": 413 }, { "epoch": 0.11, "grad_norm": 3.0395448207855225, "learning_rate": 1.9640300958295597e-06, "loss": 0.1282, "step": 414 }, { "epoch": 0.11, "grad_norm": 3.565747022628784, "learning_rate": 1.963794503255219e-06, "loss": 0.1351, "step": 415 }, { "epoch": 0.11, "grad_norm": 4.205630779266357, "learning_rate": 1.963558155888587e-06, "loss": 0.1632, "step": 416 }, { "epoch": 0.11, "grad_norm": 3.1847383975982666, "learning_rate": 1.9633210539147582e-06, "loss": 0.1279, "step": 417 }, { "epoch": 0.11, "grad_norm": 3.4606165885925293, "learning_rate": 1.963083197519419e-06, "loss": 0.162, "step": 418 }, { "epoch": 0.11, "grad_norm": 3.155571222305298, "learning_rate": 1.9628445868888444e-06, "loss": 0.1218, "step": 419 }, { "epoch": 0.11, "grad_norm": 3.181898355484009, "learning_rate": 1.962605222209903e-06, "loss": 0.1267, "step": 420 }, { "epoch": 0.12, "grad_norm": 3.7715001106262207, "learning_rate": 1.962365103670051e-06, "loss": 0.1607, "step": 421 }, { "epoch": 0.12, "grad_norm": 3.2761077880859375, "learning_rate": 1.9621242314573374e-06, "loss": 0.1328, "step": 422 }, { "epoch": 0.12, "grad_norm": 3.0447583198547363, "learning_rate": 1.9618826057604002e-06, "loss": 0.1402, "step": 423 }, { "epoch": 0.12, "grad_norm": 2.9950973987579346, "learning_rate": 1.9616402267684673e-06, "loss": 0.1199, "step": 424 }, { "epoch": 0.12, "grad_norm": 3.516925573348999, "learning_rate": 1.9613970946713573e-06, "loss": 0.1546, "step": 425 }, { "epoch": 0.12, "grad_norm": 3.3416996002197266, "learning_rate": 1.961153209659478e-06, "loss": 0.1426, "step": 426 }, { "epoch": 0.12, "grad_norm": 3.3621819019317627, "learning_rate": 1.9609085719238275e-06, "loss": 0.1522, "step": 427 }, { "epoch": 0.12, "grad_norm": 3.4265851974487305, "learning_rate": 1.960663181655993e-06, "loss": 0.15, "step": 428 }, { "epoch": 0.12, "grad_norm": 3.2601821422576904, "learning_rate": 1.960417039048151e-06, "loss": 0.1418, "step": 429 }, { "epoch": 0.12, "grad_norm": 3.084263324737549, "learning_rate": 1.9601701442930666e-06, "loss": 0.1385, "step": 430 }, { "epoch": 0.12, "grad_norm": 3.3431873321533203, "learning_rate": 1.9599224975840947e-06, "loss": 0.1563, "step": 431 }, { "epoch": 0.12, "grad_norm": 2.996354341506958, "learning_rate": 1.9596740991151798e-06, "loss": 0.1197, "step": 432 }, { "epoch": 0.12, "grad_norm": 3.18942928314209, "learning_rate": 1.9594249490808535e-06, "loss": 0.13, "step": 433 }, { "epoch": 0.12, "grad_norm": 3.276121139526367, "learning_rate": 1.9591750476762373e-06, "loss": 0.1306, "step": 434 }, { "epoch": 0.12, "grad_norm": 3.223360538482666, "learning_rate": 1.95892439509704e-06, "loss": 0.1518, "step": 435 }, { "epoch": 0.12, "grad_norm": 3.457029342651367, "learning_rate": 1.9586729915395595e-06, "loss": 0.1507, "step": 436 }, { "epoch": 0.12, "grad_norm": 2.9880590438842773, "learning_rate": 1.9584208372006823e-06, "loss": 0.1303, "step": 437 }, { "epoch": 0.12, "grad_norm": 3.204850435256958, "learning_rate": 1.9581679322778813e-06, "loss": 0.1304, "step": 438 }, { "epoch": 0.12, "grad_norm": 3.195963144302368, "learning_rate": 1.9579142769692183e-06, "loss": 0.1457, "step": 439 }, { "epoch": 0.12, "grad_norm": 3.0109193325042725, "learning_rate": 1.957659871473343e-06, "loss": 0.1233, "step": 440 }, { "epoch": 0.12, "grad_norm": 3.1999616622924805, "learning_rate": 1.9574047159894915e-06, "loss": 0.1376, "step": 441 }, { "epoch": 0.12, "grad_norm": 3.2869439125061035, "learning_rate": 1.9571488107174887e-06, "loss": 0.1477, "step": 442 }, { "epoch": 0.12, "grad_norm": 3.073430061340332, "learning_rate": 1.9568921558577452e-06, "loss": 0.1331, "step": 443 }, { "epoch": 0.12, "grad_norm": 3.045060873031616, "learning_rate": 1.9566347516112596e-06, "loss": 0.1276, "step": 444 }, { "epoch": 0.12, "grad_norm": 3.1451849937438965, "learning_rate": 1.9563765981796176e-06, "loss": 0.1363, "step": 445 }, { "epoch": 0.12, "grad_norm": 3.5179929733276367, "learning_rate": 1.9561176957649907e-06, "loss": 0.1421, "step": 446 }, { "epoch": 0.12, "grad_norm": 3.413295269012451, "learning_rate": 1.955858044570137e-06, "loss": 0.1554, "step": 447 }, { "epoch": 0.12, "grad_norm": 3.3223888874053955, "learning_rate": 1.9555976447984026e-06, "loss": 0.1416, "step": 448 }, { "epoch": 0.12, "grad_norm": 3.189138174057007, "learning_rate": 1.9553364966537176e-06, "loss": 0.1336, "step": 449 }, { "epoch": 0.12, "grad_norm": 3.597012519836426, "learning_rate": 1.9550746003405995e-06, "loss": 0.1471, "step": 450 }, { "epoch": 0.12, "grad_norm": 3.439164161682129, "learning_rate": 1.954811956064152e-06, "loss": 0.1459, "step": 451 }, { "epoch": 0.12, "grad_norm": 3.2802419662475586, "learning_rate": 1.954548564030063e-06, "loss": 0.1391, "step": 452 }, { "epoch": 0.12, "grad_norm": 2.9764251708984375, "learning_rate": 1.9542844244446083e-06, "loss": 0.1271, "step": 453 }, { "epoch": 0.12, "grad_norm": 3.5547447204589844, "learning_rate": 1.9540195375146465e-06, "loss": 0.1483, "step": 454 }, { "epoch": 0.12, "grad_norm": 3.2675955295562744, "learning_rate": 1.9537539034476243e-06, "loss": 0.1227, "step": 455 }, { "epoch": 0.12, "grad_norm": 3.3789680004119873, "learning_rate": 1.9534875224515718e-06, "loss": 0.1528, "step": 456 }, { "epoch": 0.12, "grad_norm": 3.386911153793335, "learning_rate": 1.9532203947351033e-06, "loss": 0.1198, "step": 457 }, { "epoch": 0.13, "grad_norm": 3.5681533813476562, "learning_rate": 1.95295252050742e-06, "loss": 0.1549, "step": 458 }, { "epoch": 0.13, "grad_norm": 3.107218027114868, "learning_rate": 1.9526838999783062e-06, "loss": 0.1206, "step": 459 }, { "epoch": 0.13, "grad_norm": 3.2751224040985107, "learning_rate": 1.9524145333581313e-06, "loss": 0.1519, "step": 460 }, { "epoch": 0.13, "grad_norm": 3.1982295513153076, "learning_rate": 1.9521444208578484e-06, "loss": 0.132, "step": 461 }, { "epoch": 0.13, "grad_norm": 3.3096084594726562, "learning_rate": 1.951873562688996e-06, "loss": 0.1345, "step": 462 }, { "epoch": 0.13, "grad_norm": 3.5432839393615723, "learning_rate": 1.9516019590636953e-06, "loss": 0.1724, "step": 463 }, { "epoch": 0.13, "grad_norm": 2.7536401748657227, "learning_rate": 1.9513296101946515e-06, "loss": 0.1061, "step": 464 }, { "epoch": 0.13, "grad_norm": 3.1745247840881348, "learning_rate": 1.9510565162951534e-06, "loss": 0.125, "step": 465 }, { "epoch": 0.13, "grad_norm": 3.4509172439575195, "learning_rate": 1.9507826775790743e-06, "loss": 0.1532, "step": 466 }, { "epoch": 0.13, "grad_norm": 3.3914451599121094, "learning_rate": 1.9505080942608698e-06, "loss": 0.1488, "step": 467 }, { "epoch": 0.13, "grad_norm": 2.9867324829101562, "learning_rate": 1.9502327665555787e-06, "loss": 0.127, "step": 468 }, { "epoch": 0.13, "grad_norm": 3.1437714099884033, "learning_rate": 1.949956694678823e-06, "loss": 0.1305, "step": 469 }, { "epoch": 0.13, "grad_norm": 3.198093891143799, "learning_rate": 1.9496798788468074e-06, "loss": 0.137, "step": 470 }, { "epoch": 0.13, "grad_norm": 3.217594861984253, "learning_rate": 1.949402319276319e-06, "loss": 0.1423, "step": 471 }, { "epoch": 0.13, "grad_norm": 3.119245767593384, "learning_rate": 1.949124016184728e-06, "loss": 0.1438, "step": 472 }, { "epoch": 0.13, "grad_norm": 3.3736212253570557, "learning_rate": 1.948844969789987e-06, "loss": 0.1316, "step": 473 }, { "epoch": 0.13, "grad_norm": 3.4069814682006836, "learning_rate": 1.9485651803106283e-06, "loss": 0.1458, "step": 474 }, { "epoch": 0.13, "grad_norm": 3.2153573036193848, "learning_rate": 1.9482846479657704e-06, "loss": 0.1349, "step": 475 }, { "epoch": 0.13, "grad_norm": 3.210415840148926, "learning_rate": 1.9480033729751096e-06, "loss": 0.1389, "step": 476 }, { "epoch": 0.13, "grad_norm": 3.301241636276245, "learning_rate": 1.947721355558926e-06, "loss": 0.1443, "step": 477 }, { "epoch": 0.13, "grad_norm": 3.1576645374298096, "learning_rate": 1.9474385959380806e-06, "loss": 0.1443, "step": 478 }, { "epoch": 0.13, "grad_norm": 2.980865240097046, "learning_rate": 1.9471550943340157e-06, "loss": 0.1351, "step": 479 }, { "epoch": 0.13, "grad_norm": 2.9259071350097656, "learning_rate": 1.9468708509687544e-06, "loss": 0.131, "step": 480 }, { "epoch": 0.13, "grad_norm": 3.2585418224334717, "learning_rate": 1.946585866064901e-06, "loss": 0.1225, "step": 481 }, { "epoch": 0.13, "grad_norm": 3.609048843383789, "learning_rate": 1.9463001398456397e-06, "loss": 0.152, "step": 482 }, { "epoch": 0.13, "grad_norm": 3.1587791442871094, "learning_rate": 1.946013672534737e-06, "loss": 0.1297, "step": 483 }, { "epoch": 0.13, "grad_norm": 3.1544039249420166, "learning_rate": 1.9457264643565383e-06, "loss": 0.1448, "step": 484 }, { "epoch": 0.13, "grad_norm": 3.3397443294525146, "learning_rate": 1.94543851553597e-06, "loss": 0.1272, "step": 485 }, { "epoch": 0.13, "grad_norm": 3.064429521560669, "learning_rate": 1.9451498262985384e-06, "loss": 0.1337, "step": 486 }, { "epoch": 0.13, "grad_norm": 3.0073888301849365, "learning_rate": 1.944860396870328e-06, "loss": 0.1347, "step": 487 }, { "epoch": 0.13, "grad_norm": 3.140315294265747, "learning_rate": 1.944570227478006e-06, "loss": 0.1303, "step": 488 }, { "epoch": 0.13, "grad_norm": 3.46236252784729, "learning_rate": 1.9442793183488174e-06, "loss": 0.1379, "step": 489 }, { "epoch": 0.13, "grad_norm": 3.236910104751587, "learning_rate": 1.943987669710586e-06, "loss": 0.1427, "step": 490 }, { "epoch": 0.13, "grad_norm": 3.339592218399048, "learning_rate": 1.943695281791716e-06, "loss": 0.1543, "step": 491 }, { "epoch": 0.13, "grad_norm": 3.673352003097534, "learning_rate": 1.943402154821189e-06, "loss": 0.1482, "step": 492 }, { "epoch": 0.13, "grad_norm": 3.3469910621643066, "learning_rate": 1.943108289028568e-06, "loss": 0.1361, "step": 493 }, { "epoch": 0.13, "grad_norm": 3.042558431625366, "learning_rate": 1.9428136846439915e-06, "loss": 0.1351, "step": 494 }, { "epoch": 0.14, "grad_norm": 3.3642477989196777, "learning_rate": 1.942518341898178e-06, "loss": 0.1321, "step": 495 }, { "epoch": 0.14, "grad_norm": 3.2358176708221436, "learning_rate": 1.942222261022425e-06, "loss": 0.13, "step": 496 }, { "epoch": 0.14, "grad_norm": 3.4853944778442383, "learning_rate": 1.941925442248607e-06, "loss": 0.1424, "step": 497 }, { "epoch": 0.14, "grad_norm": 3.204458236694336, "learning_rate": 1.9416278858091757e-06, "loss": 0.1329, "step": 498 }, { "epoch": 0.14, "grad_norm": 3.2569594383239746, "learning_rate": 1.9413295919371626e-06, "loss": 0.1409, "step": 499 }, { "epoch": 0.14, "grad_norm": 3.180896759033203, "learning_rate": 1.9410305608661742e-06, "loss": 0.1315, "step": 500 }, { "epoch": 0.14, "grad_norm": 2.9764926433563232, "learning_rate": 1.940730792830397e-06, "loss": 0.1212, "step": 501 }, { "epoch": 0.14, "grad_norm": 3.056260347366333, "learning_rate": 1.9404302880645925e-06, "loss": 0.1228, "step": 502 }, { "epoch": 0.14, "grad_norm": 3.196409225463867, "learning_rate": 1.9401290468041002e-06, "loss": 0.128, "step": 503 }, { "epoch": 0.14, "grad_norm": 3.324312925338745, "learning_rate": 1.939827069284836e-06, "loss": 0.1387, "step": 504 }, { "epoch": 0.14, "grad_norm": 3.0371274948120117, "learning_rate": 1.9395243557432923e-06, "loss": 0.1243, "step": 505 }, { "epoch": 0.14, "grad_norm": 3.1022558212280273, "learning_rate": 1.939220906416539e-06, "loss": 0.121, "step": 506 }, { "epoch": 0.14, "grad_norm": 2.9566850662231445, "learning_rate": 1.9389167215422203e-06, "loss": 0.1215, "step": 507 }, { "epoch": 0.14, "grad_norm": 3.5804319381713867, "learning_rate": 1.938611801358558e-06, "loss": 0.1528, "step": 508 }, { "epoch": 0.14, "grad_norm": 3.33417010307312, "learning_rate": 1.9383061461043496e-06, "loss": 0.1439, "step": 509 }, { "epoch": 0.14, "grad_norm": 3.3231208324432373, "learning_rate": 1.9379997560189675e-06, "loss": 0.1525, "step": 510 }, { "epoch": 0.14, "grad_norm": 3.470815420150757, "learning_rate": 1.93769263134236e-06, "loss": 0.1498, "step": 511 }, { "epoch": 0.14, "grad_norm": 3.2947394847869873, "learning_rate": 1.937384772315051e-06, "loss": 0.1342, "step": 512 }, { "epoch": 0.14, "grad_norm": 3.2775120735168457, "learning_rate": 1.9370761791781392e-06, "loss": 0.1403, "step": 513 }, { "epoch": 0.14, "grad_norm": 3.1588900089263916, "learning_rate": 1.936766852173298e-06, "loss": 0.1399, "step": 514 }, { "epoch": 0.14, "grad_norm": 3.2581050395965576, "learning_rate": 1.936456791542776e-06, "loss": 0.1545, "step": 515 }, { "epoch": 0.14, "grad_norm": 3.052593946456909, "learning_rate": 1.936145997529396e-06, "loss": 0.1411, "step": 516 }, { "epoch": 0.14, "grad_norm": 3.378387212753296, "learning_rate": 1.9358344703765553e-06, "loss": 0.1567, "step": 517 }, { "epoch": 0.14, "grad_norm": 2.960052251815796, "learning_rate": 1.935522210328225e-06, "loss": 0.1387, "step": 518 }, { "epoch": 0.14, "grad_norm": 3.3264615535736084, "learning_rate": 1.9352092176289508e-06, "loss": 0.1632, "step": 519 }, { "epoch": 0.14, "grad_norm": 3.1621196269989014, "learning_rate": 1.934895492523852e-06, "loss": 0.1396, "step": 520 }, { "epoch": 0.14, "grad_norm": 3.1209771633148193, "learning_rate": 1.9345810352586203e-06, "loss": 0.1562, "step": 521 }, { "epoch": 0.14, "grad_norm": 3.20269775390625, "learning_rate": 1.934265846079523e-06, "loss": 0.1369, "step": 522 }, { "epoch": 0.14, "grad_norm": 3.130948066711426, "learning_rate": 1.9339499252333995e-06, "loss": 0.1299, "step": 523 }, { "epoch": 0.14, "grad_norm": 3.228376865386963, "learning_rate": 1.9336332729676606e-06, "loss": 0.1579, "step": 524 }, { "epoch": 0.14, "grad_norm": 3.1680591106414795, "learning_rate": 1.933315889530293e-06, "loss": 0.1319, "step": 525 }, { "epoch": 0.14, "grad_norm": 3.06646466255188, "learning_rate": 1.932997775169854e-06, "loss": 0.1354, "step": 526 }, { "epoch": 0.14, "grad_norm": 3.227132558822632, "learning_rate": 1.932678930135473e-06, "loss": 0.1317, "step": 527 }, { "epoch": 0.14, "grad_norm": 3.0347650051116943, "learning_rate": 1.932359354676853e-06, "loss": 0.1213, "step": 528 }, { "epoch": 0.14, "grad_norm": 3.594651699066162, "learning_rate": 1.9320390490442685e-06, "loss": 0.1418, "step": 529 }, { "epoch": 0.14, "grad_norm": 3.1397430896759033, "learning_rate": 1.9317180134885657e-06, "loss": 0.1388, "step": 530 }, { "epoch": 0.15, "grad_norm": 3.069596767425537, "learning_rate": 1.931396248261162e-06, "loss": 0.1194, "step": 531 }, { "epoch": 0.15, "grad_norm": 3.0973358154296875, "learning_rate": 1.9310737536140476e-06, "loss": 0.1387, "step": 532 }, { "epoch": 0.15, "grad_norm": 3.301105499267578, "learning_rate": 1.930750529799782e-06, "loss": 0.1397, "step": 533 }, { "epoch": 0.15, "grad_norm": 3.406813621520996, "learning_rate": 1.9304265770714976e-06, "loss": 0.1447, "step": 534 }, { "epoch": 0.15, "grad_norm": 3.189361095428467, "learning_rate": 1.9301018956828963e-06, "loss": 0.1361, "step": 535 }, { "epoch": 0.15, "grad_norm": 3.1613755226135254, "learning_rate": 1.929776485888251e-06, "loss": 0.1354, "step": 536 }, { "epoch": 0.15, "grad_norm": 3.0347232818603516, "learning_rate": 1.9294503479424066e-06, "loss": 0.1175, "step": 537 }, { "epoch": 0.15, "grad_norm": 3.215599775314331, "learning_rate": 1.9291234821007755e-06, "loss": 0.1424, "step": 538 }, { "epoch": 0.15, "grad_norm": 3.0372154712677, "learning_rate": 1.928795888619342e-06, "loss": 0.138, "step": 539 }, { "epoch": 0.15, "grad_norm": 3.246412754058838, "learning_rate": 1.9284675677546602e-06, "loss": 0.1211, "step": 540 }, { "epoch": 0.15, "grad_norm": 3.2461938858032227, "learning_rate": 1.9281385197638525e-06, "loss": 0.1422, "step": 541 }, { "epoch": 0.15, "grad_norm": 3.447664260864258, "learning_rate": 1.9278087449046125e-06, "loss": 0.1451, "step": 542 }, { "epoch": 0.15, "grad_norm": 3.254608631134033, "learning_rate": 1.9274782434352014e-06, "loss": 0.1429, "step": 543 }, { "epoch": 0.15, "grad_norm": 3.195387601852417, "learning_rate": 1.9271470156144514e-06, "loss": 0.1412, "step": 544 }, { "epoch": 0.15, "grad_norm": 3.034229278564453, "learning_rate": 1.926815061701762e-06, "loss": 0.141, "step": 545 }, { "epoch": 0.15, "grad_norm": 3.351449728012085, "learning_rate": 1.926482381957101e-06, "loss": 0.1477, "step": 546 }, { "epoch": 0.15, "grad_norm": 3.1248385906219482, "learning_rate": 1.926148976641006e-06, "loss": 0.1162, "step": 547 }, { "epoch": 0.15, "grad_norm": 3.154493808746338, "learning_rate": 1.9258148460145826e-06, "loss": 0.1252, "step": 548 }, { "epoch": 0.15, "grad_norm": 3.1947855949401855, "learning_rate": 1.925479990339503e-06, "loss": 0.1309, "step": 549 }, { "epoch": 0.15, "grad_norm": 3.2486939430236816, "learning_rate": 1.925144409878009e-06, "loss": 0.1375, "step": 550 }, { "epoch": 0.15, "grad_norm": 2.9871749877929688, "learning_rate": 1.9248081048929095e-06, "loss": 0.1317, "step": 551 }, { "epoch": 0.15, "grad_norm": 3.172790765762329, "learning_rate": 1.9244710756475797e-06, "loss": 0.1391, "step": 552 }, { "epoch": 0.15, "grad_norm": 2.975095272064209, "learning_rate": 1.9241333224059637e-06, "loss": 0.1373, "step": 553 }, { "epoch": 0.15, "grad_norm": 3.308891534805298, "learning_rate": 1.923794845432571e-06, "loss": 0.157, "step": 554 }, { "epoch": 0.15, "grad_norm": 3.0941014289855957, "learning_rate": 1.9234556449924794e-06, "loss": 0.1287, "step": 555 }, { "epoch": 0.15, "grad_norm": 3.365940570831299, "learning_rate": 1.9231157213513323e-06, "loss": 0.154, "step": 556 }, { "epoch": 0.15, "grad_norm": 3.150245428085327, "learning_rate": 1.9227750747753393e-06, "loss": 0.1316, "step": 557 }, { "epoch": 0.15, "grad_norm": 3.226799488067627, "learning_rate": 1.922433705531277e-06, "loss": 0.1437, "step": 558 }, { "epoch": 0.15, "grad_norm": 3.223442316055298, "learning_rate": 1.9220916138864875e-06, "loss": 0.1448, "step": 559 }, { "epoch": 0.15, "grad_norm": 3.3040995597839355, "learning_rate": 1.921748800108878e-06, "loss": 0.1517, "step": 560 }, { "epoch": 0.15, "grad_norm": 3.551745653152466, "learning_rate": 1.9214052644669235e-06, "loss": 0.1675, "step": 561 }, { "epoch": 0.15, "grad_norm": 3.008906364440918, "learning_rate": 1.921061007229661e-06, "loss": 0.1415, "step": 562 }, { "epoch": 0.15, "grad_norm": 3.0802981853485107, "learning_rate": 1.920716028666695e-06, "loss": 0.1341, "step": 563 }, { "epoch": 0.15, "grad_norm": 2.821687936782837, "learning_rate": 1.9203703290481946e-06, "loss": 0.1234, "step": 564 }, { "epoch": 0.15, "grad_norm": 3.080923080444336, "learning_rate": 1.920023908644893e-06, "loss": 0.1472, "step": 565 }, { "epoch": 0.15, "grad_norm": 3.197179079055786, "learning_rate": 1.9196767677280885e-06, "loss": 0.1292, "step": 566 }, { "epoch": 0.15, "grad_norm": 3.17303729057312, "learning_rate": 1.919328906569642e-06, "loss": 0.1379, "step": 567 }, { "epoch": 0.16, "grad_norm": 3.1359729766845703, "learning_rate": 1.9189803254419812e-06, "loss": 0.1374, "step": 568 }, { "epoch": 0.16, "grad_norm": 2.890521287918091, "learning_rate": 1.9186310246180956e-06, "loss": 0.1247, "step": 569 }, { "epoch": 0.16, "grad_norm": 3.2141013145446777, "learning_rate": 1.9182810043715388e-06, "loss": 0.1314, "step": 570 }, { "epoch": 0.16, "grad_norm": 3.1341261863708496, "learning_rate": 1.9179302649764282e-06, "loss": 0.1274, "step": 571 }, { "epoch": 0.16, "grad_norm": 3.1453020572662354, "learning_rate": 1.9175788067074445e-06, "loss": 0.1334, "step": 572 }, { "epoch": 0.16, "grad_norm": 3.1976587772369385, "learning_rate": 1.9172266298398297e-06, "loss": 0.1243, "step": 573 }, { "epoch": 0.16, "grad_norm": 3.1611573696136475, "learning_rate": 1.9168737346493914e-06, "loss": 0.1177, "step": 574 }, { "epoch": 0.16, "grad_norm": 3.2642691135406494, "learning_rate": 1.9165201214124972e-06, "loss": 0.1462, "step": 575 }, { "epoch": 0.16, "grad_norm": 2.8818516731262207, "learning_rate": 1.9161657904060784e-06, "loss": 0.1242, "step": 576 }, { "epoch": 0.16, "grad_norm": 3.033656358718872, "learning_rate": 1.915810741907628e-06, "loss": 0.1324, "step": 577 }, { "epoch": 0.16, "grad_norm": 3.5656187534332275, "learning_rate": 1.915454976195201e-06, "loss": 0.1505, "step": 578 }, { "epoch": 0.16, "grad_norm": 3.210245132446289, "learning_rate": 1.9150984935474146e-06, "loss": 0.1566, "step": 579 }, { "epoch": 0.16, "grad_norm": 3.1418895721435547, "learning_rate": 1.9147412942434463e-06, "loss": 0.1382, "step": 580 }, { "epoch": 0.16, "grad_norm": 3.0061264038085938, "learning_rate": 1.9143833785630354e-06, "loss": 0.1343, "step": 581 }, { "epoch": 0.16, "grad_norm": 3.0668489933013916, "learning_rate": 1.914024746786483e-06, "loss": 0.1503, "step": 582 }, { "epoch": 0.16, "grad_norm": 3.207681894302368, "learning_rate": 1.91366539919465e-06, "loss": 0.1406, "step": 583 }, { "epoch": 0.16, "grad_norm": 3.234178066253662, "learning_rate": 1.9133053360689576e-06, "loss": 0.1447, "step": 584 }, { "epoch": 0.16, "grad_norm": 3.5382869243621826, "learning_rate": 1.9129445576913886e-06, "loss": 0.1483, "step": 585 }, { "epoch": 0.16, "grad_norm": 3.5630078315734863, "learning_rate": 1.9125830643444854e-06, "loss": 0.1652, "step": 586 }, { "epoch": 0.16, "grad_norm": 3.188985824584961, "learning_rate": 1.91222085631135e-06, "loss": 0.1467, "step": 587 }, { "epoch": 0.16, "grad_norm": 3.0435938835144043, "learning_rate": 1.9118579338756445e-06, "loss": 0.1316, "step": 588 }, { "epoch": 0.16, "grad_norm": 2.9583663940429688, "learning_rate": 1.9114942973215902e-06, "loss": 0.1249, "step": 589 }, { "epoch": 0.16, "grad_norm": 3.3159563541412354, "learning_rate": 1.911129946933968e-06, "loss": 0.152, "step": 590 }, { "epoch": 0.16, "grad_norm": 3.066227674484253, "learning_rate": 1.9107648829981172e-06, "loss": 0.1417, "step": 591 }, { "epoch": 0.16, "grad_norm": 3.287661552429199, "learning_rate": 1.910399105799937e-06, "loss": 0.1417, "step": 592 }, { "epoch": 0.16, "grad_norm": 3.2641029357910156, "learning_rate": 1.910032615625884e-06, "loss": 0.1383, "step": 593 }, { "epoch": 0.16, "grad_norm": 3.0615787506103516, "learning_rate": 1.909665412762974e-06, "loss": 0.1294, "step": 594 }, { "epoch": 0.16, "grad_norm": 2.9307010173797607, "learning_rate": 1.90929749749878e-06, "loss": 0.1256, "step": 595 }, { "epoch": 0.16, "grad_norm": 3.220402479171753, "learning_rate": 1.9089288701214344e-06, "loss": 0.1378, "step": 596 }, { "epoch": 0.16, "grad_norm": 3.3105733394622803, "learning_rate": 1.908559530919626e-06, "loss": 0.1542, "step": 597 }, { "epoch": 0.16, "grad_norm": 3.1800825595855713, "learning_rate": 1.908189480182602e-06, "loss": 0.1324, "step": 598 }, { "epoch": 0.16, "grad_norm": 3.283498764038086, "learning_rate": 1.9078187182001654e-06, "loss": 0.1571, "step": 599 }, { "epoch": 0.16, "grad_norm": 3.2634048461914062, "learning_rate": 1.9074472452626775e-06, "loss": 0.1504, "step": 600 }, { "epoch": 0.16, "grad_norm": 3.1880810260772705, "learning_rate": 1.9070750616610565e-06, "loss": 0.1261, "step": 601 }, { "epoch": 0.16, "grad_norm": 2.873277187347412, "learning_rate": 1.9067021676867765e-06, "loss": 0.1364, "step": 602 }, { "epoch": 0.16, "grad_norm": 3.0402424335479736, "learning_rate": 1.906328563631868e-06, "loss": 0.137, "step": 603 }, { "epoch": 0.16, "grad_norm": 2.894537925720215, "learning_rate": 1.9059542497889176e-06, "loss": 0.1266, "step": 604 }, { "epoch": 0.17, "grad_norm": 3.748373031616211, "learning_rate": 1.905579226451068e-06, "loss": 0.1681, "step": 605 }, { "epoch": 0.17, "grad_norm": 3.571746826171875, "learning_rate": 1.9052034939120174e-06, "loss": 0.1575, "step": 606 }, { "epoch": 0.17, "grad_norm": 2.8718080520629883, "learning_rate": 1.9048270524660196e-06, "loss": 0.1227, "step": 607 }, { "epoch": 0.17, "grad_norm": 3.5282645225524902, "learning_rate": 1.904449902407883e-06, "loss": 0.1603, "step": 608 }, { "epoch": 0.17, "grad_norm": 3.7271251678466797, "learning_rate": 1.9040720440329715e-06, "loss": 0.1463, "step": 609 }, { "epoch": 0.17, "grad_norm": 2.921295642852783, "learning_rate": 1.9036934776372039e-06, "loss": 0.1235, "step": 610 }, { "epoch": 0.17, "grad_norm": 3.2682065963745117, "learning_rate": 1.9033142035170526e-06, "loss": 0.1262, "step": 611 }, { "epoch": 0.17, "grad_norm": 3.3024415969848633, "learning_rate": 1.9029342219695452e-06, "loss": 0.1317, "step": 612 }, { "epoch": 0.17, "grad_norm": 3.1507554054260254, "learning_rate": 1.902553533292263e-06, "loss": 0.1329, "step": 613 }, { "epoch": 0.17, "grad_norm": 3.013246774673462, "learning_rate": 1.9021721377833403e-06, "loss": 0.1265, "step": 614 }, { "epoch": 0.17, "grad_norm": 3.1882128715515137, "learning_rate": 1.9017900357414667e-06, "loss": 0.1462, "step": 615 }, { "epoch": 0.17, "grad_norm": 3.45387864112854, "learning_rate": 1.9014072274658831e-06, "loss": 0.1331, "step": 616 }, { "epoch": 0.17, "grad_norm": 3.2602956295013428, "learning_rate": 1.9010237132563853e-06, "loss": 0.1427, "step": 617 }, { "epoch": 0.17, "grad_norm": 3.3179290294647217, "learning_rate": 1.9006394934133206e-06, "loss": 0.1478, "step": 618 }, { "epoch": 0.17, "grad_norm": 3.0537261962890625, "learning_rate": 1.9002545682375896e-06, "loss": 0.1348, "step": 619 }, { "epoch": 0.17, "grad_norm": 3.1373305320739746, "learning_rate": 1.8998689380306448e-06, "loss": 0.1306, "step": 620 }, { "epoch": 0.17, "grad_norm": 3.3811371326446533, "learning_rate": 1.8994826030944915e-06, "loss": 0.1288, "step": 621 }, { "epoch": 0.17, "grad_norm": 3.559420585632324, "learning_rate": 1.8990955637316862e-06, "loss": 0.1517, "step": 622 }, { "epoch": 0.17, "grad_norm": 3.1817328929901123, "learning_rate": 1.898707820245338e-06, "loss": 0.1287, "step": 623 }, { "epoch": 0.17, "grad_norm": 3.299144983291626, "learning_rate": 1.8983193729391066e-06, "loss": 0.1358, "step": 624 }, { "epoch": 0.17, "grad_norm": 3.3346171379089355, "learning_rate": 1.8979302221172027e-06, "loss": 0.1554, "step": 625 }, { "epoch": 0.17, "grad_norm": 3.2857518196105957, "learning_rate": 1.897540368084389e-06, "loss": 0.152, "step": 626 }, { "epoch": 0.17, "grad_norm": 3.3210465908050537, "learning_rate": 1.8971498111459778e-06, "loss": 0.1434, "step": 627 }, { "epoch": 0.17, "grad_norm": 3.127253770828247, "learning_rate": 1.8967585516078328e-06, "loss": 0.1214, "step": 628 }, { "epoch": 0.17, "grad_norm": 2.917435884475708, "learning_rate": 1.8963665897763677e-06, "loss": 0.1383, "step": 629 }, { "epoch": 0.17, "grad_norm": 2.965036392211914, "learning_rate": 1.8959739259585454e-06, "loss": 0.1205, "step": 630 }, { "epoch": 0.17, "grad_norm": 2.9649155139923096, "learning_rate": 1.8955805604618798e-06, "loss": 0.1287, "step": 631 }, { "epoch": 0.17, "grad_norm": 3.0632846355438232, "learning_rate": 1.8951864935944334e-06, "loss": 0.1295, "step": 632 }, { "epoch": 0.17, "grad_norm": 3.091184616088867, "learning_rate": 1.8947917256648186e-06, "loss": 0.1298, "step": 633 }, { "epoch": 0.17, "grad_norm": 2.8987550735473633, "learning_rate": 1.894396256982196e-06, "loss": 0.1319, "step": 634 }, { "epoch": 0.17, "grad_norm": 2.84979248046875, "learning_rate": 1.8940000878562755e-06, "loss": 0.1337, "step": 635 }, { "epoch": 0.17, "grad_norm": 2.992001533508301, "learning_rate": 1.8936032185973164e-06, "loss": 0.137, "step": 636 }, { "epoch": 0.17, "grad_norm": 2.981226921081543, "learning_rate": 1.8932056495161247e-06, "loss": 0.1407, "step": 637 }, { "epoch": 0.17, "grad_norm": 3.0171751976013184, "learning_rate": 1.8928073809240551e-06, "loss": 0.1271, "step": 638 }, { "epoch": 0.17, "grad_norm": 3.175798177719116, "learning_rate": 1.892408413133011e-06, "loss": 0.1352, "step": 639 }, { "epoch": 0.17, "grad_norm": 2.949618339538574, "learning_rate": 1.8920087464554424e-06, "loss": 0.1271, "step": 640 }, { "epoch": 0.18, "grad_norm": 3.176654577255249, "learning_rate": 1.8916083812043463e-06, "loss": 0.1388, "step": 641 }, { "epoch": 0.18, "grad_norm": 3.2517740726470947, "learning_rate": 1.891207317693268e-06, "loss": 0.137, "step": 642 }, { "epoch": 0.18, "grad_norm": 3.248309850692749, "learning_rate": 1.890805556236299e-06, "loss": 0.1328, "step": 643 }, { "epoch": 0.18, "grad_norm": 3.2052953243255615, "learning_rate": 1.8904030971480767e-06, "loss": 0.1341, "step": 644 }, { "epoch": 0.18, "grad_norm": 3.079794406890869, "learning_rate": 1.8899999407437859e-06, "loss": 0.1322, "step": 645 }, { "epoch": 0.18, "grad_norm": 3.074662685394287, "learning_rate": 1.8895960873391573e-06, "loss": 0.1317, "step": 646 }, { "epoch": 0.18, "grad_norm": 3.106125593185425, "learning_rate": 1.889191537250467e-06, "loss": 0.1501, "step": 647 }, { "epoch": 0.18, "grad_norm": 3.11283802986145, "learning_rate": 1.8887862907945373e-06, "loss": 0.1598, "step": 648 }, { "epoch": 0.18, "grad_norm": 3.104076862335205, "learning_rate": 1.8883803482887352e-06, "loss": 0.131, "step": 649 }, { "epoch": 0.18, "grad_norm": 2.8443734645843506, "learning_rate": 1.8879737100509737e-06, "loss": 0.139, "step": 650 }, { "epoch": 0.18, "grad_norm": 2.937448024749756, "learning_rate": 1.8875663763997095e-06, "loss": 0.1407, "step": 651 }, { "epoch": 0.18, "grad_norm": 2.8431577682495117, "learning_rate": 1.8871583476539445e-06, "loss": 0.1291, "step": 652 }, { "epoch": 0.18, "grad_norm": 3.3876118659973145, "learning_rate": 1.8867496241332255e-06, "loss": 0.1332, "step": 653 }, { "epoch": 0.18, "grad_norm": 3.0150880813598633, "learning_rate": 1.8863402061576428e-06, "loss": 0.1321, "step": 654 }, { "epoch": 0.18, "grad_norm": 3.4667561054229736, "learning_rate": 1.8859300940478302e-06, "loss": 0.1642, "step": 655 }, { "epoch": 0.18, "grad_norm": 3.204092264175415, "learning_rate": 1.885519288124966e-06, "loss": 0.1446, "step": 656 }, { "epoch": 0.18, "grad_norm": 3.096914768218994, "learning_rate": 1.8851077887107714e-06, "loss": 0.1375, "step": 657 }, { "epoch": 0.18, "grad_norm": 2.9773669242858887, "learning_rate": 1.8846955961275103e-06, "loss": 0.1328, "step": 658 }, { "epoch": 0.18, "grad_norm": 3.0036585330963135, "learning_rate": 1.8842827106979904e-06, "loss": 0.154, "step": 659 }, { "epoch": 0.18, "grad_norm": 3.532486915588379, "learning_rate": 1.8838691327455609e-06, "loss": 0.1609, "step": 660 }, { "epoch": 0.18, "grad_norm": 3.165400266647339, "learning_rate": 1.8834548625941146e-06, "loss": 0.1423, "step": 661 }, { "epoch": 0.18, "grad_norm": 3.1645941734313965, "learning_rate": 1.8830399005680854e-06, "loss": 0.1355, "step": 662 }, { "epoch": 0.18, "grad_norm": 3.250861644744873, "learning_rate": 1.8826242469924493e-06, "loss": 0.1651, "step": 663 }, { "epoch": 0.18, "grad_norm": 3.0005064010620117, "learning_rate": 1.8822079021927242e-06, "loss": 0.1383, "step": 664 }, { "epoch": 0.18, "grad_norm": 2.876596689224243, "learning_rate": 1.8817908664949686e-06, "loss": 0.1101, "step": 665 }, { "epoch": 0.18, "grad_norm": 3.1340842247009277, "learning_rate": 1.8813731402257829e-06, "loss": 0.1384, "step": 666 }, { "epoch": 0.18, "grad_norm": 3.11590576171875, "learning_rate": 1.8809547237123077e-06, "loss": 0.1439, "step": 667 }, { "epoch": 0.18, "grad_norm": 2.904118776321411, "learning_rate": 1.8805356172822248e-06, "loss": 0.1284, "step": 668 }, { "epoch": 0.18, "grad_norm": 3.2419793605804443, "learning_rate": 1.880115821263756e-06, "loss": 0.1479, "step": 669 }, { "epoch": 0.18, "grad_norm": 3.2196366786956787, "learning_rate": 1.8796953359856625e-06, "loss": 0.135, "step": 670 }, { "epoch": 0.18, "grad_norm": 3.2696025371551514, "learning_rate": 1.8792741617772462e-06, "loss": 0.1379, "step": 671 }, { "epoch": 0.18, "grad_norm": 2.964501142501831, "learning_rate": 1.8788522989683485e-06, "loss": 0.1304, "step": 672 }, { "epoch": 0.18, "grad_norm": 2.754298686981201, "learning_rate": 1.8784297478893491e-06, "loss": 0.1319, "step": 673 }, { "epoch": 0.18, "grad_norm": 3.0672099590301514, "learning_rate": 1.878006508871168e-06, "loss": 0.1423, "step": 674 }, { "epoch": 0.18, "grad_norm": 3.1538891792297363, "learning_rate": 1.8775825822452634e-06, "loss": 0.1479, "step": 675 }, { "epoch": 0.18, "grad_norm": 3.1253247261047363, "learning_rate": 1.8771579683436313e-06, "loss": 0.1325, "step": 676 }, { "epoch": 0.18, "grad_norm": 3.3955092430114746, "learning_rate": 1.8767326674988069e-06, "loss": 0.1466, "step": 677 }, { "epoch": 0.19, "grad_norm": 2.8525750637054443, "learning_rate": 1.8763066800438634e-06, "loss": 0.127, "step": 678 }, { "epoch": 0.19, "grad_norm": 2.9499616622924805, "learning_rate": 1.8758800063124114e-06, "loss": 0.1326, "step": 679 }, { "epoch": 0.19, "grad_norm": 3.2043933868408203, "learning_rate": 1.8754526466385983e-06, "loss": 0.1335, "step": 680 }, { "epoch": 0.19, "grad_norm": 3.0339996814727783, "learning_rate": 1.8750246013571098e-06, "loss": 0.1313, "step": 681 }, { "epoch": 0.19, "grad_norm": 3.287776231765747, "learning_rate": 1.874595870803168e-06, "loss": 0.1472, "step": 682 }, { "epoch": 0.19, "grad_norm": 2.9506630897521973, "learning_rate": 1.8741664553125316e-06, "loss": 0.136, "step": 683 }, { "epoch": 0.19, "grad_norm": 3.030766010284424, "learning_rate": 1.8737363552214962e-06, "loss": 0.131, "step": 684 }, { "epoch": 0.19, "grad_norm": 3.2087364196777344, "learning_rate": 1.8733055708668925e-06, "loss": 0.1448, "step": 685 }, { "epoch": 0.19, "grad_norm": 3.3978960514068604, "learning_rate": 1.8728741025860887e-06, "loss": 0.1547, "step": 686 }, { "epoch": 0.19, "grad_norm": 2.826280117034912, "learning_rate": 1.872441950716987e-06, "loss": 0.1236, "step": 687 }, { "epoch": 0.19, "grad_norm": 3.1583199501037598, "learning_rate": 1.8720091155980255e-06, "loss": 0.1566, "step": 688 }, { "epoch": 0.19, "grad_norm": 3.0519423484802246, "learning_rate": 1.871575597568178e-06, "loss": 0.1415, "step": 689 }, { "epoch": 0.19, "grad_norm": 3.523808479309082, "learning_rate": 1.8711413969669525e-06, "loss": 0.1594, "step": 690 }, { "epoch": 0.19, "grad_norm": 3.3291847705841064, "learning_rate": 1.8707065141343916e-06, "loss": 0.1476, "step": 691 }, { "epoch": 0.19, "grad_norm": 3.1632869243621826, "learning_rate": 1.870270949411072e-06, "loss": 0.1525, "step": 692 }, { "epoch": 0.19, "grad_norm": 2.94757080078125, "learning_rate": 1.8698347031381052e-06, "loss": 0.134, "step": 693 }, { "epoch": 0.19, "grad_norm": 3.0513572692871094, "learning_rate": 1.8693977756571357e-06, "loss": 0.1513, "step": 694 }, { "epoch": 0.19, "grad_norm": 3.512118101119995, "learning_rate": 1.8689601673103417e-06, "loss": 0.1543, "step": 695 }, { "epoch": 0.19, "grad_norm": 3.3068912029266357, "learning_rate": 1.8685218784404346e-06, "loss": 0.1543, "step": 696 }, { "epoch": 0.19, "grad_norm": 3.037355899810791, "learning_rate": 1.868082909390659e-06, "loss": 0.1358, "step": 697 }, { "epoch": 0.19, "grad_norm": 3.0297398567199707, "learning_rate": 1.8676432605047915e-06, "loss": 0.1392, "step": 698 }, { "epoch": 0.19, "grad_norm": 3.0772452354431152, "learning_rate": 1.8672029321271423e-06, "loss": 0.1386, "step": 699 }, { "epoch": 0.19, "grad_norm": 3.000218152999878, "learning_rate": 1.8667619246025526e-06, "loss": 0.1231, "step": 700 }, { "epoch": 0.19, "grad_norm": 2.958463430404663, "learning_rate": 1.866320238276396e-06, "loss": 0.1304, "step": 701 }, { "epoch": 0.19, "grad_norm": 3.007695198059082, "learning_rate": 1.8658778734945773e-06, "loss": 0.1259, "step": 702 }, { "epoch": 0.19, "grad_norm": 3.208649158477783, "learning_rate": 1.8654348306035335e-06, "loss": 0.1414, "step": 703 }, { "epoch": 0.19, "grad_norm": 3.0507309436798096, "learning_rate": 1.8649911099502314e-06, "loss": 0.1221, "step": 704 }, { "epoch": 0.19, "grad_norm": 3.497598648071289, "learning_rate": 1.8645467118821698e-06, "loss": 0.1366, "step": 705 }, { "epoch": 0.19, "grad_norm": 3.0675430297851562, "learning_rate": 1.8641016367473775e-06, "loss": 0.1393, "step": 706 }, { "epoch": 0.19, "grad_norm": 3.0553345680236816, "learning_rate": 1.8636558848944133e-06, "loss": 0.1463, "step": 707 }, { "epoch": 0.19, "grad_norm": 2.814319610595703, "learning_rate": 1.863209456672366e-06, "loss": 0.1266, "step": 708 }, { "epoch": 0.19, "grad_norm": 2.928010940551758, "learning_rate": 1.862762352430855e-06, "loss": 0.1433, "step": 709 }, { "epoch": 0.19, "grad_norm": 3.2125368118286133, "learning_rate": 1.8623145725200277e-06, "loss": 0.1298, "step": 710 }, { "epoch": 0.19, "grad_norm": 2.9775197505950928, "learning_rate": 1.8618661172905617e-06, "loss": 0.112, "step": 711 }, { "epoch": 0.19, "grad_norm": 3.340319871902466, "learning_rate": 1.8614169870936634e-06, "loss": 0.1322, "step": 712 }, { "epoch": 0.19, "grad_norm": 3.5040061473846436, "learning_rate": 1.860967182281067e-06, "loss": 0.1424, "step": 713 }, { "epoch": 0.2, "grad_norm": 3.348180055618286, "learning_rate": 1.8605167032050357e-06, "loss": 0.1503, "step": 714 }, { "epoch": 0.2, "grad_norm": 2.845379590988159, "learning_rate": 1.8600655502183608e-06, "loss": 0.1203, "step": 715 }, { "epoch": 0.2, "grad_norm": 3.2701401710510254, "learning_rate": 1.8596137236743611e-06, "loss": 0.1561, "step": 716 }, { "epoch": 0.2, "grad_norm": 3.179831027984619, "learning_rate": 1.8591612239268831e-06, "loss": 0.1348, "step": 717 }, { "epoch": 0.2, "grad_norm": 3.3915798664093018, "learning_rate": 1.8587080513303005e-06, "loss": 0.1387, "step": 718 }, { "epoch": 0.2, "grad_norm": 3.1097445487976074, "learning_rate": 1.8582542062395131e-06, "loss": 0.1484, "step": 719 }, { "epoch": 0.2, "grad_norm": 2.8442914485931396, "learning_rate": 1.8577996890099489e-06, "loss": 0.1348, "step": 720 }, { "epoch": 0.2, "grad_norm": 2.8544671535491943, "learning_rate": 1.8573444999975612e-06, "loss": 0.1327, "step": 721 }, { "epoch": 0.2, "grad_norm": 3.004678249359131, "learning_rate": 1.8568886395588295e-06, "loss": 0.1393, "step": 722 }, { "epoch": 0.2, "grad_norm": 2.9679996967315674, "learning_rate": 1.8564321080507596e-06, "loss": 0.1397, "step": 723 }, { "epoch": 0.2, "grad_norm": 3.111448287963867, "learning_rate": 1.8559749058308824e-06, "loss": 0.1578, "step": 724 }, { "epoch": 0.2, "grad_norm": 3.253422260284424, "learning_rate": 1.8555170332572542e-06, "loss": 0.1608, "step": 725 }, { "epoch": 0.2, "grad_norm": 3.186513662338257, "learning_rate": 1.8550584906884565e-06, "loss": 0.1529, "step": 726 }, { "epoch": 0.2, "grad_norm": 3.253415584564209, "learning_rate": 1.8545992784835952e-06, "loss": 0.1379, "step": 727 }, { "epoch": 0.2, "grad_norm": 3.2245519161224365, "learning_rate": 1.8541393970023004e-06, "loss": 0.137, "step": 728 }, { "epoch": 0.2, "grad_norm": 3.0035579204559326, "learning_rate": 1.8536788466047272e-06, "loss": 0.1171, "step": 729 }, { "epoch": 0.2, "grad_norm": 2.9544918537139893, "learning_rate": 1.8532176276515534e-06, "loss": 0.13, "step": 730 }, { "epoch": 0.2, "grad_norm": 3.0871593952178955, "learning_rate": 1.8527557405039817e-06, "loss": 0.1345, "step": 731 }, { "epoch": 0.2, "grad_norm": 3.2445547580718994, "learning_rate": 1.852293185523737e-06, "loss": 0.1407, "step": 732 }, { "epoch": 0.2, "grad_norm": 3.1610848903656006, "learning_rate": 1.8518299630730678e-06, "loss": 0.1449, "step": 733 }, { "epoch": 0.2, "grad_norm": 3.135960578918457, "learning_rate": 1.851366073514745e-06, "loss": 0.1454, "step": 734 }, { "epoch": 0.2, "grad_norm": 3.1653096675872803, "learning_rate": 1.850901517212062e-06, "loss": 0.1353, "step": 735 }, { "epoch": 0.2, "grad_norm": 3.5717997550964355, "learning_rate": 1.8504362945288347e-06, "loss": 0.1383, "step": 736 }, { "epoch": 0.2, "grad_norm": 2.9212334156036377, "learning_rate": 1.8499704058294007e-06, "loss": 0.1348, "step": 737 }, { "epoch": 0.2, "grad_norm": 2.8877100944519043, "learning_rate": 1.8495038514786184e-06, "loss": 0.1258, "step": 738 }, { "epoch": 0.2, "grad_norm": 3.737501621246338, "learning_rate": 1.8490366318418692e-06, "loss": 0.1574, "step": 739 }, { "epoch": 0.2, "grad_norm": 2.9736099243164062, "learning_rate": 1.8485687472850537e-06, "loss": 0.1316, "step": 740 }, { "epoch": 0.2, "grad_norm": 3.177656650543213, "learning_rate": 1.8481001981745945e-06, "loss": 0.1243, "step": 741 }, { "epoch": 0.2, "grad_norm": 2.998987913131714, "learning_rate": 1.8476309848774343e-06, "loss": 0.1302, "step": 742 }, { "epoch": 0.2, "grad_norm": 3.0486202239990234, "learning_rate": 1.8471611077610353e-06, "loss": 0.1395, "step": 743 }, { "epoch": 0.2, "grad_norm": 2.892500638961792, "learning_rate": 1.8466905671933806e-06, "loss": 0.1232, "step": 744 }, { "epoch": 0.2, "grad_norm": 3.2417190074920654, "learning_rate": 1.846219363542972e-06, "loss": 0.1426, "step": 745 }, { "epoch": 0.2, "grad_norm": 2.9307663440704346, "learning_rate": 1.8457474971788315e-06, "loss": 0.1439, "step": 746 }, { "epoch": 0.2, "grad_norm": 3.0509347915649414, "learning_rate": 1.8452749684704992e-06, "loss": 0.1312, "step": 747 }, { "epoch": 0.2, "grad_norm": 2.960585355758667, "learning_rate": 1.8448017777880347e-06, "loss": 0.1315, "step": 748 }, { "epoch": 0.2, "grad_norm": 3.291780471801758, "learning_rate": 1.844327925502015e-06, "loss": 0.1479, "step": 749 }, { "epoch": 0.2, "grad_norm": 2.964261054992676, "learning_rate": 1.8438534119835362e-06, "loss": 0.1281, "step": 750 }, { "epoch": 0.21, "grad_norm": 3.006636619567871, "learning_rate": 1.8433782376042123e-06, "loss": 0.1418, "step": 751 }, { "epoch": 0.21, "grad_norm": 2.929056167602539, "learning_rate": 1.8429024027361737e-06, "loss": 0.1345, "step": 752 }, { "epoch": 0.21, "grad_norm": 3.0413315296173096, "learning_rate": 1.8424259077520693e-06, "loss": 0.1422, "step": 753 }, { "epoch": 0.21, "grad_norm": 3.3456640243530273, "learning_rate": 1.8419487530250644e-06, "loss": 0.1559, "step": 754 }, { "epoch": 0.21, "grad_norm": 3.007039785385132, "learning_rate": 1.841470938928841e-06, "loss": 0.1417, "step": 755 }, { "epoch": 0.21, "grad_norm": 3.312983989715576, "learning_rate": 1.8409924658375973e-06, "loss": 0.1475, "step": 756 }, { "epoch": 0.21, "grad_norm": 3.082507848739624, "learning_rate": 1.8405133341260483e-06, "loss": 0.1463, "step": 757 }, { "epoch": 0.21, "grad_norm": 3.0856258869171143, "learning_rate": 1.840033544169424e-06, "loss": 0.1322, "step": 758 }, { "epoch": 0.21, "grad_norm": 2.7325146198272705, "learning_rate": 1.8395530963434704e-06, "loss": 0.1307, "step": 759 }, { "epoch": 0.21, "grad_norm": 3.127000570297241, "learning_rate": 1.8390719910244486e-06, "loss": 0.1431, "step": 760 }, { "epoch": 0.21, "grad_norm": 3.2272660732269287, "learning_rate": 1.838590228589134e-06, "loss": 0.1284, "step": 761 }, { "epoch": 0.21, "grad_norm": 3.585092306137085, "learning_rate": 1.8381078094148182e-06, "loss": 0.1347, "step": 762 }, { "epoch": 0.21, "grad_norm": 2.858229160308838, "learning_rate": 1.837624733879305e-06, "loss": 0.1305, "step": 763 }, { "epoch": 0.21, "grad_norm": 2.713216781616211, "learning_rate": 1.8371410023609138e-06, "loss": 0.1189, "step": 764 }, { "epoch": 0.21, "grad_norm": 2.635714054107666, "learning_rate": 1.836656615238477e-06, "loss": 0.1229, "step": 765 }, { "epoch": 0.21, "grad_norm": 3.3685312271118164, "learning_rate": 1.8361715728913411e-06, "loss": 0.1653, "step": 766 }, { "epoch": 0.21, "grad_norm": 3.0395724773406982, "learning_rate": 1.8356858756993652e-06, "loss": 0.1332, "step": 767 }, { "epoch": 0.21, "grad_norm": 3.4399709701538086, "learning_rate": 1.8351995240429213e-06, "loss": 0.144, "step": 768 }, { "epoch": 0.21, "grad_norm": 3.529384136199951, "learning_rate": 1.8347125183028938e-06, "loss": 0.1436, "step": 769 }, { "epoch": 0.21, "grad_norm": 2.861670970916748, "learning_rate": 1.8342248588606796e-06, "loss": 0.1263, "step": 770 }, { "epoch": 0.21, "grad_norm": 3.187953472137451, "learning_rate": 1.833736546098188e-06, "loss": 0.1372, "step": 771 }, { "epoch": 0.21, "grad_norm": 2.8213915824890137, "learning_rate": 1.8332475803978388e-06, "loss": 0.1343, "step": 772 }, { "epoch": 0.21, "grad_norm": 3.0806078910827637, "learning_rate": 1.8327579621425637e-06, "loss": 0.1489, "step": 773 }, { "epoch": 0.21, "grad_norm": 2.9299821853637695, "learning_rate": 1.8322676917158062e-06, "loss": 0.1397, "step": 774 }, { "epoch": 0.21, "grad_norm": 2.94779109954834, "learning_rate": 1.8317767695015194e-06, "loss": 0.14, "step": 775 }, { "epoch": 0.21, "grad_norm": 3.0014383792877197, "learning_rate": 1.8312851958841672e-06, "loss": 0.1343, "step": 776 }, { "epoch": 0.21, "grad_norm": 3.195161819458008, "learning_rate": 1.830792971248724e-06, "loss": 0.1501, "step": 777 }, { "epoch": 0.21, "grad_norm": 3.0580642223358154, "learning_rate": 1.8303000959806739e-06, "loss": 0.1343, "step": 778 }, { "epoch": 0.21, "grad_norm": 3.2072629928588867, "learning_rate": 1.8298065704660102e-06, "loss": 0.1449, "step": 779 }, { "epoch": 0.21, "grad_norm": 3.1455795764923096, "learning_rate": 1.829312395091236e-06, "loss": 0.1367, "step": 780 }, { "epoch": 0.21, "grad_norm": 3.1127781867980957, "learning_rate": 1.8288175702433623e-06, "loss": 0.1296, "step": 781 }, { "epoch": 0.21, "grad_norm": 2.8094706535339355, "learning_rate": 1.8283220963099101e-06, "loss": 0.1293, "step": 782 }, { "epoch": 0.21, "grad_norm": 3.0397279262542725, "learning_rate": 1.8278259736789083e-06, "loss": 0.1294, "step": 783 }, { "epoch": 0.21, "grad_norm": 2.868248701095581, "learning_rate": 1.827329202738893e-06, "loss": 0.1356, "step": 784 }, { "epoch": 0.21, "grad_norm": 3.0783395767211914, "learning_rate": 1.8268317838789087e-06, "loss": 0.1595, "step": 785 }, { "epoch": 0.21, "grad_norm": 2.824054002761841, "learning_rate": 1.8263337174885074e-06, "loss": 0.1178, "step": 786 }, { "epoch": 0.21, "grad_norm": 2.7530617713928223, "learning_rate": 1.8258350039577482e-06, "loss": 0.1348, "step": 787 }, { "epoch": 0.22, "grad_norm": 3.053938388824463, "learning_rate": 1.8253356436771962e-06, "loss": 0.125, "step": 788 }, { "epoch": 0.22, "grad_norm": 2.9758999347686768, "learning_rate": 1.8248356370379247e-06, "loss": 0.1452, "step": 789 }, { "epoch": 0.22, "grad_norm": 3.1671602725982666, "learning_rate": 1.8243349844315115e-06, "loss": 0.1436, "step": 790 }, { "epoch": 0.22, "grad_norm": 3.3275105953216553, "learning_rate": 1.8238336862500408e-06, "loss": 0.1345, "step": 791 }, { "epoch": 0.22, "grad_norm": 3.104665517807007, "learning_rate": 1.823331742886103e-06, "loss": 0.1352, "step": 792 }, { "epoch": 0.22, "grad_norm": 3.321075916290283, "learning_rate": 1.8228291547327928e-06, "loss": 0.1661, "step": 793 }, { "epoch": 0.22, "grad_norm": 3.2859387397766113, "learning_rate": 1.8223259221837106e-06, "loss": 0.1478, "step": 794 }, { "epoch": 0.22, "grad_norm": 3.207505464553833, "learning_rate": 1.8218220456329614e-06, "loss": 0.1415, "step": 795 }, { "epoch": 0.22, "grad_norm": 3.0411899089813232, "learning_rate": 1.821317525475154e-06, "loss": 0.1327, "step": 796 }, { "epoch": 0.22, "grad_norm": 2.902604103088379, "learning_rate": 1.8208123621054016e-06, "loss": 0.1452, "step": 797 }, { "epoch": 0.22, "grad_norm": 2.8644320964813232, "learning_rate": 1.8203065559193212e-06, "loss": 0.1413, "step": 798 }, { "epoch": 0.22, "grad_norm": 3.0004799365997314, "learning_rate": 1.8198001073130333e-06, "loss": 0.136, "step": 799 }, { "epoch": 0.22, "grad_norm": 3.020627737045288, "learning_rate": 1.8192930166831615e-06, "loss": 0.145, "step": 800 }, { "epoch": 0.22, "grad_norm": 2.8599250316619873, "learning_rate": 1.8187852844268318e-06, "loss": 0.1313, "step": 801 }, { "epoch": 0.22, "grad_norm": 2.9492738246917725, "learning_rate": 1.8182769109416727e-06, "loss": 0.1326, "step": 802 }, { "epoch": 0.22, "grad_norm": 2.7574639320373535, "learning_rate": 1.8177678966258155e-06, "loss": 0.1369, "step": 803 }, { "epoch": 0.22, "grad_norm": 2.846613645553589, "learning_rate": 1.817258241877893e-06, "loss": 0.1317, "step": 804 }, { "epoch": 0.22, "grad_norm": 3.2272794246673584, "learning_rate": 1.8167479470970391e-06, "loss": 0.1472, "step": 805 }, { "epoch": 0.22, "grad_norm": 2.8653202056884766, "learning_rate": 1.81623701268289e-06, "loss": 0.1297, "step": 806 }, { "epoch": 0.22, "grad_norm": 3.3714654445648193, "learning_rate": 1.8157254390355812e-06, "loss": 0.1624, "step": 807 }, { "epoch": 0.22, "grad_norm": 3.1940484046936035, "learning_rate": 1.815213226555751e-06, "loss": 0.1449, "step": 808 }, { "epoch": 0.22, "grad_norm": 3.4637551307678223, "learning_rate": 1.8147003756445361e-06, "loss": 0.1484, "step": 809 }, { "epoch": 0.22, "grad_norm": 3.2469146251678467, "learning_rate": 1.8141868867035744e-06, "loss": 0.1406, "step": 810 }, { "epoch": 0.22, "grad_norm": 3.205935478210449, "learning_rate": 1.813672760135002e-06, "loss": 0.149, "step": 811 }, { "epoch": 0.22, "grad_norm": 3.0534427165985107, "learning_rate": 1.8131579963414563e-06, "loss": 0.1341, "step": 812 }, { "epoch": 0.22, "grad_norm": 3.206599473953247, "learning_rate": 1.8126425957260722e-06, "loss": 0.1311, "step": 813 }, { "epoch": 0.22, "grad_norm": 2.7452621459960938, "learning_rate": 1.8121265586924846e-06, "loss": 0.1222, "step": 814 }, { "epoch": 0.22, "grad_norm": 2.8834660053253174, "learning_rate": 1.8116098856448251e-06, "loss": 0.1356, "step": 815 }, { "epoch": 0.22, "grad_norm": 2.8721609115600586, "learning_rate": 1.8110925769877252e-06, "loss": 0.1254, "step": 816 }, { "epoch": 0.22, "grad_norm": 3.101356267929077, "learning_rate": 1.810574633126313e-06, "loss": 0.1375, "step": 817 }, { "epoch": 0.22, "grad_norm": 3.010807514190674, "learning_rate": 1.8100560544662144e-06, "loss": 0.1252, "step": 818 }, { "epoch": 0.22, "grad_norm": 2.8136961460113525, "learning_rate": 1.8095368414135525e-06, "loss": 0.1231, "step": 819 }, { "epoch": 0.22, "grad_norm": 3.085277557373047, "learning_rate": 1.8090169943749474e-06, "loss": 0.1451, "step": 820 }, { "epoch": 0.22, "grad_norm": 3.15325927734375, "learning_rate": 1.808496513757515e-06, "loss": 0.1428, "step": 821 }, { "epoch": 0.22, "grad_norm": 3.2210240364074707, "learning_rate": 1.8079753999688686e-06, "loss": 0.1531, "step": 822 }, { "epoch": 0.22, "grad_norm": 3.0681214332580566, "learning_rate": 1.8074536534171158e-06, "loss": 0.1286, "step": 823 }, { "epoch": 0.23, "grad_norm": 2.953857898712158, "learning_rate": 1.8069312745108614e-06, "loss": 0.129, "step": 824 }, { "epoch": 0.23, "grad_norm": 3.2039496898651123, "learning_rate": 1.806408263659204e-06, "loss": 0.1484, "step": 825 }, { "epoch": 0.23, "grad_norm": 3.072256326675415, "learning_rate": 1.8058846212717379e-06, "loss": 0.1209, "step": 826 }, { "epoch": 0.23, "grad_norm": 2.9288153648376465, "learning_rate": 1.805360347758552e-06, "loss": 0.1425, "step": 827 }, { "epoch": 0.23, "grad_norm": 3.1752777099609375, "learning_rate": 1.8048354435302289e-06, "loss": 0.138, "step": 828 }, { "epoch": 0.23, "grad_norm": 2.991837739944458, "learning_rate": 1.8043099089978457e-06, "loss": 0.1459, "step": 829 }, { "epoch": 0.23, "grad_norm": 2.880028009414673, "learning_rate": 1.8037837445729732e-06, "loss": 0.1226, "step": 830 }, { "epoch": 0.23, "grad_norm": 3.154533863067627, "learning_rate": 1.8032569506676748e-06, "loss": 0.1419, "step": 831 }, { "epoch": 0.23, "grad_norm": 3.134220600128174, "learning_rate": 1.8027295276945075e-06, "loss": 0.1417, "step": 832 }, { "epoch": 0.23, "grad_norm": 2.910787582397461, "learning_rate": 1.802201476066521e-06, "loss": 0.1455, "step": 833 }, { "epoch": 0.23, "grad_norm": 2.9492084980010986, "learning_rate": 1.8016727961972564e-06, "loss": 0.1274, "step": 834 }, { "epoch": 0.23, "grad_norm": 2.8318257331848145, "learning_rate": 1.8011434885007479e-06, "loss": 0.1205, "step": 835 }, { "epoch": 0.23, "grad_norm": 3.070709705352783, "learning_rate": 1.8006135533915212e-06, "loss": 0.1402, "step": 836 }, { "epoch": 0.23, "grad_norm": 3.4233484268188477, "learning_rate": 1.8000829912845929e-06, "loss": 0.1334, "step": 837 }, { "epoch": 0.23, "grad_norm": 3.1346375942230225, "learning_rate": 1.7995518025954707e-06, "loss": 0.1343, "step": 838 }, { "epoch": 0.23, "grad_norm": 3.0293099880218506, "learning_rate": 1.7990199877401535e-06, "loss": 0.141, "step": 839 }, { "epoch": 0.23, "grad_norm": 3.2051947116851807, "learning_rate": 1.79848754713513e-06, "loss": 0.1401, "step": 840 }, { "epoch": 0.23, "grad_norm": 3.3078722953796387, "learning_rate": 1.7979544811973791e-06, "loss": 0.168, "step": 841 }, { "epoch": 0.23, "grad_norm": 3.4778082370758057, "learning_rate": 1.7974207903443699e-06, "loss": 0.164, "step": 842 }, { "epoch": 0.23, "grad_norm": 3.250641345977783, "learning_rate": 1.7968864749940603e-06, "loss": 0.1409, "step": 843 }, { "epoch": 0.23, "grad_norm": 3.047170639038086, "learning_rate": 1.7963515355648972e-06, "loss": 0.1436, "step": 844 }, { "epoch": 0.23, "grad_norm": 2.890998363494873, "learning_rate": 1.795815972475817e-06, "loss": 0.121, "step": 845 }, { "epoch": 0.23, "grad_norm": 3.205892562866211, "learning_rate": 1.7952797861462442e-06, "loss": 0.1467, "step": 846 }, { "epoch": 0.23, "grad_norm": 2.950531482696533, "learning_rate": 1.7947429769960904e-06, "loss": 0.1389, "step": 847 }, { "epoch": 0.23, "grad_norm": 3.001091241836548, "learning_rate": 1.7942055454457568e-06, "loss": 0.143, "step": 848 }, { "epoch": 0.23, "grad_norm": 3.553637742996216, "learning_rate": 1.7936674919161305e-06, "loss": 0.1711, "step": 849 }, { "epoch": 0.23, "grad_norm": 3.0406932830810547, "learning_rate": 1.793128816828586e-06, "loss": 0.1519, "step": 850 }, { "epoch": 0.23, "grad_norm": 2.908801555633545, "learning_rate": 1.7925895206049858e-06, "loss": 0.1184, "step": 851 }, { "epoch": 0.23, "grad_norm": 3.0099973678588867, "learning_rate": 1.7920496036676765e-06, "loss": 0.1418, "step": 852 }, { "epoch": 0.23, "grad_norm": 3.1775577068328857, "learning_rate": 1.791509066439493e-06, "loss": 0.1461, "step": 853 }, { "epoch": 0.23, "grad_norm": 3.443354606628418, "learning_rate": 1.790967909343755e-06, "loss": 0.1538, "step": 854 }, { "epoch": 0.23, "grad_norm": 3.434736728668213, "learning_rate": 1.790426132804268e-06, "loss": 0.1405, "step": 855 }, { "epoch": 0.23, "grad_norm": 3.2804572582244873, "learning_rate": 1.7898837372453221e-06, "loss": 0.148, "step": 856 }, { "epoch": 0.23, "grad_norm": 3.0672659873962402, "learning_rate": 1.7893407230916924e-06, "loss": 0.1477, "step": 857 }, { "epoch": 0.23, "grad_norm": 3.0499002933502197, "learning_rate": 1.788797090768639e-06, "loss": 0.1387, "step": 858 }, { "epoch": 0.23, "grad_norm": 3.054581642150879, "learning_rate": 1.7882528407019048e-06, "loss": 0.1431, "step": 859 }, { "epoch": 0.23, "grad_norm": 3.286684513092041, "learning_rate": 1.7877079733177183e-06, "loss": 0.1417, "step": 860 }, { "epoch": 0.24, "grad_norm": 3.0893189907073975, "learning_rate": 1.7871624890427896e-06, "loss": 0.135, "step": 861 }, { "epoch": 0.24, "grad_norm": 3.071838855743408, "learning_rate": 1.7866163883043139e-06, "loss": 0.1455, "step": 862 }, { "epoch": 0.24, "grad_norm": 3.244340658187866, "learning_rate": 1.786069671529967e-06, "loss": 0.1417, "step": 863 }, { "epoch": 0.24, "grad_norm": 3.050936698913574, "learning_rate": 1.7855223391479086e-06, "loss": 0.1429, "step": 864 }, { "epoch": 0.24, "grad_norm": 2.821762800216675, "learning_rate": 1.7849743915867806e-06, "loss": 0.1278, "step": 865 }, { "epoch": 0.24, "grad_norm": 2.879225969314575, "learning_rate": 1.7844258292757054e-06, "loss": 0.1322, "step": 866 }, { "epoch": 0.24, "grad_norm": 2.966362714767456, "learning_rate": 1.7838766526442886e-06, "loss": 0.144, "step": 867 }, { "epoch": 0.24, "grad_norm": 2.860746145248413, "learning_rate": 1.7833268621226148e-06, "loss": 0.1338, "step": 868 }, { "epoch": 0.24, "grad_norm": 3.343733072280884, "learning_rate": 1.7827764581412515e-06, "loss": 0.1579, "step": 869 }, { "epoch": 0.24, "grad_norm": 2.8615481853485107, "learning_rate": 1.7822254411312451e-06, "loss": 0.1268, "step": 870 }, { "epoch": 0.24, "grad_norm": 2.838470697402954, "learning_rate": 1.781673811524123e-06, "loss": 0.134, "step": 871 }, { "epoch": 0.24, "grad_norm": 2.8155670166015625, "learning_rate": 1.781121569751892e-06, "loss": 0.1247, "step": 872 }, { "epoch": 0.24, "grad_norm": 3.1020331382751465, "learning_rate": 1.7805687162470378e-06, "loss": 0.1358, "step": 873 }, { "epoch": 0.24, "grad_norm": 2.99312424659729, "learning_rate": 1.7800152514425265e-06, "loss": 0.1452, "step": 874 }, { "epoch": 0.24, "grad_norm": 3.434626340866089, "learning_rate": 1.7794611757718011e-06, "loss": 0.1574, "step": 875 }, { "epoch": 0.24, "grad_norm": 2.9138333797454834, "learning_rate": 1.7789064896687848e-06, "loss": 0.1414, "step": 876 }, { "epoch": 0.24, "grad_norm": 2.970022439956665, "learning_rate": 1.7783511935678779e-06, "loss": 0.1371, "step": 877 }, { "epoch": 0.24, "grad_norm": 2.739241361618042, "learning_rate": 1.7777952879039585e-06, "loss": 0.1295, "step": 878 }, { "epoch": 0.24, "grad_norm": 2.763500690460205, "learning_rate": 1.7772387731123825e-06, "loss": 0.1163, "step": 879 }, { "epoch": 0.24, "grad_norm": 2.9955568313598633, "learning_rate": 1.776681649628982e-06, "loss": 0.1274, "step": 880 }, { "epoch": 0.24, "grad_norm": 3.2668027877807617, "learning_rate": 1.7761239178900667e-06, "loss": 0.1637, "step": 881 }, { "epoch": 0.24, "grad_norm": 3.040350914001465, "learning_rate": 1.775565578332422e-06, "loss": 0.1295, "step": 882 }, { "epoch": 0.24, "grad_norm": 2.8555662631988525, "learning_rate": 1.7750066313933096e-06, "loss": 0.129, "step": 883 }, { "epoch": 0.24, "grad_norm": 3.162750720977783, "learning_rate": 1.774447077510467e-06, "loss": 0.1485, "step": 884 }, { "epoch": 0.24, "grad_norm": 3.2075698375701904, "learning_rate": 1.7738869171221067e-06, "loss": 0.1428, "step": 885 }, { "epoch": 0.24, "grad_norm": 2.953458309173584, "learning_rate": 1.7733261506669165e-06, "loss": 0.129, "step": 886 }, { "epoch": 0.24, "grad_norm": 3.3823306560516357, "learning_rate": 1.7727647785840588e-06, "loss": 0.1798, "step": 887 }, { "epoch": 0.24, "grad_norm": 3.3550498485565186, "learning_rate": 1.7722028013131695e-06, "loss": 0.1642, "step": 888 }, { "epoch": 0.24, "grad_norm": 3.0226235389709473, "learning_rate": 1.77164021929436e-06, "loss": 0.1301, "step": 889 }, { "epoch": 0.24, "grad_norm": 3.0606689453125, "learning_rate": 1.7710770329682143e-06, "loss": 0.1472, "step": 890 }, { "epoch": 0.24, "grad_norm": 2.988096237182617, "learning_rate": 1.7705132427757892e-06, "loss": 0.1399, "step": 891 }, { "epoch": 0.24, "grad_norm": 3.045409679412842, "learning_rate": 1.7699488491586154e-06, "loss": 0.1208, "step": 892 }, { "epoch": 0.24, "grad_norm": 2.9872851371765137, "learning_rate": 1.769383852558696e-06, "loss": 0.1435, "step": 893 }, { "epoch": 0.24, "grad_norm": 3.2067313194274902, "learning_rate": 1.7688182534185056e-06, "loss": 0.1401, "step": 894 }, { "epoch": 0.24, "grad_norm": 3.144598960876465, "learning_rate": 1.7682520521809917e-06, "loss": 0.1409, "step": 895 }, { "epoch": 0.24, "grad_norm": 3.270148754119873, "learning_rate": 1.7676852492895724e-06, "loss": 0.1564, "step": 896 }, { "epoch": 0.25, "grad_norm": 3.129302978515625, "learning_rate": 1.7671178451881375e-06, "loss": 0.1334, "step": 897 }, { "epoch": 0.25, "grad_norm": 2.916828155517578, "learning_rate": 1.7665498403210476e-06, "loss": 0.1362, "step": 898 }, { "epoch": 0.25, "grad_norm": 2.9184865951538086, "learning_rate": 1.7659812351331342e-06, "loss": 0.1359, "step": 899 }, { "epoch": 0.25, "grad_norm": 3.1969926357269287, "learning_rate": 1.7654120300696978e-06, "loss": 0.1496, "step": 900 }, { "epoch": 0.25, "grad_norm": 3.058776378631592, "learning_rate": 1.7648422255765095e-06, "loss": 0.1416, "step": 901 }, { "epoch": 0.25, "grad_norm": 3.2968432903289795, "learning_rate": 1.7642718220998093e-06, "loss": 0.1299, "step": 902 }, { "epoch": 0.25, "grad_norm": 3.108567953109741, "learning_rate": 1.7637008200863077e-06, "loss": 0.1533, "step": 903 }, { "epoch": 0.25, "grad_norm": 2.989795207977295, "learning_rate": 1.7631292199831824e-06, "loss": 0.1295, "step": 904 }, { "epoch": 0.25, "grad_norm": 3.2122561931610107, "learning_rate": 1.7625570222380796e-06, "loss": 0.1367, "step": 905 }, { "epoch": 0.25, "grad_norm": 3.3966312408447266, "learning_rate": 1.7619842272991145e-06, "loss": 0.1526, "step": 906 }, { "epoch": 0.25, "grad_norm": 3.062476634979248, "learning_rate": 1.7614108356148693e-06, "loss": 0.1203, "step": 907 }, { "epoch": 0.25, "grad_norm": 3.133892774581909, "learning_rate": 1.760836847634394e-06, "loss": 0.1424, "step": 908 }, { "epoch": 0.25, "grad_norm": 3.282561779022217, "learning_rate": 1.7602622638072047e-06, "loss": 0.1392, "step": 909 }, { "epoch": 0.25, "grad_norm": 3.0799286365509033, "learning_rate": 1.7596870845832847e-06, "loss": 0.1433, "step": 910 }, { "epoch": 0.25, "grad_norm": 3.043998956680298, "learning_rate": 1.7591113104130844e-06, "loss": 0.1511, "step": 911 }, { "epoch": 0.25, "grad_norm": 2.924272060394287, "learning_rate": 1.7585349417475184e-06, "loss": 0.1295, "step": 912 }, { "epoch": 0.25, "grad_norm": 3.174017906188965, "learning_rate": 1.7579579790379683e-06, "loss": 0.143, "step": 913 }, { "epoch": 0.25, "grad_norm": 3.3196375370025635, "learning_rate": 1.7573804227362805e-06, "loss": 0.1654, "step": 914 }, { "epoch": 0.25, "grad_norm": 3.114105224609375, "learning_rate": 1.756802273294766e-06, "loss": 0.1305, "step": 915 }, { "epoch": 0.25, "grad_norm": 2.9059255123138428, "learning_rate": 1.7562235311662e-06, "loss": 0.134, "step": 916 }, { "epoch": 0.25, "grad_norm": 3.0459864139556885, "learning_rate": 1.7556441968038237e-06, "loss": 0.1294, "step": 917 }, { "epoch": 0.25, "grad_norm": 2.786449670791626, "learning_rate": 1.7550642706613395e-06, "loss": 0.1302, "step": 918 }, { "epoch": 0.25, "grad_norm": 3.0151493549346924, "learning_rate": 1.754483753192915e-06, "loss": 0.1356, "step": 919 }, { "epoch": 0.25, "grad_norm": 2.8167083263397217, "learning_rate": 1.7539026448531806e-06, "loss": 0.1304, "step": 920 }, { "epoch": 0.25, "grad_norm": 3.0963945388793945, "learning_rate": 1.7533209460972292e-06, "loss": 0.1348, "step": 921 }, { "epoch": 0.25, "grad_norm": 3.0987884998321533, "learning_rate": 1.752738657380616e-06, "loss": 0.1527, "step": 922 }, { "epoch": 0.25, "grad_norm": 2.9413533210754395, "learning_rate": 1.7521557791593582e-06, "loss": 0.1344, "step": 923 }, { "epoch": 0.25, "grad_norm": 3.198122501373291, "learning_rate": 1.751572311889935e-06, "loss": 0.1427, "step": 924 }, { "epoch": 0.25, "grad_norm": 2.9854321479797363, "learning_rate": 1.750988256029287e-06, "loss": 0.143, "step": 925 }, { "epoch": 0.25, "grad_norm": 3.3399744033813477, "learning_rate": 1.7504036120348154e-06, "loss": 0.1478, "step": 926 }, { "epoch": 0.25, "grad_norm": 3.10494327545166, "learning_rate": 1.7498183803643819e-06, "loss": 0.1167, "step": 927 }, { "epoch": 0.25, "grad_norm": 2.8649749755859375, "learning_rate": 1.7492325614763086e-06, "loss": 0.1218, "step": 928 }, { "epoch": 0.25, "grad_norm": 3.151996374130249, "learning_rate": 1.7486461558293777e-06, "loss": 0.1409, "step": 929 }, { "epoch": 0.25, "grad_norm": 2.9325687885284424, "learning_rate": 1.7480591638828307e-06, "loss": 0.1317, "step": 930 }, { "epoch": 0.25, "grad_norm": 2.6797404289245605, "learning_rate": 1.7474715860963683e-06, "loss": 0.1371, "step": 931 }, { "epoch": 0.25, "grad_norm": 3.1968817710876465, "learning_rate": 1.74688342293015e-06, "loss": 0.1521, "step": 932 }, { "epoch": 0.25, "grad_norm": 2.7755022048950195, "learning_rate": 1.7462946748447935e-06, "loss": 0.1307, "step": 933 }, { "epoch": 0.26, "grad_norm": 2.925846815109253, "learning_rate": 1.7457053423013751e-06, "loss": 0.1253, "step": 934 }, { "epoch": 0.26, "grad_norm": 2.949812173843384, "learning_rate": 1.7451154257614284e-06, "loss": 0.1332, "step": 935 }, { "epoch": 0.26, "grad_norm": 3.158405065536499, "learning_rate": 1.7445249256869444e-06, "loss": 0.1421, "step": 936 }, { "epoch": 0.26, "grad_norm": 2.9394330978393555, "learning_rate": 1.7439338425403713e-06, "loss": 0.1313, "step": 937 }, { "epoch": 0.26, "grad_norm": 2.8409595489501953, "learning_rate": 1.7433421767846136e-06, "loss": 0.1312, "step": 938 }, { "epoch": 0.26, "grad_norm": 2.929218292236328, "learning_rate": 1.7427499288830326e-06, "loss": 0.138, "step": 939 }, { "epoch": 0.26, "grad_norm": 3.0145485401153564, "learning_rate": 1.7421570992994447e-06, "loss": 0.1491, "step": 940 }, { "epoch": 0.26, "grad_norm": 2.813136339187622, "learning_rate": 1.741563688498123e-06, "loss": 0.1303, "step": 941 }, { "epoch": 0.26, "grad_norm": 3.102907419204712, "learning_rate": 1.7409696969437943e-06, "loss": 0.134, "step": 942 }, { "epoch": 0.26, "grad_norm": 2.875605344772339, "learning_rate": 1.7403751251016416e-06, "loss": 0.1387, "step": 943 }, { "epoch": 0.26, "grad_norm": 2.903993844985962, "learning_rate": 1.7397799734373012e-06, "loss": 0.1309, "step": 944 }, { "epoch": 0.26, "grad_norm": 3.1668875217437744, "learning_rate": 1.7391842424168647e-06, "loss": 0.1359, "step": 945 }, { "epoch": 0.26, "grad_norm": 3.0324251651763916, "learning_rate": 1.7385879325068764e-06, "loss": 0.149, "step": 946 }, { "epoch": 0.26, "grad_norm": 3.013434410095215, "learning_rate": 1.7379910441743345e-06, "loss": 0.1489, "step": 947 }, { "epoch": 0.26, "grad_norm": 3.1340384483337402, "learning_rate": 1.7373935778866895e-06, "loss": 0.1504, "step": 948 }, { "epoch": 0.26, "grad_norm": 3.3014206886291504, "learning_rate": 1.7367955341118456e-06, "loss": 0.1362, "step": 949 }, { "epoch": 0.26, "grad_norm": 2.800163507461548, "learning_rate": 1.7361969133181584e-06, "loss": 0.1218, "step": 950 }, { "epoch": 0.26, "grad_norm": 3.1261839866638184, "learning_rate": 1.7355977159744358e-06, "loss": 0.145, "step": 951 }, { "epoch": 0.26, "grad_norm": 2.8605103492736816, "learning_rate": 1.734997942549937e-06, "loss": 0.1259, "step": 952 }, { "epoch": 0.26, "grad_norm": 3.1533775329589844, "learning_rate": 1.7343975935143727e-06, "loss": 0.1496, "step": 953 }, { "epoch": 0.26, "grad_norm": 3.145339012145996, "learning_rate": 1.733796669337904e-06, "loss": 0.1392, "step": 954 }, { "epoch": 0.26, "grad_norm": 2.741110324859619, "learning_rate": 1.7331951704911424e-06, "loss": 0.1363, "step": 955 }, { "epoch": 0.26, "grad_norm": 2.8262789249420166, "learning_rate": 1.7325930974451497e-06, "loss": 0.1374, "step": 956 }, { "epoch": 0.26, "grad_norm": 3.010044813156128, "learning_rate": 1.7319904506714375e-06, "loss": 0.1433, "step": 957 }, { "epoch": 0.26, "grad_norm": 3.2525150775909424, "learning_rate": 1.7313872306419662e-06, "loss": 0.163, "step": 958 }, { "epoch": 0.26, "grad_norm": 2.9591891765594482, "learning_rate": 1.730783437829146e-06, "loss": 0.1165, "step": 959 }, { "epoch": 0.26, "grad_norm": 3.2669708728790283, "learning_rate": 1.7301790727058343e-06, "loss": 0.1521, "step": 960 }, { "epoch": 0.26, "grad_norm": 2.821305751800537, "learning_rate": 1.729574135745338e-06, "loss": 0.1233, "step": 961 }, { "epoch": 0.26, "grad_norm": 3.1753952503204346, "learning_rate": 1.7289686274214115e-06, "loss": 0.1391, "step": 962 }, { "epoch": 0.26, "grad_norm": 2.831979274749756, "learning_rate": 1.7283625482082563e-06, "loss": 0.1227, "step": 963 }, { "epoch": 0.26, "grad_norm": 3.01729679107666, "learning_rate": 1.7277558985805211e-06, "loss": 0.1396, "step": 964 }, { "epoch": 0.26, "grad_norm": 2.9155466556549072, "learning_rate": 1.727148679013302e-06, "loss": 0.1327, "step": 965 }, { "epoch": 0.26, "grad_norm": 3.0422449111938477, "learning_rate": 1.7265408899821403e-06, "loss": 0.1333, "step": 966 }, { "epoch": 0.26, "grad_norm": 2.878432035446167, "learning_rate": 1.725932531963024e-06, "loss": 0.1286, "step": 967 }, { "epoch": 0.26, "grad_norm": 3.600102186203003, "learning_rate": 1.7253236054323868e-06, "loss": 0.1424, "step": 968 }, { "epoch": 0.26, "grad_norm": 2.899467706680298, "learning_rate": 1.724714110867107e-06, "loss": 0.1304, "step": 969 }, { "epoch": 0.26, "grad_norm": 2.8585548400878906, "learning_rate": 1.724104048744508e-06, "loss": 0.1274, "step": 970 }, { "epoch": 0.27, "grad_norm": 3.1076653003692627, "learning_rate": 1.7234934195423584e-06, "loss": 0.1335, "step": 971 }, { "epoch": 0.27, "grad_norm": 3.2046873569488525, "learning_rate": 1.7228822237388703e-06, "loss": 0.1397, "step": 972 }, { "epoch": 0.27, "grad_norm": 2.9000132083892822, "learning_rate": 1.722270461812699e-06, "loss": 0.1213, "step": 973 }, { "epoch": 0.27, "grad_norm": 3.2703990936279297, "learning_rate": 1.721658134242944e-06, "loss": 0.1243, "step": 974 }, { "epoch": 0.27, "grad_norm": 2.9981961250305176, "learning_rate": 1.7210452415091475e-06, "loss": 0.1451, "step": 975 }, { "epoch": 0.27, "grad_norm": 2.869926929473877, "learning_rate": 1.7204317840912944e-06, "loss": 0.121, "step": 976 }, { "epoch": 0.27, "grad_norm": 2.680950880050659, "learning_rate": 1.7198177624698116e-06, "loss": 0.1215, "step": 977 }, { "epoch": 0.27, "grad_norm": 2.8917808532714844, "learning_rate": 1.7192031771255682e-06, "loss": 0.1189, "step": 978 }, { "epoch": 0.27, "grad_norm": 2.8637447357177734, "learning_rate": 1.718588028539874e-06, "loss": 0.1409, "step": 979 }, { "epoch": 0.27, "grad_norm": 3.082494020462036, "learning_rate": 1.717972317194481e-06, "loss": 0.1482, "step": 980 }, { "epoch": 0.27, "grad_norm": 3.49234938621521, "learning_rate": 1.7173560435715814e-06, "loss": 0.1397, "step": 981 }, { "epoch": 0.27, "grad_norm": 3.003164291381836, "learning_rate": 1.7167392081538074e-06, "loss": 0.1362, "step": 982 }, { "epoch": 0.27, "grad_norm": 2.805999279022217, "learning_rate": 1.7161218114242316e-06, "loss": 0.1315, "step": 983 }, { "epoch": 0.27, "grad_norm": 2.8828787803649902, "learning_rate": 1.7155038538663663e-06, "loss": 0.1282, "step": 984 }, { "epoch": 0.27, "grad_norm": 2.8884527683258057, "learning_rate": 1.7148853359641625e-06, "loss": 0.1297, "step": 985 }, { "epoch": 0.27, "grad_norm": 3.0269837379455566, "learning_rate": 1.7142662582020104e-06, "loss": 0.1316, "step": 986 }, { "epoch": 0.27, "grad_norm": 3.180825710296631, "learning_rate": 1.7136466210647387e-06, "loss": 0.1409, "step": 987 }, { "epoch": 0.27, "grad_norm": 3.00687313079834, "learning_rate": 1.7130264250376142e-06, "loss": 0.1441, "step": 988 }, { "epoch": 0.27, "grad_norm": 2.7717976570129395, "learning_rate": 1.7124056706063408e-06, "loss": 0.1282, "step": 989 }, { "epoch": 0.27, "grad_norm": 2.762643337249756, "learning_rate": 1.7117843582570606e-06, "loss": 0.1209, "step": 990 }, { "epoch": 0.27, "grad_norm": 2.950422763824463, "learning_rate": 1.7111624884763517e-06, "loss": 0.1222, "step": 991 }, { "epoch": 0.27, "grad_norm": 3.0254971981048584, "learning_rate": 1.7105400617512298e-06, "loss": 0.1289, "step": 992 }, { "epoch": 0.27, "grad_norm": 2.8435542583465576, "learning_rate": 1.7099170785691456e-06, "loss": 0.127, "step": 993 }, { "epoch": 0.27, "grad_norm": 2.956089973449707, "learning_rate": 1.709293539417987e-06, "loss": 0.1308, "step": 994 }, { "epoch": 0.27, "grad_norm": 2.9792909622192383, "learning_rate": 1.708669444786076e-06, "loss": 0.1277, "step": 995 }, { "epoch": 0.27, "grad_norm": 3.3625175952911377, "learning_rate": 1.70804479516217e-06, "loss": 0.1641, "step": 996 }, { "epoch": 0.27, "grad_norm": 2.9496147632598877, "learning_rate": 1.7074195910354616e-06, "loss": 0.1231, "step": 997 }, { "epoch": 0.27, "grad_norm": 3.3361380100250244, "learning_rate": 1.7067938328955766e-06, "loss": 0.1371, "step": 998 }, { "epoch": 0.27, "grad_norm": 3.1837551593780518, "learning_rate": 1.7061675212325759e-06, "loss": 0.1359, "step": 999 }, { "epoch": 0.27, "grad_norm": 2.8014943599700928, "learning_rate": 1.705540656536953e-06, "loss": 0.1261, "step": 1000 }, { "epoch": 0.27, "grad_norm": 3.034485101699829, "learning_rate": 1.704913239299635e-06, "loss": 0.1322, "step": 1001 }, { "epoch": 0.27, "grad_norm": 2.8884332180023193, "learning_rate": 1.7042852700119811e-06, "loss": 0.1368, "step": 1002 }, { "epoch": 0.27, "grad_norm": 3.1377642154693604, "learning_rate": 1.7036567491657836e-06, "loss": 0.143, "step": 1003 }, { "epoch": 0.27, "grad_norm": 3.1927852630615234, "learning_rate": 1.7030276772532664e-06, "loss": 0.1582, "step": 1004 }, { "epoch": 0.27, "grad_norm": 2.8954274654388428, "learning_rate": 1.7023980547670846e-06, "loss": 0.1382, "step": 1005 }, { "epoch": 0.27, "grad_norm": 3.169952630996704, "learning_rate": 1.7017678822003253e-06, "loss": 0.1336, "step": 1006 }, { "epoch": 0.28, "grad_norm": 2.876800537109375, "learning_rate": 1.701137160046506e-06, "loss": 0.1259, "step": 1007 }, { "epoch": 0.28, "grad_norm": 2.769343852996826, "learning_rate": 1.700505888799574e-06, "loss": 0.1253, "step": 1008 }, { "epoch": 0.28, "grad_norm": 3.1073548793792725, "learning_rate": 1.6998740689539075e-06, "loss": 0.1275, "step": 1009 }, { "epoch": 0.28, "grad_norm": 3.218838930130005, "learning_rate": 1.699241701004314e-06, "loss": 0.1474, "step": 1010 }, { "epoch": 0.28, "grad_norm": 2.921640157699585, "learning_rate": 1.6986087854460305e-06, "loss": 0.1291, "step": 1011 }, { "epoch": 0.28, "grad_norm": 2.973304271697998, "learning_rate": 1.697975322774722e-06, "loss": 0.1244, "step": 1012 }, { "epoch": 0.28, "grad_norm": 3.119814157485962, "learning_rate": 1.6973413134864827e-06, "loss": 0.1264, "step": 1013 }, { "epoch": 0.28, "grad_norm": 3.0828561782836914, "learning_rate": 1.6967067580778353e-06, "loss": 0.1439, "step": 1014 }, { "epoch": 0.28, "grad_norm": 3.010824680328369, "learning_rate": 1.6960716570457291e-06, "loss": 0.1339, "step": 1015 }, { "epoch": 0.28, "grad_norm": 2.9271926879882812, "learning_rate": 1.6954360108875415e-06, "loss": 0.1437, "step": 1016 }, { "epoch": 0.28, "grad_norm": 3.0377440452575684, "learning_rate": 1.6947998201010767e-06, "loss": 0.1377, "step": 1017 }, { "epoch": 0.28, "grad_norm": 3.0867815017700195, "learning_rate": 1.694163085184565e-06, "loss": 0.1362, "step": 1018 }, { "epoch": 0.28, "grad_norm": 2.6888203620910645, "learning_rate": 1.6935258066366632e-06, "loss": 0.1228, "step": 1019 }, { "epoch": 0.28, "grad_norm": 2.6803104877471924, "learning_rate": 1.6928879849564539e-06, "loss": 0.1151, "step": 1020 }, { "epoch": 0.28, "grad_norm": 2.61885142326355, "learning_rate": 1.6922496206434444e-06, "loss": 0.1319, "step": 1021 }, { "epoch": 0.28, "grad_norm": 3.1043663024902344, "learning_rate": 1.6916107141975685e-06, "loss": 0.17, "step": 1022 }, { "epoch": 0.28, "grad_norm": 2.94313383102417, "learning_rate": 1.6909712661191823e-06, "loss": 0.1372, "step": 1023 }, { "epoch": 0.28, "grad_norm": 3.073957920074463, "learning_rate": 1.690331276909068e-06, "loss": 0.1356, "step": 1024 }, { "epoch": 0.28, "grad_norm": 2.8185484409332275, "learning_rate": 1.6896907470684315e-06, "loss": 0.141, "step": 1025 }, { "epoch": 0.28, "grad_norm": 3.179748773574829, "learning_rate": 1.6890496770989001e-06, "loss": 0.1498, "step": 1026 }, { "epoch": 0.28, "grad_norm": 2.92128849029541, "learning_rate": 1.6884080675025268e-06, "loss": 0.1308, "step": 1027 }, { "epoch": 0.28, "grad_norm": 2.9293651580810547, "learning_rate": 1.687765918781785e-06, "loss": 0.1294, "step": 1028 }, { "epoch": 0.28, "grad_norm": 3.2544984817504883, "learning_rate": 1.6871232314395718e-06, "loss": 0.143, "step": 1029 }, { "epoch": 0.28, "grad_norm": 3.0878231525421143, "learning_rate": 1.6864800059792055e-06, "loss": 0.1269, "step": 1030 }, { "epoch": 0.28, "grad_norm": 3.029195547103882, "learning_rate": 1.6858362429044256e-06, "loss": 0.1413, "step": 1031 }, { "epoch": 0.28, "grad_norm": 2.8506369590759277, "learning_rate": 1.6851919427193925e-06, "loss": 0.1364, "step": 1032 }, { "epoch": 0.28, "grad_norm": 2.8560402393341064, "learning_rate": 1.6845471059286886e-06, "loss": 0.1205, "step": 1033 }, { "epoch": 0.28, "grad_norm": 2.9102232456207275, "learning_rate": 1.6839017330373151e-06, "loss": 0.1332, "step": 1034 }, { "epoch": 0.28, "grad_norm": 2.859626531600952, "learning_rate": 1.6832558245506933e-06, "loss": 0.1265, "step": 1035 }, { "epoch": 0.28, "grad_norm": 3.1594340801239014, "learning_rate": 1.6826093809746649e-06, "loss": 0.1344, "step": 1036 }, { "epoch": 0.28, "grad_norm": 2.971975803375244, "learning_rate": 1.681962402815489e-06, "loss": 0.1427, "step": 1037 }, { "epoch": 0.28, "grad_norm": 3.0042905807495117, "learning_rate": 1.6813148905798446e-06, "loss": 0.1411, "step": 1038 }, { "epoch": 0.28, "grad_norm": 3.0483460426330566, "learning_rate": 1.6806668447748292e-06, "loss": 0.1345, "step": 1039 }, { "epoch": 0.28, "grad_norm": 3.240797758102417, "learning_rate": 1.6800182659079567e-06, "loss": 0.151, "step": 1040 }, { "epoch": 0.28, "grad_norm": 3.112478256225586, "learning_rate": 1.6793691544871603e-06, "loss": 0.1556, "step": 1041 }, { "epoch": 0.28, "grad_norm": 2.8727810382843018, "learning_rate": 1.6787195110207884e-06, "loss": 0.1336, "step": 1042 }, { "epoch": 0.28, "grad_norm": 2.958864212036133, "learning_rate": 1.6780693360176075e-06, "loss": 0.1366, "step": 1043 }, { "epoch": 0.29, "grad_norm": 2.757554292678833, "learning_rate": 1.6774186299868e-06, "loss": 0.1361, "step": 1044 }, { "epoch": 0.29, "grad_norm": 5.423801422119141, "learning_rate": 1.6767673934379639e-06, "loss": 0.1544, "step": 1045 }, { "epoch": 0.29, "grad_norm": 2.8387649059295654, "learning_rate": 1.6761156268811128e-06, "loss": 0.1287, "step": 1046 }, { "epoch": 0.29, "grad_norm": 3.4863409996032715, "learning_rate": 1.6754633308266752e-06, "loss": 0.1576, "step": 1047 }, { "epoch": 0.29, "grad_norm": 2.8142569065093994, "learning_rate": 1.674810505785495e-06, "loss": 0.125, "step": 1048 }, { "epoch": 0.29, "grad_norm": 2.872755527496338, "learning_rate": 1.6741571522688294e-06, "loss": 0.1368, "step": 1049 }, { "epoch": 0.29, "grad_norm": 2.6436572074890137, "learning_rate": 1.67350327078835e-06, "loss": 0.1154, "step": 1050 }, { "epoch": 0.29, "grad_norm": 2.924184560775757, "learning_rate": 1.6728488618561417e-06, "loss": 0.1307, "step": 1051 }, { "epoch": 0.29, "grad_norm": 2.9721291065216064, "learning_rate": 1.672193925984703e-06, "loss": 0.1295, "step": 1052 }, { "epoch": 0.29, "grad_norm": 3.0370213985443115, "learning_rate": 1.6715384636869442e-06, "loss": 0.1244, "step": 1053 }, { "epoch": 0.29, "grad_norm": 3.0612335205078125, "learning_rate": 1.6708824754761886e-06, "loss": 0.1366, "step": 1054 }, { "epoch": 0.29, "grad_norm": 2.968006134033203, "learning_rate": 1.6702259618661708e-06, "loss": 0.1287, "step": 1055 }, { "epoch": 0.29, "grad_norm": 2.730593681335449, "learning_rate": 1.669568923371037e-06, "loss": 0.1293, "step": 1056 }, { "epoch": 0.29, "grad_norm": 3.0833163261413574, "learning_rate": 1.668911360505345e-06, "loss": 0.1294, "step": 1057 }, { "epoch": 0.29, "grad_norm": 2.8949716091156006, "learning_rate": 1.6682532737840628e-06, "loss": 0.1335, "step": 1058 }, { "epoch": 0.29, "grad_norm": 3.068634033203125, "learning_rate": 1.6675946637225688e-06, "loss": 0.1331, "step": 1059 }, { "epoch": 0.29, "grad_norm": 2.908865213394165, "learning_rate": 1.6669355308366507e-06, "loss": 0.1341, "step": 1060 }, { "epoch": 0.29, "grad_norm": 2.7697980403900146, "learning_rate": 1.6662758756425063e-06, "loss": 0.1248, "step": 1061 }, { "epoch": 0.29, "grad_norm": 2.9143946170806885, "learning_rate": 1.6656156986567427e-06, "loss": 0.1277, "step": 1062 }, { "epoch": 0.29, "grad_norm": 3.1084022521972656, "learning_rate": 1.6649550003963745e-06, "loss": 0.1373, "step": 1063 }, { "epoch": 0.29, "grad_norm": 3.0052878856658936, "learning_rate": 1.6642937813788258e-06, "loss": 0.1433, "step": 1064 }, { "epoch": 0.29, "grad_norm": 3.1521522998809814, "learning_rate": 1.6636320421219277e-06, "loss": 0.1681, "step": 1065 }, { "epoch": 0.29, "grad_norm": 2.8705294132232666, "learning_rate": 1.662969783143919e-06, "loss": 0.139, "step": 1066 }, { "epoch": 0.29, "grad_norm": 2.733748197555542, "learning_rate": 1.6623070049634453e-06, "loss": 0.1144, "step": 1067 }, { "epoch": 0.29, "grad_norm": 2.8265879154205322, "learning_rate": 1.6616437080995595e-06, "loss": 0.1339, "step": 1068 }, { "epoch": 0.29, "grad_norm": 3.024449110031128, "learning_rate": 1.6609798930717198e-06, "loss": 0.1484, "step": 1069 }, { "epoch": 0.29, "grad_norm": 2.904423475265503, "learning_rate": 1.6603155603997908e-06, "loss": 0.1308, "step": 1070 }, { "epoch": 0.29, "grad_norm": 3.1895720958709717, "learning_rate": 1.6596507106040422e-06, "loss": 0.1501, "step": 1071 }, { "epoch": 0.29, "grad_norm": 2.9068169593811035, "learning_rate": 1.658985344205149e-06, "loss": 0.1423, "step": 1072 }, { "epoch": 0.29, "grad_norm": 2.645341157913208, "learning_rate": 1.6583194617241906e-06, "loss": 0.1242, "step": 1073 }, { "epoch": 0.29, "grad_norm": 3.0872840881347656, "learning_rate": 1.6576530636826498e-06, "loss": 0.1323, "step": 1074 }, { "epoch": 0.29, "grad_norm": 3.080601453781128, "learning_rate": 1.6569861506024148e-06, "loss": 0.1289, "step": 1075 }, { "epoch": 0.29, "grad_norm": 2.7821171283721924, "learning_rate": 1.6563187230057759e-06, "loss": 0.1206, "step": 1076 }, { "epoch": 0.29, "grad_norm": 3.501741886138916, "learning_rate": 1.6556507814154264e-06, "loss": 0.1353, "step": 1077 }, { "epoch": 0.29, "grad_norm": 2.96410870552063, "learning_rate": 1.6549823263544628e-06, "loss": 0.1301, "step": 1078 }, { "epoch": 0.29, "grad_norm": 3.634714126586914, "learning_rate": 1.6543133583463833e-06, "loss": 0.1515, "step": 1079 }, { "epoch": 0.3, "grad_norm": 3.070134401321411, "learning_rate": 1.6536438779150878e-06, "loss": 0.1389, "step": 1080 }, { "epoch": 0.3, "grad_norm": 3.6893770694732666, "learning_rate": 1.6529738855848776e-06, "loss": 0.1598, "step": 1081 }, { "epoch": 0.3, "grad_norm": 3.5830516815185547, "learning_rate": 1.6523033818804549e-06, "loss": 0.1607, "step": 1082 }, { "epoch": 0.3, "grad_norm": 2.9699463844299316, "learning_rate": 1.6516323673269219e-06, "loss": 0.1406, "step": 1083 }, { "epoch": 0.3, "grad_norm": 2.899401903152466, "learning_rate": 1.650960842449782e-06, "loss": 0.1256, "step": 1084 }, { "epoch": 0.3, "grad_norm": 3.0776729583740234, "learning_rate": 1.650288807774937e-06, "loss": 0.1515, "step": 1085 }, { "epoch": 0.3, "grad_norm": 3.043003797531128, "learning_rate": 1.6496162638286886e-06, "loss": 0.1195, "step": 1086 }, { "epoch": 0.3, "grad_norm": 3.337824583053589, "learning_rate": 1.6489432111377372e-06, "loss": 0.1433, "step": 1087 }, { "epoch": 0.3, "grad_norm": 3.0308029651641846, "learning_rate": 1.6482696502291819e-06, "loss": 0.1308, "step": 1088 }, { "epoch": 0.3, "grad_norm": 2.727417230606079, "learning_rate": 1.6475955816305195e-06, "loss": 0.1352, "step": 1089 }, { "epoch": 0.3, "grad_norm": 2.7782535552978516, "learning_rate": 1.6469210058696446e-06, "loss": 0.1307, "step": 1090 }, { "epoch": 0.3, "grad_norm": 2.9431943893432617, "learning_rate": 1.6462459234748484e-06, "loss": 0.133, "step": 1091 }, { "epoch": 0.3, "grad_norm": 2.778409957885742, "learning_rate": 1.6455703349748197e-06, "loss": 0.1405, "step": 1092 }, { "epoch": 0.3, "grad_norm": 3.0530734062194824, "learning_rate": 1.644894240898643e-06, "loss": 0.14, "step": 1093 }, { "epoch": 0.3, "grad_norm": 3.2378525733947754, "learning_rate": 1.6442176417757992e-06, "loss": 0.1477, "step": 1094 }, { "epoch": 0.3, "grad_norm": 2.7790303230285645, "learning_rate": 1.6435405381361643e-06, "loss": 0.1168, "step": 1095 }, { "epoch": 0.3, "grad_norm": 3.0893919467926025, "learning_rate": 1.6428629305100102e-06, "loss": 0.1435, "step": 1096 }, { "epoch": 0.3, "grad_norm": 2.8517370223999023, "learning_rate": 1.6421848194280024e-06, "loss": 0.1342, "step": 1097 }, { "epoch": 0.3, "grad_norm": 2.777329921722412, "learning_rate": 1.6415062054212011e-06, "loss": 0.1223, "step": 1098 }, { "epoch": 0.3, "grad_norm": 2.9436533451080322, "learning_rate": 1.6408270890210612e-06, "loss": 0.1206, "step": 1099 }, { "epoch": 0.3, "grad_norm": 2.989617347717285, "learning_rate": 1.6401474707594296e-06, "loss": 0.1218, "step": 1100 }, { "epoch": 0.3, "grad_norm": 3.3940844535827637, "learning_rate": 1.6394673511685472e-06, "loss": 0.134, "step": 1101 }, { "epoch": 0.3, "grad_norm": 3.0965664386749268, "learning_rate": 1.6387867307810476e-06, "loss": 0.1305, "step": 1102 }, { "epoch": 0.3, "grad_norm": 2.931014060974121, "learning_rate": 1.638105610129956e-06, "loss": 0.1352, "step": 1103 }, { "epoch": 0.3, "grad_norm": 3.0383260250091553, "learning_rate": 1.6374239897486897e-06, "loss": 0.132, "step": 1104 }, { "epoch": 0.3, "grad_norm": 3.2072699069976807, "learning_rate": 1.6367418701710572e-06, "loss": 0.1673, "step": 1105 }, { "epoch": 0.3, "grad_norm": 2.983436346054077, "learning_rate": 1.6360592519312579e-06, "loss": 0.1254, "step": 1106 }, { "epoch": 0.3, "grad_norm": 2.875274896621704, "learning_rate": 1.6353761355638827e-06, "loss": 0.1351, "step": 1107 }, { "epoch": 0.3, "grad_norm": 3.192133665084839, "learning_rate": 1.6346925216039106e-06, "loss": 0.1503, "step": 1108 }, { "epoch": 0.3, "grad_norm": 2.901218891143799, "learning_rate": 1.6340084105867121e-06, "loss": 0.1483, "step": 1109 }, { "epoch": 0.3, "grad_norm": 2.900606632232666, "learning_rate": 1.633323803048047e-06, "loss": 0.1298, "step": 1110 }, { "epoch": 0.3, "grad_norm": 2.7904560565948486, "learning_rate": 1.6326386995240622e-06, "loss": 0.135, "step": 1111 }, { "epoch": 0.3, "grad_norm": 2.844744920730591, "learning_rate": 1.6319531005512945e-06, "loss": 0.1274, "step": 1112 }, { "epoch": 0.3, "grad_norm": 2.9561710357666016, "learning_rate": 1.6312670066666686e-06, "loss": 0.1205, "step": 1113 }, { "epoch": 0.3, "grad_norm": 3.266465663909912, "learning_rate": 1.6305804184074963e-06, "loss": 0.1351, "step": 1114 }, { "epoch": 0.3, "grad_norm": 3.0080487728118896, "learning_rate": 1.6298933363114767e-06, "loss": 0.1396, "step": 1115 }, { "epoch": 0.3, "grad_norm": 2.7729556560516357, "learning_rate": 1.629205760916696e-06, "loss": 0.1238, "step": 1116 }, { "epoch": 0.31, "grad_norm": 3.0315845012664795, "learning_rate": 1.6285176927616262e-06, "loss": 0.1336, "step": 1117 }, { "epoch": 0.31, "grad_norm": 3.1767919063568115, "learning_rate": 1.6278291323851257e-06, "loss": 0.147, "step": 1118 }, { "epoch": 0.31, "grad_norm": 3.098306179046631, "learning_rate": 1.6271400803264378e-06, "loss": 0.1425, "step": 1119 }, { "epoch": 0.31, "grad_norm": 3.0536861419677734, "learning_rate": 1.6264505371251915e-06, "loss": 0.1281, "step": 1120 }, { "epoch": 0.31, "grad_norm": 3.0145273208618164, "learning_rate": 1.6257605033214005e-06, "loss": 0.1387, "step": 1121 }, { "epoch": 0.31, "grad_norm": 3.1438162326812744, "learning_rate": 1.6250699794554614e-06, "loss": 0.1323, "step": 1122 }, { "epoch": 0.31, "grad_norm": 2.763699531555176, "learning_rate": 1.6243789660681565e-06, "loss": 0.1337, "step": 1123 }, { "epoch": 0.31, "grad_norm": 3.288756847381592, "learning_rate": 1.6236874637006497e-06, "loss": 0.1484, "step": 1124 }, { "epoch": 0.31, "grad_norm": 2.956301212310791, "learning_rate": 1.6229954728944895e-06, "loss": 0.1422, "step": 1125 }, { "epoch": 0.31, "grad_norm": 3.0409741401672363, "learning_rate": 1.6223029941916056e-06, "loss": 0.1502, "step": 1126 }, { "epoch": 0.31, "grad_norm": 2.7094151973724365, "learning_rate": 1.62161002813431e-06, "loss": 0.1272, "step": 1127 }, { "epoch": 0.31, "grad_norm": 3.1903421878814697, "learning_rate": 1.6209165752652974e-06, "loss": 0.1405, "step": 1128 }, { "epoch": 0.31, "grad_norm": 2.828828811645508, "learning_rate": 1.620222636127642e-06, "loss": 0.1287, "step": 1129 }, { "epoch": 0.31, "grad_norm": 2.87174916267395, "learning_rate": 1.6195282112648006e-06, "loss": 0.1181, "step": 1130 }, { "epoch": 0.31, "grad_norm": 2.855774402618408, "learning_rate": 1.6188333012206096e-06, "loss": 0.1347, "step": 1131 }, { "epoch": 0.31, "grad_norm": 2.7991037368774414, "learning_rate": 1.6181379065392848e-06, "loss": 0.1213, "step": 1132 }, { "epoch": 0.31, "grad_norm": 3.3876779079437256, "learning_rate": 1.6174420277654224e-06, "loss": 0.1382, "step": 1133 }, { "epoch": 0.31, "grad_norm": 2.8736510276794434, "learning_rate": 1.6167456654439978e-06, "loss": 0.1243, "step": 1134 }, { "epoch": 0.31, "grad_norm": 2.677625894546509, "learning_rate": 1.6160488201203642e-06, "loss": 0.1202, "step": 1135 }, { "epoch": 0.31, "grad_norm": 2.976384162902832, "learning_rate": 1.6153514923402536e-06, "loss": 0.1351, "step": 1136 }, { "epoch": 0.31, "grad_norm": 3.1343348026275635, "learning_rate": 1.614653682649776e-06, "loss": 0.1427, "step": 1137 }, { "epoch": 0.31, "grad_norm": 2.829636573791504, "learning_rate": 1.6139553915954186e-06, "loss": 0.1188, "step": 1138 }, { "epoch": 0.31, "grad_norm": 3.0276970863342285, "learning_rate": 1.6132566197240456e-06, "loss": 0.1205, "step": 1139 }, { "epoch": 0.31, "grad_norm": 2.8864333629608154, "learning_rate": 1.612557367582898e-06, "loss": 0.1335, "step": 1140 }, { "epoch": 0.31, "grad_norm": 2.779438018798828, "learning_rate": 1.6118576357195921e-06, "loss": 0.1298, "step": 1141 }, { "epoch": 0.31, "grad_norm": 3.176299571990967, "learning_rate": 1.6111574246821208e-06, "loss": 0.1432, "step": 1142 }, { "epoch": 0.31, "grad_norm": 2.911555528640747, "learning_rate": 1.6104567350188515e-06, "loss": 0.1326, "step": 1143 }, { "epoch": 0.31, "grad_norm": 3.7277283668518066, "learning_rate": 1.6097555672785276e-06, "loss": 0.1629, "step": 1144 }, { "epoch": 0.31, "grad_norm": 3.019008159637451, "learning_rate": 1.6090539220102657e-06, "loss": 0.1422, "step": 1145 }, { "epoch": 0.31, "grad_norm": 2.8250324726104736, "learning_rate": 1.6083517997635569e-06, "loss": 0.128, "step": 1146 }, { "epoch": 0.31, "grad_norm": 2.724057197570801, "learning_rate": 1.6076492010882658e-06, "loss": 0.134, "step": 1147 }, { "epoch": 0.31, "grad_norm": 2.6791696548461914, "learning_rate": 1.60694612653463e-06, "loss": 0.1404, "step": 1148 }, { "epoch": 0.31, "grad_norm": 3.141669511795044, "learning_rate": 1.6062425766532602e-06, "loss": 0.1369, "step": 1149 }, { "epoch": 0.31, "grad_norm": 3.1686415672302246, "learning_rate": 1.6055385519951387e-06, "loss": 0.1465, "step": 1150 }, { "epoch": 0.31, "grad_norm": 2.7634310722351074, "learning_rate": 1.60483405311162e-06, "loss": 0.128, "step": 1151 }, { "epoch": 0.31, "grad_norm": 2.9050796031951904, "learning_rate": 1.6041290805544301e-06, "loss": 0.1227, "step": 1152 }, { "epoch": 0.31, "grad_norm": 2.8661465644836426, "learning_rate": 1.6034236348756651e-06, "loss": 0.1305, "step": 1153 }, { "epoch": 0.32, "grad_norm": 2.8756277561187744, "learning_rate": 1.6027177166277937e-06, "loss": 0.1381, "step": 1154 }, { "epoch": 0.32, "grad_norm": 2.9020004272460938, "learning_rate": 1.602011326363652e-06, "loss": 0.1432, "step": 1155 }, { "epoch": 0.32, "grad_norm": 2.8704707622528076, "learning_rate": 1.6013044646364476e-06, "loss": 0.1422, "step": 1156 }, { "epoch": 0.32, "grad_norm": 3.0167694091796875, "learning_rate": 1.6005971319997568e-06, "loss": 0.1421, "step": 1157 }, { "epoch": 0.32, "grad_norm": 2.8320274353027344, "learning_rate": 1.5998893290075245e-06, "loss": 0.1186, "step": 1158 }, { "epoch": 0.32, "grad_norm": 2.787231683731079, "learning_rate": 1.5991810562140643e-06, "loss": 0.1309, "step": 1159 }, { "epoch": 0.32, "grad_norm": 2.6868605613708496, "learning_rate": 1.5984723141740574e-06, "loss": 0.1243, "step": 1160 }, { "epoch": 0.32, "grad_norm": 2.9979305267333984, "learning_rate": 1.5977631034425528e-06, "loss": 0.1373, "step": 1161 }, { "epoch": 0.32, "grad_norm": 2.7995948791503906, "learning_rate": 1.5970534245749663e-06, "loss": 0.1372, "step": 1162 }, { "epoch": 0.32, "grad_norm": 2.86142635345459, "learning_rate": 1.5963432781270805e-06, "loss": 0.1222, "step": 1163 }, { "epoch": 0.32, "grad_norm": 2.7392685413360596, "learning_rate": 1.5956326646550442e-06, "loss": 0.1303, "step": 1164 }, { "epoch": 0.32, "grad_norm": 2.9346110820770264, "learning_rate": 1.5949215847153715e-06, "loss": 0.136, "step": 1165 }, { "epoch": 0.32, "grad_norm": 3.010697841644287, "learning_rate": 1.5942100388649427e-06, "loss": 0.1435, "step": 1166 }, { "epoch": 0.32, "grad_norm": 2.903467893600464, "learning_rate": 1.5934980276610019e-06, "loss": 0.1426, "step": 1167 }, { "epoch": 0.32, "grad_norm": 2.727959632873535, "learning_rate": 1.5927855516611586e-06, "loss": 0.1367, "step": 1168 }, { "epoch": 0.32, "grad_norm": 2.812208414077759, "learning_rate": 1.5920726114233856e-06, "loss": 0.1162, "step": 1169 }, { "epoch": 0.32, "grad_norm": 3.0076522827148438, "learning_rate": 1.5913592075060197e-06, "loss": 0.1387, "step": 1170 }, { "epoch": 0.32, "grad_norm": 3.120340347290039, "learning_rate": 1.5906453404677606e-06, "loss": 0.1477, "step": 1171 }, { "epoch": 0.32, "grad_norm": 3.080254554748535, "learning_rate": 1.5899310108676708e-06, "loss": 0.141, "step": 1172 }, { "epoch": 0.32, "grad_norm": 3.1656322479248047, "learning_rate": 1.589216219265175e-06, "loss": 0.1391, "step": 1173 }, { "epoch": 0.32, "grad_norm": 3.0118024349212646, "learning_rate": 1.5885009662200596e-06, "loss": 0.1309, "step": 1174 }, { "epoch": 0.32, "grad_norm": 3.1373941898345947, "learning_rate": 1.587785252292473e-06, "loss": 0.1389, "step": 1175 }, { "epoch": 0.32, "grad_norm": 2.8171396255493164, "learning_rate": 1.5870690780429237e-06, "loss": 0.1255, "step": 1176 }, { "epoch": 0.32, "grad_norm": 2.952279806137085, "learning_rate": 1.5863524440322809e-06, "loss": 0.1387, "step": 1177 }, { "epoch": 0.32, "grad_norm": 2.9205703735351562, "learning_rate": 1.5856353508217747e-06, "loss": 0.1454, "step": 1178 }, { "epoch": 0.32, "grad_norm": 2.916257858276367, "learning_rate": 1.5849177989729931e-06, "loss": 0.1304, "step": 1179 }, { "epoch": 0.32, "grad_norm": 2.962117910385132, "learning_rate": 1.584199789047885e-06, "loss": 0.1311, "step": 1180 }, { "epoch": 0.32, "grad_norm": 2.8570611476898193, "learning_rate": 1.5834813216087578e-06, "loss": 0.1321, "step": 1181 }, { "epoch": 0.32, "grad_norm": 2.999396800994873, "learning_rate": 1.5827623972182753e-06, "loss": 0.1277, "step": 1182 }, { "epoch": 0.32, "grad_norm": 3.1423332691192627, "learning_rate": 1.5820430164394621e-06, "loss": 0.1399, "step": 1183 }, { "epoch": 0.32, "grad_norm": 3.01912522315979, "learning_rate": 1.581323179835698e-06, "loss": 0.1346, "step": 1184 }, { "epoch": 0.32, "grad_norm": 2.9051058292388916, "learning_rate": 1.5806028879707207e-06, "loss": 0.1247, "step": 1185 }, { "epoch": 0.32, "grad_norm": 2.99544358253479, "learning_rate": 1.5798821414086244e-06, "loss": 0.1292, "step": 1186 }, { "epoch": 0.32, "grad_norm": 3.1393465995788574, "learning_rate": 1.5791609407138587e-06, "loss": 0.1335, "step": 1187 }, { "epoch": 0.32, "grad_norm": 2.643645763397217, "learning_rate": 1.5784392864512297e-06, "loss": 0.1178, "step": 1188 }, { "epoch": 0.32, "grad_norm": 2.9057555198669434, "learning_rate": 1.5777171791858986e-06, "loss": 0.1292, "step": 1189 }, { "epoch": 0.33, "grad_norm": 2.9893436431884766, "learning_rate": 1.5769946194833813e-06, "loss": 0.1371, "step": 1190 }, { "epoch": 0.33, "grad_norm": 2.859166145324707, "learning_rate": 1.5762716079095477e-06, "loss": 0.1278, "step": 1191 }, { "epoch": 0.33, "grad_norm": 2.822263479232788, "learning_rate": 1.5755481450306216e-06, "loss": 0.1246, "step": 1192 }, { "epoch": 0.33, "grad_norm": 2.7825675010681152, "learning_rate": 1.5748242314131806e-06, "loss": 0.1225, "step": 1193 }, { "epoch": 0.33, "grad_norm": 3.0317020416259766, "learning_rate": 1.5740998676241548e-06, "loss": 0.1336, "step": 1194 }, { "epoch": 0.33, "grad_norm": 2.8351964950561523, "learning_rate": 1.5733750542308277e-06, "loss": 0.1202, "step": 1195 }, { "epoch": 0.33, "grad_norm": 3.0110056400299072, "learning_rate": 1.572649791800834e-06, "loss": 0.135, "step": 1196 }, { "epoch": 0.33, "grad_norm": 2.9327712059020996, "learning_rate": 1.5719240809021606e-06, "loss": 0.128, "step": 1197 }, { "epoch": 0.33, "grad_norm": 3.1209583282470703, "learning_rate": 1.5711979221031455e-06, "loss": 0.132, "step": 1198 }, { "epoch": 0.33, "grad_norm": 2.9687564373016357, "learning_rate": 1.5704713159724771e-06, "loss": 0.129, "step": 1199 }, { "epoch": 0.33, "grad_norm": 2.8928444385528564, "learning_rate": 1.5697442630791948e-06, "loss": 0.1357, "step": 1200 }, { "epoch": 0.33, "grad_norm": 2.8500170707702637, "learning_rate": 1.5690167639926875e-06, "loss": 0.1326, "step": 1201 }, { "epoch": 0.33, "grad_norm": 3.0176877975463867, "learning_rate": 1.5682888192826933e-06, "loss": 0.1498, "step": 1202 }, { "epoch": 0.33, "grad_norm": 3.251095771789551, "learning_rate": 1.5675604295193e-06, "loss": 0.1399, "step": 1203 }, { "epoch": 0.33, "grad_norm": 2.977865219116211, "learning_rate": 1.5668315952729427e-06, "loss": 0.1282, "step": 1204 }, { "epoch": 0.33, "grad_norm": 3.0575249195098877, "learning_rate": 1.5661023171144062e-06, "loss": 0.1307, "step": 1205 }, { "epoch": 0.33, "grad_norm": 3.0762460231781006, "learning_rate": 1.5653725956148215e-06, "loss": 0.1465, "step": 1206 }, { "epoch": 0.33, "grad_norm": 2.9267191886901855, "learning_rate": 1.564642431345668e-06, "loss": 0.1264, "step": 1207 }, { "epoch": 0.33, "grad_norm": 3.029406785964966, "learning_rate": 1.5639118248787714e-06, "loss": 0.1431, "step": 1208 }, { "epoch": 0.33, "grad_norm": 3.220940351486206, "learning_rate": 1.5631807767863029e-06, "loss": 0.1373, "step": 1209 }, { "epoch": 0.33, "grad_norm": 3.025521993637085, "learning_rate": 1.5624492876407807e-06, "loss": 0.1385, "step": 1210 }, { "epoch": 0.33, "grad_norm": 2.761337995529175, "learning_rate": 1.5617173580150675e-06, "loss": 0.1198, "step": 1211 }, { "epoch": 0.33, "grad_norm": 2.8094804286956787, "learning_rate": 1.5609849884823723e-06, "loss": 0.1316, "step": 1212 }, { "epoch": 0.33, "grad_norm": 3.0511271953582764, "learning_rate": 1.560252179616247e-06, "loss": 0.1406, "step": 1213 }, { "epoch": 0.33, "grad_norm": 2.9625461101531982, "learning_rate": 1.5595189319905887e-06, "loss": 0.1428, "step": 1214 }, { "epoch": 0.33, "grad_norm": 2.8088529109954834, "learning_rate": 1.5587852461796373e-06, "loss": 0.1335, "step": 1215 }, { "epoch": 0.33, "grad_norm": 3.0297744274139404, "learning_rate": 1.5580511227579764e-06, "loss": 0.1427, "step": 1216 }, { "epoch": 0.33, "grad_norm": 2.8155922889709473, "learning_rate": 1.5573165623005328e-06, "loss": 0.133, "step": 1217 }, { "epoch": 0.33, "grad_norm": 2.878079891204834, "learning_rate": 1.556581565382574e-06, "loss": 0.1207, "step": 1218 }, { "epoch": 0.33, "grad_norm": 2.9472103118896484, "learning_rate": 1.5558461325797109e-06, "loss": 0.1327, "step": 1219 }, { "epoch": 0.33, "grad_norm": 3.120007038116455, "learning_rate": 1.555110264467895e-06, "loss": 0.1539, "step": 1220 }, { "epoch": 0.33, "grad_norm": 2.8870222568511963, "learning_rate": 1.5543739616234186e-06, "loss": 0.1399, "step": 1221 }, { "epoch": 0.33, "grad_norm": 2.865922212600708, "learning_rate": 1.553637224622915e-06, "loss": 0.1292, "step": 1222 }, { "epoch": 0.33, "grad_norm": 2.926393747329712, "learning_rate": 1.5529000540433573e-06, "loss": 0.1386, "step": 1223 }, { "epoch": 0.33, "grad_norm": 2.885589122772217, "learning_rate": 1.5521624504620574e-06, "loss": 0.1231, "step": 1224 }, { "epoch": 0.33, "grad_norm": 2.996002197265625, "learning_rate": 1.5514244144566676e-06, "loss": 0.1482, "step": 1225 }, { "epoch": 0.33, "grad_norm": 3.2223353385925293, "learning_rate": 1.550685946605178e-06, "loss": 0.1406, "step": 1226 }, { "epoch": 0.34, "grad_norm": 3.146404504776001, "learning_rate": 1.5499470474859172e-06, "loss": 0.1316, "step": 1227 }, { "epoch": 0.34, "grad_norm": 3.082017421722412, "learning_rate": 1.5492077176775513e-06, "loss": 0.1521, "step": 1228 }, { "epoch": 0.34, "grad_norm": 3.1566617488861084, "learning_rate": 1.548467957759084e-06, "loss": 0.1516, "step": 1229 }, { "epoch": 0.34, "grad_norm": 2.732489824295044, "learning_rate": 1.5477277683098552e-06, "loss": 0.1292, "step": 1230 }, { "epoch": 0.34, "grad_norm": 2.7503480911254883, "learning_rate": 1.5469871499095425e-06, "loss": 0.1376, "step": 1231 }, { "epoch": 0.34, "grad_norm": 2.9067938327789307, "learning_rate": 1.5462461031381584e-06, "loss": 0.1331, "step": 1232 }, { "epoch": 0.34, "grad_norm": 2.9652981758117676, "learning_rate": 1.5455046285760505e-06, "loss": 0.129, "step": 1233 }, { "epoch": 0.34, "grad_norm": 2.9126827716827393, "learning_rate": 1.5447627268039028e-06, "loss": 0.1296, "step": 1234 }, { "epoch": 0.34, "grad_norm": 2.70180344581604, "learning_rate": 1.5440203984027322e-06, "loss": 0.1253, "step": 1235 }, { "epoch": 0.34, "grad_norm": 2.670848846435547, "learning_rate": 1.5432776439538912e-06, "loss": 0.1295, "step": 1236 }, { "epoch": 0.34, "grad_norm": 3.3088035583496094, "learning_rate": 1.5425344640390653e-06, "loss": 0.1369, "step": 1237 }, { "epoch": 0.34, "grad_norm": 2.6741421222686768, "learning_rate": 1.5417908592402734e-06, "loss": 0.12, "step": 1238 }, { "epoch": 0.34, "grad_norm": 3.0382497310638428, "learning_rate": 1.5410468301398663e-06, "loss": 0.1408, "step": 1239 }, { "epoch": 0.34, "grad_norm": 2.7590854167938232, "learning_rate": 1.5403023773205284e-06, "loss": 0.1349, "step": 1240 }, { "epoch": 0.34, "grad_norm": 2.749650001525879, "learning_rate": 1.5395575013652753e-06, "loss": 0.1329, "step": 1241 }, { "epoch": 0.34, "grad_norm": 2.8700966835021973, "learning_rate": 1.5388122028574538e-06, "loss": 0.1402, "step": 1242 }, { "epoch": 0.34, "grad_norm": 2.932111978530884, "learning_rate": 1.5380664823807416e-06, "loss": 0.128, "step": 1243 }, { "epoch": 0.34, "grad_norm": 3.245429515838623, "learning_rate": 1.5373203405191477e-06, "loss": 0.1282, "step": 1244 }, { "epoch": 0.34, "grad_norm": 2.9517509937286377, "learning_rate": 1.53657377785701e-06, "loss": 0.1332, "step": 1245 }, { "epoch": 0.34, "grad_norm": 2.945868492126465, "learning_rate": 1.5358267949789964e-06, "loss": 0.1345, "step": 1246 }, { "epoch": 0.34, "grad_norm": 2.7037761211395264, "learning_rate": 1.5350793924701045e-06, "loss": 0.1319, "step": 1247 }, { "epoch": 0.34, "grad_norm": 3.136314630508423, "learning_rate": 1.5343315709156594e-06, "loss": 0.1516, "step": 1248 }, { "epoch": 0.34, "grad_norm": 2.9882936477661133, "learning_rate": 1.533583330901315e-06, "loss": 0.1215, "step": 1249 }, { "epoch": 0.34, "grad_norm": 3.243441104888916, "learning_rate": 1.532834673013053e-06, "loss": 0.1336, "step": 1250 }, { "epoch": 0.34, "grad_norm": 3.034088134765625, "learning_rate": 1.5320855978371818e-06, "loss": 0.1412, "step": 1251 }, { "epoch": 0.34, "grad_norm": 2.9239449501037598, "learning_rate": 1.531336105960338e-06, "loss": 0.124, "step": 1252 }, { "epoch": 0.34, "grad_norm": 2.957061290740967, "learning_rate": 1.5305861979694826e-06, "loss": 0.1381, "step": 1253 }, { "epoch": 0.34, "grad_norm": 2.8607163429260254, "learning_rate": 1.5298358744519036e-06, "loss": 0.1175, "step": 1254 }, { "epoch": 0.34, "grad_norm": 2.9602956771850586, "learning_rate": 1.5290851359952144e-06, "loss": 0.1445, "step": 1255 }, { "epoch": 0.34, "grad_norm": 3.1619811058044434, "learning_rate": 1.5283339831873529e-06, "loss": 0.1551, "step": 1256 }, { "epoch": 0.34, "grad_norm": 2.7596523761749268, "learning_rate": 1.5275824166165823e-06, "loss": 0.1187, "step": 1257 }, { "epoch": 0.34, "grad_norm": 2.7872233390808105, "learning_rate": 1.5268304368714891e-06, "loss": 0.1342, "step": 1258 }, { "epoch": 0.34, "grad_norm": 3.116015911102295, "learning_rate": 1.5260780445409833e-06, "loss": 0.1358, "step": 1259 }, { "epoch": 0.34, "grad_norm": 3.3103036880493164, "learning_rate": 1.5253252402142986e-06, "loss": 0.1591, "step": 1260 }, { "epoch": 0.34, "grad_norm": 2.861786127090454, "learning_rate": 1.5245720244809914e-06, "loss": 0.1184, "step": 1261 }, { "epoch": 0.34, "grad_norm": 2.9362566471099854, "learning_rate": 1.5238183979309397e-06, "loss": 0.1436, "step": 1262 }, { "epoch": 0.35, "grad_norm": 2.962371349334717, "learning_rate": 1.523064361154343e-06, "loss": 0.1398, "step": 1263 }, { "epoch": 0.35, "grad_norm": 2.906949996948242, "learning_rate": 1.5223099147417226e-06, "loss": 0.1313, "step": 1264 }, { "epoch": 0.35, "grad_norm": 2.570661783218384, "learning_rate": 1.5215550592839217e-06, "loss": 0.1268, "step": 1265 }, { "epoch": 0.35, "grad_norm": 3.0509450435638428, "learning_rate": 1.5207997953721017e-06, "loss": 0.1342, "step": 1266 }, { "epoch": 0.35, "grad_norm": 2.721755027770996, "learning_rate": 1.5200441235977454e-06, "loss": 0.1323, "step": 1267 }, { "epoch": 0.35, "grad_norm": 3.1234641075134277, "learning_rate": 1.5192880445526537e-06, "loss": 0.1385, "step": 1268 }, { "epoch": 0.35, "grad_norm": 2.9297051429748535, "learning_rate": 1.5185315588289478e-06, "loss": 0.1339, "step": 1269 }, { "epoch": 0.35, "grad_norm": 2.916425943374634, "learning_rate": 1.5177746670190671e-06, "loss": 0.1321, "step": 1270 }, { "epoch": 0.35, "grad_norm": 2.894190788269043, "learning_rate": 1.5170173697157687e-06, "loss": 0.133, "step": 1271 }, { "epoch": 0.35, "grad_norm": 2.728078842163086, "learning_rate": 1.516259667512127e-06, "loss": 0.1322, "step": 1272 }, { "epoch": 0.35, "grad_norm": 2.996042013168335, "learning_rate": 1.515501561001534e-06, "loss": 0.1413, "step": 1273 }, { "epoch": 0.35, "grad_norm": 2.8164424896240234, "learning_rate": 1.5147430507776978e-06, "loss": 0.1314, "step": 1274 }, { "epoch": 0.35, "grad_norm": 2.785353899002075, "learning_rate": 1.5139841374346437e-06, "loss": 0.1167, "step": 1275 }, { "epoch": 0.35, "grad_norm": 3.056356191635132, "learning_rate": 1.5132248215667115e-06, "loss": 0.1388, "step": 1276 }, { "epoch": 0.35, "grad_norm": 2.864875316619873, "learning_rate": 1.512465103768557e-06, "loss": 0.1264, "step": 1277 }, { "epoch": 0.35, "grad_norm": 2.9353301525115967, "learning_rate": 1.5117049846351508e-06, "loss": 0.1321, "step": 1278 }, { "epoch": 0.35, "grad_norm": 2.9219601154327393, "learning_rate": 1.510944464761777e-06, "loss": 0.131, "step": 1279 }, { "epoch": 0.35, "grad_norm": 2.9697318077087402, "learning_rate": 1.5101835447440344e-06, "loss": 0.119, "step": 1280 }, { "epoch": 0.35, "grad_norm": 3.0618062019348145, "learning_rate": 1.5094222251778343e-06, "loss": 0.1382, "step": 1281 }, { "epoch": 0.35, "grad_norm": 3.128476142883301, "learning_rate": 1.5086605066594024e-06, "loss": 0.16, "step": 1282 }, { "epoch": 0.35, "grad_norm": 2.7182974815368652, "learning_rate": 1.5078983897852753e-06, "loss": 0.1217, "step": 1283 }, { "epoch": 0.35, "grad_norm": 3.076162338256836, "learning_rate": 1.507135875152302e-06, "loss": 0.1355, "step": 1284 }, { "epoch": 0.35, "grad_norm": 2.9355263710021973, "learning_rate": 1.506372963357644e-06, "loss": 0.1468, "step": 1285 }, { "epoch": 0.35, "grad_norm": 2.685256242752075, "learning_rate": 1.5056096549987718e-06, "loss": 0.1282, "step": 1286 }, { "epoch": 0.35, "grad_norm": 2.9893975257873535, "learning_rate": 1.5048459506734687e-06, "loss": 0.1212, "step": 1287 }, { "epoch": 0.35, "grad_norm": 2.740032196044922, "learning_rate": 1.5040818509798263e-06, "loss": 0.1323, "step": 1288 }, { "epoch": 0.35, "grad_norm": 3.0219709873199463, "learning_rate": 1.5033173565162472e-06, "loss": 0.1366, "step": 1289 }, { "epoch": 0.35, "grad_norm": 3.0304818153381348, "learning_rate": 1.5025524678814425e-06, "loss": 0.1386, "step": 1290 }, { "epoch": 0.35, "grad_norm": 2.810936212539673, "learning_rate": 1.5017871856744315e-06, "loss": 0.1259, "step": 1291 }, { "epoch": 0.35, "grad_norm": 2.741853713989258, "learning_rate": 1.501021510494543e-06, "loss": 0.1293, "step": 1292 }, { "epoch": 0.35, "grad_norm": 3.019928455352783, "learning_rate": 1.5002554429414123e-06, "loss": 0.1341, "step": 1293 }, { "epoch": 0.35, "grad_norm": 3.0014054775238037, "learning_rate": 1.4994889836149827e-06, "loss": 0.142, "step": 1294 }, { "epoch": 0.35, "grad_norm": 3.092749834060669, "learning_rate": 1.4987221331155042e-06, "loss": 0.1576, "step": 1295 }, { "epoch": 0.35, "grad_norm": 2.9108452796936035, "learning_rate": 1.4979548920435332e-06, "loss": 0.1313, "step": 1296 }, { "epoch": 0.35, "grad_norm": 2.6839687824249268, "learning_rate": 1.4971872609999315e-06, "loss": 0.1292, "step": 1297 }, { "epoch": 0.35, "grad_norm": 3.0319478511810303, "learning_rate": 1.496419240585867e-06, "loss": 0.148, "step": 1298 }, { "epoch": 0.35, "grad_norm": 2.8133440017700195, "learning_rate": 1.4956508314028118e-06, "loss": 0.1273, "step": 1299 }, { "epoch": 0.36, "grad_norm": 2.979665517807007, "learning_rate": 1.4948820340525437e-06, "loss": 0.1349, "step": 1300 }, { "epoch": 0.36, "grad_norm": 2.826272487640381, "learning_rate": 1.4941128491371426e-06, "loss": 0.1206, "step": 1301 }, { "epoch": 0.36, "grad_norm": 2.9452009201049805, "learning_rate": 1.4933432772589936e-06, "loss": 0.1387, "step": 1302 }, { "epoch": 0.36, "grad_norm": 2.8416945934295654, "learning_rate": 1.4925733190207839e-06, "loss": 0.1481, "step": 1303 }, { "epoch": 0.36, "grad_norm": 2.7696831226348877, "learning_rate": 1.4918029750255039e-06, "loss": 0.1186, "step": 1304 }, { "epoch": 0.36, "grad_norm": 3.0068044662475586, "learning_rate": 1.491032245876446e-06, "loss": 0.1231, "step": 1305 }, { "epoch": 0.36, "grad_norm": 3.0028553009033203, "learning_rate": 1.490261132177203e-06, "loss": 0.127, "step": 1306 }, { "epoch": 0.36, "grad_norm": 2.9873032569885254, "learning_rate": 1.4894896345316713e-06, "loss": 0.136, "step": 1307 }, { "epoch": 0.36, "grad_norm": 2.8812692165374756, "learning_rate": 1.4887177535440456e-06, "loss": 0.1322, "step": 1308 }, { "epoch": 0.36, "grad_norm": 3.014873743057251, "learning_rate": 1.4879454898188222e-06, "loss": 0.1282, "step": 1309 }, { "epoch": 0.36, "grad_norm": 3.1590218544006348, "learning_rate": 1.4871728439607964e-06, "loss": 0.1455, "step": 1310 }, { "epoch": 0.36, "grad_norm": 2.8542122840881348, "learning_rate": 1.4863998165750636e-06, "loss": 0.1448, "step": 1311 }, { "epoch": 0.36, "grad_norm": 2.68994140625, "learning_rate": 1.4856264082670169e-06, "loss": 0.127, "step": 1312 }, { "epoch": 0.36, "grad_norm": 3.079030752182007, "learning_rate": 1.484852619642349e-06, "loss": 0.1415, "step": 1313 }, { "epoch": 0.36, "grad_norm": 2.891287088394165, "learning_rate": 1.484078451307049e-06, "loss": 0.1374, "step": 1314 }, { "epoch": 0.36, "grad_norm": 3.1313259601593018, "learning_rate": 1.4833039038674046e-06, "loss": 0.1287, "step": 1315 }, { "epoch": 0.36, "grad_norm": 2.799778938293457, "learning_rate": 1.4825289779299998e-06, "loss": 0.1307, "step": 1316 }, { "epoch": 0.36, "grad_norm": 2.9091029167175293, "learning_rate": 1.4817536741017151e-06, "loss": 0.1322, "step": 1317 }, { "epoch": 0.36, "grad_norm": 2.757341146469116, "learning_rate": 1.4809779929897272e-06, "loss": 0.1218, "step": 1318 }, { "epoch": 0.36, "grad_norm": 3.112070083618164, "learning_rate": 1.4802019352015078e-06, "loss": 0.1241, "step": 1319 }, { "epoch": 0.36, "grad_norm": 2.776374578475952, "learning_rate": 1.479425501344824e-06, "loss": 0.1369, "step": 1320 }, { "epoch": 0.36, "grad_norm": 2.764132499694824, "learning_rate": 1.478648692027737e-06, "loss": 0.1197, "step": 1321 }, { "epoch": 0.36, "grad_norm": 2.757923126220703, "learning_rate": 1.477871507858602e-06, "loss": 0.1193, "step": 1322 }, { "epoch": 0.36, "grad_norm": 3.072037696838379, "learning_rate": 1.4770939494460696e-06, "loss": 0.1236, "step": 1323 }, { "epoch": 0.36, "grad_norm": 2.9252185821533203, "learning_rate": 1.4763160173990801e-06, "loss": 0.1221, "step": 1324 }, { "epoch": 0.36, "grad_norm": 3.2856593132019043, "learning_rate": 1.475537712326869e-06, "loss": 0.1436, "step": 1325 }, { "epoch": 0.36, "grad_norm": 3.1054296493530273, "learning_rate": 1.4747590348389638e-06, "loss": 0.1369, "step": 1326 }, { "epoch": 0.36, "grad_norm": 2.757472276687622, "learning_rate": 1.4739799855451819e-06, "loss": 0.1284, "step": 1327 }, { "epoch": 0.36, "grad_norm": 2.970815896987915, "learning_rate": 1.473200565055634e-06, "loss": 0.1452, "step": 1328 }, { "epoch": 0.36, "grad_norm": 2.9534873962402344, "learning_rate": 1.4724207739807199e-06, "loss": 0.1456, "step": 1329 }, { "epoch": 0.36, "grad_norm": 3.152365207672119, "learning_rate": 1.4716406129311306e-06, "loss": 0.1288, "step": 1330 }, { "epoch": 0.36, "grad_norm": 2.8408286571502686, "learning_rate": 1.4708600825178463e-06, "loss": 0.1315, "step": 1331 }, { "epoch": 0.36, "grad_norm": 2.619940996170044, "learning_rate": 1.4700791833521365e-06, "loss": 0.1284, "step": 1332 }, { "epoch": 0.36, "grad_norm": 2.636654853820801, "learning_rate": 1.4692979160455603e-06, "loss": 0.1132, "step": 1333 }, { "epoch": 0.36, "grad_norm": 2.939162015914917, "learning_rate": 1.4685162812099637e-06, "loss": 0.1359, "step": 1334 }, { "epoch": 0.36, "grad_norm": 2.7760133743286133, "learning_rate": 1.4677342794574815e-06, "loss": 0.1246, "step": 1335 }, { "epoch": 0.36, "grad_norm": 2.7537975311279297, "learning_rate": 1.4669519114005365e-06, "loss": 0.132, "step": 1336 }, { "epoch": 0.37, "grad_norm": 2.7049872875213623, "learning_rate": 1.4661691776518358e-06, "loss": 0.1351, "step": 1337 }, { "epoch": 0.37, "grad_norm": 2.9597678184509277, "learning_rate": 1.4653860788243764e-06, "loss": 0.1461, "step": 1338 }, { "epoch": 0.37, "grad_norm": 3.550565004348755, "learning_rate": 1.4646026155314382e-06, "loss": 0.1254, "step": 1339 }, { "epoch": 0.37, "grad_norm": 2.9513895511627197, "learning_rate": 1.463818788386588e-06, "loss": 0.1199, "step": 1340 }, { "epoch": 0.37, "grad_norm": 2.597242593765259, "learning_rate": 1.4630345980036773e-06, "loss": 0.1265, "step": 1341 }, { "epoch": 0.37, "grad_norm": 2.9622340202331543, "learning_rate": 1.4622500449968424e-06, "loss": 0.1487, "step": 1342 }, { "epoch": 0.37, "grad_norm": 2.835066795349121, "learning_rate": 1.461465129980503e-06, "loss": 0.1357, "step": 1343 }, { "epoch": 0.37, "grad_norm": 2.7730233669281006, "learning_rate": 1.4606798535693625e-06, "loss": 0.1332, "step": 1344 }, { "epoch": 0.37, "grad_norm": 3.090608596801758, "learning_rate": 1.459894216378407e-06, "loss": 0.1248, "step": 1345 }, { "epoch": 0.37, "grad_norm": 2.9221718311309814, "learning_rate": 1.4591082190229065e-06, "loss": 0.1263, "step": 1346 }, { "epoch": 0.37, "grad_norm": 2.7651219367980957, "learning_rate": 1.458321862118411e-06, "loss": 0.1321, "step": 1347 }, { "epoch": 0.37, "grad_norm": 2.8736658096313477, "learning_rate": 1.4575351462807542e-06, "loss": 0.1211, "step": 1348 }, { "epoch": 0.37, "grad_norm": 2.7251627445220947, "learning_rate": 1.4567480721260487e-06, "loss": 0.1309, "step": 1349 }, { "epoch": 0.37, "grad_norm": 2.879901647567749, "learning_rate": 1.4559606402706898e-06, "loss": 0.138, "step": 1350 }, { "epoch": 0.37, "grad_norm": 2.9609620571136475, "learning_rate": 1.4551728513313514e-06, "loss": 0.1315, "step": 1351 }, { "epoch": 0.37, "grad_norm": 2.6929612159729004, "learning_rate": 1.4543847059249882e-06, "loss": 0.1304, "step": 1352 }, { "epoch": 0.37, "grad_norm": 2.9134647846221924, "learning_rate": 1.4535962046688332e-06, "loss": 0.1422, "step": 1353 }, { "epoch": 0.37, "grad_norm": 3.054995059967041, "learning_rate": 1.4528073481803984e-06, "loss": 0.1358, "step": 1354 }, { "epoch": 0.37, "grad_norm": 2.8526723384857178, "learning_rate": 1.452018137077474e-06, "loss": 0.132, "step": 1355 }, { "epoch": 0.37, "grad_norm": 2.7417056560516357, "learning_rate": 1.4512285719781278e-06, "loss": 0.1258, "step": 1356 }, { "epoch": 0.37, "grad_norm": 3.0152394771575928, "learning_rate": 1.4504386535007054e-06, "loss": 0.1325, "step": 1357 }, { "epoch": 0.37, "grad_norm": 2.9597837924957275, "learning_rate": 1.4496483822638283e-06, "loss": 0.1428, "step": 1358 }, { "epoch": 0.37, "grad_norm": 2.6742889881134033, "learning_rate": 1.4488577588863947e-06, "loss": 0.1235, "step": 1359 }, { "epoch": 0.37, "grad_norm": 2.8367764949798584, "learning_rate": 1.4480667839875784e-06, "loss": 0.1384, "step": 1360 }, { "epoch": 0.37, "grad_norm": 3.017707586288452, "learning_rate": 1.447275458186829e-06, "loss": 0.1345, "step": 1361 }, { "epoch": 0.37, "grad_norm": 2.8236801624298096, "learning_rate": 1.4464837821038702e-06, "loss": 0.1328, "step": 1362 }, { "epoch": 0.37, "grad_norm": 2.7663307189941406, "learning_rate": 1.4456917563587006e-06, "loss": 0.1258, "step": 1363 }, { "epoch": 0.37, "grad_norm": 2.5681021213531494, "learning_rate": 1.444899381571592e-06, "loss": 0.1166, "step": 1364 }, { "epoch": 0.37, "grad_norm": 3.04805588722229, "learning_rate": 1.4441066583630903e-06, "loss": 0.1209, "step": 1365 }, { "epoch": 0.37, "grad_norm": 3.0715489387512207, "learning_rate": 1.4433135873540139e-06, "loss": 0.1524, "step": 1366 }, { "epoch": 0.37, "grad_norm": 3.092496871948242, "learning_rate": 1.4425201691654534e-06, "loss": 0.1462, "step": 1367 }, { "epoch": 0.37, "grad_norm": 2.8307652473449707, "learning_rate": 1.4417264044187718e-06, "loss": 0.1315, "step": 1368 }, { "epoch": 0.37, "grad_norm": 2.9191513061523438, "learning_rate": 1.4409322937356026e-06, "loss": 0.1332, "step": 1369 }, { "epoch": 0.37, "grad_norm": 2.7125518321990967, "learning_rate": 1.440137837737851e-06, "loss": 0.1276, "step": 1370 }, { "epoch": 0.37, "grad_norm": 3.0837535858154297, "learning_rate": 1.4393430370476931e-06, "loss": 0.1375, "step": 1371 }, { "epoch": 0.37, "grad_norm": 3.0928070545196533, "learning_rate": 1.4385478922875734e-06, "loss": 0.139, "step": 1372 }, { "epoch": 0.38, "grad_norm": 2.916564464569092, "learning_rate": 1.4377524040802072e-06, "loss": 0.1268, "step": 1373 }, { "epoch": 0.38, "grad_norm": 3.063411235809326, "learning_rate": 1.4369565730485785e-06, "loss": 0.1293, "step": 1374 }, { "epoch": 0.38, "grad_norm": 2.8868045806884766, "learning_rate": 1.4361603998159387e-06, "loss": 0.1239, "step": 1375 }, { "epoch": 0.38, "grad_norm": 3.281874179840088, "learning_rate": 1.4353638850058092e-06, "loss": 0.1504, "step": 1376 }, { "epoch": 0.38, "grad_norm": 2.732192039489746, "learning_rate": 1.434567029241977e-06, "loss": 0.1331, "step": 1377 }, { "epoch": 0.38, "grad_norm": 2.7928121089935303, "learning_rate": 1.433769833148497e-06, "loss": 0.1115, "step": 1378 }, { "epoch": 0.38, "grad_norm": 3.060171604156494, "learning_rate": 1.4329722973496908e-06, "loss": 0.1312, "step": 1379 }, { "epoch": 0.38, "grad_norm": 3.192661762237549, "learning_rate": 1.4321744224701458e-06, "loss": 0.145, "step": 1380 }, { "epoch": 0.38, "grad_norm": 2.8441617488861084, "learning_rate": 1.4313762091347148e-06, "loss": 0.1391, "step": 1381 }, { "epoch": 0.38, "grad_norm": 2.771820545196533, "learning_rate": 1.4305776579685155e-06, "loss": 0.1377, "step": 1382 }, { "epoch": 0.38, "grad_norm": 2.9881622791290283, "learning_rate": 1.4297787695969308e-06, "loss": 0.1382, "step": 1383 }, { "epoch": 0.38, "grad_norm": 2.8311970233917236, "learning_rate": 1.4289795446456074e-06, "loss": 0.1364, "step": 1384 }, { "epoch": 0.38, "grad_norm": 2.8374383449554443, "learning_rate": 1.428179983740455e-06, "loss": 0.137, "step": 1385 }, { "epoch": 0.38, "grad_norm": 2.8031299114227295, "learning_rate": 1.4273800875076478e-06, "loss": 0.1374, "step": 1386 }, { "epoch": 0.38, "grad_norm": 2.781954526901245, "learning_rate": 1.4265798565736209e-06, "loss": 0.1407, "step": 1387 }, { "epoch": 0.38, "grad_norm": 2.935701370239258, "learning_rate": 1.4257792915650725e-06, "loss": 0.1431, "step": 1388 }, { "epoch": 0.38, "grad_norm": 2.691863775253296, "learning_rate": 1.424978393108963e-06, "loss": 0.1233, "step": 1389 }, { "epoch": 0.38, "grad_norm": 3.0290353298187256, "learning_rate": 1.424177161832512e-06, "loss": 0.142, "step": 1390 }, { "epoch": 0.38, "grad_norm": 2.838080406188965, "learning_rate": 1.423375598363202e-06, "loss": 0.1301, "step": 1391 }, { "epoch": 0.38, "grad_norm": 2.8826422691345215, "learning_rate": 1.422573703328774e-06, "loss": 0.1289, "step": 1392 }, { "epoch": 0.38, "grad_norm": 3.0484912395477295, "learning_rate": 1.42177147735723e-06, "loss": 0.1292, "step": 1393 }, { "epoch": 0.38, "grad_norm": 3.0856308937072754, "learning_rate": 1.42096892107683e-06, "loss": 0.1358, "step": 1394 }, { "epoch": 0.38, "grad_norm": 2.663012742996216, "learning_rate": 1.4201660351160928e-06, "loss": 0.1213, "step": 1395 }, { "epoch": 0.38, "grad_norm": 2.953725814819336, "learning_rate": 1.4193628201037964e-06, "loss": 0.1262, "step": 1396 }, { "epoch": 0.38, "grad_norm": 3.0396006107330322, "learning_rate": 1.4185592766689751e-06, "loss": 0.1444, "step": 1397 }, { "epoch": 0.38, "grad_norm": 3.0202651023864746, "learning_rate": 1.4177554054409219e-06, "loss": 0.141, "step": 1398 }, { "epoch": 0.38, "grad_norm": 2.6195216178894043, "learning_rate": 1.4169512070491852e-06, "loss": 0.124, "step": 1399 }, { "epoch": 0.38, "grad_norm": 3.101203680038452, "learning_rate": 1.4161466821235703e-06, "loss": 0.1425, "step": 1400 }, { "epoch": 0.38, "grad_norm": 2.9621312618255615, "learning_rate": 1.4153418312941386e-06, "loss": 0.1407, "step": 1401 }, { "epoch": 0.38, "grad_norm": 3.1151602268218994, "learning_rate": 1.4145366551912052e-06, "loss": 0.1453, "step": 1402 }, { "epoch": 0.38, "grad_norm": 3.0556440353393555, "learning_rate": 1.4137311544453416e-06, "loss": 0.1287, "step": 1403 }, { "epoch": 0.38, "grad_norm": 2.853315830230713, "learning_rate": 1.4129253296873727e-06, "loss": 0.1268, "step": 1404 }, { "epoch": 0.38, "grad_norm": 3.153733968734741, "learning_rate": 1.4121191815483774e-06, "loss": 0.1389, "step": 1405 }, { "epoch": 0.38, "grad_norm": 3.0757758617401123, "learning_rate": 1.411312710659688e-06, "loss": 0.1498, "step": 1406 }, { "epoch": 0.38, "grad_norm": 3.0497725009918213, "learning_rate": 1.410505917652889e-06, "loss": 0.1516, "step": 1407 }, { "epoch": 0.38, "grad_norm": 2.795180082321167, "learning_rate": 1.4096988031598178e-06, "loss": 0.1285, "step": 1408 }, { "epoch": 0.38, "grad_norm": 2.8632426261901855, "learning_rate": 1.4088913678125628e-06, "loss": 0.1316, "step": 1409 }, { "epoch": 0.39, "grad_norm": 2.789442539215088, "learning_rate": 1.4080836122434648e-06, "loss": 0.1299, "step": 1410 }, { "epoch": 0.39, "grad_norm": 2.9751696586608887, "learning_rate": 1.4072755370851147e-06, "loss": 0.1414, "step": 1411 }, { "epoch": 0.39, "grad_norm": 2.9262402057647705, "learning_rate": 1.406467142970353e-06, "loss": 0.1327, "step": 1412 }, { "epoch": 0.39, "grad_norm": 2.9993863105773926, "learning_rate": 1.4056584305322714e-06, "loss": 0.1201, "step": 1413 }, { "epoch": 0.39, "grad_norm": 2.710305690765381, "learning_rate": 1.4048494004042102e-06, "loss": 0.1314, "step": 1414 }, { "epoch": 0.39, "grad_norm": 2.9878225326538086, "learning_rate": 1.404040053219758e-06, "loss": 0.128, "step": 1415 }, { "epoch": 0.39, "grad_norm": 2.8981685638427734, "learning_rate": 1.403230389612753e-06, "loss": 0.1177, "step": 1416 }, { "epoch": 0.39, "grad_norm": 2.8175344467163086, "learning_rate": 1.4024204102172797e-06, "loss": 0.1441, "step": 1417 }, { "epoch": 0.39, "grad_norm": 2.7358124256134033, "learning_rate": 1.401610115667671e-06, "loss": 0.1288, "step": 1418 }, { "epoch": 0.39, "grad_norm": 2.9867782592773438, "learning_rate": 1.400799506598506e-06, "loss": 0.1303, "step": 1419 }, { "epoch": 0.39, "grad_norm": 3.090707778930664, "learning_rate": 1.3999885836446104e-06, "loss": 0.1429, "step": 1420 }, { "epoch": 0.39, "grad_norm": 2.998790740966797, "learning_rate": 1.399177347441056e-06, "loss": 0.1298, "step": 1421 }, { "epoch": 0.39, "grad_norm": 2.760016679763794, "learning_rate": 1.3983657986231596e-06, "loss": 0.1381, "step": 1422 }, { "epoch": 0.39, "grad_norm": 2.9793732166290283, "learning_rate": 1.3975539378264823e-06, "loss": 0.1343, "step": 1423 }, { "epoch": 0.39, "grad_norm": 2.8108506202697754, "learning_rate": 1.3967417656868301e-06, "loss": 0.1386, "step": 1424 }, { "epoch": 0.39, "grad_norm": 2.9230880737304688, "learning_rate": 1.395929282840253e-06, "loss": 0.1454, "step": 1425 }, { "epoch": 0.39, "grad_norm": 2.837275981903076, "learning_rate": 1.3951164899230446e-06, "loss": 0.1343, "step": 1426 }, { "epoch": 0.39, "grad_norm": 2.712369680404663, "learning_rate": 1.3943033875717403e-06, "loss": 0.1331, "step": 1427 }, { "epoch": 0.39, "grad_norm": 2.855681896209717, "learning_rate": 1.3934899764231177e-06, "loss": 0.1184, "step": 1428 }, { "epoch": 0.39, "grad_norm": 2.837350368499756, "learning_rate": 1.392676257114198e-06, "loss": 0.1389, "step": 1429 }, { "epoch": 0.39, "grad_norm": 2.9050211906433105, "learning_rate": 1.3918622302822423e-06, "loss": 0.132, "step": 1430 }, { "epoch": 0.39, "grad_norm": 2.8807296752929688, "learning_rate": 1.3910478965647524e-06, "loss": 0.1399, "step": 1431 }, { "epoch": 0.39, "grad_norm": 2.598497152328491, "learning_rate": 1.3902332565994719e-06, "loss": 0.1257, "step": 1432 }, { "epoch": 0.39, "grad_norm": 2.9906957149505615, "learning_rate": 1.3894183110243819e-06, "loss": 0.1305, "step": 1433 }, { "epoch": 0.39, "grad_norm": 2.8242135047912598, "learning_rate": 1.3886030604777052e-06, "loss": 0.1277, "step": 1434 }, { "epoch": 0.39, "grad_norm": 2.646484851837158, "learning_rate": 1.387787505597902e-06, "loss": 0.1137, "step": 1435 }, { "epoch": 0.39, "grad_norm": 2.8431029319763184, "learning_rate": 1.3869716470236714e-06, "loss": 0.1386, "step": 1436 }, { "epoch": 0.39, "grad_norm": 3.0383403301239014, "learning_rate": 1.3861554853939503e-06, "loss": 0.1364, "step": 1437 }, { "epoch": 0.39, "grad_norm": 3.039416551589966, "learning_rate": 1.385339021347912e-06, "loss": 0.1301, "step": 1438 }, { "epoch": 0.39, "grad_norm": 2.651421308517456, "learning_rate": 1.384522255524969e-06, "loss": 0.1134, "step": 1439 }, { "epoch": 0.39, "grad_norm": 2.703716278076172, "learning_rate": 1.383705188564767e-06, "loss": 0.1272, "step": 1440 }, { "epoch": 0.39, "grad_norm": 2.7087182998657227, "learning_rate": 1.3828878211071902e-06, "loss": 0.1262, "step": 1441 }, { "epoch": 0.39, "grad_norm": 3.084522008895874, "learning_rate": 1.3820701537923567e-06, "loss": 0.1377, "step": 1442 }, { "epoch": 0.39, "grad_norm": 3.0757529735565186, "learning_rate": 1.3812521872606192e-06, "loss": 0.1368, "step": 1443 }, { "epoch": 0.39, "grad_norm": 3.411841869354248, "learning_rate": 1.3804339221525667e-06, "loss": 0.1441, "step": 1444 }, { "epoch": 0.39, "grad_norm": 2.864745616912842, "learning_rate": 1.3796153591090193e-06, "loss": 0.1391, "step": 1445 }, { "epoch": 0.4, "grad_norm": 3.1198856830596924, "learning_rate": 1.3787964987710325e-06, "loss": 0.1379, "step": 1446 }, { "epoch": 0.4, "grad_norm": 2.7072486877441406, "learning_rate": 1.3779773417798942e-06, "loss": 0.1187, "step": 1447 }, { "epoch": 0.4, "grad_norm": 2.6911025047302246, "learning_rate": 1.3771578887771231e-06, "loss": 0.1217, "step": 1448 }, { "epoch": 0.4, "grad_norm": 2.9282870292663574, "learning_rate": 1.3763381404044723e-06, "loss": 0.1371, "step": 1449 }, { "epoch": 0.4, "grad_norm": 2.673795461654663, "learning_rate": 1.375518097303924e-06, "loss": 0.1298, "step": 1450 }, { "epoch": 0.4, "grad_norm": 3.007877826690674, "learning_rate": 1.3746977601176925e-06, "loss": 0.1257, "step": 1451 }, { "epoch": 0.4, "grad_norm": 2.7329001426696777, "learning_rate": 1.3738771294882222e-06, "loss": 0.1255, "step": 1452 }, { "epoch": 0.4, "grad_norm": 3.172227621078491, "learning_rate": 1.373056206058186e-06, "loss": 0.1372, "step": 1453 }, { "epoch": 0.4, "grad_norm": 2.843055009841919, "learning_rate": 1.372234990470489e-06, "loss": 0.139, "step": 1454 }, { "epoch": 0.4, "grad_norm": 2.7065436840057373, "learning_rate": 1.3714134833682616e-06, "loss": 0.1245, "step": 1455 }, { "epoch": 0.4, "grad_norm": 3.0000193119049072, "learning_rate": 1.3705916853948652e-06, "loss": 0.1405, "step": 1456 }, { "epoch": 0.4, "grad_norm": 2.6848971843719482, "learning_rate": 1.3697695971938875e-06, "loss": 0.1198, "step": 1457 }, { "epoch": 0.4, "grad_norm": 2.851579189300537, "learning_rate": 1.3689472194091442e-06, "loss": 0.1305, "step": 1458 }, { "epoch": 0.4, "grad_norm": 3.1303484439849854, "learning_rate": 1.3681245526846781e-06, "loss": 0.1533, "step": 1459 }, { "epoch": 0.4, "grad_norm": 2.7993600368499756, "learning_rate": 1.3673015976647567e-06, "loss": 0.1332, "step": 1460 }, { "epoch": 0.4, "grad_norm": 3.001685380935669, "learning_rate": 1.3664783549938752e-06, "loss": 0.1393, "step": 1461 }, { "epoch": 0.4, "grad_norm": 2.8103721141815186, "learning_rate": 1.3656548253167529e-06, "loss": 0.1439, "step": 1462 }, { "epoch": 0.4, "grad_norm": 2.807375907897949, "learning_rate": 1.3648310092783342e-06, "loss": 0.1367, "step": 1463 }, { "epoch": 0.4, "grad_norm": 2.8973469734191895, "learning_rate": 1.364006907523788e-06, "loss": 0.1412, "step": 1464 }, { "epoch": 0.4, "grad_norm": 3.0541858673095703, "learning_rate": 1.3631825206985062e-06, "loss": 0.1372, "step": 1465 }, { "epoch": 0.4, "grad_norm": 3.022650957107544, "learning_rate": 1.3623578494481045e-06, "loss": 0.1332, "step": 1466 }, { "epoch": 0.4, "grad_norm": 2.6765482425689697, "learning_rate": 1.3615328944184219e-06, "loss": 0.122, "step": 1467 }, { "epoch": 0.4, "grad_norm": 2.8813273906707764, "learning_rate": 1.3607076562555185e-06, "loss": 0.1403, "step": 1468 }, { "epoch": 0.4, "grad_norm": 2.8541016578674316, "learning_rate": 1.3598821356056766e-06, "loss": 0.1278, "step": 1469 }, { "epoch": 0.4, "grad_norm": 2.8218047618865967, "learning_rate": 1.3590563331154005e-06, "loss": 0.1287, "step": 1470 }, { "epoch": 0.4, "grad_norm": 2.771939992904663, "learning_rate": 1.358230249431414e-06, "loss": 0.1186, "step": 1471 }, { "epoch": 0.4, "grad_norm": 2.6950502395629883, "learning_rate": 1.3574038852006618e-06, "loss": 0.1417, "step": 1472 }, { "epoch": 0.4, "grad_norm": 2.8820621967315674, "learning_rate": 1.3565772410703077e-06, "loss": 0.1333, "step": 1473 }, { "epoch": 0.4, "grad_norm": 3.0206656455993652, "learning_rate": 1.3557503176877356e-06, "loss": 0.1288, "step": 1474 }, { "epoch": 0.4, "grad_norm": 2.8983829021453857, "learning_rate": 1.3549231157005482e-06, "loss": 0.1346, "step": 1475 }, { "epoch": 0.4, "grad_norm": 3.0689306259155273, "learning_rate": 1.3540956357565648e-06, "loss": 0.1483, "step": 1476 }, { "epoch": 0.4, "grad_norm": 2.4660563468933105, "learning_rate": 1.3532678785038236e-06, "loss": 0.1109, "step": 1477 }, { "epoch": 0.4, "grad_norm": 3.0170247554779053, "learning_rate": 1.3524398445905802e-06, "loss": 0.1235, "step": 1478 }, { "epoch": 0.4, "grad_norm": 2.884615421295166, "learning_rate": 1.3516115346653063e-06, "loss": 0.1431, "step": 1479 }, { "epoch": 0.4, "grad_norm": 3.03633451461792, "learning_rate": 1.3507829493766903e-06, "loss": 0.1381, "step": 1480 }, { "epoch": 0.4, "grad_norm": 3.0857226848602295, "learning_rate": 1.3499540893736351e-06, "loss": 0.1444, "step": 1481 }, { "epoch": 0.4, "grad_norm": 3.016254186630249, "learning_rate": 1.34912495530526e-06, "loss": 0.1364, "step": 1482 }, { "epoch": 0.41, "grad_norm": 2.948850631713867, "learning_rate": 1.3482955478208983e-06, "loss": 0.1385, "step": 1483 }, { "epoch": 0.41, "grad_norm": 2.800083637237549, "learning_rate": 1.3474658675700976e-06, "loss": 0.1338, "step": 1484 }, { "epoch": 0.41, "grad_norm": 2.733985424041748, "learning_rate": 1.3466359152026195e-06, "loss": 0.127, "step": 1485 }, { "epoch": 0.41, "grad_norm": 2.8768179416656494, "learning_rate": 1.3458056913684372e-06, "loss": 0.1219, "step": 1486 }, { "epoch": 0.41, "grad_norm": 2.8934342861175537, "learning_rate": 1.344975196717739e-06, "loss": 0.1369, "step": 1487 }, { "epoch": 0.41, "grad_norm": 3.561100721359253, "learning_rate": 1.3441444319009226e-06, "loss": 0.122, "step": 1488 }, { "epoch": 0.41, "grad_norm": 2.6823904514312744, "learning_rate": 1.3433133975685994e-06, "loss": 0.1253, "step": 1489 }, { "epoch": 0.41, "grad_norm": 2.866231679916382, "learning_rate": 1.342482094371591e-06, "loss": 0.135, "step": 1490 }, { "epoch": 0.41, "grad_norm": 3.0388472080230713, "learning_rate": 1.3416505229609285e-06, "loss": 0.1488, "step": 1491 }, { "epoch": 0.41, "grad_norm": 2.9190943241119385, "learning_rate": 1.3408186839878556e-06, "loss": 0.1332, "step": 1492 }, { "epoch": 0.41, "grad_norm": 2.707432985305786, "learning_rate": 1.3399865781038233e-06, "loss": 0.1141, "step": 1493 }, { "epoch": 0.41, "grad_norm": 2.6063971519470215, "learning_rate": 1.3391542059604926e-06, "loss": 0.1226, "step": 1494 }, { "epoch": 0.41, "grad_norm": 3.019693374633789, "learning_rate": 1.3383215682097328e-06, "loss": 0.1216, "step": 1495 }, { "epoch": 0.41, "grad_norm": 2.6912519931793213, "learning_rate": 1.337488665503621e-06, "loss": 0.1328, "step": 1496 }, { "epoch": 0.41, "grad_norm": 2.7698280811309814, "learning_rate": 1.3366554984944428e-06, "loss": 0.1277, "step": 1497 }, { "epoch": 0.41, "grad_norm": 2.834601402282715, "learning_rate": 1.335822067834689e-06, "loss": 0.1325, "step": 1498 }, { "epoch": 0.41, "grad_norm": 2.886516809463501, "learning_rate": 1.3349883741770586e-06, "loss": 0.1219, "step": 1499 }, { "epoch": 0.41, "grad_norm": 2.703620195388794, "learning_rate": 1.3341544181744557e-06, "loss": 0.1192, "step": 1500 }, { "epoch": 0.41, "grad_norm": 2.719348430633545, "learning_rate": 1.3333202004799897e-06, "loss": 0.1162, "step": 1501 }, { "epoch": 0.41, "grad_norm": 2.807950258255005, "learning_rate": 1.332485721746976e-06, "loss": 0.1315, "step": 1502 }, { "epoch": 0.41, "grad_norm": 3.306149959564209, "learning_rate": 1.3316509826289331e-06, "loss": 0.1516, "step": 1503 }, { "epoch": 0.41, "grad_norm": 3.049546718597412, "learning_rate": 1.330815983779584e-06, "loss": 0.1318, "step": 1504 }, { "epoch": 0.41, "grad_norm": 2.990818500518799, "learning_rate": 1.3299807258528555e-06, "loss": 0.1396, "step": 1505 }, { "epoch": 0.41, "grad_norm": 2.500312328338623, "learning_rate": 1.3291452095028766e-06, "loss": 0.1095, "step": 1506 }, { "epoch": 0.41, "grad_norm": 3.0452494621276855, "learning_rate": 1.3283094353839792e-06, "loss": 0.1336, "step": 1507 }, { "epoch": 0.41, "grad_norm": 2.7827515602111816, "learning_rate": 1.3274734041506968e-06, "loss": 0.1277, "step": 1508 }, { "epoch": 0.41, "grad_norm": 3.001279830932617, "learning_rate": 1.3266371164577642e-06, "loss": 0.1424, "step": 1509 }, { "epoch": 0.41, "grad_norm": 3.0327725410461426, "learning_rate": 1.3258005729601176e-06, "loss": 0.1428, "step": 1510 }, { "epoch": 0.41, "grad_norm": 2.7685489654541016, "learning_rate": 1.3249637743128926e-06, "loss": 0.1107, "step": 1511 }, { "epoch": 0.41, "grad_norm": 2.811110019683838, "learning_rate": 1.3241267211714255e-06, "loss": 0.1332, "step": 1512 }, { "epoch": 0.41, "grad_norm": 2.7999536991119385, "learning_rate": 1.3232894141912512e-06, "loss": 0.1293, "step": 1513 }, { "epoch": 0.41, "grad_norm": 2.9219651222229004, "learning_rate": 1.322451854028104e-06, "loss": 0.1312, "step": 1514 }, { "epoch": 0.41, "grad_norm": 2.8820810317993164, "learning_rate": 1.3216140413379164e-06, "loss": 0.1518, "step": 1515 }, { "epoch": 0.41, "grad_norm": 2.7859690189361572, "learning_rate": 1.3207759767768177e-06, "loss": 0.1397, "step": 1516 }, { "epoch": 0.41, "grad_norm": 2.8043460845947266, "learning_rate": 1.3199376610011359e-06, "loss": 0.1325, "step": 1517 }, { "epoch": 0.41, "grad_norm": 3.043303966522217, "learning_rate": 1.3190990946673951e-06, "loss": 0.1485, "step": 1518 }, { "epoch": 0.41, "grad_norm": 2.965022325515747, "learning_rate": 1.3182602784323155e-06, "loss": 0.1364, "step": 1519 }, { "epoch": 0.42, "grad_norm": 2.5356366634368896, "learning_rate": 1.317421212952813e-06, "loss": 0.1113, "step": 1520 }, { "epoch": 0.42, "grad_norm": 2.8506321907043457, "learning_rate": 1.3165818988859984e-06, "loss": 0.1414, "step": 1521 }, { "epoch": 0.42, "grad_norm": 2.8930158615112305, "learning_rate": 1.315742336889178e-06, "loss": 0.1405, "step": 1522 }, { "epoch": 0.42, "grad_norm": 3.2710516452789307, "learning_rate": 1.3149025276198522e-06, "loss": 0.1554, "step": 1523 }, { "epoch": 0.42, "grad_norm": 2.6492574214935303, "learning_rate": 1.3140624717357141e-06, "loss": 0.1185, "step": 1524 }, { "epoch": 0.42, "grad_norm": 2.9090943336486816, "learning_rate": 1.3132221698946506e-06, "loss": 0.1411, "step": 1525 }, { "epoch": 0.42, "grad_norm": 3.1961607933044434, "learning_rate": 1.3123816227547413e-06, "loss": 0.1485, "step": 1526 }, { "epoch": 0.42, "grad_norm": 3.009904384613037, "learning_rate": 1.3115408309742577e-06, "loss": 0.1512, "step": 1527 }, { "epoch": 0.42, "grad_norm": 2.8224146366119385, "learning_rate": 1.310699795211663e-06, "loss": 0.1305, "step": 1528 }, { "epoch": 0.42, "grad_norm": 2.929018974304199, "learning_rate": 1.3098585161256112e-06, "loss": 0.1303, "step": 1529 }, { "epoch": 0.42, "grad_norm": 2.6305510997772217, "learning_rate": 1.3090169943749473e-06, "loss": 0.1268, "step": 1530 }, { "epoch": 0.42, "grad_norm": 2.6673898696899414, "learning_rate": 1.308175230618706e-06, "loss": 0.121, "step": 1531 }, { "epoch": 0.42, "grad_norm": 2.835024833679199, "learning_rate": 1.3073332255161119e-06, "loss": 0.1347, "step": 1532 }, { "epoch": 0.42, "grad_norm": 3.1054320335388184, "learning_rate": 1.3064909797265782e-06, "loss": 0.1493, "step": 1533 }, { "epoch": 0.42, "grad_norm": 2.9714009761810303, "learning_rate": 1.3056484939097063e-06, "loss": 0.1341, "step": 1534 }, { "epoch": 0.42, "grad_norm": 2.5670108795166016, "learning_rate": 1.3048057687252865e-06, "loss": 0.1191, "step": 1535 }, { "epoch": 0.42, "grad_norm": 2.666649580001831, "learning_rate": 1.303962804833296e-06, "loss": 0.1176, "step": 1536 }, { "epoch": 0.42, "grad_norm": 2.820812940597534, "learning_rate": 1.303119602893899e-06, "loss": 0.1305, "step": 1537 }, { "epoch": 0.42, "grad_norm": 2.6476902961730957, "learning_rate": 1.3022761635674465e-06, "loss": 0.1249, "step": 1538 }, { "epoch": 0.42, "grad_norm": 2.881699800491333, "learning_rate": 1.3014324875144742e-06, "loss": 0.1276, "step": 1539 }, { "epoch": 0.42, "grad_norm": 3.017333745956421, "learning_rate": 1.3005885753957046e-06, "loss": 0.1471, "step": 1540 }, { "epoch": 0.42, "grad_norm": 2.6423754692077637, "learning_rate": 1.2997444278720445e-06, "loss": 0.1205, "step": 1541 }, { "epoch": 0.42, "grad_norm": 2.902148723602295, "learning_rate": 1.298900045604585e-06, "loss": 0.1288, "step": 1542 }, { "epoch": 0.42, "grad_norm": 2.9265713691711426, "learning_rate": 1.2980554292546015e-06, "loss": 0.1328, "step": 1543 }, { "epoch": 0.42, "grad_norm": 3.1541149616241455, "learning_rate": 1.2972105794835518e-06, "loss": 0.1408, "step": 1544 }, { "epoch": 0.42, "grad_norm": 2.520120143890381, "learning_rate": 1.296365496953077e-06, "loss": 0.1096, "step": 1545 }, { "epoch": 0.42, "grad_norm": 2.5080769062042236, "learning_rate": 1.295520182325001e-06, "loss": 0.1125, "step": 1546 }, { "epoch": 0.42, "grad_norm": 2.9557883739471436, "learning_rate": 1.2946746362613285e-06, "loss": 0.1298, "step": 1547 }, { "epoch": 0.42, "grad_norm": 2.8763039112091064, "learning_rate": 1.2938288594242464e-06, "loss": 0.1346, "step": 1548 }, { "epoch": 0.42, "grad_norm": 3.1307058334350586, "learning_rate": 1.2929828524761215e-06, "loss": 0.1293, "step": 1549 }, { "epoch": 0.42, "grad_norm": 2.775430679321289, "learning_rate": 1.2921366160795016e-06, "loss": 0.1333, "step": 1550 }, { "epoch": 0.42, "grad_norm": 3.194153070449829, "learning_rate": 1.2912901508971132e-06, "loss": 0.1429, "step": 1551 }, { "epoch": 0.42, "grad_norm": 2.6535837650299072, "learning_rate": 1.290443457591863e-06, "loss": 0.1203, "step": 1552 }, { "epoch": 0.42, "grad_norm": 2.8033483028411865, "learning_rate": 1.289596536826836e-06, "loss": 0.1278, "step": 1553 }, { "epoch": 0.42, "grad_norm": 3.0123229026794434, "learning_rate": 1.2887493892652945e-06, "loss": 0.1342, "step": 1554 }, { "epoch": 0.42, "grad_norm": 2.6336660385131836, "learning_rate": 1.2879020155706802e-06, "loss": 0.1209, "step": 1555 }, { "epoch": 0.43, "grad_norm": 3.0163450241088867, "learning_rate": 1.2870544164066099e-06, "loss": 0.1182, "step": 1556 }, { "epoch": 0.43, "grad_norm": 3.007383346557617, "learning_rate": 1.286206592436878e-06, "loss": 0.1435, "step": 1557 }, { "epoch": 0.43, "grad_norm": 2.7327332496643066, "learning_rate": 1.285358544325456e-06, "loss": 0.1235, "step": 1558 }, { "epoch": 0.43, "grad_norm": 3.0295119285583496, "learning_rate": 1.284510272736488e-06, "loss": 0.1362, "step": 1559 }, { "epoch": 0.43, "grad_norm": 2.937821626663208, "learning_rate": 1.2836617783342967e-06, "loss": 0.1343, "step": 1560 }, { "epoch": 0.43, "grad_norm": 2.598306179046631, "learning_rate": 1.2828130617833766e-06, "loss": 0.1369, "step": 1561 }, { "epoch": 0.43, "grad_norm": 2.768315553665161, "learning_rate": 1.281964123748397e-06, "loss": 0.1195, "step": 1562 }, { "epoch": 0.43, "grad_norm": 3.2559378147125244, "learning_rate": 1.281114964894201e-06, "loss": 0.1329, "step": 1563 }, { "epoch": 0.43, "grad_norm": 3.381107807159424, "learning_rate": 1.2802655858858042e-06, "loss": 0.1181, "step": 1564 }, { "epoch": 0.43, "grad_norm": 2.7674670219421387, "learning_rate": 1.279415987388395e-06, "loss": 0.1355, "step": 1565 }, { "epoch": 0.43, "grad_norm": 2.898472309112549, "learning_rate": 1.2785661700673338e-06, "loss": 0.134, "step": 1566 }, { "epoch": 0.43, "grad_norm": 2.7646608352661133, "learning_rate": 1.2777161345881512e-06, "loss": 0.1413, "step": 1567 }, { "epoch": 0.43, "grad_norm": 3.0112876892089844, "learning_rate": 1.2768658816165504e-06, "loss": 0.1389, "step": 1568 }, { "epoch": 0.43, "grad_norm": 3.059509038925171, "learning_rate": 1.2760154118184035e-06, "loss": 0.1329, "step": 1569 }, { "epoch": 0.43, "grad_norm": 2.6462533473968506, "learning_rate": 1.275164725859753e-06, "loss": 0.1262, "step": 1570 }, { "epoch": 0.43, "grad_norm": 2.63196063041687, "learning_rate": 1.274313824406811e-06, "loss": 0.11, "step": 1571 }, { "epoch": 0.43, "grad_norm": 2.9079229831695557, "learning_rate": 1.2734627081259574e-06, "loss": 0.1432, "step": 1572 }, { "epoch": 0.43, "grad_norm": 2.9190385341644287, "learning_rate": 1.2726113776837415e-06, "loss": 0.1422, "step": 1573 }, { "epoch": 0.43, "grad_norm": 2.7876791954040527, "learning_rate": 1.2717598337468793e-06, "loss": 0.1329, "step": 1574 }, { "epoch": 0.43, "grad_norm": 2.8222997188568115, "learning_rate": 1.2709080769822546e-06, "loss": 0.1449, "step": 1575 }, { "epoch": 0.43, "grad_norm": 2.7818732261657715, "learning_rate": 1.270056108056918e-06, "loss": 0.1331, "step": 1576 }, { "epoch": 0.43, "grad_norm": 2.854518175125122, "learning_rate": 1.269203927638086e-06, "loss": 0.1285, "step": 1577 }, { "epoch": 0.43, "grad_norm": 2.730489730834961, "learning_rate": 1.2683515363931401e-06, "loss": 0.1294, "step": 1578 }, { "epoch": 0.43, "grad_norm": 3.053382158279419, "learning_rate": 1.2674989349896279e-06, "loss": 0.1213, "step": 1579 }, { "epoch": 0.43, "grad_norm": 3.0139403343200684, "learning_rate": 1.2666461240952612e-06, "loss": 0.1413, "step": 1580 }, { "epoch": 0.43, "grad_norm": 3.037388801574707, "learning_rate": 1.2657931043779162e-06, "loss": 0.149, "step": 1581 }, { "epoch": 0.43, "grad_norm": 3.1191294193267822, "learning_rate": 1.2649398765056316e-06, "loss": 0.1341, "step": 1582 }, { "epoch": 0.43, "grad_norm": 2.7319812774658203, "learning_rate": 1.2640864411466103e-06, "loss": 0.1293, "step": 1583 }, { "epoch": 0.43, "grad_norm": 3.1083836555480957, "learning_rate": 1.2632327989692172e-06, "loss": 0.1389, "step": 1584 }, { "epoch": 0.43, "grad_norm": 2.9495601654052734, "learning_rate": 1.262378950641979e-06, "loss": 0.1371, "step": 1585 }, { "epoch": 0.43, "grad_norm": 2.947674036026001, "learning_rate": 1.2615248968335844e-06, "loss": 0.1299, "step": 1586 }, { "epoch": 0.43, "grad_norm": 2.8491618633270264, "learning_rate": 1.2606706382128823e-06, "loss": 0.1219, "step": 1587 }, { "epoch": 0.43, "grad_norm": 2.953118324279785, "learning_rate": 1.259816175448882e-06, "loss": 0.1383, "step": 1588 }, { "epoch": 0.43, "grad_norm": 2.879915952682495, "learning_rate": 1.2589615092107538e-06, "loss": 0.136, "step": 1589 }, { "epoch": 0.43, "grad_norm": 2.6063785552978516, "learning_rate": 1.258106640167826e-06, "loss": 0.1122, "step": 1590 }, { "epoch": 0.43, "grad_norm": 2.721730947494507, "learning_rate": 1.2572515689895868e-06, "loss": 0.1309, "step": 1591 }, { "epoch": 0.43, "grad_norm": 3.0574612617492676, "learning_rate": 1.2563962963456818e-06, "loss": 0.1512, "step": 1592 }, { "epoch": 0.44, "grad_norm": 2.758530855178833, "learning_rate": 1.2555408229059148e-06, "loss": 0.1333, "step": 1593 }, { "epoch": 0.44, "grad_norm": 2.7727010250091553, "learning_rate": 1.254685149340247e-06, "loss": 0.1284, "step": 1594 }, { "epoch": 0.44, "grad_norm": 2.840688943862915, "learning_rate": 1.253829276318796e-06, "loss": 0.1376, "step": 1595 }, { "epoch": 0.44, "grad_norm": 2.5974299907684326, "learning_rate": 1.2529732045118363e-06, "loss": 0.1234, "step": 1596 }, { "epoch": 0.44, "grad_norm": 2.957911968231201, "learning_rate": 1.2521169345897963e-06, "loss": 0.1265, "step": 1597 }, { "epoch": 0.44, "grad_norm": 2.975114107131958, "learning_rate": 1.251260467223262e-06, "loss": 0.1308, "step": 1598 }, { "epoch": 0.44, "grad_norm": 2.8173699378967285, "learning_rate": 1.2504038030829724e-06, "loss": 0.1155, "step": 1599 }, { "epoch": 0.44, "grad_norm": 2.790580987930298, "learning_rate": 1.249546942839821e-06, "loss": 0.1238, "step": 1600 }, { "epoch": 0.44, "grad_norm": 2.909597873687744, "learning_rate": 1.2486898871648551e-06, "loss": 0.1377, "step": 1601 }, { "epoch": 0.44, "grad_norm": 2.7134740352630615, "learning_rate": 1.2478326367292741e-06, "loss": 0.1198, "step": 1602 }, { "epoch": 0.44, "grad_norm": 2.6046814918518066, "learning_rate": 1.2469751922044315e-06, "loss": 0.1271, "step": 1603 }, { "epoch": 0.44, "grad_norm": 2.4541776180267334, "learning_rate": 1.2461175542618318e-06, "loss": 0.1134, "step": 1604 }, { "epoch": 0.44, "grad_norm": 2.6802122592926025, "learning_rate": 1.245259723573131e-06, "loss": 0.1184, "step": 1605 }, { "epoch": 0.44, "grad_norm": 2.7969250679016113, "learning_rate": 1.2444017008101365e-06, "loss": 0.1345, "step": 1606 }, { "epoch": 0.44, "grad_norm": 2.6568596363067627, "learning_rate": 1.2435434866448053e-06, "loss": 0.128, "step": 1607 }, { "epoch": 0.44, "grad_norm": 3.19606614112854, "learning_rate": 1.2426850817492455e-06, "loss": 0.1431, "step": 1608 }, { "epoch": 0.44, "grad_norm": 2.808906316757202, "learning_rate": 1.2418264867957132e-06, "loss": 0.1297, "step": 1609 }, { "epoch": 0.44, "grad_norm": 2.6693055629730225, "learning_rate": 1.2409677024566143e-06, "loss": 0.1281, "step": 1610 }, { "epoch": 0.44, "grad_norm": 2.5801992416381836, "learning_rate": 1.2401087294045031e-06, "loss": 0.1179, "step": 1611 }, { "epoch": 0.44, "grad_norm": 2.660011053085327, "learning_rate": 1.2392495683120806e-06, "loss": 0.1207, "step": 1612 }, { "epoch": 0.44, "grad_norm": 2.9305505752563477, "learning_rate": 1.2383902198521963e-06, "loss": 0.1258, "step": 1613 }, { "epoch": 0.44, "grad_norm": 2.7699851989746094, "learning_rate": 1.2375306846978462e-06, "loss": 0.1227, "step": 1614 }, { "epoch": 0.44, "grad_norm": 2.96335768699646, "learning_rate": 1.2366709635221716e-06, "loss": 0.1377, "step": 1615 }, { "epoch": 0.44, "grad_norm": 2.8513786792755127, "learning_rate": 1.2358110569984608e-06, "loss": 0.1319, "step": 1616 }, { "epoch": 0.44, "grad_norm": 2.6833722591400146, "learning_rate": 1.2349509658001458e-06, "loss": 0.1338, "step": 1617 }, { "epoch": 0.44, "grad_norm": 3.0930840969085693, "learning_rate": 1.2340906906008046e-06, "loss": 0.1468, "step": 1618 }, { "epoch": 0.44, "grad_norm": 3.274177074432373, "learning_rate": 1.2332302320741587e-06, "loss": 0.1296, "step": 1619 }, { "epoch": 0.44, "grad_norm": 2.8681862354278564, "learning_rate": 1.2323695908940728e-06, "loss": 0.1322, "step": 1620 }, { "epoch": 0.44, "grad_norm": 2.6954641342163086, "learning_rate": 1.2315087677345556e-06, "loss": 0.121, "step": 1621 }, { "epoch": 0.44, "grad_norm": 2.7411937713623047, "learning_rate": 1.2306477632697568e-06, "loss": 0.1218, "step": 1622 }, { "epoch": 0.44, "grad_norm": 2.734572649002075, "learning_rate": 1.2297865781739699e-06, "loss": 0.1217, "step": 1623 }, { "epoch": 0.44, "grad_norm": 2.7590692043304443, "learning_rate": 1.228925213121629e-06, "loss": 0.1248, "step": 1624 }, { "epoch": 0.44, "grad_norm": 2.5387656688690186, "learning_rate": 1.2280636687873087e-06, "loss": 0.1206, "step": 1625 }, { "epoch": 0.44, "grad_norm": 2.6590218544006348, "learning_rate": 1.2272019458457243e-06, "loss": 0.1141, "step": 1626 }, { "epoch": 0.44, "grad_norm": 2.614525556564331, "learning_rate": 1.2263400449717317e-06, "loss": 0.119, "step": 1627 }, { "epoch": 0.44, "grad_norm": 2.675663709640503, "learning_rate": 1.225477966840325e-06, "loss": 0.1254, "step": 1628 }, { "epoch": 0.44, "grad_norm": 2.7015058994293213, "learning_rate": 1.2246157121266383e-06, "loss": 0.1243, "step": 1629 }, { "epoch": 0.45, "grad_norm": 2.6724138259887695, "learning_rate": 1.2237532815059426e-06, "loss": 0.1191, "step": 1630 }, { "epoch": 0.45, "grad_norm": 2.769043207168579, "learning_rate": 1.2228906756536478e-06, "loss": 0.1425, "step": 1631 }, { "epoch": 0.45, "grad_norm": 3.219146728515625, "learning_rate": 1.222027895245301e-06, "loss": 0.1366, "step": 1632 }, { "epoch": 0.45, "grad_norm": 2.754021644592285, "learning_rate": 1.221164940956585e-06, "loss": 0.1353, "step": 1633 }, { "epoch": 0.45, "grad_norm": 3.2277939319610596, "learning_rate": 1.22030181346332e-06, "loss": 0.1366, "step": 1634 }, { "epoch": 0.45, "grad_norm": 3.1995861530303955, "learning_rate": 1.2194385134414606e-06, "loss": 0.149, "step": 1635 }, { "epoch": 0.45, "grad_norm": 2.7883148193359375, "learning_rate": 1.2185750415670977e-06, "loss": 0.1293, "step": 1636 }, { "epoch": 0.45, "grad_norm": 2.66025710105896, "learning_rate": 1.2177113985164562e-06, "loss": 0.1173, "step": 1637 }, { "epoch": 0.45, "grad_norm": 2.7542994022369385, "learning_rate": 1.2168475849658951e-06, "loss": 0.1167, "step": 1638 }, { "epoch": 0.45, "grad_norm": 3.27205491065979, "learning_rate": 1.2159836015919075e-06, "loss": 0.1246, "step": 1639 }, { "epoch": 0.45, "grad_norm": 2.6621532440185547, "learning_rate": 1.2151194490711177e-06, "loss": 0.1231, "step": 1640 }, { "epoch": 0.45, "grad_norm": 2.733459234237671, "learning_rate": 1.2142551280802846e-06, "loss": 0.1187, "step": 1641 }, { "epoch": 0.45, "grad_norm": 3.0605053901672363, "learning_rate": 1.213390639296298e-06, "loss": 0.131, "step": 1642 }, { "epoch": 0.45, "grad_norm": 2.8399031162261963, "learning_rate": 1.2125259833961795e-06, "loss": 0.1391, "step": 1643 }, { "epoch": 0.45, "grad_norm": 3.032268762588501, "learning_rate": 1.211661161057081e-06, "loss": 0.1441, "step": 1644 }, { "epoch": 0.45, "grad_norm": 2.698371410369873, "learning_rate": 1.210796172956285e-06, "loss": 0.1222, "step": 1645 }, { "epoch": 0.45, "grad_norm": 3.248314142227173, "learning_rate": 1.209931019771204e-06, "loss": 0.1333, "step": 1646 }, { "epoch": 0.45, "grad_norm": 2.890303373336792, "learning_rate": 1.20906570217938e-06, "loss": 0.1224, "step": 1647 }, { "epoch": 0.45, "grad_norm": 2.574625015258789, "learning_rate": 1.2082002208584832e-06, "loss": 0.1142, "step": 1648 }, { "epoch": 0.45, "grad_norm": 2.6554789543151855, "learning_rate": 1.2073345764863125e-06, "loss": 0.117, "step": 1649 }, { "epoch": 0.45, "grad_norm": 2.891599178314209, "learning_rate": 1.2064687697407937e-06, "loss": 0.1257, "step": 1650 }, { "epoch": 0.45, "grad_norm": 2.9254744052886963, "learning_rate": 1.2056028012999808e-06, "loss": 0.1402, "step": 1651 }, { "epoch": 0.45, "grad_norm": 2.9599831104278564, "learning_rate": 1.204736671842054e-06, "loss": 0.1278, "step": 1652 }, { "epoch": 0.45, "grad_norm": 2.616624593734741, "learning_rate": 1.2038703820453192e-06, "loss": 0.1252, "step": 1653 }, { "epoch": 0.45, "grad_norm": 2.9988479614257812, "learning_rate": 1.2030039325882085e-06, "loss": 0.1411, "step": 1654 }, { "epoch": 0.45, "grad_norm": 3.0054354667663574, "learning_rate": 1.2021373241492785e-06, "loss": 0.1527, "step": 1655 }, { "epoch": 0.45, "grad_norm": 2.780163049697876, "learning_rate": 1.2012705574072105e-06, "loss": 0.1247, "step": 1656 }, { "epoch": 0.45, "grad_norm": 2.769150495529175, "learning_rate": 1.2004036330408104e-06, "loss": 0.1185, "step": 1657 }, { "epoch": 0.45, "grad_norm": 2.947723388671875, "learning_rate": 1.1995365517290066e-06, "loss": 0.1252, "step": 1658 }, { "epoch": 0.45, "grad_norm": 2.8834493160247803, "learning_rate": 1.198669314150851e-06, "loss": 0.1293, "step": 1659 }, { "epoch": 0.45, "grad_norm": 3.040342330932617, "learning_rate": 1.1978019209855173e-06, "loss": 0.1528, "step": 1660 }, { "epoch": 0.45, "grad_norm": 2.6697583198547363, "learning_rate": 1.1969343729123014e-06, "loss": 0.1285, "step": 1661 }, { "epoch": 0.45, "grad_norm": 3.018949031829834, "learning_rate": 1.1960666706106213e-06, "loss": 0.1321, "step": 1662 }, { "epoch": 0.45, "grad_norm": 3.0014421939849854, "learning_rate": 1.195198814760014e-06, "loss": 0.1287, "step": 1663 }, { "epoch": 0.45, "grad_norm": 2.7734243869781494, "learning_rate": 1.1943308060401389e-06, "loss": 0.1279, "step": 1664 }, { "epoch": 0.45, "grad_norm": 2.76412034034729, "learning_rate": 1.1934626451307726e-06, "loss": 0.1191, "step": 1665 }, { "epoch": 0.46, "grad_norm": 2.704519748687744, "learning_rate": 1.1925943327118132e-06, "loss": 0.1317, "step": 1666 }, { "epoch": 0.46, "grad_norm": 2.8398492336273193, "learning_rate": 1.1917258694632767e-06, "loss": 0.1309, "step": 1667 }, { "epoch": 0.46, "grad_norm": 3.1674561500549316, "learning_rate": 1.1908572560652968e-06, "loss": 0.1405, "step": 1668 }, { "epoch": 0.46, "grad_norm": 2.771796464920044, "learning_rate": 1.1899884931981247e-06, "loss": 0.1224, "step": 1669 }, { "epoch": 0.46, "grad_norm": 2.8627328872680664, "learning_rate": 1.189119581542129e-06, "loss": 0.1408, "step": 1670 }, { "epoch": 0.46, "grad_norm": 2.843153238296509, "learning_rate": 1.1882505217777953e-06, "loss": 0.1176, "step": 1671 }, { "epoch": 0.46, "grad_norm": 2.753012180328369, "learning_rate": 1.1873813145857248e-06, "loss": 0.1276, "step": 1672 }, { "epoch": 0.46, "grad_norm": 2.54606294631958, "learning_rate": 1.1865119606466332e-06, "loss": 0.116, "step": 1673 }, { "epoch": 0.46, "grad_norm": 2.690112590789795, "learning_rate": 1.1856424606413528e-06, "loss": 0.1194, "step": 1674 }, { "epoch": 0.46, "grad_norm": 2.8299365043640137, "learning_rate": 1.1847728152508291e-06, "loss": 0.136, "step": 1675 }, { "epoch": 0.46, "grad_norm": 2.7167913913726807, "learning_rate": 1.1839030251561222e-06, "loss": 0.1138, "step": 1676 }, { "epoch": 0.46, "grad_norm": 3.147341251373291, "learning_rate": 1.183033091038405e-06, "loss": 0.1526, "step": 1677 }, { "epoch": 0.46, "grad_norm": 2.694190263748169, "learning_rate": 1.1821630135789634e-06, "loss": 0.1208, "step": 1678 }, { "epoch": 0.46, "grad_norm": 2.7382729053497314, "learning_rate": 1.181292793459195e-06, "loss": 0.1191, "step": 1679 }, { "epoch": 0.46, "grad_norm": 2.824538469314575, "learning_rate": 1.1804224313606102e-06, "loss": 0.132, "step": 1680 }, { "epoch": 0.46, "grad_norm": 3.1137707233428955, "learning_rate": 1.17955192796483e-06, "loss": 0.1323, "step": 1681 }, { "epoch": 0.46, "grad_norm": 2.78731107711792, "learning_rate": 1.178681283953586e-06, "loss": 0.1196, "step": 1682 }, { "epoch": 0.46, "grad_norm": 2.640028953552246, "learning_rate": 1.1778105000087197e-06, "loss": 0.1248, "step": 1683 }, { "epoch": 0.46, "grad_norm": 2.5602917671203613, "learning_rate": 1.176939576812183e-06, "loss": 0.1143, "step": 1684 }, { "epoch": 0.46, "grad_norm": 2.6920769214630127, "learning_rate": 1.1760685150460361e-06, "loss": 0.1245, "step": 1685 }, { "epoch": 0.46, "grad_norm": 3.0413594245910645, "learning_rate": 1.175197315392448e-06, "loss": 0.1419, "step": 1686 }, { "epoch": 0.46, "grad_norm": 2.9711055755615234, "learning_rate": 1.174325978533696e-06, "loss": 0.14, "step": 1687 }, { "epoch": 0.46, "grad_norm": 2.8140616416931152, "learning_rate": 1.1734545051521639e-06, "loss": 0.1252, "step": 1688 }, { "epoch": 0.46, "grad_norm": 2.883749485015869, "learning_rate": 1.1725828959303432e-06, "loss": 0.1263, "step": 1689 }, { "epoch": 0.46, "grad_norm": 2.8676676750183105, "learning_rate": 1.1717111515508317e-06, "loss": 0.1366, "step": 1690 }, { "epoch": 0.46, "grad_norm": 3.049114942550659, "learning_rate": 1.170839272696333e-06, "loss": 0.1348, "step": 1691 }, { "epoch": 0.46, "grad_norm": 2.8665969371795654, "learning_rate": 1.169967260049656e-06, "loss": 0.1291, "step": 1692 }, { "epoch": 0.46, "grad_norm": 2.4342026710510254, "learning_rate": 1.1690951142937146e-06, "loss": 0.1083, "step": 1693 }, { "epoch": 0.46, "grad_norm": 3.093519687652588, "learning_rate": 1.168222836111526e-06, "loss": 0.1535, "step": 1694 }, { "epoch": 0.46, "grad_norm": 2.86672306060791, "learning_rate": 1.1673504261862123e-06, "loss": 0.1263, "step": 1695 }, { "epoch": 0.46, "grad_norm": 2.8939149379730225, "learning_rate": 1.1664778852009983e-06, "loss": 0.1229, "step": 1696 }, { "epoch": 0.46, "grad_norm": 2.8285973072052, "learning_rate": 1.1656052138392113e-06, "loss": 0.143, "step": 1697 }, { "epoch": 0.46, "grad_norm": 3.0051968097686768, "learning_rate": 1.1647324127842808e-06, "loss": 0.1485, "step": 1698 }, { "epoch": 0.46, "grad_norm": 2.889620304107666, "learning_rate": 1.1638594827197378e-06, "loss": 0.1375, "step": 1699 }, { "epoch": 0.46, "grad_norm": 2.844740867614746, "learning_rate": 1.1629864243292146e-06, "loss": 0.1382, "step": 1700 }, { "epoch": 0.46, "grad_norm": 2.7689208984375, "learning_rate": 1.1621132382964438e-06, "loss": 0.1291, "step": 1701 }, { "epoch": 0.46, "grad_norm": 2.8812568187713623, "learning_rate": 1.161239925305258e-06, "loss": 0.1248, "step": 1702 }, { "epoch": 0.47, "grad_norm": 2.9800965785980225, "learning_rate": 1.160366486039589e-06, "loss": 0.1427, "step": 1703 }, { "epoch": 0.47, "grad_norm": 2.602057456970215, "learning_rate": 1.1594929211834679e-06, "loss": 0.1124, "step": 1704 }, { "epoch": 0.47, "grad_norm": 2.93133544921875, "learning_rate": 1.1586192314210239e-06, "loss": 0.1396, "step": 1705 }, { "epoch": 0.47, "grad_norm": 2.7856240272521973, "learning_rate": 1.157745417436484e-06, "loss": 0.134, "step": 1706 }, { "epoch": 0.47, "grad_norm": 2.871933698654175, "learning_rate": 1.156871479914173e-06, "loss": 0.1342, "step": 1707 }, { "epoch": 0.47, "grad_norm": 2.958693265914917, "learning_rate": 1.1559974195385117e-06, "loss": 0.1339, "step": 1708 }, { "epoch": 0.47, "grad_norm": 2.81894588470459, "learning_rate": 1.1551232369940166e-06, "loss": 0.1279, "step": 1709 }, { "epoch": 0.47, "grad_norm": 2.6474156379699707, "learning_rate": 1.1542489329653022e-06, "loss": 0.1177, "step": 1710 }, { "epoch": 0.47, "grad_norm": 2.7948195934295654, "learning_rate": 1.1533745081370759e-06, "loss": 0.1289, "step": 1711 }, { "epoch": 0.47, "grad_norm": 2.8775722980499268, "learning_rate": 1.1524999631941405e-06, "loss": 0.1259, "step": 1712 }, { "epoch": 0.47, "grad_norm": 3.051638603210449, "learning_rate": 1.1516252988213926e-06, "loss": 0.1351, "step": 1713 }, { "epoch": 0.47, "grad_norm": 2.770986557006836, "learning_rate": 1.1507505157038226e-06, "loss": 0.1236, "step": 1714 }, { "epoch": 0.47, "grad_norm": 2.9146666526794434, "learning_rate": 1.1498756145265142e-06, "loss": 0.1296, "step": 1715 }, { "epoch": 0.47, "grad_norm": 3.0440783500671387, "learning_rate": 1.149000595974643e-06, "loss": 0.1333, "step": 1716 }, { "epoch": 0.47, "grad_norm": 2.7425525188446045, "learning_rate": 1.1481254607334766e-06, "loss": 0.1248, "step": 1717 }, { "epoch": 0.47, "grad_norm": 2.9264187812805176, "learning_rate": 1.1472502094883743e-06, "loss": 0.1146, "step": 1718 }, { "epoch": 0.47, "grad_norm": 2.5261569023132324, "learning_rate": 1.1463748429247852e-06, "loss": 0.1227, "step": 1719 }, { "epoch": 0.47, "grad_norm": 2.8340601921081543, "learning_rate": 1.1454993617282512e-06, "loss": 0.1337, "step": 1720 }, { "epoch": 0.47, "grad_norm": 2.691579818725586, "learning_rate": 1.144623766584401e-06, "loss": 0.1253, "step": 1721 }, { "epoch": 0.47, "grad_norm": 2.659074306488037, "learning_rate": 1.1437480581789546e-06, "loss": 0.1306, "step": 1722 }, { "epoch": 0.47, "grad_norm": 2.750162363052368, "learning_rate": 1.1428722371977192e-06, "loss": 0.1175, "step": 1723 }, { "epoch": 0.47, "grad_norm": 2.9749059677124023, "learning_rate": 1.1419963043265915e-06, "loss": 0.1367, "step": 1724 }, { "epoch": 0.47, "grad_norm": 2.6689412593841553, "learning_rate": 1.1411202602515554e-06, "loss": 0.1299, "step": 1725 }, { "epoch": 0.47, "grad_norm": 2.8214049339294434, "learning_rate": 1.1402441056586813e-06, "loss": 0.1257, "step": 1726 }, { "epoch": 0.47, "grad_norm": 2.8539130687713623, "learning_rate": 1.139367841234127e-06, "loss": 0.1413, "step": 1727 }, { "epoch": 0.47, "grad_norm": 3.122183084487915, "learning_rate": 1.1384914676641355e-06, "loss": 0.1445, "step": 1728 }, { "epoch": 0.47, "grad_norm": 2.572101593017578, "learning_rate": 1.137614985635036e-06, "loss": 0.1177, "step": 1729 }, { "epoch": 0.47, "grad_norm": 3.196446657180786, "learning_rate": 1.1367383958332425e-06, "loss": 0.1555, "step": 1730 }, { "epoch": 0.47, "grad_norm": 2.7023680210113525, "learning_rate": 1.1358616989452527e-06, "loss": 0.1217, "step": 1731 }, { "epoch": 0.47, "grad_norm": 2.5879292488098145, "learning_rate": 1.1349848956576492e-06, "loss": 0.1186, "step": 1732 }, { "epoch": 0.47, "grad_norm": 2.875540256500244, "learning_rate": 1.134107986657097e-06, "loss": 0.1266, "step": 1733 }, { "epoch": 0.47, "grad_norm": 2.756075382232666, "learning_rate": 1.1332309726303447e-06, "loss": 0.1346, "step": 1734 }, { "epoch": 0.47, "grad_norm": 2.7358367443084717, "learning_rate": 1.1323538542642227e-06, "loss": 0.1257, "step": 1735 }, { "epoch": 0.47, "grad_norm": 2.8995721340179443, "learning_rate": 1.1314766322456425e-06, "loss": 0.1388, "step": 1736 }, { "epoch": 0.47, "grad_norm": 2.6761934757232666, "learning_rate": 1.1305993072615984e-06, "loss": 0.1153, "step": 1737 }, { "epoch": 0.47, "grad_norm": 2.726055383682251, "learning_rate": 1.1297218799991641e-06, "loss": 0.1269, "step": 1738 }, { "epoch": 0.48, "grad_norm": 2.8937668800354004, "learning_rate": 1.1288443511454935e-06, "loss": 0.1306, "step": 1739 }, { "epoch": 0.48, "grad_norm": 2.7245571613311768, "learning_rate": 1.1279667213878203e-06, "loss": 0.1214, "step": 1740 }, { "epoch": 0.48, "grad_norm": 2.902127504348755, "learning_rate": 1.1270889914134573e-06, "loss": 0.146, "step": 1741 }, { "epoch": 0.48, "grad_norm": 2.838588237762451, "learning_rate": 1.1262111619097956e-06, "loss": 0.1233, "step": 1742 }, { "epoch": 0.48, "grad_norm": 2.698322057723999, "learning_rate": 1.1253332335643042e-06, "loss": 0.1128, "step": 1743 }, { "epoch": 0.48, "grad_norm": 2.864337682723999, "learning_rate": 1.1244552070645298e-06, "loss": 0.1328, "step": 1744 }, { "epoch": 0.48, "grad_norm": 2.853121519088745, "learning_rate": 1.1235770830980956e-06, "loss": 0.1177, "step": 1745 }, { "epoch": 0.48, "grad_norm": 2.7600739002227783, "learning_rate": 1.1226988623527013e-06, "loss": 0.1197, "step": 1746 }, { "epoch": 0.48, "grad_norm": 3.1173315048217773, "learning_rate": 1.1218205455161227e-06, "loss": 0.1439, "step": 1747 }, { "epoch": 0.48, "grad_norm": 2.690671682357788, "learning_rate": 1.12094213327621e-06, "loss": 0.1203, "step": 1748 }, { "epoch": 0.48, "grad_norm": 2.8105766773223877, "learning_rate": 1.1200636263208894e-06, "loss": 0.1304, "step": 1749 }, { "epoch": 0.48, "grad_norm": 2.6360340118408203, "learning_rate": 1.1191850253381601e-06, "loss": 0.1284, "step": 1750 }, { "epoch": 0.48, "grad_norm": 2.691828727722168, "learning_rate": 1.1183063310160953e-06, "loss": 0.1257, "step": 1751 }, { "epoch": 0.48, "grad_norm": 2.6944680213928223, "learning_rate": 1.1174275440428415e-06, "loss": 0.1203, "step": 1752 }, { "epoch": 0.48, "grad_norm": 2.6057870388031006, "learning_rate": 1.1165486651066176e-06, "loss": 0.1129, "step": 1753 }, { "epoch": 0.48, "grad_norm": 2.9981822967529297, "learning_rate": 1.1156696948957146e-06, "loss": 0.129, "step": 1754 }, { "epoch": 0.48, "grad_norm": 2.8687524795532227, "learning_rate": 1.1147906340984953e-06, "loss": 0.1346, "step": 1755 }, { "epoch": 0.48, "grad_norm": 2.571953535079956, "learning_rate": 1.1139114834033928e-06, "loss": 0.1185, "step": 1756 }, { "epoch": 0.48, "grad_norm": 3.296785593032837, "learning_rate": 1.1130322434989102e-06, "loss": 0.1572, "step": 1757 }, { "epoch": 0.48, "grad_norm": 2.9241154193878174, "learning_rate": 1.1121529150736223e-06, "loss": 0.1358, "step": 1758 }, { "epoch": 0.48, "grad_norm": 3.0534586906433105, "learning_rate": 1.1112734988161716e-06, "loss": 0.1336, "step": 1759 }, { "epoch": 0.48, "grad_norm": 3.169816493988037, "learning_rate": 1.1103939954152699e-06, "loss": 0.1397, "step": 1760 }, { "epoch": 0.48, "grad_norm": 2.8303816318511963, "learning_rate": 1.109514405559697e-06, "loss": 0.1156, "step": 1761 }, { "epoch": 0.48, "grad_norm": 2.6899285316467285, "learning_rate": 1.1086347299383003e-06, "loss": 0.1221, "step": 1762 }, { "epoch": 0.48, "grad_norm": 3.023895025253296, "learning_rate": 1.1077549692399958e-06, "loss": 0.127, "step": 1763 }, { "epoch": 0.48, "grad_norm": 2.7877230644226074, "learning_rate": 1.1068751241537641e-06, "loss": 0.1355, "step": 1764 }, { "epoch": 0.48, "grad_norm": 2.621915102005005, "learning_rate": 1.1059951953686534e-06, "loss": 0.1194, "step": 1765 }, { "epoch": 0.48, "grad_norm": 2.7423007488250732, "learning_rate": 1.1051151835737762e-06, "loss": 0.1247, "step": 1766 }, { "epoch": 0.48, "grad_norm": 2.949045181274414, "learning_rate": 1.1042350894583108e-06, "loss": 0.1252, "step": 1767 }, { "epoch": 0.48, "grad_norm": 2.995800495147705, "learning_rate": 1.1033549137115004e-06, "loss": 0.1432, "step": 1768 }, { "epoch": 0.48, "grad_norm": 2.968546152114868, "learning_rate": 1.1024746570226508e-06, "loss": 0.1292, "step": 1769 }, { "epoch": 0.48, "grad_norm": 2.7684168815612793, "learning_rate": 1.1015943200811323e-06, "loss": 0.1211, "step": 1770 }, { "epoch": 0.48, "grad_norm": 2.9041082859039307, "learning_rate": 1.1007139035763782e-06, "loss": 0.1341, "step": 1771 }, { "epoch": 0.48, "grad_norm": 2.9197094440460205, "learning_rate": 1.0998334081978825e-06, "loss": 0.1325, "step": 1772 }, { "epoch": 0.48, "grad_norm": 2.745283365249634, "learning_rate": 1.098952834635203e-06, "loss": 0.1174, "step": 1773 }, { "epoch": 0.48, "grad_norm": 2.711155891418457, "learning_rate": 1.0980721835779572e-06, "loss": 0.1228, "step": 1774 }, { "epoch": 0.48, "grad_norm": 2.708275556564331, "learning_rate": 1.0971914557158242e-06, "loss": 0.1096, "step": 1775 }, { "epoch": 0.49, "grad_norm": 2.7584431171417236, "learning_rate": 1.0963106517385433e-06, "loss": 0.1226, "step": 1776 }, { "epoch": 0.49, "grad_norm": 2.963024139404297, "learning_rate": 1.0954297723359118e-06, "loss": 0.1328, "step": 1777 }, { "epoch": 0.49, "grad_norm": 2.7407984733581543, "learning_rate": 1.0945488181977889e-06, "loss": 0.1238, "step": 1778 }, { "epoch": 0.49, "grad_norm": 2.9212539196014404, "learning_rate": 1.0936677900140898e-06, "loss": 0.1301, "step": 1779 }, { "epoch": 0.49, "grad_norm": 2.6921145915985107, "learning_rate": 1.092786688474789e-06, "loss": 0.115, "step": 1780 }, { "epoch": 0.49, "grad_norm": 2.883453607559204, "learning_rate": 1.0919055142699178e-06, "loss": 0.1363, "step": 1781 }, { "epoch": 0.49, "grad_norm": 2.5044760704040527, "learning_rate": 1.0910242680895648e-06, "loss": 0.1039, "step": 1782 }, { "epoch": 0.49, "grad_norm": 2.7206735610961914, "learning_rate": 1.0901429506238748e-06, "loss": 0.1314, "step": 1783 }, { "epoch": 0.49, "grad_norm": 2.778576374053955, "learning_rate": 1.0892615625630488e-06, "loss": 0.125, "step": 1784 }, { "epoch": 0.49, "grad_norm": 2.572385549545288, "learning_rate": 1.0883801045973423e-06, "loss": 0.1111, "step": 1785 }, { "epoch": 0.49, "grad_norm": 2.9828879833221436, "learning_rate": 1.0874985774170667e-06, "loss": 0.1285, "step": 1786 }, { "epoch": 0.49, "grad_norm": 2.835440158843994, "learning_rate": 1.0866169817125861e-06, "loss": 0.1198, "step": 1787 }, { "epoch": 0.49, "grad_norm": 2.6918647289276123, "learning_rate": 1.0857353181743198e-06, "loss": 0.1209, "step": 1788 }, { "epoch": 0.49, "grad_norm": 2.776198387145996, "learning_rate": 1.084853587492739e-06, "loss": 0.1235, "step": 1789 }, { "epoch": 0.49, "grad_norm": 2.8532979488372803, "learning_rate": 1.0839717903583683e-06, "loss": 0.1351, "step": 1790 }, { "epoch": 0.49, "grad_norm": 2.714279890060425, "learning_rate": 1.083089927461784e-06, "loss": 0.1205, "step": 1791 }, { "epoch": 0.49, "grad_norm": 2.987715482711792, "learning_rate": 1.0822079994936138e-06, "loss": 0.1314, "step": 1792 }, { "epoch": 0.49, "grad_norm": 2.685398578643799, "learning_rate": 1.0813260071445368e-06, "loss": 0.1276, "step": 1793 }, { "epoch": 0.49, "grad_norm": 2.7523744106292725, "learning_rate": 1.0804439511052817e-06, "loss": 0.1207, "step": 1794 }, { "epoch": 0.49, "grad_norm": 2.781987428665161, "learning_rate": 1.079561832066628e-06, "loss": 0.1248, "step": 1795 }, { "epoch": 0.49, "grad_norm": 2.8596763610839844, "learning_rate": 1.0786796507194037e-06, "loss": 0.1373, "step": 1796 }, { "epoch": 0.49, "grad_norm": 2.9784350395202637, "learning_rate": 1.0777974077544869e-06, "loss": 0.1283, "step": 1797 }, { "epoch": 0.49, "grad_norm": 2.8105826377868652, "learning_rate": 1.0769151038628026e-06, "loss": 0.124, "step": 1798 }, { "epoch": 0.49, "grad_norm": 2.6100962162017822, "learning_rate": 1.0760327397353237e-06, "loss": 0.1153, "step": 1799 }, { "epoch": 0.49, "grad_norm": 2.808901309967041, "learning_rate": 1.0751503160630708e-06, "loss": 0.1327, "step": 1800 }, { "epoch": 0.49, "grad_norm": 2.954089879989624, "learning_rate": 1.0742678335371111e-06, "loss": 0.1347, "step": 1801 }, { "epoch": 0.49, "grad_norm": 2.784660816192627, "learning_rate": 1.0733852928485574e-06, "loss": 0.1265, "step": 1802 }, { "epoch": 0.49, "grad_norm": 2.7837114334106445, "learning_rate": 1.0725026946885689e-06, "loss": 0.1236, "step": 1803 }, { "epoch": 0.49, "grad_norm": 2.7887163162231445, "learning_rate": 1.0716200397483483e-06, "loss": 0.1303, "step": 1804 }, { "epoch": 0.49, "grad_norm": 2.7233433723449707, "learning_rate": 1.0707373287191448e-06, "loss": 0.1224, "step": 1805 }, { "epoch": 0.49, "grad_norm": 2.7041988372802734, "learning_rate": 1.0698545622922497e-06, "loss": 0.1193, "step": 1806 }, { "epoch": 0.49, "grad_norm": 2.861286163330078, "learning_rate": 1.0689717411589984e-06, "loss": 0.1321, "step": 1807 }, { "epoch": 0.49, "grad_norm": 2.8406999111175537, "learning_rate": 1.06808886601077e-06, "loss": 0.1309, "step": 1808 }, { "epoch": 0.49, "grad_norm": 2.8524794578552246, "learning_rate": 1.0672059375389844e-06, "loss": 0.1334, "step": 1809 }, { "epoch": 0.49, "grad_norm": 2.7974801063537598, "learning_rate": 1.066322956435104e-06, "loss": 0.1343, "step": 1810 }, { "epoch": 0.49, "grad_norm": 2.945446252822876, "learning_rate": 1.0654399233906324e-06, "loss": 0.149, "step": 1811 }, { "epoch": 0.49, "grad_norm": 2.9166078567504883, "learning_rate": 1.064556839097114e-06, "loss": 0.1313, "step": 1812 }, { "epoch": 0.5, "grad_norm": 2.9036660194396973, "learning_rate": 1.063673704246133e-06, "loss": 0.1388, "step": 1813 }, { "epoch": 0.5, "grad_norm": 3.048311471939087, "learning_rate": 1.0627905195293135e-06, "loss": 0.1263, "step": 1814 }, { "epoch": 0.5, "grad_norm": 2.9274511337280273, "learning_rate": 1.061907285638318e-06, "loss": 0.1477, "step": 1815 }, { "epoch": 0.5, "grad_norm": 2.708244800567627, "learning_rate": 1.0610240032648492e-06, "loss": 0.1016, "step": 1816 }, { "epoch": 0.5, "grad_norm": 3.3445119857788086, "learning_rate": 1.0601406731006454e-06, "loss": 0.1459, "step": 1817 }, { "epoch": 0.5, "grad_norm": 2.64389705657959, "learning_rate": 1.059257295837484e-06, "loss": 0.1246, "step": 1818 }, { "epoch": 0.5, "grad_norm": 3.626828193664551, "learning_rate": 1.058373872167179e-06, "loss": 0.1302, "step": 1819 }, { "epoch": 0.5, "grad_norm": 2.9049739837646484, "learning_rate": 1.0574904027815801e-06, "loss": 0.1321, "step": 1820 }, { "epoch": 0.5, "grad_norm": 2.6238348484039307, "learning_rate": 1.056606888372574e-06, "loss": 0.1133, "step": 1821 }, { "epoch": 0.5, "grad_norm": 2.8575427532196045, "learning_rate": 1.0557233296320811e-06, "loss": 0.1346, "step": 1822 }, { "epoch": 0.5, "grad_norm": 3.0136499404907227, "learning_rate": 1.0548397272520578e-06, "loss": 0.1431, "step": 1823 }, { "epoch": 0.5, "grad_norm": 3.065021276473999, "learning_rate": 1.053956081924494e-06, "loss": 0.1479, "step": 1824 }, { "epoch": 0.5, "grad_norm": 2.9963295459747314, "learning_rate": 1.0530723943414133e-06, "loss": 0.1461, "step": 1825 }, { "epoch": 0.5, "grad_norm": 2.8013620376586914, "learning_rate": 1.052188665194873e-06, "loss": 0.1198, "step": 1826 }, { "epoch": 0.5, "grad_norm": 2.6950151920318604, "learning_rate": 1.0513048951769624e-06, "loss": 0.1145, "step": 1827 }, { "epoch": 0.5, "grad_norm": 2.88051700592041, "learning_rate": 1.0504210849798026e-06, "loss": 0.1344, "step": 1828 }, { "epoch": 0.5, "grad_norm": 2.5159189701080322, "learning_rate": 1.0495372352955467e-06, "loss": 0.1121, "step": 1829 }, { "epoch": 0.5, "grad_norm": 2.8956034183502197, "learning_rate": 1.0486533468163782e-06, "loss": 0.1189, "step": 1830 }, { "epoch": 0.5, "grad_norm": 2.808065414428711, "learning_rate": 1.0477694202345116e-06, "loss": 0.1341, "step": 1831 }, { "epoch": 0.5, "grad_norm": 2.836740732192993, "learning_rate": 1.0468854562421905e-06, "loss": 0.1183, "step": 1832 }, { "epoch": 0.5, "grad_norm": 2.8136489391326904, "learning_rate": 1.0460014555316886e-06, "loss": 0.1361, "step": 1833 }, { "epoch": 0.5, "grad_norm": 2.7605788707733154, "learning_rate": 1.0451174187953083e-06, "loss": 0.1095, "step": 1834 }, { "epoch": 0.5, "grad_norm": 3.0163135528564453, "learning_rate": 1.0442333467253788e-06, "loss": 0.1405, "step": 1835 }, { "epoch": 0.5, "grad_norm": 2.564580202102661, "learning_rate": 1.0433492400142589e-06, "loss": 0.1124, "step": 1836 }, { "epoch": 0.5, "grad_norm": 2.578174352645874, "learning_rate": 1.0424650993543337e-06, "loss": 0.1146, "step": 1837 }, { "epoch": 0.5, "grad_norm": 3.0214920043945312, "learning_rate": 1.0415809254380141e-06, "loss": 0.1362, "step": 1838 }, { "epoch": 0.5, "grad_norm": 3.012301206588745, "learning_rate": 1.0406967189577387e-06, "loss": 0.136, "step": 1839 }, { "epoch": 0.5, "grad_norm": 3.025559186935425, "learning_rate": 1.03981248060597e-06, "loss": 0.1573, "step": 1840 }, { "epoch": 0.5, "grad_norm": 2.862858295440674, "learning_rate": 1.038928211075197e-06, "loss": 0.1335, "step": 1841 }, { "epoch": 0.5, "grad_norm": 2.7830710411071777, "learning_rate": 1.0380439110579313e-06, "loss": 0.1228, "step": 1842 }, { "epoch": 0.5, "grad_norm": 2.717508554458618, "learning_rate": 1.0371595812467098e-06, "loss": 0.1284, "step": 1843 }, { "epoch": 0.5, "grad_norm": 2.8358590602874756, "learning_rate": 1.0362752223340925e-06, "loss": 0.1205, "step": 1844 }, { "epoch": 0.5, "grad_norm": 2.7825779914855957, "learning_rate": 1.0353908350126618e-06, "loss": 0.1365, "step": 1845 }, { "epoch": 0.5, "grad_norm": 3.3972370624542236, "learning_rate": 1.034506419975023e-06, "loss": 0.1296, "step": 1846 }, { "epoch": 0.5, "grad_norm": 2.787891387939453, "learning_rate": 1.0336219779138015e-06, "loss": 0.1295, "step": 1847 }, { "epoch": 0.5, "grad_norm": 2.9575958251953125, "learning_rate": 1.032737509521646e-06, "loss": 0.1358, "step": 1848 }, { "epoch": 0.51, "grad_norm": 2.6589770317077637, "learning_rate": 1.0318530154912244e-06, "loss": 0.1228, "step": 1849 }, { "epoch": 0.51, "grad_norm": 2.7603044509887695, "learning_rate": 1.0309684965152252e-06, "loss": 0.126, "step": 1850 }, { "epoch": 0.51, "grad_norm": 3.1505537033081055, "learning_rate": 1.0300839532863569e-06, "loss": 0.1423, "step": 1851 }, { "epoch": 0.51, "grad_norm": 2.751417636871338, "learning_rate": 1.0291993864973455e-06, "loss": 0.1275, "step": 1852 }, { "epoch": 0.51, "grad_norm": 2.5051889419555664, "learning_rate": 1.0283147968409365e-06, "loss": 0.1169, "step": 1853 }, { "epoch": 0.51, "grad_norm": 2.7912323474884033, "learning_rate": 1.0274301850098936e-06, "loss": 0.1272, "step": 1854 }, { "epoch": 0.51, "grad_norm": 2.8508260250091553, "learning_rate": 1.0265455516969976e-06, "loss": 0.1191, "step": 1855 }, { "epoch": 0.51, "grad_norm": 2.967703104019165, "learning_rate": 1.0256608975950458e-06, "loss": 0.1365, "step": 1856 }, { "epoch": 0.51, "grad_norm": 2.7232398986816406, "learning_rate": 1.0247762233968516e-06, "loss": 0.1233, "step": 1857 }, { "epoch": 0.51, "grad_norm": 2.7901456356048584, "learning_rate": 1.0238915297952449e-06, "loss": 0.1177, "step": 1858 }, { "epoch": 0.51, "grad_norm": 2.8532421588897705, "learning_rate": 1.0230068174830701e-06, "loss": 0.1295, "step": 1859 }, { "epoch": 0.51, "grad_norm": 2.837003231048584, "learning_rate": 1.0221220871531869e-06, "loss": 0.1376, "step": 1860 }, { "epoch": 0.51, "grad_norm": 2.890450954437256, "learning_rate": 1.0212373394984688e-06, "loss": 0.1461, "step": 1861 }, { "epoch": 0.51, "grad_norm": 2.723402261734009, "learning_rate": 1.0203525752118023e-06, "loss": 0.117, "step": 1862 }, { "epoch": 0.51, "grad_norm": 2.9096994400024414, "learning_rate": 1.0194677949860878e-06, "loss": 0.1375, "step": 1863 }, { "epoch": 0.51, "grad_norm": 2.727287769317627, "learning_rate": 1.0185829995142377e-06, "loss": 0.1172, "step": 1864 }, { "epoch": 0.51, "grad_norm": 2.74131441116333, "learning_rate": 1.0176981894891767e-06, "loss": 0.1274, "step": 1865 }, { "epoch": 0.51, "grad_norm": 2.803450345993042, "learning_rate": 1.0168133656038407e-06, "loss": 0.1277, "step": 1866 }, { "epoch": 0.51, "grad_norm": 2.807425022125244, "learning_rate": 1.0159285285511762e-06, "loss": 0.1303, "step": 1867 }, { "epoch": 0.51, "grad_norm": 2.745000123977661, "learning_rate": 1.0150436790241404e-06, "loss": 0.1299, "step": 1868 }, { "epoch": 0.51, "grad_norm": 2.740635395050049, "learning_rate": 1.0141588177156998e-06, "loss": 0.1241, "step": 1869 }, { "epoch": 0.51, "grad_norm": 2.799743413925171, "learning_rate": 1.0132739453188308e-06, "loss": 0.1213, "step": 1870 }, { "epoch": 0.51, "grad_norm": 2.739351749420166, "learning_rate": 1.0123890625265182e-06, "loss": 0.1205, "step": 1871 }, { "epoch": 0.51, "grad_norm": 3.156585454940796, "learning_rate": 1.0115041700317543e-06, "loss": 0.1447, "step": 1872 }, { "epoch": 0.51, "grad_norm": 2.9910778999328613, "learning_rate": 1.01061926852754e-06, "loss": 0.1357, "step": 1873 }, { "epoch": 0.51, "grad_norm": 2.6979784965515137, "learning_rate": 1.009734358706883e-06, "loss": 0.1157, "step": 1874 }, { "epoch": 0.51, "grad_norm": 2.8156447410583496, "learning_rate": 1.0088494412627967e-06, "loss": 0.1256, "step": 1875 }, { "epoch": 0.51, "grad_norm": 2.799232006072998, "learning_rate": 1.0079645168883018e-06, "loss": 0.1217, "step": 1876 }, { "epoch": 0.51, "grad_norm": 2.747084617614746, "learning_rate": 1.0070795862764232e-06, "loss": 0.1189, "step": 1877 }, { "epoch": 0.51, "grad_norm": 2.956552505493164, "learning_rate": 1.0061946501201913e-06, "loss": 0.1358, "step": 1878 }, { "epoch": 0.51, "grad_norm": 3.008732557296753, "learning_rate": 1.005309709112641e-06, "loss": 0.1396, "step": 1879 }, { "epoch": 0.51, "grad_norm": 2.9576668739318848, "learning_rate": 1.0044247639468105e-06, "loss": 0.1288, "step": 1880 }, { "epoch": 0.51, "grad_norm": 2.854033946990967, "learning_rate": 1.0035398153157416e-06, "loss": 0.1265, "step": 1881 }, { "epoch": 0.51, "grad_norm": 2.9028520584106445, "learning_rate": 1.002654863912479e-06, "loss": 0.1369, "step": 1882 }, { "epoch": 0.51, "grad_norm": 2.773729085922241, "learning_rate": 1.0017699104300685e-06, "loss": 0.1256, "step": 1883 }, { "epoch": 0.51, "grad_norm": 2.6618807315826416, "learning_rate": 1.0008849555615593e-06, "loss": 0.1246, "step": 1884 }, { "epoch": 0.51, "grad_norm": 2.5700972080230713, "learning_rate": 1e-06, "loss": 0.1149, "step": 1885 }, { "epoch": 0.52, "grad_norm": 3.0188512802124023, "learning_rate": 9.991150444384408e-07, "loss": 0.1381, "step": 1886 }, { "epoch": 0.52, "grad_norm": 2.6191985607147217, "learning_rate": 9.982300895699316e-07, "loss": 0.1209, "step": 1887 }, { "epoch": 0.52, "grad_norm": 2.8856725692749023, "learning_rate": 9.973451360875212e-07, "loss": 0.1322, "step": 1888 }, { "epoch": 0.52, "grad_norm": 2.8278348445892334, "learning_rate": 9.964601846842583e-07, "loss": 0.1279, "step": 1889 }, { "epoch": 0.52, "grad_norm": 2.9562864303588867, "learning_rate": 9.955752360531894e-07, "loss": 0.1282, "step": 1890 }, { "epoch": 0.52, "grad_norm": 2.8889904022216797, "learning_rate": 9.94690290887359e-07, "loss": 0.1235, "step": 1891 }, { "epoch": 0.52, "grad_norm": 2.9565634727478027, "learning_rate": 9.938053498798088e-07, "loss": 0.1188, "step": 1892 }, { "epoch": 0.52, "grad_norm": 2.7623958587646484, "learning_rate": 9.929204137235767e-07, "loss": 0.1233, "step": 1893 }, { "epoch": 0.52, "grad_norm": 2.517465353012085, "learning_rate": 9.920354831116983e-07, "loss": 0.1157, "step": 1894 }, { "epoch": 0.52, "grad_norm": 2.7477364540100098, "learning_rate": 9.911505587372032e-07, "loss": 0.1247, "step": 1895 }, { "epoch": 0.52, "grad_norm": 2.8950042724609375, "learning_rate": 9.90265641293117e-07, "loss": 0.1397, "step": 1896 }, { "epoch": 0.52, "grad_norm": 2.5006961822509766, "learning_rate": 9.8938073147246e-07, "loss": 0.1138, "step": 1897 }, { "epoch": 0.52, "grad_norm": 3.071542501449585, "learning_rate": 9.884958299682456e-07, "loss": 0.1351, "step": 1898 }, { "epoch": 0.52, "grad_norm": 2.998857259750366, "learning_rate": 9.87610937473482e-07, "loss": 0.1301, "step": 1899 }, { "epoch": 0.52, "grad_norm": 2.7444169521331787, "learning_rate": 9.867260546811692e-07, "loss": 0.1098, "step": 1900 }, { "epoch": 0.52, "grad_norm": 3.1419379711151123, "learning_rate": 9.858411822842999e-07, "loss": 0.1331, "step": 1901 }, { "epoch": 0.52, "grad_norm": 2.681468963623047, "learning_rate": 9.8495632097586e-07, "loss": 0.1203, "step": 1902 }, { "epoch": 0.52, "grad_norm": 2.7569844722747803, "learning_rate": 9.840714714488237e-07, "loss": 0.1123, "step": 1903 }, { "epoch": 0.52, "grad_norm": 2.6136653423309326, "learning_rate": 9.831866343961594e-07, "loss": 0.1208, "step": 1904 }, { "epoch": 0.52, "grad_norm": 2.7803595066070557, "learning_rate": 9.823018105108232e-07, "loss": 0.1232, "step": 1905 }, { "epoch": 0.52, "grad_norm": 3.172673463821411, "learning_rate": 9.81417000485762e-07, "loss": 0.1377, "step": 1906 }, { "epoch": 0.52, "grad_norm": 2.8042352199554443, "learning_rate": 9.805322050139125e-07, "loss": 0.1241, "step": 1907 }, { "epoch": 0.52, "grad_norm": 2.8047232627868652, "learning_rate": 9.796474247881978e-07, "loss": 0.1243, "step": 1908 }, { "epoch": 0.52, "grad_norm": 3.208739757537842, "learning_rate": 9.787626605015315e-07, "loss": 0.1397, "step": 1909 }, { "epoch": 0.52, "grad_norm": 2.6388320922851562, "learning_rate": 9.778779128468133e-07, "loss": 0.1206, "step": 1910 }, { "epoch": 0.52, "grad_norm": 2.615647077560425, "learning_rate": 9.769931825169296e-07, "loss": 0.1149, "step": 1911 }, { "epoch": 0.52, "grad_norm": 2.6340813636779785, "learning_rate": 9.761084702047555e-07, "loss": 0.115, "step": 1912 }, { "epoch": 0.52, "grad_norm": 2.719238758087158, "learning_rate": 9.752237766031485e-07, "loss": 0.116, "step": 1913 }, { "epoch": 0.52, "grad_norm": 2.8169920444488525, "learning_rate": 9.743391024049545e-07, "loss": 0.1344, "step": 1914 }, { "epoch": 0.52, "grad_norm": 2.631563425064087, "learning_rate": 9.734544483030025e-07, "loss": 0.117, "step": 1915 }, { "epoch": 0.52, "grad_norm": 2.8048999309539795, "learning_rate": 9.725698149901061e-07, "loss": 0.1291, "step": 1916 }, { "epoch": 0.52, "grad_norm": 2.9323315620422363, "learning_rate": 9.716852031590638e-07, "loss": 0.1283, "step": 1917 }, { "epoch": 0.52, "grad_norm": 2.747880697250366, "learning_rate": 9.708006135026546e-07, "loss": 0.1323, "step": 1918 }, { "epoch": 0.52, "grad_norm": 2.6227073669433594, "learning_rate": 9.699160467136433e-07, "loss": 0.1271, "step": 1919 }, { "epoch": 0.52, "grad_norm": 2.853242874145508, "learning_rate": 9.690315034847747e-07, "loss": 0.1407, "step": 1920 }, { "epoch": 0.52, "grad_norm": 2.7962889671325684, "learning_rate": 9.681469845087755e-07, "loss": 0.1094, "step": 1921 }, { "epoch": 0.53, "grad_norm": 2.5797970294952393, "learning_rate": 9.672624904783542e-07, "loss": 0.1097, "step": 1922 }, { "epoch": 0.53, "grad_norm": 2.655869722366333, "learning_rate": 9.663780220861986e-07, "loss": 0.1193, "step": 1923 }, { "epoch": 0.53, "grad_norm": 2.7732772827148438, "learning_rate": 9.654935800249772e-07, "loss": 0.1229, "step": 1924 }, { "epoch": 0.53, "grad_norm": 2.618173837661743, "learning_rate": 9.646091649873383e-07, "loss": 0.1222, "step": 1925 }, { "epoch": 0.53, "grad_norm": 2.573154926300049, "learning_rate": 9.637247776659074e-07, "loss": 0.1137, "step": 1926 }, { "epoch": 0.53, "grad_norm": 2.6391992568969727, "learning_rate": 9.628404187532901e-07, "loss": 0.1226, "step": 1927 }, { "epoch": 0.53, "grad_norm": 2.5460689067840576, "learning_rate": 9.619560889420688e-07, "loss": 0.1076, "step": 1928 }, { "epoch": 0.53, "grad_norm": 2.684030532836914, "learning_rate": 9.610717889248032e-07, "loss": 0.1167, "step": 1929 }, { "epoch": 0.53, "grad_norm": 2.813068151473999, "learning_rate": 9.6018751939403e-07, "loss": 0.1198, "step": 1930 }, { "epoch": 0.53, "grad_norm": 2.90629243850708, "learning_rate": 9.593032810422612e-07, "loss": 0.1298, "step": 1931 }, { "epoch": 0.53, "grad_norm": 2.8330368995666504, "learning_rate": 9.58419074561986e-07, "loss": 0.1224, "step": 1932 }, { "epoch": 0.53, "grad_norm": 2.701951742172241, "learning_rate": 9.575349006456664e-07, "loss": 0.113, "step": 1933 }, { "epoch": 0.53, "grad_norm": 2.9516077041625977, "learning_rate": 9.56650759985741e-07, "loss": 0.1378, "step": 1934 }, { "epoch": 0.53, "grad_norm": 2.7260758876800537, "learning_rate": 9.557666532746213e-07, "loss": 0.1233, "step": 1935 }, { "epoch": 0.53, "grad_norm": 2.8595633506774902, "learning_rate": 9.548825812046918e-07, "loss": 0.1265, "step": 1936 }, { "epoch": 0.53, "grad_norm": 2.5643112659454346, "learning_rate": 9.539985444683113e-07, "loss": 0.1107, "step": 1937 }, { "epoch": 0.53, "grad_norm": 3.2145724296569824, "learning_rate": 9.531145437578094e-07, "loss": 0.132, "step": 1938 }, { "epoch": 0.53, "grad_norm": 2.6504106521606445, "learning_rate": 9.522305797654886e-07, "loss": 0.1364, "step": 1939 }, { "epoch": 0.53, "grad_norm": 2.5980429649353027, "learning_rate": 9.513466531836221e-07, "loss": 0.1153, "step": 1940 }, { "epoch": 0.53, "grad_norm": 2.897460460662842, "learning_rate": 9.504627647044534e-07, "loss": 0.1324, "step": 1941 }, { "epoch": 0.53, "grad_norm": 2.8686153888702393, "learning_rate": 9.495789150201977e-07, "loss": 0.1301, "step": 1942 }, { "epoch": 0.53, "grad_norm": 3.08678936958313, "learning_rate": 9.486951048230377e-07, "loss": 0.1349, "step": 1943 }, { "epoch": 0.53, "grad_norm": 2.923462390899658, "learning_rate": 9.478113348051268e-07, "loss": 0.1258, "step": 1944 }, { "epoch": 0.53, "grad_norm": 2.8492536544799805, "learning_rate": 9.469276056585867e-07, "loss": 0.1335, "step": 1945 }, { "epoch": 0.53, "grad_norm": 2.766394853591919, "learning_rate": 9.46043918075506e-07, "loss": 0.1202, "step": 1946 }, { "epoch": 0.53, "grad_norm": 2.8734710216522217, "learning_rate": 9.451602727479424e-07, "loss": 0.1261, "step": 1947 }, { "epoch": 0.53, "grad_norm": 3.0753471851348877, "learning_rate": 9.44276670367919e-07, "loss": 0.1428, "step": 1948 }, { "epoch": 0.53, "grad_norm": 2.7942495346069336, "learning_rate": 9.433931116274258e-07, "loss": 0.1217, "step": 1949 }, { "epoch": 0.53, "grad_norm": 2.8738746643066406, "learning_rate": 9.425095972184198e-07, "loss": 0.1352, "step": 1950 }, { "epoch": 0.53, "grad_norm": 2.901026725769043, "learning_rate": 9.416261278328209e-07, "loss": 0.1225, "step": 1951 }, { "epoch": 0.53, "grad_norm": 2.6596286296844482, "learning_rate": 9.40742704162516e-07, "loss": 0.1211, "step": 1952 }, { "epoch": 0.53, "grad_norm": 2.844707727432251, "learning_rate": 9.398593268993546e-07, "loss": 0.1428, "step": 1953 }, { "epoch": 0.53, "grad_norm": 2.7951433658599854, "learning_rate": 9.389759967351507e-07, "loss": 0.1151, "step": 1954 }, { "epoch": 0.53, "grad_norm": 2.6156182289123535, "learning_rate": 9.380927143616819e-07, "loss": 0.1171, "step": 1955 }, { "epoch": 0.53, "grad_norm": 2.552344799041748, "learning_rate": 9.372094804706866e-07, "loss": 0.1173, "step": 1956 }, { "epoch": 0.53, "grad_norm": 2.7790207862854004, "learning_rate": 9.363262957538671e-07, "loss": 0.12, "step": 1957 }, { "epoch": 0.53, "grad_norm": 2.931581497192383, "learning_rate": 9.354431609028861e-07, "loss": 0.136, "step": 1958 }, { "epoch": 0.54, "grad_norm": 2.9483675956726074, "learning_rate": 9.345600766093674e-07, "loss": 0.1281, "step": 1959 }, { "epoch": 0.54, "grad_norm": 2.7541463375091553, "learning_rate": 9.336770435648963e-07, "loss": 0.1225, "step": 1960 }, { "epoch": 0.54, "grad_norm": 2.890131950378418, "learning_rate": 9.327940624610155e-07, "loss": 0.1273, "step": 1961 }, { "epoch": 0.54, "grad_norm": 2.757310390472412, "learning_rate": 9.319111339892302e-07, "loss": 0.1241, "step": 1962 }, { "epoch": 0.54, "grad_norm": 2.8916938304901123, "learning_rate": 9.310282588410014e-07, "loss": 0.125, "step": 1963 }, { "epoch": 0.54, "grad_norm": 2.793872356414795, "learning_rate": 9.301454377077502e-07, "loss": 0.1245, "step": 1964 }, { "epoch": 0.54, "grad_norm": 2.905181407928467, "learning_rate": 9.292626712808555e-07, "loss": 0.1256, "step": 1965 }, { "epoch": 0.54, "grad_norm": 2.534851312637329, "learning_rate": 9.283799602516516e-07, "loss": 0.1066, "step": 1966 }, { "epoch": 0.54, "grad_norm": 2.8749680519104004, "learning_rate": 9.274973053114314e-07, "loss": 0.1314, "step": 1967 }, { "epoch": 0.54, "grad_norm": 2.922060966491699, "learning_rate": 9.266147071514426e-07, "loss": 0.1357, "step": 1968 }, { "epoch": 0.54, "grad_norm": 2.6175835132598877, "learning_rate": 9.257321664628888e-07, "loss": 0.1164, "step": 1969 }, { "epoch": 0.54, "grad_norm": 2.780647039413452, "learning_rate": 9.248496839369292e-07, "loss": 0.1241, "step": 1970 }, { "epoch": 0.54, "grad_norm": 3.3802411556243896, "learning_rate": 9.239672602646764e-07, "loss": 0.1132, "step": 1971 }, { "epoch": 0.54, "grad_norm": 2.62099289894104, "learning_rate": 9.230848961371978e-07, "loss": 0.1172, "step": 1972 }, { "epoch": 0.54, "grad_norm": 2.7328224182128906, "learning_rate": 9.222025922455133e-07, "loss": 0.1175, "step": 1973 }, { "epoch": 0.54, "grad_norm": 2.7471697330474854, "learning_rate": 9.213203492805959e-07, "loss": 0.1111, "step": 1974 }, { "epoch": 0.54, "grad_norm": 2.845108985900879, "learning_rate": 9.204381679333722e-07, "loss": 0.1194, "step": 1975 }, { "epoch": 0.54, "grad_norm": 2.360717296600342, "learning_rate": 9.195560488947184e-07, "loss": 0.1028, "step": 1976 }, { "epoch": 0.54, "grad_norm": 2.8350260257720947, "learning_rate": 9.186739928554634e-07, "loss": 0.1274, "step": 1977 }, { "epoch": 0.54, "grad_norm": 2.673199415206909, "learning_rate": 9.177920005063864e-07, "loss": 0.1183, "step": 1978 }, { "epoch": 0.54, "grad_norm": 2.875682830810547, "learning_rate": 9.169100725382159e-07, "loss": 0.1223, "step": 1979 }, { "epoch": 0.54, "grad_norm": 2.9900684356689453, "learning_rate": 9.160282096416316e-07, "loss": 0.131, "step": 1980 }, { "epoch": 0.54, "grad_norm": 2.7693305015563965, "learning_rate": 9.15146412507261e-07, "loss": 0.125, "step": 1981 }, { "epoch": 0.54, "grad_norm": 3.0410921573638916, "learning_rate": 9.142646818256802e-07, "loss": 0.1342, "step": 1982 }, { "epoch": 0.54, "grad_norm": 2.9950759410858154, "learning_rate": 9.13383018287414e-07, "loss": 0.1199, "step": 1983 }, { "epoch": 0.54, "grad_norm": 2.8740227222442627, "learning_rate": 9.125014225829333e-07, "loss": 0.1308, "step": 1984 }, { "epoch": 0.54, "grad_norm": 2.735966444015503, "learning_rate": 9.116198954026576e-07, "loss": 0.1261, "step": 1985 }, { "epoch": 0.54, "grad_norm": 2.5520291328430176, "learning_rate": 9.107384374369513e-07, "loss": 0.1076, "step": 1986 }, { "epoch": 0.54, "grad_norm": 2.961522340774536, "learning_rate": 9.098570493761251e-07, "loss": 0.1396, "step": 1987 }, { "epoch": 0.54, "grad_norm": 2.695842981338501, "learning_rate": 9.089757319104354e-07, "loss": 0.1164, "step": 1988 }, { "epoch": 0.54, "grad_norm": 2.570087194442749, "learning_rate": 9.080944857300822e-07, "loss": 0.1154, "step": 1989 }, { "epoch": 0.54, "grad_norm": 2.604621171951294, "learning_rate": 9.072133115252112e-07, "loss": 0.1189, "step": 1990 }, { "epoch": 0.54, "grad_norm": 2.649380683898926, "learning_rate": 9.063322099859102e-07, "loss": 0.1366, "step": 1991 }, { "epoch": 0.54, "grad_norm": 2.7982804775238037, "learning_rate": 9.05451181802211e-07, "loss": 0.1205, "step": 1992 }, { "epoch": 0.54, "grad_norm": 2.7476391792297363, "learning_rate": 9.045702276640882e-07, "loss": 0.1283, "step": 1993 }, { "epoch": 0.54, "grad_norm": 2.9363062381744385, "learning_rate": 9.03689348261457e-07, "loss": 0.1309, "step": 1994 }, { "epoch": 0.54, "grad_norm": 2.87986421585083, "learning_rate": 9.028085442841759e-07, "loss": 0.1281, "step": 1995 }, { "epoch": 0.55, "grad_norm": 2.5653114318847656, "learning_rate": 9.019278164220428e-07, "loss": 0.1143, "step": 1996 }, { "epoch": 0.55, "grad_norm": 2.7345714569091797, "learning_rate": 9.01047165364797e-07, "loss": 0.1243, "step": 1997 }, { "epoch": 0.55, "grad_norm": 3.0675840377807617, "learning_rate": 9.001665918021178e-07, "loss": 0.1216, "step": 1998 }, { "epoch": 0.55, "grad_norm": 2.9636502265930176, "learning_rate": 8.99286096423622e-07, "loss": 0.126, "step": 1999 }, { "epoch": 0.55, "grad_norm": 2.9458322525024414, "learning_rate": 8.984056799188676e-07, "loss": 0.131, "step": 2000 }, { "epoch": 0.55, "grad_norm": 2.8930435180664062, "learning_rate": 8.975253429773492e-07, "loss": 0.1255, "step": 2001 }, { "epoch": 0.55, "grad_norm": 2.7570371627807617, "learning_rate": 8.966450862884994e-07, "loss": 0.144, "step": 2002 }, { "epoch": 0.55, "grad_norm": 2.710038661956787, "learning_rate": 8.957649105416893e-07, "loss": 0.1319, "step": 2003 }, { "epoch": 0.55, "grad_norm": 2.9276740550994873, "learning_rate": 8.948848164262238e-07, "loss": 0.1369, "step": 2004 }, { "epoch": 0.55, "grad_norm": 2.948108673095703, "learning_rate": 8.940048046313469e-07, "loss": 0.1268, "step": 2005 }, { "epoch": 0.55, "grad_norm": 2.8122799396514893, "learning_rate": 8.931248758462358e-07, "loss": 0.1228, "step": 2006 }, { "epoch": 0.55, "grad_norm": 2.95597505569458, "learning_rate": 8.922450307600039e-07, "loss": 0.1213, "step": 2007 }, { "epoch": 0.55, "grad_norm": 2.8452467918395996, "learning_rate": 8.913652700616996e-07, "loss": 0.1326, "step": 2008 }, { "epoch": 0.55, "grad_norm": 2.68770694732666, "learning_rate": 8.904855944403031e-07, "loss": 0.1266, "step": 2009 }, { "epoch": 0.55, "grad_norm": 2.7697670459747314, "learning_rate": 8.896060045847303e-07, "loss": 0.1131, "step": 2010 }, { "epoch": 0.55, "grad_norm": 2.6158204078674316, "learning_rate": 8.887265011838284e-07, "loss": 0.1165, "step": 2011 }, { "epoch": 0.55, "grad_norm": 3.025125503540039, "learning_rate": 8.878470849263774e-07, "loss": 0.1365, "step": 2012 }, { "epoch": 0.55, "grad_norm": 2.772484540939331, "learning_rate": 8.869677565010898e-07, "loss": 0.1322, "step": 2013 }, { "epoch": 0.55, "grad_norm": 2.7702977657318115, "learning_rate": 8.860885165966074e-07, "loss": 0.1204, "step": 2014 }, { "epoch": 0.55, "grad_norm": 2.765949249267578, "learning_rate": 8.852093659015049e-07, "loss": 0.1244, "step": 2015 }, { "epoch": 0.55, "grad_norm": 2.8452184200286865, "learning_rate": 8.843303051042853e-07, "loss": 0.1321, "step": 2016 }, { "epoch": 0.55, "grad_norm": 2.527989387512207, "learning_rate": 8.834513348933822e-07, "loss": 0.1148, "step": 2017 }, { "epoch": 0.55, "grad_norm": 2.9338791370391846, "learning_rate": 8.825724559571586e-07, "loss": 0.1422, "step": 2018 }, { "epoch": 0.55, "grad_norm": 2.7240614891052246, "learning_rate": 8.816936689839048e-07, "loss": 0.1262, "step": 2019 }, { "epoch": 0.55, "grad_norm": 2.6370601654052734, "learning_rate": 8.808149746618402e-07, "loss": 0.1147, "step": 2020 }, { "epoch": 0.55, "grad_norm": 2.624903440475464, "learning_rate": 8.799363736791106e-07, "loss": 0.1099, "step": 2021 }, { "epoch": 0.55, "grad_norm": 2.865074872970581, "learning_rate": 8.790578667237897e-07, "loss": 0.1356, "step": 2022 }, { "epoch": 0.55, "grad_norm": 2.718155860900879, "learning_rate": 8.781794544838774e-07, "loss": 0.1106, "step": 2023 }, { "epoch": 0.55, "grad_norm": 2.8180630207061768, "learning_rate": 8.773011376472986e-07, "loss": 0.1345, "step": 2024 }, { "epoch": 0.55, "grad_norm": 2.8360700607299805, "learning_rate": 8.764229169019046e-07, "loss": 0.1225, "step": 2025 }, { "epoch": 0.55, "grad_norm": 2.8616783618927, "learning_rate": 8.755447929354704e-07, "loss": 0.126, "step": 2026 }, { "epoch": 0.55, "grad_norm": 2.901785373687744, "learning_rate": 8.746667664356955e-07, "loss": 0.1365, "step": 2027 }, { "epoch": 0.55, "grad_norm": 2.877326011657715, "learning_rate": 8.737888380902044e-07, "loss": 0.1327, "step": 2028 }, { "epoch": 0.55, "grad_norm": 2.972341537475586, "learning_rate": 8.729110085865426e-07, "loss": 0.1308, "step": 2029 }, { "epoch": 0.55, "grad_norm": 2.934622049331665, "learning_rate": 8.720332786121798e-07, "loss": 0.1237, "step": 2030 }, { "epoch": 0.55, "grad_norm": 2.8102076053619385, "learning_rate": 8.711556488545067e-07, "loss": 0.133, "step": 2031 }, { "epoch": 0.56, "grad_norm": 2.740907907485962, "learning_rate": 8.702781200008358e-07, "loss": 0.1228, "step": 2032 }, { "epoch": 0.56, "grad_norm": 2.946277379989624, "learning_rate": 8.694006927384016e-07, "loss": 0.1217, "step": 2033 }, { "epoch": 0.56, "grad_norm": 2.9555811882019043, "learning_rate": 8.685233677543575e-07, "loss": 0.1225, "step": 2034 }, { "epoch": 0.56, "grad_norm": 2.7638421058654785, "learning_rate": 8.676461457357776e-07, "loss": 0.124, "step": 2035 }, { "epoch": 0.56, "grad_norm": 2.890644073486328, "learning_rate": 8.667690273696555e-07, "loss": 0.1232, "step": 2036 }, { "epoch": 0.56, "grad_norm": 2.742115020751953, "learning_rate": 8.658920133429028e-07, "loss": 0.1109, "step": 2037 }, { "epoch": 0.56, "grad_norm": 2.385840892791748, "learning_rate": 8.650151043423509e-07, "loss": 0.1084, "step": 2038 }, { "epoch": 0.56, "grad_norm": 2.701353073120117, "learning_rate": 8.641383010547473e-07, "loss": 0.1258, "step": 2039 }, { "epoch": 0.56, "grad_norm": 2.8525030612945557, "learning_rate": 8.632616041667575e-07, "loss": 0.1242, "step": 2040 }, { "epoch": 0.56, "grad_norm": 2.592622756958008, "learning_rate": 8.62385014364964e-07, "loss": 0.1158, "step": 2041 }, { "epoch": 0.56, "grad_norm": 2.4827704429626465, "learning_rate": 8.615085323358643e-07, "loss": 0.1169, "step": 2042 }, { "epoch": 0.56, "grad_norm": 3.138923406600952, "learning_rate": 8.60632158765873e-07, "loss": 0.1586, "step": 2043 }, { "epoch": 0.56, "grad_norm": 3.0605967044830322, "learning_rate": 8.597558943413186e-07, "loss": 0.1314, "step": 2044 }, { "epoch": 0.56, "grad_norm": 2.625797986984253, "learning_rate": 8.588797397484444e-07, "loss": 0.1317, "step": 2045 }, { "epoch": 0.56, "grad_norm": 3.0117976665496826, "learning_rate": 8.580036956734085e-07, "loss": 0.1449, "step": 2046 }, { "epoch": 0.56, "grad_norm": 2.7856662273406982, "learning_rate": 8.571277628022806e-07, "loss": 0.1233, "step": 2047 }, { "epoch": 0.56, "grad_norm": 2.7608420848846436, "learning_rate": 8.562519418210457e-07, "loss": 0.1159, "step": 2048 }, { "epoch": 0.56, "grad_norm": 2.6647586822509766, "learning_rate": 8.553762334155989e-07, "loss": 0.1081, "step": 2049 }, { "epoch": 0.56, "grad_norm": 2.610696315765381, "learning_rate": 8.545006382717485e-07, "loss": 0.1272, "step": 2050 }, { "epoch": 0.56, "grad_norm": 2.8872148990631104, "learning_rate": 8.536251570752147e-07, "loss": 0.1274, "step": 2051 }, { "epoch": 0.56, "grad_norm": 2.914360523223877, "learning_rate": 8.527497905116259e-07, "loss": 0.122, "step": 2052 }, { "epoch": 0.56, "grad_norm": 2.5790927410125732, "learning_rate": 8.518745392665236e-07, "loss": 0.1245, "step": 2053 }, { "epoch": 0.56, "grad_norm": 2.682628870010376, "learning_rate": 8.509994040253571e-07, "loss": 0.1254, "step": 2054 }, { "epoch": 0.56, "grad_norm": 2.5411360263824463, "learning_rate": 8.501243854734856e-07, "loss": 0.1102, "step": 2055 }, { "epoch": 0.56, "grad_norm": 2.8040127754211426, "learning_rate": 8.492494842961775e-07, "loss": 0.1316, "step": 2056 }, { "epoch": 0.56, "grad_norm": 2.9163131713867188, "learning_rate": 8.483747011786074e-07, "loss": 0.1303, "step": 2057 }, { "epoch": 0.56, "grad_norm": 3.061216354370117, "learning_rate": 8.475000368058598e-07, "loss": 0.1313, "step": 2058 }, { "epoch": 0.56, "grad_norm": 2.576927900314331, "learning_rate": 8.466254918629242e-07, "loss": 0.1247, "step": 2059 }, { "epoch": 0.56, "grad_norm": 2.627513885498047, "learning_rate": 8.457510670346974e-07, "loss": 0.1237, "step": 2060 }, { "epoch": 0.56, "grad_norm": 2.779042959213257, "learning_rate": 8.448767630059833e-07, "loss": 0.1216, "step": 2061 }, { "epoch": 0.56, "grad_norm": 2.6417055130004883, "learning_rate": 8.440025804614886e-07, "loss": 0.1263, "step": 2062 }, { "epoch": 0.56, "grad_norm": 3.055684804916382, "learning_rate": 8.431285200858271e-07, "loss": 0.1311, "step": 2063 }, { "epoch": 0.56, "grad_norm": 2.9278390407562256, "learning_rate": 8.422545825635159e-07, "loss": 0.1287, "step": 2064 }, { "epoch": 0.56, "grad_norm": 2.593324899673462, "learning_rate": 8.413807685789759e-07, "loss": 0.1071, "step": 2065 }, { "epoch": 0.56, "grad_norm": 2.6678123474121094, "learning_rate": 8.405070788165321e-07, "loss": 0.1282, "step": 2066 }, { "epoch": 0.56, "grad_norm": 3.019789695739746, "learning_rate": 8.396335139604111e-07, "loss": 0.1273, "step": 2067 }, { "epoch": 0.56, "grad_norm": 2.9056313037872314, "learning_rate": 8.387600746947423e-07, "loss": 0.1299, "step": 2068 }, { "epoch": 0.57, "grad_norm": 2.581608533859253, "learning_rate": 8.378867617035564e-07, "loss": 0.1216, "step": 2069 }, { "epoch": 0.57, "grad_norm": 2.6597416400909424, "learning_rate": 8.370135756707852e-07, "loss": 0.1235, "step": 2070 }, { "epoch": 0.57, "grad_norm": 2.624035120010376, "learning_rate": 8.361405172802623e-07, "loss": 0.1148, "step": 2071 }, { "epoch": 0.57, "grad_norm": 2.6497409343719482, "learning_rate": 8.352675872157192e-07, "loss": 0.1169, "step": 2072 }, { "epoch": 0.57, "grad_norm": 2.865757942199707, "learning_rate": 8.343947861607888e-07, "loss": 0.1304, "step": 2073 }, { "epoch": 0.57, "grad_norm": 2.6306159496307373, "learning_rate": 8.335221147990017e-07, "loss": 0.1155, "step": 2074 }, { "epoch": 0.57, "grad_norm": 2.8795928955078125, "learning_rate": 8.326495738137875e-07, "loss": 0.1188, "step": 2075 }, { "epoch": 0.57, "grad_norm": 2.7957141399383545, "learning_rate": 8.31777163888474e-07, "loss": 0.1234, "step": 2076 }, { "epoch": 0.57, "grad_norm": 2.5149428844451904, "learning_rate": 8.309048857062855e-07, "loss": 0.1081, "step": 2077 }, { "epoch": 0.57, "grad_norm": 2.843853235244751, "learning_rate": 8.300327399503439e-07, "loss": 0.1368, "step": 2078 }, { "epoch": 0.57, "grad_norm": 2.855299234390259, "learning_rate": 8.291607273036669e-07, "loss": 0.1424, "step": 2079 }, { "epoch": 0.57, "grad_norm": 2.8537607192993164, "learning_rate": 8.282888484491681e-07, "loss": 0.1281, "step": 2080 }, { "epoch": 0.57, "grad_norm": 2.6667559146881104, "learning_rate": 8.274171040696569e-07, "loss": 0.1174, "step": 2081 }, { "epoch": 0.57, "grad_norm": 2.697756767272949, "learning_rate": 8.265454948478363e-07, "loss": 0.1188, "step": 2082 }, { "epoch": 0.57, "grad_norm": 2.676966667175293, "learning_rate": 8.256740214663042e-07, "loss": 0.1333, "step": 2083 }, { "epoch": 0.57, "grad_norm": 2.9605026245117188, "learning_rate": 8.24802684607552e-07, "loss": 0.1243, "step": 2084 }, { "epoch": 0.57, "grad_norm": 2.8657665252685547, "learning_rate": 8.239314849539637e-07, "loss": 0.1269, "step": 2085 }, { "epoch": 0.57, "grad_norm": 2.6637632846832275, "learning_rate": 8.23060423187817e-07, "loss": 0.1254, "step": 2086 }, { "epoch": 0.57, "grad_norm": 2.775421619415283, "learning_rate": 8.221894999912802e-07, "loss": 0.121, "step": 2087 }, { "epoch": 0.57, "grad_norm": 2.9669418334960938, "learning_rate": 8.213187160464143e-07, "loss": 0.1401, "step": 2088 }, { "epoch": 0.57, "grad_norm": 2.907231569290161, "learning_rate": 8.204480720351702e-07, "loss": 0.1314, "step": 2089 }, { "epoch": 0.57, "grad_norm": 2.8540337085723877, "learning_rate": 8.195775686393896e-07, "loss": 0.126, "step": 2090 }, { "epoch": 0.57, "grad_norm": 2.768364191055298, "learning_rate": 8.18707206540805e-07, "loss": 0.1324, "step": 2091 }, { "epoch": 0.57, "grad_norm": 2.9182331562042236, "learning_rate": 8.178369864210368e-07, "loss": 0.1208, "step": 2092 }, { "epoch": 0.57, "grad_norm": 2.9153683185577393, "learning_rate": 8.169669089615947e-07, "loss": 0.1245, "step": 2093 }, { "epoch": 0.57, "grad_norm": 2.692770004272461, "learning_rate": 8.160969748438777e-07, "loss": 0.1173, "step": 2094 }, { "epoch": 0.57, "grad_norm": 2.8993098735809326, "learning_rate": 8.152271847491705e-07, "loss": 0.1298, "step": 2095 }, { "epoch": 0.57, "grad_norm": 2.8526406288146973, "learning_rate": 8.143575393586471e-07, "loss": 0.1374, "step": 2096 }, { "epoch": 0.57, "grad_norm": 2.7569406032562256, "learning_rate": 8.134880393533667e-07, "loss": 0.1228, "step": 2097 }, { "epoch": 0.57, "grad_norm": 2.7927284240722656, "learning_rate": 8.126186854142751e-07, "loss": 0.1245, "step": 2098 }, { "epoch": 0.57, "grad_norm": 2.929137706756592, "learning_rate": 8.117494782222047e-07, "loss": 0.1227, "step": 2099 }, { "epoch": 0.57, "grad_norm": 2.835092544555664, "learning_rate": 8.108804184578708e-07, "loss": 0.1233, "step": 2100 }, { "epoch": 0.57, "grad_norm": 3.0549395084381104, "learning_rate": 8.100115068018756e-07, "loss": 0.1423, "step": 2101 }, { "epoch": 0.57, "grad_norm": 2.9805521965026855, "learning_rate": 8.091427439347033e-07, "loss": 0.1338, "step": 2102 }, { "epoch": 0.57, "grad_norm": 2.843991756439209, "learning_rate": 8.082741305367229e-07, "loss": 0.1348, "step": 2103 }, { "epoch": 0.57, "grad_norm": 2.965733051300049, "learning_rate": 8.074056672881867e-07, "loss": 0.1262, "step": 2104 }, { "epoch": 0.58, "grad_norm": 2.747220754623413, "learning_rate": 8.065373548692271e-07, "loss": 0.1179, "step": 2105 }, { "epoch": 0.58, "grad_norm": 2.8165981769561768, "learning_rate": 8.056691939598615e-07, "loss": 0.1217, "step": 2106 }, { "epoch": 0.58, "grad_norm": 2.787397623062134, "learning_rate": 8.048011852399859e-07, "loss": 0.1298, "step": 2107 }, { "epoch": 0.58, "grad_norm": 2.7715790271759033, "learning_rate": 8.039333293893785e-07, "loss": 0.1173, "step": 2108 }, { "epoch": 0.58, "grad_norm": 2.5719151496887207, "learning_rate": 8.030656270876985e-07, "loss": 0.114, "step": 2109 }, { "epoch": 0.58, "grad_norm": 2.8761417865753174, "learning_rate": 8.021980790144826e-07, "loss": 0.1332, "step": 2110 }, { "epoch": 0.58, "grad_norm": 2.492717742919922, "learning_rate": 8.013306858491492e-07, "loss": 0.1121, "step": 2111 }, { "epoch": 0.58, "grad_norm": 2.8049910068511963, "learning_rate": 8.004634482709933e-07, "loss": 0.1208, "step": 2112 }, { "epoch": 0.58, "grad_norm": 2.7873449325561523, "learning_rate": 7.995963669591893e-07, "loss": 0.125, "step": 2113 }, { "epoch": 0.58, "grad_norm": 2.719320774078369, "learning_rate": 7.987294425927893e-07, "loss": 0.1283, "step": 2114 }, { "epoch": 0.58, "grad_norm": 3.1067440509796143, "learning_rate": 7.978626758507216e-07, "loss": 0.1252, "step": 2115 }, { "epoch": 0.58, "grad_norm": 2.72916316986084, "learning_rate": 7.969960674117918e-07, "loss": 0.114, "step": 2116 }, { "epoch": 0.58, "grad_norm": 2.606856107711792, "learning_rate": 7.96129617954681e-07, "loss": 0.114, "step": 2117 }, { "epoch": 0.58, "grad_norm": 2.792314052581787, "learning_rate": 7.952633281579459e-07, "loss": 0.134, "step": 2118 }, { "epoch": 0.58, "grad_norm": 2.6764605045318604, "learning_rate": 7.943971987000191e-07, "loss": 0.1224, "step": 2119 }, { "epoch": 0.58, "grad_norm": 2.646055221557617, "learning_rate": 7.935312302592062e-07, "loss": 0.1042, "step": 2120 }, { "epoch": 0.58, "grad_norm": 2.9272406101226807, "learning_rate": 7.926654235136878e-07, "loss": 0.1242, "step": 2121 }, { "epoch": 0.58, "grad_norm": 2.86539363861084, "learning_rate": 7.917997791415168e-07, "loss": 0.1298, "step": 2122 }, { "epoch": 0.58, "grad_norm": 2.893162727355957, "learning_rate": 7.909342978206197e-07, "loss": 0.1345, "step": 2123 }, { "epoch": 0.58, "grad_norm": 3.0884931087493896, "learning_rate": 7.900689802287959e-07, "loss": 0.1189, "step": 2124 }, { "epoch": 0.58, "grad_norm": 2.792923927307129, "learning_rate": 7.892038270437152e-07, "loss": 0.1192, "step": 2125 }, { "epoch": 0.58, "grad_norm": 2.8450675010681152, "learning_rate": 7.883388389429193e-07, "loss": 0.1304, "step": 2126 }, { "epoch": 0.58, "grad_norm": 2.8471620082855225, "learning_rate": 7.874740166038207e-07, "loss": 0.1287, "step": 2127 }, { "epoch": 0.58, "grad_norm": 2.7425572872161865, "learning_rate": 7.866093607037017e-07, "loss": 0.1115, "step": 2128 }, { "epoch": 0.58, "grad_norm": 2.7999820709228516, "learning_rate": 7.857448719197154e-07, "loss": 0.1102, "step": 2129 }, { "epoch": 0.58, "grad_norm": 2.686962842941284, "learning_rate": 7.848805509288824e-07, "loss": 0.1202, "step": 2130 }, { "epoch": 0.58, "grad_norm": 2.8658480644226074, "learning_rate": 7.84016398408093e-07, "loss": 0.1233, "step": 2131 }, { "epoch": 0.58, "grad_norm": 2.7625598907470703, "learning_rate": 7.831524150341049e-07, "loss": 0.1239, "step": 2132 }, { "epoch": 0.58, "grad_norm": 2.915579080581665, "learning_rate": 7.822886014835435e-07, "loss": 0.1378, "step": 2133 }, { "epoch": 0.58, "grad_norm": 2.6005876064300537, "learning_rate": 7.814249584329022e-07, "loss": 0.1212, "step": 2134 }, { "epoch": 0.58, "grad_norm": 2.9378654956817627, "learning_rate": 7.805614865585395e-07, "loss": 0.1225, "step": 2135 }, { "epoch": 0.58, "grad_norm": 2.862884759902954, "learning_rate": 7.796981865366804e-07, "loss": 0.1266, "step": 2136 }, { "epoch": 0.58, "grad_norm": 2.624966859817505, "learning_rate": 7.788350590434152e-07, "loss": 0.1136, "step": 2137 }, { "epoch": 0.58, "grad_norm": 2.667940378189087, "learning_rate": 7.77972104754699e-07, "loss": 0.1224, "step": 2138 }, { "epoch": 0.58, "grad_norm": 2.760246753692627, "learning_rate": 7.77109324346352e-07, "loss": 0.1287, "step": 2139 }, { "epoch": 0.58, "grad_norm": 2.884101152420044, "learning_rate": 7.762467184940573e-07, "loss": 0.1172, "step": 2140 }, { "epoch": 0.58, "grad_norm": 2.6695945262908936, "learning_rate": 7.75384287873362e-07, "loss": 0.1228, "step": 2141 }, { "epoch": 0.59, "grad_norm": 2.7546286582946777, "learning_rate": 7.745220331596749e-07, "loss": 0.1221, "step": 2142 }, { "epoch": 0.59, "grad_norm": 2.5956485271453857, "learning_rate": 7.73659955028268e-07, "loss": 0.1228, "step": 2143 }, { "epoch": 0.59, "grad_norm": 2.7694950103759766, "learning_rate": 7.727980541542757e-07, "loss": 0.1237, "step": 2144 }, { "epoch": 0.59, "grad_norm": 2.599437952041626, "learning_rate": 7.719363312126914e-07, "loss": 0.1144, "step": 2145 }, { "epoch": 0.59, "grad_norm": 2.906019926071167, "learning_rate": 7.710747868783713e-07, "loss": 0.138, "step": 2146 }, { "epoch": 0.59, "grad_norm": 2.8566348552703857, "learning_rate": 7.702134218260301e-07, "loss": 0.1258, "step": 2147 }, { "epoch": 0.59, "grad_norm": 2.5982682704925537, "learning_rate": 7.693522367302429e-07, "loss": 0.113, "step": 2148 }, { "epoch": 0.59, "grad_norm": 2.8539483547210693, "learning_rate": 7.684912322654448e-07, "loss": 0.1258, "step": 2149 }, { "epoch": 0.59, "grad_norm": 3.0186097621917725, "learning_rate": 7.676304091059272e-07, "loss": 0.1422, "step": 2150 }, { "epoch": 0.59, "grad_norm": 2.9395763874053955, "learning_rate": 7.667697679258416e-07, "loss": 0.1194, "step": 2151 }, { "epoch": 0.59, "grad_norm": 2.899362564086914, "learning_rate": 7.659093093991956e-07, "loss": 0.1408, "step": 2152 }, { "epoch": 0.59, "grad_norm": 2.491899013519287, "learning_rate": 7.650490341998541e-07, "loss": 0.1168, "step": 2153 }, { "epoch": 0.59, "grad_norm": 2.636516809463501, "learning_rate": 7.641889430015393e-07, "loss": 0.1135, "step": 2154 }, { "epoch": 0.59, "grad_norm": 2.9226226806640625, "learning_rate": 7.633290364778283e-07, "loss": 0.1339, "step": 2155 }, { "epoch": 0.59, "grad_norm": 2.9808900356292725, "learning_rate": 7.624693153021536e-07, "loss": 0.1295, "step": 2156 }, { "epoch": 0.59, "grad_norm": 2.777230978012085, "learning_rate": 7.616097801478036e-07, "loss": 0.1233, "step": 2157 }, { "epoch": 0.59, "grad_norm": 2.622840404510498, "learning_rate": 7.607504316879191e-07, "loss": 0.1271, "step": 2158 }, { "epoch": 0.59, "grad_norm": 2.8252713680267334, "learning_rate": 7.598912705954972e-07, "loss": 0.1216, "step": 2159 }, { "epoch": 0.59, "grad_norm": 2.814364433288574, "learning_rate": 7.590322975433856e-07, "loss": 0.1247, "step": 2160 }, { "epoch": 0.59, "grad_norm": 2.7726142406463623, "learning_rate": 7.581735132042866e-07, "loss": 0.1151, "step": 2161 }, { "epoch": 0.59, "grad_norm": 2.667328119277954, "learning_rate": 7.573149182507545e-07, "loss": 0.113, "step": 2162 }, { "epoch": 0.59, "grad_norm": 2.812042474746704, "learning_rate": 7.564565133551945e-07, "loss": 0.1367, "step": 2163 }, { "epoch": 0.59, "grad_norm": 2.9198157787323, "learning_rate": 7.555982991898636e-07, "loss": 0.1263, "step": 2164 }, { "epoch": 0.59, "grad_norm": 2.490802526473999, "learning_rate": 7.547402764268689e-07, "loss": 0.1111, "step": 2165 }, { "epoch": 0.59, "grad_norm": 2.663837194442749, "learning_rate": 7.538824457381679e-07, "loss": 0.1175, "step": 2166 }, { "epoch": 0.59, "grad_norm": 3.181468963623047, "learning_rate": 7.530248077955683e-07, "loss": 0.1515, "step": 2167 }, { "epoch": 0.59, "grad_norm": 2.888683795928955, "learning_rate": 7.521673632707259e-07, "loss": 0.1306, "step": 2168 }, { "epoch": 0.59, "grad_norm": 2.684504508972168, "learning_rate": 7.513101128351453e-07, "loss": 0.1224, "step": 2169 }, { "epoch": 0.59, "grad_norm": 2.837757110595703, "learning_rate": 7.504530571601791e-07, "loss": 0.1198, "step": 2170 }, { "epoch": 0.59, "grad_norm": 2.9464797973632812, "learning_rate": 7.495961969170275e-07, "loss": 0.1255, "step": 2171 }, { "epoch": 0.59, "grad_norm": 2.7112324237823486, "learning_rate": 7.487395327767381e-07, "loss": 0.1133, "step": 2172 }, { "epoch": 0.59, "grad_norm": 2.80619215965271, "learning_rate": 7.478830654102036e-07, "loss": 0.1108, "step": 2173 }, { "epoch": 0.59, "grad_norm": 2.8480100631713867, "learning_rate": 7.470267954881642e-07, "loss": 0.1249, "step": 2174 }, { "epoch": 0.59, "grad_norm": 2.9832444190979004, "learning_rate": 7.461707236812041e-07, "loss": 0.1369, "step": 2175 }, { "epoch": 0.59, "grad_norm": 2.8048830032348633, "learning_rate": 7.453148506597529e-07, "loss": 0.1201, "step": 2176 }, { "epoch": 0.59, "grad_norm": 2.626363515853882, "learning_rate": 7.444591770940852e-07, "loss": 0.1115, "step": 2177 }, { "epoch": 0.59, "grad_norm": 2.7009074687957764, "learning_rate": 7.436037036543183e-07, "loss": 0.1205, "step": 2178 }, { "epoch": 0.6, "grad_norm": 2.7532544136047363, "learning_rate": 7.427484310104135e-07, "loss": 0.114, "step": 2179 }, { "epoch": 0.6, "grad_norm": 2.7397122383117676, "learning_rate": 7.41893359832174e-07, "loss": 0.1201, "step": 2180 }, { "epoch": 0.6, "grad_norm": 2.975896120071411, "learning_rate": 7.410384907892461e-07, "loss": 0.1218, "step": 2181 }, { "epoch": 0.6, "grad_norm": 2.816995620727539, "learning_rate": 7.401838245511181e-07, "loss": 0.1258, "step": 2182 }, { "epoch": 0.6, "grad_norm": 2.8698947429656982, "learning_rate": 7.393293617871179e-07, "loss": 0.1315, "step": 2183 }, { "epoch": 0.6, "grad_norm": 2.6808691024780273, "learning_rate": 7.384751031664158e-07, "loss": 0.1219, "step": 2184 }, { "epoch": 0.6, "grad_norm": 2.5810961723327637, "learning_rate": 7.376210493580211e-07, "loss": 0.1111, "step": 2185 }, { "epoch": 0.6, "grad_norm": 2.4069201946258545, "learning_rate": 7.367672010307826e-07, "loss": 0.11, "step": 2186 }, { "epoch": 0.6, "grad_norm": 3.1182568073272705, "learning_rate": 7.359135588533896e-07, "loss": 0.1351, "step": 2187 }, { "epoch": 0.6, "grad_norm": 2.869210958480835, "learning_rate": 7.350601234943683e-07, "loss": 0.1249, "step": 2188 }, { "epoch": 0.6, "grad_norm": 3.0283706188201904, "learning_rate": 7.342068956220842e-07, "loss": 0.1372, "step": 2189 }, { "epoch": 0.6, "grad_norm": 2.760991334915161, "learning_rate": 7.333538759047389e-07, "loss": 0.1298, "step": 2190 }, { "epoch": 0.6, "grad_norm": 2.651310443878174, "learning_rate": 7.32501065010372e-07, "loss": 0.1134, "step": 2191 }, { "epoch": 0.6, "grad_norm": 2.4590115547180176, "learning_rate": 7.316484636068601e-07, "loss": 0.1139, "step": 2192 }, { "epoch": 0.6, "grad_norm": 2.888315200805664, "learning_rate": 7.307960723619142e-07, "loss": 0.1244, "step": 2193 }, { "epoch": 0.6, "grad_norm": 2.830096960067749, "learning_rate": 7.29943891943082e-07, "loss": 0.1294, "step": 2194 }, { "epoch": 0.6, "grad_norm": 2.517449140548706, "learning_rate": 7.290919230177454e-07, "loss": 0.1124, "step": 2195 }, { "epoch": 0.6, "grad_norm": 2.952615261077881, "learning_rate": 7.282401662531205e-07, "loss": 0.1371, "step": 2196 }, { "epoch": 0.6, "grad_norm": 2.8877615928649902, "learning_rate": 7.273886223162586e-07, "loss": 0.1271, "step": 2197 }, { "epoch": 0.6, "grad_norm": 3.0385706424713135, "learning_rate": 7.265372918740425e-07, "loss": 0.1291, "step": 2198 }, { "epoch": 0.6, "grad_norm": 2.778867483139038, "learning_rate": 7.256861755931894e-07, "loss": 0.1178, "step": 2199 }, { "epoch": 0.6, "grad_norm": 2.944976568222046, "learning_rate": 7.24835274140247e-07, "loss": 0.1254, "step": 2200 }, { "epoch": 0.6, "grad_norm": 2.80108642578125, "learning_rate": 7.239845881815964e-07, "loss": 0.1279, "step": 2201 }, { "epoch": 0.6, "grad_norm": 2.6430721282958984, "learning_rate": 7.231341183834496e-07, "loss": 0.1106, "step": 2202 }, { "epoch": 0.6, "grad_norm": 2.6731457710266113, "learning_rate": 7.222838654118487e-07, "loss": 0.112, "step": 2203 }, { "epoch": 0.6, "grad_norm": 2.8311150074005127, "learning_rate": 7.214338299326666e-07, "loss": 0.1208, "step": 2204 }, { "epoch": 0.6, "grad_norm": 2.6590795516967773, "learning_rate": 7.20584012611605e-07, "loss": 0.123, "step": 2205 }, { "epoch": 0.6, "grad_norm": 2.7941184043884277, "learning_rate": 7.197344141141957e-07, "loss": 0.1184, "step": 2206 }, { "epoch": 0.6, "grad_norm": 2.796651601791382, "learning_rate": 7.188850351057992e-07, "loss": 0.1295, "step": 2207 }, { "epoch": 0.6, "grad_norm": 2.8743748664855957, "learning_rate": 7.180358762516033e-07, "loss": 0.136, "step": 2208 }, { "epoch": 0.6, "grad_norm": 3.0480072498321533, "learning_rate": 7.171869382166237e-07, "loss": 0.1353, "step": 2209 }, { "epoch": 0.6, "grad_norm": 2.6174540519714355, "learning_rate": 7.163382216657033e-07, "loss": 0.1106, "step": 2210 }, { "epoch": 0.6, "grad_norm": 2.704287528991699, "learning_rate": 7.154897272635116e-07, "loss": 0.1105, "step": 2211 }, { "epoch": 0.6, "grad_norm": 3.0001027584075928, "learning_rate": 7.146414556745444e-07, "loss": 0.1249, "step": 2212 }, { "epoch": 0.6, "grad_norm": 2.7849698066711426, "learning_rate": 7.137934075631218e-07, "loss": 0.1157, "step": 2213 }, { "epoch": 0.6, "grad_norm": 2.784977436065674, "learning_rate": 7.129455835933899e-07, "loss": 0.1213, "step": 2214 }, { "epoch": 0.61, "grad_norm": 2.60339617729187, "learning_rate": 7.1209798442932e-07, "loss": 0.1076, "step": 2215 }, { "epoch": 0.61, "grad_norm": 2.718926191329956, "learning_rate": 7.112506107347052e-07, "loss": 0.1218, "step": 2216 }, { "epoch": 0.61, "grad_norm": 3.181000232696533, "learning_rate": 7.104034631731642e-07, "loss": 0.133, "step": 2217 }, { "epoch": 0.61, "grad_norm": 2.939786911010742, "learning_rate": 7.095565424081369e-07, "loss": 0.114, "step": 2218 }, { "epoch": 0.61, "grad_norm": 3.1787872314453125, "learning_rate": 7.087098491028865e-07, "loss": 0.1272, "step": 2219 }, { "epoch": 0.61, "grad_norm": 2.8389174938201904, "learning_rate": 7.078633839204984e-07, "loss": 0.1197, "step": 2220 }, { "epoch": 0.61, "grad_norm": 3.22316837310791, "learning_rate": 7.070171475238785e-07, "loss": 0.1404, "step": 2221 }, { "epoch": 0.61, "grad_norm": 2.7424261569976807, "learning_rate": 7.061711405757537e-07, "loss": 0.1129, "step": 2222 }, { "epoch": 0.61, "grad_norm": 2.948007822036743, "learning_rate": 7.053253637386715e-07, "loss": 0.1078, "step": 2223 }, { "epoch": 0.61, "grad_norm": 3.12904953956604, "learning_rate": 7.04479817674999e-07, "loss": 0.1465, "step": 2224 }, { "epoch": 0.61, "grad_norm": 2.7254724502563477, "learning_rate": 7.03634503046923e-07, "loss": 0.1244, "step": 2225 }, { "epoch": 0.61, "grad_norm": 2.7801690101623535, "learning_rate": 7.027894205164484e-07, "loss": 0.1188, "step": 2226 }, { "epoch": 0.61, "grad_norm": 2.992872953414917, "learning_rate": 7.019445707453988e-07, "loss": 0.1373, "step": 2227 }, { "epoch": 0.61, "grad_norm": 2.7374370098114014, "learning_rate": 7.01099954395415e-07, "loss": 0.1178, "step": 2228 }, { "epoch": 0.61, "grad_norm": 2.887143611907959, "learning_rate": 7.002555721279553e-07, "loss": 0.1181, "step": 2229 }, { "epoch": 0.61, "grad_norm": 2.7964494228363037, "learning_rate": 6.994114246042955e-07, "loss": 0.1256, "step": 2230 }, { "epoch": 0.61, "grad_norm": 2.726381778717041, "learning_rate": 6.985675124855259e-07, "loss": 0.1188, "step": 2231 }, { "epoch": 0.61, "grad_norm": 2.7800586223602295, "learning_rate": 6.977238364325539e-07, "loss": 0.1193, "step": 2232 }, { "epoch": 0.61, "grad_norm": 2.6497507095336914, "learning_rate": 6.96880397106101e-07, "loss": 0.1149, "step": 2233 }, { "epoch": 0.61, "grad_norm": 2.7353129386901855, "learning_rate": 6.960371951667036e-07, "loss": 0.1292, "step": 2234 }, { "epoch": 0.61, "grad_norm": 2.649967670440674, "learning_rate": 6.951942312747134e-07, "loss": 0.1086, "step": 2235 }, { "epoch": 0.61, "grad_norm": 2.7503740787506104, "learning_rate": 6.943515060902935e-07, "loss": 0.1278, "step": 2236 }, { "epoch": 0.61, "grad_norm": 2.9312124252319336, "learning_rate": 6.93509020273422e-07, "loss": 0.1215, "step": 2237 }, { "epoch": 0.61, "grad_norm": 2.8223354816436768, "learning_rate": 6.926667744838881e-07, "loss": 0.127, "step": 2238 }, { "epoch": 0.61, "grad_norm": 2.670806646347046, "learning_rate": 6.918247693812936e-07, "loss": 0.1347, "step": 2239 }, { "epoch": 0.61, "grad_norm": 2.6457080841064453, "learning_rate": 6.909830056250526e-07, "loss": 0.1146, "step": 2240 }, { "epoch": 0.61, "grad_norm": 2.948946714401245, "learning_rate": 6.901414838743886e-07, "loss": 0.1344, "step": 2241 }, { "epoch": 0.61, "grad_norm": 2.909689426422119, "learning_rate": 6.893002047883372e-07, "loss": 0.1219, "step": 2242 }, { "epoch": 0.61, "grad_norm": 2.824042558670044, "learning_rate": 6.884591690257425e-07, "loss": 0.1231, "step": 2243 }, { "epoch": 0.61, "grad_norm": 2.743844985961914, "learning_rate": 6.876183772452587e-07, "loss": 0.1161, "step": 2244 }, { "epoch": 0.61, "grad_norm": 2.7554304599761963, "learning_rate": 6.867778301053495e-07, "loss": 0.1207, "step": 2245 }, { "epoch": 0.61, "grad_norm": 3.1746580600738525, "learning_rate": 6.85937528264286e-07, "loss": 0.1354, "step": 2246 }, { "epoch": 0.61, "grad_norm": 2.7574260234832764, "learning_rate": 6.850974723801479e-07, "loss": 0.1264, "step": 2247 }, { "epoch": 0.61, "grad_norm": 2.657491683959961, "learning_rate": 6.842576631108219e-07, "loss": 0.1035, "step": 2248 }, { "epoch": 0.61, "grad_norm": 2.733588218688965, "learning_rate": 6.834181011140014e-07, "loss": 0.1119, "step": 2249 }, { "epoch": 0.61, "grad_norm": 2.7875351905822754, "learning_rate": 6.825787870471872e-07, "loss": 0.1282, "step": 2250 }, { "epoch": 0.61, "grad_norm": 2.7033324241638184, "learning_rate": 6.817397215676845e-07, "loss": 0.1197, "step": 2251 }, { "epoch": 0.62, "grad_norm": 3.04990291595459, "learning_rate": 6.809009053326049e-07, "loss": 0.1303, "step": 2252 }, { "epoch": 0.62, "grad_norm": 2.886972188949585, "learning_rate": 6.800623389988641e-07, "loss": 0.1342, "step": 2253 }, { "epoch": 0.62, "grad_norm": 2.8929286003112793, "learning_rate": 6.792240232231821e-07, "loss": 0.1216, "step": 2254 }, { "epoch": 0.62, "grad_norm": 2.6549034118652344, "learning_rate": 6.783859586620839e-07, "loss": 0.1084, "step": 2255 }, { "epoch": 0.62, "grad_norm": 2.9380385875701904, "learning_rate": 6.775481459718959e-07, "loss": 0.1389, "step": 2256 }, { "epoch": 0.62, "grad_norm": 2.8192174434661865, "learning_rate": 6.767105858087489e-07, "loss": 0.1221, "step": 2257 }, { "epoch": 0.62, "grad_norm": 2.571758270263672, "learning_rate": 6.758732788285746e-07, "loss": 0.1193, "step": 2258 }, { "epoch": 0.62, "grad_norm": 2.8911731243133545, "learning_rate": 6.750362256871074e-07, "loss": 0.1301, "step": 2259 }, { "epoch": 0.62, "grad_norm": 2.6638855934143066, "learning_rate": 6.741994270398825e-07, "loss": 0.1195, "step": 2260 }, { "epoch": 0.62, "grad_norm": 2.737165927886963, "learning_rate": 6.733628835422358e-07, "loss": 0.1212, "step": 2261 }, { "epoch": 0.62, "grad_norm": 2.893892526626587, "learning_rate": 6.725265958493034e-07, "loss": 0.1329, "step": 2262 }, { "epoch": 0.62, "grad_norm": 2.9284043312072754, "learning_rate": 6.716905646160208e-07, "loss": 0.1224, "step": 2263 }, { "epoch": 0.62, "grad_norm": 2.7682137489318848, "learning_rate": 6.708547904971233e-07, "loss": 0.1164, "step": 2264 }, { "epoch": 0.62, "grad_norm": 2.6199045181274414, "learning_rate": 6.700192741471446e-07, "loss": 0.1106, "step": 2265 }, { "epoch": 0.62, "grad_norm": 2.7449543476104736, "learning_rate": 6.691840162204161e-07, "loss": 0.1181, "step": 2266 }, { "epoch": 0.62, "grad_norm": 3.066565990447998, "learning_rate": 6.683490173710673e-07, "loss": 0.129, "step": 2267 }, { "epoch": 0.62, "grad_norm": 2.739189624786377, "learning_rate": 6.675142782530241e-07, "loss": 0.1096, "step": 2268 }, { "epoch": 0.62, "grad_norm": 2.745358467102051, "learning_rate": 6.6667979952001e-07, "loss": 0.1259, "step": 2269 }, { "epoch": 0.62, "grad_norm": 2.9944846630096436, "learning_rate": 6.658455818255444e-07, "loss": 0.1265, "step": 2270 }, { "epoch": 0.62, "grad_norm": 2.9806787967681885, "learning_rate": 6.650116258229414e-07, "loss": 0.1276, "step": 2271 }, { "epoch": 0.62, "grad_norm": 2.8522584438323975, "learning_rate": 6.641779321653108e-07, "loss": 0.1233, "step": 2272 }, { "epoch": 0.62, "grad_norm": 2.688368558883667, "learning_rate": 6.633445015055574e-07, "loss": 0.1166, "step": 2273 }, { "epoch": 0.62, "grad_norm": 2.6915407180786133, "learning_rate": 6.625113344963787e-07, "loss": 0.1152, "step": 2274 }, { "epoch": 0.62, "grad_norm": 2.67826247215271, "learning_rate": 6.616784317902673e-07, "loss": 0.125, "step": 2275 }, { "epoch": 0.62, "grad_norm": 2.910579204559326, "learning_rate": 6.608457940395075e-07, "loss": 0.1248, "step": 2276 }, { "epoch": 0.62, "grad_norm": 2.5989644527435303, "learning_rate": 6.600134218961764e-07, "loss": 0.12, "step": 2277 }, { "epoch": 0.62, "grad_norm": 2.681771993637085, "learning_rate": 6.591813160121444e-07, "loss": 0.1236, "step": 2278 }, { "epoch": 0.62, "grad_norm": 2.7182021141052246, "learning_rate": 6.583494770390713e-07, "loss": 0.1242, "step": 2279 }, { "epoch": 0.62, "grad_norm": 2.9027819633483887, "learning_rate": 6.575179056284095e-07, "loss": 0.1234, "step": 2280 }, { "epoch": 0.62, "grad_norm": 2.901743173599243, "learning_rate": 6.566866024314007e-07, "loss": 0.1412, "step": 2281 }, { "epoch": 0.62, "grad_norm": 2.7416176795959473, "learning_rate": 6.558555680990771e-07, "loss": 0.1088, "step": 2282 }, { "epoch": 0.62, "grad_norm": 2.6983866691589355, "learning_rate": 6.550248032822612e-07, "loss": 0.1332, "step": 2283 }, { "epoch": 0.62, "grad_norm": 2.726213216781616, "learning_rate": 6.541943086315625e-07, "loss": 0.121, "step": 2284 }, { "epoch": 0.62, "grad_norm": 2.6889567375183105, "learning_rate": 6.533640847973808e-07, "loss": 0.1182, "step": 2285 }, { "epoch": 0.62, "grad_norm": 2.8857784271240234, "learning_rate": 6.525341324299023e-07, "loss": 0.118, "step": 2286 }, { "epoch": 0.62, "grad_norm": 2.8600974082946777, "learning_rate": 6.517044521791015e-07, "loss": 0.1364, "step": 2287 }, { "epoch": 0.63, "grad_norm": 3.007960557937622, "learning_rate": 6.5087504469474e-07, "loss": 0.1312, "step": 2288 }, { "epoch": 0.63, "grad_norm": 2.543405771255493, "learning_rate": 6.500459106263649e-07, "loss": 0.1056, "step": 2289 }, { "epoch": 0.63, "grad_norm": 2.672752857208252, "learning_rate": 6.492170506233099e-07, "loss": 0.1115, "step": 2290 }, { "epoch": 0.63, "grad_norm": 2.805952310562134, "learning_rate": 6.483884653346936e-07, "loss": 0.1235, "step": 2291 }, { "epoch": 0.63, "grad_norm": 2.4604685306549072, "learning_rate": 6.475601554094196e-07, "loss": 0.1159, "step": 2292 }, { "epoch": 0.63, "grad_norm": 2.747077226638794, "learning_rate": 6.467321214961765e-07, "loss": 0.1313, "step": 2293 }, { "epoch": 0.63, "grad_norm": 2.718703269958496, "learning_rate": 6.459043642434355e-07, "loss": 0.1281, "step": 2294 }, { "epoch": 0.63, "grad_norm": 2.6458544731140137, "learning_rate": 6.450768842994522e-07, "loss": 0.116, "step": 2295 }, { "epoch": 0.63, "grad_norm": 2.7539422512054443, "learning_rate": 6.442496823122643e-07, "loss": 0.1172, "step": 2296 }, { "epoch": 0.63, "grad_norm": 3.0029823780059814, "learning_rate": 6.434227589296921e-07, "loss": 0.1219, "step": 2297 }, { "epoch": 0.63, "grad_norm": 2.757765054702759, "learning_rate": 6.425961147993384e-07, "loss": 0.1249, "step": 2298 }, { "epoch": 0.63, "grad_norm": 2.759751319885254, "learning_rate": 6.417697505685859e-07, "loss": 0.1235, "step": 2299 }, { "epoch": 0.63, "grad_norm": 2.6895663738250732, "learning_rate": 6.409436668845996e-07, "loss": 0.1117, "step": 2300 }, { "epoch": 0.63, "grad_norm": 2.910151243209839, "learning_rate": 6.401178643943233e-07, "loss": 0.131, "step": 2301 }, { "epoch": 0.63, "grad_norm": 2.927711248397827, "learning_rate": 6.392923437444815e-07, "loss": 0.117, "step": 2302 }, { "epoch": 0.63, "grad_norm": 2.8698267936706543, "learning_rate": 6.384671055815782e-07, "loss": 0.1251, "step": 2303 }, { "epoch": 0.63, "grad_norm": 3.05301833152771, "learning_rate": 6.376421505518954e-07, "loss": 0.1375, "step": 2304 }, { "epoch": 0.63, "grad_norm": 2.545640468597412, "learning_rate": 6.368174793014943e-07, "loss": 0.1141, "step": 2305 }, { "epoch": 0.63, "grad_norm": 2.779310703277588, "learning_rate": 6.359930924762122e-07, "loss": 0.1162, "step": 2306 }, { "epoch": 0.63, "grad_norm": 2.912362575531006, "learning_rate": 6.351689907216657e-07, "loss": 0.1192, "step": 2307 }, { "epoch": 0.63, "grad_norm": 2.7148022651672363, "learning_rate": 6.343451746832471e-07, "loss": 0.1133, "step": 2308 }, { "epoch": 0.63, "grad_norm": 2.3892126083374023, "learning_rate": 6.335216450061247e-07, "loss": 0.112, "step": 2309 }, { "epoch": 0.63, "grad_norm": 2.9409687519073486, "learning_rate": 6.326984023352434e-07, "loss": 0.1256, "step": 2310 }, { "epoch": 0.63, "grad_norm": 2.7799158096313477, "learning_rate": 6.31875447315322e-07, "loss": 0.1267, "step": 2311 }, { "epoch": 0.63, "grad_norm": 2.873560905456543, "learning_rate": 6.310527805908556e-07, "loss": 0.1264, "step": 2312 }, { "epoch": 0.63, "grad_norm": 2.6879260540008545, "learning_rate": 6.302304028061125e-07, "loss": 0.1344, "step": 2313 }, { "epoch": 0.63, "grad_norm": 2.8451976776123047, "learning_rate": 6.29408314605135e-07, "loss": 0.1267, "step": 2314 }, { "epoch": 0.63, "grad_norm": 2.7981550693511963, "learning_rate": 6.285865166317386e-07, "loss": 0.1287, "step": 2315 }, { "epoch": 0.63, "grad_norm": 2.8838305473327637, "learning_rate": 6.277650095295112e-07, "loss": 0.1207, "step": 2316 }, { "epoch": 0.63, "grad_norm": 2.844289779663086, "learning_rate": 6.269437939418136e-07, "loss": 0.1218, "step": 2317 }, { "epoch": 0.63, "grad_norm": 2.7332122325897217, "learning_rate": 6.26122870511778e-07, "loss": 0.1265, "step": 2318 }, { "epoch": 0.63, "grad_norm": 2.9675590991973877, "learning_rate": 6.253022398823075e-07, "loss": 0.1281, "step": 2319 }, { "epoch": 0.63, "grad_norm": 2.5885355472564697, "learning_rate": 6.244819026960761e-07, "loss": 0.1178, "step": 2320 }, { "epoch": 0.63, "grad_norm": 2.498624086380005, "learning_rate": 6.236618595955277e-07, "loss": 0.1143, "step": 2321 }, { "epoch": 0.63, "grad_norm": 2.7661285400390625, "learning_rate": 6.228421112228767e-07, "loss": 0.1229, "step": 2322 }, { "epoch": 0.63, "grad_norm": 2.705780029296875, "learning_rate": 6.220226582201061e-07, "loss": 0.1307, "step": 2323 }, { "epoch": 0.63, "grad_norm": 2.7333617210388184, "learning_rate": 6.212035012289674e-07, "loss": 0.1181, "step": 2324 }, { "epoch": 0.64, "grad_norm": 2.924672842025757, "learning_rate": 6.203846408909808e-07, "loss": 0.1299, "step": 2325 }, { "epoch": 0.64, "grad_norm": 2.5709447860717773, "learning_rate": 6.195660778474334e-07, "loss": 0.1165, "step": 2326 }, { "epoch": 0.64, "grad_norm": 2.628356695175171, "learning_rate": 6.187478127393806e-07, "loss": 0.1154, "step": 2327 }, { "epoch": 0.64, "grad_norm": 2.796377420425415, "learning_rate": 6.179298462076437e-07, "loss": 0.1121, "step": 2328 }, { "epoch": 0.64, "grad_norm": 2.963430166244507, "learning_rate": 6.1711217889281e-07, "loss": 0.1289, "step": 2329 }, { "epoch": 0.64, "grad_norm": 2.8228838443756104, "learning_rate": 6.162948114352328e-07, "loss": 0.1259, "step": 2330 }, { "epoch": 0.64, "grad_norm": 2.864867687225342, "learning_rate": 6.154777444750312e-07, "loss": 0.13, "step": 2331 }, { "epoch": 0.64, "grad_norm": 2.9602138996124268, "learning_rate": 6.146609786520877e-07, "loss": 0.1195, "step": 2332 }, { "epoch": 0.64, "grad_norm": 2.576875925064087, "learning_rate": 6.1384451460605e-07, "loss": 0.1136, "step": 2333 }, { "epoch": 0.64, "grad_norm": 2.717334032058716, "learning_rate": 6.130283529763286e-07, "loss": 0.1239, "step": 2334 }, { "epoch": 0.64, "grad_norm": 2.629843235015869, "learning_rate": 6.122124944020977e-07, "loss": 0.1163, "step": 2335 }, { "epoch": 0.64, "grad_norm": 2.5194149017333984, "learning_rate": 6.113969395222948e-07, "loss": 0.1007, "step": 2336 }, { "epoch": 0.64, "grad_norm": 2.5218937397003174, "learning_rate": 6.105816889756179e-07, "loss": 0.1052, "step": 2337 }, { "epoch": 0.64, "grad_norm": 2.9415934085845947, "learning_rate": 6.097667434005285e-07, "loss": 0.1188, "step": 2338 }, { "epoch": 0.64, "grad_norm": 2.800436019897461, "learning_rate": 6.089521034352474e-07, "loss": 0.1134, "step": 2339 }, { "epoch": 0.64, "grad_norm": 3.1739115715026855, "learning_rate": 6.081377697177576e-07, "loss": 0.1232, "step": 2340 }, { "epoch": 0.64, "grad_norm": 2.7899343967437744, "learning_rate": 6.073237428858019e-07, "loss": 0.1195, "step": 2341 }, { "epoch": 0.64, "grad_norm": 2.8649964332580566, "learning_rate": 6.06510023576882e-07, "loss": 0.1286, "step": 2342 }, { "epoch": 0.64, "grad_norm": 2.611206531524658, "learning_rate": 6.0569661242826e-07, "loss": 0.1059, "step": 2343 }, { "epoch": 0.64, "grad_norm": 2.775590658187866, "learning_rate": 6.048835100769555e-07, "loss": 0.1102, "step": 2344 }, { "epoch": 0.64, "grad_norm": 2.5424020290374756, "learning_rate": 6.040707171597465e-07, "loss": 0.1076, "step": 2345 }, { "epoch": 0.64, "grad_norm": 2.8652491569519043, "learning_rate": 6.032582343131698e-07, "loss": 0.1248, "step": 2346 }, { "epoch": 0.64, "grad_norm": 2.5659899711608887, "learning_rate": 6.024460621735179e-07, "loss": 0.1187, "step": 2347 }, { "epoch": 0.64, "grad_norm": 2.6558399200439453, "learning_rate": 6.016342013768407e-07, "loss": 0.1201, "step": 2348 }, { "epoch": 0.64, "grad_norm": 2.8213798999786377, "learning_rate": 6.00822652558944e-07, "loss": 0.1217, "step": 2349 }, { "epoch": 0.64, "grad_norm": 3.105112075805664, "learning_rate": 6.000114163553893e-07, "loss": 0.1288, "step": 2350 }, { "epoch": 0.64, "grad_norm": 2.8976495265960693, "learning_rate": 5.99200493401494e-07, "loss": 0.1345, "step": 2351 }, { "epoch": 0.64, "grad_norm": 3.064840316772461, "learning_rate": 5.983898843323291e-07, "loss": 0.1432, "step": 2352 }, { "epoch": 0.64, "grad_norm": 2.7999205589294434, "learning_rate": 5.975795897827205e-07, "loss": 0.1311, "step": 2353 }, { "epoch": 0.64, "grad_norm": 2.657651424407959, "learning_rate": 5.967696103872471e-07, "loss": 0.1074, "step": 2354 }, { "epoch": 0.64, "grad_norm": 3.456746816635132, "learning_rate": 5.959599467802417e-07, "loss": 0.131, "step": 2355 }, { "epoch": 0.64, "grad_norm": 2.8039300441741943, "learning_rate": 5.951505995957899e-07, "loss": 0.1156, "step": 2356 }, { "epoch": 0.64, "grad_norm": 2.621748447418213, "learning_rate": 5.943415694677285e-07, "loss": 0.1154, "step": 2357 }, { "epoch": 0.64, "grad_norm": 3.092664957046509, "learning_rate": 5.935328570296472e-07, "loss": 0.1358, "step": 2358 }, { "epoch": 0.64, "grad_norm": 2.7029240131378174, "learning_rate": 5.927244629148854e-07, "loss": 0.1096, "step": 2359 }, { "epoch": 0.64, "grad_norm": 2.715873956680298, "learning_rate": 5.919163877565349e-07, "loss": 0.115, "step": 2360 }, { "epoch": 0.64, "grad_norm": 2.766658306121826, "learning_rate": 5.911086321874371e-07, "loss": 0.1164, "step": 2361 }, { "epoch": 0.65, "grad_norm": 2.7069411277770996, "learning_rate": 5.903011968401823e-07, "loss": 0.1233, "step": 2362 }, { "epoch": 0.65, "grad_norm": 2.9014713764190674, "learning_rate": 5.894940823471112e-07, "loss": 0.1331, "step": 2363 }, { "epoch": 0.65, "grad_norm": 2.8393664360046387, "learning_rate": 5.886872893403118e-07, "loss": 0.115, "step": 2364 }, { "epoch": 0.65, "grad_norm": 2.6533043384552, "learning_rate": 5.878808184516224e-07, "loss": 0.113, "step": 2365 }, { "epoch": 0.65, "grad_norm": 2.948765993118286, "learning_rate": 5.870746703126272e-07, "loss": 0.1353, "step": 2366 }, { "epoch": 0.65, "grad_norm": 3.019033670425415, "learning_rate": 5.862688455546585e-07, "loss": 0.1352, "step": 2367 }, { "epoch": 0.65, "grad_norm": 2.90289044380188, "learning_rate": 5.854633448087951e-07, "loss": 0.1364, "step": 2368 }, { "epoch": 0.65, "grad_norm": 2.740635871887207, "learning_rate": 5.846581687058616e-07, "loss": 0.1271, "step": 2369 }, { "epoch": 0.65, "grad_norm": 2.637136459350586, "learning_rate": 5.838533178764294e-07, "loss": 0.1151, "step": 2370 }, { "epoch": 0.65, "grad_norm": 2.9797353744506836, "learning_rate": 5.830487929508147e-07, "loss": 0.1489, "step": 2371 }, { "epoch": 0.65, "grad_norm": 2.595749855041504, "learning_rate": 5.82244594559078e-07, "loss": 0.1263, "step": 2372 }, { "epoch": 0.65, "grad_norm": 2.810940742492676, "learning_rate": 5.814407233310248e-07, "loss": 0.1234, "step": 2373 }, { "epoch": 0.65, "grad_norm": 2.7239415645599365, "learning_rate": 5.806371798962039e-07, "loss": 0.1184, "step": 2374 }, { "epoch": 0.65, "grad_norm": 2.6839990615844727, "learning_rate": 5.798339648839073e-07, "loss": 0.1225, "step": 2375 }, { "epoch": 0.65, "grad_norm": 2.758370876312256, "learning_rate": 5.790310789231703e-07, "loss": 0.1281, "step": 2376 }, { "epoch": 0.65, "grad_norm": 2.836064577102661, "learning_rate": 5.782285226427699e-07, "loss": 0.1255, "step": 2377 }, { "epoch": 0.65, "grad_norm": 2.8172318935394287, "learning_rate": 5.774262966712258e-07, "loss": 0.1118, "step": 2378 }, { "epoch": 0.65, "grad_norm": 2.9178755283355713, "learning_rate": 5.766244016367981e-07, "loss": 0.1438, "step": 2379 }, { "epoch": 0.65, "grad_norm": 2.657327890396118, "learning_rate": 5.758228381674878e-07, "loss": 0.1161, "step": 2380 }, { "epoch": 0.65, "grad_norm": 2.856813669204712, "learning_rate": 5.750216068910374e-07, "loss": 0.143, "step": 2381 }, { "epoch": 0.65, "grad_norm": 2.58343243598938, "learning_rate": 5.742207084349273e-07, "loss": 0.1159, "step": 2382 }, { "epoch": 0.65, "grad_norm": 2.7350480556488037, "learning_rate": 5.734201434263792e-07, "loss": 0.1337, "step": 2383 }, { "epoch": 0.65, "grad_norm": 2.882652997970581, "learning_rate": 5.726199124923526e-07, "loss": 0.1339, "step": 2384 }, { "epoch": 0.65, "grad_norm": 2.671844482421875, "learning_rate": 5.718200162595448e-07, "loss": 0.1202, "step": 2385 }, { "epoch": 0.65, "grad_norm": 2.901212215423584, "learning_rate": 5.710204553543927e-07, "loss": 0.1299, "step": 2386 }, { "epoch": 0.65, "grad_norm": 2.746882200241089, "learning_rate": 5.702212304030689e-07, "loss": 0.1198, "step": 2387 }, { "epoch": 0.65, "grad_norm": 2.987044095993042, "learning_rate": 5.694223420314845e-07, "loss": 0.1174, "step": 2388 }, { "epoch": 0.65, "grad_norm": 2.5937628746032715, "learning_rate": 5.686237908652854e-07, "loss": 0.1078, "step": 2389 }, { "epoch": 0.65, "grad_norm": 2.7164485454559326, "learning_rate": 5.678255775298542e-07, "loss": 0.1222, "step": 2390 }, { "epoch": 0.65, "grad_norm": 2.930262804031372, "learning_rate": 5.670277026503092e-07, "loss": 0.1263, "step": 2391 }, { "epoch": 0.65, "grad_norm": 3.0716938972473145, "learning_rate": 5.662301668515029e-07, "loss": 0.1377, "step": 2392 }, { "epoch": 0.65, "grad_norm": 2.950962543487549, "learning_rate": 5.654329707580232e-07, "loss": 0.1325, "step": 2393 }, { "epoch": 0.65, "grad_norm": 2.8141679763793945, "learning_rate": 5.646361149941911e-07, "loss": 0.1124, "step": 2394 }, { "epoch": 0.65, "grad_norm": 2.8364176750183105, "learning_rate": 5.638396001840612e-07, "loss": 0.1165, "step": 2395 }, { "epoch": 0.65, "grad_norm": 2.9714386463165283, "learning_rate": 5.630434269514218e-07, "loss": 0.1164, "step": 2396 }, { "epoch": 0.65, "grad_norm": 2.8221168518066406, "learning_rate": 5.622475959197925e-07, "loss": 0.1156, "step": 2397 }, { "epoch": 0.66, "grad_norm": 2.6841065883636475, "learning_rate": 5.614521077124266e-07, "loss": 0.1061, "step": 2398 }, { "epoch": 0.66, "grad_norm": 2.5104100704193115, "learning_rate": 5.606569629523072e-07, "loss": 0.106, "step": 2399 }, { "epoch": 0.66, "grad_norm": 3.1628897190093994, "learning_rate": 5.598621622621489e-07, "loss": 0.1297, "step": 2400 }, { "epoch": 0.66, "grad_norm": 3.006103992462158, "learning_rate": 5.590677062643976e-07, "loss": 0.1284, "step": 2401 }, { "epoch": 0.66, "grad_norm": 2.9417686462402344, "learning_rate": 5.582735955812283e-07, "loss": 0.1252, "step": 2402 }, { "epoch": 0.66, "grad_norm": 2.9639692306518555, "learning_rate": 5.574798308345468e-07, "loss": 0.1342, "step": 2403 }, { "epoch": 0.66, "grad_norm": 2.877833366394043, "learning_rate": 5.566864126459863e-07, "loss": 0.1246, "step": 2404 }, { "epoch": 0.66, "grad_norm": 2.974093198776245, "learning_rate": 5.558933416369097e-07, "loss": 0.1227, "step": 2405 }, { "epoch": 0.66, "grad_norm": 2.7533552646636963, "learning_rate": 5.551006184284082e-07, "loss": 0.125, "step": 2406 }, { "epoch": 0.66, "grad_norm": 2.908942461013794, "learning_rate": 5.543082436412994e-07, "loss": 0.1246, "step": 2407 }, { "epoch": 0.66, "grad_norm": 2.829738140106201, "learning_rate": 5.535162178961299e-07, "loss": 0.1216, "step": 2408 }, { "epoch": 0.66, "grad_norm": 2.8664655685424805, "learning_rate": 5.527245418131713e-07, "loss": 0.1132, "step": 2409 }, { "epoch": 0.66, "grad_norm": 2.7472426891326904, "learning_rate": 5.519332160124215e-07, "loss": 0.1195, "step": 2410 }, { "epoch": 0.66, "grad_norm": 2.919637441635132, "learning_rate": 5.511422411136056e-07, "loss": 0.126, "step": 2411 }, { "epoch": 0.66, "grad_norm": 2.673835039138794, "learning_rate": 5.503516177361717e-07, "loss": 0.1224, "step": 2412 }, { "epoch": 0.66, "grad_norm": 2.6640067100524902, "learning_rate": 5.495613464992943e-07, "loss": 0.1164, "step": 2413 }, { "epoch": 0.66, "grad_norm": 2.688185214996338, "learning_rate": 5.487714280218722e-07, "loss": 0.1049, "step": 2414 }, { "epoch": 0.66, "grad_norm": 2.598675012588501, "learning_rate": 5.479818629225259e-07, "loss": 0.106, "step": 2415 }, { "epoch": 0.66, "grad_norm": 2.9329745769500732, "learning_rate": 5.471926518196017e-07, "loss": 0.1236, "step": 2416 }, { "epoch": 0.66, "grad_norm": 3.0776431560516357, "learning_rate": 5.464037953311667e-07, "loss": 0.1253, "step": 2417 }, { "epoch": 0.66, "grad_norm": 2.7781357765197754, "learning_rate": 5.456152940750113e-07, "loss": 0.1181, "step": 2418 }, { "epoch": 0.66, "grad_norm": 2.645616054534912, "learning_rate": 5.448271486686486e-07, "loss": 0.1118, "step": 2419 }, { "epoch": 0.66, "grad_norm": 2.6715989112854004, "learning_rate": 5.440393597293102e-07, "loss": 0.1135, "step": 2420 }, { "epoch": 0.66, "grad_norm": 2.7128186225891113, "learning_rate": 5.432519278739514e-07, "loss": 0.1079, "step": 2421 }, { "epoch": 0.66, "grad_norm": 2.6940815448760986, "learning_rate": 5.42464853719246e-07, "loss": 0.1138, "step": 2422 }, { "epoch": 0.66, "grad_norm": 2.902479410171509, "learning_rate": 5.416781378815885e-07, "loss": 0.1217, "step": 2423 }, { "epoch": 0.66, "grad_norm": 2.772676944732666, "learning_rate": 5.408917809770938e-07, "loss": 0.118, "step": 2424 }, { "epoch": 0.66, "grad_norm": 2.912195920944214, "learning_rate": 5.401057836215927e-07, "loss": 0.1201, "step": 2425 }, { "epoch": 0.66, "grad_norm": 2.821068286895752, "learning_rate": 5.393201464306378e-07, "loss": 0.1235, "step": 2426 }, { "epoch": 0.66, "grad_norm": 2.5337672233581543, "learning_rate": 5.38534870019497e-07, "loss": 0.1128, "step": 2427 }, { "epoch": 0.66, "grad_norm": 2.879474639892578, "learning_rate": 5.377499550031572e-07, "loss": 0.1218, "step": 2428 }, { "epoch": 0.66, "grad_norm": 2.6883537769317627, "learning_rate": 5.369654019963228e-07, "loss": 0.109, "step": 2429 }, { "epoch": 0.66, "grad_norm": 2.7536401748657227, "learning_rate": 5.361812116134121e-07, "loss": 0.1268, "step": 2430 }, { "epoch": 0.66, "grad_norm": 2.6968419551849365, "learning_rate": 5.35397384468562e-07, "loss": 0.1168, "step": 2431 }, { "epoch": 0.66, "grad_norm": 2.8103954792022705, "learning_rate": 5.346139211756236e-07, "loss": 0.111, "step": 2432 }, { "epoch": 0.66, "grad_norm": 2.854973793029785, "learning_rate": 5.338308223481637e-07, "loss": 0.1319, "step": 2433 }, { "epoch": 0.66, "grad_norm": 2.7689507007598877, "learning_rate": 5.330480885994639e-07, "loss": 0.1263, "step": 2434 }, { "epoch": 0.67, "grad_norm": 2.8621950149536133, "learning_rate": 5.322657205425183e-07, "loss": 0.1284, "step": 2435 }, { "epoch": 0.67, "grad_norm": 2.8837008476257324, "learning_rate": 5.314837187900366e-07, "loss": 0.1369, "step": 2436 }, { "epoch": 0.67, "grad_norm": 2.7992589473724365, "learning_rate": 5.307020839544398e-07, "loss": 0.1098, "step": 2437 }, { "epoch": 0.67, "grad_norm": 2.9363605976104736, "learning_rate": 5.299208166478632e-07, "loss": 0.1278, "step": 2438 }, { "epoch": 0.67, "grad_norm": 2.9051101207733154, "learning_rate": 5.291399174821538e-07, "loss": 0.1304, "step": 2439 }, { "epoch": 0.67, "grad_norm": 2.629927635192871, "learning_rate": 5.283593870688697e-07, "loss": 0.1085, "step": 2440 }, { "epoch": 0.67, "grad_norm": 2.664454936981201, "learning_rate": 5.275792260192804e-07, "loss": 0.1234, "step": 2441 }, { "epoch": 0.67, "grad_norm": 3.012160301208496, "learning_rate": 5.267994349443661e-07, "loss": 0.1287, "step": 2442 }, { "epoch": 0.67, "grad_norm": 3.0345184803009033, "learning_rate": 5.260200144548177e-07, "loss": 0.1313, "step": 2443 }, { "epoch": 0.67, "grad_norm": 3.057971715927124, "learning_rate": 5.252409651610363e-07, "loss": 0.1307, "step": 2444 }, { "epoch": 0.67, "grad_norm": 2.6942265033721924, "learning_rate": 5.244622876731308e-07, "loss": 0.1145, "step": 2445 }, { "epoch": 0.67, "grad_norm": 2.55898380279541, "learning_rate": 5.236839826009201e-07, "loss": 0.1121, "step": 2446 }, { "epoch": 0.67, "grad_norm": 3.0462722778320312, "learning_rate": 5.229060505539307e-07, "loss": 0.1275, "step": 2447 }, { "epoch": 0.67, "grad_norm": 2.7524399757385254, "learning_rate": 5.221284921413973e-07, "loss": 0.1218, "step": 2448 }, { "epoch": 0.67, "grad_norm": 2.9039595127105713, "learning_rate": 5.21351307972263e-07, "loss": 0.1348, "step": 2449 }, { "epoch": 0.67, "grad_norm": 2.817802906036377, "learning_rate": 5.205744986551762e-07, "loss": 0.1172, "step": 2450 }, { "epoch": 0.67, "grad_norm": 2.7943778038024902, "learning_rate": 5.197980647984921e-07, "loss": 0.1326, "step": 2451 }, { "epoch": 0.67, "grad_norm": 2.7645349502563477, "learning_rate": 5.190220070102727e-07, "loss": 0.1091, "step": 2452 }, { "epoch": 0.67, "grad_norm": 2.627145290374756, "learning_rate": 5.182463258982846e-07, "loss": 0.119, "step": 2453 }, { "epoch": 0.67, "grad_norm": 2.776512384414673, "learning_rate": 5.1747102207e-07, "loss": 0.1259, "step": 2454 }, { "epoch": 0.67, "grad_norm": 2.8029136657714844, "learning_rate": 5.166960961325955e-07, "loss": 0.116, "step": 2455 }, { "epoch": 0.67, "grad_norm": 3.0958304405212402, "learning_rate": 5.159215486929509e-07, "loss": 0.1193, "step": 2456 }, { "epoch": 0.67, "grad_norm": 3.364405870437622, "learning_rate": 5.151473803576512e-07, "loss": 0.1107, "step": 2457 }, { "epoch": 0.67, "grad_norm": 3.164470672607422, "learning_rate": 5.143735917329827e-07, "loss": 0.1337, "step": 2458 }, { "epoch": 0.67, "grad_norm": 2.607489824295044, "learning_rate": 5.136001834249364e-07, "loss": 0.1072, "step": 2459 }, { "epoch": 0.67, "grad_norm": 2.848759889602661, "learning_rate": 5.128271560392037e-07, "loss": 0.12, "step": 2460 }, { "epoch": 0.67, "grad_norm": 2.950552225112915, "learning_rate": 5.120545101811777e-07, "loss": 0.1168, "step": 2461 }, { "epoch": 0.67, "grad_norm": 2.749840021133423, "learning_rate": 5.112822464559544e-07, "loss": 0.1175, "step": 2462 }, { "epoch": 0.67, "grad_norm": 2.6408185958862305, "learning_rate": 5.105103654683285e-07, "loss": 0.1178, "step": 2463 }, { "epoch": 0.67, "grad_norm": 2.799758195877075, "learning_rate": 5.097388678227967e-07, "loss": 0.1226, "step": 2464 }, { "epoch": 0.67, "grad_norm": 2.613769769668579, "learning_rate": 5.089677541235543e-07, "loss": 0.1129, "step": 2465 }, { "epoch": 0.67, "grad_norm": 2.95413875579834, "learning_rate": 5.081970249744959e-07, "loss": 0.129, "step": 2466 }, { "epoch": 0.67, "grad_norm": 2.8071627616882324, "learning_rate": 5.07426680979216e-07, "loss": 0.1158, "step": 2467 }, { "epoch": 0.67, "grad_norm": 2.932875871658325, "learning_rate": 5.066567227410063e-07, "loss": 0.1216, "step": 2468 }, { "epoch": 0.67, "grad_norm": 3.034184217453003, "learning_rate": 5.058871508628575e-07, "loss": 0.1504, "step": 2469 }, { "epoch": 0.67, "grad_norm": 2.742445945739746, "learning_rate": 5.051179659474567e-07, "loss": 0.1194, "step": 2470 }, { "epoch": 0.67, "grad_norm": 2.574291706085205, "learning_rate": 5.043491685971879e-07, "loss": 0.103, "step": 2471 }, { "epoch": 0.68, "grad_norm": 2.6973471641540527, "learning_rate": 5.035807594141332e-07, "loss": 0.1068, "step": 2472 }, { "epoch": 0.68, "grad_norm": 2.9927546977996826, "learning_rate": 5.028127390000683e-07, "loss": 0.1222, "step": 2473 }, { "epoch": 0.68, "grad_norm": 2.7439558506011963, "learning_rate": 5.020451079564669e-07, "loss": 0.1153, "step": 2474 }, { "epoch": 0.68, "grad_norm": 2.746530532836914, "learning_rate": 5.012778668844959e-07, "loss": 0.1248, "step": 2475 }, { "epoch": 0.68, "grad_norm": 2.7004928588867188, "learning_rate": 5.005110163850173e-07, "loss": 0.1228, "step": 2476 }, { "epoch": 0.68, "grad_norm": 2.6957995891571045, "learning_rate": 4.997445570585878e-07, "loss": 0.1207, "step": 2477 }, { "epoch": 0.68, "grad_norm": 2.921882390975952, "learning_rate": 4.98978489505457e-07, "loss": 0.1311, "step": 2478 }, { "epoch": 0.68, "grad_norm": 2.918653964996338, "learning_rate": 4.982128143255684e-07, "loss": 0.1262, "step": 2479 }, { "epoch": 0.68, "grad_norm": 2.9749364852905273, "learning_rate": 4.974475321185578e-07, "loss": 0.1117, "step": 2480 }, { "epoch": 0.68, "grad_norm": 2.6308913230895996, "learning_rate": 4.966826434837527e-07, "loss": 0.118, "step": 2481 }, { "epoch": 0.68, "grad_norm": 2.6968283653259277, "learning_rate": 4.959181490201736e-07, "loss": 0.1064, "step": 2482 }, { "epoch": 0.68, "grad_norm": 3.173630952835083, "learning_rate": 4.951540493265313e-07, "loss": 0.139, "step": 2483 }, { "epoch": 0.68, "grad_norm": 2.7071099281311035, "learning_rate": 4.943903450012281e-07, "loss": 0.1234, "step": 2484 }, { "epoch": 0.68, "grad_norm": 2.7777867317199707, "learning_rate": 4.936270366423563e-07, "loss": 0.1213, "step": 2485 }, { "epoch": 0.68, "grad_norm": 2.982285261154175, "learning_rate": 4.928641248476977e-07, "loss": 0.1232, "step": 2486 }, { "epoch": 0.68, "grad_norm": 2.5845227241516113, "learning_rate": 4.921016102147247e-07, "loss": 0.106, "step": 2487 }, { "epoch": 0.68, "grad_norm": 2.687197208404541, "learning_rate": 4.913394933405974e-07, "loss": 0.1112, "step": 2488 }, { "epoch": 0.68, "grad_norm": 2.9104907512664795, "learning_rate": 4.905777748221656e-07, "loss": 0.1381, "step": 2489 }, { "epoch": 0.68, "grad_norm": 2.659237861633301, "learning_rate": 4.89816455255966e-07, "loss": 0.1051, "step": 2490 }, { "epoch": 0.68, "grad_norm": 2.7831077575683594, "learning_rate": 4.89055535238223e-07, "loss": 0.1163, "step": 2491 }, { "epoch": 0.68, "grad_norm": 2.8132784366607666, "learning_rate": 4.882950153648492e-07, "loss": 0.1323, "step": 2492 }, { "epoch": 0.68, "grad_norm": 2.8530664443969727, "learning_rate": 4.875348962314426e-07, "loss": 0.122, "step": 2493 }, { "epoch": 0.68, "grad_norm": 2.664740800857544, "learning_rate": 4.867751784332884e-07, "loss": 0.1148, "step": 2494 }, { "epoch": 0.68, "grad_norm": 2.7489805221557617, "learning_rate": 4.860158625653564e-07, "loss": 0.1177, "step": 2495 }, { "epoch": 0.68, "grad_norm": 2.747615098953247, "learning_rate": 4.852569492223021e-07, "loss": 0.1161, "step": 2496 }, { "epoch": 0.68, "grad_norm": 2.894929885864258, "learning_rate": 4.844984389984663e-07, "loss": 0.1238, "step": 2497 }, { "epoch": 0.68, "grad_norm": 3.0197770595550537, "learning_rate": 4.83740332487873e-07, "loss": 0.1434, "step": 2498 }, { "epoch": 0.68, "grad_norm": 2.6346349716186523, "learning_rate": 4.829826302842314e-07, "loss": 0.1021, "step": 2499 }, { "epoch": 0.68, "grad_norm": 2.76969313621521, "learning_rate": 4.82225332980933e-07, "loss": 0.1169, "step": 2500 }, { "epoch": 0.68, "grad_norm": 2.4715230464935303, "learning_rate": 4.81468441171052e-07, "loss": 0.1039, "step": 2501 }, { "epoch": 0.68, "grad_norm": 2.683863878250122, "learning_rate": 4.807119554473465e-07, "loss": 0.1121, "step": 2502 }, { "epoch": 0.68, "grad_norm": 3.25837779045105, "learning_rate": 4.799558764022549e-07, "loss": 0.1323, "step": 2503 }, { "epoch": 0.68, "grad_norm": 3.516805648803711, "learning_rate": 4.792002046278984e-07, "loss": 0.1342, "step": 2504 }, { "epoch": 0.68, "grad_norm": 2.6152870655059814, "learning_rate": 4.784449407160786e-07, "loss": 0.1062, "step": 2505 }, { "epoch": 0.68, "grad_norm": 2.5211596488952637, "learning_rate": 4.776900852582771e-07, "loss": 0.1045, "step": 2506 }, { "epoch": 0.68, "grad_norm": 2.9381637573242188, "learning_rate": 4.769356388456573e-07, "loss": 0.1261, "step": 2507 }, { "epoch": 0.69, "grad_norm": 2.643887519836426, "learning_rate": 4.7618160206906056e-07, "loss": 0.1156, "step": 2508 }, { "epoch": 0.69, "grad_norm": 2.8069515228271484, "learning_rate": 4.7542797551900824e-07, "loss": 0.1125, "step": 2509 }, { "epoch": 0.69, "grad_norm": 2.7435302734375, "learning_rate": 4.7467475978570136e-07, "loss": 0.1125, "step": 2510 }, { "epoch": 0.69, "grad_norm": 2.5782227516174316, "learning_rate": 4.7392195545901657e-07, "loss": 0.1056, "step": 2511 }, { "epoch": 0.69, "grad_norm": 2.9041056632995605, "learning_rate": 4.731695631285111e-07, "loss": 0.1408, "step": 2512 }, { "epoch": 0.69, "grad_norm": 2.7634119987487793, "learning_rate": 4.7241758338341763e-07, "loss": 0.1168, "step": 2513 }, { "epoch": 0.69, "grad_norm": 2.9808712005615234, "learning_rate": 4.7166601681264673e-07, "loss": 0.1344, "step": 2514 }, { "epoch": 0.69, "grad_norm": 3.2197020053863525, "learning_rate": 4.70914864004786e-07, "loss": 0.127, "step": 2515 }, { "epoch": 0.69, "grad_norm": 2.745090961456299, "learning_rate": 4.701641255480965e-07, "loss": 0.1106, "step": 2516 }, { "epoch": 0.69, "grad_norm": 2.7435736656188965, "learning_rate": 4.6941380203051774e-07, "loss": 0.1186, "step": 2517 }, { "epoch": 0.69, "grad_norm": 2.9876623153686523, "learning_rate": 4.68663894039662e-07, "loss": 0.129, "step": 2518 }, { "epoch": 0.69, "grad_norm": 2.932159423828125, "learning_rate": 4.679144021628176e-07, "loss": 0.1198, "step": 2519 }, { "epoch": 0.69, "grad_norm": 2.509103298187256, "learning_rate": 4.6716532698694734e-07, "loss": 0.1117, "step": 2520 }, { "epoch": 0.69, "grad_norm": 2.417865514755249, "learning_rate": 4.6641666909868506e-07, "loss": 0.1042, "step": 2521 }, { "epoch": 0.69, "grad_norm": 2.7826478481292725, "learning_rate": 4.656684290843409e-07, "loss": 0.1106, "step": 2522 }, { "epoch": 0.69, "grad_norm": 2.655090808868408, "learning_rate": 4.649206075298955e-07, "loss": 0.1068, "step": 2523 }, { "epoch": 0.69, "grad_norm": 2.719024181365967, "learning_rate": 4.641732050210031e-07, "loss": 0.1144, "step": 2524 }, { "epoch": 0.69, "grad_norm": 3.037306785583496, "learning_rate": 4.634262221429902e-07, "loss": 0.1299, "step": 2525 }, { "epoch": 0.69, "grad_norm": 2.895413875579834, "learning_rate": 4.626796594808523e-07, "loss": 0.1322, "step": 2526 }, { "epoch": 0.69, "grad_norm": 2.520425796508789, "learning_rate": 4.619335176192585e-07, "loss": 0.1072, "step": 2527 }, { "epoch": 0.69, "grad_norm": 2.7348873615264893, "learning_rate": 4.611877971425462e-07, "loss": 0.1101, "step": 2528 }, { "epoch": 0.69, "grad_norm": 2.6879892349243164, "learning_rate": 4.6044249863472453e-07, "loss": 0.1187, "step": 2529 }, { "epoch": 0.69, "grad_norm": 2.752936363220215, "learning_rate": 4.5969762267947175e-07, "loss": 0.1217, "step": 2530 }, { "epoch": 0.69, "grad_norm": 2.7619338035583496, "learning_rate": 4.5895316986013366e-07, "loss": 0.113, "step": 2531 }, { "epoch": 0.69, "grad_norm": 2.8417179584503174, "learning_rate": 4.5820914075972696e-07, "loss": 0.1207, "step": 2532 }, { "epoch": 0.69, "grad_norm": 2.787196159362793, "learning_rate": 4.574655359609345e-07, "loss": 0.1234, "step": 2533 }, { "epoch": 0.69, "grad_norm": 2.7195425033569336, "learning_rate": 4.5672235604610845e-07, "loss": 0.1167, "step": 2534 }, { "epoch": 0.69, "grad_norm": 2.5553388595581055, "learning_rate": 4.5597960159726767e-07, "loss": 0.118, "step": 2535 }, { "epoch": 0.69, "grad_norm": 3.0006186962127686, "learning_rate": 4.552372731960974e-07, "loss": 0.1386, "step": 2536 }, { "epoch": 0.69, "grad_norm": 2.705505609512329, "learning_rate": 4.5449537142394956e-07, "loss": 0.1207, "step": 2537 }, { "epoch": 0.69, "grad_norm": 2.9590044021606445, "learning_rate": 4.537538968618416e-07, "loss": 0.1184, "step": 2538 }, { "epoch": 0.69, "grad_norm": 2.792473316192627, "learning_rate": 4.530128500904571e-07, "loss": 0.1181, "step": 2539 }, { "epoch": 0.69, "grad_norm": 2.780245304107666, "learning_rate": 4.522722316901445e-07, "loss": 0.1181, "step": 2540 }, { "epoch": 0.69, "grad_norm": 2.752324342727661, "learning_rate": 4.5153204224091614e-07, "loss": 0.1153, "step": 2541 }, { "epoch": 0.69, "grad_norm": 2.748352527618408, "learning_rate": 4.507922823224489e-07, "loss": 0.1225, "step": 2542 }, { "epoch": 0.69, "grad_norm": 2.9857993125915527, "learning_rate": 4.500529525140828e-07, "loss": 0.1247, "step": 2543 }, { "epoch": 0.69, "grad_norm": 2.933687925338745, "learning_rate": 4.493140533948216e-07, "loss": 0.1158, "step": 2544 }, { "epoch": 0.7, "grad_norm": 2.8095855712890625, "learning_rate": 4.485755855433322e-07, "loss": 0.1146, "step": 2545 }, { "epoch": 0.7, "grad_norm": 3.357168674468994, "learning_rate": 4.478375495379426e-07, "loss": 0.1366, "step": 2546 }, { "epoch": 0.7, "grad_norm": 2.8076658248901367, "learning_rate": 4.47099945956643e-07, "loss": 0.1123, "step": 2547 }, { "epoch": 0.7, "grad_norm": 2.7585599422454834, "learning_rate": 4.4636277537708487e-07, "loss": 0.1144, "step": 2548 }, { "epoch": 0.7, "grad_norm": 2.844555616378784, "learning_rate": 4.45626038376581e-07, "loss": 0.1139, "step": 2549 }, { "epoch": 0.7, "grad_norm": 2.71215558052063, "learning_rate": 4.4488973553210483e-07, "loss": 0.117, "step": 2550 }, { "epoch": 0.7, "grad_norm": 2.676067352294922, "learning_rate": 4.4415386742028903e-07, "loss": 0.1176, "step": 2551 }, { "epoch": 0.7, "grad_norm": 2.9423668384552, "learning_rate": 4.434184346174261e-07, "loss": 0.1243, "step": 2552 }, { "epoch": 0.7, "grad_norm": 2.7105016708374023, "learning_rate": 4.426834376994673e-07, "loss": 0.1182, "step": 2553 }, { "epoch": 0.7, "grad_norm": 2.604095697402954, "learning_rate": 4.419488772420231e-07, "loss": 0.1062, "step": 2554 }, { "epoch": 0.7, "grad_norm": 2.8293213844299316, "learning_rate": 4.4121475382036253e-07, "loss": 0.1244, "step": 2555 }, { "epoch": 0.7, "grad_norm": 2.634725570678711, "learning_rate": 4.4048106800941143e-07, "loss": 0.104, "step": 2556 }, { "epoch": 0.7, "grad_norm": 2.670680284500122, "learning_rate": 4.3974782038375313e-07, "loss": 0.1233, "step": 2557 }, { "epoch": 0.7, "grad_norm": 2.6359667778015137, "learning_rate": 4.3901501151762764e-07, "loss": 0.1121, "step": 2558 }, { "epoch": 0.7, "grad_norm": 2.7859766483306885, "learning_rate": 4.3828264198493206e-07, "loss": 0.1134, "step": 2559 }, { "epoch": 0.7, "grad_norm": 2.64312481880188, "learning_rate": 4.3755071235921935e-07, "loss": 0.1115, "step": 2560 }, { "epoch": 0.7, "grad_norm": 2.8879735469818115, "learning_rate": 4.3681922321369726e-07, "loss": 0.1286, "step": 2561 }, { "epoch": 0.7, "grad_norm": 2.6948938369750977, "learning_rate": 4.3608817512122887e-07, "loss": 0.1062, "step": 2562 }, { "epoch": 0.7, "grad_norm": 3.2535085678100586, "learning_rate": 4.353575686543318e-07, "loss": 0.1344, "step": 2563 }, { "epoch": 0.7, "grad_norm": 2.81569242477417, "learning_rate": 4.346274043851781e-07, "loss": 0.1186, "step": 2564 }, { "epoch": 0.7, "grad_norm": 2.7789556980133057, "learning_rate": 4.338976828855938e-07, "loss": 0.1175, "step": 2565 }, { "epoch": 0.7, "grad_norm": 2.9362411499023438, "learning_rate": 4.331684047270574e-07, "loss": 0.12, "step": 2566 }, { "epoch": 0.7, "grad_norm": 2.8243889808654785, "learning_rate": 4.3243957048070015e-07, "loss": 0.1224, "step": 2567 }, { "epoch": 0.7, "grad_norm": 2.859623670578003, "learning_rate": 4.317111807173067e-07, "loss": 0.1249, "step": 2568 }, { "epoch": 0.7, "grad_norm": 2.561389684677124, "learning_rate": 4.3098323600731233e-07, "loss": 0.1084, "step": 2569 }, { "epoch": 0.7, "grad_norm": 2.856942892074585, "learning_rate": 4.302557369208051e-07, "loss": 0.1191, "step": 2570 }, { "epoch": 0.7, "grad_norm": 2.8887505531311035, "learning_rate": 4.2952868402752285e-07, "loss": 0.1152, "step": 2571 }, { "epoch": 0.7, "grad_norm": 2.62001895904541, "learning_rate": 4.288020778968544e-07, "loss": 0.1097, "step": 2572 }, { "epoch": 0.7, "grad_norm": 2.8298003673553467, "learning_rate": 4.2807591909783937e-07, "loss": 0.1214, "step": 2573 }, { "epoch": 0.7, "grad_norm": 2.9238924980163574, "learning_rate": 4.273502081991658e-07, "loss": 0.1352, "step": 2574 }, { "epoch": 0.7, "grad_norm": 3.063486099243164, "learning_rate": 4.266249457691723e-07, "loss": 0.1278, "step": 2575 }, { "epoch": 0.7, "grad_norm": 2.689558506011963, "learning_rate": 4.259001323758452e-07, "loss": 0.1274, "step": 2576 }, { "epoch": 0.7, "grad_norm": 2.7789130210876465, "learning_rate": 4.2517576858681945e-07, "loss": 0.1235, "step": 2577 }, { "epoch": 0.7, "grad_norm": 2.6102023124694824, "learning_rate": 4.244518549693785e-07, "loss": 0.1104, "step": 2578 }, { "epoch": 0.7, "grad_norm": 2.763643980026245, "learning_rate": 4.237283920904522e-07, "loss": 0.1106, "step": 2579 }, { "epoch": 0.7, "grad_norm": 2.6615610122680664, "learning_rate": 4.2300538051661847e-07, "loss": 0.1098, "step": 2580 }, { "epoch": 0.71, "grad_norm": 2.615488052368164, "learning_rate": 4.2228282081410126e-07, "loss": 0.116, "step": 2581 }, { "epoch": 0.71, "grad_norm": 2.768125057220459, "learning_rate": 4.215607135487701e-07, "loss": 0.1142, "step": 2582 }, { "epoch": 0.71, "grad_norm": 2.794842004776001, "learning_rate": 4.2083905928614147e-07, "loss": 0.1158, "step": 2583 }, { "epoch": 0.71, "grad_norm": 2.6256704330444336, "learning_rate": 4.2011785859137574e-07, "loss": 0.1043, "step": 2584 }, { "epoch": 0.71, "grad_norm": 2.975555658340454, "learning_rate": 4.193971120292793e-07, "loss": 0.1156, "step": 2585 }, { "epoch": 0.71, "grad_norm": 2.6213598251342773, "learning_rate": 4.1867682016430215e-07, "loss": 0.1147, "step": 2586 }, { "epoch": 0.71, "grad_norm": 2.846608877182007, "learning_rate": 4.179569835605379e-07, "loss": 0.1154, "step": 2587 }, { "epoch": 0.71, "grad_norm": 2.513469934463501, "learning_rate": 4.172376027817246e-07, "loss": 0.1084, "step": 2588 }, { "epoch": 0.71, "grad_norm": 2.876136302947998, "learning_rate": 4.1651867839124234e-07, "loss": 0.1304, "step": 2589 }, { "epoch": 0.71, "grad_norm": 3.251068115234375, "learning_rate": 4.158002109521148e-07, "loss": 0.1285, "step": 2590 }, { "epoch": 0.71, "grad_norm": 3.049268960952759, "learning_rate": 4.15082201027007e-07, "loss": 0.1333, "step": 2591 }, { "epoch": 0.71, "grad_norm": 2.8080382347106934, "learning_rate": 4.1436464917822546e-07, "loss": 0.1092, "step": 2592 }, { "epoch": 0.71, "grad_norm": 2.5367066860198975, "learning_rate": 4.136475559677191e-07, "loss": 0.1116, "step": 2593 }, { "epoch": 0.71, "grad_norm": 3.02242112159729, "learning_rate": 4.129309219570761e-07, "loss": 0.1264, "step": 2594 }, { "epoch": 0.71, "grad_norm": 2.5939440727233887, "learning_rate": 4.1221474770752696e-07, "loss": 0.1088, "step": 2595 }, { "epoch": 0.71, "grad_norm": 2.8899600505828857, "learning_rate": 4.1149903377994035e-07, "loss": 0.1167, "step": 2596 }, { "epoch": 0.71, "grad_norm": 2.6632378101348877, "learning_rate": 4.107837807348249e-07, "loss": 0.1078, "step": 2597 }, { "epoch": 0.71, "grad_norm": 2.7490880489349365, "learning_rate": 4.1006898913232937e-07, "loss": 0.1196, "step": 2598 }, { "epoch": 0.71, "grad_norm": 2.7322561740875244, "learning_rate": 4.0935465953223936e-07, "loss": 0.1212, "step": 2599 }, { "epoch": 0.71, "grad_norm": 2.687798023223877, "learning_rate": 4.086407924939803e-07, "loss": 0.1208, "step": 2600 }, { "epoch": 0.71, "grad_norm": 2.668848991394043, "learning_rate": 4.079273885766146e-07, "loss": 0.11, "step": 2601 }, { "epoch": 0.71, "grad_norm": 3.0383217334747314, "learning_rate": 4.0721444833884134e-07, "loss": 0.1337, "step": 2602 }, { "epoch": 0.71, "grad_norm": 2.8997931480407715, "learning_rate": 4.065019723389981e-07, "loss": 0.1216, "step": 2603 }, { "epoch": 0.71, "grad_norm": 2.6722733974456787, "learning_rate": 4.0578996113505713e-07, "loss": 0.1163, "step": 2604 }, { "epoch": 0.71, "grad_norm": 3.0935604572296143, "learning_rate": 4.0507841528462837e-07, "loss": 0.1295, "step": 2605 }, { "epoch": 0.71, "grad_norm": 2.7999727725982666, "learning_rate": 4.0436733534495595e-07, "loss": 0.1199, "step": 2606 }, { "epoch": 0.71, "grad_norm": 2.807915687561035, "learning_rate": 4.036567218729193e-07, "loss": 0.1136, "step": 2607 }, { "epoch": 0.71, "grad_norm": 2.9019205570220947, "learning_rate": 4.0294657542503373e-07, "loss": 0.1194, "step": 2608 }, { "epoch": 0.71, "grad_norm": 3.148651599884033, "learning_rate": 4.022368965574471e-07, "loss": 0.1307, "step": 2609 }, { "epoch": 0.71, "grad_norm": 2.4384212493896484, "learning_rate": 4.0152768582594266e-07, "loss": 0.1085, "step": 2610 }, { "epoch": 0.71, "grad_norm": 2.7910211086273193, "learning_rate": 4.008189437859361e-07, "loss": 0.1298, "step": 2611 }, { "epoch": 0.71, "grad_norm": 2.623723030090332, "learning_rate": 4.0011067099247565e-07, "loss": 0.1188, "step": 2612 }, { "epoch": 0.71, "grad_norm": 2.447112560272217, "learning_rate": 3.994028680002435e-07, "loss": 0.0984, "step": 2613 }, { "epoch": 0.71, "grad_norm": 2.915776491165161, "learning_rate": 3.9869553536355236e-07, "loss": 0.1257, "step": 2614 }, { "epoch": 0.71, "grad_norm": 2.71044921875, "learning_rate": 3.9798867363634815e-07, "loss": 0.1199, "step": 2615 }, { "epoch": 0.71, "grad_norm": 2.633300304412842, "learning_rate": 3.972822833722067e-07, "loss": 0.1203, "step": 2616 }, { "epoch": 0.71, "grad_norm": 2.6894752979278564, "learning_rate": 3.9657636512433466e-07, "loss": 0.1224, "step": 2617 }, { "epoch": 0.72, "grad_norm": 2.559011459350586, "learning_rate": 3.9587091944557015e-07, "loss": 0.104, "step": 2618 }, { "epoch": 0.72, "grad_norm": 3.1206631660461426, "learning_rate": 3.951659468883799e-07, "loss": 0.1313, "step": 2619 }, { "epoch": 0.72, "grad_norm": 2.814870595932007, "learning_rate": 3.9446144800486135e-07, "loss": 0.1229, "step": 2620 }, { "epoch": 0.72, "grad_norm": 2.710951566696167, "learning_rate": 3.9375742334674e-07, "loss": 0.1126, "step": 2621 }, { "epoch": 0.72, "grad_norm": 3.0050394535064697, "learning_rate": 3.9305387346536976e-07, "loss": 0.1282, "step": 2622 }, { "epoch": 0.72, "grad_norm": 2.760265588760376, "learning_rate": 3.9235079891173427e-07, "loss": 0.1193, "step": 2623 }, { "epoch": 0.72, "grad_norm": 3.1326093673706055, "learning_rate": 3.9164820023644297e-07, "loss": 0.1216, "step": 2624 }, { "epoch": 0.72, "grad_norm": 2.85953950881958, "learning_rate": 3.909460779897339e-07, "loss": 0.127, "step": 2625 }, { "epoch": 0.72, "grad_norm": 2.9403843879699707, "learning_rate": 3.9024443272147256e-07, "loss": 0.1282, "step": 2626 }, { "epoch": 0.72, "grad_norm": 2.566781997680664, "learning_rate": 3.895432649811483e-07, "loss": 0.1111, "step": 2627 }, { "epoch": 0.72, "grad_norm": 3.1966028213500977, "learning_rate": 3.8884257531787945e-07, "loss": 0.1208, "step": 2628 }, { "epoch": 0.72, "grad_norm": 3.0294313430786133, "learning_rate": 3.881423642804079e-07, "loss": 0.1236, "step": 2629 }, { "epoch": 0.72, "grad_norm": 3.069186210632324, "learning_rate": 3.8744263241710184e-07, "loss": 0.1455, "step": 2630 }, { "epoch": 0.72, "grad_norm": 2.585867166519165, "learning_rate": 3.867433802759541e-07, "loss": 0.1192, "step": 2631 }, { "epoch": 0.72, "grad_norm": 2.7379586696624756, "learning_rate": 3.860446084045813e-07, "loss": 0.1193, "step": 2632 }, { "epoch": 0.72, "grad_norm": 2.710688591003418, "learning_rate": 3.8534631735022406e-07, "loss": 0.112, "step": 2633 }, { "epoch": 0.72, "grad_norm": 2.7601280212402344, "learning_rate": 3.846485076597463e-07, "loss": 0.1209, "step": 2634 }, { "epoch": 0.72, "grad_norm": 3.2363827228546143, "learning_rate": 3.8395117987963565e-07, "loss": 0.148, "step": 2635 }, { "epoch": 0.72, "grad_norm": 3.099475383758545, "learning_rate": 3.832543345560021e-07, "loss": 0.1408, "step": 2636 }, { "epoch": 0.72, "grad_norm": 2.758754253387451, "learning_rate": 3.825579722345774e-07, "loss": 0.1125, "step": 2637 }, { "epoch": 0.72, "grad_norm": 3.01164174079895, "learning_rate": 3.818620934607153e-07, "loss": 0.116, "step": 2638 }, { "epoch": 0.72, "grad_norm": 2.769646406173706, "learning_rate": 3.8116669877939044e-07, "loss": 0.1124, "step": 2639 }, { "epoch": 0.72, "grad_norm": 3.0166378021240234, "learning_rate": 3.80471788735199e-07, "loss": 0.1279, "step": 2640 }, { "epoch": 0.72, "grad_norm": 2.505793333053589, "learning_rate": 3.797773638723578e-07, "loss": 0.1052, "step": 2641 }, { "epoch": 0.72, "grad_norm": 3.0237605571746826, "learning_rate": 3.790834247347028e-07, "loss": 0.1154, "step": 2642 }, { "epoch": 0.72, "grad_norm": 2.958221435546875, "learning_rate": 3.783899718656901e-07, "loss": 0.1155, "step": 2643 }, { "epoch": 0.72, "grad_norm": 2.838883399963379, "learning_rate": 3.7769700580839447e-07, "loss": 0.1272, "step": 2644 }, { "epoch": 0.72, "grad_norm": 2.793994665145874, "learning_rate": 3.7700452710551025e-07, "loss": 0.1181, "step": 2645 }, { "epoch": 0.72, "grad_norm": 2.674825429916382, "learning_rate": 3.7631253629935e-07, "loss": 0.1159, "step": 2646 }, { "epoch": 0.72, "grad_norm": 2.790365219116211, "learning_rate": 3.756210339318436e-07, "loss": 0.1255, "step": 2647 }, { "epoch": 0.72, "grad_norm": 2.7378766536712646, "learning_rate": 3.749300205445387e-07, "loss": 0.1216, "step": 2648 }, { "epoch": 0.72, "grad_norm": 2.8433449268341064, "learning_rate": 3.7423949667859967e-07, "loss": 0.1286, "step": 2649 }, { "epoch": 0.72, "grad_norm": 2.6227731704711914, "learning_rate": 3.735494628748082e-07, "loss": 0.1144, "step": 2650 }, { "epoch": 0.72, "grad_norm": 2.631502389907837, "learning_rate": 3.72859919673562e-07, "loss": 0.1143, "step": 2651 }, { "epoch": 0.72, "grad_norm": 2.8619110584259033, "learning_rate": 3.721708676148745e-07, "loss": 0.1293, "step": 2652 }, { "epoch": 0.72, "grad_norm": 3.010019540786743, "learning_rate": 3.71482307238374e-07, "loss": 0.1067, "step": 2653 }, { "epoch": 0.72, "grad_norm": 2.6308083534240723, "learning_rate": 3.707942390833041e-07, "loss": 0.1157, "step": 2654 }, { "epoch": 0.73, "grad_norm": 2.5576210021972656, "learning_rate": 3.7010666368852305e-07, "loss": 0.1043, "step": 2655 }, { "epoch": 0.73, "grad_norm": 2.6919572353363037, "learning_rate": 3.694195815925036e-07, "loss": 0.1177, "step": 2656 }, { "epoch": 0.73, "grad_norm": 2.72977876663208, "learning_rate": 3.687329933333315e-07, "loss": 0.1093, "step": 2657 }, { "epoch": 0.73, "grad_norm": 2.6672637462615967, "learning_rate": 3.680468994487056e-07, "loss": 0.1036, "step": 2658 }, { "epoch": 0.73, "grad_norm": 2.9066519737243652, "learning_rate": 3.6736130047593784e-07, "loss": 0.114, "step": 2659 }, { "epoch": 0.73, "grad_norm": 2.4339230060577393, "learning_rate": 3.666761969519528e-07, "loss": 0.1071, "step": 2660 }, { "epoch": 0.73, "grad_norm": 2.600412368774414, "learning_rate": 3.6599158941328755e-07, "loss": 0.1174, "step": 2661 }, { "epoch": 0.73, "grad_norm": 2.992175817489624, "learning_rate": 3.6530747839608943e-07, "loss": 0.118, "step": 2662 }, { "epoch": 0.73, "grad_norm": 2.8061258792877197, "learning_rate": 3.646238644361177e-07, "loss": 0.1185, "step": 2663 }, { "epoch": 0.73, "grad_norm": 2.548417329788208, "learning_rate": 3.63940748068742e-07, "loss": 0.0997, "step": 2664 }, { "epoch": 0.73, "grad_norm": 2.507392644882202, "learning_rate": 3.632581298289427e-07, "loss": 0.107, "step": 2665 }, { "epoch": 0.73, "grad_norm": 2.87033748626709, "learning_rate": 3.625760102513102e-07, "loss": 0.1306, "step": 2666 }, { "epoch": 0.73, "grad_norm": 2.7267277240753174, "learning_rate": 3.6189438987004403e-07, "loss": 0.1062, "step": 2667 }, { "epoch": 0.73, "grad_norm": 2.7781479358673096, "learning_rate": 3.6121326921895245e-07, "loss": 0.1218, "step": 2668 }, { "epoch": 0.73, "grad_norm": 2.554163694381714, "learning_rate": 3.605326488314526e-07, "loss": 0.1071, "step": 2669 }, { "epoch": 0.73, "grad_norm": 2.9026176929473877, "learning_rate": 3.5985252924057017e-07, "loss": 0.1226, "step": 2670 }, { "epoch": 0.73, "grad_norm": 2.7796084880828857, "learning_rate": 3.591729109789389e-07, "loss": 0.1152, "step": 2671 }, { "epoch": 0.73, "grad_norm": 2.7688584327697754, "learning_rate": 3.584937945787989e-07, "loss": 0.1228, "step": 2672 }, { "epoch": 0.73, "grad_norm": 2.813920497894287, "learning_rate": 3.57815180571998e-07, "loss": 0.1103, "step": 2673 }, { "epoch": 0.73, "grad_norm": 2.761597156524658, "learning_rate": 3.571370694899899e-07, "loss": 0.1128, "step": 2674 }, { "epoch": 0.73, "grad_norm": 2.8307440280914307, "learning_rate": 3.5645946186383544e-07, "loss": 0.1232, "step": 2675 }, { "epoch": 0.73, "grad_norm": 2.8210718631744385, "learning_rate": 3.557823582242008e-07, "loss": 0.1217, "step": 2676 }, { "epoch": 0.73, "grad_norm": 2.6191835403442383, "learning_rate": 3.551057591013572e-07, "loss": 0.1026, "step": 2677 }, { "epoch": 0.73, "grad_norm": 2.7669758796691895, "learning_rate": 3.544296650251807e-07, "loss": 0.1103, "step": 2678 }, { "epoch": 0.73, "grad_norm": 2.8590447902679443, "learning_rate": 3.5375407652515166e-07, "loss": 0.1139, "step": 2679 }, { "epoch": 0.73, "grad_norm": 2.8868892192840576, "learning_rate": 3.5307899413035534e-07, "loss": 0.1303, "step": 2680 }, { "epoch": 0.73, "grad_norm": 2.5479912757873535, "learning_rate": 3.524044183694803e-07, "loss": 0.1164, "step": 2681 }, { "epoch": 0.73, "grad_norm": 2.819713830947876, "learning_rate": 3.5173034977081807e-07, "loss": 0.1207, "step": 2682 }, { "epoch": 0.73, "grad_norm": 2.73115873336792, "learning_rate": 3.51056788862263e-07, "loss": 0.1191, "step": 2683 }, { "epoch": 0.73, "grad_norm": 2.7490108013153076, "learning_rate": 3.5038373617131156e-07, "loss": 0.123, "step": 2684 }, { "epoch": 0.73, "grad_norm": 2.8641395568847656, "learning_rate": 3.4971119222506296e-07, "loss": 0.1169, "step": 2685 }, { "epoch": 0.73, "grad_norm": 2.8399951457977295, "learning_rate": 3.4903915755021806e-07, "loss": 0.1289, "step": 2686 }, { "epoch": 0.73, "grad_norm": 2.8281517028808594, "learning_rate": 3.4836763267307814e-07, "loss": 0.1224, "step": 2687 }, { "epoch": 0.73, "grad_norm": 3.023627758026123, "learning_rate": 3.476966181195451e-07, "loss": 0.1337, "step": 2688 }, { "epoch": 0.73, "grad_norm": 2.7076070308685303, "learning_rate": 3.470261144151224e-07, "loss": 0.1098, "step": 2689 }, { "epoch": 0.73, "grad_norm": 2.512364387512207, "learning_rate": 3.4635612208491193e-07, "loss": 0.1058, "step": 2690 }, { "epoch": 0.74, "grad_norm": 2.8544671535491943, "learning_rate": 3.456866416536166e-07, "loss": 0.1208, "step": 2691 }, { "epoch": 0.74, "grad_norm": 2.7098636627197266, "learning_rate": 3.4501767364553723e-07, "loss": 0.1177, "step": 2692 }, { "epoch": 0.74, "grad_norm": 2.382507801055908, "learning_rate": 3.4434921858457355e-07, "loss": 0.0982, "step": 2693 }, { "epoch": 0.74, "grad_norm": 2.758180856704712, "learning_rate": 3.4368127699422434e-07, "loss": 0.1061, "step": 2694 }, { "epoch": 0.74, "grad_norm": 2.895392417907715, "learning_rate": 3.4301384939758513e-07, "loss": 0.1188, "step": 2695 }, { "epoch": 0.74, "grad_norm": 2.702798366546631, "learning_rate": 3.4234693631735026e-07, "loss": 0.1074, "step": 2696 }, { "epoch": 0.74, "grad_norm": 3.1038873195648193, "learning_rate": 3.416805382758099e-07, "loss": 0.1179, "step": 2697 }, { "epoch": 0.74, "grad_norm": 2.643678665161133, "learning_rate": 3.41014655794851e-07, "loss": 0.1083, "step": 2698 }, { "epoch": 0.74, "grad_norm": 2.7079343795776367, "learning_rate": 3.4034928939595785e-07, "loss": 0.1143, "step": 2699 }, { "epoch": 0.74, "grad_norm": 2.768155813217163, "learning_rate": 3.3968443960020907e-07, "loss": 0.1199, "step": 2700 }, { "epoch": 0.74, "grad_norm": 2.9409587383270264, "learning_rate": 3.390201069282802e-07, "loss": 0.1215, "step": 2701 }, { "epoch": 0.74, "grad_norm": 2.918840169906616, "learning_rate": 3.3835629190044066e-07, "loss": 0.1339, "step": 2702 }, { "epoch": 0.74, "grad_norm": 2.6571919918060303, "learning_rate": 3.3769299503655457e-07, "loss": 0.112, "step": 2703 }, { "epoch": 0.74, "grad_norm": 2.999922752380371, "learning_rate": 3.3703021685608115e-07, "loss": 0.1276, "step": 2704 }, { "epoch": 0.74, "grad_norm": 2.910522222518921, "learning_rate": 3.3636795787807225e-07, "loss": 0.1206, "step": 2705 }, { "epoch": 0.74, "grad_norm": 2.843425750732422, "learning_rate": 3.3570621862117423e-07, "loss": 0.1137, "step": 2706 }, { "epoch": 0.74, "grad_norm": 2.7062761783599854, "learning_rate": 3.350449996036255e-07, "loss": 0.1029, "step": 2707 }, { "epoch": 0.74, "grad_norm": 2.678119421005249, "learning_rate": 3.3438430134325734e-07, "loss": 0.1009, "step": 2708 }, { "epoch": 0.74, "grad_norm": 2.6809935569763184, "learning_rate": 3.337241243574936e-07, "loss": 0.1043, "step": 2709 }, { "epoch": 0.74, "grad_norm": 2.797308921813965, "learning_rate": 3.330644691633492e-07, "loss": 0.1257, "step": 2710 }, { "epoch": 0.74, "grad_norm": 2.5868420600891113, "learning_rate": 3.3240533627743126e-07, "loss": 0.1183, "step": 2711 }, { "epoch": 0.74, "grad_norm": 2.9918274879455566, "learning_rate": 3.3174672621593726e-07, "loss": 0.1381, "step": 2712 }, { "epoch": 0.74, "grad_norm": 2.644981622695923, "learning_rate": 3.310886394946548e-07, "loss": 0.1087, "step": 2713 }, { "epoch": 0.74, "grad_norm": 2.868210554122925, "learning_rate": 3.3043107662896295e-07, "loss": 0.1302, "step": 2714 }, { "epoch": 0.74, "grad_norm": 2.591280221939087, "learning_rate": 3.297740381338292e-07, "loss": 0.119, "step": 2715 }, { "epoch": 0.74, "grad_norm": 2.8235762119293213, "learning_rate": 3.2911752452381146e-07, "loss": 0.1144, "step": 2716 }, { "epoch": 0.74, "grad_norm": 2.710702657699585, "learning_rate": 3.2846153631305584e-07, "loss": 0.1193, "step": 2717 }, { "epoch": 0.74, "grad_norm": 2.6694576740264893, "learning_rate": 3.278060740152969e-07, "loss": 0.1206, "step": 2718 }, { "epoch": 0.74, "grad_norm": 2.732609987258911, "learning_rate": 3.271511381438582e-07, "loss": 0.1225, "step": 2719 }, { "epoch": 0.74, "grad_norm": 2.437609910964966, "learning_rate": 3.2649672921164993e-07, "loss": 0.0987, "step": 2720 }, { "epoch": 0.74, "grad_norm": 2.895250082015991, "learning_rate": 3.2584284773117066e-07, "loss": 0.1314, "step": 2721 }, { "epoch": 0.74, "grad_norm": 2.8994832038879395, "learning_rate": 3.2518949421450525e-07, "loss": 0.128, "step": 2722 }, { "epoch": 0.74, "grad_norm": 2.6383893489837646, "learning_rate": 3.2453666917332465e-07, "loss": 0.1139, "step": 2723 }, { "epoch": 0.74, "grad_norm": 2.823533535003662, "learning_rate": 3.2388437311888737e-07, "loss": 0.111, "step": 2724 }, { "epoch": 0.74, "grad_norm": 2.771145820617676, "learning_rate": 3.232326065620361e-07, "loss": 0.1279, "step": 2725 }, { "epoch": 0.74, "grad_norm": 2.848806619644165, "learning_rate": 3.2258137001320007e-07, "loss": 0.1205, "step": 2726 }, { "epoch": 0.74, "grad_norm": 2.788727283477783, "learning_rate": 3.219306639823923e-07, "loss": 0.1162, "step": 2727 }, { "epoch": 0.75, "grad_norm": 2.8441965579986572, "learning_rate": 3.212804889792117e-07, "loss": 0.1159, "step": 2728 }, { "epoch": 0.75, "grad_norm": 2.997556209564209, "learning_rate": 3.2063084551284004e-07, "loss": 0.1231, "step": 2729 }, { "epoch": 0.75, "grad_norm": 3.102024555206299, "learning_rate": 3.1998173409204323e-07, "loss": 0.1174, "step": 2730 }, { "epoch": 0.75, "grad_norm": 2.7217159271240234, "learning_rate": 3.19333155225171e-07, "loss": 0.1165, "step": 2731 }, { "epoch": 0.75, "grad_norm": 2.6979618072509766, "learning_rate": 3.186851094201551e-07, "loss": 0.1135, "step": 2732 }, { "epoch": 0.75, "grad_norm": 2.8890743255615234, "learning_rate": 3.1803759718451107e-07, "loss": 0.1199, "step": 2733 }, { "epoch": 0.75, "grad_norm": 2.689387083053589, "learning_rate": 3.173906190253355e-07, "loss": 0.1155, "step": 2734 }, { "epoch": 0.75, "grad_norm": 2.8426594734191895, "learning_rate": 3.1674417544930653e-07, "loss": 0.1201, "step": 2735 }, { "epoch": 0.75, "grad_norm": 3.093418836593628, "learning_rate": 3.1609826696268507e-07, "loss": 0.12, "step": 2736 }, { "epoch": 0.75, "grad_norm": 2.7327778339385986, "learning_rate": 3.154528940713113e-07, "loss": 0.1129, "step": 2737 }, { "epoch": 0.75, "grad_norm": 2.849271774291992, "learning_rate": 3.1480805728060745e-07, "loss": 0.1167, "step": 2738 }, { "epoch": 0.75, "grad_norm": 2.769320011138916, "learning_rate": 3.1416375709557483e-07, "loss": 0.1068, "step": 2739 }, { "epoch": 0.75, "grad_norm": 2.5702242851257324, "learning_rate": 3.1351999402079465e-07, "loss": 0.1012, "step": 2740 }, { "epoch": 0.75, "grad_norm": 2.8913018703460693, "learning_rate": 3.1287676856042824e-07, "loss": 0.1223, "step": 2741 }, { "epoch": 0.75, "grad_norm": 2.8712058067321777, "learning_rate": 3.122340812182148e-07, "loss": 0.1196, "step": 2742 }, { "epoch": 0.75, "grad_norm": 3.083658456802368, "learning_rate": 3.1159193249747327e-07, "loss": 0.1138, "step": 2743 }, { "epoch": 0.75, "grad_norm": 2.3772547245025635, "learning_rate": 3.109503229010999e-07, "loss": 0.0929, "step": 2744 }, { "epoch": 0.75, "grad_norm": 2.6538403034210205, "learning_rate": 3.103092529315686e-07, "loss": 0.1268, "step": 2745 }, { "epoch": 0.75, "grad_norm": 2.9843966960906982, "learning_rate": 3.096687230909315e-07, "loss": 0.1143, "step": 2746 }, { "epoch": 0.75, "grad_norm": 2.629732847213745, "learning_rate": 3.090287338808175e-07, "loss": 0.114, "step": 2747 }, { "epoch": 0.75, "grad_norm": 2.844168186187744, "learning_rate": 3.083892858024317e-07, "loss": 0.1233, "step": 2748 }, { "epoch": 0.75, "grad_norm": 2.8624022006988525, "learning_rate": 3.077503793565557e-07, "loss": 0.1256, "step": 2749 }, { "epoch": 0.75, "grad_norm": 2.8195860385894775, "learning_rate": 3.0711201504354623e-07, "loss": 0.1229, "step": 2750 }, { "epoch": 0.75, "grad_norm": 2.623037338256836, "learning_rate": 3.0647419336333656e-07, "loss": 0.1032, "step": 2751 }, { "epoch": 0.75, "grad_norm": 2.8414342403411865, "learning_rate": 3.0583691481543493e-07, "loss": 0.1271, "step": 2752 }, { "epoch": 0.75, "grad_norm": 2.9264533519744873, "learning_rate": 3.052001798989233e-07, "loss": 0.1232, "step": 2753 }, { "epoch": 0.75, "grad_norm": 2.7810049057006836, "learning_rate": 3.045639891124585e-07, "loss": 0.1071, "step": 2754 }, { "epoch": 0.75, "grad_norm": 2.9589149951934814, "learning_rate": 3.039283429542707e-07, "loss": 0.1192, "step": 2755 }, { "epoch": 0.75, "grad_norm": 2.6391968727111816, "learning_rate": 3.032932419221644e-07, "loss": 0.1068, "step": 2756 }, { "epoch": 0.75, "grad_norm": 2.6416263580322266, "learning_rate": 3.026586865135171e-07, "loss": 0.1014, "step": 2757 }, { "epoch": 0.75, "grad_norm": 2.9932734966278076, "learning_rate": 3.0202467722527823e-07, "loss": 0.1235, "step": 2758 }, { "epoch": 0.75, "grad_norm": 2.700866460800171, "learning_rate": 3.0139121455396985e-07, "loss": 0.1189, "step": 2759 }, { "epoch": 0.75, "grad_norm": 2.708264112472534, "learning_rate": 3.0075829899568593e-07, "loss": 0.1093, "step": 2760 }, { "epoch": 0.75, "grad_norm": 2.864259958267212, "learning_rate": 3.001259310460923e-07, "loss": 0.121, "step": 2761 }, { "epoch": 0.75, "grad_norm": 3.0818142890930176, "learning_rate": 2.99494111200426e-07, "loss": 0.1325, "step": 2762 }, { "epoch": 0.75, "grad_norm": 2.729863166809082, "learning_rate": 2.9886283995349413e-07, "loss": 0.1107, "step": 2763 }, { "epoch": 0.76, "grad_norm": 2.9157660007476807, "learning_rate": 2.9823211779967485e-07, "loss": 0.1162, "step": 2764 }, { "epoch": 0.76, "grad_norm": 2.7944540977478027, "learning_rate": 2.9760194523291525e-07, "loss": 0.1138, "step": 2765 }, { "epoch": 0.76, "grad_norm": 2.658810615539551, "learning_rate": 2.9697232274673355e-07, "loss": 0.1138, "step": 2766 }, { "epoch": 0.76, "grad_norm": 2.8354992866516113, "learning_rate": 2.963432508342164e-07, "loss": 0.1384, "step": 2767 }, { "epoch": 0.76, "grad_norm": 2.7997586727142334, "learning_rate": 2.9571472998801903e-07, "loss": 0.12, "step": 2768 }, { "epoch": 0.76, "grad_norm": 2.548947811126709, "learning_rate": 2.950867607003653e-07, "loss": 0.1161, "step": 2769 }, { "epoch": 0.76, "grad_norm": 2.7747020721435547, "learning_rate": 2.9445934346304703e-07, "loss": 0.1112, "step": 2770 }, { "epoch": 0.76, "grad_norm": 2.5072803497314453, "learning_rate": 2.938324787674239e-07, "loss": 0.1101, "step": 2771 }, { "epoch": 0.76, "grad_norm": 2.520566463470459, "learning_rate": 2.9320616710442326e-07, "loss": 0.1012, "step": 2772 }, { "epoch": 0.76, "grad_norm": 2.9922077655792236, "learning_rate": 2.9258040896453864e-07, "loss": 0.1182, "step": 2773 }, { "epoch": 0.76, "grad_norm": 2.825206756591797, "learning_rate": 2.919552048378302e-07, "loss": 0.1155, "step": 2774 }, { "epoch": 0.76, "grad_norm": 2.78865647315979, "learning_rate": 2.91330555213924e-07, "loss": 0.123, "step": 2775 }, { "epoch": 0.76, "grad_norm": 2.683983564376831, "learning_rate": 2.9070646058201276e-07, "loss": 0.1176, "step": 2776 }, { "epoch": 0.76, "grad_norm": 3.0451362133026123, "learning_rate": 2.9008292143085413e-07, "loss": 0.1172, "step": 2777 }, { "epoch": 0.76, "grad_norm": 2.7335100173950195, "learning_rate": 2.8945993824877033e-07, "loss": 0.1129, "step": 2778 }, { "epoch": 0.76, "grad_norm": 2.9792160987854004, "learning_rate": 2.8883751152364843e-07, "loss": 0.1227, "step": 2779 }, { "epoch": 0.76, "grad_norm": 2.7221388816833496, "learning_rate": 2.8821564174293957e-07, "loss": 0.1123, "step": 2780 }, { "epoch": 0.76, "grad_norm": 2.673191785812378, "learning_rate": 2.875943293936591e-07, "loss": 0.1078, "step": 2781 }, { "epoch": 0.76, "grad_norm": 2.7910141944885254, "learning_rate": 2.8697357496238584e-07, "loss": 0.1108, "step": 2782 }, { "epoch": 0.76, "grad_norm": 2.705504894256592, "learning_rate": 2.8635337893526137e-07, "loss": 0.1151, "step": 2783 }, { "epoch": 0.76, "grad_norm": 2.7229225635528564, "learning_rate": 2.857337417979898e-07, "loss": 0.1106, "step": 2784 }, { "epoch": 0.76, "grad_norm": 2.706153154373169, "learning_rate": 2.851146640358376e-07, "loss": 0.1175, "step": 2785 }, { "epoch": 0.76, "grad_norm": 2.9575228691101074, "learning_rate": 2.844961461336336e-07, "loss": 0.126, "step": 2786 }, { "epoch": 0.76, "grad_norm": 2.809384822845459, "learning_rate": 2.838781885757684e-07, "loss": 0.1084, "step": 2787 }, { "epoch": 0.76, "grad_norm": 2.77486252784729, "learning_rate": 2.8326079184619266e-07, "loss": 0.116, "step": 2788 }, { "epoch": 0.76, "grad_norm": 2.8169262409210205, "learning_rate": 2.826439564284189e-07, "loss": 0.1309, "step": 2789 }, { "epoch": 0.76, "grad_norm": 2.7485415935516357, "learning_rate": 2.820276828055189e-07, "loss": 0.1179, "step": 2790 }, { "epoch": 0.76, "grad_norm": 3.030266761779785, "learning_rate": 2.8141197146012575e-07, "loss": 0.1189, "step": 2791 }, { "epoch": 0.76, "grad_norm": 2.5383927822113037, "learning_rate": 2.8079682287443186e-07, "loss": 0.1062, "step": 2792 }, { "epoch": 0.76, "grad_norm": 2.9429070949554443, "learning_rate": 2.8018223753018844e-07, "loss": 0.1112, "step": 2793 }, { "epoch": 0.76, "grad_norm": 3.001762866973877, "learning_rate": 2.795682159087057e-07, "loss": 0.1282, "step": 2794 }, { "epoch": 0.76, "grad_norm": 2.8901684284210205, "learning_rate": 2.7895475849085246e-07, "loss": 0.124, "step": 2795 }, { "epoch": 0.76, "grad_norm": 2.7526793479919434, "learning_rate": 2.7834186575705585e-07, "loss": 0.1157, "step": 2796 }, { "epoch": 0.76, "grad_norm": 2.978522777557373, "learning_rate": 2.7772953818730106e-07, "loss": 0.123, "step": 2797 }, { "epoch": 0.76, "grad_norm": 2.629232168197632, "learning_rate": 2.7711777626112984e-07, "loss": 0.1056, "step": 2798 }, { "epoch": 0.76, "grad_norm": 2.5201046466827393, "learning_rate": 2.7650658045764175e-07, "loss": 0.1092, "step": 2799 }, { "epoch": 0.76, "grad_norm": 2.606663465499878, "learning_rate": 2.7589595125549193e-07, "loss": 0.1103, "step": 2800 }, { "epoch": 0.77, "grad_norm": 2.587350606918335, "learning_rate": 2.7528588913289305e-07, "loss": 0.112, "step": 2801 }, { "epoch": 0.77, "grad_norm": 2.537712335586548, "learning_rate": 2.7467639456761337e-07, "loss": 0.114, "step": 2802 }, { "epoch": 0.77, "grad_norm": 2.9924392700195312, "learning_rate": 2.740674680369761e-07, "loss": 0.1395, "step": 2803 }, { "epoch": 0.77, "grad_norm": 2.8187615871429443, "learning_rate": 2.734591100178597e-07, "loss": 0.1202, "step": 2804 }, { "epoch": 0.77, "grad_norm": 3.0450353622436523, "learning_rate": 2.728513209866981e-07, "loss": 0.1207, "step": 2805 }, { "epoch": 0.77, "grad_norm": 2.924205780029297, "learning_rate": 2.722441014194786e-07, "loss": 0.1403, "step": 2806 }, { "epoch": 0.77, "grad_norm": 2.9638490676879883, "learning_rate": 2.716374517917437e-07, "loss": 0.1245, "step": 2807 }, { "epoch": 0.77, "grad_norm": 2.8933804035186768, "learning_rate": 2.7103137257858863e-07, "loss": 0.1244, "step": 2808 }, { "epoch": 0.77, "grad_norm": 2.6209239959716797, "learning_rate": 2.7042586425466194e-07, "loss": 0.1152, "step": 2809 }, { "epoch": 0.77, "grad_norm": 2.8527228832244873, "learning_rate": 2.6982092729416585e-07, "loss": 0.1264, "step": 2810 }, { "epoch": 0.77, "grad_norm": 2.9052624702453613, "learning_rate": 2.692165621708541e-07, "loss": 0.1313, "step": 2811 }, { "epoch": 0.77, "grad_norm": 3.0797393321990967, "learning_rate": 2.686127693580338e-07, "loss": 0.1176, "step": 2812 }, { "epoch": 0.77, "grad_norm": 2.8544461727142334, "learning_rate": 2.680095493285627e-07, "loss": 0.1256, "step": 2813 }, { "epoch": 0.77, "grad_norm": 2.90590500831604, "learning_rate": 2.674069025548502e-07, "loss": 0.1123, "step": 2814 }, { "epoch": 0.77, "grad_norm": 2.509225368499756, "learning_rate": 2.668048295088577e-07, "loss": 0.1049, "step": 2815 }, { "epoch": 0.77, "grad_norm": 2.545013666152954, "learning_rate": 2.66203330662096e-07, "loss": 0.1069, "step": 2816 }, { "epoch": 0.77, "grad_norm": 2.698673963546753, "learning_rate": 2.6560240648562727e-07, "loss": 0.1135, "step": 2817 }, { "epoch": 0.77, "grad_norm": 2.9910836219787598, "learning_rate": 2.6500205745006296e-07, "loss": 0.12, "step": 2818 }, { "epoch": 0.77, "grad_norm": 2.9469501972198486, "learning_rate": 2.644022840255641e-07, "loss": 0.118, "step": 2819 }, { "epoch": 0.77, "grad_norm": 2.9956037998199463, "learning_rate": 2.638030866818416e-07, "loss": 0.1385, "step": 2820 }, { "epoch": 0.77, "grad_norm": 2.940786123275757, "learning_rate": 2.6320446588815425e-07, "loss": 0.1197, "step": 2821 }, { "epoch": 0.77, "grad_norm": 2.6238014698028564, "learning_rate": 2.6260642211331055e-07, "loss": 0.112, "step": 2822 }, { "epoch": 0.77, "grad_norm": 2.8474788665771484, "learning_rate": 2.620089558256655e-07, "loss": 0.1142, "step": 2823 }, { "epoch": 0.77, "grad_norm": 2.917656898498535, "learning_rate": 2.614120674931235e-07, "loss": 0.1298, "step": 2824 }, { "epoch": 0.77, "grad_norm": 2.8403565883636475, "learning_rate": 2.608157575831352e-07, "loss": 0.1276, "step": 2825 }, { "epoch": 0.77, "grad_norm": 2.9889371395111084, "learning_rate": 2.6022002656269846e-07, "loss": 0.1164, "step": 2826 }, { "epoch": 0.77, "grad_norm": 2.703030824661255, "learning_rate": 2.596248748983585e-07, "loss": 0.1034, "step": 2827 }, { "epoch": 0.77, "grad_norm": 2.5715551376342773, "learning_rate": 2.5903030305620545e-07, "loss": 0.1192, "step": 2828 }, { "epoch": 0.77, "grad_norm": 3.081249952316284, "learning_rate": 2.5843631150187707e-07, "loss": 0.1333, "step": 2829 }, { "epoch": 0.77, "grad_norm": 2.708294630050659, "learning_rate": 2.5784290070055514e-07, "loss": 0.1148, "step": 2830 }, { "epoch": 0.77, "grad_norm": 2.5890491008758545, "learning_rate": 2.572500711169673e-07, "loss": 0.1203, "step": 2831 }, { "epoch": 0.77, "grad_norm": 3.0034663677215576, "learning_rate": 2.566578232153863e-07, "loss": 0.1327, "step": 2832 }, { "epoch": 0.77, "grad_norm": 2.52756667137146, "learning_rate": 2.560661574596284e-07, "loss": 0.1102, "step": 2833 }, { "epoch": 0.77, "grad_norm": 2.7598891258239746, "learning_rate": 2.5547507431305547e-07, "loss": 0.1031, "step": 2834 }, { "epoch": 0.77, "grad_norm": 2.7306227684020996, "learning_rate": 2.548845742385717e-07, "loss": 0.117, "step": 2835 }, { "epoch": 0.77, "grad_norm": 2.724799156188965, "learning_rate": 2.5429465769862477e-07, "loss": 0.1139, "step": 2836 }, { "epoch": 0.77, "grad_norm": 2.8449912071228027, "learning_rate": 2.537053251552065e-07, "loss": 0.119, "step": 2837 }, { "epoch": 0.78, "grad_norm": 2.7894270420074463, "learning_rate": 2.531165770698499e-07, "loss": 0.1238, "step": 2838 }, { "epoch": 0.78, "grad_norm": 2.513648509979248, "learning_rate": 2.5252841390363165e-07, "loss": 0.1098, "step": 2839 }, { "epoch": 0.78, "grad_norm": 3.1153371334075928, "learning_rate": 2.519408361171693e-07, "loss": 0.1096, "step": 2840 }, { "epoch": 0.78, "grad_norm": 2.546874523162842, "learning_rate": 2.513538441706221e-07, "loss": 0.0989, "step": 2841 }, { "epoch": 0.78, "grad_norm": 2.72564435005188, "learning_rate": 2.5076743852369145e-07, "loss": 0.1205, "step": 2842 }, { "epoch": 0.78, "grad_norm": 2.6190879344940186, "learning_rate": 2.50181619635618e-07, "loss": 0.1044, "step": 2843 }, { "epoch": 0.78, "grad_norm": 2.7285542488098145, "learning_rate": 2.4959638796518455e-07, "loss": 0.1099, "step": 2844 }, { "epoch": 0.78, "grad_norm": 2.8141350746154785, "learning_rate": 2.49011743970713e-07, "loss": 0.1249, "step": 2845 }, { "epoch": 0.78, "grad_norm": 2.9485819339752197, "learning_rate": 2.4842768811006477e-07, "loss": 0.1236, "step": 2846 }, { "epoch": 0.78, "grad_norm": 2.8233511447906494, "learning_rate": 2.478442208406418e-07, "loss": 0.1155, "step": 2847 }, { "epoch": 0.78, "grad_norm": 2.608671188354492, "learning_rate": 2.47261342619384e-07, "loss": 0.1157, "step": 2848 }, { "epoch": 0.78, "grad_norm": 2.568787097930908, "learning_rate": 2.466790539027708e-07, "loss": 0.1108, "step": 2849 }, { "epoch": 0.78, "grad_norm": 2.983731746673584, "learning_rate": 2.460973551468194e-07, "loss": 0.1264, "step": 2850 }, { "epoch": 0.78, "grad_norm": 2.872974395751953, "learning_rate": 2.4551624680708484e-07, "loss": 0.1288, "step": 2851 }, { "epoch": 0.78, "grad_norm": 2.807682514190674, "learning_rate": 2.449357293386606e-07, "loss": 0.1282, "step": 2852 }, { "epoch": 0.78, "grad_norm": 2.9407236576080322, "learning_rate": 2.4435580319617624e-07, "loss": 0.1158, "step": 2853 }, { "epoch": 0.78, "grad_norm": 2.835533380508423, "learning_rate": 2.437764688337998e-07, "loss": 0.1142, "step": 2854 }, { "epoch": 0.78, "grad_norm": 2.828916072845459, "learning_rate": 2.431977267052343e-07, "loss": 0.1264, "step": 2855 }, { "epoch": 0.78, "grad_norm": 2.686336040496826, "learning_rate": 2.426195772637195e-07, "loss": 0.1134, "step": 2856 }, { "epoch": 0.78, "grad_norm": 2.516364812850952, "learning_rate": 2.4204202096203163e-07, "loss": 0.0996, "step": 2857 }, { "epoch": 0.78, "grad_norm": 2.841402769088745, "learning_rate": 2.4146505825248143e-07, "loss": 0.1279, "step": 2858 }, { "epoch": 0.78, "grad_norm": 2.809781074523926, "learning_rate": 2.408886895869157e-07, "loss": 0.1215, "step": 2859 }, { "epoch": 0.78, "grad_norm": 2.394096612930298, "learning_rate": 2.403129154167153e-07, "loss": 0.099, "step": 2860 }, { "epoch": 0.78, "grad_norm": 2.788119316101074, "learning_rate": 2.3973773619279533e-07, "loss": 0.1118, "step": 2861 }, { "epoch": 0.78, "grad_norm": 2.585268497467041, "learning_rate": 2.391631523656058e-07, "loss": 0.099, "step": 2862 }, { "epoch": 0.78, "grad_norm": 2.89695143699646, "learning_rate": 2.3858916438513043e-07, "loss": 0.1131, "step": 2863 }, { "epoch": 0.78, "grad_norm": 2.754573345184326, "learning_rate": 2.3801577270088535e-07, "loss": 0.1168, "step": 2864 }, { "epoch": 0.78, "grad_norm": 2.724893093109131, "learning_rate": 2.3744297776192047e-07, "loss": 0.1237, "step": 2865 }, { "epoch": 0.78, "grad_norm": 2.6611545085906982, "learning_rate": 2.368707800168176e-07, "loss": 0.1104, "step": 2866 }, { "epoch": 0.78, "grad_norm": 2.899855136871338, "learning_rate": 2.3629917991369198e-07, "loss": 0.1189, "step": 2867 }, { "epoch": 0.78, "grad_norm": 2.6722350120544434, "learning_rate": 2.357281779001904e-07, "loss": 0.1111, "step": 2868 }, { "epoch": 0.78, "grad_norm": 2.7901811599731445, "learning_rate": 2.351577744234907e-07, "loss": 0.1186, "step": 2869 }, { "epoch": 0.78, "grad_norm": 2.8303353786468506, "learning_rate": 2.345879699303025e-07, "loss": 0.1247, "step": 2870 }, { "epoch": 0.78, "grad_norm": 2.8870644569396973, "learning_rate": 2.340187648668658e-07, "loss": 0.1221, "step": 2871 }, { "epoch": 0.78, "grad_norm": 2.6357645988464355, "learning_rate": 2.3345015967895197e-07, "loss": 0.1229, "step": 2872 }, { "epoch": 0.78, "grad_norm": 2.5500104427337646, "learning_rate": 2.3288215481186235e-07, "loss": 0.1128, "step": 2873 }, { "epoch": 0.79, "grad_norm": 2.651008367538452, "learning_rate": 2.3231475071042773e-07, "loss": 0.1109, "step": 2874 }, { "epoch": 0.79, "grad_norm": 2.873354196548462, "learning_rate": 2.3174794781900853e-07, "loss": 0.1139, "step": 2875 }, { "epoch": 0.79, "grad_norm": 3.0768046379089355, "learning_rate": 2.3118174658149436e-07, "loss": 0.1296, "step": 2876 }, { "epoch": 0.79, "grad_norm": 2.6786396503448486, "learning_rate": 2.30616147441304e-07, "loss": 0.1111, "step": 2877 }, { "epoch": 0.79, "grad_norm": 3.07175612449646, "learning_rate": 2.300511508413845e-07, "loss": 0.1286, "step": 2878 }, { "epoch": 0.79, "grad_norm": 2.588007926940918, "learning_rate": 2.2948675722421085e-07, "loss": 0.1002, "step": 2879 }, { "epoch": 0.79, "grad_norm": 2.700286626815796, "learning_rate": 2.2892296703178592e-07, "loss": 0.1204, "step": 2880 }, { "epoch": 0.79, "grad_norm": 2.6056156158447266, "learning_rate": 2.283597807056399e-07, "loss": 0.0977, "step": 2881 }, { "epoch": 0.79, "grad_norm": 2.98291277885437, "learning_rate": 2.2779719868683013e-07, "loss": 0.1211, "step": 2882 }, { "epoch": 0.79, "grad_norm": 2.6762681007385254, "learning_rate": 2.272352214159412e-07, "loss": 0.1167, "step": 2883 }, { "epoch": 0.79, "grad_norm": 2.624441623687744, "learning_rate": 2.2667384933308352e-07, "loss": 0.1106, "step": 2884 }, { "epoch": 0.79, "grad_norm": 2.845133066177368, "learning_rate": 2.2611308287789344e-07, "loss": 0.1255, "step": 2885 }, { "epoch": 0.79, "grad_norm": 2.8640010356903076, "learning_rate": 2.2555292248953305e-07, "loss": 0.1304, "step": 2886 }, { "epoch": 0.79, "grad_norm": 2.504380702972412, "learning_rate": 2.2499336860669028e-07, "loss": 0.1037, "step": 2887 }, { "epoch": 0.79, "grad_norm": 2.75575852394104, "learning_rate": 2.244344216675781e-07, "loss": 0.1044, "step": 2888 }, { "epoch": 0.79, "grad_norm": 2.4212238788604736, "learning_rate": 2.2387608210993346e-07, "loss": 0.0993, "step": 2889 }, { "epoch": 0.79, "grad_norm": 2.851504325866699, "learning_rate": 2.233183503710182e-07, "loss": 0.122, "step": 2890 }, { "epoch": 0.79, "grad_norm": 2.895587682723999, "learning_rate": 2.2276122688761757e-07, "loss": 0.1126, "step": 2891 }, { "epoch": 0.79, "grad_norm": 2.7983148097991943, "learning_rate": 2.2220471209604119e-07, "loss": 0.1244, "step": 2892 }, { "epoch": 0.79, "grad_norm": 2.6924493312835693, "learning_rate": 2.2164880643212192e-07, "loss": 0.104, "step": 2893 }, { "epoch": 0.79, "grad_norm": 2.982330322265625, "learning_rate": 2.2109351033121514e-07, "loss": 0.1264, "step": 2894 }, { "epoch": 0.79, "grad_norm": 2.5642998218536377, "learning_rate": 2.2053882422819902e-07, "loss": 0.1027, "step": 2895 }, { "epoch": 0.79, "grad_norm": 2.423588752746582, "learning_rate": 2.1998474855747373e-07, "loss": 0.0971, "step": 2896 }, { "epoch": 0.79, "grad_norm": 2.9514458179473877, "learning_rate": 2.1943128375296194e-07, "loss": 0.1204, "step": 2897 }, { "epoch": 0.79, "grad_norm": 2.645742893218994, "learning_rate": 2.1887843024810803e-07, "loss": 0.1074, "step": 2898 }, { "epoch": 0.79, "grad_norm": 2.7427890300750732, "learning_rate": 2.183261884758769e-07, "loss": 0.1093, "step": 2899 }, { "epoch": 0.79, "grad_norm": 2.7665889263153076, "learning_rate": 2.1777455886875496e-07, "loss": 0.1043, "step": 2900 }, { "epoch": 0.79, "grad_norm": 2.7598376274108887, "learning_rate": 2.1722354185874846e-07, "loss": 0.1128, "step": 2901 }, { "epoch": 0.79, "grad_norm": 2.80429744720459, "learning_rate": 2.1667313787738496e-07, "loss": 0.1105, "step": 2902 }, { "epoch": 0.79, "grad_norm": 2.705533742904663, "learning_rate": 2.161233473557116e-07, "loss": 0.111, "step": 2903 }, { "epoch": 0.79, "grad_norm": 2.8968639373779297, "learning_rate": 2.1557417072429451e-07, "loss": 0.1219, "step": 2904 }, { "epoch": 0.79, "grad_norm": 2.773904323577881, "learning_rate": 2.150256084132196e-07, "loss": 0.1118, "step": 2905 }, { "epoch": 0.79, "grad_norm": 2.6877477169036865, "learning_rate": 2.144776608520913e-07, "loss": 0.1079, "step": 2906 }, { "epoch": 0.79, "grad_norm": 2.963486433029175, "learning_rate": 2.1393032847003289e-07, "loss": 0.1081, "step": 2907 }, { "epoch": 0.79, "grad_norm": 2.715536117553711, "learning_rate": 2.133836116956862e-07, "loss": 0.1106, "step": 2908 }, { "epoch": 0.79, "grad_norm": 2.7190189361572266, "learning_rate": 2.1283751095721024e-07, "loss": 0.1108, "step": 2909 }, { "epoch": 0.79, "grad_norm": 2.8466503620147705, "learning_rate": 2.1229202668228196e-07, "loss": 0.1122, "step": 2910 }, { "epoch": 0.8, "grad_norm": 3.005613088607788, "learning_rate": 2.1174715929809516e-07, "loss": 0.1126, "step": 2911 }, { "epoch": 0.8, "grad_norm": 2.653109312057495, "learning_rate": 2.1120290923136107e-07, "loss": 0.1151, "step": 2912 }, { "epoch": 0.8, "grad_norm": 2.745866060256958, "learning_rate": 2.1065927690830752e-07, "loss": 0.1112, "step": 2913 }, { "epoch": 0.8, "grad_norm": 3.076171636581421, "learning_rate": 2.1011626275467808e-07, "loss": 0.1361, "step": 2914 }, { "epoch": 0.8, "grad_norm": 2.815768003463745, "learning_rate": 2.0957386719573223e-07, "loss": 0.1189, "step": 2915 }, { "epoch": 0.8, "grad_norm": 2.729424476623535, "learning_rate": 2.0903209065624484e-07, "loss": 0.1127, "step": 2916 }, { "epoch": 0.8, "grad_norm": 2.8785223960876465, "learning_rate": 2.0849093356050685e-07, "loss": 0.1361, "step": 2917 }, { "epoch": 0.8, "grad_norm": 2.7613465785980225, "learning_rate": 2.0795039633232346e-07, "loss": 0.1212, "step": 2918 }, { "epoch": 0.8, "grad_norm": 2.8793344497680664, "learning_rate": 2.0741047939501434e-07, "loss": 0.1197, "step": 2919 }, { "epoch": 0.8, "grad_norm": 2.81070876121521, "learning_rate": 2.0687118317141406e-07, "loss": 0.1142, "step": 2920 }, { "epoch": 0.8, "grad_norm": 2.676225423812866, "learning_rate": 2.063325080838697e-07, "loss": 0.1138, "step": 2921 }, { "epoch": 0.8, "grad_norm": 2.6334424018859863, "learning_rate": 2.0579445455424315e-07, "loss": 0.119, "step": 2922 }, { "epoch": 0.8, "grad_norm": 2.856642484664917, "learning_rate": 2.0525702300390945e-07, "loss": 0.123, "step": 2923 }, { "epoch": 0.8, "grad_norm": 2.81278395652771, "learning_rate": 2.0472021385375572e-07, "loss": 0.1154, "step": 2924 }, { "epoch": 0.8, "grad_norm": 2.5518946647644043, "learning_rate": 2.0418402752418283e-07, "loss": 0.1129, "step": 2925 }, { "epoch": 0.8, "grad_norm": 2.406761646270752, "learning_rate": 2.0364846443510276e-07, "loss": 0.1062, "step": 2926 }, { "epoch": 0.8, "grad_norm": 2.778789758682251, "learning_rate": 2.031135250059397e-07, "loss": 0.1211, "step": 2927 }, { "epoch": 0.8, "grad_norm": 2.820254325866699, "learning_rate": 2.0257920965563012e-07, "loss": 0.1083, "step": 2928 }, { "epoch": 0.8, "grad_norm": 2.8451895713806152, "learning_rate": 2.0204551880262066e-07, "loss": 0.1135, "step": 2929 }, { "epoch": 0.8, "grad_norm": 3.0596845149993896, "learning_rate": 2.0151245286486996e-07, "loss": 0.1306, "step": 2930 }, { "epoch": 0.8, "grad_norm": 2.6155900955200195, "learning_rate": 2.009800122598465e-07, "loss": 0.1066, "step": 2931 }, { "epoch": 0.8, "grad_norm": 2.4980719089508057, "learning_rate": 2.0044819740452911e-07, "loss": 0.1001, "step": 2932 }, { "epoch": 0.8, "grad_norm": 2.474144458770752, "learning_rate": 1.9991700871540708e-07, "loss": 0.1033, "step": 2933 }, { "epoch": 0.8, "grad_norm": 2.7342374324798584, "learning_rate": 1.993864466084786e-07, "loss": 0.1024, "step": 2934 }, { "epoch": 0.8, "grad_norm": 2.7930715084075928, "learning_rate": 1.9885651149925188e-07, "loss": 0.1055, "step": 2935 }, { "epoch": 0.8, "grad_norm": 2.949699640274048, "learning_rate": 1.983272038027437e-07, "loss": 0.1316, "step": 2936 }, { "epoch": 0.8, "grad_norm": 2.5166850090026855, "learning_rate": 1.9779852393347907e-07, "loss": 0.1125, "step": 2937 }, { "epoch": 0.8, "grad_norm": 2.5932557582855225, "learning_rate": 1.9727047230549242e-07, "loss": 0.105, "step": 2938 }, { "epoch": 0.8, "grad_norm": 2.8534560203552246, "learning_rate": 1.9674304933232498e-07, "loss": 0.1143, "step": 2939 }, { "epoch": 0.8, "grad_norm": 3.1329243183135986, "learning_rate": 1.962162554270267e-07, "loss": 0.1247, "step": 2940 }, { "epoch": 0.8, "grad_norm": 3.1219422817230225, "learning_rate": 1.9569009100215418e-07, "loss": 0.1282, "step": 2941 }, { "epoch": 0.8, "grad_norm": 2.9756643772125244, "learning_rate": 1.9516455646977103e-07, "loss": 0.1147, "step": 2942 }, { "epoch": 0.8, "grad_norm": 2.833127498626709, "learning_rate": 1.9463965224144807e-07, "loss": 0.1119, "step": 2943 }, { "epoch": 0.8, "grad_norm": 2.7026820182800293, "learning_rate": 1.94115378728262e-07, "loss": 0.1098, "step": 2944 }, { "epoch": 0.8, "grad_norm": 2.903571844100952, "learning_rate": 1.9359173634079606e-07, "loss": 0.1277, "step": 2945 }, { "epoch": 0.8, "grad_norm": 2.713010787963867, "learning_rate": 1.9306872548913876e-07, "loss": 0.1058, "step": 2946 }, { "epoch": 0.81, "grad_norm": 3.126382827758789, "learning_rate": 1.9254634658288405e-07, "loss": 0.1176, "step": 2947 }, { "epoch": 0.81, "grad_norm": 2.9158408641815186, "learning_rate": 1.920246000311315e-07, "loss": 0.1177, "step": 2948 }, { "epoch": 0.81, "grad_norm": 2.6403298377990723, "learning_rate": 1.9150348624248468e-07, "loss": 0.1078, "step": 2949 }, { "epoch": 0.81, "grad_norm": 2.958865165710449, "learning_rate": 1.9098300562505264e-07, "loss": 0.1322, "step": 2950 }, { "epoch": 0.81, "grad_norm": 2.862724542617798, "learning_rate": 1.9046315858644746e-07, "loss": 0.1204, "step": 2951 }, { "epoch": 0.81, "grad_norm": 2.818732738494873, "learning_rate": 1.8994394553378556e-07, "loss": 0.1227, "step": 2952 }, { "epoch": 0.81, "grad_norm": 2.4232325553894043, "learning_rate": 1.8942536687368703e-07, "loss": 0.104, "step": 2953 }, { "epoch": 0.81, "grad_norm": 2.7659695148468018, "learning_rate": 1.8890742301227468e-07, "loss": 0.1233, "step": 2954 }, { "epoch": 0.81, "grad_norm": 2.607856273651123, "learning_rate": 1.883901143551747e-07, "loss": 0.1081, "step": 2955 }, { "epoch": 0.81, "grad_norm": 2.6588351726531982, "learning_rate": 1.878734413075156e-07, "loss": 0.1048, "step": 2956 }, { "epoch": 0.81, "grad_norm": 2.793586015701294, "learning_rate": 1.8735740427392755e-07, "loss": 0.1179, "step": 2957 }, { "epoch": 0.81, "grad_norm": 2.70487117767334, "learning_rate": 1.8684200365854375e-07, "loss": 0.1067, "step": 2958 }, { "epoch": 0.81, "grad_norm": 2.7607598304748535, "learning_rate": 1.8632723986499787e-07, "loss": 0.1112, "step": 2959 }, { "epoch": 0.81, "grad_norm": 2.9593124389648438, "learning_rate": 1.8581311329642591e-07, "loss": 0.1169, "step": 2960 }, { "epoch": 0.81, "grad_norm": 2.719787359237671, "learning_rate": 1.8529962435546398e-07, "loss": 0.1193, "step": 2961 }, { "epoch": 0.81, "grad_norm": 2.843885898590088, "learning_rate": 1.8478677344424898e-07, "loss": 0.1216, "step": 2962 }, { "epoch": 0.81, "grad_norm": 2.6336824893951416, "learning_rate": 1.8427456096441874e-07, "loss": 0.109, "step": 2963 }, { "epoch": 0.81, "grad_norm": 2.624443769454956, "learning_rate": 1.8376298731711016e-07, "loss": 0.1055, "step": 2964 }, { "epoch": 0.81, "grad_norm": 2.865316152572632, "learning_rate": 1.8325205290296098e-07, "loss": 0.1169, "step": 2965 }, { "epoch": 0.81, "grad_norm": 2.6282095909118652, "learning_rate": 1.8274175812210724e-07, "loss": 0.1084, "step": 2966 }, { "epoch": 0.81, "grad_norm": 2.7483646869659424, "learning_rate": 1.822321033741845e-07, "loss": 0.1177, "step": 2967 }, { "epoch": 0.81, "grad_norm": 2.7712483406066895, "learning_rate": 1.8172308905832735e-07, "loss": 0.1124, "step": 2968 }, { "epoch": 0.81, "grad_norm": 2.7491652965545654, "learning_rate": 1.8121471557316813e-07, "loss": 0.1081, "step": 2969 }, { "epoch": 0.81, "grad_norm": 2.5816409587860107, "learning_rate": 1.8070698331683841e-07, "loss": 0.1048, "step": 2970 }, { "epoch": 0.81, "grad_norm": 2.7578561305999756, "learning_rate": 1.8019989268696666e-07, "loss": 0.1077, "step": 2971 }, { "epoch": 0.81, "grad_norm": 2.678293466567993, "learning_rate": 1.7969344408067866e-07, "loss": 0.1237, "step": 2972 }, { "epoch": 0.81, "grad_norm": 2.8176612854003906, "learning_rate": 1.7918763789459857e-07, "loss": 0.1211, "step": 2973 }, { "epoch": 0.81, "grad_norm": 2.752276659011841, "learning_rate": 1.7868247452484608e-07, "loss": 0.1069, "step": 2974 }, { "epoch": 0.81, "grad_norm": 2.606996536254883, "learning_rate": 1.7817795436703874e-07, "loss": 0.1107, "step": 2975 }, { "epoch": 0.81, "grad_norm": 2.7089779376983643, "learning_rate": 1.776740778162895e-07, "loss": 0.1176, "step": 2976 }, { "epoch": 0.81, "grad_norm": 3.056824207305908, "learning_rate": 1.7717084526720728e-07, "loss": 0.1214, "step": 2977 }, { "epoch": 0.81, "grad_norm": 2.5314202308654785, "learning_rate": 1.7666825711389722e-07, "loss": 0.0998, "step": 2978 }, { "epoch": 0.81, "grad_norm": 2.7676849365234375, "learning_rate": 1.7616631374995904e-07, "loss": 0.117, "step": 2979 }, { "epoch": 0.81, "grad_norm": 2.4773690700531006, "learning_rate": 1.7566501556848855e-07, "loss": 0.0979, "step": 2980 }, { "epoch": 0.81, "grad_norm": 2.7643463611602783, "learning_rate": 1.7516436296207538e-07, "loss": 0.1172, "step": 2981 }, { "epoch": 0.81, "grad_norm": 2.681955337524414, "learning_rate": 1.7466435632280352e-07, "loss": 0.1206, "step": 2982 }, { "epoch": 0.81, "grad_norm": 2.6423914432525635, "learning_rate": 1.7416499604225176e-07, "loss": 0.111, "step": 2983 }, { "epoch": 0.82, "grad_norm": 2.6484272480010986, "learning_rate": 1.7366628251149252e-07, "loss": 0.1061, "step": 2984 }, { "epoch": 0.82, "grad_norm": 3.124119281768799, "learning_rate": 1.7316821612109135e-07, "loss": 0.1196, "step": 2985 }, { "epoch": 0.82, "grad_norm": 2.889885187149048, "learning_rate": 1.7267079726110723e-07, "loss": 0.1266, "step": 2986 }, { "epoch": 0.82, "grad_norm": 2.9232211112976074, "learning_rate": 1.721740263210918e-07, "loss": 0.1216, "step": 2987 }, { "epoch": 0.82, "grad_norm": 2.7191336154937744, "learning_rate": 1.716779036900895e-07, "loss": 0.1033, "step": 2988 }, { "epoch": 0.82, "grad_norm": 2.7927796840667725, "learning_rate": 1.7118242975663754e-07, "loss": 0.116, "step": 2989 }, { "epoch": 0.82, "grad_norm": 3.0129342079162598, "learning_rate": 1.7068760490876422e-07, "loss": 0.1265, "step": 2990 }, { "epoch": 0.82, "grad_norm": 2.707798480987549, "learning_rate": 1.7019342953398997e-07, "loss": 0.1153, "step": 2991 }, { "epoch": 0.82, "grad_norm": 3.0737831592559814, "learning_rate": 1.696999040193261e-07, "loss": 0.1102, "step": 2992 }, { "epoch": 0.82, "grad_norm": 2.4257168769836426, "learning_rate": 1.692070287512758e-07, "loss": 0.1031, "step": 2993 }, { "epoch": 0.82, "grad_norm": 2.5452399253845215, "learning_rate": 1.6871480411583283e-07, "loss": 0.0914, "step": 2994 }, { "epoch": 0.82, "grad_norm": 2.894639253616333, "learning_rate": 1.6822323049848087e-07, "loss": 0.133, "step": 2995 }, { "epoch": 0.82, "grad_norm": 2.816392183303833, "learning_rate": 1.6773230828419405e-07, "loss": 0.1206, "step": 2996 }, { "epoch": 0.82, "grad_norm": 3.0011281967163086, "learning_rate": 1.672420378574363e-07, "loss": 0.1242, "step": 2997 }, { "epoch": 0.82, "grad_norm": 2.957533597946167, "learning_rate": 1.6675241960216125e-07, "loss": 0.1177, "step": 2998 }, { "epoch": 0.82, "grad_norm": 2.866485595703125, "learning_rate": 1.6626345390181206e-07, "loss": 0.1113, "step": 2999 }, { "epoch": 0.82, "grad_norm": 3.1493520736694336, "learning_rate": 1.6577514113932035e-07, "loss": 0.1318, "step": 3000 }, { "epoch": 0.82, "grad_norm": 2.880171298980713, "learning_rate": 1.6528748169710638e-07, "loss": 0.1252, "step": 3001 }, { "epoch": 0.82, "grad_norm": 2.9624109268188477, "learning_rate": 1.648004759570787e-07, "loss": 0.1167, "step": 3002 }, { "epoch": 0.82, "grad_norm": 2.7028636932373047, "learning_rate": 1.6431412430063462e-07, "loss": 0.1129, "step": 3003 }, { "epoch": 0.82, "grad_norm": 2.9838626384735107, "learning_rate": 1.6382842710865875e-07, "loss": 0.1261, "step": 3004 }, { "epoch": 0.82, "grad_norm": 2.775270938873291, "learning_rate": 1.6334338476152288e-07, "loss": 0.1173, "step": 3005 }, { "epoch": 0.82, "grad_norm": 3.03607177734375, "learning_rate": 1.628589976390865e-07, "loss": 0.1228, "step": 3006 }, { "epoch": 0.82, "grad_norm": 2.8795881271362305, "learning_rate": 1.6237526612069508e-07, "loss": 0.1097, "step": 3007 }, { "epoch": 0.82, "grad_norm": 3.080247640609741, "learning_rate": 1.6189219058518177e-07, "loss": 0.1245, "step": 3008 }, { "epoch": 0.82, "grad_norm": 2.7216033935546875, "learning_rate": 1.6140977141086575e-07, "loss": 0.1128, "step": 3009 }, { "epoch": 0.82, "grad_norm": 2.648832082748413, "learning_rate": 1.6092800897555148e-07, "loss": 0.1059, "step": 3010 }, { "epoch": 0.82, "grad_norm": 2.8429346084594727, "learning_rate": 1.6044690365652957e-07, "loss": 0.1191, "step": 3011 }, { "epoch": 0.82, "grad_norm": 3.0376110076904297, "learning_rate": 1.599664558305759e-07, "loss": 0.1244, "step": 3012 }, { "epoch": 0.82, "grad_norm": 2.5975003242492676, "learning_rate": 1.5948666587395142e-07, "loss": 0.0968, "step": 3013 }, { "epoch": 0.82, "grad_norm": 2.4642364978790283, "learning_rate": 1.5900753416240255e-07, "loss": 0.0967, "step": 3014 }, { "epoch": 0.82, "grad_norm": 3.507802724838257, "learning_rate": 1.5852906107115893e-07, "loss": 0.1174, "step": 3015 }, { "epoch": 0.82, "grad_norm": 2.7020273208618164, "learning_rate": 1.5805124697493578e-07, "loss": 0.1116, "step": 3016 }, { "epoch": 0.82, "grad_norm": 2.912220001220703, "learning_rate": 1.5757409224793072e-07, "loss": 0.1152, "step": 3017 }, { "epoch": 0.82, "grad_norm": 2.606558084487915, "learning_rate": 1.5709759726382621e-07, "loss": 0.0978, "step": 3018 }, { "epoch": 0.82, "grad_norm": 3.11519718170166, "learning_rate": 1.5662176239578773e-07, "loss": 0.127, "step": 3019 }, { "epoch": 0.82, "grad_norm": 2.914428949356079, "learning_rate": 1.5614658801646353e-07, "loss": 0.1095, "step": 3020 }, { "epoch": 0.83, "grad_norm": 2.8984978199005127, "learning_rate": 1.5567207449798515e-07, "loss": 0.1234, "step": 3021 }, { "epoch": 0.83, "grad_norm": 2.8738179206848145, "learning_rate": 1.5519822221196544e-07, "loss": 0.1194, "step": 3022 }, { "epoch": 0.83, "grad_norm": 2.75415301322937, "learning_rate": 1.5472503152950056e-07, "loss": 0.1151, "step": 3023 }, { "epoch": 0.83, "grad_norm": 2.94234561920166, "learning_rate": 1.5425250282116842e-07, "loss": 0.1204, "step": 3024 }, { "epoch": 0.83, "grad_norm": 2.6877970695495605, "learning_rate": 1.5378063645702766e-07, "loss": 0.1145, "step": 3025 }, { "epoch": 0.83, "grad_norm": 2.7449467182159424, "learning_rate": 1.5330943280661967e-07, "loss": 0.1202, "step": 3026 }, { "epoch": 0.83, "grad_norm": 2.5695841312408447, "learning_rate": 1.5283889223896474e-07, "loss": 0.107, "step": 3027 }, { "epoch": 0.83, "grad_norm": 2.8050339221954346, "learning_rate": 1.5236901512256573e-07, "loss": 0.1172, "step": 3028 }, { "epoch": 0.83, "grad_norm": 2.907428503036499, "learning_rate": 1.518998018254054e-07, "loss": 0.1275, "step": 3029 }, { "epoch": 0.83, "grad_norm": 2.703794002532959, "learning_rate": 1.5143125271494606e-07, "loss": 0.103, "step": 3030 }, { "epoch": 0.83, "grad_norm": 2.751523733139038, "learning_rate": 1.5096336815813103e-07, "loss": 0.1107, "step": 3031 }, { "epoch": 0.83, "grad_norm": 2.788540840148926, "learning_rate": 1.5049614852138148e-07, "loss": 0.1229, "step": 3032 }, { "epoch": 0.83, "grad_norm": 2.535400867462158, "learning_rate": 1.5002959417059935e-07, "loss": 0.0967, "step": 3033 }, { "epoch": 0.83, "grad_norm": 2.8100125789642334, "learning_rate": 1.4956370547116527e-07, "loss": 0.1078, "step": 3034 }, { "epoch": 0.83, "grad_norm": 3.075692892074585, "learning_rate": 1.490984827879378e-07, "loss": 0.1261, "step": 3035 }, { "epoch": 0.83, "grad_norm": 2.9490807056427, "learning_rate": 1.486339264852553e-07, "loss": 0.1387, "step": 3036 }, { "epoch": 0.83, "grad_norm": 2.610565662384033, "learning_rate": 1.481700369269323e-07, "loss": 0.1008, "step": 3037 }, { "epoch": 0.83, "grad_norm": 2.6819276809692383, "learning_rate": 1.47706814476263e-07, "loss": 0.115, "step": 3038 }, { "epoch": 0.83, "grad_norm": 2.665633201599121, "learning_rate": 1.4724425949601837e-07, "loss": 0.1104, "step": 3039 }, { "epoch": 0.83, "grad_norm": 2.8097269535064697, "learning_rate": 1.4678237234844648e-07, "loss": 0.1225, "step": 3040 }, { "epoch": 0.83, "grad_norm": 2.511746644973755, "learning_rate": 1.4632115339527306e-07, "loss": 0.1047, "step": 3041 }, { "epoch": 0.83, "grad_norm": 2.7344541549682617, "learning_rate": 1.4586060299769975e-07, "loss": 0.115, "step": 3042 }, { "epoch": 0.83, "grad_norm": 2.909001111984253, "learning_rate": 1.4540072151640493e-07, "loss": 0.11, "step": 3043 }, { "epoch": 0.83, "grad_norm": 2.7571046352386475, "learning_rate": 1.4494150931154358e-07, "loss": 0.1176, "step": 3044 }, { "epoch": 0.83, "grad_norm": 2.781496524810791, "learning_rate": 1.4448296674274564e-07, "loss": 0.1262, "step": 3045 }, { "epoch": 0.83, "grad_norm": 3.0702664852142334, "learning_rate": 1.4402509416911756e-07, "loss": 0.1353, "step": 3046 }, { "epoch": 0.83, "grad_norm": 2.733211040496826, "learning_rate": 1.4356789194924045e-07, "loss": 0.1069, "step": 3047 }, { "epoch": 0.83, "grad_norm": 2.7156100273132324, "learning_rate": 1.4311136044117033e-07, "loss": 0.1042, "step": 3048 }, { "epoch": 0.83, "grad_norm": 3.0435407161712646, "learning_rate": 1.4265550000243886e-07, "loss": 0.1176, "step": 3049 }, { "epoch": 0.83, "grad_norm": 2.901156425476074, "learning_rate": 1.4220031099005092e-07, "loss": 0.1114, "step": 3050 }, { "epoch": 0.83, "grad_norm": 2.631953477859497, "learning_rate": 1.417457937604868e-07, "loss": 0.1265, "step": 3051 }, { "epoch": 0.83, "grad_norm": 2.4306070804595947, "learning_rate": 1.4129194866969973e-07, "loss": 0.1032, "step": 3052 }, { "epoch": 0.83, "grad_norm": 2.781864881515503, "learning_rate": 1.4083877607311667e-07, "loss": 0.1239, "step": 3053 }, { "epoch": 0.83, "grad_norm": 2.5214765071868896, "learning_rate": 1.4038627632563882e-07, "loss": 0.1117, "step": 3054 }, { "epoch": 0.83, "grad_norm": 2.526747941970825, "learning_rate": 1.3993444978163904e-07, "loss": 0.1056, "step": 3055 }, { "epoch": 0.83, "grad_norm": 2.746837854385376, "learning_rate": 1.394832967949643e-07, "loss": 0.1145, "step": 3056 }, { "epoch": 0.84, "grad_norm": 2.6512248516082764, "learning_rate": 1.3903281771893316e-07, "loss": 0.1101, "step": 3057 }, { "epoch": 0.84, "grad_norm": 2.8380045890808105, "learning_rate": 1.3858301290633667e-07, "loss": 0.1224, "step": 3058 }, { "epoch": 0.84, "grad_norm": 2.926839590072632, "learning_rate": 1.3813388270943828e-07, "loss": 0.1253, "step": 3059 }, { "epoch": 0.84, "grad_norm": 2.591266632080078, "learning_rate": 1.3768542747997214e-07, "loss": 0.1157, "step": 3060 }, { "epoch": 0.84, "grad_norm": 2.8145010471343994, "learning_rate": 1.37237647569145e-07, "loss": 0.1189, "step": 3061 }, { "epoch": 0.84, "grad_norm": 2.6650164127349854, "learning_rate": 1.3679054332763397e-07, "loss": 0.103, "step": 3062 }, { "epoch": 0.84, "grad_norm": 2.683551549911499, "learning_rate": 1.3634411510558675e-07, "loss": 0.1099, "step": 3063 }, { "epoch": 0.84, "grad_norm": 2.7763309478759766, "learning_rate": 1.358983632526226e-07, "loss": 0.1106, "step": 3064 }, { "epoch": 0.84, "grad_norm": 3.0121800899505615, "learning_rate": 1.3545328811783007e-07, "loss": 0.1264, "step": 3065 }, { "epoch": 0.84, "grad_norm": 2.6209750175476074, "learning_rate": 1.3500889004976857e-07, "loss": 0.1112, "step": 3066 }, { "epoch": 0.84, "grad_norm": 2.937760829925537, "learning_rate": 1.3456516939646679e-07, "loss": 0.1195, "step": 3067 }, { "epoch": 0.84, "grad_norm": 3.2091565132141113, "learning_rate": 1.3412212650542265e-07, "loss": 0.122, "step": 3068 }, { "epoch": 0.84, "grad_norm": 2.7288503646850586, "learning_rate": 1.3367976172360418e-07, "loss": 0.109, "step": 3069 }, { "epoch": 0.84, "grad_norm": 2.8172175884246826, "learning_rate": 1.3323807539744726e-07, "loss": 0.1085, "step": 3070 }, { "epoch": 0.84, "grad_norm": 2.8133957386016846, "learning_rate": 1.327970678728576e-07, "loss": 0.1076, "step": 3071 }, { "epoch": 0.84, "grad_norm": 2.762995719909668, "learning_rate": 1.3235673949520842e-07, "loss": 0.1232, "step": 3072 }, { "epoch": 0.84, "grad_norm": 2.835566997528076, "learning_rate": 1.3191709060934098e-07, "loss": 0.1282, "step": 3073 }, { "epoch": 0.84, "grad_norm": 2.9513890743255615, "learning_rate": 1.314781215595654e-07, "loss": 0.1148, "step": 3074 }, { "epoch": 0.84, "grad_norm": 2.9118540287017822, "learning_rate": 1.3103983268965824e-07, "loss": 0.1184, "step": 3075 }, { "epoch": 0.84, "grad_norm": 2.788700819015503, "learning_rate": 1.3060222434286429e-07, "loss": 0.115, "step": 3076 }, { "epoch": 0.84, "grad_norm": 2.9123387336730957, "learning_rate": 1.3016529686189482e-07, "loss": 0.1153, "step": 3077 }, { "epoch": 0.84, "grad_norm": 2.6774423122406006, "learning_rate": 1.297290505889278e-07, "loss": 0.0998, "step": 3078 }, { "epoch": 0.84, "grad_norm": 2.7714312076568604, "learning_rate": 1.2929348586560852e-07, "loss": 0.1215, "step": 3079 }, { "epoch": 0.84, "grad_norm": 2.696655511856079, "learning_rate": 1.288586030330474e-07, "loss": 0.1153, "step": 3080 }, { "epoch": 0.84, "grad_norm": 2.9519054889678955, "learning_rate": 1.2842440243182196e-07, "loss": 0.1238, "step": 3081 }, { "epoch": 0.84, "grad_norm": 2.8077220916748047, "learning_rate": 1.2799088440197447e-07, "loss": 0.1207, "step": 3082 }, { "epoch": 0.84, "grad_norm": 2.6261038780212402, "learning_rate": 1.2755804928301306e-07, "loss": 0.1133, "step": 3083 }, { "epoch": 0.84, "grad_norm": 2.76934814453125, "learning_rate": 1.2712589741391143e-07, "loss": 0.1221, "step": 3084 }, { "epoch": 0.84, "grad_norm": 2.999305009841919, "learning_rate": 1.2669442913310723e-07, "loss": 0.1298, "step": 3085 }, { "epoch": 0.84, "grad_norm": 2.842081308364868, "learning_rate": 1.2626364477850394e-07, "loss": 0.1106, "step": 3086 }, { "epoch": 0.84, "grad_norm": 2.7725205421447754, "learning_rate": 1.2583354468746843e-07, "loss": 0.099, "step": 3087 }, { "epoch": 0.84, "grad_norm": 2.852376937866211, "learning_rate": 1.2540412919683208e-07, "loss": 0.118, "step": 3088 }, { "epoch": 0.84, "grad_norm": 3.0244083404541016, "learning_rate": 1.249753986428903e-07, "loss": 0.1244, "step": 3089 }, { "epoch": 0.84, "grad_norm": 2.580749034881592, "learning_rate": 1.2454735336140166e-07, "loss": 0.1113, "step": 3090 }, { "epoch": 0.84, "grad_norm": 2.531583547592163, "learning_rate": 1.2411999368758874e-07, "loss": 0.1045, "step": 3091 }, { "epoch": 0.84, "grad_norm": 2.663281202316284, "learning_rate": 1.2369331995613663e-07, "loss": 0.125, "step": 3092 }, { "epoch": 0.84, "grad_norm": 3.0186655521392822, "learning_rate": 1.2326733250119292e-07, "loss": 0.1226, "step": 3093 }, { "epoch": 0.85, "grad_norm": 2.77361798286438, "learning_rate": 1.2284203165636886e-07, "loss": 0.1202, "step": 3094 }, { "epoch": 0.85, "grad_norm": 2.643756628036499, "learning_rate": 1.224174177547368e-07, "loss": 0.1127, "step": 3095 }, { "epoch": 0.85, "grad_norm": 2.6107122898101807, "learning_rate": 1.2199349112883194e-07, "loss": 0.1011, "step": 3096 }, { "epoch": 0.85, "grad_norm": 2.981204032897949, "learning_rate": 1.2157025211065097e-07, "loss": 0.1206, "step": 3097 }, { "epoch": 0.85, "grad_norm": 2.899815082550049, "learning_rate": 1.211477010316516e-07, "loss": 0.1165, "step": 3098 }, { "epoch": 0.85, "grad_norm": 2.7035837173461914, "learning_rate": 1.207258382227536e-07, "loss": 0.1101, "step": 3099 }, { "epoch": 0.85, "grad_norm": 2.7130398750305176, "learning_rate": 1.2030466401433748e-07, "loss": 0.1076, "step": 3100 }, { "epoch": 0.85, "grad_norm": 2.477532386779785, "learning_rate": 1.1988417873624414e-07, "loss": 0.1037, "step": 3101 }, { "epoch": 0.85, "grad_norm": 2.8978729248046875, "learning_rate": 1.1946438271777514e-07, "loss": 0.1227, "step": 3102 }, { "epoch": 0.85, "grad_norm": 2.6164002418518066, "learning_rate": 1.1904527628769212e-07, "loss": 0.1118, "step": 3103 }, { "epoch": 0.85, "grad_norm": 2.697582960128784, "learning_rate": 1.1862685977421704e-07, "loss": 0.1168, "step": 3104 }, { "epoch": 0.85, "grad_norm": 2.663384199142456, "learning_rate": 1.1820913350503137e-07, "loss": 0.1111, "step": 3105 }, { "epoch": 0.85, "grad_norm": 2.7724335193634033, "learning_rate": 1.1779209780727594e-07, "loss": 0.1192, "step": 3106 }, { "epoch": 0.85, "grad_norm": 2.4050371646881104, "learning_rate": 1.1737575300755077e-07, "loss": 0.0984, "step": 3107 }, { "epoch": 0.85, "grad_norm": 2.6711740493774414, "learning_rate": 1.1696009943191454e-07, "loss": 0.118, "step": 3108 }, { "epoch": 0.85, "grad_norm": 2.795835494995117, "learning_rate": 1.1654513740588523e-07, "loss": 0.1257, "step": 3109 }, { "epoch": 0.85, "grad_norm": 2.6235530376434326, "learning_rate": 1.1613086725443888e-07, "loss": 0.1092, "step": 3110 }, { "epoch": 0.85, "grad_norm": 3.0112788677215576, "learning_rate": 1.1571728930200952e-07, "loss": 0.1253, "step": 3111 }, { "epoch": 0.85, "grad_norm": 3.473054885864258, "learning_rate": 1.1530440387248985e-07, "loss": 0.1355, "step": 3112 }, { "epoch": 0.85, "grad_norm": 2.6995739936828613, "learning_rate": 1.1489221128922878e-07, "loss": 0.0993, "step": 3113 }, { "epoch": 0.85, "grad_norm": 2.7281997203826904, "learning_rate": 1.1448071187503383e-07, "loss": 0.116, "step": 3114 }, { "epoch": 0.85, "grad_norm": 2.543882131576538, "learning_rate": 1.140699059521697e-07, "loss": 0.1018, "step": 3115 }, { "epoch": 0.85, "grad_norm": 2.9289021492004395, "learning_rate": 1.1365979384235713e-07, "loss": 0.1199, "step": 3116 }, { "epoch": 0.85, "grad_norm": 2.6259896755218506, "learning_rate": 1.1325037586677444e-07, "loss": 0.1138, "step": 3117 }, { "epoch": 0.85, "grad_norm": 2.5959458351135254, "learning_rate": 1.1284165234605536e-07, "loss": 0.1066, "step": 3118 }, { "epoch": 0.85, "grad_norm": 2.7366015911102295, "learning_rate": 1.124336236002904e-07, "loss": 0.1035, "step": 3119 }, { "epoch": 0.85, "grad_norm": 2.8336050510406494, "learning_rate": 1.1202628994902629e-07, "loss": 0.1038, "step": 3120 }, { "epoch": 0.85, "grad_norm": 2.832141160964966, "learning_rate": 1.1161965171126441e-07, "loss": 0.1117, "step": 3121 }, { "epoch": 0.85, "grad_norm": 2.7790372371673584, "learning_rate": 1.1121370920546269e-07, "loss": 0.1147, "step": 3122 }, { "epoch": 0.85, "grad_norm": 2.91813063621521, "learning_rate": 1.1080846274953281e-07, "loss": 0.1195, "step": 3123 }, { "epoch": 0.85, "grad_norm": 2.7322089672088623, "learning_rate": 1.104039126608426e-07, "loss": 0.1129, "step": 3124 }, { "epoch": 0.85, "grad_norm": 2.848047971725464, "learning_rate": 1.1000005925621403e-07, "loss": 0.1192, "step": 3125 }, { "epoch": 0.85, "grad_norm": 2.6769890785217285, "learning_rate": 1.0959690285192324e-07, "loss": 0.1088, "step": 3126 }, { "epoch": 0.85, "grad_norm": 3.087414503097534, "learning_rate": 1.0919444376370135e-07, "loss": 0.1287, "step": 3127 }, { "epoch": 0.85, "grad_norm": 3.322653293609619, "learning_rate": 1.0879268230673188e-07, "loss": 0.1203, "step": 3128 }, { "epoch": 0.85, "grad_norm": 2.6233925819396973, "learning_rate": 1.083916187956534e-07, "loss": 0.1033, "step": 3129 }, { "epoch": 0.86, "grad_norm": 2.9331064224243164, "learning_rate": 1.0799125354455752e-07, "loss": 0.1305, "step": 3130 }, { "epoch": 0.86, "grad_norm": 2.7068936824798584, "learning_rate": 1.0759158686698865e-07, "loss": 0.1115, "step": 3131 }, { "epoch": 0.86, "grad_norm": 2.684677839279175, "learning_rate": 1.071926190759448e-07, "loss": 0.108, "step": 3132 }, { "epoch": 0.86, "grad_norm": 2.8053512573242188, "learning_rate": 1.0679435048387542e-07, "loss": 0.1084, "step": 3133 }, { "epoch": 0.86, "grad_norm": 2.674454689025879, "learning_rate": 1.063967814026836e-07, "loss": 0.1179, "step": 3134 }, { "epoch": 0.86, "grad_norm": 2.6979193687438965, "learning_rate": 1.0599991214372439e-07, "loss": 0.1132, "step": 3135 }, { "epoch": 0.86, "grad_norm": 2.542658567428589, "learning_rate": 1.0560374301780405e-07, "loss": 0.1097, "step": 3136 }, { "epoch": 0.86, "grad_norm": 2.8575947284698486, "learning_rate": 1.0520827433518154e-07, "loss": 0.1201, "step": 3137 }, { "epoch": 0.86, "grad_norm": 3.025064706802368, "learning_rate": 1.0481350640556652e-07, "loss": 0.1374, "step": 3138 }, { "epoch": 0.86, "grad_norm": 2.638869047164917, "learning_rate": 1.0441943953812005e-07, "loss": 0.1114, "step": 3139 }, { "epoch": 0.86, "grad_norm": 3.0275416374206543, "learning_rate": 1.0402607404145447e-07, "loss": 0.1191, "step": 3140 }, { "epoch": 0.86, "grad_norm": 2.9517245292663574, "learning_rate": 1.0363341022363225e-07, "loss": 0.1214, "step": 3141 }, { "epoch": 0.86, "grad_norm": 2.52838397026062, "learning_rate": 1.0324144839216698e-07, "loss": 0.1034, "step": 3142 }, { "epoch": 0.86, "grad_norm": 2.6634321212768555, "learning_rate": 1.0285018885402219e-07, "loss": 0.107, "step": 3143 }, { "epoch": 0.86, "grad_norm": 2.892220973968506, "learning_rate": 1.0245963191561103e-07, "loss": 0.1302, "step": 3144 }, { "epoch": 0.86, "grad_norm": 3.1188881397247314, "learning_rate": 1.0206977788279736e-07, "loss": 0.1304, "step": 3145 }, { "epoch": 0.86, "grad_norm": 2.8132681846618652, "learning_rate": 1.0168062706089354e-07, "loss": 0.107, "step": 3146 }, { "epoch": 0.86, "grad_norm": 2.581531286239624, "learning_rate": 1.0129217975466197e-07, "loss": 0.1005, "step": 3147 }, { "epoch": 0.86, "grad_norm": 2.7717487812042236, "learning_rate": 1.0090443626831368e-07, "loss": 0.1067, "step": 3148 }, { "epoch": 0.86, "grad_norm": 3.0757551193237305, "learning_rate": 1.0051739690550854e-07, "loss": 0.1415, "step": 3149 }, { "epoch": 0.86, "grad_norm": 2.7506611347198486, "learning_rate": 1.0013106196935529e-07, "loss": 0.1161, "step": 3150 }, { "epoch": 0.86, "grad_norm": 2.8733327388763428, "learning_rate": 9.974543176241046e-08, "loss": 0.1065, "step": 3151 }, { "epoch": 0.86, "grad_norm": 2.8457095623016357, "learning_rate": 9.936050658667938e-08, "loss": 0.1221, "step": 3152 }, { "epoch": 0.86, "grad_norm": 3.0331854820251465, "learning_rate": 9.897628674361469e-08, "loss": 0.1265, "step": 3153 }, { "epoch": 0.86, "grad_norm": 2.7377963066101074, "learning_rate": 9.859277253411668e-08, "loss": 0.1275, "step": 3154 }, { "epoch": 0.86, "grad_norm": 2.556013822555542, "learning_rate": 9.820996425853333e-08, "loss": 0.111, "step": 3155 }, { "epoch": 0.86, "grad_norm": 2.742039918899536, "learning_rate": 9.782786221665939e-08, "loss": 0.1095, "step": 3156 }, { "epoch": 0.86, "grad_norm": 2.950411319732666, "learning_rate": 9.744646670773716e-08, "loss": 0.137, "step": 3157 }, { "epoch": 0.86, "grad_norm": 2.9282593727111816, "learning_rate": 9.70657780304548e-08, "loss": 0.1261, "step": 3158 }, { "epoch": 0.86, "grad_norm": 2.6660447120666504, "learning_rate": 9.668579648294728e-08, "loss": 0.1152, "step": 3159 }, { "epoch": 0.86, "grad_norm": 2.705693483352661, "learning_rate": 9.630652236279625e-08, "loss": 0.1192, "step": 3160 }, { "epoch": 0.86, "grad_norm": 2.8832311630249023, "learning_rate": 9.59279559670284e-08, "loss": 0.1123, "step": 3161 }, { "epoch": 0.86, "grad_norm": 2.8442935943603516, "learning_rate": 9.555009759211707e-08, "loss": 0.1222, "step": 3162 }, { "epoch": 0.86, "grad_norm": 2.8412744998931885, "learning_rate": 9.517294753398064e-08, "loss": 0.113, "step": 3163 }, { "epoch": 0.86, "grad_norm": 2.9322669506073, "learning_rate": 9.479650608798251e-08, "loss": 0.1135, "step": 3164 }, { "epoch": 0.86, "grad_norm": 2.6838173866271973, "learning_rate": 9.442077354893196e-08, "loss": 0.1029, "step": 3165 }, { "epoch": 0.86, "grad_norm": 2.8191077709198, "learning_rate": 9.404575021108229e-08, "loss": 0.1205, "step": 3166 }, { "epoch": 0.87, "grad_norm": 2.5669922828674316, "learning_rate": 9.367143636813202e-08, "loss": 0.1166, "step": 3167 }, { "epoch": 0.87, "grad_norm": 3.1334023475646973, "learning_rate": 9.329783231322352e-08, "loss": 0.129, "step": 3168 }, { "epoch": 0.87, "grad_norm": 2.73909330368042, "learning_rate": 9.292493833894332e-08, "loss": 0.1129, "step": 3169 }, { "epoch": 0.87, "grad_norm": 2.9867608547210693, "learning_rate": 9.255275473732238e-08, "loss": 0.1213, "step": 3170 }, { "epoch": 0.87, "grad_norm": 2.9223008155822754, "learning_rate": 9.218128179983476e-08, "loss": 0.1194, "step": 3171 }, { "epoch": 0.87, "grad_norm": 2.776965379714966, "learning_rate": 9.18105198173984e-08, "loss": 0.1065, "step": 3172 }, { "epoch": 0.87, "grad_norm": 2.9588122367858887, "learning_rate": 9.144046908037407e-08, "loss": 0.1183, "step": 3173 }, { "epoch": 0.87, "grad_norm": 2.636444091796875, "learning_rate": 9.107112987856558e-08, "loss": 0.0999, "step": 3174 }, { "epoch": 0.87, "grad_norm": 2.6867878437042236, "learning_rate": 9.070250250122003e-08, "loss": 0.1139, "step": 3175 }, { "epoch": 0.87, "grad_norm": 2.6267762184143066, "learning_rate": 9.033458723702625e-08, "loss": 0.0993, "step": 3176 }, { "epoch": 0.87, "grad_norm": 2.670147180557251, "learning_rate": 8.99673843741161e-08, "loss": 0.1044, "step": 3177 }, { "epoch": 0.87, "grad_norm": 2.864187717437744, "learning_rate": 8.960089420006312e-08, "loss": 0.1235, "step": 3178 }, { "epoch": 0.87, "grad_norm": 2.868659019470215, "learning_rate": 8.923511700188258e-08, "loss": 0.1157, "step": 3179 }, { "epoch": 0.87, "grad_norm": 2.7398617267608643, "learning_rate": 8.887005306603201e-08, "loss": 0.1162, "step": 3180 }, { "epoch": 0.87, "grad_norm": 2.597961902618408, "learning_rate": 8.850570267840963e-08, "loss": 0.1011, "step": 3181 }, { "epoch": 0.87, "grad_norm": 3.0051791667938232, "learning_rate": 8.814206612435549e-08, "loss": 0.1243, "step": 3182 }, { "epoch": 0.87, "grad_norm": 2.9067301750183105, "learning_rate": 8.777914368865003e-08, "loss": 0.1244, "step": 3183 }, { "epoch": 0.87, "grad_norm": 2.6510720252990723, "learning_rate": 8.741693565551456e-08, "loss": 0.1083, "step": 3184 }, { "epoch": 0.87, "grad_norm": 3.2123913764953613, "learning_rate": 8.70554423086114e-08, "loss": 0.1268, "step": 3185 }, { "epoch": 0.87, "grad_norm": 2.637896776199341, "learning_rate": 8.669466393104241e-08, "loss": 0.1012, "step": 3186 }, { "epoch": 0.87, "grad_norm": 2.6280598640441895, "learning_rate": 8.633460080535038e-08, "loss": 0.0991, "step": 3187 }, { "epoch": 0.87, "grad_norm": 2.9076666831970215, "learning_rate": 8.597525321351717e-08, "loss": 0.1122, "step": 3188 }, { "epoch": 0.87, "grad_norm": 2.5415005683898926, "learning_rate": 8.561662143696446e-08, "loss": 0.1123, "step": 3189 }, { "epoch": 0.87, "grad_norm": 2.803208112716675, "learning_rate": 8.525870575655392e-08, "loss": 0.1091, "step": 3190 }, { "epoch": 0.87, "grad_norm": 2.746464252471924, "learning_rate": 8.490150645258542e-08, "loss": 0.1099, "step": 3191 }, { "epoch": 0.87, "grad_norm": 2.8301329612731934, "learning_rate": 8.454502380479889e-08, "loss": 0.1172, "step": 3192 }, { "epoch": 0.87, "grad_norm": 2.7032482624053955, "learning_rate": 8.418925809237209e-08, "loss": 0.108, "step": 3193 }, { "epoch": 0.87, "grad_norm": 2.8203492164611816, "learning_rate": 8.383420959392174e-08, "loss": 0.1136, "step": 3194 }, { "epoch": 0.87, "grad_norm": 2.732386350631714, "learning_rate": 8.347987858750306e-08, "loss": 0.1028, "step": 3195 }, { "epoch": 0.87, "grad_norm": 2.6165077686309814, "learning_rate": 8.312626535060874e-08, "loss": 0.11, "step": 3196 }, { "epoch": 0.87, "grad_norm": 2.588101387023926, "learning_rate": 8.277337016017016e-08, "loss": 0.1039, "step": 3197 }, { "epoch": 0.87, "grad_norm": 2.8107681274414062, "learning_rate": 8.242119329255582e-08, "loss": 0.1229, "step": 3198 }, { "epoch": 0.87, "grad_norm": 2.847639799118042, "learning_rate": 8.206973502357151e-08, "loss": 0.1006, "step": 3199 }, { "epoch": 0.87, "grad_norm": 2.666560411453247, "learning_rate": 8.171899562846097e-08, "loss": 0.104, "step": 3200 }, { "epoch": 0.87, "grad_norm": 2.765098810195923, "learning_rate": 8.136897538190424e-08, "loss": 0.1061, "step": 3201 }, { "epoch": 0.87, "grad_norm": 2.895667552947998, "learning_rate": 8.101967455801861e-08, "loss": 0.1382, "step": 3202 }, { "epoch": 0.87, "grad_norm": 3.130695343017578, "learning_rate": 8.067109343035783e-08, "loss": 0.1241, "step": 3203 }, { "epoch": 0.88, "grad_norm": 3.034909963607788, "learning_rate": 8.032323227191173e-08, "loss": 0.1121, "step": 3204 }, { "epoch": 0.88, "grad_norm": 2.6597087383270264, "learning_rate": 7.997609135510685e-08, "loss": 0.106, "step": 3205 }, { "epoch": 0.88, "grad_norm": 2.7990424633026123, "learning_rate": 7.962967095180518e-08, "loss": 0.1166, "step": 3206 }, { "epoch": 0.88, "grad_norm": 2.961679220199585, "learning_rate": 7.928397133330467e-08, "loss": 0.1265, "step": 3207 }, { "epoch": 0.88, "grad_norm": 2.8379929065704346, "learning_rate": 7.89389927703391e-08, "loss": 0.1171, "step": 3208 }, { "epoch": 0.88, "grad_norm": 2.9817962646484375, "learning_rate": 7.859473553307672e-08, "loss": 0.1145, "step": 3209 }, { "epoch": 0.88, "grad_norm": 2.8842551708221436, "learning_rate": 7.825119989112172e-08, "loss": 0.121, "step": 3210 }, { "epoch": 0.88, "grad_norm": 2.9692680835723877, "learning_rate": 7.790838611351258e-08, "loss": 0.1243, "step": 3211 }, { "epoch": 0.88, "grad_norm": 2.4734480381011963, "learning_rate": 7.756629446872288e-08, "loss": 0.1074, "step": 3212 }, { "epoch": 0.88, "grad_norm": 2.689995050430298, "learning_rate": 7.722492522466073e-08, "loss": 0.1163, "step": 3213 }, { "epoch": 0.88, "grad_norm": 2.814525604248047, "learning_rate": 7.688427864866776e-08, "loss": 0.1113, "step": 3214 }, { "epoch": 0.88, "grad_norm": 2.8867626190185547, "learning_rate": 7.654435500752055e-08, "loss": 0.116, "step": 3215 }, { "epoch": 0.88, "grad_norm": 2.705634832382202, "learning_rate": 7.620515456742871e-08, "loss": 0.1116, "step": 3216 }, { "epoch": 0.88, "grad_norm": 2.635175943374634, "learning_rate": 7.586667759403608e-08, "loss": 0.1133, "step": 3217 }, { "epoch": 0.88, "grad_norm": 3.0241026878356934, "learning_rate": 7.55289243524202e-08, "loss": 0.1254, "step": 3218 }, { "epoch": 0.88, "grad_norm": 2.9283294677734375, "learning_rate": 7.519189510709045e-08, "loss": 0.129, "step": 3219 }, { "epoch": 0.88, "grad_norm": 2.9080424308776855, "learning_rate": 7.485559012199061e-08, "loss": 0.1152, "step": 3220 }, { "epoch": 0.88, "grad_norm": 2.4616005420684814, "learning_rate": 7.452000966049676e-08, "loss": 0.0941, "step": 3221 }, { "epoch": 0.88, "grad_norm": 2.6700146198272705, "learning_rate": 7.418515398541736e-08, "loss": 0.1021, "step": 3222 }, { "epoch": 0.88, "grad_norm": 3.0130770206451416, "learning_rate": 7.385102335899396e-08, "loss": 0.1133, "step": 3223 }, { "epoch": 0.88, "grad_norm": 3.161093235015869, "learning_rate": 7.351761804289902e-08, "loss": 0.1035, "step": 3224 }, { "epoch": 0.88, "grad_norm": 2.837735891342163, "learning_rate": 7.318493829823813e-08, "loss": 0.1171, "step": 3225 }, { "epoch": 0.88, "grad_norm": 2.5457699298858643, "learning_rate": 7.285298438554844e-08, "loss": 0.1152, "step": 3226 }, { "epoch": 0.88, "grad_norm": 2.6670989990234375, "learning_rate": 7.25217565647982e-08, "loss": 0.1025, "step": 3227 }, { "epoch": 0.88, "grad_norm": 2.919137954711914, "learning_rate": 7.219125509538782e-08, "loss": 0.1215, "step": 3228 }, { "epoch": 0.88, "grad_norm": 2.712557315826416, "learning_rate": 7.186148023614758e-08, "loss": 0.1115, "step": 3229 }, { "epoch": 0.88, "grad_norm": 2.7375035285949707, "learning_rate": 7.153243224534e-08, "loss": 0.1163, "step": 3230 }, { "epoch": 0.88, "grad_norm": 2.5851237773895264, "learning_rate": 7.120411138065796e-08, "loss": 0.1167, "step": 3231 }, { "epoch": 0.88, "grad_norm": 3.0387001037597656, "learning_rate": 7.087651789922445e-08, "loss": 0.1095, "step": 3232 }, { "epoch": 0.88, "grad_norm": 2.860739231109619, "learning_rate": 7.054965205759345e-08, "loss": 0.1246, "step": 3233 }, { "epoch": 0.88, "grad_norm": 2.760218620300293, "learning_rate": 7.022351411174865e-08, "loss": 0.1116, "step": 3234 }, { "epoch": 0.88, "grad_norm": 2.8284144401550293, "learning_rate": 6.989810431710374e-08, "loss": 0.1259, "step": 3235 }, { "epoch": 0.88, "grad_norm": 2.6409196853637695, "learning_rate": 6.957342292850266e-08, "loss": 0.1059, "step": 3236 }, { "epoch": 0.88, "grad_norm": 2.788323163986206, "learning_rate": 6.924947020021798e-08, "loss": 0.11, "step": 3237 }, { "epoch": 0.88, "grad_norm": 2.9088175296783447, "learning_rate": 6.892624638595257e-08, "loss": 0.124, "step": 3238 }, { "epoch": 0.88, "grad_norm": 2.5352680683135986, "learning_rate": 6.860375173883781e-08, "loss": 0.1007, "step": 3239 }, { "epoch": 0.89, "grad_norm": 2.6265170574188232, "learning_rate": 6.828198651143424e-08, "loss": 0.1046, "step": 3240 }, { "epoch": 0.89, "grad_norm": 2.8138463497161865, "learning_rate": 6.79609509557313e-08, "loss": 0.1237, "step": 3241 }, { "epoch": 0.89, "grad_norm": 3.054506778717041, "learning_rate": 6.764064532314672e-08, "loss": 0.1146, "step": 3242 }, { "epoch": 0.89, "grad_norm": 2.840134859085083, "learning_rate": 6.73210698645269e-08, "loss": 0.119, "step": 3243 }, { "epoch": 0.89, "grad_norm": 3.059105157852173, "learning_rate": 6.700222483014617e-08, "loss": 0.1336, "step": 3244 }, { "epoch": 0.89, "grad_norm": 2.816920280456543, "learning_rate": 6.668411046970679e-08, "loss": 0.1128, "step": 3245 }, { "epoch": 0.89, "grad_norm": 2.9585933685302734, "learning_rate": 6.636672703233914e-08, "loss": 0.109, "step": 3246 }, { "epoch": 0.89, "grad_norm": 2.939216375350952, "learning_rate": 6.605007476660063e-08, "loss": 0.1314, "step": 3247 }, { "epoch": 0.89, "grad_norm": 3.184189558029175, "learning_rate": 6.573415392047666e-08, "loss": 0.1366, "step": 3248 }, { "epoch": 0.89, "grad_norm": 3.0321738719940186, "learning_rate": 6.541896474137954e-08, "loss": 0.1171, "step": 3249 }, { "epoch": 0.89, "grad_norm": 2.797530174255371, "learning_rate": 6.510450747614815e-08, "loss": 0.1283, "step": 3250 }, { "epoch": 0.89, "grad_norm": 2.5570662021636963, "learning_rate": 6.479078237104918e-08, "loss": 0.1077, "step": 3251 }, { "epoch": 0.89, "grad_norm": 2.6202569007873535, "learning_rate": 6.447778967177497e-08, "loss": 0.1191, "step": 3252 }, { "epoch": 0.89, "grad_norm": 2.5749614238739014, "learning_rate": 6.416552962344479e-08, "loss": 0.1011, "step": 3253 }, { "epoch": 0.89, "grad_norm": 2.7007076740264893, "learning_rate": 6.385400247060402e-08, "loss": 0.1059, "step": 3254 }, { "epoch": 0.89, "grad_norm": 2.770843744277954, "learning_rate": 6.354320845722394e-08, "loss": 0.123, "step": 3255 }, { "epoch": 0.89, "grad_norm": 2.8237979412078857, "learning_rate": 6.323314782670197e-08, "loss": 0.1167, "step": 3256 }, { "epoch": 0.89, "grad_norm": 2.7554519176483154, "learning_rate": 6.292382082186065e-08, "loss": 0.1061, "step": 3257 }, { "epoch": 0.89, "grad_norm": 2.568547487258911, "learning_rate": 6.261522768494886e-08, "loss": 0.1108, "step": 3258 }, { "epoch": 0.89, "grad_norm": 2.847320318222046, "learning_rate": 6.230736865763997e-08, "loss": 0.124, "step": 3259 }, { "epoch": 0.89, "grad_norm": 2.849395513534546, "learning_rate": 6.200024398103253e-08, "loss": 0.1153, "step": 3260 }, { "epoch": 0.89, "grad_norm": 2.570420503616333, "learning_rate": 6.169385389565051e-08, "loss": 0.1051, "step": 3261 }, { "epoch": 0.89, "grad_norm": 2.8272502422332764, "learning_rate": 6.138819864144185e-08, "loss": 0.1056, "step": 3262 }, { "epoch": 0.89, "grad_norm": 2.8246591091156006, "learning_rate": 6.108327845777972e-08, "loss": 0.1134, "step": 3263 }, { "epoch": 0.89, "grad_norm": 3.0128746032714844, "learning_rate": 6.077909358346123e-08, "loss": 0.1236, "step": 3264 }, { "epoch": 0.89, "grad_norm": 2.628431797027588, "learning_rate": 6.047564425670748e-08, "loss": 0.1047, "step": 3265 }, { "epoch": 0.89, "grad_norm": 3.2802343368530273, "learning_rate": 6.017293071516406e-08, "loss": 0.1057, "step": 3266 }, { "epoch": 0.89, "grad_norm": 2.8381617069244385, "learning_rate": 5.987095319589963e-08, "loss": 0.12, "step": 3267 }, { "epoch": 0.89, "grad_norm": 2.6833271980285645, "learning_rate": 5.956971193540728e-08, "loss": 0.1183, "step": 3268 }, { "epoch": 0.89, "grad_norm": 2.6846632957458496, "learning_rate": 5.926920716960282e-08, "loss": 0.1145, "step": 3269 }, { "epoch": 0.89, "grad_norm": 2.3703134059906006, "learning_rate": 5.896943913382546e-08, "loss": 0.0957, "step": 3270 }, { "epoch": 0.89, "grad_norm": 2.9268476963043213, "learning_rate": 5.8670408062837516e-08, "loss": 0.1244, "step": 3271 }, { "epoch": 0.89, "grad_norm": 2.760392904281616, "learning_rate": 5.837211419082411e-08, "loss": 0.1191, "step": 3272 }, { "epoch": 0.89, "grad_norm": 2.5630156993865967, "learning_rate": 5.807455775139325e-08, "loss": 0.1049, "step": 3273 }, { "epoch": 0.89, "grad_norm": 2.6172001361846924, "learning_rate": 5.7777738977574984e-08, "loss": 0.113, "step": 3274 }, { "epoch": 0.89, "grad_norm": 3.231501579284668, "learning_rate": 5.748165810182182e-08, "loss": 0.1492, "step": 3275 }, { "epoch": 0.89, "grad_norm": 2.893700122833252, "learning_rate": 5.718631535600882e-08, "loss": 0.109, "step": 3276 }, { "epoch": 0.9, "grad_norm": 3.144315719604492, "learning_rate": 5.6891710971432194e-08, "loss": 0.1307, "step": 3277 }, { "epoch": 0.9, "grad_norm": 2.6779861450195312, "learning_rate": 5.659784517881072e-08, "loss": 0.1073, "step": 3278 }, { "epoch": 0.9, "grad_norm": 3.0189919471740723, "learning_rate": 5.6304718208284194e-08, "loss": 0.1221, "step": 3279 }, { "epoch": 0.9, "grad_norm": 2.769556760787964, "learning_rate": 5.601233028941388e-08, "loss": 0.118, "step": 3280 }, { "epoch": 0.9, "grad_norm": 2.5217833518981934, "learning_rate": 5.57206816511826e-08, "loss": 0.1071, "step": 3281 }, { "epoch": 0.9, "grad_norm": 3.1602933406829834, "learning_rate": 5.5429772521993544e-08, "loss": 0.123, "step": 3282 }, { "epoch": 0.9, "grad_norm": 2.82954740524292, "learning_rate": 5.51396031296717e-08, "loss": 0.1155, "step": 3283 }, { "epoch": 0.9, "grad_norm": 2.6372883319854736, "learning_rate": 5.485017370146194e-08, "loss": 0.1128, "step": 3284 }, { "epoch": 0.9, "grad_norm": 2.437962532043457, "learning_rate": 5.456148446402975e-08, "loss": 0.0993, "step": 3285 }, { "epoch": 0.9, "grad_norm": 2.8904833793640137, "learning_rate": 5.427353564346138e-08, "loss": 0.1219, "step": 3286 }, { "epoch": 0.9, "grad_norm": 3.2096025943756104, "learning_rate": 5.398632746526277e-08, "loss": 0.1355, "step": 3287 }, { "epoch": 0.9, "grad_norm": 2.730506658554077, "learning_rate": 5.369986015436012e-08, "loss": 0.1147, "step": 3288 }, { "epoch": 0.9, "grad_norm": 2.7148666381835938, "learning_rate": 5.3414133935099304e-08, "loss": 0.1071, "step": 3289 }, { "epoch": 0.9, "grad_norm": 2.7386884689331055, "learning_rate": 5.312914903124566e-08, "loss": 0.1117, "step": 3290 }, { "epoch": 0.9, "grad_norm": 2.748896837234497, "learning_rate": 5.284490566598421e-08, "loss": 0.1199, "step": 3291 }, { "epoch": 0.9, "grad_norm": 2.766477346420288, "learning_rate": 5.2561404061919114e-08, "loss": 0.1066, "step": 3292 }, { "epoch": 0.9, "grad_norm": 2.713313341140747, "learning_rate": 5.227864444107377e-08, "loss": 0.1159, "step": 3293 }, { "epoch": 0.9, "grad_norm": 2.8210034370422363, "learning_rate": 5.1996627024890383e-08, "loss": 0.1103, "step": 3294 }, { "epoch": 0.9, "grad_norm": 2.5246126651763916, "learning_rate": 5.171535203422961e-08, "loss": 0.1037, "step": 3295 }, { "epoch": 0.9, "grad_norm": 2.7380261421203613, "learning_rate": 5.1434819689371464e-08, "loss": 0.1046, "step": 3296 }, { "epoch": 0.9, "grad_norm": 2.7986435890197754, "learning_rate": 5.115503021001333e-08, "loss": 0.104, "step": 3297 }, { "epoch": 0.9, "grad_norm": 2.699871063232422, "learning_rate": 5.087598381527181e-08, "loss": 0.1102, "step": 3298 }, { "epoch": 0.9, "grad_norm": 2.7522006034851074, "learning_rate": 5.059768072368098e-08, "loss": 0.1139, "step": 3299 }, { "epoch": 0.9, "grad_norm": 2.90615177154541, "learning_rate": 5.032012115319273e-08, "loss": 0.1167, "step": 3300 }, { "epoch": 0.9, "grad_norm": 2.7631006240844727, "learning_rate": 5.004330532117707e-08, "loss": 0.1105, "step": 3301 }, { "epoch": 0.9, "grad_norm": 2.9251725673675537, "learning_rate": 4.976723344442124e-08, "loss": 0.1312, "step": 3302 }, { "epoch": 0.9, "grad_norm": 2.746147394180298, "learning_rate": 4.949190573913009e-08, "loss": 0.1159, "step": 3303 }, { "epoch": 0.9, "grad_norm": 2.6207473278045654, "learning_rate": 4.921732242092569e-08, "loss": 0.0947, "step": 3304 }, { "epoch": 0.9, "grad_norm": 2.4841485023498535, "learning_rate": 4.8943483704846465e-08, "loss": 0.1086, "step": 3305 }, { "epoch": 0.9, "grad_norm": 2.878854274749756, "learning_rate": 4.867038980534877e-08, "loss": 0.1119, "step": 3306 }, { "epoch": 0.9, "grad_norm": 2.6521964073181152, "learning_rate": 4.839804093630484e-08, "loss": 0.1153, "step": 3307 }, { "epoch": 0.9, "grad_norm": 2.6814706325531006, "learning_rate": 4.8126437311003745e-08, "loss": 0.1061, "step": 3308 }, { "epoch": 0.9, "grad_norm": 2.625558376312256, "learning_rate": 4.785557914215132e-08, "loss": 0.1155, "step": 3309 }, { "epoch": 0.9, "grad_norm": 2.7465507984161377, "learning_rate": 4.7585466641868685e-08, "loss": 0.1149, "step": 3310 }, { "epoch": 0.9, "grad_norm": 2.826021194458008, "learning_rate": 4.731610002169384e-08, "loss": 0.1145, "step": 3311 }, { "epoch": 0.9, "grad_norm": 2.7166543006896973, "learning_rate": 4.704747949257992e-08, "loss": 0.1053, "step": 3312 }, { "epoch": 0.91, "grad_norm": 2.670891761779785, "learning_rate": 4.677960526489644e-08, "loss": 0.1169, "step": 3313 }, { "epoch": 0.91, "grad_norm": 2.9541141986846924, "learning_rate": 4.6512477548428465e-08, "loss": 0.116, "step": 3314 }, { "epoch": 0.91, "grad_norm": 2.7535674571990967, "learning_rate": 4.624609655237544e-08, "loss": 0.1112, "step": 3315 }, { "epoch": 0.91, "grad_norm": 2.9081640243530273, "learning_rate": 4.5980462485353254e-08, "loss": 0.1203, "step": 3316 }, { "epoch": 0.91, "grad_norm": 2.966815948486328, "learning_rate": 4.5715575555391964e-08, "loss": 0.1325, "step": 3317 }, { "epoch": 0.91, "grad_norm": 2.7810537815093994, "learning_rate": 4.545143596993695e-08, "loss": 0.1202, "step": 3318 }, { "epoch": 0.91, "grad_norm": 2.504977226257324, "learning_rate": 4.518804393584852e-08, "loss": 0.0956, "step": 3319 }, { "epoch": 0.91, "grad_norm": 2.7092530727386475, "learning_rate": 4.492539965940056e-08, "loss": 0.1074, "step": 3320 }, { "epoch": 0.91, "grad_norm": 3.001920461654663, "learning_rate": 4.466350334628266e-08, "loss": 0.1087, "step": 3321 }, { "epoch": 0.91, "grad_norm": 2.7182984352111816, "learning_rate": 4.440235520159752e-08, "loss": 0.105, "step": 3322 }, { "epoch": 0.91, "grad_norm": 2.900406837463379, "learning_rate": 4.414195542986265e-08, "loss": 0.1168, "step": 3323 }, { "epoch": 0.91, "grad_norm": 2.4970622062683105, "learning_rate": 4.3882304235009496e-08, "loss": 0.1086, "step": 3324 }, { "epoch": 0.91, "grad_norm": 2.6406490802764893, "learning_rate": 4.362340182038238e-08, "loss": 0.1003, "step": 3325 }, { "epoch": 0.91, "grad_norm": 2.74210524559021, "learning_rate": 4.336524838874023e-08, "loss": 0.1209, "step": 3326 }, { "epoch": 0.91, "grad_norm": 2.576692581176758, "learning_rate": 4.310784414225466e-08, "loss": 0.112, "step": 3327 }, { "epoch": 0.91, "grad_norm": 2.711721897125244, "learning_rate": 4.285118928251119e-08, "loss": 0.117, "step": 3328 }, { "epoch": 0.91, "grad_norm": 2.781630039215088, "learning_rate": 4.259528401050827e-08, "loss": 0.12, "step": 3329 }, { "epoch": 0.91, "grad_norm": 2.912167549133301, "learning_rate": 4.2340128526657024e-08, "loss": 0.1087, "step": 3330 }, { "epoch": 0.91, "grad_norm": 2.9487767219543457, "learning_rate": 4.208572303078162e-08, "loss": 0.1218, "step": 3331 }, { "epoch": 0.91, "grad_norm": 2.5757272243499756, "learning_rate": 4.183206772211867e-08, "loss": 0.1067, "step": 3332 }, { "epoch": 0.91, "grad_norm": 2.787808656692505, "learning_rate": 4.157916279931761e-08, "loss": 0.1124, "step": 3333 }, { "epoch": 0.91, "grad_norm": 2.7106688022613525, "learning_rate": 4.132700846044013e-08, "loss": 0.1207, "step": 3334 }, { "epoch": 0.91, "grad_norm": 2.7858059406280518, "learning_rate": 4.1075604902959915e-08, "loss": 0.1198, "step": 3335 }, { "epoch": 0.91, "grad_norm": 3.217522144317627, "learning_rate": 4.082495232376271e-08, "loss": 0.1314, "step": 3336 }, { "epoch": 0.91, "grad_norm": 2.835294485092163, "learning_rate": 4.0575050919146256e-08, "loss": 0.1193, "step": 3337 }, { "epoch": 0.91, "grad_norm": 2.501631021499634, "learning_rate": 4.032590088482002e-08, "loss": 0.0932, "step": 3338 }, { "epoch": 0.91, "grad_norm": 2.8946824073791504, "learning_rate": 4.007750241590502e-08, "loss": 0.1205, "step": 3339 }, { "epoch": 0.91, "grad_norm": 2.566824197769165, "learning_rate": 3.9829855706933536e-08, "loss": 0.1044, "step": 3340 }, { "epoch": 0.91, "grad_norm": 2.884089469909668, "learning_rate": 3.95829609518491e-08, "loss": 0.1233, "step": 3341 }, { "epoch": 0.91, "grad_norm": 2.869565486907959, "learning_rate": 3.933681834400682e-08, "loss": 0.117, "step": 3342 }, { "epoch": 0.91, "grad_norm": 2.831082582473755, "learning_rate": 3.909142807617205e-08, "loss": 0.1067, "step": 3343 }, { "epoch": 0.91, "grad_norm": 2.764885902404785, "learning_rate": 3.884679034052163e-08, "loss": 0.1119, "step": 3344 }, { "epoch": 0.91, "grad_norm": 3.153815507888794, "learning_rate": 3.8602905328642634e-08, "loss": 0.1282, "step": 3345 }, { "epoch": 0.91, "grad_norm": 3.548212766647339, "learning_rate": 3.835977323153261e-08, "loss": 0.1286, "step": 3346 }, { "epoch": 0.91, "grad_norm": 2.814499855041504, "learning_rate": 3.811739423959992e-08, "loss": 0.1176, "step": 3347 }, { "epoch": 0.91, "grad_norm": 2.7057933807373047, "learning_rate": 3.787576854266239e-08, "loss": 0.107, "step": 3348 }, { "epoch": 0.91, "grad_norm": 2.667623519897461, "learning_rate": 3.763489632994876e-08, "loss": 0.1013, "step": 3349 }, { "epoch": 0.92, "grad_norm": 3.1889543533325195, "learning_rate": 3.739477779009703e-08, "loss": 0.1145, "step": 3350 }, { "epoch": 0.92, "grad_norm": 2.842881917953491, "learning_rate": 3.715541311115522e-08, "loss": 0.1128, "step": 3351 }, { "epoch": 0.92, "grad_norm": 2.948249101638794, "learning_rate": 3.6916802480581046e-08, "loss": 0.1165, "step": 3352 }, { "epoch": 0.92, "grad_norm": 2.9211549758911133, "learning_rate": 3.6678946085241356e-08, "loss": 0.118, "step": 3353 }, { "epoch": 0.92, "grad_norm": 2.9507830142974854, "learning_rate": 3.6441844111412824e-08, "loss": 0.118, "step": 3354 }, { "epoch": 0.92, "grad_norm": 2.799957036972046, "learning_rate": 3.6205496744781014e-08, "loss": 0.1047, "step": 3355 }, { "epoch": 0.92, "grad_norm": 2.6917316913604736, "learning_rate": 3.5969904170440214e-08, "loss": 0.1118, "step": 3356 }, { "epoch": 0.92, "grad_norm": 2.744602918624878, "learning_rate": 3.573506657289427e-08, "loss": 0.1169, "step": 3357 }, { "epoch": 0.92, "grad_norm": 2.906572103500366, "learning_rate": 3.550098413605529e-08, "loss": 0.125, "step": 3358 }, { "epoch": 0.92, "grad_norm": 2.6737864017486572, "learning_rate": 3.5267657043244084e-08, "loss": 0.1044, "step": 3359 }, { "epoch": 0.92, "grad_norm": 2.5877609252929688, "learning_rate": 3.503508547719014e-08, "loss": 0.1093, "step": 3360 }, { "epoch": 0.92, "grad_norm": 2.703770875930786, "learning_rate": 3.480326962003077e-08, "loss": 0.1046, "step": 3361 }, { "epoch": 0.92, "grad_norm": 2.6350808143615723, "learning_rate": 3.4572209653311977e-08, "loss": 0.103, "step": 3362 }, { "epoch": 0.92, "grad_norm": 2.9087467193603516, "learning_rate": 3.434190575798734e-08, "loss": 0.1097, "step": 3363 }, { "epoch": 0.92, "grad_norm": 2.8744146823883057, "learning_rate": 3.4112358114418815e-08, "loss": 0.1224, "step": 3364 }, { "epoch": 0.92, "grad_norm": 3.058384895324707, "learning_rate": 3.388356690237582e-08, "loss": 0.1101, "step": 3365 }, { "epoch": 0.92, "grad_norm": 2.962033987045288, "learning_rate": 3.3655532301035017e-08, "loss": 0.1196, "step": 3366 }, { "epoch": 0.92, "grad_norm": 2.707650661468506, "learning_rate": 3.3428254488981455e-08, "loss": 0.1104, "step": 3367 }, { "epoch": 0.92, "grad_norm": 2.9100685119628906, "learning_rate": 3.320173364420642e-08, "loss": 0.1153, "step": 3368 }, { "epoch": 0.92, "grad_norm": 2.714811325073242, "learning_rate": 3.297596994410934e-08, "loss": 0.1062, "step": 3369 }, { "epoch": 0.92, "grad_norm": 2.850252151489258, "learning_rate": 3.2750963565496114e-08, "loss": 0.1274, "step": 3370 }, { "epoch": 0.92, "grad_norm": 2.6645967960357666, "learning_rate": 3.252671468457957e-08, "loss": 0.1126, "step": 3371 }, { "epoch": 0.92, "grad_norm": 2.775787115097046, "learning_rate": 3.230322347697967e-08, "loss": 0.0974, "step": 3372 }, { "epoch": 0.92, "grad_norm": 2.9510257244110107, "learning_rate": 3.208049011772263e-08, "loss": 0.1183, "step": 3373 }, { "epoch": 0.92, "grad_norm": 3.2880499362945557, "learning_rate": 3.1858514781241355e-08, "loss": 0.126, "step": 3374 }, { "epoch": 0.92, "grad_norm": 2.773585319519043, "learning_rate": 3.1637297641375015e-08, "loss": 0.1108, "step": 3375 }, { "epoch": 0.92, "grad_norm": 2.9001801013946533, "learning_rate": 3.141683887136892e-08, "loss": 0.1184, "step": 3376 }, { "epoch": 0.92, "grad_norm": 2.721400260925293, "learning_rate": 3.1197138643874744e-08, "loss": 0.1012, "step": 3377 }, { "epoch": 0.92, "grad_norm": 2.6031508445739746, "learning_rate": 3.097819713094996e-08, "loss": 0.1048, "step": 3378 }, { "epoch": 0.92, "grad_norm": 3.1122517585754395, "learning_rate": 3.076001450405785e-08, "loss": 0.1241, "step": 3379 }, { "epoch": 0.92, "grad_norm": 2.5732295513153076, "learning_rate": 3.05425909340673e-08, "loss": 0.1012, "step": 3380 }, { "epoch": 0.92, "grad_norm": 2.6614997386932373, "learning_rate": 3.032592659125277e-08, "loss": 0.1095, "step": 3381 }, { "epoch": 0.92, "grad_norm": 2.649832248687744, "learning_rate": 3.0110021645294415e-08, "loss": 0.0958, "step": 3382 }, { "epoch": 0.92, "grad_norm": 3.089282512664795, "learning_rate": 2.989487626527709e-08, "loss": 0.134, "step": 3383 }, { "epoch": 0.92, "grad_norm": 2.8147947788238525, "learning_rate": 2.9680490619691467e-08, "loss": 0.1098, "step": 3384 }, { "epoch": 0.92, "grad_norm": 2.984261989593506, "learning_rate": 2.9466864876432794e-08, "loss": 0.1176, "step": 3385 }, { "epoch": 0.92, "grad_norm": 2.651834011077881, "learning_rate": 2.925399920280114e-08, "loss": 0.108, "step": 3386 }, { "epoch": 0.93, "grad_norm": 2.6562538146972656, "learning_rate": 2.9041893765501925e-08, "loss": 0.1043, "step": 3387 }, { "epoch": 0.93, "grad_norm": 2.8498289585113525, "learning_rate": 2.8830548730644278e-08, "loss": 0.1117, "step": 3388 }, { "epoch": 0.93, "grad_norm": 2.793269634246826, "learning_rate": 2.8619964263742802e-08, "loss": 0.1228, "step": 3389 }, { "epoch": 0.93, "grad_norm": 2.785788059234619, "learning_rate": 2.84101405297158e-08, "loss": 0.1186, "step": 3390 }, { "epoch": 0.93, "grad_norm": 2.6449506282806396, "learning_rate": 2.820107769288571e-08, "loss": 0.1069, "step": 3391 }, { "epoch": 0.93, "grad_norm": 3.226897716522217, "learning_rate": 2.7992775916979795e-08, "loss": 0.1224, "step": 3392 }, { "epoch": 0.93, "grad_norm": 2.5964953899383545, "learning_rate": 2.778523536512867e-08, "loss": 0.1094, "step": 3393 }, { "epoch": 0.93, "grad_norm": 2.71575665473938, "learning_rate": 2.7578456199866983e-08, "loss": 0.1149, "step": 3394 }, { "epoch": 0.93, "grad_norm": 2.5593042373657227, "learning_rate": 2.7372438583133208e-08, "loss": 0.1139, "step": 3395 }, { "epoch": 0.93, "grad_norm": 2.764103412628174, "learning_rate": 2.716718267626905e-08, "loss": 0.1139, "step": 3396 }, { "epoch": 0.93, "grad_norm": 2.595871686935425, "learning_rate": 2.696268864002027e-08, "loss": 0.1053, "step": 3397 }, { "epoch": 0.93, "grad_norm": 2.902984857559204, "learning_rate": 2.6758956634535536e-08, "loss": 0.1249, "step": 3398 }, { "epoch": 0.93, "grad_norm": 2.6692392826080322, "learning_rate": 2.6555986819366772e-08, "loss": 0.1171, "step": 3399 }, { "epoch": 0.93, "grad_norm": 2.914112091064453, "learning_rate": 2.6353779353469385e-08, "loss": 0.1193, "step": 3400 }, { "epoch": 0.93, "grad_norm": 2.8296852111816406, "learning_rate": 2.6152334395200925e-08, "loss": 0.1169, "step": 3401 }, { "epoch": 0.93, "grad_norm": 2.7452199459075928, "learning_rate": 2.5951652102322862e-08, "loss": 0.1093, "step": 3402 }, { "epoch": 0.93, "grad_norm": 2.833092451095581, "learning_rate": 2.575173263199837e-08, "loss": 0.1177, "step": 3403 }, { "epoch": 0.93, "grad_norm": 2.8856849670410156, "learning_rate": 2.555257614079387e-08, "loss": 0.1178, "step": 3404 }, { "epoch": 0.93, "grad_norm": 2.7653207778930664, "learning_rate": 2.535418278467838e-08, "loss": 0.1273, "step": 3405 }, { "epoch": 0.93, "grad_norm": 2.9552650451660156, "learning_rate": 2.5156552719022394e-08, "loss": 0.1129, "step": 3406 }, { "epoch": 0.93, "grad_norm": 2.7580583095550537, "learning_rate": 2.4959686098599554e-08, "loss": 0.1214, "step": 3407 }, { "epoch": 0.93, "grad_norm": 2.765307903289795, "learning_rate": 2.4763583077585083e-08, "loss": 0.1117, "step": 3408 }, { "epoch": 0.93, "grad_norm": 2.7255513668060303, "learning_rate": 2.4568243809556577e-08, "loss": 0.1127, "step": 3409 }, { "epoch": 0.93, "grad_norm": 2.5587587356567383, "learning_rate": 2.4373668447493224e-08, "loss": 0.1042, "step": 3410 }, { "epoch": 0.93, "grad_norm": 2.553455352783203, "learning_rate": 2.4179857143776017e-08, "loss": 0.1137, "step": 3411 }, { "epoch": 0.93, "grad_norm": 2.822624444961548, "learning_rate": 2.3986810050187543e-08, "loss": 0.1121, "step": 3412 }, { "epoch": 0.93, "grad_norm": 2.892587661743164, "learning_rate": 2.3794527317911983e-08, "loss": 0.113, "step": 3413 }, { "epoch": 0.93, "grad_norm": 2.899445056915283, "learning_rate": 2.3603009097534986e-08, "loss": 0.1241, "step": 3414 }, { "epoch": 0.93, "grad_norm": 2.861823320388794, "learning_rate": 2.3412255539043357e-08, "loss": 0.1123, "step": 3415 }, { "epoch": 0.93, "grad_norm": 2.8747031688690186, "learning_rate": 2.3222266791824928e-08, "loss": 0.1169, "step": 3416 }, { "epoch": 0.93, "grad_norm": 2.610382318496704, "learning_rate": 2.3033043004668907e-08, "loss": 0.108, "step": 3417 }, { "epoch": 0.93, "grad_norm": 2.993743419647217, "learning_rate": 2.2844584325765083e-08, "loss": 0.132, "step": 3418 }, { "epoch": 0.93, "grad_norm": 2.6418840885162354, "learning_rate": 2.2656890902704175e-08, "loss": 0.0968, "step": 3419 }, { "epoch": 0.93, "grad_norm": 2.785472869873047, "learning_rate": 2.2469962882478043e-08, "loss": 0.1225, "step": 3420 }, { "epoch": 0.93, "grad_norm": 2.6220009326934814, "learning_rate": 2.228380041147815e-08, "loss": 0.1124, "step": 3421 }, { "epoch": 0.93, "grad_norm": 2.8879494667053223, "learning_rate": 2.209840363549742e-08, "loss": 0.1219, "step": 3422 }, { "epoch": 0.94, "grad_norm": 3.032655715942383, "learning_rate": 2.1913772699728273e-08, "loss": 0.1195, "step": 3423 }, { "epoch": 0.94, "grad_norm": 3.276188373565674, "learning_rate": 2.1729907748764152e-08, "loss": 0.1382, "step": 3424 }, { "epoch": 0.94, "grad_norm": 2.5606958866119385, "learning_rate": 2.1546808926598103e-08, "loss": 0.1086, "step": 3425 }, { "epoch": 0.94, "grad_norm": 3.0688209533691406, "learning_rate": 2.136447637662342e-08, "loss": 0.1297, "step": 3426 }, { "epoch": 0.94, "grad_norm": 2.614536762237549, "learning_rate": 2.118291024163299e-08, "loss": 0.1041, "step": 3427 }, { "epoch": 0.94, "grad_norm": 2.9773361682891846, "learning_rate": 2.100211066381985e-08, "loss": 0.1188, "step": 3428 }, { "epoch": 0.94, "grad_norm": 2.830986261367798, "learning_rate": 2.0822077784776516e-08, "loss": 0.1141, "step": 3429 }, { "epoch": 0.94, "grad_norm": 2.704211473464966, "learning_rate": 2.0642811745495204e-08, "loss": 0.1225, "step": 3430 }, { "epoch": 0.94, "grad_norm": 2.675964832305908, "learning_rate": 2.046431268636739e-08, "loss": 0.1136, "step": 3431 }, { "epoch": 0.94, "grad_norm": 2.658668279647827, "learning_rate": 2.0286580747184035e-08, "loss": 0.1036, "step": 3432 }, { "epoch": 0.94, "grad_norm": 2.5797698497772217, "learning_rate": 2.0109616067135126e-08, "loss": 0.1096, "step": 3433 }, { "epoch": 0.94, "grad_norm": 3.078524351119995, "learning_rate": 1.993341878481003e-08, "loss": 0.1322, "step": 3434 }, { "epoch": 0.94, "grad_norm": 2.666562080383301, "learning_rate": 1.9757989038197143e-08, "loss": 0.1189, "step": 3435 }, { "epoch": 0.94, "grad_norm": 2.893813133239746, "learning_rate": 1.9583326964683678e-08, "loss": 0.1166, "step": 3436 }, { "epoch": 0.94, "grad_norm": 2.696438789367676, "learning_rate": 1.940943270105544e-08, "loss": 0.1163, "step": 3437 }, { "epoch": 0.94, "grad_norm": 2.7810068130493164, "learning_rate": 1.9236306383497048e-08, "loss": 0.1085, "step": 3438 }, { "epoch": 0.94, "grad_norm": 2.816138744354248, "learning_rate": 1.9063948147592045e-08, "loss": 0.1224, "step": 3439 }, { "epoch": 0.94, "grad_norm": 2.743136167526245, "learning_rate": 1.8892358128322017e-08, "loss": 0.1186, "step": 3440 }, { "epoch": 0.94, "grad_norm": 2.7357254028320312, "learning_rate": 1.8721536460067244e-08, "loss": 0.1059, "step": 3441 }, { "epoch": 0.94, "grad_norm": 2.933887004852295, "learning_rate": 1.8551483276605938e-08, "loss": 0.1349, "step": 3442 }, { "epoch": 0.94, "grad_norm": 2.620206832885742, "learning_rate": 1.8382198711114572e-08, "loss": 0.1046, "step": 3443 }, { "epoch": 0.94, "grad_norm": 3.2741897106170654, "learning_rate": 1.821368289616798e-08, "loss": 0.1254, "step": 3444 }, { "epoch": 0.94, "grad_norm": 2.9951748847961426, "learning_rate": 1.8045935963738712e-08, "loss": 0.128, "step": 3445 }, { "epoch": 0.94, "grad_norm": 3.1498196125030518, "learning_rate": 1.7878958045197123e-08, "loss": 0.1163, "step": 3446 }, { "epoch": 0.94, "grad_norm": 2.7662010192871094, "learning_rate": 1.771274927131139e-08, "loss": 0.1206, "step": 3447 }, { "epoch": 0.94, "grad_norm": 2.666364908218384, "learning_rate": 1.7547309772247278e-08, "loss": 0.1154, "step": 3448 }, { "epoch": 0.94, "grad_norm": 2.7426912784576416, "learning_rate": 1.7382639677568146e-08, "loss": 0.1209, "step": 3449 }, { "epoch": 0.94, "grad_norm": 2.5739829540252686, "learning_rate": 1.721873911623506e-08, "loss": 0.1134, "step": 3450 }, { "epoch": 0.94, "grad_norm": 2.8193235397338867, "learning_rate": 1.70556082166059e-08, "loss": 0.1101, "step": 3451 }, { "epoch": 0.94, "grad_norm": 2.6647956371307373, "learning_rate": 1.6893247106436136e-08, "loss": 0.1111, "step": 3452 }, { "epoch": 0.94, "grad_norm": 2.510910987854004, "learning_rate": 1.6731655912878284e-08, "loss": 0.1016, "step": 3453 }, { "epoch": 0.94, "grad_norm": 2.9538257122039795, "learning_rate": 1.657083476248189e-08, "loss": 0.1216, "step": 3454 }, { "epoch": 0.94, "grad_norm": 2.696749448776245, "learning_rate": 1.641078378119365e-08, "loss": 0.1062, "step": 3455 }, { "epoch": 0.94, "grad_norm": 2.8078157901763916, "learning_rate": 1.6251503094356743e-08, "loss": 0.103, "step": 3456 }, { "epoch": 0.94, "grad_norm": 2.706535816192627, "learning_rate": 1.609299282671128e-08, "loss": 0.1236, "step": 3457 }, { "epoch": 0.94, "grad_norm": 2.724426507949829, "learning_rate": 1.5935253102394185e-08, "loss": 0.1068, "step": 3458 }, { "epoch": 0.94, "grad_norm": 2.6226446628570557, "learning_rate": 1.5778284044938528e-08, "loss": 0.1058, "step": 3459 }, { "epoch": 0.95, "grad_norm": 2.7852203845977783, "learning_rate": 1.5622085777274417e-08, "loss": 0.1154, "step": 3460 }, { "epoch": 0.95, "grad_norm": 2.600698947906494, "learning_rate": 1.5466658421727675e-08, "loss": 0.1023, "step": 3461 }, { "epoch": 0.95, "grad_norm": 2.805701494216919, "learning_rate": 1.5312002100020816e-08, "loss": 0.1214, "step": 3462 }, { "epoch": 0.95, "grad_norm": 2.6820991039276123, "learning_rate": 1.5158116933272402e-08, "loss": 0.1003, "step": 3463 }, { "epoch": 0.95, "grad_norm": 2.6976981163024902, "learning_rate": 1.500500304199692e-08, "loss": 0.1091, "step": 3464 }, { "epoch": 0.95, "grad_norm": 2.4099295139312744, "learning_rate": 1.4852660546105234e-08, "loss": 0.0961, "step": 3465 }, { "epoch": 0.95, "grad_norm": 2.765096664428711, "learning_rate": 1.470108956490379e-08, "loss": 0.1169, "step": 3466 }, { "epoch": 0.95, "grad_norm": 2.628892660140991, "learning_rate": 1.4550290217094529e-08, "loss": 0.1113, "step": 3467 }, { "epoch": 0.95, "grad_norm": 3.0857925415039062, "learning_rate": 1.4400262620775871e-08, "loss": 0.1271, "step": 3468 }, { "epoch": 0.95, "grad_norm": 3.171969175338745, "learning_rate": 1.4251006893441164e-08, "loss": 0.1541, "step": 3469 }, { "epoch": 0.95, "grad_norm": 2.68430233001709, "learning_rate": 1.4102523151979572e-08, "loss": 0.1288, "step": 3470 }, { "epoch": 0.95, "grad_norm": 2.7666542530059814, "learning_rate": 1.3954811512675636e-08, "loss": 0.103, "step": 3471 }, { "epoch": 0.95, "grad_norm": 2.5653326511383057, "learning_rate": 1.3807872091209038e-08, "loss": 0.1054, "step": 3472 }, { "epoch": 0.95, "grad_norm": 2.939504623413086, "learning_rate": 1.3661705002655177e-08, "loss": 0.1227, "step": 3473 }, { "epoch": 0.95, "grad_norm": 2.5027456283569336, "learning_rate": 1.351631036148404e-08, "loss": 0.1034, "step": 3474 }, { "epoch": 0.95, "grad_norm": 2.8690476417541504, "learning_rate": 1.3371688281560988e-08, "loss": 0.1131, "step": 3475 }, { "epoch": 0.95, "grad_norm": 2.7211506366729736, "learning_rate": 1.3227838876146425e-08, "loss": 0.122, "step": 3476 }, { "epoch": 0.95, "grad_norm": 3.132707118988037, "learning_rate": 1.3084762257895344e-08, "loss": 0.1387, "step": 3477 }, { "epoch": 0.95, "grad_norm": 2.824662208557129, "learning_rate": 1.2942458538857893e-08, "loss": 0.1177, "step": 3478 }, { "epoch": 0.95, "grad_norm": 2.608438491821289, "learning_rate": 1.280092783047848e-08, "loss": 0.12, "step": 3479 }, { "epoch": 0.95, "grad_norm": 2.574810028076172, "learning_rate": 1.2660170243596558e-08, "loss": 0.1164, "step": 3480 }, { "epoch": 0.95, "grad_norm": 2.7951624393463135, "learning_rate": 1.2520185888445945e-08, "loss": 0.1081, "step": 3481 }, { "epoch": 0.95, "grad_norm": 2.6384437084198, "learning_rate": 1.2380974874654837e-08, "loss": 0.1061, "step": 3482 }, { "epoch": 0.95, "grad_norm": 2.7016963958740234, "learning_rate": 1.2242537311245804e-08, "loss": 0.1099, "step": 3483 }, { "epoch": 0.95, "grad_norm": 2.588914155960083, "learning_rate": 1.2104873306635788e-08, "loss": 0.0982, "step": 3484 }, { "epoch": 0.95, "grad_norm": 2.8033766746520996, "learning_rate": 1.1967982968635992e-08, "loss": 0.1134, "step": 3485 }, { "epoch": 0.95, "grad_norm": 2.7870523929595947, "learning_rate": 1.1831866404451441e-08, "loss": 0.0995, "step": 3486 }, { "epoch": 0.95, "grad_norm": 2.943394899368286, "learning_rate": 1.1696523720681306e-08, "loss": 0.1316, "step": 3487 }, { "epoch": 0.95, "grad_norm": 2.647881507873535, "learning_rate": 1.1561955023318915e-08, "loss": 0.1152, "step": 3488 }, { "epoch": 0.95, "grad_norm": 2.8770816326141357, "learning_rate": 1.1428160417751186e-08, "loss": 0.135, "step": 3489 }, { "epoch": 0.95, "grad_norm": 2.982604503631592, "learning_rate": 1.1295140008758863e-08, "loss": 0.1231, "step": 3490 }, { "epoch": 0.95, "grad_norm": 2.55593204498291, "learning_rate": 1.1162893900516501e-08, "loss": 0.0984, "step": 3491 }, { "epoch": 0.95, "grad_norm": 2.924186944961548, "learning_rate": 1.1031422196592033e-08, "loss": 0.1278, "step": 3492 }, { "epoch": 0.95, "grad_norm": 2.758187770843506, "learning_rate": 1.090072499994732e-08, "loss": 0.11, "step": 3493 }, { "epoch": 0.95, "grad_norm": 2.764726161956787, "learning_rate": 1.0770802412937041e-08, "loss": 0.1144, "step": 3494 }, { "epoch": 0.95, "grad_norm": 2.798654794692993, "learning_rate": 1.064165453731003e-08, "loss": 0.1093, "step": 3495 }, { "epoch": 0.95, "grad_norm": 2.662637233734131, "learning_rate": 1.0513281474207714e-08, "loss": 0.106, "step": 3496 }, { "epoch": 0.96, "grad_norm": 2.7021846771240234, "learning_rate": 1.0385683324165007e-08, "loss": 0.1158, "step": 3497 }, { "epoch": 0.96, "grad_norm": 2.717860698699951, "learning_rate": 1.0258860187110085e-08, "loss": 0.1185, "step": 3498 }, { "epoch": 0.96, "grad_norm": 2.8753104209899902, "learning_rate": 1.0132812162363835e-08, "loss": 0.1122, "step": 3499 }, { "epoch": 0.96, "grad_norm": 2.7315731048583984, "learning_rate": 1.0007539348640736e-08, "loss": 0.1039, "step": 3500 }, { "epoch": 0.96, "grad_norm": 2.7231922149658203, "learning_rate": 9.883041844047313e-09, "loss": 0.1079, "step": 3501 }, { "epoch": 0.96, "grad_norm": 2.508077621459961, "learning_rate": 9.759319746083571e-09, "loss": 0.1022, "step": 3502 }, { "epoch": 0.96, "grad_norm": 2.6812562942504883, "learning_rate": 9.636373151642008e-09, "loss": 0.1047, "step": 3503 }, { "epoch": 0.96, "grad_norm": 2.833235263824463, "learning_rate": 9.514202157007822e-09, "loss": 0.1244, "step": 3504 }, { "epoch": 0.96, "grad_norm": 2.7213900089263916, "learning_rate": 9.392806857858815e-09, "loss": 0.1147, "step": 3505 }, { "epoch": 0.96, "grad_norm": 2.867309808731079, "learning_rate": 9.27218734926527e-09, "loss": 0.1177, "step": 3506 }, { "epoch": 0.96, "grad_norm": 2.6550357341766357, "learning_rate": 9.152343725689848e-09, "loss": 0.1129, "step": 3507 }, { "epoch": 0.96, "grad_norm": 2.9081578254699707, "learning_rate": 9.033276080987805e-09, "loss": 0.1199, "step": 3508 }, { "epoch": 0.96, "grad_norm": 2.612014055252075, "learning_rate": 8.914984508406331e-09, "loss": 0.1026, "step": 3509 }, { "epoch": 0.96, "grad_norm": 2.647467613220215, "learning_rate": 8.79746910058543e-09, "loss": 0.1042, "step": 3510 }, { "epoch": 0.96, "grad_norm": 2.7883193492889404, "learning_rate": 8.680729949556597e-09, "loss": 0.1047, "step": 3511 }, { "epoch": 0.96, "grad_norm": 2.792728900909424, "learning_rate": 8.564767146743701e-09, "loss": 0.1172, "step": 3512 }, { "epoch": 0.96, "grad_norm": 2.797672748565674, "learning_rate": 8.449580782962763e-09, "loss": 0.1229, "step": 3513 }, { "epoch": 0.96, "grad_norm": 2.8624258041381836, "learning_rate": 8.335170948421288e-09, "loss": 0.1176, "step": 3514 }, { "epoch": 0.96, "grad_norm": 2.7784693241119385, "learning_rate": 8.221537732719275e-09, "loss": 0.107, "step": 3515 }, { "epoch": 0.96, "grad_norm": 2.8000707626342773, "learning_rate": 8.108681224848091e-09, "loss": 0.1218, "step": 3516 }, { "epoch": 0.96, "grad_norm": 2.9061808586120605, "learning_rate": 7.996601513190704e-09, "loss": 0.1111, "step": 3517 }, { "epoch": 0.96, "grad_norm": 2.769812822341919, "learning_rate": 7.885298685522235e-09, "loss": 0.1137, "step": 3518 }, { "epoch": 0.96, "grad_norm": 3.251732110977173, "learning_rate": 7.774772829008847e-09, "loss": 0.1348, "step": 3519 }, { "epoch": 0.96, "grad_norm": 2.7810757160186768, "learning_rate": 7.665024030208633e-09, "loss": 0.1214, "step": 3520 }, { "epoch": 0.96, "grad_norm": 2.72450590133667, "learning_rate": 7.556052375070954e-09, "loss": 0.1058, "step": 3521 }, { "epoch": 0.96, "grad_norm": 2.5659492015838623, "learning_rate": 7.447857948936654e-09, "loss": 0.1089, "step": 3522 }, { "epoch": 0.96, "grad_norm": 2.7431042194366455, "learning_rate": 7.340440836537731e-09, "loss": 0.1078, "step": 3523 }, { "epoch": 0.96, "grad_norm": 2.6699774265289307, "learning_rate": 7.2338011219973405e-09, "loss": 0.1102, "step": 3524 }, { "epoch": 0.96, "grad_norm": 2.532212734222412, "learning_rate": 7.1279388888303425e-09, "loss": 0.094, "step": 3525 }, { "epoch": 0.96, "grad_norm": 2.7536232471466064, "learning_rate": 7.022854219942198e-09, "loss": 0.1141, "step": 3526 }, { "epoch": 0.96, "grad_norm": 2.757707118988037, "learning_rate": 6.9185471976296314e-09, "loss": 0.1162, "step": 3527 }, { "epoch": 0.96, "grad_norm": 2.800217628479004, "learning_rate": 6.8150179035803e-09, "loss": 0.1091, "step": 3528 }, { "epoch": 0.96, "grad_norm": 2.950460195541382, "learning_rate": 6.712266418872792e-09, "loss": 0.125, "step": 3529 }, { "epoch": 0.96, "grad_norm": 2.503347873687744, "learning_rate": 6.610292823976627e-09, "loss": 0.0961, "step": 3530 }, { "epoch": 0.96, "grad_norm": 2.7922205924987793, "learning_rate": 6.509097198752144e-09, "loss": 0.1141, "step": 3531 }, { "epoch": 0.96, "grad_norm": 2.7933483123779297, "learning_rate": 6.408679622450064e-09, "loss": 0.1178, "step": 3532 }, { "epoch": 0.97, "grad_norm": 2.8715362548828125, "learning_rate": 6.309040173712366e-09, "loss": 0.1126, "step": 3533 }, { "epoch": 0.97, "grad_norm": 2.7140932083129883, "learning_rate": 6.210178930571186e-09, "loss": 0.1133, "step": 3534 }, { "epoch": 0.97, "grad_norm": 2.7340691089630127, "learning_rate": 6.11209597044926e-09, "loss": 0.1003, "step": 3535 }, { "epoch": 0.97, "grad_norm": 2.64202618598938, "learning_rate": 6.0147913701601436e-09, "loss": 0.1133, "step": 3536 }, { "epoch": 0.97, "grad_norm": 2.721435308456421, "learning_rate": 5.918265205907547e-09, "loss": 0.1208, "step": 3537 }, { "epoch": 0.97, "grad_norm": 2.8083791732788086, "learning_rate": 5.822517553285444e-09, "loss": 0.1217, "step": 3538 }, { "epoch": 0.97, "grad_norm": 2.845024347305298, "learning_rate": 5.7275484872783e-09, "loss": 0.1258, "step": 3539 }, { "epoch": 0.97, "grad_norm": 2.8469386100769043, "learning_rate": 5.633358082260953e-09, "loss": 0.1186, "step": 3540 }, { "epoch": 0.97, "grad_norm": 2.926581859588623, "learning_rate": 5.539946411998286e-09, "loss": 0.1174, "step": 3541 }, { "epoch": 0.97, "grad_norm": 3.118927478790283, "learning_rate": 5.447313549645116e-09, "loss": 0.1265, "step": 3542 }, { "epoch": 0.97, "grad_norm": 2.5963892936706543, "learning_rate": 5.3554595677467455e-09, "loss": 0.1034, "step": 3543 }, { "epoch": 0.97, "grad_norm": 2.7718214988708496, "learning_rate": 5.264384538238187e-09, "loss": 0.1155, "step": 3544 }, { "epoch": 0.97, "grad_norm": 2.664069414138794, "learning_rate": 5.174088532444609e-09, "loss": 0.1048, "step": 3545 }, { "epoch": 0.97, "grad_norm": 2.590613842010498, "learning_rate": 5.084571621080891e-09, "loss": 0.1121, "step": 3546 }, { "epoch": 0.97, "grad_norm": 2.689129114151001, "learning_rate": 4.995833874252064e-09, "loss": 0.1129, "step": 3547 }, { "epoch": 0.97, "grad_norm": 3.0058634281158447, "learning_rate": 4.907875361452762e-09, "loss": 0.1259, "step": 3548 }, { "epoch": 0.97, "grad_norm": 3.1234350204467773, "learning_rate": 4.820696151567105e-09, "loss": 0.1205, "step": 3549 }, { "epoch": 0.97, "grad_norm": 2.5136115550994873, "learning_rate": 4.734296312869479e-09, "loss": 0.095, "step": 3550 }, { "epoch": 0.97, "grad_norm": 2.9749624729156494, "learning_rate": 4.648675913023648e-09, "loss": 0.1194, "step": 3551 }, { "epoch": 0.97, "grad_norm": 2.788620948791504, "learning_rate": 4.563835019082751e-09, "loss": 0.1075, "step": 3552 }, { "epoch": 0.97, "grad_norm": 2.709120273590088, "learning_rate": 4.479773697489642e-09, "loss": 0.1075, "step": 3553 }, { "epoch": 0.97, "grad_norm": 2.753480911254883, "learning_rate": 4.396492014076769e-09, "loss": 0.1168, "step": 3554 }, { "epoch": 0.97, "grad_norm": 2.555407762527466, "learning_rate": 4.31399003406574e-09, "loss": 0.105, "step": 3555 }, { "epoch": 0.97, "grad_norm": 2.8909194469451904, "learning_rate": 4.23226782206787e-09, "loss": 0.106, "step": 3556 }, { "epoch": 0.97, "grad_norm": 2.591259002685547, "learning_rate": 4.15132544208352e-09, "loss": 0.1094, "step": 3557 }, { "epoch": 0.97, "grad_norm": 2.816685914993286, "learning_rate": 4.071162957502428e-09, "loss": 0.1203, "step": 3558 }, { "epoch": 0.97, "grad_norm": 2.5624732971191406, "learning_rate": 3.991780431103597e-09, "loss": 0.1138, "step": 3559 }, { "epoch": 0.97, "grad_norm": 2.464156150817871, "learning_rate": 3.913177925055189e-09, "loss": 0.0939, "step": 3560 }, { "epoch": 0.97, "grad_norm": 2.878142833709717, "learning_rate": 3.835355500914405e-09, "loss": 0.1185, "step": 3561 }, { "epoch": 0.97, "grad_norm": 2.9618802070617676, "learning_rate": 3.758313219627718e-09, "loss": 0.1303, "step": 3562 }, { "epoch": 0.97, "grad_norm": 2.6873321533203125, "learning_rate": 3.682051141530418e-09, "loss": 0.1128, "step": 3563 }, { "epoch": 0.97, "grad_norm": 2.9435203075408936, "learning_rate": 3.606569326346842e-09, "loss": 0.1114, "step": 3564 }, { "epoch": 0.97, "grad_norm": 2.599876642227173, "learning_rate": 3.531867833190483e-09, "loss": 0.1017, "step": 3565 }, { "epoch": 0.97, "grad_norm": 2.731457471847534, "learning_rate": 3.4579467205634315e-09, "loss": 0.1232, "step": 3566 }, { "epoch": 0.97, "grad_norm": 2.5069997310638428, "learning_rate": 3.384806046356714e-09, "loss": 0.1017, "step": 3567 }, { "epoch": 0.97, "grad_norm": 2.9809789657592773, "learning_rate": 3.3124458678503996e-09, "loss": 0.1224, "step": 3568 }, { "epoch": 0.97, "grad_norm": 2.742586612701416, "learning_rate": 3.240866241712825e-09, "loss": 0.1144, "step": 3569 }, { "epoch": 0.98, "grad_norm": 2.6126275062561035, "learning_rate": 3.1700672240014825e-09, "loss": 0.1083, "step": 3570 }, { "epoch": 0.98, "grad_norm": 2.58613657951355, "learning_rate": 3.100048870162353e-09, "loss": 0.1104, "step": 3571 }, { "epoch": 0.98, "grad_norm": 3.0407299995422363, "learning_rate": 3.0308112350301284e-09, "loss": 0.1241, "step": 3572 }, { "epoch": 0.98, "grad_norm": 2.790891647338867, "learning_rate": 2.9623543728279908e-09, "loss": 0.113, "step": 3573 }, { "epoch": 0.98, "grad_norm": 2.999558210372925, "learning_rate": 2.894678337167611e-09, "loss": 0.1163, "step": 3574 }, { "epoch": 0.98, "grad_norm": 2.754976511001587, "learning_rate": 2.827783181049259e-09, "loss": 0.1083, "step": 3575 }, { "epoch": 0.98, "grad_norm": 2.7892959117889404, "learning_rate": 2.7616689568616957e-09, "loss": 0.1154, "step": 3576 }, { "epoch": 0.98, "grad_norm": 2.6803035736083984, "learning_rate": 2.696335716382059e-09, "loss": 0.1106, "step": 3577 }, { "epoch": 0.98, "grad_norm": 2.6981587409973145, "learning_rate": 2.6317835107757535e-09, "loss": 0.111, "step": 3578 }, { "epoch": 0.98, "grad_norm": 2.82981276512146, "learning_rate": 2.5680123905966745e-09, "loss": 0.125, "step": 3579 }, { "epoch": 0.98, "grad_norm": 2.6334357261657715, "learning_rate": 2.5050224057868716e-09, "loss": 0.1105, "step": 3580 }, { "epoch": 0.98, "grad_norm": 2.6155431270599365, "learning_rate": 2.4428136056768856e-09, "loss": 0.1023, "step": 3581 }, { "epoch": 0.98, "grad_norm": 2.829718828201294, "learning_rate": 2.3813860389853004e-09, "loss": 0.111, "step": 3582 }, { "epoch": 0.98, "grad_norm": 2.755837917327881, "learning_rate": 2.320739753818746e-09, "loss": 0.1192, "step": 3583 }, { "epoch": 0.98, "grad_norm": 2.7120723724365234, "learning_rate": 2.260874797672341e-09, "loss": 0.108, "step": 3584 }, { "epoch": 0.98, "grad_norm": 2.784351110458374, "learning_rate": 2.2017912174289164e-09, "loss": 0.0985, "step": 3585 }, { "epoch": 0.98, "grad_norm": 2.7912638187408447, "learning_rate": 2.1434890593596823e-09, "loss": 0.1152, "step": 3586 }, { "epoch": 0.98, "grad_norm": 2.814800262451172, "learning_rate": 2.0859683691238916e-09, "loss": 0.1122, "step": 3587 }, { "epoch": 0.98, "grad_norm": 2.7484853267669678, "learning_rate": 2.0292291917684e-09, "loss": 0.1215, "step": 3588 }, { "epoch": 0.98, "grad_norm": 2.751107931137085, "learning_rate": 1.973271571728441e-09, "loss": 0.1075, "step": 3589 }, { "epoch": 0.98, "grad_norm": 2.893585681915283, "learning_rate": 1.9180955528270705e-09, "loss": 0.1162, "step": 3590 }, { "epoch": 0.98, "grad_norm": 2.7889368534088135, "learning_rate": 1.8637011782751675e-09, "loss": 0.1089, "step": 3591 }, { "epoch": 0.98, "grad_norm": 2.986337184906006, "learning_rate": 1.8100884906714353e-09, "loss": 0.1218, "step": 3592 }, { "epoch": 0.98, "grad_norm": 2.7190985679626465, "learning_rate": 1.7572575320023987e-09, "loss": 0.1069, "step": 3593 }, { "epoch": 0.98, "grad_norm": 2.880392551422119, "learning_rate": 1.705208343642739e-09, "loss": 0.1255, "step": 3594 }, { "epoch": 0.98, "grad_norm": 2.922776460647583, "learning_rate": 1.6539409663542947e-09, "loss": 0.1243, "step": 3595 }, { "epoch": 0.98, "grad_norm": 2.5914292335510254, "learning_rate": 1.6034554402870603e-09, "loss": 0.107, "step": 3596 }, { "epoch": 0.98, "grad_norm": 2.5544075965881348, "learning_rate": 1.5537518049785204e-09, "loss": 0.1048, "step": 3597 }, { "epoch": 0.98, "grad_norm": 2.5028624534606934, "learning_rate": 1.504830099353982e-09, "loss": 0.1016, "step": 3598 }, { "epoch": 0.98, "grad_norm": 2.731548309326172, "learning_rate": 1.4566903617263537e-09, "loss": 0.1201, "step": 3599 }, { "epoch": 0.98, "grad_norm": 2.51127552986145, "learning_rate": 1.409332629795923e-09, "loss": 0.108, "step": 3600 }, { "epoch": 0.98, "grad_norm": 2.9954733848571777, "learning_rate": 1.3627569406509109e-09, "loss": 0.1191, "step": 3601 }, { "epoch": 0.98, "grad_norm": 2.8282675743103027, "learning_rate": 1.316963330766807e-09, "loss": 0.1226, "step": 3602 }, { "epoch": 0.98, "grad_norm": 2.9065797328948975, "learning_rate": 1.2719518360068127e-09, "loss": 0.1172, "step": 3603 }, { "epoch": 0.98, "grad_norm": 2.609205484390259, "learning_rate": 1.227722491621397e-09, "loss": 0.1067, "step": 3604 }, { "epoch": 0.98, "grad_norm": 2.8958191871643066, "learning_rate": 1.18427533224863e-09, "loss": 0.1272, "step": 3605 }, { "epoch": 0.99, "grad_norm": 3.1069350242614746, "learning_rate": 1.1416103919141828e-09, "loss": 0.1145, "step": 3606 }, { "epoch": 0.99, "grad_norm": 3.0344345569610596, "learning_rate": 1.0997277040306619e-09, "loss": 0.1246, "step": 3607 }, { "epoch": 0.99, "grad_norm": 2.7096669673919678, "learning_rate": 1.058627301398607e-09, "loss": 0.106, "step": 3608 }, { "epoch": 0.99, "grad_norm": 2.5916988849639893, "learning_rate": 1.018309216205493e-09, "loss": 0.1099, "step": 3609 }, { "epoch": 0.99, "grad_norm": 2.8465921878814697, "learning_rate": 9.787734800263959e-10, "loss": 0.1218, "step": 3610 }, { "epoch": 0.99, "grad_norm": 2.4942994117736816, "learning_rate": 9.400201238235484e-10, "loss": 0.0934, "step": 3611 }, { "epoch": 0.99, "grad_norm": 2.657019853591919, "learning_rate": 9.020491779464512e-10, "loss": 0.1116, "step": 3612 }, { "epoch": 0.99, "grad_norm": 2.7186219692230225, "learning_rate": 8.64860672131984e-10, "loss": 0.1143, "step": 3613 }, { "epoch": 0.99, "grad_norm": 2.7532966136932373, "learning_rate": 8.284546355041833e-10, "loss": 0.1102, "step": 3614 }, { "epoch": 0.99, "grad_norm": 2.6225197315216064, "learning_rate": 7.928310965742424e-10, "loss": 0.0991, "step": 3615 }, { "epoch": 0.99, "grad_norm": 2.7968692779541016, "learning_rate": 7.579900832407338e-10, "loss": 0.1177, "step": 3616 }, { "epoch": 0.99, "grad_norm": 2.871469020843506, "learning_rate": 7.239316227891645e-10, "loss": 0.1258, "step": 3617 }, { "epoch": 0.99, "grad_norm": 2.570632219314575, "learning_rate": 6.906557418923098e-10, "loss": 0.1111, "step": 3618 }, { "epoch": 0.99, "grad_norm": 2.5789928436279297, "learning_rate": 6.581624666102126e-10, "loss": 0.1092, "step": 3619 }, { "epoch": 0.99, "grad_norm": 2.8352792263031006, "learning_rate": 6.264518223896287e-10, "loss": 0.1321, "step": 3620 }, { "epoch": 0.99, "grad_norm": 2.631747245788574, "learning_rate": 5.955238340648039e-10, "loss": 0.1173, "step": 3621 }, { "epoch": 0.99, "grad_norm": 2.6391682624816895, "learning_rate": 5.653785258568078e-10, "loss": 0.1041, "step": 3622 }, { "epoch": 0.99, "grad_norm": 2.706432342529297, "learning_rate": 5.360159213738669e-10, "loss": 0.1133, "step": 3623 }, { "epoch": 0.99, "grad_norm": 2.7213144302368164, "learning_rate": 5.074360436112535e-10, "loss": 0.1156, "step": 3624 }, { "epoch": 0.99, "grad_norm": 2.980194568634033, "learning_rate": 4.796389149511748e-10, "loss": 0.1339, "step": 3625 }, { "epoch": 0.99, "grad_norm": 2.680392265319824, "learning_rate": 4.526245571627729e-10, "loss": 0.1139, "step": 3626 }, { "epoch": 0.99, "grad_norm": 2.7701408863067627, "learning_rate": 4.2639299140223574e-10, "loss": 0.1123, "step": 3627 }, { "epoch": 0.99, "grad_norm": 2.9241676330566406, "learning_rate": 4.00944238212797e-10, "loss": 0.1376, "step": 3628 }, { "epoch": 0.99, "grad_norm": 2.728848934173584, "learning_rate": 3.7627831752462534e-10, "loss": 0.1079, "step": 3629 }, { "epoch": 0.99, "grad_norm": 2.6433091163635254, "learning_rate": 3.5239524865460224e-10, "loss": 0.1052, "step": 3630 }, { "epoch": 0.99, "grad_norm": 2.6879498958587646, "learning_rate": 3.2929505030676594e-10, "loss": 0.1137, "step": 3631 }, { "epoch": 0.99, "grad_norm": 2.6512322425842285, "learning_rate": 3.0697774057197867e-10, "loss": 0.1084, "step": 3632 }, { "epoch": 0.99, "grad_norm": 2.888759136199951, "learning_rate": 2.854433369278153e-10, "loss": 0.1168, "step": 3633 }, { "epoch": 0.99, "grad_norm": 2.929271697998047, "learning_rate": 2.646918562390077e-10, "loss": 0.1282, "step": 3634 }, { "epoch": 0.99, "grad_norm": 2.8085947036743164, "learning_rate": 2.447233147570005e-10, "loss": 0.1083, "step": 3635 }, { "epoch": 0.99, "grad_norm": 2.3818328380584717, "learning_rate": 2.255377281199511e-10, "loss": 0.0916, "step": 3636 }, { "epoch": 0.99, "grad_norm": 2.786105155944824, "learning_rate": 2.0713511135317386e-10, "loss": 0.1027, "step": 3637 }, { "epoch": 0.99, "grad_norm": 2.8876585960388184, "learning_rate": 1.8951547886858488e-10, "loss": 0.1132, "step": 3638 }, { "epoch": 0.99, "grad_norm": 2.8356857299804688, "learning_rate": 1.7267884446470205e-10, "loss": 0.1197, "step": 3639 }, { "epoch": 0.99, "grad_norm": 2.8191254138946533, "learning_rate": 1.5662522132742218e-10, "loss": 0.128, "step": 3640 }, { "epoch": 0.99, "grad_norm": 2.6730127334594727, "learning_rate": 1.4135462202879977e-10, "loss": 0.1158, "step": 3641 }, { "epoch": 0.99, "grad_norm": 2.982252359390259, "learning_rate": 1.2686705852804625e-10, "loss": 0.1205, "step": 3642 }, { "epoch": 1.0, "grad_norm": 2.9249508380889893, "learning_rate": 1.1316254217119681e-10, "loss": 0.1337, "step": 3643 }, { "epoch": 1.0, "grad_norm": 3.0043392181396484, "learning_rate": 1.0024108369066641e-10, "loss": 0.1161, "step": 3644 }, { "epoch": 1.0, "grad_norm": 2.7565393447875977, "learning_rate": 8.810269320591591e-11, "loss": 0.106, "step": 3645 }, { "epoch": 1.0, "grad_norm": 3.222933530807495, "learning_rate": 7.674738022311888e-11, "loss": 0.1317, "step": 3646 }, { "epoch": 1.0, "grad_norm": 2.6725003719329834, "learning_rate": 6.617515363527282e-11, "loss": 0.107, "step": 3647 }, { "epoch": 1.0, "grad_norm": 2.7961084842681885, "learning_rate": 5.638602172175488e-11, "loss": 0.1191, "step": 3648 }, { "epoch": 1.0, "grad_norm": 2.64941668510437, "learning_rate": 4.737999214898814e-11, "loss": 0.1056, "step": 3649 }, { "epoch": 1.0, "grad_norm": 2.6278815269470215, "learning_rate": 3.91570719699974e-11, "loss": 0.1107, "step": 3650 }, { "epoch": 1.0, "grad_norm": 2.79891037940979, "learning_rate": 3.1717267624520316e-11, "loss": 0.122, "step": 3651 }, { "epoch": 1.0, "grad_norm": 2.6240437030792236, "learning_rate": 2.5060584939118334e-11, "loss": 0.1045, "step": 3652 }, { "epoch": 1.0, "grad_norm": 2.7435734272003174, "learning_rate": 1.9187029126843666e-11, "loss": 0.1113, "step": 3653 }, { "epoch": 1.0, "grad_norm": 2.657651662826538, "learning_rate": 1.4096604787572353e-11, "loss": 0.1108, "step": 3654 }, { "epoch": 1.0, "grad_norm": 2.700277328491211, "learning_rate": 9.789315907893226e-12, "loss": 0.118, "step": 3655 }, { "epoch": 1.0, "grad_norm": 2.825643301010132, "learning_rate": 6.2651658608858795e-12, "loss": 0.1242, "step": 3656 }, { "epoch": 1.0, "grad_norm": 2.6722335815429688, "learning_rate": 3.5241574067867983e-12, "loss": 0.111, "step": 3657 }, { "epoch": 1.0, "grad_norm": 2.6393723487854004, "learning_rate": 1.566292691879134e-12, "loss": 0.1113, "step": 3658 }, { "epoch": 1.0, "grad_norm": 3.331031560897827, "learning_rate": 3.9157324960292783e-13, "loss": 0.1432, "step": 3659 }, { "epoch": 1.0, "grad_norm": 2.9109230041503906, "learning_rate": 0.0, "loss": 0.1082, "step": 3660 }, { "epoch": 1.0, "step": 3660, "total_flos": 1.0073310583974789e+18, "train_loss": 0.12687737517710265, "train_runtime": 6334.926, "train_samples_per_second": 73.964, "train_steps_per_second": 0.578 } ], "logging_steps": 1.0, "max_steps": 3660, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "total_flos": 1.0073310583974789e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }