{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9997680083516993, "eval_steps": 500, "global_step": 8620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023199164830066117, "grad_norm": 74.39870819089624, "learning_rate": 0.0, "loss": 2.1794, "step": 1 }, { "epoch": 0.00046398329660132234, "grad_norm": 112.28468562448423, "learning_rate": 3.1041712554031024e-07, "loss": 2.3896, "step": 2 }, { "epoch": 0.0006959749449019835, "grad_norm": 75.42842213959644, "learning_rate": 4.919995035630432e-07, "loss": 2.0454, "step": 3 }, { "epoch": 0.0009279665932026447, "grad_norm": 105.0533217529224, "learning_rate": 6.208342510806205e-07, "loss": 2.3266, "step": 4 }, { "epoch": 0.0011599582415033058, "grad_norm": 148.58005460638825, "learning_rate": 7.207662449262237e-07, "loss": 2.6506, "step": 5 }, { "epoch": 0.001391949889803967, "grad_norm": 69.77358744845148, "learning_rate": 8.024166291033534e-07, "loss": 1.8992, "step": 6 }, { "epoch": 0.0016239415381046282, "grad_norm": 114.58129896954524, "learning_rate": 8.71451045276563e-07, "loss": 1.137, "step": 7 }, { "epoch": 0.0018559331864052894, "grad_norm": 94.46712989635765, "learning_rate": 9.312513766209306e-07, "loss": 0.9458, "step": 8 }, { "epoch": 0.0020879248347059507, "grad_norm": 137.0968911795173, "learning_rate": 9.839990071260865e-07, "loss": 0.7461, "step": 9 }, { "epoch": 0.0023199164830066117, "grad_norm": 31.144565059150484, "learning_rate": 1.031183370466534e-06, "loss": 0.9004, "step": 10 }, { "epoch": 0.002551908131307273, "grad_norm": 32.870107426052726, "learning_rate": 1.0738668190606527e-06, "loss": 0.9956, "step": 11 }, { "epoch": 0.002783899779607934, "grad_norm": 52.54313022163986, "learning_rate": 1.1128337546436637e-06, "loss": 0.7702, "step": 12 }, { "epoch": 0.0030158914279085954, "grad_norm": 43.63442634279012, "learning_rate": 1.1486798605405537e-06, "loss": 0.8403, "step": 13 }, { "epoch": 0.0032478830762092564, "grad_norm": 18.930724925338343, "learning_rate": 1.1818681708168735e-06, "loss": 0.9008, "step": 14 }, { "epoch": 0.0034798747245099178, "grad_norm": 37.59291122718292, "learning_rate": 1.212765748489267e-06, "loss": 0.8522, "step": 15 }, { "epoch": 0.0037118663728105787, "grad_norm": 771.0884603354021, "learning_rate": 1.241668502161241e-06, "loss": 0.721, "step": 16 }, { "epoch": 0.00394385802111124, "grad_norm": 28.570318092553354, "learning_rate": 1.2688184659337598e-06, "loss": 0.7248, "step": 17 }, { "epoch": 0.0041758496694119015, "grad_norm": 24.426651580387347, "learning_rate": 1.2944161326663965e-06, "loss": 0.8323, "step": 18 }, { "epoch": 0.004407841317712562, "grad_norm": 19.369835379439408, "learning_rate": 1.3186294482267552e-06, "loss": 0.696, "step": 19 }, { "epoch": 0.004639832966013223, "grad_norm": 41.14435097126691, "learning_rate": 1.3416004960068441e-06, "loss": 0.7574, "step": 20 }, { "epoch": 0.004871824614313885, "grad_norm": 13.411501946608672, "learning_rate": 1.3634505488396065e-06, "loss": 0.7499, "step": 21 }, { "epoch": 0.005103816262614546, "grad_norm": 32.892805474385675, "learning_rate": 1.384283944600963e-06, "loss": 0.6824, "step": 22 }, { "epoch": 0.005335807910915207, "grad_norm": 39.84552686658576, "learning_rate": 1.4041910996027212e-06, "loss": 0.4729, "step": 23 }, { "epoch": 0.005567799559215868, "grad_norm": 22.06254779204553, "learning_rate": 1.423250880183974e-06, "loss": 0.6192, "step": 24 }, { "epoch": 0.0057997912075165295, "grad_norm": 37.344375903194816, "learning_rate": 1.4415324898524475e-06, "loss": 0.7357, "step": 25 }, { "epoch": 0.006031782855817191, "grad_norm": 32.31772823142612, "learning_rate": 1.459096986080864e-06, "loss": 0.5671, "step": 26 }, { "epoch": 0.006263774504117851, "grad_norm": 10.92384480057118, "learning_rate": 1.4759985106891297e-06, "loss": 0.6269, "step": 27 }, { "epoch": 0.006495766152418513, "grad_norm": 35.369564086076316, "learning_rate": 1.4922852963571837e-06, "loss": 0.6397, "step": 28 }, { "epoch": 0.006727757800719174, "grad_norm": 18.49217287082658, "learning_rate": 1.5080004964369569e-06, "loss": 0.628, "step": 29 }, { "epoch": 0.0069597494490198355, "grad_norm": 41.13501433825827, "learning_rate": 1.5231828740295771e-06, "loss": 0.6695, "step": 30 }, { "epoch": 0.007191741097320496, "grad_norm": 54.22576765655488, "learning_rate": 1.5378673780327045e-06, "loss": 0.6157, "step": 31 }, { "epoch": 0.007423732745621157, "grad_norm": 16.447372540566636, "learning_rate": 1.5520856277015512e-06, "loss": 0.5254, "step": 32 }, { "epoch": 0.007655724393921819, "grad_norm": 32.63353299704918, "learning_rate": 1.5658663226236957e-06, "loss": 0.6434, "step": 33 }, { "epoch": 0.00788771604222248, "grad_norm": 24.91733224181655, "learning_rate": 1.57923559147407e-06, "loss": 0.5484, "step": 34 }, { "epoch": 0.00811970769052314, "grad_norm": 25.808395796187114, "learning_rate": 1.592217290202787e-06, "loss": 0.5112, "step": 35 }, { "epoch": 0.008351699338823803, "grad_norm": 16.243462702948914, "learning_rate": 1.6048332582067068e-06, "loss": 0.4698, "step": 36 }, { "epoch": 0.008583690987124463, "grad_norm": 23.97628827091189, "learning_rate": 1.6171035393948334e-06, "loss": 0.5316, "step": 37 }, { "epoch": 0.008815682635425124, "grad_norm": 27.040198776241745, "learning_rate": 1.6290465737670655e-06, "loss": 0.485, "step": 38 }, { "epoch": 0.009047674283725786, "grad_norm": 38.2640149689467, "learning_rate": 1.6406793641035967e-06, "loss": 0.5918, "step": 39 }, { "epoch": 0.009279665932026447, "grad_norm": 22.98109068927263, "learning_rate": 1.6520176215471544e-06, "loss": 0.4683, "step": 40 }, { "epoch": 0.009511657580327109, "grad_norm": 48.11308667579086, "learning_rate": 1.6630758932062726e-06, "loss": 0.625, "step": 41 }, { "epoch": 0.00974364922862777, "grad_norm": 33.404519417339436, "learning_rate": 1.6738676743799167e-06, "loss": 0.5841, "step": 42 }, { "epoch": 0.00997564087692843, "grad_norm": 58.61162129240976, "learning_rate": 1.6844055075753218e-06, "loss": 0.4964, "step": 43 }, { "epoch": 0.010207632525229092, "grad_norm": 38.349120521370125, "learning_rate": 1.694701070141273e-06, "loss": 0.5981, "step": 44 }, { "epoch": 0.010439624173529753, "grad_norm": 14.12305267247844, "learning_rate": 1.7047652520523101e-06, "loss": 0.3997, "step": 45 }, { "epoch": 0.010671615821830413, "grad_norm": 51.54573323050249, "learning_rate": 1.7146082251430314e-06, "loss": 0.5963, "step": 46 }, { "epoch": 0.010903607470131076, "grad_norm": 39.1682679389499, "learning_rate": 1.7242395048960248e-06, "loss": 0.5537, "step": 47 }, { "epoch": 0.011135599118431736, "grad_norm": 46.60587716219377, "learning_rate": 1.7336680057242842e-06, "loss": 0.6186, "step": 48 }, { "epoch": 0.011367590766732398, "grad_norm": 34.28832247434921, "learning_rate": 1.742902090553126e-06, "loss": 0.4981, "step": 49 }, { "epoch": 0.011599582415033059, "grad_norm": 36.911818320913, "learning_rate": 1.7519496153927577e-06, "loss": 0.5478, "step": 50 }, { "epoch": 0.01183157406333372, "grad_norm": 19.20406082351278, "learning_rate": 1.760817969496803e-06, "loss": 0.4656, "step": 51 }, { "epoch": 0.012063565711634382, "grad_norm": 25.02534356933273, "learning_rate": 1.7695141116211742e-06, "loss": 0.3916, "step": 52 }, { "epoch": 0.012295557359935042, "grad_norm": 18.593964365810773, "learning_rate": 1.7780446028290557e-06, "loss": 0.4645, "step": 53 }, { "epoch": 0.012527549008235703, "grad_norm": 17.886039648783353, "learning_rate": 1.7864156362294398e-06, "loss": 0.4096, "step": 54 }, { "epoch": 0.012759540656536365, "grad_norm": 34.88545271696248, "learning_rate": 1.7946330639868765e-06, "loss": 0.642, "step": 55 }, { "epoch": 0.012991532304837025, "grad_norm": 18.784349156418234, "learning_rate": 1.802702421897494e-06, "loss": 0.4415, "step": 56 }, { "epoch": 0.013223523953137688, "grad_norm": 37.50634451781366, "learning_rate": 1.8106289517897987e-06, "loss": 0.3873, "step": 57 }, { "epoch": 0.013455515601438348, "grad_norm": 16.880603630761, "learning_rate": 1.818417621977267e-06, "loss": 0.5032, "step": 58 }, { "epoch": 0.013687507249739009, "grad_norm": 17.02585172512492, "learning_rate": 1.8260731459625882e-06, "loss": 0.4495, "step": 59 }, { "epoch": 0.013919498898039671, "grad_norm": 107.35379091818216, "learning_rate": 1.8335999995698872e-06, "loss": 0.696, "step": 60 }, { "epoch": 0.014151490546340332, "grad_norm": 19.40508069322086, "learning_rate": 1.8410024366608638e-06, "loss": 0.4458, "step": 61 }, { "epoch": 0.014383482194640992, "grad_norm": 23.044132880786158, "learning_rate": 1.8482845035730148e-06, "loss": 0.5393, "step": 62 }, { "epoch": 0.014615473842941654, "grad_norm": 28.654552405092105, "learning_rate": 1.8554500524026495e-06, "loss": 0.5256, "step": 63 }, { "epoch": 0.014847465491242315, "grad_norm": 48.577148536807755, "learning_rate": 1.8625027532418612e-06, "loss": 0.5776, "step": 64 }, { "epoch": 0.015079457139542977, "grad_norm": 21.245792672313023, "learning_rate": 1.869446105466777e-06, "loss": 0.4652, "step": 65 }, { "epoch": 0.015311448787843638, "grad_norm": 71.60793690174401, "learning_rate": 1.8762834481640057e-06, "loss": 0.5653, "step": 66 }, { "epoch": 0.015543440436144298, "grad_norm": 26.128541484591057, "learning_rate": 1.8830179697730491e-06, "loss": 0.535, "step": 67 }, { "epoch": 0.01577543208444496, "grad_norm": 33.64066732768441, "learning_rate": 1.8896527170143803e-06, "loss": 0.5011, "step": 68 }, { "epoch": 0.01600742373274562, "grad_norm": 26.88917149800284, "learning_rate": 1.8961906031657644e-06, "loss": 0.5581, "step": 69 }, { "epoch": 0.01623941538104628, "grad_norm": 25.195762814497893, "learning_rate": 1.9026344157430974e-06, "loss": 0.4721, "step": 70 }, { "epoch": 0.016471407029346942, "grad_norm": 29.42534723385049, "learning_rate": 1.908986823636446e-06, "loss": 0.5184, "step": 71 }, { "epoch": 0.016703398677647606, "grad_norm": 16.62919699797701, "learning_rate": 1.9152503837470172e-06, "loss": 0.4352, "step": 72 }, { "epoch": 0.016935390325948266, "grad_norm": 19.78067075131713, "learning_rate": 1.921427547166354e-06, "loss": 0.4264, "step": 73 }, { "epoch": 0.017167381974248927, "grad_norm": 26.136176618497988, "learning_rate": 1.927520664935144e-06, "loss": 0.4323, "step": 74 }, { "epoch": 0.017399373622549587, "grad_norm": 20.42370322133567, "learning_rate": 1.9335319934154905e-06, "loss": 0.4796, "step": 75 }, { "epoch": 0.017631365270850248, "grad_norm": 16.995153150338616, "learning_rate": 1.939463699307376e-06, "loss": 0.3959, "step": 76 }, { "epoch": 0.017863356919150912, "grad_norm": 16.339592308878576, "learning_rate": 1.945317864337216e-06, "loss": 0.3985, "step": 77 }, { "epoch": 0.018095348567451573, "grad_norm": 19.2386041009152, "learning_rate": 1.951096489643907e-06, "loss": 0.4068, "step": 78 }, { "epoch": 0.018327340215752233, "grad_norm": 23.57524018997069, "learning_rate": 1.9568014998854824e-06, "loss": 0.3625, "step": 79 }, { "epoch": 0.018559331864052894, "grad_norm": 32.00958631342296, "learning_rate": 1.9624347470874646e-06, "loss": 0.4652, "step": 80 }, { "epoch": 0.018791323512353554, "grad_norm": 29.277000871097183, "learning_rate": 1.967998014252173e-06, "loss": 0.3404, "step": 81 }, { "epoch": 0.019023315160654218, "grad_norm": 26.679579757688188, "learning_rate": 1.973493018746583e-06, "loss": 0.5614, "step": 82 }, { "epoch": 0.01925530680895488, "grad_norm": 34.03519083482874, "learning_rate": 1.9789214154848464e-06, "loss": 0.4527, "step": 83 }, { "epoch": 0.01948729845725554, "grad_norm": 9.307613839376076, "learning_rate": 1.984284799920227e-06, "loss": 0.3238, "step": 84 }, { "epoch": 0.0197192901055562, "grad_norm": 28.17577070445086, "learning_rate": 1.989584710859984e-06, "loss": 0.584, "step": 85 }, { "epoch": 0.01995128175385686, "grad_norm": 32.013482960486684, "learning_rate": 1.994822633115632e-06, "loss": 0.4381, "step": 86 }, { "epoch": 0.02018327340215752, "grad_norm": 26.285276118944726, "learning_rate": 2e-06, "loss": 0.3722, "step": 87 }, { "epoch": 0.020415265050458185, "grad_norm": 37.319181949930496, "learning_rate": 2e-06, "loss": 0.4557, "step": 88 }, { "epoch": 0.020647256698758845, "grad_norm": 22.094760938419988, "learning_rate": 2e-06, "loss": 0.3266, "step": 89 }, { "epoch": 0.020879248347059506, "grad_norm": 25.245767362643576, "learning_rate": 2e-06, "loss": 0.3231, "step": 90 }, { "epoch": 0.021111239995360166, "grad_norm": 21.175594320556343, "learning_rate": 2e-06, "loss": 0.4793, "step": 91 }, { "epoch": 0.021343231643660827, "grad_norm": 34.0650426085687, "learning_rate": 2e-06, "loss": 0.3412, "step": 92 }, { "epoch": 0.02157522329196149, "grad_norm": 18.241508100008726, "learning_rate": 2e-06, "loss": 0.4224, "step": 93 }, { "epoch": 0.02180721494026215, "grad_norm": 60.461910401186756, "learning_rate": 2e-06, "loss": 0.3686, "step": 94 }, { "epoch": 0.022039206588562812, "grad_norm": 30.838743569237803, "learning_rate": 2e-06, "loss": 0.4954, "step": 95 }, { "epoch": 0.022271198236863472, "grad_norm": 18.870087576100286, "learning_rate": 2e-06, "loss": 0.4342, "step": 96 }, { "epoch": 0.022503189885164133, "grad_norm": 17.830426935702803, "learning_rate": 2e-06, "loss": 0.3998, "step": 97 }, { "epoch": 0.022735181533464797, "grad_norm": 22.72931222482498, "learning_rate": 2e-06, "loss": 0.4001, "step": 98 }, { "epoch": 0.022967173181765457, "grad_norm": 28.15915823308931, "learning_rate": 2e-06, "loss": 0.3818, "step": 99 }, { "epoch": 0.023199164830066118, "grad_norm": 19.509920544774893, "learning_rate": 2e-06, "loss": 0.3897, "step": 100 }, { "epoch": 0.02343115647836678, "grad_norm": 22.200505904185203, "learning_rate": 2e-06, "loss": 0.3631, "step": 101 }, { "epoch": 0.02366314812666744, "grad_norm": 46.8337504969018, "learning_rate": 2e-06, "loss": 0.4703, "step": 102 }, { "epoch": 0.023895139774968103, "grad_norm": 30.52389628493483, "learning_rate": 2e-06, "loss": 0.4831, "step": 103 }, { "epoch": 0.024127131423268763, "grad_norm": 33.49480618399563, "learning_rate": 2e-06, "loss": 0.4041, "step": 104 }, { "epoch": 0.024359123071569424, "grad_norm": 50.81297884729123, "learning_rate": 2e-06, "loss": 0.6375, "step": 105 }, { "epoch": 0.024591114719870084, "grad_norm": 17.464920058894066, "learning_rate": 2e-06, "loss": 0.3552, "step": 106 }, { "epoch": 0.024823106368170745, "grad_norm": 24.45022223936699, "learning_rate": 2e-06, "loss": 0.4592, "step": 107 }, { "epoch": 0.025055098016471405, "grad_norm": 19.966546967508155, "learning_rate": 2e-06, "loss": 0.4533, "step": 108 }, { "epoch": 0.02528708966477207, "grad_norm": 28.450205806954816, "learning_rate": 2e-06, "loss": 0.4727, "step": 109 }, { "epoch": 0.02551908131307273, "grad_norm": 41.13916073966549, "learning_rate": 2e-06, "loss": 0.497, "step": 110 }, { "epoch": 0.02575107296137339, "grad_norm": 16.112754647057017, "learning_rate": 2e-06, "loss": 0.3844, "step": 111 }, { "epoch": 0.02598306460967405, "grad_norm": 28.162277990599502, "learning_rate": 2e-06, "loss": 0.4953, "step": 112 }, { "epoch": 0.02621505625797471, "grad_norm": 23.758505464030357, "learning_rate": 2e-06, "loss": 0.3308, "step": 113 }, { "epoch": 0.026447047906275375, "grad_norm": 32.06452268639564, "learning_rate": 2e-06, "loss": 0.4046, "step": 114 }, { "epoch": 0.026679039554576036, "grad_norm": 17.30981532939412, "learning_rate": 2e-06, "loss": 0.3985, "step": 115 }, { "epoch": 0.026911031202876697, "grad_norm": 33.43471126599173, "learning_rate": 2e-06, "loss": 0.4519, "step": 116 }, { "epoch": 0.027143022851177357, "grad_norm": 27.51227040876881, "learning_rate": 2e-06, "loss": 0.4136, "step": 117 }, { "epoch": 0.027375014499478018, "grad_norm": 23.974504782175565, "learning_rate": 2e-06, "loss": 0.477, "step": 118 }, { "epoch": 0.02760700614777868, "grad_norm": 30.741717997138643, "learning_rate": 2e-06, "loss": 0.4243, "step": 119 }, { "epoch": 0.027838997796079342, "grad_norm": 11.972532620974395, "learning_rate": 2e-06, "loss": 0.2537, "step": 120 }, { "epoch": 0.028070989444380003, "grad_norm": 11.808199219768877, "learning_rate": 2e-06, "loss": 0.3055, "step": 121 }, { "epoch": 0.028302981092680663, "grad_norm": 41.16658965347815, "learning_rate": 2e-06, "loss": 0.446, "step": 122 }, { "epoch": 0.028534972740981324, "grad_norm": 36.30280128025487, "learning_rate": 2e-06, "loss": 0.3894, "step": 123 }, { "epoch": 0.028766964389281984, "grad_norm": 38.95823329601204, "learning_rate": 2e-06, "loss": 0.4648, "step": 124 }, { "epoch": 0.028998956037582648, "grad_norm": 22.228812125565018, "learning_rate": 2e-06, "loss": 0.4194, "step": 125 }, { "epoch": 0.02923094768588331, "grad_norm": 15.979110407781146, "learning_rate": 2e-06, "loss": 0.4041, "step": 126 }, { "epoch": 0.02946293933418397, "grad_norm": 37.18105571848787, "learning_rate": 2e-06, "loss": 0.4889, "step": 127 }, { "epoch": 0.02969493098248463, "grad_norm": 30.857636960509943, "learning_rate": 2e-06, "loss": 0.4519, "step": 128 }, { "epoch": 0.02992692263078529, "grad_norm": 39.61185636984746, "learning_rate": 2e-06, "loss": 0.3358, "step": 129 }, { "epoch": 0.030158914279085954, "grad_norm": 42.498140804000236, "learning_rate": 2e-06, "loss": 0.5417, "step": 130 }, { "epoch": 0.030390905927386615, "grad_norm": 10.908948081927463, "learning_rate": 2e-06, "loss": 0.3374, "step": 131 }, { "epoch": 0.030622897575687275, "grad_norm": 14.806615866444728, "learning_rate": 2e-06, "loss": 0.3544, "step": 132 }, { "epoch": 0.030854889223987936, "grad_norm": 39.602376233592565, "learning_rate": 2e-06, "loss": 0.5983, "step": 133 }, { "epoch": 0.031086880872288596, "grad_norm": 25.46432735816376, "learning_rate": 2e-06, "loss": 0.3938, "step": 134 }, { "epoch": 0.03131887252058926, "grad_norm": 23.611133494637883, "learning_rate": 2e-06, "loss": 0.4045, "step": 135 }, { "epoch": 0.03155086416888992, "grad_norm": 20.360702542186612, "learning_rate": 2e-06, "loss": 0.3803, "step": 136 }, { "epoch": 0.03178285581719058, "grad_norm": 18.853473807665043, "learning_rate": 2e-06, "loss": 0.3924, "step": 137 }, { "epoch": 0.03201484746549124, "grad_norm": 17.706059410717437, "learning_rate": 2e-06, "loss": 0.3616, "step": 138 }, { "epoch": 0.032246839113791906, "grad_norm": 25.910709281503944, "learning_rate": 2e-06, "loss": 0.4097, "step": 139 }, { "epoch": 0.03247883076209256, "grad_norm": 18.78408165355993, "learning_rate": 2e-06, "loss": 0.4219, "step": 140 }, { "epoch": 0.03271082241039323, "grad_norm": 17.592827541949497, "learning_rate": 2e-06, "loss": 0.4167, "step": 141 }, { "epoch": 0.032942814058693884, "grad_norm": 20.38480421675272, "learning_rate": 2e-06, "loss": 0.3778, "step": 142 }, { "epoch": 0.03317480570699455, "grad_norm": 26.556214891507235, "learning_rate": 2e-06, "loss": 0.5647, "step": 143 }, { "epoch": 0.03340679735529521, "grad_norm": 10.280063069710177, "learning_rate": 2e-06, "loss": 0.2938, "step": 144 }, { "epoch": 0.03363878900359587, "grad_norm": 35.07937942331207, "learning_rate": 2e-06, "loss": 0.4143, "step": 145 }, { "epoch": 0.03387078065189653, "grad_norm": 28.537691552524954, "learning_rate": 2e-06, "loss": 0.4831, "step": 146 }, { "epoch": 0.03410277230019719, "grad_norm": 16.60729525087867, "learning_rate": 2e-06, "loss": 0.4095, "step": 147 }, { "epoch": 0.034334763948497854, "grad_norm": 12.328955527513653, "learning_rate": 2e-06, "loss": 0.3769, "step": 148 }, { "epoch": 0.03456675559679852, "grad_norm": 18.092020051851453, "learning_rate": 2e-06, "loss": 0.3555, "step": 149 }, { "epoch": 0.034798747245099175, "grad_norm": 44.18114865674143, "learning_rate": 2e-06, "loss": 0.5052, "step": 150 }, { "epoch": 0.03503073889339984, "grad_norm": 16.552193439071686, "learning_rate": 2e-06, "loss": 0.3389, "step": 151 }, { "epoch": 0.035262730541700496, "grad_norm": 28.697477777252256, "learning_rate": 2e-06, "loss": 0.3826, "step": 152 }, { "epoch": 0.03549472219000116, "grad_norm": 9.627827612017324, "learning_rate": 2e-06, "loss": 0.3534, "step": 153 }, { "epoch": 0.035726713838301824, "grad_norm": 18.495594499790304, "learning_rate": 2e-06, "loss": 0.2883, "step": 154 }, { "epoch": 0.03595870548660248, "grad_norm": 14.819731470172004, "learning_rate": 2e-06, "loss": 0.3543, "step": 155 }, { "epoch": 0.036190697134903145, "grad_norm": 30.084238573946536, "learning_rate": 2e-06, "loss": 0.3936, "step": 156 }, { "epoch": 0.0364226887832038, "grad_norm": 38.32707194245598, "learning_rate": 2e-06, "loss": 0.5518, "step": 157 }, { "epoch": 0.036654680431504466, "grad_norm": 30.99662633417196, "learning_rate": 2e-06, "loss": 0.3367, "step": 158 }, { "epoch": 0.03688667207980513, "grad_norm": 29.788479638237202, "learning_rate": 2e-06, "loss": 0.44, "step": 159 }, { "epoch": 0.03711866372810579, "grad_norm": 17.494933531167206, "learning_rate": 2e-06, "loss": 0.3427, "step": 160 }, { "epoch": 0.03735065537640645, "grad_norm": 33.72980029025054, "learning_rate": 2e-06, "loss": 0.3467, "step": 161 }, { "epoch": 0.03758264702470711, "grad_norm": 13.379171231265387, "learning_rate": 2e-06, "loss": 0.3747, "step": 162 }, { "epoch": 0.03781463867300777, "grad_norm": 20.70832977530479, "learning_rate": 2e-06, "loss": 0.3986, "step": 163 }, { "epoch": 0.038046630321308436, "grad_norm": 28.74074583084915, "learning_rate": 2e-06, "loss": 0.3806, "step": 164 }, { "epoch": 0.03827862196960909, "grad_norm": 52.2052448308457, "learning_rate": 2e-06, "loss": 0.4445, "step": 165 }, { "epoch": 0.03851061361790976, "grad_norm": 31.048472582638617, "learning_rate": 2e-06, "loss": 0.4167, "step": 166 }, { "epoch": 0.038742605266210414, "grad_norm": 21.227802893594617, "learning_rate": 2e-06, "loss": 0.4363, "step": 167 }, { "epoch": 0.03897459691451108, "grad_norm": 22.483410466187955, "learning_rate": 2e-06, "loss": 0.3964, "step": 168 }, { "epoch": 0.03920658856281174, "grad_norm": 15.724111111556514, "learning_rate": 2e-06, "loss": 0.3786, "step": 169 }, { "epoch": 0.0394385802111124, "grad_norm": 17.28514355448206, "learning_rate": 2e-06, "loss": 0.3596, "step": 170 }, { "epoch": 0.03967057185941306, "grad_norm": 13.689374632434935, "learning_rate": 2e-06, "loss": 0.283, "step": 171 }, { "epoch": 0.03990256350771372, "grad_norm": 14.577661385441322, "learning_rate": 2e-06, "loss": 0.3097, "step": 172 }, { "epoch": 0.040134555156014384, "grad_norm": 24.861537338293093, "learning_rate": 2e-06, "loss": 0.4145, "step": 173 }, { "epoch": 0.04036654680431504, "grad_norm": 33.91806599011101, "learning_rate": 2e-06, "loss": 0.4137, "step": 174 }, { "epoch": 0.040598538452615705, "grad_norm": 37.330205558404835, "learning_rate": 2e-06, "loss": 0.4001, "step": 175 }, { "epoch": 0.04083053010091637, "grad_norm": 10.75490224381776, "learning_rate": 2e-06, "loss": 0.3346, "step": 176 }, { "epoch": 0.041062521749217026, "grad_norm": 17.920632030971035, "learning_rate": 2e-06, "loss": 0.3957, "step": 177 }, { "epoch": 0.04129451339751769, "grad_norm": 35.06077517129045, "learning_rate": 2e-06, "loss": 0.5442, "step": 178 }, { "epoch": 0.04152650504581835, "grad_norm": 37.373576118845364, "learning_rate": 2e-06, "loss": 0.3909, "step": 179 }, { "epoch": 0.04175849669411901, "grad_norm": 21.774127565938404, "learning_rate": 2e-06, "loss": 0.3612, "step": 180 }, { "epoch": 0.041990488342419675, "grad_norm": 22.310572412193864, "learning_rate": 2e-06, "loss": 0.3541, "step": 181 }, { "epoch": 0.04222247999072033, "grad_norm": 19.91238122930712, "learning_rate": 2e-06, "loss": 0.3518, "step": 182 }, { "epoch": 0.042454471639020996, "grad_norm": 38.82299720583173, "learning_rate": 2e-06, "loss": 0.4455, "step": 183 }, { "epoch": 0.04268646328732165, "grad_norm": 81.78612014143567, "learning_rate": 2e-06, "loss": 0.4527, "step": 184 }, { "epoch": 0.04291845493562232, "grad_norm": 38.671646262718454, "learning_rate": 2e-06, "loss": 0.4736, "step": 185 }, { "epoch": 0.04315044658392298, "grad_norm": 28.292078981601982, "learning_rate": 2e-06, "loss": 0.3465, "step": 186 }, { "epoch": 0.04338243823222364, "grad_norm": 28.201025845524587, "learning_rate": 2e-06, "loss": 0.3274, "step": 187 }, { "epoch": 0.0436144298805243, "grad_norm": 16.502642381982742, "learning_rate": 2e-06, "loss": 0.3552, "step": 188 }, { "epoch": 0.04384642152882496, "grad_norm": 21.502109491213528, "learning_rate": 2e-06, "loss": 0.4031, "step": 189 }, { "epoch": 0.044078413177125624, "grad_norm": 45.395445806829784, "learning_rate": 2e-06, "loss": 0.3891, "step": 190 }, { "epoch": 0.04431040482542629, "grad_norm": 16.772245712643286, "learning_rate": 2e-06, "loss": 0.3811, "step": 191 }, { "epoch": 0.044542396473726945, "grad_norm": 16.484095337228272, "learning_rate": 2e-06, "loss": 0.3893, "step": 192 }, { "epoch": 0.04477438812202761, "grad_norm": 19.347435512156647, "learning_rate": 2e-06, "loss": 0.3649, "step": 193 }, { "epoch": 0.045006379770328266, "grad_norm": 24.383625612454, "learning_rate": 2e-06, "loss": 0.3976, "step": 194 }, { "epoch": 0.04523837141862893, "grad_norm": 16.780375260685737, "learning_rate": 2e-06, "loss": 0.2944, "step": 195 }, { "epoch": 0.045470363066929594, "grad_norm": 16.538828083347266, "learning_rate": 2e-06, "loss": 0.3967, "step": 196 }, { "epoch": 0.04570235471523025, "grad_norm": 25.831467483102088, "learning_rate": 2e-06, "loss": 0.4292, "step": 197 }, { "epoch": 0.045934346363530915, "grad_norm": 29.933106472676695, "learning_rate": 2e-06, "loss": 0.3907, "step": 198 }, { "epoch": 0.04616633801183157, "grad_norm": 34.722557394383415, "learning_rate": 2e-06, "loss": 0.4854, "step": 199 }, { "epoch": 0.046398329660132236, "grad_norm": 23.307072911383692, "learning_rate": 2e-06, "loss": 0.3485, "step": 200 }, { "epoch": 0.0466303213084329, "grad_norm": 44.83081493918162, "learning_rate": 2e-06, "loss": 0.3677, "step": 201 }, { "epoch": 0.04686231295673356, "grad_norm": 17.456496077989364, "learning_rate": 2e-06, "loss": 0.2696, "step": 202 }, { "epoch": 0.04709430460503422, "grad_norm": 47.09280879290812, "learning_rate": 2e-06, "loss": 0.4629, "step": 203 }, { "epoch": 0.04732629625333488, "grad_norm": 28.03807838243593, "learning_rate": 2e-06, "loss": 0.4508, "step": 204 }, { "epoch": 0.04755828790163554, "grad_norm": 22.95656727807171, "learning_rate": 2e-06, "loss": 0.5087, "step": 205 }, { "epoch": 0.047790279549936206, "grad_norm": 19.916633250635453, "learning_rate": 2e-06, "loss": 0.3502, "step": 206 }, { "epoch": 0.04802227119823686, "grad_norm": 18.429230451297414, "learning_rate": 2e-06, "loss": 0.3992, "step": 207 }, { "epoch": 0.04825426284653753, "grad_norm": 19.146947435265297, "learning_rate": 2e-06, "loss": 0.37, "step": 208 }, { "epoch": 0.048486254494838184, "grad_norm": 19.635638653510593, "learning_rate": 2e-06, "loss": 0.4428, "step": 209 }, { "epoch": 0.04871824614313885, "grad_norm": 16.65678239968762, "learning_rate": 2e-06, "loss": 0.3539, "step": 210 }, { "epoch": 0.048950237791439505, "grad_norm": 12.811312162663347, "learning_rate": 2e-06, "loss": 0.3111, "step": 211 }, { "epoch": 0.04918222943974017, "grad_norm": 26.544754694919302, "learning_rate": 2e-06, "loss": 0.3996, "step": 212 }, { "epoch": 0.04941422108804083, "grad_norm": 27.038312574981394, "learning_rate": 2e-06, "loss": 0.4816, "step": 213 }, { "epoch": 0.04964621273634149, "grad_norm": 19.110034604846202, "learning_rate": 2e-06, "loss": 0.3396, "step": 214 }, { "epoch": 0.049878204384642154, "grad_norm": 17.23770447640579, "learning_rate": 2e-06, "loss": 0.4118, "step": 215 }, { "epoch": 0.05011019603294281, "grad_norm": 25.7082676561769, "learning_rate": 2e-06, "loss": 0.3486, "step": 216 }, { "epoch": 0.050342187681243475, "grad_norm": 29.31762312050998, "learning_rate": 2e-06, "loss": 0.4206, "step": 217 }, { "epoch": 0.05057417932954414, "grad_norm": 28.316750519747554, "learning_rate": 2e-06, "loss": 0.3658, "step": 218 }, { "epoch": 0.050806170977844796, "grad_norm": 18.295835928970263, "learning_rate": 2e-06, "loss": 0.3191, "step": 219 }, { "epoch": 0.05103816262614546, "grad_norm": 22.15390958004895, "learning_rate": 2e-06, "loss": 0.3842, "step": 220 }, { "epoch": 0.05127015427444612, "grad_norm": 31.55727431986, "learning_rate": 2e-06, "loss": 0.4348, "step": 221 }, { "epoch": 0.05150214592274678, "grad_norm": 18.142365032470167, "learning_rate": 2e-06, "loss": 0.3251, "step": 222 }, { "epoch": 0.051734137571047445, "grad_norm": 14.63675771579077, "learning_rate": 2e-06, "loss": 0.3872, "step": 223 }, { "epoch": 0.0519661292193481, "grad_norm": 15.783701599136487, "learning_rate": 2e-06, "loss": 0.4255, "step": 224 }, { "epoch": 0.052198120867648766, "grad_norm": 21.07727010080486, "learning_rate": 2e-06, "loss": 0.3722, "step": 225 }, { "epoch": 0.05243011251594942, "grad_norm": 19.40775575437732, "learning_rate": 2e-06, "loss": 0.3341, "step": 226 }, { "epoch": 0.05266210416425009, "grad_norm": 27.236675635773192, "learning_rate": 2e-06, "loss": 0.2782, "step": 227 }, { "epoch": 0.05289409581255075, "grad_norm": 24.8485428044272, "learning_rate": 2e-06, "loss": 0.4178, "step": 228 }, { "epoch": 0.05312608746085141, "grad_norm": 25.44002982309577, "learning_rate": 2e-06, "loss": 0.4114, "step": 229 }, { "epoch": 0.05335807910915207, "grad_norm": 32.39078707431755, "learning_rate": 2e-06, "loss": 0.3624, "step": 230 }, { "epoch": 0.05359007075745273, "grad_norm": 20.898925225007876, "learning_rate": 2e-06, "loss": 0.3823, "step": 231 }, { "epoch": 0.05382206240575339, "grad_norm": 14.481012835578381, "learning_rate": 2e-06, "loss": 0.2783, "step": 232 }, { "epoch": 0.05405405405405406, "grad_norm": 33.345472766269374, "learning_rate": 2e-06, "loss": 0.4105, "step": 233 }, { "epoch": 0.054286045702354714, "grad_norm": 27.313411652481147, "learning_rate": 2e-06, "loss": 0.3728, "step": 234 }, { "epoch": 0.05451803735065538, "grad_norm": 57.409672982284185, "learning_rate": 2e-06, "loss": 0.2871, "step": 235 }, { "epoch": 0.054750028998956035, "grad_norm": 16.72238049877021, "learning_rate": 2e-06, "loss": 0.3353, "step": 236 }, { "epoch": 0.0549820206472567, "grad_norm": 29.3975251341519, "learning_rate": 2e-06, "loss": 0.4285, "step": 237 }, { "epoch": 0.05521401229555736, "grad_norm": 16.557186007773907, "learning_rate": 2e-06, "loss": 0.3779, "step": 238 }, { "epoch": 0.05544600394385802, "grad_norm": 14.576894842730137, "learning_rate": 2e-06, "loss": 0.2714, "step": 239 }, { "epoch": 0.055677995592158684, "grad_norm": 33.24881434167466, "learning_rate": 2e-06, "loss": 0.4713, "step": 240 }, { "epoch": 0.05590998724045934, "grad_norm": 30.214762606934197, "learning_rate": 2e-06, "loss": 0.3491, "step": 241 }, { "epoch": 0.056141978888760005, "grad_norm": 24.468541329934542, "learning_rate": 2e-06, "loss": 0.3431, "step": 242 }, { "epoch": 0.05637397053706067, "grad_norm": 16.08552961180912, "learning_rate": 2e-06, "loss": 0.4033, "step": 243 }, { "epoch": 0.056605962185361326, "grad_norm": 14.354582913537627, "learning_rate": 2e-06, "loss": 0.3266, "step": 244 }, { "epoch": 0.05683795383366199, "grad_norm": 19.620651208208127, "learning_rate": 2e-06, "loss": 0.3277, "step": 245 }, { "epoch": 0.05706994548196265, "grad_norm": 15.691928454576992, "learning_rate": 2e-06, "loss": 0.3291, "step": 246 }, { "epoch": 0.05730193713026331, "grad_norm": 17.42590199984151, "learning_rate": 2e-06, "loss": 0.3284, "step": 247 }, { "epoch": 0.05753392877856397, "grad_norm": 11.225459850807889, "learning_rate": 2e-06, "loss": 0.2378, "step": 248 }, { "epoch": 0.05776592042686463, "grad_norm": 23.956123325800576, "learning_rate": 2e-06, "loss": 0.4609, "step": 249 }, { "epoch": 0.057997912075165296, "grad_norm": 30.195231126761364, "learning_rate": 2e-06, "loss": 0.3564, "step": 250 }, { "epoch": 0.05822990372346595, "grad_norm": 16.841385249087196, "learning_rate": 2e-06, "loss": 0.4339, "step": 251 }, { "epoch": 0.05846189537176662, "grad_norm": 18.80534936760663, "learning_rate": 2e-06, "loss": 0.3906, "step": 252 }, { "epoch": 0.058693887020067274, "grad_norm": 21.267472826987586, "learning_rate": 2e-06, "loss": 0.4437, "step": 253 }, { "epoch": 0.05892587866836794, "grad_norm": 16.367714475333194, "learning_rate": 2e-06, "loss": 0.3078, "step": 254 }, { "epoch": 0.0591578703166686, "grad_norm": 16.808024362000612, "learning_rate": 2e-06, "loss": 0.4284, "step": 255 }, { "epoch": 0.05938986196496926, "grad_norm": 14.47203839492185, "learning_rate": 2e-06, "loss": 0.3487, "step": 256 }, { "epoch": 0.05962185361326992, "grad_norm": 14.422808168299367, "learning_rate": 2e-06, "loss": 0.3002, "step": 257 }, { "epoch": 0.05985384526157058, "grad_norm": 17.241932663553175, "learning_rate": 2e-06, "loss": 0.3285, "step": 258 }, { "epoch": 0.060085836909871244, "grad_norm": 10.136726922616239, "learning_rate": 2e-06, "loss": 0.2544, "step": 259 }, { "epoch": 0.06031782855817191, "grad_norm": 31.901966098793842, "learning_rate": 2e-06, "loss": 0.3934, "step": 260 }, { "epoch": 0.060549820206472565, "grad_norm": 10.203419621588298, "learning_rate": 2e-06, "loss": 0.2739, "step": 261 }, { "epoch": 0.06078181185477323, "grad_norm": 30.516359662869256, "learning_rate": 2e-06, "loss": 0.4177, "step": 262 }, { "epoch": 0.061013803503073887, "grad_norm": 23.490412449083408, "learning_rate": 2e-06, "loss": 0.3069, "step": 263 }, { "epoch": 0.06124579515137455, "grad_norm": 26.962705108860312, "learning_rate": 2e-06, "loss": 0.488, "step": 264 }, { "epoch": 0.061477786799675214, "grad_norm": 21.8606732946384, "learning_rate": 2e-06, "loss": 0.3353, "step": 265 }, { "epoch": 0.06170977844797587, "grad_norm": 11.84435585441604, "learning_rate": 2e-06, "loss": 0.2634, "step": 266 }, { "epoch": 0.061941770096276536, "grad_norm": 18.759386751316498, "learning_rate": 2e-06, "loss": 0.335, "step": 267 }, { "epoch": 0.06217376174457719, "grad_norm": 10.860341407594653, "learning_rate": 2e-06, "loss": 0.269, "step": 268 }, { "epoch": 0.06240575339287786, "grad_norm": 22.756283905059107, "learning_rate": 2e-06, "loss": 0.4431, "step": 269 }, { "epoch": 0.06263774504117851, "grad_norm": 18.857691106467716, "learning_rate": 2e-06, "loss": 0.4064, "step": 270 }, { "epoch": 0.06286973668947918, "grad_norm": 13.71142172349342, "learning_rate": 2e-06, "loss": 0.3452, "step": 271 }, { "epoch": 0.06310172833777984, "grad_norm": 28.59068887733619, "learning_rate": 2e-06, "loss": 0.4787, "step": 272 }, { "epoch": 0.0633337199860805, "grad_norm": 34.9471101130114, "learning_rate": 2e-06, "loss": 0.4529, "step": 273 }, { "epoch": 0.06356571163438116, "grad_norm": 15.432357523010165, "learning_rate": 2e-06, "loss": 0.302, "step": 274 }, { "epoch": 0.06379770328268182, "grad_norm": 21.176022226286484, "learning_rate": 2e-06, "loss": 0.3524, "step": 275 }, { "epoch": 0.06402969493098248, "grad_norm": 20.398745690881963, "learning_rate": 2e-06, "loss": 0.3602, "step": 276 }, { "epoch": 0.06426168657928315, "grad_norm": 45.61620524073223, "learning_rate": 2e-06, "loss": 0.4475, "step": 277 }, { "epoch": 0.06449367822758381, "grad_norm": 11.778603437990974, "learning_rate": 2e-06, "loss": 0.2865, "step": 278 }, { "epoch": 0.06472566987588446, "grad_norm": 16.05408430375991, "learning_rate": 2e-06, "loss": 0.3002, "step": 279 }, { "epoch": 0.06495766152418513, "grad_norm": 24.413991600204813, "learning_rate": 2e-06, "loss": 0.4776, "step": 280 }, { "epoch": 0.06518965317248579, "grad_norm": 20.658726900866288, "learning_rate": 2e-06, "loss": 0.4699, "step": 281 }, { "epoch": 0.06542164482078645, "grad_norm": 20.214349439391693, "learning_rate": 2e-06, "loss": 0.3172, "step": 282 }, { "epoch": 0.06565363646908712, "grad_norm": 18.253321100048833, "learning_rate": 2e-06, "loss": 0.3053, "step": 283 }, { "epoch": 0.06588562811738777, "grad_norm": 25.27398761329044, "learning_rate": 2e-06, "loss": 0.4358, "step": 284 }, { "epoch": 0.06611761976568843, "grad_norm": 23.617098432941468, "learning_rate": 2e-06, "loss": 0.3992, "step": 285 }, { "epoch": 0.0663496114139891, "grad_norm": 8.904262369286501, "learning_rate": 2e-06, "loss": 0.2789, "step": 286 }, { "epoch": 0.06658160306228976, "grad_norm": 24.24410486299016, "learning_rate": 2e-06, "loss": 0.2762, "step": 287 }, { "epoch": 0.06681359471059042, "grad_norm": 18.83345601805634, "learning_rate": 2e-06, "loss": 0.3888, "step": 288 }, { "epoch": 0.06704558635889107, "grad_norm": 14.660856467155357, "learning_rate": 2e-06, "loss": 0.2764, "step": 289 }, { "epoch": 0.06727757800719174, "grad_norm": 33.45569066237724, "learning_rate": 2e-06, "loss": 0.4414, "step": 290 }, { "epoch": 0.0675095696554924, "grad_norm": 21.82220797728714, "learning_rate": 2e-06, "loss": 0.3749, "step": 291 }, { "epoch": 0.06774156130379307, "grad_norm": 26.735744320549216, "learning_rate": 2e-06, "loss": 0.3758, "step": 292 }, { "epoch": 0.06797355295209373, "grad_norm": 19.018016510002802, "learning_rate": 2e-06, "loss": 0.3474, "step": 293 }, { "epoch": 0.06820554460039438, "grad_norm": 20.773279197240708, "learning_rate": 2e-06, "loss": 0.292, "step": 294 }, { "epoch": 0.06843753624869504, "grad_norm": 21.652882354360884, "learning_rate": 2e-06, "loss": 0.4562, "step": 295 }, { "epoch": 0.06866952789699571, "grad_norm": 23.18839543070114, "learning_rate": 2e-06, "loss": 0.4007, "step": 296 }, { "epoch": 0.06890151954529637, "grad_norm": 20.933515687339632, "learning_rate": 2e-06, "loss": 0.3702, "step": 297 }, { "epoch": 0.06913351119359704, "grad_norm": 17.835747475594196, "learning_rate": 2e-06, "loss": 0.2801, "step": 298 }, { "epoch": 0.06936550284189769, "grad_norm": 17.448326380657978, "learning_rate": 2e-06, "loss": 0.3179, "step": 299 }, { "epoch": 0.06959749449019835, "grad_norm": 26.014972961257733, "learning_rate": 2e-06, "loss": 0.3853, "step": 300 }, { "epoch": 0.06982948613849901, "grad_norm": 25.590774728334065, "learning_rate": 2e-06, "loss": 0.3799, "step": 301 }, { "epoch": 0.07006147778679968, "grad_norm": 27.64512941308947, "learning_rate": 2e-06, "loss": 0.4052, "step": 302 }, { "epoch": 0.07029346943510034, "grad_norm": 30.313882980750208, "learning_rate": 2e-06, "loss": 0.4532, "step": 303 }, { "epoch": 0.07052546108340099, "grad_norm": 6.010401569300229, "learning_rate": 2e-06, "loss": 0.265, "step": 304 }, { "epoch": 0.07075745273170166, "grad_norm": 13.149785684164643, "learning_rate": 2e-06, "loss": 0.3219, "step": 305 }, { "epoch": 0.07098944438000232, "grad_norm": 11.675290474453474, "learning_rate": 2e-06, "loss": 0.2681, "step": 306 }, { "epoch": 0.07122143602830298, "grad_norm": 23.30421897740761, "learning_rate": 2e-06, "loss": 0.4489, "step": 307 }, { "epoch": 0.07145342767660365, "grad_norm": 17.335889262546875, "learning_rate": 2e-06, "loss": 0.3924, "step": 308 }, { "epoch": 0.0716854193249043, "grad_norm": 19.63995512405009, "learning_rate": 2e-06, "loss": 0.3371, "step": 309 }, { "epoch": 0.07191741097320496, "grad_norm": 24.18936339860376, "learning_rate": 2e-06, "loss": 0.5017, "step": 310 }, { "epoch": 0.07214940262150563, "grad_norm": 19.796051503412293, "learning_rate": 2e-06, "loss": 0.3462, "step": 311 }, { "epoch": 0.07238139426980629, "grad_norm": 24.245217740356853, "learning_rate": 2e-06, "loss": 0.3896, "step": 312 }, { "epoch": 0.07261338591810695, "grad_norm": 12.613025856749827, "learning_rate": 2e-06, "loss": 0.3432, "step": 313 }, { "epoch": 0.0728453775664076, "grad_norm": 13.186064582017874, "learning_rate": 2e-06, "loss": 0.3445, "step": 314 }, { "epoch": 0.07307736921470827, "grad_norm": 19.879322945226193, "learning_rate": 2e-06, "loss": 0.3519, "step": 315 }, { "epoch": 0.07330936086300893, "grad_norm": 15.234452095839453, "learning_rate": 2e-06, "loss": 0.3882, "step": 316 }, { "epoch": 0.0735413525113096, "grad_norm": 19.154427840014012, "learning_rate": 2e-06, "loss": 0.4501, "step": 317 }, { "epoch": 0.07377334415961026, "grad_norm": 31.599294049661268, "learning_rate": 2e-06, "loss": 0.4159, "step": 318 }, { "epoch": 0.07400533580791091, "grad_norm": 12.57362027981612, "learning_rate": 2e-06, "loss": 0.3952, "step": 319 }, { "epoch": 0.07423732745621157, "grad_norm": 16.12107163904157, "learning_rate": 2e-06, "loss": 0.3671, "step": 320 }, { "epoch": 0.07446931910451224, "grad_norm": 18.572850404526363, "learning_rate": 2e-06, "loss": 0.3022, "step": 321 }, { "epoch": 0.0747013107528129, "grad_norm": 34.59231978461037, "learning_rate": 2e-06, "loss": 0.4849, "step": 322 }, { "epoch": 0.07493330240111357, "grad_norm": 13.285785318659888, "learning_rate": 2e-06, "loss": 0.2325, "step": 323 }, { "epoch": 0.07516529404941422, "grad_norm": 15.365050538063457, "learning_rate": 2e-06, "loss": 0.3808, "step": 324 }, { "epoch": 0.07539728569771488, "grad_norm": 19.091634443388465, "learning_rate": 2e-06, "loss": 0.3394, "step": 325 }, { "epoch": 0.07562927734601554, "grad_norm": 18.766369082786838, "learning_rate": 2e-06, "loss": 0.4084, "step": 326 }, { "epoch": 0.07586126899431621, "grad_norm": 22.93179092378503, "learning_rate": 2e-06, "loss": 0.3995, "step": 327 }, { "epoch": 0.07609326064261687, "grad_norm": 26.769794964089836, "learning_rate": 2e-06, "loss": 0.3826, "step": 328 }, { "epoch": 0.07632525229091752, "grad_norm": 17.51489111550706, "learning_rate": 2e-06, "loss": 0.416, "step": 329 }, { "epoch": 0.07655724393921819, "grad_norm": 16.635578775167737, "learning_rate": 2e-06, "loss": 0.4193, "step": 330 }, { "epoch": 0.07678923558751885, "grad_norm": 22.91519385563069, "learning_rate": 2e-06, "loss": 0.4024, "step": 331 }, { "epoch": 0.07702122723581951, "grad_norm": 24.359791173757518, "learning_rate": 2e-06, "loss": 0.418, "step": 332 }, { "epoch": 0.07725321888412018, "grad_norm": 17.329821601312467, "learning_rate": 2e-06, "loss": 0.3709, "step": 333 }, { "epoch": 0.07748521053242083, "grad_norm": 27.673581294656348, "learning_rate": 2e-06, "loss": 0.5115, "step": 334 }, { "epoch": 0.07771720218072149, "grad_norm": 27.328269842064092, "learning_rate": 2e-06, "loss": 0.4333, "step": 335 }, { "epoch": 0.07794919382902216, "grad_norm": 17.986657057309625, "learning_rate": 2e-06, "loss": 0.3317, "step": 336 }, { "epoch": 0.07818118547732282, "grad_norm": 15.710760855716062, "learning_rate": 2e-06, "loss": 0.3436, "step": 337 }, { "epoch": 0.07841317712562348, "grad_norm": 28.54747304199656, "learning_rate": 2e-06, "loss": 0.3698, "step": 338 }, { "epoch": 0.07864516877392413, "grad_norm": 28.406446425076776, "learning_rate": 2e-06, "loss": 0.4491, "step": 339 }, { "epoch": 0.0788771604222248, "grad_norm": 13.00998113821747, "learning_rate": 2e-06, "loss": 0.302, "step": 340 }, { "epoch": 0.07910915207052546, "grad_norm": 24.394710444728887, "learning_rate": 2e-06, "loss": 0.3499, "step": 341 }, { "epoch": 0.07934114371882613, "grad_norm": 16.848986324981123, "learning_rate": 2e-06, "loss": 0.3152, "step": 342 }, { "epoch": 0.07957313536712678, "grad_norm": 31.12501067169636, "learning_rate": 2e-06, "loss": 0.4136, "step": 343 }, { "epoch": 0.07980512701542744, "grad_norm": 33.14948891851159, "learning_rate": 2e-06, "loss": 0.4752, "step": 344 }, { "epoch": 0.0800371186637281, "grad_norm": 27.33198419147624, "learning_rate": 2e-06, "loss": 0.4415, "step": 345 }, { "epoch": 0.08026911031202877, "grad_norm": 23.586083476624456, "learning_rate": 2e-06, "loss": 0.3476, "step": 346 }, { "epoch": 0.08050110196032943, "grad_norm": 20.590998680061357, "learning_rate": 2e-06, "loss": 0.2976, "step": 347 }, { "epoch": 0.08073309360863008, "grad_norm": 24.375154903581354, "learning_rate": 2e-06, "loss": 0.4577, "step": 348 }, { "epoch": 0.08096508525693075, "grad_norm": 28.988118815723574, "learning_rate": 2e-06, "loss": 0.4254, "step": 349 }, { "epoch": 0.08119707690523141, "grad_norm": 16.422024111545927, "learning_rate": 2e-06, "loss": 0.3159, "step": 350 }, { "epoch": 0.08142906855353207, "grad_norm": 28.466863498914023, "learning_rate": 2e-06, "loss": 0.4772, "step": 351 }, { "epoch": 0.08166106020183274, "grad_norm": 24.154986228673167, "learning_rate": 2e-06, "loss": 0.3228, "step": 352 }, { "epoch": 0.08189305185013339, "grad_norm": 10.822885849051195, "learning_rate": 2e-06, "loss": 0.3191, "step": 353 }, { "epoch": 0.08212504349843405, "grad_norm": 12.170005974274169, "learning_rate": 2e-06, "loss": 0.3375, "step": 354 }, { "epoch": 0.08235703514673472, "grad_norm": 16.35142729634425, "learning_rate": 2e-06, "loss": 0.3974, "step": 355 }, { "epoch": 0.08258902679503538, "grad_norm": 20.3269200243023, "learning_rate": 2e-06, "loss": 0.3734, "step": 356 }, { "epoch": 0.08282101844333604, "grad_norm": 18.34613766392723, "learning_rate": 2e-06, "loss": 0.464, "step": 357 }, { "epoch": 0.0830530100916367, "grad_norm": 18.712920482784153, "learning_rate": 2e-06, "loss": 0.3485, "step": 358 }, { "epoch": 0.08328500173993736, "grad_norm": 14.590189583694684, "learning_rate": 2e-06, "loss": 0.4094, "step": 359 }, { "epoch": 0.08351699338823802, "grad_norm": 16.316678168602866, "learning_rate": 2e-06, "loss": 0.3666, "step": 360 }, { "epoch": 0.08374898503653869, "grad_norm": 21.600786547305564, "learning_rate": 2e-06, "loss": 0.3957, "step": 361 }, { "epoch": 0.08398097668483935, "grad_norm": 6.130256185688987, "learning_rate": 2e-06, "loss": 0.2955, "step": 362 }, { "epoch": 0.08421296833314, "grad_norm": 15.583176633847687, "learning_rate": 2e-06, "loss": 0.3424, "step": 363 }, { "epoch": 0.08444495998144066, "grad_norm": 10.731580878972554, "learning_rate": 2e-06, "loss": 0.3268, "step": 364 }, { "epoch": 0.08467695162974133, "grad_norm": 16.475523855583248, "learning_rate": 2e-06, "loss": 0.3199, "step": 365 }, { "epoch": 0.08490894327804199, "grad_norm": 19.190504550314518, "learning_rate": 2e-06, "loss": 0.3784, "step": 366 }, { "epoch": 0.08514093492634266, "grad_norm": 22.69383405935886, "learning_rate": 2e-06, "loss": 0.3864, "step": 367 }, { "epoch": 0.0853729265746433, "grad_norm": 6.1048447030660125, "learning_rate": 2e-06, "loss": 0.3483, "step": 368 }, { "epoch": 0.08560491822294397, "grad_norm": 25.73071880587071, "learning_rate": 2e-06, "loss": 0.4592, "step": 369 }, { "epoch": 0.08583690987124463, "grad_norm": 18.351202040668635, "learning_rate": 2e-06, "loss": 0.3476, "step": 370 }, { "epoch": 0.0860689015195453, "grad_norm": 16.26909477918099, "learning_rate": 2e-06, "loss": 0.3124, "step": 371 }, { "epoch": 0.08630089316784596, "grad_norm": 14.241063324741525, "learning_rate": 2e-06, "loss": 0.3076, "step": 372 }, { "epoch": 0.08653288481614661, "grad_norm": 14.506215185214822, "learning_rate": 2e-06, "loss": 0.3984, "step": 373 }, { "epoch": 0.08676487646444728, "grad_norm": 9.382376932185698, "learning_rate": 2e-06, "loss": 0.2976, "step": 374 }, { "epoch": 0.08699686811274794, "grad_norm": 50.59490449016764, "learning_rate": 2e-06, "loss": 0.3454, "step": 375 }, { "epoch": 0.0872288597610486, "grad_norm": 27.547531076602287, "learning_rate": 2e-06, "loss": 0.4362, "step": 376 }, { "epoch": 0.08746085140934927, "grad_norm": 16.082596257621272, "learning_rate": 2e-06, "loss": 0.3319, "step": 377 }, { "epoch": 0.08769284305764992, "grad_norm": 31.876537236032785, "learning_rate": 2e-06, "loss": 0.4915, "step": 378 }, { "epoch": 0.08792483470595058, "grad_norm": 22.54072299553908, "learning_rate": 2e-06, "loss": 0.3151, "step": 379 }, { "epoch": 0.08815682635425125, "grad_norm": 17.614758213456692, "learning_rate": 2e-06, "loss": 0.3949, "step": 380 }, { "epoch": 0.08838881800255191, "grad_norm": 14.148275945729827, "learning_rate": 2e-06, "loss": 0.3013, "step": 381 }, { "epoch": 0.08862080965085258, "grad_norm": 11.314995508339132, "learning_rate": 2e-06, "loss": 0.285, "step": 382 }, { "epoch": 0.08885280129915323, "grad_norm": 17.241195116605457, "learning_rate": 2e-06, "loss": 0.3807, "step": 383 }, { "epoch": 0.08908479294745389, "grad_norm": 15.385646501389344, "learning_rate": 2e-06, "loss": 0.3809, "step": 384 }, { "epoch": 0.08931678459575455, "grad_norm": 17.684525111155942, "learning_rate": 2e-06, "loss": 0.1991, "step": 385 }, { "epoch": 0.08954877624405522, "grad_norm": 24.501559406718627, "learning_rate": 2e-06, "loss": 0.4261, "step": 386 }, { "epoch": 0.08978076789235588, "grad_norm": 18.143176673818, "learning_rate": 2e-06, "loss": 0.2896, "step": 387 }, { "epoch": 0.09001275954065653, "grad_norm": 27.499625471928624, "learning_rate": 2e-06, "loss": 0.3139, "step": 388 }, { "epoch": 0.0902447511889572, "grad_norm": 13.029164508292824, "learning_rate": 2e-06, "loss": 0.3339, "step": 389 }, { "epoch": 0.09047674283725786, "grad_norm": 22.06911169822742, "learning_rate": 2e-06, "loss": 0.4013, "step": 390 }, { "epoch": 0.09070873448555852, "grad_norm": 25.660629113538942, "learning_rate": 2e-06, "loss": 0.3766, "step": 391 }, { "epoch": 0.09094072613385919, "grad_norm": 12.330854489315392, "learning_rate": 2e-06, "loss": 0.3189, "step": 392 }, { "epoch": 0.09117271778215984, "grad_norm": 12.26820205350865, "learning_rate": 2e-06, "loss": 0.3068, "step": 393 }, { "epoch": 0.0914047094304605, "grad_norm": 19.489121042795496, "learning_rate": 2e-06, "loss": 0.315, "step": 394 }, { "epoch": 0.09163670107876117, "grad_norm": 17.211896639231988, "learning_rate": 2e-06, "loss": 0.2837, "step": 395 }, { "epoch": 0.09186869272706183, "grad_norm": 12.236343790554217, "learning_rate": 2e-06, "loss": 0.2436, "step": 396 }, { "epoch": 0.0921006843753625, "grad_norm": 12.287095537549003, "learning_rate": 2e-06, "loss": 0.3363, "step": 397 }, { "epoch": 0.09233267602366314, "grad_norm": 36.20698132473805, "learning_rate": 2e-06, "loss": 0.3322, "step": 398 }, { "epoch": 0.09256466767196381, "grad_norm": 30.012469417565942, "learning_rate": 2e-06, "loss": 0.3058, "step": 399 }, { "epoch": 0.09279665932026447, "grad_norm": 18.70204786894355, "learning_rate": 2e-06, "loss": 0.28, "step": 400 }, { "epoch": 0.09302865096856514, "grad_norm": 23.040397243965792, "learning_rate": 2e-06, "loss": 0.3506, "step": 401 }, { "epoch": 0.0932606426168658, "grad_norm": 14.503689982051851, "learning_rate": 2e-06, "loss": 0.3534, "step": 402 }, { "epoch": 0.09349263426516645, "grad_norm": 19.06995983751695, "learning_rate": 2e-06, "loss": 0.3877, "step": 403 }, { "epoch": 0.09372462591346711, "grad_norm": 15.528379690183382, "learning_rate": 2e-06, "loss": 0.2853, "step": 404 }, { "epoch": 0.09395661756176778, "grad_norm": 12.183328931435046, "learning_rate": 2e-06, "loss": 0.2328, "step": 405 }, { "epoch": 0.09418860921006844, "grad_norm": 10.249081656164702, "learning_rate": 2e-06, "loss": 0.3154, "step": 406 }, { "epoch": 0.0944206008583691, "grad_norm": 34.79842136298589, "learning_rate": 2e-06, "loss": 0.392, "step": 407 }, { "epoch": 0.09465259250666976, "grad_norm": 27.944587538361805, "learning_rate": 2e-06, "loss": 0.4147, "step": 408 }, { "epoch": 0.09488458415497042, "grad_norm": 20.30318182509188, "learning_rate": 2e-06, "loss": 0.4244, "step": 409 }, { "epoch": 0.09511657580327108, "grad_norm": 43.576920841913044, "learning_rate": 2e-06, "loss": 0.49, "step": 410 }, { "epoch": 0.09534856745157175, "grad_norm": 25.236131980802266, "learning_rate": 2e-06, "loss": 0.4109, "step": 411 }, { "epoch": 0.09558055909987241, "grad_norm": 29.170644729481666, "learning_rate": 2e-06, "loss": 0.5216, "step": 412 }, { "epoch": 0.09581255074817306, "grad_norm": 19.516436049903138, "learning_rate": 2e-06, "loss": 0.4077, "step": 413 }, { "epoch": 0.09604454239647373, "grad_norm": 17.128714650282106, "learning_rate": 2e-06, "loss": 0.3999, "step": 414 }, { "epoch": 0.09627653404477439, "grad_norm": 12.006511406067414, "learning_rate": 2e-06, "loss": 0.2462, "step": 415 }, { "epoch": 0.09650852569307505, "grad_norm": 16.275965696790472, "learning_rate": 2e-06, "loss": 0.302, "step": 416 }, { "epoch": 0.0967405173413757, "grad_norm": 14.7500730370023, "learning_rate": 2e-06, "loss": 0.2836, "step": 417 }, { "epoch": 0.09697250898967637, "grad_norm": 18.667197603654824, "learning_rate": 2e-06, "loss": 0.3598, "step": 418 }, { "epoch": 0.09720450063797703, "grad_norm": 17.955626925134116, "learning_rate": 2e-06, "loss": 0.2975, "step": 419 }, { "epoch": 0.0974364922862777, "grad_norm": 21.36474071148061, "learning_rate": 2e-06, "loss": 0.3349, "step": 420 }, { "epoch": 0.09766848393457836, "grad_norm": 37.371825648292855, "learning_rate": 2e-06, "loss": 0.3935, "step": 421 }, { "epoch": 0.09790047558287901, "grad_norm": 25.24655195690179, "learning_rate": 2e-06, "loss": 0.3842, "step": 422 }, { "epoch": 0.09813246723117967, "grad_norm": 18.403246533860493, "learning_rate": 2e-06, "loss": 0.4077, "step": 423 }, { "epoch": 0.09836445887948034, "grad_norm": 23.049463332646905, "learning_rate": 2e-06, "loss": 0.4219, "step": 424 }, { "epoch": 0.098596450527781, "grad_norm": 22.791123792015163, "learning_rate": 2e-06, "loss": 0.386, "step": 425 }, { "epoch": 0.09882844217608167, "grad_norm": 20.05570413178967, "learning_rate": 2e-06, "loss": 0.4471, "step": 426 }, { "epoch": 0.09906043382438232, "grad_norm": 24.374099834079217, "learning_rate": 2e-06, "loss": 0.3568, "step": 427 }, { "epoch": 0.09929242547268298, "grad_norm": 26.877154115175962, "learning_rate": 2e-06, "loss": 0.4576, "step": 428 }, { "epoch": 0.09952441712098364, "grad_norm": 24.697932991796872, "learning_rate": 2e-06, "loss": 0.2783, "step": 429 }, { "epoch": 0.09975640876928431, "grad_norm": 60.2313041091182, "learning_rate": 2e-06, "loss": 0.3714, "step": 430 }, { "epoch": 0.09998840041758497, "grad_norm": 13.60633605845247, "learning_rate": 2e-06, "loss": 0.3332, "step": 431 }, { "epoch": 0.10022039206588562, "grad_norm": 42.72261628492271, "learning_rate": 2e-06, "loss": 0.4944, "step": 432 }, { "epoch": 0.10045238371418629, "grad_norm": 12.503324964724692, "learning_rate": 2e-06, "loss": 0.3633, "step": 433 }, { "epoch": 0.10068437536248695, "grad_norm": 22.839555311492852, "learning_rate": 2e-06, "loss": 0.4408, "step": 434 }, { "epoch": 0.10091636701078761, "grad_norm": 28.45242700473296, "learning_rate": 2e-06, "loss": 0.4241, "step": 435 }, { "epoch": 0.10114835865908828, "grad_norm": 25.2907973397533, "learning_rate": 2e-06, "loss": 0.3094, "step": 436 }, { "epoch": 0.10138035030738893, "grad_norm": 20.645973630360405, "learning_rate": 2e-06, "loss": 0.365, "step": 437 }, { "epoch": 0.10161234195568959, "grad_norm": 13.503907309499892, "learning_rate": 2e-06, "loss": 0.3956, "step": 438 }, { "epoch": 0.10184433360399026, "grad_norm": 26.58537180570347, "learning_rate": 2e-06, "loss": 0.3936, "step": 439 }, { "epoch": 0.10207632525229092, "grad_norm": 14.061720828202937, "learning_rate": 2e-06, "loss": 0.399, "step": 440 }, { "epoch": 0.10230831690059158, "grad_norm": 21.802081089998484, "learning_rate": 2e-06, "loss": 0.4034, "step": 441 }, { "epoch": 0.10254030854889223, "grad_norm": 16.007298216847893, "learning_rate": 2e-06, "loss": 0.227, "step": 442 }, { "epoch": 0.1027723001971929, "grad_norm": 13.543803794607616, "learning_rate": 2e-06, "loss": 0.2848, "step": 443 }, { "epoch": 0.10300429184549356, "grad_norm": 11.917643036483089, "learning_rate": 2e-06, "loss": 0.3229, "step": 444 }, { "epoch": 0.10323628349379423, "grad_norm": 10.540211537982008, "learning_rate": 2e-06, "loss": 0.3835, "step": 445 }, { "epoch": 0.10346827514209489, "grad_norm": 13.057127763815975, "learning_rate": 2e-06, "loss": 0.294, "step": 446 }, { "epoch": 0.10370026679039554, "grad_norm": 40.636902358293355, "learning_rate": 2e-06, "loss": 0.5286, "step": 447 }, { "epoch": 0.1039322584386962, "grad_norm": 9.295471134140202, "learning_rate": 2e-06, "loss": 0.2626, "step": 448 }, { "epoch": 0.10416425008699687, "grad_norm": 16.71420553457328, "learning_rate": 2e-06, "loss": 0.4241, "step": 449 }, { "epoch": 0.10439624173529753, "grad_norm": 8.626134436732368, "learning_rate": 2e-06, "loss": 0.3212, "step": 450 }, { "epoch": 0.1046282333835982, "grad_norm": 30.773166312699107, "learning_rate": 2e-06, "loss": 0.4788, "step": 451 }, { "epoch": 0.10486022503189885, "grad_norm": 13.090372218449275, "learning_rate": 2e-06, "loss": 0.3331, "step": 452 }, { "epoch": 0.10509221668019951, "grad_norm": 13.979905174034451, "learning_rate": 2e-06, "loss": 0.2949, "step": 453 }, { "epoch": 0.10532420832850017, "grad_norm": 42.856080673219175, "learning_rate": 2e-06, "loss": 0.4169, "step": 454 }, { "epoch": 0.10555619997680084, "grad_norm": 23.273704420042517, "learning_rate": 2e-06, "loss": 0.3922, "step": 455 }, { "epoch": 0.1057881916251015, "grad_norm": 12.605371417868731, "learning_rate": 2e-06, "loss": 0.2395, "step": 456 }, { "epoch": 0.10602018327340215, "grad_norm": 11.582289734117069, "learning_rate": 2e-06, "loss": 0.3205, "step": 457 }, { "epoch": 0.10625217492170282, "grad_norm": 13.586535826870845, "learning_rate": 2e-06, "loss": 0.3675, "step": 458 }, { "epoch": 0.10648416657000348, "grad_norm": 17.080354404774585, "learning_rate": 2e-06, "loss": 0.3486, "step": 459 }, { "epoch": 0.10671615821830414, "grad_norm": 17.925684598773802, "learning_rate": 2e-06, "loss": 0.3022, "step": 460 }, { "epoch": 0.10694814986660481, "grad_norm": 32.39272785312845, "learning_rate": 2e-06, "loss": 0.3359, "step": 461 }, { "epoch": 0.10718014151490546, "grad_norm": 18.254480374694175, "learning_rate": 2e-06, "loss": 0.3263, "step": 462 }, { "epoch": 0.10741213316320612, "grad_norm": 23.591719076716867, "learning_rate": 2e-06, "loss": 0.4446, "step": 463 }, { "epoch": 0.10764412481150679, "grad_norm": 19.865109921119632, "learning_rate": 2e-06, "loss": 0.4048, "step": 464 }, { "epoch": 0.10787611645980745, "grad_norm": 8.884190876686526, "learning_rate": 2e-06, "loss": 0.2853, "step": 465 }, { "epoch": 0.10810810810810811, "grad_norm": 19.61323282534642, "learning_rate": 2e-06, "loss": 0.3167, "step": 466 }, { "epoch": 0.10834009975640876, "grad_norm": 12.695565418193748, "learning_rate": 2e-06, "loss": 0.369, "step": 467 }, { "epoch": 0.10857209140470943, "grad_norm": 10.243463772330834, "learning_rate": 2e-06, "loss": 0.3189, "step": 468 }, { "epoch": 0.10880408305301009, "grad_norm": 10.293892595757947, "learning_rate": 2e-06, "loss": 0.3302, "step": 469 }, { "epoch": 0.10903607470131076, "grad_norm": 21.389390468394783, "learning_rate": 2e-06, "loss": 0.3741, "step": 470 }, { "epoch": 0.10926806634961142, "grad_norm": 11.940062834217704, "learning_rate": 2e-06, "loss": 0.3796, "step": 471 }, { "epoch": 0.10950005799791207, "grad_norm": 9.774522293402262, "learning_rate": 2e-06, "loss": 0.2699, "step": 472 }, { "epoch": 0.10973204964621273, "grad_norm": 11.12192006208675, "learning_rate": 2e-06, "loss": 0.3056, "step": 473 }, { "epoch": 0.1099640412945134, "grad_norm": 17.91288583179564, "learning_rate": 2e-06, "loss": 0.3468, "step": 474 }, { "epoch": 0.11019603294281406, "grad_norm": 20.480071149104823, "learning_rate": 2e-06, "loss": 0.3443, "step": 475 }, { "epoch": 0.11042802459111473, "grad_norm": 24.143986404076905, "learning_rate": 2e-06, "loss": 0.4692, "step": 476 }, { "epoch": 0.11066001623941538, "grad_norm": 14.08740260271415, "learning_rate": 2e-06, "loss": 0.3726, "step": 477 }, { "epoch": 0.11089200788771604, "grad_norm": 10.616284079876811, "learning_rate": 2e-06, "loss": 0.3315, "step": 478 }, { "epoch": 0.1111239995360167, "grad_norm": 12.99385356040717, "learning_rate": 2e-06, "loss": 0.2818, "step": 479 }, { "epoch": 0.11135599118431737, "grad_norm": 6.6027185157360035, "learning_rate": 2e-06, "loss": 0.2372, "step": 480 }, { "epoch": 0.11158798283261803, "grad_norm": 17.59288186274003, "learning_rate": 2e-06, "loss": 0.3019, "step": 481 }, { "epoch": 0.11181997448091868, "grad_norm": 18.112113143400485, "learning_rate": 2e-06, "loss": 0.3168, "step": 482 }, { "epoch": 0.11205196612921935, "grad_norm": 18.58516521806112, "learning_rate": 2e-06, "loss": 0.2882, "step": 483 }, { "epoch": 0.11228395777752001, "grad_norm": 20.018829935182424, "learning_rate": 2e-06, "loss": 0.326, "step": 484 }, { "epoch": 0.11251594942582067, "grad_norm": 20.822972874843753, "learning_rate": 2e-06, "loss": 0.4031, "step": 485 }, { "epoch": 0.11274794107412134, "grad_norm": 21.934178884656355, "learning_rate": 2e-06, "loss": 0.3179, "step": 486 }, { "epoch": 0.11297993272242199, "grad_norm": 21.66633969901522, "learning_rate": 2e-06, "loss": 0.3193, "step": 487 }, { "epoch": 0.11321192437072265, "grad_norm": 16.374257943413088, "learning_rate": 2e-06, "loss": 0.374, "step": 488 }, { "epoch": 0.11344391601902332, "grad_norm": 19.504755329296714, "learning_rate": 2e-06, "loss": 0.3048, "step": 489 }, { "epoch": 0.11367590766732398, "grad_norm": 14.763736020821334, "learning_rate": 2e-06, "loss": 0.3575, "step": 490 }, { "epoch": 0.11390789931562464, "grad_norm": 10.60113072500508, "learning_rate": 2e-06, "loss": 0.2901, "step": 491 }, { "epoch": 0.1141398909639253, "grad_norm": 15.115397184387295, "learning_rate": 2e-06, "loss": 0.3871, "step": 492 }, { "epoch": 0.11437188261222596, "grad_norm": 17.2191940468864, "learning_rate": 2e-06, "loss": 0.4249, "step": 493 }, { "epoch": 0.11460387426052662, "grad_norm": 12.074212003251892, "learning_rate": 2e-06, "loss": 0.2801, "step": 494 }, { "epoch": 0.11483586590882729, "grad_norm": 35.686595454103376, "learning_rate": 2e-06, "loss": 0.3694, "step": 495 }, { "epoch": 0.11506785755712794, "grad_norm": 25.21616405090084, "learning_rate": 2e-06, "loss": 0.3483, "step": 496 }, { "epoch": 0.1152998492054286, "grad_norm": 17.78054548198579, "learning_rate": 2e-06, "loss": 0.3453, "step": 497 }, { "epoch": 0.11553184085372926, "grad_norm": 16.1860008387927, "learning_rate": 2e-06, "loss": 0.3795, "step": 498 }, { "epoch": 0.11576383250202993, "grad_norm": 10.73184589288529, "learning_rate": 2e-06, "loss": 0.2779, "step": 499 }, { "epoch": 0.11599582415033059, "grad_norm": 21.798651290466267, "learning_rate": 2e-06, "loss": 0.3505, "step": 500 }, { "epoch": 0.11622781579863124, "grad_norm": 20.404407291282162, "learning_rate": 2e-06, "loss": 0.3475, "step": 501 }, { "epoch": 0.1164598074469319, "grad_norm": 21.873321256737448, "learning_rate": 2e-06, "loss": 0.4091, "step": 502 }, { "epoch": 0.11669179909523257, "grad_norm": 11.859969122962756, "learning_rate": 2e-06, "loss": 0.3084, "step": 503 }, { "epoch": 0.11692379074353323, "grad_norm": 26.627204119044684, "learning_rate": 2e-06, "loss": 0.422, "step": 504 }, { "epoch": 0.1171557823918339, "grad_norm": 8.92584627620991, "learning_rate": 2e-06, "loss": 0.3135, "step": 505 }, { "epoch": 0.11738777404013455, "grad_norm": 11.993139051388196, "learning_rate": 2e-06, "loss": 0.3289, "step": 506 }, { "epoch": 0.11761976568843521, "grad_norm": 12.897442540293113, "learning_rate": 2e-06, "loss": 0.2981, "step": 507 }, { "epoch": 0.11785175733673588, "grad_norm": 14.110845460131074, "learning_rate": 2e-06, "loss": 0.2998, "step": 508 }, { "epoch": 0.11808374898503654, "grad_norm": 25.33181859269643, "learning_rate": 2e-06, "loss": 0.4741, "step": 509 }, { "epoch": 0.1183157406333372, "grad_norm": 13.834992576536884, "learning_rate": 2e-06, "loss": 0.3232, "step": 510 }, { "epoch": 0.11854773228163785, "grad_norm": 14.893252752827362, "learning_rate": 2e-06, "loss": 0.3444, "step": 511 }, { "epoch": 0.11877972392993852, "grad_norm": 15.761119502374642, "learning_rate": 2e-06, "loss": 0.3421, "step": 512 }, { "epoch": 0.11901171557823918, "grad_norm": 12.35069424882054, "learning_rate": 2e-06, "loss": 0.3332, "step": 513 }, { "epoch": 0.11924370722653985, "grad_norm": 19.600361404784426, "learning_rate": 2e-06, "loss": 0.377, "step": 514 }, { "epoch": 0.11947569887484051, "grad_norm": 24.217849992141204, "learning_rate": 2e-06, "loss": 0.4162, "step": 515 }, { "epoch": 0.11970769052314116, "grad_norm": 17.814790081649537, "learning_rate": 2e-06, "loss": 0.3611, "step": 516 }, { "epoch": 0.11993968217144182, "grad_norm": 11.464981304657163, "learning_rate": 2e-06, "loss": 0.2793, "step": 517 }, { "epoch": 0.12017167381974249, "grad_norm": 26.99997443522923, "learning_rate": 2e-06, "loss": 0.3731, "step": 518 }, { "epoch": 0.12040366546804315, "grad_norm": 17.150784636126104, "learning_rate": 2e-06, "loss": 0.4064, "step": 519 }, { "epoch": 0.12063565711634382, "grad_norm": 17.853860712732363, "learning_rate": 2e-06, "loss": 0.3889, "step": 520 }, { "epoch": 0.12086764876464447, "grad_norm": 14.08571749826114, "learning_rate": 2e-06, "loss": 0.2729, "step": 521 }, { "epoch": 0.12109964041294513, "grad_norm": 20.754236483960852, "learning_rate": 2e-06, "loss": 0.4262, "step": 522 }, { "epoch": 0.1213316320612458, "grad_norm": 16.55873039914546, "learning_rate": 2e-06, "loss": 0.3337, "step": 523 }, { "epoch": 0.12156362370954646, "grad_norm": 32.91313558109254, "learning_rate": 2e-06, "loss": 0.4844, "step": 524 }, { "epoch": 0.12179561535784712, "grad_norm": 11.950163381560618, "learning_rate": 2e-06, "loss": 0.3711, "step": 525 }, { "epoch": 0.12202760700614777, "grad_norm": 34.77644420590345, "learning_rate": 2e-06, "loss": 0.5912, "step": 526 }, { "epoch": 0.12225959865444844, "grad_norm": 18.000554624080966, "learning_rate": 2e-06, "loss": 0.4028, "step": 527 }, { "epoch": 0.1224915903027491, "grad_norm": 12.121073135087734, "learning_rate": 2e-06, "loss": 0.3123, "step": 528 }, { "epoch": 0.12272358195104976, "grad_norm": 26.90471471621084, "learning_rate": 2e-06, "loss": 0.427, "step": 529 }, { "epoch": 0.12295557359935043, "grad_norm": 21.990425205897445, "learning_rate": 2e-06, "loss": 0.4281, "step": 530 }, { "epoch": 0.12318756524765108, "grad_norm": 19.60118632192807, "learning_rate": 2e-06, "loss": 0.3642, "step": 531 }, { "epoch": 0.12341955689595174, "grad_norm": 11.736726359770271, "learning_rate": 2e-06, "loss": 0.2567, "step": 532 }, { "epoch": 0.12365154854425241, "grad_norm": 12.09972833385718, "learning_rate": 2e-06, "loss": 0.3409, "step": 533 }, { "epoch": 0.12388354019255307, "grad_norm": 9.957374867715385, "learning_rate": 2e-06, "loss": 0.2945, "step": 534 }, { "epoch": 0.12411553184085374, "grad_norm": 10.561712491251779, "learning_rate": 2e-06, "loss": 0.3149, "step": 535 }, { "epoch": 0.12434752348915439, "grad_norm": 22.962972593305583, "learning_rate": 2e-06, "loss": 0.3786, "step": 536 }, { "epoch": 0.12457951513745505, "grad_norm": 21.81855834354691, "learning_rate": 2e-06, "loss": 0.4408, "step": 537 }, { "epoch": 0.12481150678575571, "grad_norm": 43.577518805619206, "learning_rate": 2e-06, "loss": 0.5206, "step": 538 }, { "epoch": 0.12504349843405638, "grad_norm": 21.43613203738653, "learning_rate": 2e-06, "loss": 0.4016, "step": 539 }, { "epoch": 0.12527549008235703, "grad_norm": 8.521408142301265, "learning_rate": 2e-06, "loss": 0.3127, "step": 540 }, { "epoch": 0.1255074817306577, "grad_norm": 21.675160395032297, "learning_rate": 2e-06, "loss": 0.4071, "step": 541 }, { "epoch": 0.12573947337895836, "grad_norm": 10.01476995805297, "learning_rate": 2e-06, "loss": 0.3015, "step": 542 }, { "epoch": 0.125971465027259, "grad_norm": 12.745527693576305, "learning_rate": 2e-06, "loss": 0.3208, "step": 543 }, { "epoch": 0.12620345667555968, "grad_norm": 10.28277249296884, "learning_rate": 2e-06, "loss": 0.2952, "step": 544 }, { "epoch": 0.12643544832386033, "grad_norm": 24.837971502781365, "learning_rate": 2e-06, "loss": 0.3971, "step": 545 }, { "epoch": 0.126667439972161, "grad_norm": 10.839451211684377, "learning_rate": 2e-06, "loss": 0.336, "step": 546 }, { "epoch": 0.12689943162046166, "grad_norm": 15.188607097212989, "learning_rate": 2e-06, "loss": 0.2531, "step": 547 }, { "epoch": 0.1271314232687623, "grad_norm": 11.852306795226589, "learning_rate": 2e-06, "loss": 0.3548, "step": 548 }, { "epoch": 0.127363414917063, "grad_norm": 10.728552291466901, "learning_rate": 2e-06, "loss": 0.2975, "step": 549 }, { "epoch": 0.12759540656536364, "grad_norm": 12.197563885040825, "learning_rate": 2e-06, "loss": 0.3412, "step": 550 }, { "epoch": 0.12782739821366432, "grad_norm": 11.460186845755205, "learning_rate": 2e-06, "loss": 0.3667, "step": 551 }, { "epoch": 0.12805938986196497, "grad_norm": 24.191419011715134, "learning_rate": 2e-06, "loss": 0.4359, "step": 552 }, { "epoch": 0.12829138151026562, "grad_norm": 15.072369818460848, "learning_rate": 2e-06, "loss": 0.3481, "step": 553 }, { "epoch": 0.1285233731585663, "grad_norm": 15.493554128998317, "learning_rate": 2e-06, "loss": 0.3944, "step": 554 }, { "epoch": 0.12875536480686695, "grad_norm": 10.299584834939997, "learning_rate": 2e-06, "loss": 0.2933, "step": 555 }, { "epoch": 0.12898735645516762, "grad_norm": 19.667944578719855, "learning_rate": 2e-06, "loss": 0.3843, "step": 556 }, { "epoch": 0.12921934810346827, "grad_norm": 13.020604199902435, "learning_rate": 2e-06, "loss": 0.3362, "step": 557 }, { "epoch": 0.12945133975176892, "grad_norm": 18.494565081255427, "learning_rate": 2e-06, "loss": 0.2426, "step": 558 }, { "epoch": 0.1296833314000696, "grad_norm": 11.481350296677014, "learning_rate": 2e-06, "loss": 0.3884, "step": 559 }, { "epoch": 0.12991532304837025, "grad_norm": 14.37603161322684, "learning_rate": 2e-06, "loss": 0.3521, "step": 560 }, { "epoch": 0.13014731469667093, "grad_norm": 17.945165746050844, "learning_rate": 2e-06, "loss": 0.4201, "step": 561 }, { "epoch": 0.13037930634497158, "grad_norm": 10.206165197896242, "learning_rate": 2e-06, "loss": 0.2525, "step": 562 }, { "epoch": 0.13061129799327223, "grad_norm": 11.366171051296087, "learning_rate": 2e-06, "loss": 0.2697, "step": 563 }, { "epoch": 0.1308432896415729, "grad_norm": 17.908668657075456, "learning_rate": 2e-06, "loss": 0.3358, "step": 564 }, { "epoch": 0.13107528128987356, "grad_norm": 15.699717818446853, "learning_rate": 2e-06, "loss": 0.379, "step": 565 }, { "epoch": 0.13130727293817424, "grad_norm": 16.386214174399377, "learning_rate": 2e-06, "loss": 0.3298, "step": 566 }, { "epoch": 0.13153926458647489, "grad_norm": 10.902641798900966, "learning_rate": 2e-06, "loss": 0.2858, "step": 567 }, { "epoch": 0.13177125623477554, "grad_norm": 19.20552496348048, "learning_rate": 2e-06, "loss": 0.3325, "step": 568 }, { "epoch": 0.1320032478830762, "grad_norm": 17.225944620309544, "learning_rate": 2e-06, "loss": 0.3848, "step": 569 }, { "epoch": 0.13223523953137686, "grad_norm": 12.063065237924471, "learning_rate": 2e-06, "loss": 0.3181, "step": 570 }, { "epoch": 0.13246723117967754, "grad_norm": 15.04183112468636, "learning_rate": 2e-06, "loss": 0.3095, "step": 571 }, { "epoch": 0.1326992228279782, "grad_norm": 29.59285246867334, "learning_rate": 2e-06, "loss": 0.3663, "step": 572 }, { "epoch": 0.13293121447627884, "grad_norm": 22.314922985673483, "learning_rate": 2e-06, "loss": 0.3433, "step": 573 }, { "epoch": 0.13316320612457952, "grad_norm": 20.246625843146674, "learning_rate": 2e-06, "loss": 0.3287, "step": 574 }, { "epoch": 0.13339519777288017, "grad_norm": 35.745096593143096, "learning_rate": 2e-06, "loss": 0.5195, "step": 575 }, { "epoch": 0.13362718942118085, "grad_norm": 21.991019971295252, "learning_rate": 2e-06, "loss": 0.3267, "step": 576 }, { "epoch": 0.1338591810694815, "grad_norm": 11.547842625012965, "learning_rate": 2e-06, "loss": 0.3013, "step": 577 }, { "epoch": 0.13409117271778215, "grad_norm": 7.575492023294917, "learning_rate": 2e-06, "loss": 0.2593, "step": 578 }, { "epoch": 0.13432316436608283, "grad_norm": 14.200955942542265, "learning_rate": 2e-06, "loss": 0.1945, "step": 579 }, { "epoch": 0.13455515601438348, "grad_norm": 17.254167222368046, "learning_rate": 2e-06, "loss": 0.3549, "step": 580 }, { "epoch": 0.13478714766268415, "grad_norm": 14.858177542842116, "learning_rate": 2e-06, "loss": 0.2782, "step": 581 }, { "epoch": 0.1350191393109848, "grad_norm": 14.332877132531262, "learning_rate": 2e-06, "loss": 0.2892, "step": 582 }, { "epoch": 0.13525113095928545, "grad_norm": 11.706174595821098, "learning_rate": 2e-06, "loss": 0.263, "step": 583 }, { "epoch": 0.13548312260758613, "grad_norm": 13.445854658600583, "learning_rate": 2e-06, "loss": 0.3788, "step": 584 }, { "epoch": 0.13571511425588678, "grad_norm": 11.033594620835709, "learning_rate": 2e-06, "loss": 0.3227, "step": 585 }, { "epoch": 0.13594710590418746, "grad_norm": 19.704778968598895, "learning_rate": 2e-06, "loss": 0.3464, "step": 586 }, { "epoch": 0.1361790975524881, "grad_norm": 23.207724263487698, "learning_rate": 2e-06, "loss": 0.4422, "step": 587 }, { "epoch": 0.13641108920078876, "grad_norm": 17.446548566313393, "learning_rate": 2e-06, "loss": 0.3404, "step": 588 }, { "epoch": 0.13664308084908944, "grad_norm": 12.890562334412161, "learning_rate": 2e-06, "loss": 0.3314, "step": 589 }, { "epoch": 0.1368750724973901, "grad_norm": 9.320297975561534, "learning_rate": 2e-06, "loss": 0.2527, "step": 590 }, { "epoch": 0.13710706414569077, "grad_norm": 13.408963579965981, "learning_rate": 2e-06, "loss": 0.3661, "step": 591 }, { "epoch": 0.13733905579399142, "grad_norm": 17.926859871243074, "learning_rate": 2e-06, "loss": 0.3791, "step": 592 }, { "epoch": 0.13757104744229207, "grad_norm": 6.07597891688596, "learning_rate": 2e-06, "loss": 0.2008, "step": 593 }, { "epoch": 0.13780303909059274, "grad_norm": 19.555073576535285, "learning_rate": 2e-06, "loss": 0.331, "step": 594 }, { "epoch": 0.1380350307388934, "grad_norm": 12.512008844378691, "learning_rate": 2e-06, "loss": 0.3331, "step": 595 }, { "epoch": 0.13826702238719407, "grad_norm": 20.58792756025946, "learning_rate": 2e-06, "loss": 0.3247, "step": 596 }, { "epoch": 0.13849901403549472, "grad_norm": 13.69854700104819, "learning_rate": 2e-06, "loss": 0.3676, "step": 597 }, { "epoch": 0.13873100568379537, "grad_norm": 7.247642328163105, "learning_rate": 2e-06, "loss": 0.2787, "step": 598 }, { "epoch": 0.13896299733209605, "grad_norm": 20.662562349843842, "learning_rate": 2e-06, "loss": 0.4137, "step": 599 }, { "epoch": 0.1391949889803967, "grad_norm": 23.74211769605697, "learning_rate": 2e-06, "loss": 0.4117, "step": 600 }, { "epoch": 0.13942698062869738, "grad_norm": 14.54527602752155, "learning_rate": 2e-06, "loss": 0.3589, "step": 601 }, { "epoch": 0.13965897227699803, "grad_norm": 13.128633707615593, "learning_rate": 2e-06, "loss": 0.3302, "step": 602 }, { "epoch": 0.13989096392529868, "grad_norm": 13.78506522209295, "learning_rate": 2e-06, "loss": 0.3327, "step": 603 }, { "epoch": 0.14012295557359936, "grad_norm": 15.841352731539514, "learning_rate": 2e-06, "loss": 0.3152, "step": 604 }, { "epoch": 0.1403549472219, "grad_norm": 9.421455470531463, "learning_rate": 2e-06, "loss": 0.3354, "step": 605 }, { "epoch": 0.14058693887020068, "grad_norm": 10.08321808770723, "learning_rate": 2e-06, "loss": 0.2326, "step": 606 }, { "epoch": 0.14081893051850133, "grad_norm": 18.689274942037507, "learning_rate": 2e-06, "loss": 0.4126, "step": 607 }, { "epoch": 0.14105092216680198, "grad_norm": 12.358109062169618, "learning_rate": 2e-06, "loss": 0.3798, "step": 608 }, { "epoch": 0.14128291381510266, "grad_norm": 21.92588737376762, "learning_rate": 2e-06, "loss": 0.4711, "step": 609 }, { "epoch": 0.1415149054634033, "grad_norm": 19.45991230773491, "learning_rate": 2e-06, "loss": 0.4289, "step": 610 }, { "epoch": 0.141746897111704, "grad_norm": 13.005478035817163, "learning_rate": 2e-06, "loss": 0.3862, "step": 611 }, { "epoch": 0.14197888876000464, "grad_norm": 13.562324246271288, "learning_rate": 2e-06, "loss": 0.3999, "step": 612 }, { "epoch": 0.1422108804083053, "grad_norm": 31.784848826892258, "learning_rate": 2e-06, "loss": 0.3502, "step": 613 }, { "epoch": 0.14244287205660597, "grad_norm": 10.206030065161611, "learning_rate": 2e-06, "loss": 0.2569, "step": 614 }, { "epoch": 0.14267486370490662, "grad_norm": 24.276173026549397, "learning_rate": 2e-06, "loss": 0.2952, "step": 615 }, { "epoch": 0.1429068553532073, "grad_norm": 18.342906382940086, "learning_rate": 2e-06, "loss": 0.3026, "step": 616 }, { "epoch": 0.14313884700150795, "grad_norm": 8.993574006033441, "learning_rate": 2e-06, "loss": 0.2586, "step": 617 }, { "epoch": 0.1433708386498086, "grad_norm": 23.936719026871057, "learning_rate": 2e-06, "loss": 0.3789, "step": 618 }, { "epoch": 0.14360283029810927, "grad_norm": 16.32194373617926, "learning_rate": 2e-06, "loss": 0.3839, "step": 619 }, { "epoch": 0.14383482194640992, "grad_norm": 12.988145492420317, "learning_rate": 2e-06, "loss": 0.3824, "step": 620 }, { "epoch": 0.1440668135947106, "grad_norm": 20.669035386443895, "learning_rate": 2e-06, "loss": 0.3758, "step": 621 }, { "epoch": 0.14429880524301125, "grad_norm": 24.990471580039006, "learning_rate": 2e-06, "loss": 0.3434, "step": 622 }, { "epoch": 0.1445307968913119, "grad_norm": 17.99722260635703, "learning_rate": 2e-06, "loss": 0.3738, "step": 623 }, { "epoch": 0.14476278853961258, "grad_norm": 28.862374960031236, "learning_rate": 2e-06, "loss": 0.3462, "step": 624 }, { "epoch": 0.14499478018791323, "grad_norm": 16.286544229911005, "learning_rate": 2e-06, "loss": 0.3677, "step": 625 }, { "epoch": 0.1452267718362139, "grad_norm": 13.749182210734784, "learning_rate": 2e-06, "loss": 0.3409, "step": 626 }, { "epoch": 0.14545876348451456, "grad_norm": 8.781576263071432, "learning_rate": 2e-06, "loss": 0.3408, "step": 627 }, { "epoch": 0.1456907551328152, "grad_norm": 20.23008968468618, "learning_rate": 2e-06, "loss": 0.3334, "step": 628 }, { "epoch": 0.1459227467811159, "grad_norm": 21.403961956791857, "learning_rate": 2e-06, "loss": 0.3627, "step": 629 }, { "epoch": 0.14615473842941654, "grad_norm": 19.072138287773942, "learning_rate": 2e-06, "loss": 0.2983, "step": 630 }, { "epoch": 0.14638673007771721, "grad_norm": 22.329610088782662, "learning_rate": 2e-06, "loss": 0.4157, "step": 631 }, { "epoch": 0.14661872172601786, "grad_norm": 11.982533942082721, "learning_rate": 2e-06, "loss": 0.2553, "step": 632 }, { "epoch": 0.14685071337431851, "grad_norm": 22.78573635381739, "learning_rate": 2e-06, "loss": 0.3195, "step": 633 }, { "epoch": 0.1470827050226192, "grad_norm": 10.40653341397135, "learning_rate": 2e-06, "loss": 0.3246, "step": 634 }, { "epoch": 0.14731469667091984, "grad_norm": 14.463244019994837, "learning_rate": 2e-06, "loss": 0.3692, "step": 635 }, { "epoch": 0.14754668831922052, "grad_norm": 19.800480987537895, "learning_rate": 2e-06, "loss": 0.3389, "step": 636 }, { "epoch": 0.14777867996752117, "grad_norm": 10.631478775111985, "learning_rate": 2e-06, "loss": 0.2379, "step": 637 }, { "epoch": 0.14801067161582182, "grad_norm": 19.589467703354284, "learning_rate": 2e-06, "loss": 0.3806, "step": 638 }, { "epoch": 0.1482426632641225, "grad_norm": 17.235561484760968, "learning_rate": 2e-06, "loss": 0.3159, "step": 639 }, { "epoch": 0.14847465491242315, "grad_norm": 14.29645254007326, "learning_rate": 2e-06, "loss": 0.2914, "step": 640 }, { "epoch": 0.14870664656072383, "grad_norm": 15.25682225167021, "learning_rate": 2e-06, "loss": 0.2814, "step": 641 }, { "epoch": 0.14893863820902448, "grad_norm": 12.72336393966868, "learning_rate": 2e-06, "loss": 0.31, "step": 642 }, { "epoch": 0.14917062985732513, "grad_norm": 34.56907190484323, "learning_rate": 2e-06, "loss": 0.394, "step": 643 }, { "epoch": 0.1494026215056258, "grad_norm": 20.092572113695343, "learning_rate": 2e-06, "loss": 0.4555, "step": 644 }, { "epoch": 0.14963461315392645, "grad_norm": 12.558275124737698, "learning_rate": 2e-06, "loss": 0.2319, "step": 645 }, { "epoch": 0.14986660480222713, "grad_norm": 10.926979723682406, "learning_rate": 2e-06, "loss": 0.3174, "step": 646 }, { "epoch": 0.15009859645052778, "grad_norm": 21.759994543709613, "learning_rate": 2e-06, "loss": 0.3082, "step": 647 }, { "epoch": 0.15033058809882843, "grad_norm": 14.962828288114487, "learning_rate": 2e-06, "loss": 0.2977, "step": 648 }, { "epoch": 0.1505625797471291, "grad_norm": 14.945826490956852, "learning_rate": 2e-06, "loss": 0.3518, "step": 649 }, { "epoch": 0.15079457139542976, "grad_norm": 12.677037134321225, "learning_rate": 2e-06, "loss": 0.353, "step": 650 }, { "epoch": 0.15102656304373044, "grad_norm": 16.329612153370707, "learning_rate": 2e-06, "loss": 0.3625, "step": 651 }, { "epoch": 0.1512585546920311, "grad_norm": 25.316498260503856, "learning_rate": 2e-06, "loss": 0.3509, "step": 652 }, { "epoch": 0.15149054634033174, "grad_norm": 9.505821526608154, "learning_rate": 2e-06, "loss": 0.3144, "step": 653 }, { "epoch": 0.15172253798863242, "grad_norm": 23.54621096323739, "learning_rate": 2e-06, "loss": 0.3844, "step": 654 }, { "epoch": 0.15195452963693307, "grad_norm": 11.882344502450662, "learning_rate": 2e-06, "loss": 0.3435, "step": 655 }, { "epoch": 0.15218652128523374, "grad_norm": 21.91198574912016, "learning_rate": 2e-06, "loss": 0.3632, "step": 656 }, { "epoch": 0.1524185129335344, "grad_norm": 12.393539964259297, "learning_rate": 2e-06, "loss": 0.278, "step": 657 }, { "epoch": 0.15265050458183504, "grad_norm": 18.168511634243217, "learning_rate": 2e-06, "loss": 0.2467, "step": 658 }, { "epoch": 0.15288249623013572, "grad_norm": 14.178757384068685, "learning_rate": 2e-06, "loss": 0.3257, "step": 659 }, { "epoch": 0.15311448787843637, "grad_norm": 32.74130918364026, "learning_rate": 2e-06, "loss": 0.3114, "step": 660 }, { "epoch": 0.15334647952673705, "grad_norm": 12.222002529077335, "learning_rate": 2e-06, "loss": 0.3026, "step": 661 }, { "epoch": 0.1535784711750377, "grad_norm": 10.954915709678856, "learning_rate": 2e-06, "loss": 0.3513, "step": 662 }, { "epoch": 0.15381046282333835, "grad_norm": 20.24850597550675, "learning_rate": 2e-06, "loss": 0.4142, "step": 663 }, { "epoch": 0.15404245447163903, "grad_norm": 15.214951718141643, "learning_rate": 2e-06, "loss": 0.3569, "step": 664 }, { "epoch": 0.15427444611993968, "grad_norm": 17.304343638378498, "learning_rate": 2e-06, "loss": 0.3982, "step": 665 }, { "epoch": 0.15450643776824036, "grad_norm": 10.307606751687013, "learning_rate": 2e-06, "loss": 0.3254, "step": 666 }, { "epoch": 0.154738429416541, "grad_norm": 15.932744708373177, "learning_rate": 2e-06, "loss": 0.3459, "step": 667 }, { "epoch": 0.15497042106484166, "grad_norm": 22.80289035080992, "learning_rate": 2e-06, "loss": 0.3968, "step": 668 }, { "epoch": 0.15520241271314233, "grad_norm": 23.14623782349953, "learning_rate": 2e-06, "loss": 0.2405, "step": 669 }, { "epoch": 0.15543440436144298, "grad_norm": 18.0912009844887, "learning_rate": 2e-06, "loss": 0.338, "step": 670 }, { "epoch": 0.15566639600974366, "grad_norm": 18.677691324496845, "learning_rate": 2e-06, "loss": 0.3047, "step": 671 }, { "epoch": 0.1558983876580443, "grad_norm": 8.998825461022793, "learning_rate": 2e-06, "loss": 0.303, "step": 672 }, { "epoch": 0.15613037930634496, "grad_norm": 18.045426173107654, "learning_rate": 2e-06, "loss": 0.3606, "step": 673 }, { "epoch": 0.15636237095464564, "grad_norm": 11.154343481153669, "learning_rate": 2e-06, "loss": 0.3413, "step": 674 }, { "epoch": 0.1565943626029463, "grad_norm": 13.332750429948154, "learning_rate": 2e-06, "loss": 0.3186, "step": 675 }, { "epoch": 0.15682635425124697, "grad_norm": 15.74486222469269, "learning_rate": 2e-06, "loss": 0.3002, "step": 676 }, { "epoch": 0.15705834589954762, "grad_norm": 19.667188570074135, "learning_rate": 2e-06, "loss": 0.4405, "step": 677 }, { "epoch": 0.15729033754784827, "grad_norm": 21.031839435111806, "learning_rate": 2e-06, "loss": 0.3478, "step": 678 }, { "epoch": 0.15752232919614895, "grad_norm": 10.39843349546204, "learning_rate": 2e-06, "loss": 0.266, "step": 679 }, { "epoch": 0.1577543208444496, "grad_norm": 16.454956918604434, "learning_rate": 2e-06, "loss": 0.3867, "step": 680 }, { "epoch": 0.15798631249275025, "grad_norm": 16.358326166838868, "learning_rate": 2e-06, "loss": 0.418, "step": 681 }, { "epoch": 0.15821830414105092, "grad_norm": 25.612389596778975, "learning_rate": 2e-06, "loss": 0.3624, "step": 682 }, { "epoch": 0.15845029578935158, "grad_norm": 10.449260622783445, "learning_rate": 2e-06, "loss": 0.2386, "step": 683 }, { "epoch": 0.15868228743765225, "grad_norm": 11.170225689154774, "learning_rate": 2e-06, "loss": 0.2839, "step": 684 }, { "epoch": 0.1589142790859529, "grad_norm": 20.379086082147165, "learning_rate": 2e-06, "loss": 0.362, "step": 685 }, { "epoch": 0.15914627073425355, "grad_norm": 29.30564423884362, "learning_rate": 2e-06, "loss": 0.4104, "step": 686 }, { "epoch": 0.15937826238255423, "grad_norm": 12.564718453938301, "learning_rate": 2e-06, "loss": 0.3009, "step": 687 }, { "epoch": 0.15961025403085488, "grad_norm": 16.60002829086695, "learning_rate": 2e-06, "loss": 0.3076, "step": 688 }, { "epoch": 0.15984224567915556, "grad_norm": 18.44420873851104, "learning_rate": 2e-06, "loss": 0.376, "step": 689 }, { "epoch": 0.1600742373274562, "grad_norm": 14.716335866668372, "learning_rate": 2e-06, "loss": 0.3314, "step": 690 }, { "epoch": 0.16030622897575686, "grad_norm": 11.136643251719736, "learning_rate": 2e-06, "loss": 0.3109, "step": 691 }, { "epoch": 0.16053822062405754, "grad_norm": 15.005943488342586, "learning_rate": 2e-06, "loss": 0.3069, "step": 692 }, { "epoch": 0.1607702122723582, "grad_norm": 24.658145570665926, "learning_rate": 2e-06, "loss": 0.3951, "step": 693 }, { "epoch": 0.16100220392065887, "grad_norm": 10.62390063366631, "learning_rate": 2e-06, "loss": 0.2891, "step": 694 }, { "epoch": 0.16123419556895952, "grad_norm": 14.095175484680707, "learning_rate": 2e-06, "loss": 0.3373, "step": 695 }, { "epoch": 0.16146618721726017, "grad_norm": 12.851412497958066, "learning_rate": 2e-06, "loss": 0.3065, "step": 696 }, { "epoch": 0.16169817886556084, "grad_norm": 8.31681767749006, "learning_rate": 2e-06, "loss": 0.3037, "step": 697 }, { "epoch": 0.1619301705138615, "grad_norm": 15.879779774741346, "learning_rate": 2e-06, "loss": 0.2867, "step": 698 }, { "epoch": 0.16216216216216217, "grad_norm": 15.024589339052074, "learning_rate": 2e-06, "loss": 0.3346, "step": 699 }, { "epoch": 0.16239415381046282, "grad_norm": 25.728722695798243, "learning_rate": 2e-06, "loss": 0.3263, "step": 700 }, { "epoch": 0.16262614545876347, "grad_norm": 13.962249772821057, "learning_rate": 2e-06, "loss": 0.4021, "step": 701 }, { "epoch": 0.16285813710706415, "grad_norm": 16.6701984573303, "learning_rate": 2e-06, "loss": 0.2992, "step": 702 }, { "epoch": 0.1630901287553648, "grad_norm": 20.731069653085676, "learning_rate": 2e-06, "loss": 0.2982, "step": 703 }, { "epoch": 0.16332212040366548, "grad_norm": 22.497997523362585, "learning_rate": 2e-06, "loss": 0.334, "step": 704 }, { "epoch": 0.16355411205196613, "grad_norm": 13.511717785171559, "learning_rate": 2e-06, "loss": 0.2525, "step": 705 }, { "epoch": 0.16378610370026678, "grad_norm": 14.860067921359914, "learning_rate": 2e-06, "loss": 0.3137, "step": 706 }, { "epoch": 0.16401809534856746, "grad_norm": 14.477755167374747, "learning_rate": 2e-06, "loss": 0.2181, "step": 707 }, { "epoch": 0.1642500869968681, "grad_norm": 31.17589495214125, "learning_rate": 2e-06, "loss": 0.4513, "step": 708 }, { "epoch": 0.16448207864516878, "grad_norm": 30.74207575332384, "learning_rate": 2e-06, "loss": 0.3825, "step": 709 }, { "epoch": 0.16471407029346943, "grad_norm": 40.584780969108955, "learning_rate": 2e-06, "loss": 0.4184, "step": 710 }, { "epoch": 0.16494606194177008, "grad_norm": 47.0752374589824, "learning_rate": 2e-06, "loss": 0.3444, "step": 711 }, { "epoch": 0.16517805359007076, "grad_norm": 24.05198512196974, "learning_rate": 2e-06, "loss": 0.2746, "step": 712 }, { "epoch": 0.1654100452383714, "grad_norm": 20.96193342212991, "learning_rate": 2e-06, "loss": 0.3844, "step": 713 }, { "epoch": 0.1656420368866721, "grad_norm": 26.844108345388346, "learning_rate": 2e-06, "loss": 0.3303, "step": 714 }, { "epoch": 0.16587402853497274, "grad_norm": 18.361559812521087, "learning_rate": 2e-06, "loss": 0.2599, "step": 715 }, { "epoch": 0.1661060201832734, "grad_norm": 10.334556071049587, "learning_rate": 2e-06, "loss": 0.2415, "step": 716 }, { "epoch": 0.16633801183157407, "grad_norm": 17.466059203458997, "learning_rate": 2e-06, "loss": 0.2934, "step": 717 }, { "epoch": 0.16657000347987472, "grad_norm": 13.792496069247843, "learning_rate": 2e-06, "loss": 0.2913, "step": 718 }, { "epoch": 0.1668019951281754, "grad_norm": 27.344398206578028, "learning_rate": 2e-06, "loss": 0.5, "step": 719 }, { "epoch": 0.16703398677647605, "grad_norm": 16.570069610376084, "learning_rate": 2e-06, "loss": 0.3282, "step": 720 }, { "epoch": 0.1672659784247767, "grad_norm": 12.517016004539395, "learning_rate": 2e-06, "loss": 0.316, "step": 721 }, { "epoch": 0.16749797007307737, "grad_norm": 9.454686300920637, "learning_rate": 2e-06, "loss": 0.2546, "step": 722 }, { "epoch": 0.16772996172137802, "grad_norm": 17.013538405587067, "learning_rate": 2e-06, "loss": 0.3796, "step": 723 }, { "epoch": 0.1679619533696787, "grad_norm": 24.06500993181185, "learning_rate": 2e-06, "loss": 0.4782, "step": 724 }, { "epoch": 0.16819394501797935, "grad_norm": 21.754943523584142, "learning_rate": 2e-06, "loss": 0.4722, "step": 725 }, { "epoch": 0.16842593666628, "grad_norm": 19.33220164628588, "learning_rate": 2e-06, "loss": 0.3475, "step": 726 }, { "epoch": 0.16865792831458068, "grad_norm": 14.11612192602318, "learning_rate": 2e-06, "loss": 0.3717, "step": 727 }, { "epoch": 0.16888991996288133, "grad_norm": 20.07653914337054, "learning_rate": 2e-06, "loss": 0.3061, "step": 728 }, { "epoch": 0.169121911611182, "grad_norm": 15.694126220394503, "learning_rate": 2e-06, "loss": 0.3679, "step": 729 }, { "epoch": 0.16935390325948266, "grad_norm": 7.597244637446782, "learning_rate": 2e-06, "loss": 0.3023, "step": 730 }, { "epoch": 0.1695858949077833, "grad_norm": 9.960347483549528, "learning_rate": 2e-06, "loss": 0.3413, "step": 731 }, { "epoch": 0.16981788655608399, "grad_norm": 13.00232896611134, "learning_rate": 2e-06, "loss": 0.3, "step": 732 }, { "epoch": 0.17004987820438464, "grad_norm": 18.64285936948962, "learning_rate": 2e-06, "loss": 0.3768, "step": 733 }, { "epoch": 0.1702818698526853, "grad_norm": 19.56053782005133, "learning_rate": 2e-06, "loss": 0.3985, "step": 734 }, { "epoch": 0.17051386150098596, "grad_norm": 10.622675208141215, "learning_rate": 2e-06, "loss": 0.3562, "step": 735 }, { "epoch": 0.1707458531492866, "grad_norm": 15.804895543467035, "learning_rate": 2e-06, "loss": 0.3077, "step": 736 }, { "epoch": 0.1709778447975873, "grad_norm": 18.447008280828154, "learning_rate": 2e-06, "loss": 0.3354, "step": 737 }, { "epoch": 0.17120983644588794, "grad_norm": 14.621557018343863, "learning_rate": 2e-06, "loss": 0.459, "step": 738 }, { "epoch": 0.17144182809418862, "grad_norm": 8.763421231576094, "learning_rate": 2e-06, "loss": 0.2067, "step": 739 }, { "epoch": 0.17167381974248927, "grad_norm": 21.564285979581147, "learning_rate": 2e-06, "loss": 0.3686, "step": 740 }, { "epoch": 0.17190581139078992, "grad_norm": 15.462860742192579, "learning_rate": 2e-06, "loss": 0.2838, "step": 741 }, { "epoch": 0.1721378030390906, "grad_norm": 18.866472307797476, "learning_rate": 2e-06, "loss": 0.3532, "step": 742 }, { "epoch": 0.17236979468739125, "grad_norm": 11.27689988835768, "learning_rate": 2e-06, "loss": 0.3262, "step": 743 }, { "epoch": 0.17260178633569193, "grad_norm": 6.560862378907595, "learning_rate": 2e-06, "loss": 0.2374, "step": 744 }, { "epoch": 0.17283377798399258, "grad_norm": 17.22733586413075, "learning_rate": 2e-06, "loss": 0.3586, "step": 745 }, { "epoch": 0.17306576963229323, "grad_norm": 8.816541653445544, "learning_rate": 2e-06, "loss": 0.2539, "step": 746 }, { "epoch": 0.1732977612805939, "grad_norm": 9.561143157495499, "learning_rate": 2e-06, "loss": 0.3186, "step": 747 }, { "epoch": 0.17352975292889455, "grad_norm": 12.445921344090433, "learning_rate": 2e-06, "loss": 0.3107, "step": 748 }, { "epoch": 0.17376174457719523, "grad_norm": 14.75541877744063, "learning_rate": 2e-06, "loss": 0.3665, "step": 749 }, { "epoch": 0.17399373622549588, "grad_norm": 11.932681911379351, "learning_rate": 2e-06, "loss": 0.305, "step": 750 }, { "epoch": 0.17422572787379653, "grad_norm": 13.778515110830838, "learning_rate": 2e-06, "loss": 0.2846, "step": 751 }, { "epoch": 0.1744577195220972, "grad_norm": 17.98604309887815, "learning_rate": 2e-06, "loss": 0.3341, "step": 752 }, { "epoch": 0.17468971117039786, "grad_norm": 17.334206140348677, "learning_rate": 2e-06, "loss": 0.2994, "step": 753 }, { "epoch": 0.17492170281869854, "grad_norm": 17.67618987598563, "learning_rate": 2e-06, "loss": 0.4629, "step": 754 }, { "epoch": 0.1751536944669992, "grad_norm": 13.136167347637274, "learning_rate": 2e-06, "loss": 0.3304, "step": 755 }, { "epoch": 0.17538568611529984, "grad_norm": 8.952224830050373, "learning_rate": 2e-06, "loss": 0.3626, "step": 756 }, { "epoch": 0.17561767776360052, "grad_norm": 11.193654263857521, "learning_rate": 2e-06, "loss": 0.2494, "step": 757 }, { "epoch": 0.17584966941190117, "grad_norm": 22.320651074813423, "learning_rate": 2e-06, "loss": 0.3289, "step": 758 }, { "epoch": 0.17608166106020184, "grad_norm": 17.535712646014808, "learning_rate": 2e-06, "loss": 0.4499, "step": 759 }, { "epoch": 0.1763136527085025, "grad_norm": 12.733734517226111, "learning_rate": 2e-06, "loss": 0.3214, "step": 760 }, { "epoch": 0.17654564435680314, "grad_norm": 13.353201137951201, "learning_rate": 2e-06, "loss": 0.3699, "step": 761 }, { "epoch": 0.17677763600510382, "grad_norm": 9.201780284194042, "learning_rate": 2e-06, "loss": 0.3144, "step": 762 }, { "epoch": 0.17700962765340447, "grad_norm": 9.586095433749264, "learning_rate": 2e-06, "loss": 0.2671, "step": 763 }, { "epoch": 0.17724161930170515, "grad_norm": 18.8866400319037, "learning_rate": 2e-06, "loss": 0.3327, "step": 764 }, { "epoch": 0.1774736109500058, "grad_norm": 16.97686477233965, "learning_rate": 2e-06, "loss": 0.3186, "step": 765 }, { "epoch": 0.17770560259830645, "grad_norm": 23.109981923087553, "learning_rate": 2e-06, "loss": 0.3106, "step": 766 }, { "epoch": 0.17793759424660713, "grad_norm": 16.775430605152785, "learning_rate": 2e-06, "loss": 0.3196, "step": 767 }, { "epoch": 0.17816958589490778, "grad_norm": 11.87197374426931, "learning_rate": 2e-06, "loss": 0.2493, "step": 768 }, { "epoch": 0.17840157754320846, "grad_norm": 18.34787640158497, "learning_rate": 2e-06, "loss": 0.4318, "step": 769 }, { "epoch": 0.1786335691915091, "grad_norm": 12.462233834921067, "learning_rate": 2e-06, "loss": 0.3509, "step": 770 }, { "epoch": 0.17886556083980976, "grad_norm": 9.523701984608357, "learning_rate": 2e-06, "loss": 0.2247, "step": 771 }, { "epoch": 0.17909755248811043, "grad_norm": 20.77996685844607, "learning_rate": 2e-06, "loss": 0.3783, "step": 772 }, { "epoch": 0.17932954413641108, "grad_norm": 14.594333033140241, "learning_rate": 2e-06, "loss": 0.2588, "step": 773 }, { "epoch": 0.17956153578471176, "grad_norm": 11.82242238113082, "learning_rate": 2e-06, "loss": 0.2729, "step": 774 }, { "epoch": 0.1797935274330124, "grad_norm": 13.30174298015603, "learning_rate": 2e-06, "loss": 0.3338, "step": 775 }, { "epoch": 0.18002551908131306, "grad_norm": 21.963362480858713, "learning_rate": 2e-06, "loss": 0.4504, "step": 776 }, { "epoch": 0.18025751072961374, "grad_norm": 16.089942933076735, "learning_rate": 2e-06, "loss": 0.2697, "step": 777 }, { "epoch": 0.1804895023779144, "grad_norm": 22.34317850213985, "learning_rate": 2e-06, "loss": 0.3904, "step": 778 }, { "epoch": 0.18072149402621507, "grad_norm": 23.94242658386868, "learning_rate": 2e-06, "loss": 0.38, "step": 779 }, { "epoch": 0.18095348567451572, "grad_norm": 12.818302873757448, "learning_rate": 2e-06, "loss": 0.3119, "step": 780 }, { "epoch": 0.18118547732281637, "grad_norm": 15.137628022957086, "learning_rate": 2e-06, "loss": 0.3781, "step": 781 }, { "epoch": 0.18141746897111705, "grad_norm": 24.94966829807091, "learning_rate": 2e-06, "loss": 0.3605, "step": 782 }, { "epoch": 0.1816494606194177, "grad_norm": 13.102831529880358, "learning_rate": 2e-06, "loss": 0.2806, "step": 783 }, { "epoch": 0.18188145226771837, "grad_norm": 19.645424296337186, "learning_rate": 2e-06, "loss": 0.2957, "step": 784 }, { "epoch": 0.18211344391601902, "grad_norm": 23.164457029140216, "learning_rate": 2e-06, "loss": 0.3374, "step": 785 }, { "epoch": 0.18234543556431967, "grad_norm": 20.373224626818722, "learning_rate": 2e-06, "loss": 0.3663, "step": 786 }, { "epoch": 0.18257742721262035, "grad_norm": 22.147470638665496, "learning_rate": 2e-06, "loss": 0.4535, "step": 787 }, { "epoch": 0.182809418860921, "grad_norm": 15.781492413550604, "learning_rate": 2e-06, "loss": 0.3429, "step": 788 }, { "epoch": 0.18304141050922168, "grad_norm": 14.581736302069048, "learning_rate": 2e-06, "loss": 0.3106, "step": 789 }, { "epoch": 0.18327340215752233, "grad_norm": 20.75205079068296, "learning_rate": 2e-06, "loss": 0.2501, "step": 790 }, { "epoch": 0.18350539380582298, "grad_norm": 11.99955208209882, "learning_rate": 2e-06, "loss": 0.3079, "step": 791 }, { "epoch": 0.18373738545412366, "grad_norm": 23.105384860674214, "learning_rate": 2e-06, "loss": 0.3447, "step": 792 }, { "epoch": 0.1839693771024243, "grad_norm": 15.03266460995024, "learning_rate": 2e-06, "loss": 0.3274, "step": 793 }, { "epoch": 0.184201368750725, "grad_norm": 16.42107190060086, "learning_rate": 2e-06, "loss": 0.3123, "step": 794 }, { "epoch": 0.18443336039902564, "grad_norm": 10.80224803928146, "learning_rate": 2e-06, "loss": 0.2789, "step": 795 }, { "epoch": 0.1846653520473263, "grad_norm": 12.57701768453657, "learning_rate": 2e-06, "loss": 0.3366, "step": 796 }, { "epoch": 0.18489734369562696, "grad_norm": 18.119585754482213, "learning_rate": 2e-06, "loss": 0.3537, "step": 797 }, { "epoch": 0.18512933534392761, "grad_norm": 13.742042613570336, "learning_rate": 2e-06, "loss": 0.3081, "step": 798 }, { "epoch": 0.1853613269922283, "grad_norm": 12.840783424332322, "learning_rate": 2e-06, "loss": 0.2905, "step": 799 }, { "epoch": 0.18559331864052894, "grad_norm": 22.663177940728435, "learning_rate": 2e-06, "loss": 0.3788, "step": 800 }, { "epoch": 0.1858253102888296, "grad_norm": 9.773808862135011, "learning_rate": 2e-06, "loss": 0.3019, "step": 801 }, { "epoch": 0.18605730193713027, "grad_norm": 13.74093840790118, "learning_rate": 2e-06, "loss": 0.3438, "step": 802 }, { "epoch": 0.18628929358543092, "grad_norm": 10.157630882906007, "learning_rate": 2e-06, "loss": 0.3606, "step": 803 }, { "epoch": 0.1865212852337316, "grad_norm": 19.94680174631867, "learning_rate": 2e-06, "loss": 0.3932, "step": 804 }, { "epoch": 0.18675327688203225, "grad_norm": 19.62427920903281, "learning_rate": 2e-06, "loss": 0.411, "step": 805 }, { "epoch": 0.1869852685303329, "grad_norm": 20.749400909089836, "learning_rate": 2e-06, "loss": 0.3252, "step": 806 }, { "epoch": 0.18721726017863358, "grad_norm": 16.912302806170956, "learning_rate": 2e-06, "loss": 0.3226, "step": 807 }, { "epoch": 0.18744925182693423, "grad_norm": 7.836093031268887, "learning_rate": 2e-06, "loss": 0.3174, "step": 808 }, { "epoch": 0.1876812434752349, "grad_norm": 10.36121215740947, "learning_rate": 2e-06, "loss": 0.2763, "step": 809 }, { "epoch": 0.18791323512353555, "grad_norm": 11.951105686627765, "learning_rate": 2e-06, "loss": 0.2768, "step": 810 }, { "epoch": 0.1881452267718362, "grad_norm": 14.9237046497379, "learning_rate": 2e-06, "loss": 0.3761, "step": 811 }, { "epoch": 0.18837721842013688, "grad_norm": 14.974947243668653, "learning_rate": 2e-06, "loss": 0.3652, "step": 812 }, { "epoch": 0.18860921006843753, "grad_norm": 21.727343631884633, "learning_rate": 2e-06, "loss": 0.288, "step": 813 }, { "epoch": 0.1888412017167382, "grad_norm": 20.573896161218798, "learning_rate": 2e-06, "loss": 0.4486, "step": 814 }, { "epoch": 0.18907319336503886, "grad_norm": 16.25349741628844, "learning_rate": 2e-06, "loss": 0.2948, "step": 815 }, { "epoch": 0.1893051850133395, "grad_norm": 26.182205854500186, "learning_rate": 2e-06, "loss": 0.4274, "step": 816 }, { "epoch": 0.1895371766616402, "grad_norm": 17.041520476227614, "learning_rate": 2e-06, "loss": 0.2824, "step": 817 }, { "epoch": 0.18976916830994084, "grad_norm": 12.614284283681387, "learning_rate": 2e-06, "loss": 0.3327, "step": 818 }, { "epoch": 0.19000115995824152, "grad_norm": 21.138920185559407, "learning_rate": 2e-06, "loss": 0.2783, "step": 819 }, { "epoch": 0.19023315160654217, "grad_norm": 21.829436439600233, "learning_rate": 2e-06, "loss": 0.4277, "step": 820 }, { "epoch": 0.19046514325484282, "grad_norm": 13.577464451034475, "learning_rate": 2e-06, "loss": 0.3058, "step": 821 }, { "epoch": 0.1906971349031435, "grad_norm": 20.013660716589733, "learning_rate": 2e-06, "loss": 0.3382, "step": 822 }, { "epoch": 0.19092912655144414, "grad_norm": 22.59776772609933, "learning_rate": 2e-06, "loss": 0.365, "step": 823 }, { "epoch": 0.19116111819974482, "grad_norm": 14.284326073429842, "learning_rate": 2e-06, "loss": 0.2696, "step": 824 }, { "epoch": 0.19139310984804547, "grad_norm": 21.38410672676771, "learning_rate": 2e-06, "loss": 0.4893, "step": 825 }, { "epoch": 0.19162510149634612, "grad_norm": 15.799118469111308, "learning_rate": 2e-06, "loss": 0.3119, "step": 826 }, { "epoch": 0.1918570931446468, "grad_norm": 8.29295418290862, "learning_rate": 2e-06, "loss": 0.353, "step": 827 }, { "epoch": 0.19208908479294745, "grad_norm": 11.550631260812212, "learning_rate": 2e-06, "loss": 0.2806, "step": 828 }, { "epoch": 0.19232107644124813, "grad_norm": 15.316290042899775, "learning_rate": 2e-06, "loss": 0.3393, "step": 829 }, { "epoch": 0.19255306808954878, "grad_norm": 19.489646646108323, "learning_rate": 2e-06, "loss": 0.281, "step": 830 }, { "epoch": 0.19278505973784943, "grad_norm": 9.929271193196863, "learning_rate": 2e-06, "loss": 0.2407, "step": 831 }, { "epoch": 0.1930170513861501, "grad_norm": 14.739191818389179, "learning_rate": 2e-06, "loss": 0.4059, "step": 832 }, { "epoch": 0.19324904303445076, "grad_norm": 15.977615302695332, "learning_rate": 2e-06, "loss": 0.3081, "step": 833 }, { "epoch": 0.1934810346827514, "grad_norm": 17.21606409393626, "learning_rate": 2e-06, "loss": 0.2522, "step": 834 }, { "epoch": 0.19371302633105209, "grad_norm": 10.464523778961931, "learning_rate": 2e-06, "loss": 0.3314, "step": 835 }, { "epoch": 0.19394501797935274, "grad_norm": 21.76286939880773, "learning_rate": 2e-06, "loss": 0.2913, "step": 836 }, { "epoch": 0.1941770096276534, "grad_norm": 19.049786854010637, "learning_rate": 2e-06, "loss": 0.3758, "step": 837 }, { "epoch": 0.19440900127595406, "grad_norm": 11.044066209039185, "learning_rate": 2e-06, "loss": 0.2418, "step": 838 }, { "epoch": 0.1946409929242547, "grad_norm": 8.866011503268515, "learning_rate": 2e-06, "loss": 0.3765, "step": 839 }, { "epoch": 0.1948729845725554, "grad_norm": 13.395474628389369, "learning_rate": 2e-06, "loss": 0.2689, "step": 840 }, { "epoch": 0.19510497622085604, "grad_norm": 23.036725052626185, "learning_rate": 2e-06, "loss": 0.32, "step": 841 }, { "epoch": 0.19533696786915672, "grad_norm": 15.691738893928159, "learning_rate": 2e-06, "loss": 0.4299, "step": 842 }, { "epoch": 0.19556895951745737, "grad_norm": 18.60561924491579, "learning_rate": 2e-06, "loss": 0.3365, "step": 843 }, { "epoch": 0.19580095116575802, "grad_norm": 23.651757254556525, "learning_rate": 2e-06, "loss": 0.3196, "step": 844 }, { "epoch": 0.1960329428140587, "grad_norm": 11.371947892209338, "learning_rate": 2e-06, "loss": 0.288, "step": 845 }, { "epoch": 0.19626493446235935, "grad_norm": 10.090779614062537, "learning_rate": 2e-06, "loss": 0.3127, "step": 846 }, { "epoch": 0.19649692611066003, "grad_norm": 17.306165471527102, "learning_rate": 2e-06, "loss": 0.3371, "step": 847 }, { "epoch": 0.19672891775896068, "grad_norm": 17.408658612319552, "learning_rate": 2e-06, "loss": 0.3374, "step": 848 }, { "epoch": 0.19696090940726133, "grad_norm": 31.10483274697328, "learning_rate": 2e-06, "loss": 0.419, "step": 849 }, { "epoch": 0.197192901055562, "grad_norm": 13.739629867076612, "learning_rate": 2e-06, "loss": 0.4226, "step": 850 }, { "epoch": 0.19742489270386265, "grad_norm": 18.10650904524103, "learning_rate": 2e-06, "loss": 0.4111, "step": 851 }, { "epoch": 0.19765688435216333, "grad_norm": 9.724254659000593, "learning_rate": 2e-06, "loss": 0.2907, "step": 852 }, { "epoch": 0.19788887600046398, "grad_norm": 11.70840979887669, "learning_rate": 2e-06, "loss": 0.3338, "step": 853 }, { "epoch": 0.19812086764876463, "grad_norm": 18.223636670611263, "learning_rate": 2e-06, "loss": 0.3297, "step": 854 }, { "epoch": 0.1983528592970653, "grad_norm": 19.969087791024695, "learning_rate": 2e-06, "loss": 0.3981, "step": 855 }, { "epoch": 0.19858485094536596, "grad_norm": 24.668935395200606, "learning_rate": 2e-06, "loss": 0.4325, "step": 856 }, { "epoch": 0.19881684259366664, "grad_norm": 8.569207714004078, "learning_rate": 2e-06, "loss": 0.2341, "step": 857 }, { "epoch": 0.1990488342419673, "grad_norm": 18.141428991522456, "learning_rate": 2e-06, "loss": 0.3797, "step": 858 }, { "epoch": 0.19928082589026794, "grad_norm": 13.83844732174976, "learning_rate": 2e-06, "loss": 0.3004, "step": 859 }, { "epoch": 0.19951281753856862, "grad_norm": 7.570402229629171, "learning_rate": 2e-06, "loss": 0.2659, "step": 860 }, { "epoch": 0.19974480918686927, "grad_norm": 19.151243048179083, "learning_rate": 2e-06, "loss": 0.3457, "step": 861 }, { "epoch": 0.19997680083516994, "grad_norm": 9.023686026024441, "learning_rate": 2e-06, "loss": 0.2003, "step": 862 }, { "epoch": 0.2002087924834706, "grad_norm": 10.652610316585804, "learning_rate": 2e-06, "loss": 0.2778, "step": 863 }, { "epoch": 0.20044078413177124, "grad_norm": 21.104189125544075, "learning_rate": 2e-06, "loss": 0.3277, "step": 864 }, { "epoch": 0.20067277578007192, "grad_norm": 17.871109485100916, "learning_rate": 2e-06, "loss": 0.3817, "step": 865 }, { "epoch": 0.20090476742837257, "grad_norm": 10.595302363200277, "learning_rate": 2e-06, "loss": 0.3649, "step": 866 }, { "epoch": 0.20113675907667325, "grad_norm": 18.58896905743985, "learning_rate": 2e-06, "loss": 0.2794, "step": 867 }, { "epoch": 0.2013687507249739, "grad_norm": 8.56618420358794, "learning_rate": 2e-06, "loss": 0.332, "step": 868 }, { "epoch": 0.20160074237327455, "grad_norm": 7.871954271104144, "learning_rate": 2e-06, "loss": 0.3256, "step": 869 }, { "epoch": 0.20183273402157523, "grad_norm": 14.482295828257122, "learning_rate": 2e-06, "loss": 0.3513, "step": 870 }, { "epoch": 0.20206472566987588, "grad_norm": 15.59436837133702, "learning_rate": 2e-06, "loss": 0.4059, "step": 871 }, { "epoch": 0.20229671731817656, "grad_norm": 11.695894552841496, "learning_rate": 2e-06, "loss": 0.271, "step": 872 }, { "epoch": 0.2025287089664772, "grad_norm": 9.279739517381302, "learning_rate": 2e-06, "loss": 0.3572, "step": 873 }, { "epoch": 0.20276070061477786, "grad_norm": 9.436327949040212, "learning_rate": 2e-06, "loss": 0.3279, "step": 874 }, { "epoch": 0.20299269226307853, "grad_norm": 14.37356404419197, "learning_rate": 2e-06, "loss": 0.3537, "step": 875 }, { "epoch": 0.20322468391137918, "grad_norm": 10.460306167068557, "learning_rate": 2e-06, "loss": 0.3193, "step": 876 }, { "epoch": 0.20345667555967986, "grad_norm": 9.957388619811127, "learning_rate": 2e-06, "loss": 0.2464, "step": 877 }, { "epoch": 0.2036886672079805, "grad_norm": 14.043925791536124, "learning_rate": 2e-06, "loss": 0.3229, "step": 878 }, { "epoch": 0.20392065885628116, "grad_norm": 10.916638475637072, "learning_rate": 2e-06, "loss": 0.3094, "step": 879 }, { "epoch": 0.20415265050458184, "grad_norm": 14.207690648062343, "learning_rate": 2e-06, "loss": 0.2728, "step": 880 }, { "epoch": 0.2043846421528825, "grad_norm": 29.774947857721138, "learning_rate": 2e-06, "loss": 0.4734, "step": 881 }, { "epoch": 0.20461663380118317, "grad_norm": 12.886953112164875, "learning_rate": 2e-06, "loss": 0.2808, "step": 882 }, { "epoch": 0.20484862544948382, "grad_norm": 54.83652013490153, "learning_rate": 2e-06, "loss": 0.4345, "step": 883 }, { "epoch": 0.20508061709778447, "grad_norm": 13.836004473480973, "learning_rate": 2e-06, "loss": 0.2673, "step": 884 }, { "epoch": 0.20531260874608515, "grad_norm": 16.236716597033208, "learning_rate": 2e-06, "loss": 0.3773, "step": 885 }, { "epoch": 0.2055446003943858, "grad_norm": 9.584331646818612, "learning_rate": 2e-06, "loss": 0.2832, "step": 886 }, { "epoch": 0.20577659204268647, "grad_norm": 14.986402451734643, "learning_rate": 2e-06, "loss": 0.3277, "step": 887 }, { "epoch": 0.20600858369098712, "grad_norm": 17.0253000035854, "learning_rate": 2e-06, "loss": 0.3905, "step": 888 }, { "epoch": 0.20624057533928777, "grad_norm": 16.450631095420082, "learning_rate": 2e-06, "loss": 0.4094, "step": 889 }, { "epoch": 0.20647256698758845, "grad_norm": 18.750612286588492, "learning_rate": 2e-06, "loss": 0.3131, "step": 890 }, { "epoch": 0.2067045586358891, "grad_norm": 18.73328993359991, "learning_rate": 2e-06, "loss": 0.3061, "step": 891 }, { "epoch": 0.20693655028418978, "grad_norm": 12.70523808816334, "learning_rate": 2e-06, "loss": 0.282, "step": 892 }, { "epoch": 0.20716854193249043, "grad_norm": 19.8824827414663, "learning_rate": 2e-06, "loss": 0.2741, "step": 893 }, { "epoch": 0.20740053358079108, "grad_norm": 17.324392469861575, "learning_rate": 2e-06, "loss": 0.2701, "step": 894 }, { "epoch": 0.20763252522909176, "grad_norm": 13.78937251217679, "learning_rate": 2e-06, "loss": 0.328, "step": 895 }, { "epoch": 0.2078645168773924, "grad_norm": 16.027091968408882, "learning_rate": 2e-06, "loss": 0.3655, "step": 896 }, { "epoch": 0.20809650852569309, "grad_norm": 7.60913786100639, "learning_rate": 2e-06, "loss": 0.1704, "step": 897 }, { "epoch": 0.20832850017399374, "grad_norm": 28.677874376982054, "learning_rate": 2e-06, "loss": 0.4329, "step": 898 }, { "epoch": 0.20856049182229439, "grad_norm": 8.221011139573621, "learning_rate": 2e-06, "loss": 0.2507, "step": 899 }, { "epoch": 0.20879248347059506, "grad_norm": 11.4744661875519, "learning_rate": 2e-06, "loss": 0.3259, "step": 900 }, { "epoch": 0.20902447511889571, "grad_norm": 15.294883355532624, "learning_rate": 2e-06, "loss": 0.3727, "step": 901 }, { "epoch": 0.2092564667671964, "grad_norm": 19.055402797561403, "learning_rate": 2e-06, "loss": 0.303, "step": 902 }, { "epoch": 0.20948845841549704, "grad_norm": 23.698316035534873, "learning_rate": 2e-06, "loss": 0.3667, "step": 903 }, { "epoch": 0.2097204500637977, "grad_norm": 22.8054584811822, "learning_rate": 2e-06, "loss": 0.3585, "step": 904 }, { "epoch": 0.20995244171209837, "grad_norm": 20.083038658083378, "learning_rate": 2e-06, "loss": 0.3192, "step": 905 }, { "epoch": 0.21018443336039902, "grad_norm": 22.584586110449916, "learning_rate": 2e-06, "loss": 0.4078, "step": 906 }, { "epoch": 0.2104164250086997, "grad_norm": 15.037422485453048, "learning_rate": 2e-06, "loss": 0.2889, "step": 907 }, { "epoch": 0.21064841665700035, "grad_norm": 20.203868005835083, "learning_rate": 2e-06, "loss": 0.448, "step": 908 }, { "epoch": 0.210880408305301, "grad_norm": 15.16465590647367, "learning_rate": 2e-06, "loss": 0.3637, "step": 909 }, { "epoch": 0.21111239995360168, "grad_norm": 5.2078995129577805, "learning_rate": 2e-06, "loss": 0.1984, "step": 910 }, { "epoch": 0.21134439160190233, "grad_norm": 17.776999304081468, "learning_rate": 2e-06, "loss": 0.4419, "step": 911 }, { "epoch": 0.211576383250203, "grad_norm": 20.801335368698737, "learning_rate": 2e-06, "loss": 0.3527, "step": 912 }, { "epoch": 0.21180837489850365, "grad_norm": 12.89318139470534, "learning_rate": 2e-06, "loss": 0.2925, "step": 913 }, { "epoch": 0.2120403665468043, "grad_norm": 13.279124604676984, "learning_rate": 2e-06, "loss": 0.2692, "step": 914 }, { "epoch": 0.21227235819510498, "grad_norm": 31.361102208165416, "learning_rate": 2e-06, "loss": 0.4164, "step": 915 }, { "epoch": 0.21250434984340563, "grad_norm": 14.926166059180384, "learning_rate": 2e-06, "loss": 0.2819, "step": 916 }, { "epoch": 0.2127363414917063, "grad_norm": 9.505395111229008, "learning_rate": 2e-06, "loss": 0.3, "step": 917 }, { "epoch": 0.21296833314000696, "grad_norm": 18.19667325838775, "learning_rate": 2e-06, "loss": 0.2777, "step": 918 }, { "epoch": 0.2132003247883076, "grad_norm": 34.244764055740525, "learning_rate": 2e-06, "loss": 0.2731, "step": 919 }, { "epoch": 0.2134323164366083, "grad_norm": 13.827735412000134, "learning_rate": 2e-06, "loss": 0.312, "step": 920 }, { "epoch": 0.21366430808490894, "grad_norm": 20.705363084487367, "learning_rate": 2e-06, "loss": 0.3044, "step": 921 }, { "epoch": 0.21389629973320962, "grad_norm": 23.492053633413406, "learning_rate": 2e-06, "loss": 0.3717, "step": 922 }, { "epoch": 0.21412829138151027, "grad_norm": 17.796209396317078, "learning_rate": 2e-06, "loss": 0.3501, "step": 923 }, { "epoch": 0.21436028302981092, "grad_norm": 12.432070649047894, "learning_rate": 2e-06, "loss": 0.2967, "step": 924 }, { "epoch": 0.2145922746781116, "grad_norm": 7.377036669491922, "learning_rate": 2e-06, "loss": 0.2317, "step": 925 }, { "epoch": 0.21482426632641224, "grad_norm": 9.763829157513072, "learning_rate": 2e-06, "loss": 0.3069, "step": 926 }, { "epoch": 0.21505625797471292, "grad_norm": 14.067087869685949, "learning_rate": 2e-06, "loss": 0.3761, "step": 927 }, { "epoch": 0.21528824962301357, "grad_norm": 18.626123583576774, "learning_rate": 2e-06, "loss": 0.2611, "step": 928 }, { "epoch": 0.21552024127131422, "grad_norm": 13.482003556824175, "learning_rate": 2e-06, "loss": 0.3761, "step": 929 }, { "epoch": 0.2157522329196149, "grad_norm": 13.154857034380507, "learning_rate": 2e-06, "loss": 0.2722, "step": 930 }, { "epoch": 0.21598422456791555, "grad_norm": 31.45359846464405, "learning_rate": 2e-06, "loss": 0.3893, "step": 931 }, { "epoch": 0.21621621621621623, "grad_norm": 17.454676536343737, "learning_rate": 2e-06, "loss": 0.3429, "step": 932 }, { "epoch": 0.21644820786451688, "grad_norm": 13.781514708400756, "learning_rate": 2e-06, "loss": 0.3244, "step": 933 }, { "epoch": 0.21668019951281753, "grad_norm": 17.4126190084326, "learning_rate": 2e-06, "loss": 0.3813, "step": 934 }, { "epoch": 0.2169121911611182, "grad_norm": 18.19944774510841, "learning_rate": 2e-06, "loss": 0.3342, "step": 935 }, { "epoch": 0.21714418280941886, "grad_norm": 18.326170459536367, "learning_rate": 2e-06, "loss": 0.3103, "step": 936 }, { "epoch": 0.21737617445771953, "grad_norm": 10.250425954162694, "learning_rate": 2e-06, "loss": 0.3748, "step": 937 }, { "epoch": 0.21760816610602018, "grad_norm": 16.066598951701888, "learning_rate": 2e-06, "loss": 0.4068, "step": 938 }, { "epoch": 0.21784015775432083, "grad_norm": 10.189922232130474, "learning_rate": 2e-06, "loss": 0.3328, "step": 939 }, { "epoch": 0.2180721494026215, "grad_norm": 11.083488151842678, "learning_rate": 2e-06, "loss": 0.3137, "step": 940 }, { "epoch": 0.21830414105092216, "grad_norm": 15.80352256233468, "learning_rate": 2e-06, "loss": 0.417, "step": 941 }, { "epoch": 0.21853613269922284, "grad_norm": 10.397398946874906, "learning_rate": 2e-06, "loss": 0.258, "step": 942 }, { "epoch": 0.2187681243475235, "grad_norm": 6.2699124866115605, "learning_rate": 2e-06, "loss": 0.2945, "step": 943 }, { "epoch": 0.21900011599582414, "grad_norm": 23.96778950909715, "learning_rate": 2e-06, "loss": 0.3493, "step": 944 }, { "epoch": 0.21923210764412482, "grad_norm": 12.741819491680975, "learning_rate": 2e-06, "loss": 0.3068, "step": 945 }, { "epoch": 0.21946409929242547, "grad_norm": 22.214441345100916, "learning_rate": 2e-06, "loss": 0.306, "step": 946 }, { "epoch": 0.21969609094072615, "grad_norm": 12.91661021107639, "learning_rate": 2e-06, "loss": 0.2215, "step": 947 }, { "epoch": 0.2199280825890268, "grad_norm": 15.890385515844484, "learning_rate": 2e-06, "loss": 0.3135, "step": 948 }, { "epoch": 0.22016007423732745, "grad_norm": 17.44548027799827, "learning_rate": 2e-06, "loss": 0.4428, "step": 949 }, { "epoch": 0.22039206588562812, "grad_norm": 29.612385060726385, "learning_rate": 2e-06, "loss": 0.4595, "step": 950 }, { "epoch": 0.22062405753392877, "grad_norm": 14.192682314656473, "learning_rate": 2e-06, "loss": 0.3099, "step": 951 }, { "epoch": 0.22085604918222945, "grad_norm": 22.44367441783778, "learning_rate": 2e-06, "loss": 0.4008, "step": 952 }, { "epoch": 0.2210880408305301, "grad_norm": 16.51988527879832, "learning_rate": 2e-06, "loss": 0.3694, "step": 953 }, { "epoch": 0.22132003247883075, "grad_norm": 13.145552437440195, "learning_rate": 2e-06, "loss": 0.2829, "step": 954 }, { "epoch": 0.22155202412713143, "grad_norm": 17.045976637894437, "learning_rate": 2e-06, "loss": 0.3363, "step": 955 }, { "epoch": 0.22178401577543208, "grad_norm": 7.152755972453679, "learning_rate": 2e-06, "loss": 0.2048, "step": 956 }, { "epoch": 0.22201600742373276, "grad_norm": 14.476364773777387, "learning_rate": 2e-06, "loss": 0.3139, "step": 957 }, { "epoch": 0.2222479990720334, "grad_norm": 20.17448656913104, "learning_rate": 2e-06, "loss": 0.4056, "step": 958 }, { "epoch": 0.22247999072033406, "grad_norm": 17.447954517156088, "learning_rate": 2e-06, "loss": 0.3249, "step": 959 }, { "epoch": 0.22271198236863474, "grad_norm": 16.47574513287585, "learning_rate": 2e-06, "loss": 0.3578, "step": 960 }, { "epoch": 0.2229439740169354, "grad_norm": 18.833569055388487, "learning_rate": 2e-06, "loss": 0.2698, "step": 961 }, { "epoch": 0.22317596566523606, "grad_norm": 12.878810264947404, "learning_rate": 2e-06, "loss": 0.322, "step": 962 }, { "epoch": 0.22340795731353671, "grad_norm": 9.754848029157317, "learning_rate": 2e-06, "loss": 0.3028, "step": 963 }, { "epoch": 0.22363994896183736, "grad_norm": 10.458699501656957, "learning_rate": 2e-06, "loss": 0.2659, "step": 964 }, { "epoch": 0.22387194061013804, "grad_norm": 9.213002126694942, "learning_rate": 2e-06, "loss": 0.2413, "step": 965 }, { "epoch": 0.2241039322584387, "grad_norm": 22.77852040491952, "learning_rate": 2e-06, "loss": 0.3402, "step": 966 }, { "epoch": 0.22433592390673937, "grad_norm": 19.97429495238275, "learning_rate": 2e-06, "loss": 0.3356, "step": 967 }, { "epoch": 0.22456791555504002, "grad_norm": 22.725310459439754, "learning_rate": 2e-06, "loss": 0.3688, "step": 968 }, { "epoch": 0.22479990720334067, "grad_norm": 15.880456669293403, "learning_rate": 2e-06, "loss": 0.2583, "step": 969 }, { "epoch": 0.22503189885164135, "grad_norm": 12.562977799213726, "learning_rate": 2e-06, "loss": 0.3211, "step": 970 }, { "epoch": 0.225263890499942, "grad_norm": 38.52385320084602, "learning_rate": 2e-06, "loss": 0.4099, "step": 971 }, { "epoch": 0.22549588214824268, "grad_norm": 19.568782203246148, "learning_rate": 2e-06, "loss": 0.3392, "step": 972 }, { "epoch": 0.22572787379654333, "grad_norm": 22.75231297936888, "learning_rate": 2e-06, "loss": 0.3981, "step": 973 }, { "epoch": 0.22595986544484398, "grad_norm": 23.093165664624546, "learning_rate": 2e-06, "loss": 0.4342, "step": 974 }, { "epoch": 0.22619185709314465, "grad_norm": 21.85862109103549, "learning_rate": 2e-06, "loss": 0.3366, "step": 975 }, { "epoch": 0.2264238487414453, "grad_norm": 15.783198583306087, "learning_rate": 2e-06, "loss": 0.3545, "step": 976 }, { "epoch": 0.22665584038974598, "grad_norm": 20.00115524370116, "learning_rate": 2e-06, "loss": 0.419, "step": 977 }, { "epoch": 0.22688783203804663, "grad_norm": 17.13575397262277, "learning_rate": 2e-06, "loss": 0.4164, "step": 978 }, { "epoch": 0.22711982368634728, "grad_norm": 20.66236442506612, "learning_rate": 2e-06, "loss": 0.3921, "step": 979 }, { "epoch": 0.22735181533464796, "grad_norm": 26.425130152598022, "learning_rate": 2e-06, "loss": 0.4785, "step": 980 }, { "epoch": 0.2275838069829486, "grad_norm": 18.382125908922546, "learning_rate": 2e-06, "loss": 0.3235, "step": 981 }, { "epoch": 0.2278157986312493, "grad_norm": 22.287573999128128, "learning_rate": 2e-06, "loss": 0.3582, "step": 982 }, { "epoch": 0.22804779027954994, "grad_norm": 15.318127091663474, "learning_rate": 2e-06, "loss": 0.3105, "step": 983 }, { "epoch": 0.2282797819278506, "grad_norm": 15.2128289752655, "learning_rate": 2e-06, "loss": 0.2805, "step": 984 }, { "epoch": 0.22851177357615127, "grad_norm": 13.6362169257321, "learning_rate": 2e-06, "loss": 0.3603, "step": 985 }, { "epoch": 0.22874376522445192, "grad_norm": 19.427830286160788, "learning_rate": 2e-06, "loss": 0.3149, "step": 986 }, { "epoch": 0.22897575687275257, "grad_norm": 10.236059122445331, "learning_rate": 2e-06, "loss": 0.341, "step": 987 }, { "epoch": 0.22920774852105325, "grad_norm": 20.821455151022416, "learning_rate": 2e-06, "loss": 0.4091, "step": 988 }, { "epoch": 0.2294397401693539, "grad_norm": 45.94263705822538, "learning_rate": 2e-06, "loss": 0.2858, "step": 989 }, { "epoch": 0.22967173181765457, "grad_norm": 10.338890421609332, "learning_rate": 2e-06, "loss": 0.378, "step": 990 }, { "epoch": 0.22990372346595522, "grad_norm": 16.299094568182728, "learning_rate": 2e-06, "loss": 0.3556, "step": 991 }, { "epoch": 0.23013571511425587, "grad_norm": 26.428391344147883, "learning_rate": 2e-06, "loss": 0.4697, "step": 992 }, { "epoch": 0.23036770676255655, "grad_norm": 13.437299905823535, "learning_rate": 2e-06, "loss": 0.3016, "step": 993 }, { "epoch": 0.2305996984108572, "grad_norm": 12.436162205821939, "learning_rate": 2e-06, "loss": 0.3067, "step": 994 }, { "epoch": 0.23083169005915788, "grad_norm": 9.291303336441867, "learning_rate": 2e-06, "loss": 0.2107, "step": 995 }, { "epoch": 0.23106368170745853, "grad_norm": 18.667844626673965, "learning_rate": 2e-06, "loss": 0.277, "step": 996 }, { "epoch": 0.23129567335575918, "grad_norm": 10.092912796644061, "learning_rate": 2e-06, "loss": 0.3074, "step": 997 }, { "epoch": 0.23152766500405986, "grad_norm": 9.506478549775954, "learning_rate": 2e-06, "loss": 0.213, "step": 998 }, { "epoch": 0.2317596566523605, "grad_norm": 9.16061150676762, "learning_rate": 2e-06, "loss": 0.2959, "step": 999 }, { "epoch": 0.23199164830066119, "grad_norm": 13.513819503869733, "learning_rate": 2e-06, "loss": 0.3028, "step": 1000 }, { "epoch": 0.23222363994896184, "grad_norm": 12.741939938596376, "learning_rate": 2e-06, "loss": 0.3136, "step": 1001 }, { "epoch": 0.23245563159726249, "grad_norm": 19.698673117365807, "learning_rate": 2e-06, "loss": 0.3715, "step": 1002 }, { "epoch": 0.23268762324556316, "grad_norm": 19.48596566945166, "learning_rate": 2e-06, "loss": 0.3778, "step": 1003 }, { "epoch": 0.2329196148938638, "grad_norm": 13.556562374482986, "learning_rate": 2e-06, "loss": 0.3665, "step": 1004 }, { "epoch": 0.2331516065421645, "grad_norm": 9.87402518242158, "learning_rate": 2e-06, "loss": 0.3285, "step": 1005 }, { "epoch": 0.23338359819046514, "grad_norm": 13.973925681529213, "learning_rate": 2e-06, "loss": 0.3313, "step": 1006 }, { "epoch": 0.2336155898387658, "grad_norm": 12.270413839177959, "learning_rate": 2e-06, "loss": 0.2936, "step": 1007 }, { "epoch": 0.23384758148706647, "grad_norm": 10.414829485394955, "learning_rate": 2e-06, "loss": 0.3295, "step": 1008 }, { "epoch": 0.23407957313536712, "grad_norm": 8.359953878679415, "learning_rate": 2e-06, "loss": 0.2915, "step": 1009 }, { "epoch": 0.2343115647836678, "grad_norm": 21.553315281917737, "learning_rate": 2e-06, "loss": 0.244, "step": 1010 }, { "epoch": 0.23454355643196845, "grad_norm": 15.282341825830445, "learning_rate": 2e-06, "loss": 0.364, "step": 1011 }, { "epoch": 0.2347755480802691, "grad_norm": 11.105284562316186, "learning_rate": 2e-06, "loss": 0.3318, "step": 1012 }, { "epoch": 0.23500753972856978, "grad_norm": 26.70366182684123, "learning_rate": 2e-06, "loss": 0.4745, "step": 1013 }, { "epoch": 0.23523953137687043, "grad_norm": 13.969126715794628, "learning_rate": 2e-06, "loss": 0.3891, "step": 1014 }, { "epoch": 0.2354715230251711, "grad_norm": 7.149208235386196, "learning_rate": 2e-06, "loss": 0.2255, "step": 1015 }, { "epoch": 0.23570351467347175, "grad_norm": 9.282816481833471, "learning_rate": 2e-06, "loss": 0.2991, "step": 1016 }, { "epoch": 0.2359355063217724, "grad_norm": 11.997722779384231, "learning_rate": 2e-06, "loss": 0.3355, "step": 1017 }, { "epoch": 0.23616749797007308, "grad_norm": 7.261743927913489, "learning_rate": 2e-06, "loss": 0.2568, "step": 1018 }, { "epoch": 0.23639948961837373, "grad_norm": 9.285412136288146, "learning_rate": 2e-06, "loss": 0.2552, "step": 1019 }, { "epoch": 0.2366314812666744, "grad_norm": 14.550212842325665, "learning_rate": 2e-06, "loss": 0.2656, "step": 1020 }, { "epoch": 0.23686347291497506, "grad_norm": 13.672680558038127, "learning_rate": 2e-06, "loss": 0.3041, "step": 1021 }, { "epoch": 0.2370954645632757, "grad_norm": 13.175353642289746, "learning_rate": 2e-06, "loss": 0.2793, "step": 1022 }, { "epoch": 0.2373274562115764, "grad_norm": 14.857712932971321, "learning_rate": 2e-06, "loss": 0.2785, "step": 1023 }, { "epoch": 0.23755944785987704, "grad_norm": 14.940824776770855, "learning_rate": 2e-06, "loss": 0.2517, "step": 1024 }, { "epoch": 0.23779143950817772, "grad_norm": 27.551149012916042, "learning_rate": 2e-06, "loss": 0.412, "step": 1025 }, { "epoch": 0.23802343115647837, "grad_norm": 13.21079485337451, "learning_rate": 2e-06, "loss": 0.2841, "step": 1026 }, { "epoch": 0.23825542280477902, "grad_norm": 8.073937246007285, "learning_rate": 2e-06, "loss": 0.4117, "step": 1027 }, { "epoch": 0.2384874144530797, "grad_norm": 15.211564630308953, "learning_rate": 2e-06, "loss": 0.3509, "step": 1028 }, { "epoch": 0.23871940610138034, "grad_norm": 6.753994429637856, "learning_rate": 2e-06, "loss": 0.2745, "step": 1029 }, { "epoch": 0.23895139774968102, "grad_norm": 16.579544298722894, "learning_rate": 2e-06, "loss": 0.3769, "step": 1030 }, { "epoch": 0.23918338939798167, "grad_norm": 12.312689236209662, "learning_rate": 2e-06, "loss": 0.2979, "step": 1031 }, { "epoch": 0.23941538104628232, "grad_norm": 22.961333721387835, "learning_rate": 2e-06, "loss": 0.2667, "step": 1032 }, { "epoch": 0.239647372694583, "grad_norm": 26.38181638368504, "learning_rate": 2e-06, "loss": 0.2407, "step": 1033 }, { "epoch": 0.23987936434288365, "grad_norm": 21.122533742687466, "learning_rate": 2e-06, "loss": 0.324, "step": 1034 }, { "epoch": 0.24011135599118433, "grad_norm": 9.385543716224886, "learning_rate": 2e-06, "loss": 0.2322, "step": 1035 }, { "epoch": 0.24034334763948498, "grad_norm": 22.411151234064008, "learning_rate": 2e-06, "loss": 0.3298, "step": 1036 }, { "epoch": 0.24057533928778563, "grad_norm": 8.712554848424903, "learning_rate": 2e-06, "loss": 0.2662, "step": 1037 }, { "epoch": 0.2408073309360863, "grad_norm": 23.670062322774925, "learning_rate": 2e-06, "loss": 0.3704, "step": 1038 }, { "epoch": 0.24103932258438696, "grad_norm": 14.416551486323577, "learning_rate": 2e-06, "loss": 0.339, "step": 1039 }, { "epoch": 0.24127131423268763, "grad_norm": 11.777740161029039, "learning_rate": 2e-06, "loss": 0.307, "step": 1040 }, { "epoch": 0.24150330588098828, "grad_norm": 16.333864517062604, "learning_rate": 2e-06, "loss": 0.3238, "step": 1041 }, { "epoch": 0.24173529752928893, "grad_norm": 16.200709138501136, "learning_rate": 2e-06, "loss": 0.3817, "step": 1042 }, { "epoch": 0.2419672891775896, "grad_norm": 16.75392028228238, "learning_rate": 2e-06, "loss": 0.3657, "step": 1043 }, { "epoch": 0.24219928082589026, "grad_norm": 16.7005437223624, "learning_rate": 2e-06, "loss": 0.2499, "step": 1044 }, { "epoch": 0.24243127247419094, "grad_norm": 8.676317977311099, "learning_rate": 2e-06, "loss": 0.3238, "step": 1045 }, { "epoch": 0.2426632641224916, "grad_norm": 25.531733213211815, "learning_rate": 2e-06, "loss": 0.3677, "step": 1046 }, { "epoch": 0.24289525577079224, "grad_norm": 15.53633794877907, "learning_rate": 2e-06, "loss": 0.3253, "step": 1047 }, { "epoch": 0.24312724741909292, "grad_norm": 19.81381137188102, "learning_rate": 2e-06, "loss": 0.3648, "step": 1048 }, { "epoch": 0.24335923906739357, "grad_norm": 12.25975324260386, "learning_rate": 2e-06, "loss": 0.1973, "step": 1049 }, { "epoch": 0.24359123071569425, "grad_norm": 14.00865679850547, "learning_rate": 2e-06, "loss": 0.3152, "step": 1050 }, { "epoch": 0.2438232223639949, "grad_norm": 12.392977697215564, "learning_rate": 2e-06, "loss": 0.3584, "step": 1051 }, { "epoch": 0.24405521401229555, "grad_norm": 13.265639482781935, "learning_rate": 2e-06, "loss": 0.3614, "step": 1052 }, { "epoch": 0.24428720566059622, "grad_norm": 12.65724022381264, "learning_rate": 2e-06, "loss": 0.2673, "step": 1053 }, { "epoch": 0.24451919730889687, "grad_norm": 19.42837132743988, "learning_rate": 2e-06, "loss": 0.3473, "step": 1054 }, { "epoch": 0.24475118895719755, "grad_norm": 14.077878346698146, "learning_rate": 2e-06, "loss": 0.3495, "step": 1055 }, { "epoch": 0.2449831806054982, "grad_norm": 14.184080625576248, "learning_rate": 2e-06, "loss": 0.2904, "step": 1056 }, { "epoch": 0.24521517225379885, "grad_norm": 14.88015498904286, "learning_rate": 2e-06, "loss": 0.2611, "step": 1057 }, { "epoch": 0.24544716390209953, "grad_norm": 18.393845134928288, "learning_rate": 2e-06, "loss": 0.3404, "step": 1058 }, { "epoch": 0.24567915555040018, "grad_norm": 12.814313086491294, "learning_rate": 2e-06, "loss": 0.3263, "step": 1059 }, { "epoch": 0.24591114719870086, "grad_norm": 10.02474449009755, "learning_rate": 2e-06, "loss": 0.1984, "step": 1060 }, { "epoch": 0.2461431388470015, "grad_norm": 22.079614637832556, "learning_rate": 2e-06, "loss": 0.405, "step": 1061 }, { "epoch": 0.24637513049530216, "grad_norm": 20.259832978927108, "learning_rate": 2e-06, "loss": 0.3395, "step": 1062 }, { "epoch": 0.24660712214360284, "grad_norm": 15.223930734276674, "learning_rate": 2e-06, "loss": 0.3289, "step": 1063 }, { "epoch": 0.24683911379190349, "grad_norm": 15.643931293073702, "learning_rate": 2e-06, "loss": 0.2943, "step": 1064 }, { "epoch": 0.24707110544020416, "grad_norm": 10.985354961154403, "learning_rate": 2e-06, "loss": 0.2698, "step": 1065 }, { "epoch": 0.24730309708850481, "grad_norm": 14.823172015971181, "learning_rate": 2e-06, "loss": 0.2952, "step": 1066 }, { "epoch": 0.24753508873680546, "grad_norm": 23.05124526115744, "learning_rate": 2e-06, "loss": 0.3497, "step": 1067 }, { "epoch": 0.24776708038510614, "grad_norm": 20.657372065022596, "learning_rate": 2e-06, "loss": 0.3718, "step": 1068 }, { "epoch": 0.2479990720334068, "grad_norm": 8.63115021917154, "learning_rate": 2e-06, "loss": 0.2653, "step": 1069 }, { "epoch": 0.24823106368170747, "grad_norm": 9.151095904091301, "learning_rate": 2e-06, "loss": 0.2463, "step": 1070 }, { "epoch": 0.24846305533000812, "grad_norm": 17.992692596229116, "learning_rate": 2e-06, "loss": 0.2568, "step": 1071 }, { "epoch": 0.24869504697830877, "grad_norm": 14.4339784894775, "learning_rate": 2e-06, "loss": 0.298, "step": 1072 }, { "epoch": 0.24892703862660945, "grad_norm": 12.390189004009237, "learning_rate": 2e-06, "loss": 0.2571, "step": 1073 }, { "epoch": 0.2491590302749101, "grad_norm": 12.565613467230603, "learning_rate": 2e-06, "loss": 0.2154, "step": 1074 }, { "epoch": 0.24939102192321078, "grad_norm": 23.2959019541224, "learning_rate": 2e-06, "loss": 0.3911, "step": 1075 }, { "epoch": 0.24962301357151143, "grad_norm": 14.039259928255964, "learning_rate": 2e-06, "loss": 0.2285, "step": 1076 }, { "epoch": 0.24985500521981208, "grad_norm": 19.598970977130815, "learning_rate": 2e-06, "loss": 0.4487, "step": 1077 }, { "epoch": 0.25008699686811275, "grad_norm": 12.73183841773306, "learning_rate": 2e-06, "loss": 0.3772, "step": 1078 }, { "epoch": 0.25031898851641343, "grad_norm": 17.74810849129701, "learning_rate": 2e-06, "loss": 0.3162, "step": 1079 }, { "epoch": 0.25055098016471405, "grad_norm": 23.191357988387217, "learning_rate": 2e-06, "loss": 0.4153, "step": 1080 }, { "epoch": 0.25078297181301473, "grad_norm": 20.19849388943989, "learning_rate": 2e-06, "loss": 0.4059, "step": 1081 }, { "epoch": 0.2510149634613154, "grad_norm": 11.232999446060697, "learning_rate": 2e-06, "loss": 0.3919, "step": 1082 }, { "epoch": 0.25124695510961603, "grad_norm": 16.45997289285503, "learning_rate": 2e-06, "loss": 0.3498, "step": 1083 }, { "epoch": 0.2514789467579167, "grad_norm": 15.723585351231751, "learning_rate": 2e-06, "loss": 0.2632, "step": 1084 }, { "epoch": 0.2517109384062174, "grad_norm": 7.330028584689176, "learning_rate": 2e-06, "loss": 0.2012, "step": 1085 }, { "epoch": 0.251942930054518, "grad_norm": 13.88113627267473, "learning_rate": 2e-06, "loss": 0.2686, "step": 1086 }, { "epoch": 0.2521749217028187, "grad_norm": 19.509094614015467, "learning_rate": 2e-06, "loss": 0.3515, "step": 1087 }, { "epoch": 0.25240691335111937, "grad_norm": 20.16682881221863, "learning_rate": 2e-06, "loss": 0.4119, "step": 1088 }, { "epoch": 0.25263890499942004, "grad_norm": 11.957155629814546, "learning_rate": 2e-06, "loss": 0.3324, "step": 1089 }, { "epoch": 0.25287089664772067, "grad_norm": 21.806967508834347, "learning_rate": 2e-06, "loss": 0.4624, "step": 1090 }, { "epoch": 0.25310288829602134, "grad_norm": 13.749685076401235, "learning_rate": 2e-06, "loss": 0.3831, "step": 1091 }, { "epoch": 0.253334879944322, "grad_norm": 16.656854478247116, "learning_rate": 2e-06, "loss": 0.2794, "step": 1092 }, { "epoch": 0.25356687159262264, "grad_norm": 17.627596146798037, "learning_rate": 2e-06, "loss": 0.3494, "step": 1093 }, { "epoch": 0.2537988632409233, "grad_norm": 19.745119628195066, "learning_rate": 2e-06, "loss": 0.4636, "step": 1094 }, { "epoch": 0.254030854889224, "grad_norm": 17.18830926296359, "learning_rate": 2e-06, "loss": 0.3584, "step": 1095 }, { "epoch": 0.2542628465375246, "grad_norm": 9.4163939741239, "learning_rate": 2e-06, "loss": 0.3597, "step": 1096 }, { "epoch": 0.2544948381858253, "grad_norm": 16.018761774541133, "learning_rate": 2e-06, "loss": 0.3183, "step": 1097 }, { "epoch": 0.254726829834126, "grad_norm": 17.99094305205517, "learning_rate": 2e-06, "loss": 0.3835, "step": 1098 }, { "epoch": 0.25495882148242666, "grad_norm": 19.22211629507197, "learning_rate": 2e-06, "loss": 0.2728, "step": 1099 }, { "epoch": 0.2551908131307273, "grad_norm": 11.092634757643538, "learning_rate": 2e-06, "loss": 0.2726, "step": 1100 }, { "epoch": 0.25542280477902796, "grad_norm": 27.83008647407575, "learning_rate": 2e-06, "loss": 0.4326, "step": 1101 }, { "epoch": 0.25565479642732863, "grad_norm": 15.001540429809646, "learning_rate": 2e-06, "loss": 0.3416, "step": 1102 }, { "epoch": 0.25588678807562926, "grad_norm": 15.144143645466546, "learning_rate": 2e-06, "loss": 0.3476, "step": 1103 }, { "epoch": 0.25611877972392993, "grad_norm": 15.645113127825065, "learning_rate": 2e-06, "loss": 0.3551, "step": 1104 }, { "epoch": 0.2563507713722306, "grad_norm": 10.928220909805345, "learning_rate": 2e-06, "loss": 0.2243, "step": 1105 }, { "epoch": 0.25658276302053123, "grad_norm": 11.445144928478546, "learning_rate": 2e-06, "loss": 0.2368, "step": 1106 }, { "epoch": 0.2568147546688319, "grad_norm": 7.806367904222486, "learning_rate": 2e-06, "loss": 0.211, "step": 1107 }, { "epoch": 0.2570467463171326, "grad_norm": 22.770218588703784, "learning_rate": 2e-06, "loss": 0.4157, "step": 1108 }, { "epoch": 0.25727873796543327, "grad_norm": 24.118256132266875, "learning_rate": 2e-06, "loss": 0.3311, "step": 1109 }, { "epoch": 0.2575107296137339, "grad_norm": 14.418260243799653, "learning_rate": 2e-06, "loss": 0.2665, "step": 1110 }, { "epoch": 0.25774272126203457, "grad_norm": 17.95557610022089, "learning_rate": 2e-06, "loss": 0.4597, "step": 1111 }, { "epoch": 0.25797471291033525, "grad_norm": 9.998203620016008, "learning_rate": 2e-06, "loss": 0.264, "step": 1112 }, { "epoch": 0.25820670455863587, "grad_norm": 15.98410256214507, "learning_rate": 2e-06, "loss": 0.3399, "step": 1113 }, { "epoch": 0.25843869620693655, "grad_norm": 14.65274151237759, "learning_rate": 2e-06, "loss": 0.3357, "step": 1114 }, { "epoch": 0.2586706878552372, "grad_norm": 15.099128606884662, "learning_rate": 2e-06, "loss": 0.254, "step": 1115 }, { "epoch": 0.25890267950353785, "grad_norm": 24.82947299465006, "learning_rate": 2e-06, "loss": 0.3305, "step": 1116 }, { "epoch": 0.2591346711518385, "grad_norm": 13.261084761295198, "learning_rate": 2e-06, "loss": 0.3377, "step": 1117 }, { "epoch": 0.2593666628001392, "grad_norm": 22.691482458679683, "learning_rate": 2e-06, "loss": 0.3698, "step": 1118 }, { "epoch": 0.2595986544484399, "grad_norm": 14.41997073811382, "learning_rate": 2e-06, "loss": 0.4644, "step": 1119 }, { "epoch": 0.2598306460967405, "grad_norm": 15.695126032511784, "learning_rate": 2e-06, "loss": 0.3084, "step": 1120 }, { "epoch": 0.2600626377450412, "grad_norm": 20.452165103407637, "learning_rate": 2e-06, "loss": 0.3405, "step": 1121 }, { "epoch": 0.26029462939334186, "grad_norm": 19.152384814044723, "learning_rate": 2e-06, "loss": 0.3941, "step": 1122 }, { "epoch": 0.2605266210416425, "grad_norm": 10.929796244886692, "learning_rate": 2e-06, "loss": 0.3085, "step": 1123 }, { "epoch": 0.26075861268994316, "grad_norm": 17.796433072113317, "learning_rate": 2e-06, "loss": 0.4109, "step": 1124 }, { "epoch": 0.26099060433824384, "grad_norm": 10.425466920495225, "learning_rate": 2e-06, "loss": 0.3517, "step": 1125 }, { "epoch": 0.26122259598654446, "grad_norm": 6.015552175860235, "learning_rate": 2e-06, "loss": 0.2988, "step": 1126 }, { "epoch": 0.26145458763484514, "grad_norm": 6.126586375825967, "learning_rate": 2e-06, "loss": 0.2415, "step": 1127 }, { "epoch": 0.2616865792831458, "grad_norm": 11.863623548309842, "learning_rate": 2e-06, "loss": 0.2739, "step": 1128 }, { "epoch": 0.2619185709314465, "grad_norm": 14.909953393587152, "learning_rate": 2e-06, "loss": 0.3223, "step": 1129 }, { "epoch": 0.2621505625797471, "grad_norm": 14.646054099158548, "learning_rate": 2e-06, "loss": 0.3307, "step": 1130 }, { "epoch": 0.2623825542280478, "grad_norm": 13.629312091258925, "learning_rate": 2e-06, "loss": 0.3004, "step": 1131 }, { "epoch": 0.26261454587634847, "grad_norm": 24.391131892080814, "learning_rate": 2e-06, "loss": 0.3546, "step": 1132 }, { "epoch": 0.2628465375246491, "grad_norm": 9.685129053473396, "learning_rate": 2e-06, "loss": 0.3143, "step": 1133 }, { "epoch": 0.26307852917294977, "grad_norm": 19.920212953026695, "learning_rate": 2e-06, "loss": 0.3734, "step": 1134 }, { "epoch": 0.26331052082125045, "grad_norm": 18.873355014808933, "learning_rate": 2e-06, "loss": 0.2707, "step": 1135 }, { "epoch": 0.26354251246955107, "grad_norm": 15.679849289037026, "learning_rate": 2e-06, "loss": 0.338, "step": 1136 }, { "epoch": 0.26377450411785175, "grad_norm": 13.53924087047476, "learning_rate": 2e-06, "loss": 0.3066, "step": 1137 }, { "epoch": 0.2640064957661524, "grad_norm": 11.745611010016228, "learning_rate": 2e-06, "loss": 0.3399, "step": 1138 }, { "epoch": 0.2642384874144531, "grad_norm": 15.225621934010864, "learning_rate": 2e-06, "loss": 0.244, "step": 1139 }, { "epoch": 0.2644704790627537, "grad_norm": 10.40783059783653, "learning_rate": 2e-06, "loss": 0.2386, "step": 1140 }, { "epoch": 0.2647024707110544, "grad_norm": 15.851959230703185, "learning_rate": 2e-06, "loss": 0.3335, "step": 1141 }, { "epoch": 0.2649344623593551, "grad_norm": 22.66210347750043, "learning_rate": 2e-06, "loss": 0.3033, "step": 1142 }, { "epoch": 0.2651664540076557, "grad_norm": 26.031860194956312, "learning_rate": 2e-06, "loss": 0.3799, "step": 1143 }, { "epoch": 0.2653984456559564, "grad_norm": 18.188321934019086, "learning_rate": 2e-06, "loss": 0.3478, "step": 1144 }, { "epoch": 0.26563043730425706, "grad_norm": 9.418783234825963, "learning_rate": 2e-06, "loss": 0.2319, "step": 1145 }, { "epoch": 0.2658624289525577, "grad_norm": 12.05426823213287, "learning_rate": 2e-06, "loss": 0.4227, "step": 1146 }, { "epoch": 0.26609442060085836, "grad_norm": 26.72325329812725, "learning_rate": 2e-06, "loss": 0.3139, "step": 1147 }, { "epoch": 0.26632641224915904, "grad_norm": 16.733829455551838, "learning_rate": 2e-06, "loss": 0.3918, "step": 1148 }, { "epoch": 0.2665584038974597, "grad_norm": 14.249457828238011, "learning_rate": 2e-06, "loss": 0.242, "step": 1149 }, { "epoch": 0.26679039554576034, "grad_norm": 19.86506070787913, "learning_rate": 2e-06, "loss": 0.3798, "step": 1150 }, { "epoch": 0.267022387194061, "grad_norm": 13.956094717202392, "learning_rate": 2e-06, "loss": 0.2447, "step": 1151 }, { "epoch": 0.2672543788423617, "grad_norm": 10.456531626943017, "learning_rate": 2e-06, "loss": 0.3394, "step": 1152 }, { "epoch": 0.2674863704906623, "grad_norm": 11.825638890958947, "learning_rate": 2e-06, "loss": 0.2495, "step": 1153 }, { "epoch": 0.267718362138963, "grad_norm": 16.889888110765746, "learning_rate": 2e-06, "loss": 0.3971, "step": 1154 }, { "epoch": 0.2679503537872637, "grad_norm": 14.904612459119354, "learning_rate": 2e-06, "loss": 0.2268, "step": 1155 }, { "epoch": 0.2681823454355643, "grad_norm": 15.751958535325018, "learning_rate": 2e-06, "loss": 0.2576, "step": 1156 }, { "epoch": 0.268414337083865, "grad_norm": 11.142765466493485, "learning_rate": 2e-06, "loss": 0.3464, "step": 1157 }, { "epoch": 0.26864632873216565, "grad_norm": 11.463689658847697, "learning_rate": 2e-06, "loss": 0.2759, "step": 1158 }, { "epoch": 0.26887832038046633, "grad_norm": 15.396711358653583, "learning_rate": 2e-06, "loss": 0.2698, "step": 1159 }, { "epoch": 0.26911031202876695, "grad_norm": 20.355215766836594, "learning_rate": 2e-06, "loss": 0.2896, "step": 1160 }, { "epoch": 0.26934230367706763, "grad_norm": 22.16248015841271, "learning_rate": 2e-06, "loss": 0.3936, "step": 1161 }, { "epoch": 0.2695742953253683, "grad_norm": 25.343187131455633, "learning_rate": 2e-06, "loss": 0.3586, "step": 1162 }, { "epoch": 0.26980628697366893, "grad_norm": 7.171393164606418, "learning_rate": 2e-06, "loss": 0.1983, "step": 1163 }, { "epoch": 0.2700382786219696, "grad_norm": 19.05142625020754, "learning_rate": 2e-06, "loss": 0.3657, "step": 1164 }, { "epoch": 0.2702702702702703, "grad_norm": 8.95382335460153, "learning_rate": 2e-06, "loss": 0.3213, "step": 1165 }, { "epoch": 0.2705022619185709, "grad_norm": 12.81735944131886, "learning_rate": 2e-06, "loss": 0.3069, "step": 1166 }, { "epoch": 0.2707342535668716, "grad_norm": 17.805420961529475, "learning_rate": 2e-06, "loss": 0.2557, "step": 1167 }, { "epoch": 0.27096624521517226, "grad_norm": 16.874564931891637, "learning_rate": 2e-06, "loss": 0.3154, "step": 1168 }, { "epoch": 0.27119823686347294, "grad_norm": 11.799433595683006, "learning_rate": 2e-06, "loss": 0.2859, "step": 1169 }, { "epoch": 0.27143022851177356, "grad_norm": 9.39700626037502, "learning_rate": 2e-06, "loss": 0.2184, "step": 1170 }, { "epoch": 0.27166222016007424, "grad_norm": 17.073536459242636, "learning_rate": 2e-06, "loss": 0.2849, "step": 1171 }, { "epoch": 0.2718942118083749, "grad_norm": 21.22630275472056, "learning_rate": 2e-06, "loss": 0.3663, "step": 1172 }, { "epoch": 0.27212620345667554, "grad_norm": 12.59115651190941, "learning_rate": 2e-06, "loss": 0.3009, "step": 1173 }, { "epoch": 0.2723581951049762, "grad_norm": 16.968572807837223, "learning_rate": 2e-06, "loss": 0.4332, "step": 1174 }, { "epoch": 0.2725901867532769, "grad_norm": 9.655637385536188, "learning_rate": 2e-06, "loss": 0.2609, "step": 1175 }, { "epoch": 0.2728221784015775, "grad_norm": 13.890112671278628, "learning_rate": 2e-06, "loss": 0.2625, "step": 1176 }, { "epoch": 0.2730541700498782, "grad_norm": 12.606139803898643, "learning_rate": 2e-06, "loss": 0.3785, "step": 1177 }, { "epoch": 0.2732861616981789, "grad_norm": 13.28056042143577, "learning_rate": 2e-06, "loss": 0.2906, "step": 1178 }, { "epoch": 0.27351815334647955, "grad_norm": 30.27887213841802, "learning_rate": 2e-06, "loss": 0.4022, "step": 1179 }, { "epoch": 0.2737501449947802, "grad_norm": 7.930895909161131, "learning_rate": 2e-06, "loss": 0.2247, "step": 1180 }, { "epoch": 0.27398213664308085, "grad_norm": 13.533083064817841, "learning_rate": 2e-06, "loss": 0.2595, "step": 1181 }, { "epoch": 0.27421412829138153, "grad_norm": 10.212297449937365, "learning_rate": 2e-06, "loss": 0.275, "step": 1182 }, { "epoch": 0.27444611993968215, "grad_norm": 10.988417124060847, "learning_rate": 2e-06, "loss": 0.213, "step": 1183 }, { "epoch": 0.27467811158798283, "grad_norm": 24.17803575510525, "learning_rate": 2e-06, "loss": 0.4077, "step": 1184 }, { "epoch": 0.2749101032362835, "grad_norm": 9.680825814303265, "learning_rate": 2e-06, "loss": 0.2429, "step": 1185 }, { "epoch": 0.27514209488458413, "grad_norm": 6.049044301937683, "learning_rate": 2e-06, "loss": 0.2024, "step": 1186 }, { "epoch": 0.2753740865328848, "grad_norm": 18.00295476464138, "learning_rate": 2e-06, "loss": 0.2559, "step": 1187 }, { "epoch": 0.2756060781811855, "grad_norm": 15.30270566627656, "learning_rate": 2e-06, "loss": 0.3257, "step": 1188 }, { "epoch": 0.27583806982948617, "grad_norm": 21.694395810281325, "learning_rate": 2e-06, "loss": 0.3079, "step": 1189 }, { "epoch": 0.2760700614777868, "grad_norm": 8.331967188153596, "learning_rate": 2e-06, "loss": 0.1824, "step": 1190 }, { "epoch": 0.27630205312608747, "grad_norm": 15.415548630144007, "learning_rate": 2e-06, "loss": 0.3489, "step": 1191 }, { "epoch": 0.27653404477438814, "grad_norm": 14.233338810505089, "learning_rate": 2e-06, "loss": 0.1883, "step": 1192 }, { "epoch": 0.27676603642268877, "grad_norm": 13.344673584990591, "learning_rate": 2e-06, "loss": 0.2581, "step": 1193 }, { "epoch": 0.27699802807098944, "grad_norm": 8.892289670198297, "learning_rate": 2e-06, "loss": 0.3472, "step": 1194 }, { "epoch": 0.2772300197192901, "grad_norm": 7.283026978981145, "learning_rate": 2e-06, "loss": 0.2081, "step": 1195 }, { "epoch": 0.27746201136759074, "grad_norm": 20.765644495960995, "learning_rate": 2e-06, "loss": 0.3821, "step": 1196 }, { "epoch": 0.2776940030158914, "grad_norm": 11.812462517867834, "learning_rate": 2e-06, "loss": 0.2431, "step": 1197 }, { "epoch": 0.2779259946641921, "grad_norm": 21.853687986115002, "learning_rate": 2e-06, "loss": 0.4368, "step": 1198 }, { "epoch": 0.2781579863124928, "grad_norm": 29.536970035347192, "learning_rate": 2e-06, "loss": 0.2561, "step": 1199 }, { "epoch": 0.2783899779607934, "grad_norm": 15.492748748291122, "learning_rate": 2e-06, "loss": 0.2616, "step": 1200 }, { "epoch": 0.2786219696090941, "grad_norm": 11.006289448991424, "learning_rate": 2e-06, "loss": 0.2992, "step": 1201 }, { "epoch": 0.27885396125739476, "grad_norm": 16.026435056899846, "learning_rate": 2e-06, "loss": 0.2834, "step": 1202 }, { "epoch": 0.2790859529056954, "grad_norm": 14.183111450349523, "learning_rate": 2e-06, "loss": 0.3305, "step": 1203 }, { "epoch": 0.27931794455399606, "grad_norm": 12.028101213734947, "learning_rate": 2e-06, "loss": 0.4311, "step": 1204 }, { "epoch": 0.27954993620229673, "grad_norm": 15.728963078237921, "learning_rate": 2e-06, "loss": 0.2617, "step": 1205 }, { "epoch": 0.27978192785059736, "grad_norm": 18.74674073643408, "learning_rate": 2e-06, "loss": 0.2332, "step": 1206 }, { "epoch": 0.28001391949889803, "grad_norm": 7.8767526680463735, "learning_rate": 2e-06, "loss": 0.2635, "step": 1207 }, { "epoch": 0.2802459111471987, "grad_norm": 19.589274869050506, "learning_rate": 2e-06, "loss": 0.3261, "step": 1208 }, { "epoch": 0.2804779027954994, "grad_norm": 11.569333600691113, "learning_rate": 2e-06, "loss": 0.2491, "step": 1209 }, { "epoch": 0.2807098944438, "grad_norm": 20.27838346921668, "learning_rate": 2e-06, "loss": 0.365, "step": 1210 }, { "epoch": 0.2809418860921007, "grad_norm": 16.174668249143103, "learning_rate": 2e-06, "loss": 0.2677, "step": 1211 }, { "epoch": 0.28117387774040137, "grad_norm": 9.592205036160628, "learning_rate": 2e-06, "loss": 0.4643, "step": 1212 }, { "epoch": 0.281405869388702, "grad_norm": 11.243429081806028, "learning_rate": 2e-06, "loss": 0.2655, "step": 1213 }, { "epoch": 0.28163786103700267, "grad_norm": 22.27363746188085, "learning_rate": 2e-06, "loss": 0.4154, "step": 1214 }, { "epoch": 0.28186985268530335, "grad_norm": 16.73996394226011, "learning_rate": 2e-06, "loss": 0.3184, "step": 1215 }, { "epoch": 0.28210184433360397, "grad_norm": 15.017671158539848, "learning_rate": 2e-06, "loss": 0.184, "step": 1216 }, { "epoch": 0.28233383598190465, "grad_norm": 10.718819280489564, "learning_rate": 2e-06, "loss": 0.226, "step": 1217 }, { "epoch": 0.2825658276302053, "grad_norm": 16.07308944496508, "learning_rate": 2e-06, "loss": 0.4901, "step": 1218 }, { "epoch": 0.28279781927850595, "grad_norm": 21.358455166015435, "learning_rate": 2e-06, "loss": 0.3208, "step": 1219 }, { "epoch": 0.2830298109268066, "grad_norm": 11.131587047899961, "learning_rate": 2e-06, "loss": 0.4496, "step": 1220 }, { "epoch": 0.2832618025751073, "grad_norm": 13.583352811882996, "learning_rate": 2e-06, "loss": 0.2877, "step": 1221 }, { "epoch": 0.283493794223408, "grad_norm": 18.275221017841407, "learning_rate": 2e-06, "loss": 0.346, "step": 1222 }, { "epoch": 0.2837257858717086, "grad_norm": 13.559004500486491, "learning_rate": 2e-06, "loss": 0.305, "step": 1223 }, { "epoch": 0.2839577775200093, "grad_norm": 12.193009244722145, "learning_rate": 2e-06, "loss": 0.3228, "step": 1224 }, { "epoch": 0.28418976916830996, "grad_norm": 16.417106957183268, "learning_rate": 2e-06, "loss": 0.3148, "step": 1225 }, { "epoch": 0.2844217608166106, "grad_norm": 6.873063427629634, "learning_rate": 2e-06, "loss": 0.2427, "step": 1226 }, { "epoch": 0.28465375246491126, "grad_norm": 14.000907125813223, "learning_rate": 2e-06, "loss": 0.266, "step": 1227 }, { "epoch": 0.28488574411321194, "grad_norm": 6.750238251953412, "learning_rate": 2e-06, "loss": 0.2787, "step": 1228 }, { "epoch": 0.28511773576151256, "grad_norm": 9.187628279617533, "learning_rate": 2e-06, "loss": 0.3106, "step": 1229 }, { "epoch": 0.28534972740981324, "grad_norm": 26.616245578904888, "learning_rate": 2e-06, "loss": 0.2525, "step": 1230 }, { "epoch": 0.2855817190581139, "grad_norm": 14.429684634728005, "learning_rate": 2e-06, "loss": 0.3717, "step": 1231 }, { "epoch": 0.2858137107064146, "grad_norm": 15.696007478073717, "learning_rate": 2e-06, "loss": 0.3244, "step": 1232 }, { "epoch": 0.2860457023547152, "grad_norm": 21.349186599737536, "learning_rate": 2e-06, "loss": 0.3987, "step": 1233 }, { "epoch": 0.2862776940030159, "grad_norm": 8.12123750761895, "learning_rate": 2e-06, "loss": 0.2641, "step": 1234 }, { "epoch": 0.28650968565131657, "grad_norm": 15.21984604745377, "learning_rate": 2e-06, "loss": 0.2997, "step": 1235 }, { "epoch": 0.2867416772996172, "grad_norm": 12.726342178155084, "learning_rate": 2e-06, "loss": 0.3333, "step": 1236 }, { "epoch": 0.28697366894791787, "grad_norm": 16.90525595728918, "learning_rate": 2e-06, "loss": 0.3284, "step": 1237 }, { "epoch": 0.28720566059621855, "grad_norm": 17.579056592317873, "learning_rate": 2e-06, "loss": 0.3973, "step": 1238 }, { "epoch": 0.28743765224451917, "grad_norm": 15.35796271778437, "learning_rate": 2e-06, "loss": 0.3152, "step": 1239 }, { "epoch": 0.28766964389281985, "grad_norm": 21.02104399086055, "learning_rate": 2e-06, "loss": 0.3467, "step": 1240 }, { "epoch": 0.2879016355411205, "grad_norm": 5.394065636271639, "learning_rate": 2e-06, "loss": 0.217, "step": 1241 }, { "epoch": 0.2881336271894212, "grad_norm": 18.591886314525397, "learning_rate": 2e-06, "loss": 0.3213, "step": 1242 }, { "epoch": 0.2883656188377218, "grad_norm": 22.04197488485568, "learning_rate": 2e-06, "loss": 0.303, "step": 1243 }, { "epoch": 0.2885976104860225, "grad_norm": 12.995736617500656, "learning_rate": 2e-06, "loss": 0.2668, "step": 1244 }, { "epoch": 0.2888296021343232, "grad_norm": 23.646397067968046, "learning_rate": 2e-06, "loss": 0.3176, "step": 1245 }, { "epoch": 0.2890615937826238, "grad_norm": 11.409118689634937, "learning_rate": 2e-06, "loss": 0.2784, "step": 1246 }, { "epoch": 0.2892935854309245, "grad_norm": 11.742027071172174, "learning_rate": 2e-06, "loss": 0.4533, "step": 1247 }, { "epoch": 0.28952557707922516, "grad_norm": 8.003574151157743, "learning_rate": 2e-06, "loss": 0.2324, "step": 1248 }, { "epoch": 0.2897575687275258, "grad_norm": 18.4898821926245, "learning_rate": 2e-06, "loss": 0.3342, "step": 1249 }, { "epoch": 0.28998956037582646, "grad_norm": 19.712215280172444, "learning_rate": 2e-06, "loss": 0.3294, "step": 1250 }, { "epoch": 0.29022155202412714, "grad_norm": 12.006452342747606, "learning_rate": 2e-06, "loss": 0.2672, "step": 1251 }, { "epoch": 0.2904535436724278, "grad_norm": 9.992023893613998, "learning_rate": 2e-06, "loss": 0.2019, "step": 1252 }, { "epoch": 0.29068553532072844, "grad_norm": 13.6861127169211, "learning_rate": 2e-06, "loss": 0.255, "step": 1253 }, { "epoch": 0.2909175269690291, "grad_norm": 15.02484608301213, "learning_rate": 2e-06, "loss": 0.3086, "step": 1254 }, { "epoch": 0.2911495186173298, "grad_norm": 9.942533776328188, "learning_rate": 2e-06, "loss": 0.2822, "step": 1255 }, { "epoch": 0.2913815102656304, "grad_norm": 15.070113001242195, "learning_rate": 2e-06, "loss": 0.2637, "step": 1256 }, { "epoch": 0.2916135019139311, "grad_norm": 20.127966390681635, "learning_rate": 2e-06, "loss": 0.3592, "step": 1257 }, { "epoch": 0.2918454935622318, "grad_norm": 16.783537665196086, "learning_rate": 2e-06, "loss": 0.4248, "step": 1258 }, { "epoch": 0.2920774852105324, "grad_norm": 14.996063668291244, "learning_rate": 2e-06, "loss": 0.3356, "step": 1259 }, { "epoch": 0.2923094768588331, "grad_norm": 13.280194937801802, "learning_rate": 2e-06, "loss": 0.2595, "step": 1260 }, { "epoch": 0.29254146850713375, "grad_norm": 12.639332668504503, "learning_rate": 2e-06, "loss": 0.3405, "step": 1261 }, { "epoch": 0.29277346015543443, "grad_norm": 11.504927643928248, "learning_rate": 2e-06, "loss": 0.2966, "step": 1262 }, { "epoch": 0.29300545180373505, "grad_norm": 20.752244836231803, "learning_rate": 2e-06, "loss": 0.4051, "step": 1263 }, { "epoch": 0.29323744345203573, "grad_norm": 11.657319471913775, "learning_rate": 2e-06, "loss": 0.2778, "step": 1264 }, { "epoch": 0.2934694351003364, "grad_norm": 16.861970554754627, "learning_rate": 2e-06, "loss": 0.3725, "step": 1265 }, { "epoch": 0.29370142674863703, "grad_norm": 14.885281184009598, "learning_rate": 2e-06, "loss": 0.2854, "step": 1266 }, { "epoch": 0.2939334183969377, "grad_norm": 9.448485631920036, "learning_rate": 2e-06, "loss": 0.3432, "step": 1267 }, { "epoch": 0.2941654100452384, "grad_norm": 14.217616728146512, "learning_rate": 2e-06, "loss": 0.3417, "step": 1268 }, { "epoch": 0.294397401693539, "grad_norm": 9.39865709086706, "learning_rate": 2e-06, "loss": 0.2979, "step": 1269 }, { "epoch": 0.2946293933418397, "grad_norm": 19.94441583861392, "learning_rate": 2e-06, "loss": 0.3479, "step": 1270 }, { "epoch": 0.29486138499014036, "grad_norm": 11.9760265364321, "learning_rate": 2e-06, "loss": 0.3599, "step": 1271 }, { "epoch": 0.29509337663844104, "grad_norm": 11.245365111905581, "learning_rate": 2e-06, "loss": 0.2648, "step": 1272 }, { "epoch": 0.29532536828674166, "grad_norm": 14.520006307802863, "learning_rate": 2e-06, "loss": 0.2594, "step": 1273 }, { "epoch": 0.29555735993504234, "grad_norm": 18.75988516752072, "learning_rate": 2e-06, "loss": 0.2644, "step": 1274 }, { "epoch": 0.295789351583343, "grad_norm": 15.494061198744825, "learning_rate": 2e-06, "loss": 0.3706, "step": 1275 }, { "epoch": 0.29602134323164364, "grad_norm": 18.800626470937203, "learning_rate": 2e-06, "loss": 0.3949, "step": 1276 }, { "epoch": 0.2962533348799443, "grad_norm": 21.137356291700847, "learning_rate": 2e-06, "loss": 0.4411, "step": 1277 }, { "epoch": 0.296485326528245, "grad_norm": 14.69093229850745, "learning_rate": 2e-06, "loss": 0.3186, "step": 1278 }, { "epoch": 0.2967173181765456, "grad_norm": 19.660974625480087, "learning_rate": 2e-06, "loss": 0.3263, "step": 1279 }, { "epoch": 0.2969493098248463, "grad_norm": 11.241549838681076, "learning_rate": 2e-06, "loss": 0.2473, "step": 1280 }, { "epoch": 0.297181301473147, "grad_norm": 13.986806951706525, "learning_rate": 2e-06, "loss": 0.2402, "step": 1281 }, { "epoch": 0.29741329312144765, "grad_norm": 14.285571512151392, "learning_rate": 2e-06, "loss": 0.374, "step": 1282 }, { "epoch": 0.2976452847697483, "grad_norm": 10.958162154047578, "learning_rate": 2e-06, "loss": 0.2306, "step": 1283 }, { "epoch": 0.29787727641804895, "grad_norm": 18.532356058362055, "learning_rate": 2e-06, "loss": 0.3142, "step": 1284 }, { "epoch": 0.29810926806634963, "grad_norm": 12.891377637696152, "learning_rate": 2e-06, "loss": 0.2602, "step": 1285 }, { "epoch": 0.29834125971465025, "grad_norm": 16.11181170520358, "learning_rate": 2e-06, "loss": 0.3242, "step": 1286 }, { "epoch": 0.29857325136295093, "grad_norm": 9.615362986016594, "learning_rate": 2e-06, "loss": 0.2893, "step": 1287 }, { "epoch": 0.2988052430112516, "grad_norm": 17.945990919432717, "learning_rate": 2e-06, "loss": 0.3861, "step": 1288 }, { "epoch": 0.29903723465955223, "grad_norm": 17.99362450227029, "learning_rate": 2e-06, "loss": 0.3724, "step": 1289 }, { "epoch": 0.2992692263078529, "grad_norm": 17.107598158507745, "learning_rate": 2e-06, "loss": 0.3102, "step": 1290 }, { "epoch": 0.2995012179561536, "grad_norm": 10.520699209722633, "learning_rate": 2e-06, "loss": 0.2517, "step": 1291 }, { "epoch": 0.29973320960445426, "grad_norm": 22.305669883971103, "learning_rate": 2e-06, "loss": 0.3745, "step": 1292 }, { "epoch": 0.2999652012527549, "grad_norm": 23.331585901418507, "learning_rate": 2e-06, "loss": 0.297, "step": 1293 }, { "epoch": 0.30019719290105557, "grad_norm": 9.13037425685583, "learning_rate": 2e-06, "loss": 0.2517, "step": 1294 }, { "epoch": 0.30042918454935624, "grad_norm": 25.03598967873586, "learning_rate": 2e-06, "loss": 0.2891, "step": 1295 }, { "epoch": 0.30066117619765687, "grad_norm": 6.227195868422087, "learning_rate": 2e-06, "loss": 0.2703, "step": 1296 }, { "epoch": 0.30089316784595754, "grad_norm": 10.524631044435568, "learning_rate": 2e-06, "loss": 0.2402, "step": 1297 }, { "epoch": 0.3011251594942582, "grad_norm": 14.452260998510923, "learning_rate": 2e-06, "loss": 0.2147, "step": 1298 }, { "epoch": 0.30135715114255884, "grad_norm": 16.572736184416854, "learning_rate": 2e-06, "loss": 0.3713, "step": 1299 }, { "epoch": 0.3015891427908595, "grad_norm": 12.092715650876997, "learning_rate": 2e-06, "loss": 0.4643, "step": 1300 }, { "epoch": 0.3018211344391602, "grad_norm": 11.763924015078992, "learning_rate": 2e-06, "loss": 0.249, "step": 1301 }, { "epoch": 0.3020531260874609, "grad_norm": 11.297415978022148, "learning_rate": 2e-06, "loss": 0.3153, "step": 1302 }, { "epoch": 0.3022851177357615, "grad_norm": 22.418021514922543, "learning_rate": 2e-06, "loss": 0.3138, "step": 1303 }, { "epoch": 0.3025171093840622, "grad_norm": 20.535356510555992, "learning_rate": 2e-06, "loss": 0.2667, "step": 1304 }, { "epoch": 0.30274910103236286, "grad_norm": 15.480983737195427, "learning_rate": 2e-06, "loss": 0.3294, "step": 1305 }, { "epoch": 0.3029810926806635, "grad_norm": 18.091773010729597, "learning_rate": 2e-06, "loss": 0.3949, "step": 1306 }, { "epoch": 0.30321308432896416, "grad_norm": 11.142072308484599, "learning_rate": 2e-06, "loss": 0.299, "step": 1307 }, { "epoch": 0.30344507597726483, "grad_norm": 10.747760068034689, "learning_rate": 2e-06, "loss": 0.2483, "step": 1308 }, { "epoch": 0.30367706762556546, "grad_norm": 16.711980069760436, "learning_rate": 2e-06, "loss": 0.3531, "step": 1309 }, { "epoch": 0.30390905927386613, "grad_norm": 17.991188041443273, "learning_rate": 2e-06, "loss": 0.2774, "step": 1310 }, { "epoch": 0.3041410509221668, "grad_norm": 11.576762335855799, "learning_rate": 2e-06, "loss": 0.395, "step": 1311 }, { "epoch": 0.3043730425704675, "grad_norm": 10.466971442475415, "learning_rate": 2e-06, "loss": 0.3074, "step": 1312 }, { "epoch": 0.3046050342187681, "grad_norm": 18.42622648855536, "learning_rate": 2e-06, "loss": 0.2287, "step": 1313 }, { "epoch": 0.3048370258670688, "grad_norm": 15.540120049867527, "learning_rate": 2e-06, "loss": 0.3262, "step": 1314 }, { "epoch": 0.30506901751536947, "grad_norm": 17.587207558740374, "learning_rate": 2e-06, "loss": 0.3041, "step": 1315 }, { "epoch": 0.3053010091636701, "grad_norm": 17.44985967664141, "learning_rate": 2e-06, "loss": 0.2743, "step": 1316 }, { "epoch": 0.30553300081197077, "grad_norm": 8.54965869254267, "learning_rate": 2e-06, "loss": 0.2416, "step": 1317 }, { "epoch": 0.30576499246027145, "grad_norm": 8.979997168292295, "learning_rate": 2e-06, "loss": 0.348, "step": 1318 }, { "epoch": 0.30599698410857207, "grad_norm": 28.59983116861295, "learning_rate": 2e-06, "loss": 0.4125, "step": 1319 }, { "epoch": 0.30622897575687275, "grad_norm": 8.07380333347449, "learning_rate": 2e-06, "loss": 0.2392, "step": 1320 }, { "epoch": 0.3064609674051734, "grad_norm": 10.676150094033074, "learning_rate": 2e-06, "loss": 0.2199, "step": 1321 }, { "epoch": 0.3066929590534741, "grad_norm": 17.299675012306995, "learning_rate": 2e-06, "loss": 0.2338, "step": 1322 }, { "epoch": 0.3069249507017747, "grad_norm": 11.668388317530653, "learning_rate": 2e-06, "loss": 0.3085, "step": 1323 }, { "epoch": 0.3071569423500754, "grad_norm": 13.351683945674617, "learning_rate": 2e-06, "loss": 0.3607, "step": 1324 }, { "epoch": 0.3073889339983761, "grad_norm": 13.197158579360199, "learning_rate": 2e-06, "loss": 0.3151, "step": 1325 }, { "epoch": 0.3076209256466767, "grad_norm": 7.283939134417037, "learning_rate": 2e-06, "loss": 0.1872, "step": 1326 }, { "epoch": 0.3078529172949774, "grad_norm": 10.299996868078303, "learning_rate": 2e-06, "loss": 0.3248, "step": 1327 }, { "epoch": 0.30808490894327806, "grad_norm": 24.67342231320794, "learning_rate": 2e-06, "loss": 0.3031, "step": 1328 }, { "epoch": 0.3083169005915787, "grad_norm": 9.223137640402232, "learning_rate": 2e-06, "loss": 0.3476, "step": 1329 }, { "epoch": 0.30854889223987936, "grad_norm": 13.917374776095183, "learning_rate": 2e-06, "loss": 0.2151, "step": 1330 }, { "epoch": 0.30878088388818004, "grad_norm": 22.274647106743906, "learning_rate": 2e-06, "loss": 0.3928, "step": 1331 }, { "epoch": 0.3090128755364807, "grad_norm": 14.705277230122983, "learning_rate": 2e-06, "loss": 0.3337, "step": 1332 }, { "epoch": 0.30924486718478134, "grad_norm": 16.660797472432396, "learning_rate": 2e-06, "loss": 0.3067, "step": 1333 }, { "epoch": 0.309476858833082, "grad_norm": 10.20320184386022, "learning_rate": 2e-06, "loss": 0.2807, "step": 1334 }, { "epoch": 0.3097088504813827, "grad_norm": 14.164750183745458, "learning_rate": 2e-06, "loss": 0.3187, "step": 1335 }, { "epoch": 0.3099408421296833, "grad_norm": 17.276330857778227, "learning_rate": 2e-06, "loss": 0.4243, "step": 1336 }, { "epoch": 0.310172833777984, "grad_norm": 15.575398625815712, "learning_rate": 2e-06, "loss": 0.3158, "step": 1337 }, { "epoch": 0.31040482542628467, "grad_norm": 18.940988228755096, "learning_rate": 2e-06, "loss": 0.2767, "step": 1338 }, { "epoch": 0.3106368170745853, "grad_norm": 15.728923178327832, "learning_rate": 2e-06, "loss": 0.2889, "step": 1339 }, { "epoch": 0.31086880872288597, "grad_norm": 12.861312651073032, "learning_rate": 2e-06, "loss": 0.3422, "step": 1340 }, { "epoch": 0.31110080037118665, "grad_norm": 7.844519890976591, "learning_rate": 2e-06, "loss": 0.1895, "step": 1341 }, { "epoch": 0.3113327920194873, "grad_norm": 16.205539259080346, "learning_rate": 2e-06, "loss": 0.319, "step": 1342 }, { "epoch": 0.31156478366778795, "grad_norm": 13.541084983572475, "learning_rate": 2e-06, "loss": 0.3174, "step": 1343 }, { "epoch": 0.3117967753160886, "grad_norm": 15.138498429814257, "learning_rate": 2e-06, "loss": 0.2069, "step": 1344 }, { "epoch": 0.3120287669643893, "grad_norm": 17.90538146115018, "learning_rate": 2e-06, "loss": 0.307, "step": 1345 }, { "epoch": 0.3122607586126899, "grad_norm": 16.52864730654945, "learning_rate": 2e-06, "loss": 0.2979, "step": 1346 }, { "epoch": 0.3124927502609906, "grad_norm": 10.476892047549619, "learning_rate": 2e-06, "loss": 0.29, "step": 1347 }, { "epoch": 0.3127247419092913, "grad_norm": 22.11804081976467, "learning_rate": 2e-06, "loss": 0.2963, "step": 1348 }, { "epoch": 0.3129567335575919, "grad_norm": 14.362786675309335, "learning_rate": 2e-06, "loss": 0.2286, "step": 1349 }, { "epoch": 0.3131887252058926, "grad_norm": 7.571507878073395, "learning_rate": 2e-06, "loss": 0.1847, "step": 1350 }, { "epoch": 0.31342071685419326, "grad_norm": 18.40040614466039, "learning_rate": 2e-06, "loss": 0.3439, "step": 1351 }, { "epoch": 0.31365270850249394, "grad_norm": 29.00454963048086, "learning_rate": 2e-06, "loss": 0.2897, "step": 1352 }, { "epoch": 0.31388470015079456, "grad_norm": 12.1743071611877, "learning_rate": 2e-06, "loss": 0.3104, "step": 1353 }, { "epoch": 0.31411669179909524, "grad_norm": 13.407021819241502, "learning_rate": 2e-06, "loss": 0.2899, "step": 1354 }, { "epoch": 0.3143486834473959, "grad_norm": 13.353584074233229, "learning_rate": 2e-06, "loss": 0.1983, "step": 1355 }, { "epoch": 0.31458067509569654, "grad_norm": 17.248859424682657, "learning_rate": 2e-06, "loss": 0.3753, "step": 1356 }, { "epoch": 0.3148126667439972, "grad_norm": 10.014448704532423, "learning_rate": 2e-06, "loss": 0.1834, "step": 1357 }, { "epoch": 0.3150446583922979, "grad_norm": 21.763953176545574, "learning_rate": 2e-06, "loss": 0.3596, "step": 1358 }, { "epoch": 0.3152766500405985, "grad_norm": 19.940820725887995, "learning_rate": 2e-06, "loss": 0.2957, "step": 1359 }, { "epoch": 0.3155086416888992, "grad_norm": 18.912097481709182, "learning_rate": 2e-06, "loss": 0.3685, "step": 1360 }, { "epoch": 0.31574063333719987, "grad_norm": 11.346471266770104, "learning_rate": 2e-06, "loss": 0.2816, "step": 1361 }, { "epoch": 0.3159726249855005, "grad_norm": 28.193240828763855, "learning_rate": 2e-06, "loss": 0.4203, "step": 1362 }, { "epoch": 0.31620461663380117, "grad_norm": 16.73102518384111, "learning_rate": 2e-06, "loss": 0.3564, "step": 1363 }, { "epoch": 0.31643660828210185, "grad_norm": 9.763506994333493, "learning_rate": 2e-06, "loss": 0.2947, "step": 1364 }, { "epoch": 0.31666859993040253, "grad_norm": 16.116070672609183, "learning_rate": 2e-06, "loss": 0.2653, "step": 1365 }, { "epoch": 0.31690059157870315, "grad_norm": 16.30539281629766, "learning_rate": 2e-06, "loss": 0.2291, "step": 1366 }, { "epoch": 0.31713258322700383, "grad_norm": 19.019320340396867, "learning_rate": 2e-06, "loss": 0.3052, "step": 1367 }, { "epoch": 0.3173645748753045, "grad_norm": 18.228935063012884, "learning_rate": 2e-06, "loss": 0.4026, "step": 1368 }, { "epoch": 0.31759656652360513, "grad_norm": 18.242836131999766, "learning_rate": 2e-06, "loss": 0.2933, "step": 1369 }, { "epoch": 0.3178285581719058, "grad_norm": 11.764367942970903, "learning_rate": 2e-06, "loss": 0.2875, "step": 1370 }, { "epoch": 0.3180605498202065, "grad_norm": 16.85260895830024, "learning_rate": 2e-06, "loss": 0.3813, "step": 1371 }, { "epoch": 0.3182925414685071, "grad_norm": 15.517837361759963, "learning_rate": 2e-06, "loss": 0.3062, "step": 1372 }, { "epoch": 0.3185245331168078, "grad_norm": 16.41448843521921, "learning_rate": 2e-06, "loss": 0.449, "step": 1373 }, { "epoch": 0.31875652476510846, "grad_norm": 21.721345612017643, "learning_rate": 2e-06, "loss": 0.3781, "step": 1374 }, { "epoch": 0.31898851641340914, "grad_norm": 19.833120601711492, "learning_rate": 2e-06, "loss": 0.3795, "step": 1375 }, { "epoch": 0.31922050806170976, "grad_norm": 7.1809944456557, "learning_rate": 2e-06, "loss": 0.2324, "step": 1376 }, { "epoch": 0.31945249971001044, "grad_norm": 27.35532202666317, "learning_rate": 2e-06, "loss": 0.4693, "step": 1377 }, { "epoch": 0.3196844913583111, "grad_norm": 14.076587114302116, "learning_rate": 2e-06, "loss": 0.2823, "step": 1378 }, { "epoch": 0.31991648300661174, "grad_norm": 8.780774638050996, "learning_rate": 2e-06, "loss": 0.2473, "step": 1379 }, { "epoch": 0.3201484746549124, "grad_norm": 11.116062441609776, "learning_rate": 2e-06, "loss": 0.2186, "step": 1380 }, { "epoch": 0.3203804663032131, "grad_norm": 8.693893969917667, "learning_rate": 2e-06, "loss": 0.2627, "step": 1381 }, { "epoch": 0.3206124579515137, "grad_norm": 10.604765368834062, "learning_rate": 2e-06, "loss": 0.1967, "step": 1382 }, { "epoch": 0.3208444495998144, "grad_norm": 17.142486434058437, "learning_rate": 2e-06, "loss": 0.3608, "step": 1383 }, { "epoch": 0.3210764412481151, "grad_norm": 12.196660226224022, "learning_rate": 2e-06, "loss": 0.2737, "step": 1384 }, { "epoch": 0.32130843289641575, "grad_norm": 7.812040078901983, "learning_rate": 2e-06, "loss": 0.2015, "step": 1385 }, { "epoch": 0.3215404245447164, "grad_norm": 12.31031716391756, "learning_rate": 2e-06, "loss": 0.3417, "step": 1386 }, { "epoch": 0.32177241619301705, "grad_norm": 5.6895948115371455, "learning_rate": 2e-06, "loss": 0.2746, "step": 1387 }, { "epoch": 0.32200440784131773, "grad_norm": 26.500274825195785, "learning_rate": 2e-06, "loss": 0.3603, "step": 1388 }, { "epoch": 0.32223639948961835, "grad_norm": 11.59197120918137, "learning_rate": 2e-06, "loss": 0.2567, "step": 1389 }, { "epoch": 0.32246839113791903, "grad_norm": 21.22159104373813, "learning_rate": 2e-06, "loss": 0.2878, "step": 1390 }, { "epoch": 0.3227003827862197, "grad_norm": 7.653899247960632, "learning_rate": 2e-06, "loss": 0.1782, "step": 1391 }, { "epoch": 0.32293237443452033, "grad_norm": 24.21228884650294, "learning_rate": 2e-06, "loss": 0.4119, "step": 1392 }, { "epoch": 0.323164366082821, "grad_norm": 19.528609223344635, "learning_rate": 2e-06, "loss": 0.369, "step": 1393 }, { "epoch": 0.3233963577311217, "grad_norm": 30.983200470204057, "learning_rate": 2e-06, "loss": 0.5261, "step": 1394 }, { "epoch": 0.32362834937942236, "grad_norm": 16.414562055891793, "learning_rate": 2e-06, "loss": 0.2037, "step": 1395 }, { "epoch": 0.323860341027723, "grad_norm": 13.884976117418837, "learning_rate": 2e-06, "loss": 0.3318, "step": 1396 }, { "epoch": 0.32409233267602366, "grad_norm": 17.823166997510224, "learning_rate": 2e-06, "loss": 0.2978, "step": 1397 }, { "epoch": 0.32432432432432434, "grad_norm": 16.094709140807357, "learning_rate": 2e-06, "loss": 0.3587, "step": 1398 }, { "epoch": 0.32455631597262496, "grad_norm": 21.996263140110763, "learning_rate": 2e-06, "loss": 0.4132, "step": 1399 }, { "epoch": 0.32478830762092564, "grad_norm": 15.17287807041555, "learning_rate": 2e-06, "loss": 0.1584, "step": 1400 }, { "epoch": 0.3250202992692263, "grad_norm": 9.076859822738712, "learning_rate": 2e-06, "loss": 0.1804, "step": 1401 }, { "epoch": 0.32525229091752694, "grad_norm": 12.833433028804583, "learning_rate": 2e-06, "loss": 0.3237, "step": 1402 }, { "epoch": 0.3254842825658276, "grad_norm": 21.081007557262744, "learning_rate": 2e-06, "loss": 0.3414, "step": 1403 }, { "epoch": 0.3257162742141283, "grad_norm": 24.44170266561206, "learning_rate": 2e-06, "loss": 0.4184, "step": 1404 }, { "epoch": 0.325948265862429, "grad_norm": 15.761112175024815, "learning_rate": 2e-06, "loss": 0.3424, "step": 1405 }, { "epoch": 0.3261802575107296, "grad_norm": 26.682059157151674, "learning_rate": 2e-06, "loss": 0.3506, "step": 1406 }, { "epoch": 0.3264122491590303, "grad_norm": 26.625870041802795, "learning_rate": 2e-06, "loss": 0.4061, "step": 1407 }, { "epoch": 0.32664424080733095, "grad_norm": 8.524660616405798, "learning_rate": 2e-06, "loss": 0.3291, "step": 1408 }, { "epoch": 0.3268762324556316, "grad_norm": 13.785566655746681, "learning_rate": 2e-06, "loss": 0.2278, "step": 1409 }, { "epoch": 0.32710822410393225, "grad_norm": 10.510689880378692, "learning_rate": 2e-06, "loss": 0.2374, "step": 1410 }, { "epoch": 0.32734021575223293, "grad_norm": 13.282208345671345, "learning_rate": 2e-06, "loss": 0.3365, "step": 1411 }, { "epoch": 0.32757220740053355, "grad_norm": 7.200604791903009, "learning_rate": 2e-06, "loss": 0.3043, "step": 1412 }, { "epoch": 0.32780419904883423, "grad_norm": 16.380763960725297, "learning_rate": 2e-06, "loss": 0.336, "step": 1413 }, { "epoch": 0.3280361906971349, "grad_norm": 7.862667605788348, "learning_rate": 2e-06, "loss": 0.2364, "step": 1414 }, { "epoch": 0.3282681823454356, "grad_norm": 13.792537373401919, "learning_rate": 2e-06, "loss": 0.3635, "step": 1415 }, { "epoch": 0.3285001739937362, "grad_norm": 21.034934320250013, "learning_rate": 2e-06, "loss": 0.4046, "step": 1416 }, { "epoch": 0.3287321656420369, "grad_norm": 22.086878012364966, "learning_rate": 2e-06, "loss": 0.2852, "step": 1417 }, { "epoch": 0.32896415729033757, "grad_norm": 14.784229950459304, "learning_rate": 2e-06, "loss": 0.2574, "step": 1418 }, { "epoch": 0.3291961489386382, "grad_norm": 12.373195748036627, "learning_rate": 2e-06, "loss": 0.2822, "step": 1419 }, { "epoch": 0.32942814058693887, "grad_norm": 38.01272152718663, "learning_rate": 2e-06, "loss": 0.3616, "step": 1420 }, { "epoch": 0.32966013223523954, "grad_norm": 11.991749136874276, "learning_rate": 2e-06, "loss": 0.353, "step": 1421 }, { "epoch": 0.32989212388354017, "grad_norm": 26.97583446550163, "learning_rate": 2e-06, "loss": 0.3066, "step": 1422 }, { "epoch": 0.33012411553184084, "grad_norm": 15.834084651170842, "learning_rate": 2e-06, "loss": 0.298, "step": 1423 }, { "epoch": 0.3303561071801415, "grad_norm": 18.92977879289124, "learning_rate": 2e-06, "loss": 0.392, "step": 1424 }, { "epoch": 0.3305880988284422, "grad_norm": 13.530259540842376, "learning_rate": 2e-06, "loss": 0.3095, "step": 1425 }, { "epoch": 0.3308200904767428, "grad_norm": 10.827914794149137, "learning_rate": 2e-06, "loss": 0.3398, "step": 1426 }, { "epoch": 0.3310520821250435, "grad_norm": 9.71939624119191, "learning_rate": 2e-06, "loss": 0.3422, "step": 1427 }, { "epoch": 0.3312840737733442, "grad_norm": 8.993853404795685, "learning_rate": 2e-06, "loss": 0.2793, "step": 1428 }, { "epoch": 0.3315160654216448, "grad_norm": 11.037296435326578, "learning_rate": 2e-06, "loss": 0.3753, "step": 1429 }, { "epoch": 0.3317480570699455, "grad_norm": 15.519371017718381, "learning_rate": 2e-06, "loss": 0.3152, "step": 1430 }, { "epoch": 0.33198004871824616, "grad_norm": 12.947799922606766, "learning_rate": 2e-06, "loss": 0.3816, "step": 1431 }, { "epoch": 0.3322120403665468, "grad_norm": 43.44503329570567, "learning_rate": 2e-06, "loss": 0.4163, "step": 1432 }, { "epoch": 0.33244403201484746, "grad_norm": 10.85914909093172, "learning_rate": 2e-06, "loss": 0.3539, "step": 1433 }, { "epoch": 0.33267602366314813, "grad_norm": 16.45369684618797, "learning_rate": 2e-06, "loss": 0.223, "step": 1434 }, { "epoch": 0.3329080153114488, "grad_norm": 11.856791453456106, "learning_rate": 2e-06, "loss": 0.2651, "step": 1435 }, { "epoch": 0.33314000695974944, "grad_norm": 28.227771959540448, "learning_rate": 2e-06, "loss": 0.356, "step": 1436 }, { "epoch": 0.3333719986080501, "grad_norm": 16.11516272567042, "learning_rate": 2e-06, "loss": 0.3581, "step": 1437 }, { "epoch": 0.3336039902563508, "grad_norm": 17.205293488598823, "learning_rate": 2e-06, "loss": 0.305, "step": 1438 }, { "epoch": 0.3338359819046514, "grad_norm": 18.59915835527598, "learning_rate": 2e-06, "loss": 0.2897, "step": 1439 }, { "epoch": 0.3340679735529521, "grad_norm": 9.665209145110666, "learning_rate": 2e-06, "loss": 0.3308, "step": 1440 }, { "epoch": 0.33429996520125277, "grad_norm": 9.68040316568584, "learning_rate": 2e-06, "loss": 0.1829, "step": 1441 }, { "epoch": 0.3345319568495534, "grad_norm": 13.189970236007511, "learning_rate": 2e-06, "loss": 0.3357, "step": 1442 }, { "epoch": 0.33476394849785407, "grad_norm": 19.650408705244487, "learning_rate": 2e-06, "loss": 0.3785, "step": 1443 }, { "epoch": 0.33499594014615475, "grad_norm": 28.296732624494975, "learning_rate": 2e-06, "loss": 0.335, "step": 1444 }, { "epoch": 0.3352279317944554, "grad_norm": 18.467670187497443, "learning_rate": 2e-06, "loss": 0.3028, "step": 1445 }, { "epoch": 0.33545992344275605, "grad_norm": 18.270227526717235, "learning_rate": 2e-06, "loss": 0.2988, "step": 1446 }, { "epoch": 0.3356919150910567, "grad_norm": 11.52991657081852, "learning_rate": 2e-06, "loss": 0.2774, "step": 1447 }, { "epoch": 0.3359239067393574, "grad_norm": 23.744389862640357, "learning_rate": 2e-06, "loss": 0.548, "step": 1448 }, { "epoch": 0.336155898387658, "grad_norm": 18.275621368601392, "learning_rate": 2e-06, "loss": 0.4032, "step": 1449 }, { "epoch": 0.3363878900359587, "grad_norm": 14.829177620147869, "learning_rate": 2e-06, "loss": 0.3983, "step": 1450 }, { "epoch": 0.3366198816842594, "grad_norm": 13.640819060025391, "learning_rate": 2e-06, "loss": 0.3402, "step": 1451 }, { "epoch": 0.33685187333256, "grad_norm": 20.325335878357638, "learning_rate": 2e-06, "loss": 0.2938, "step": 1452 }, { "epoch": 0.3370838649808607, "grad_norm": 10.1654491529205, "learning_rate": 2e-06, "loss": 0.3499, "step": 1453 }, { "epoch": 0.33731585662916136, "grad_norm": 18.11067937334146, "learning_rate": 2e-06, "loss": 0.3405, "step": 1454 }, { "epoch": 0.33754784827746204, "grad_norm": 7.213728305081726, "learning_rate": 2e-06, "loss": 0.3285, "step": 1455 }, { "epoch": 0.33777983992576266, "grad_norm": 8.111878159324867, "learning_rate": 2e-06, "loss": 0.2244, "step": 1456 }, { "epoch": 0.33801183157406334, "grad_norm": 16.26670030048152, "learning_rate": 2e-06, "loss": 0.3037, "step": 1457 }, { "epoch": 0.338243823222364, "grad_norm": 24.565372997992675, "learning_rate": 2e-06, "loss": 0.3137, "step": 1458 }, { "epoch": 0.33847581487066464, "grad_norm": 22.886787785735034, "learning_rate": 2e-06, "loss": 0.3186, "step": 1459 }, { "epoch": 0.3387078065189653, "grad_norm": 21.436646107450535, "learning_rate": 2e-06, "loss": 0.2531, "step": 1460 }, { "epoch": 0.338939798167266, "grad_norm": 12.328393696710915, "learning_rate": 2e-06, "loss": 0.3254, "step": 1461 }, { "epoch": 0.3391717898155666, "grad_norm": 15.206807262565173, "learning_rate": 2e-06, "loss": 0.3559, "step": 1462 }, { "epoch": 0.3394037814638673, "grad_norm": 14.31015543426358, "learning_rate": 2e-06, "loss": 0.2817, "step": 1463 }, { "epoch": 0.33963577311216797, "grad_norm": 21.67014327512649, "learning_rate": 2e-06, "loss": 0.3082, "step": 1464 }, { "epoch": 0.33986776476046865, "grad_norm": 7.896970994527335, "learning_rate": 2e-06, "loss": 0.2985, "step": 1465 }, { "epoch": 0.34009975640876927, "grad_norm": 10.908642187522922, "learning_rate": 2e-06, "loss": 0.4137, "step": 1466 }, { "epoch": 0.34033174805706995, "grad_norm": 14.30417006499471, "learning_rate": 2e-06, "loss": 0.3977, "step": 1467 }, { "epoch": 0.3405637397053706, "grad_norm": 12.724543568104627, "learning_rate": 2e-06, "loss": 0.2614, "step": 1468 }, { "epoch": 0.34079573135367125, "grad_norm": 13.860781856965406, "learning_rate": 2e-06, "loss": 0.3516, "step": 1469 }, { "epoch": 0.3410277230019719, "grad_norm": 8.11891853375226, "learning_rate": 2e-06, "loss": 0.2225, "step": 1470 }, { "epoch": 0.3412597146502726, "grad_norm": 18.084721461864714, "learning_rate": 2e-06, "loss": 0.2586, "step": 1471 }, { "epoch": 0.3414917062985732, "grad_norm": 7.181534455328431, "learning_rate": 2e-06, "loss": 0.2283, "step": 1472 }, { "epoch": 0.3417236979468739, "grad_norm": 10.249481209532288, "learning_rate": 2e-06, "loss": 0.2586, "step": 1473 }, { "epoch": 0.3419556895951746, "grad_norm": 20.235711545193375, "learning_rate": 2e-06, "loss": 0.2793, "step": 1474 }, { "epoch": 0.34218768124347526, "grad_norm": 12.615681034876618, "learning_rate": 2e-06, "loss": 0.2957, "step": 1475 }, { "epoch": 0.3424196728917759, "grad_norm": 20.373845008527187, "learning_rate": 2e-06, "loss": 0.5045, "step": 1476 }, { "epoch": 0.34265166454007656, "grad_norm": 23.646655211696096, "learning_rate": 2e-06, "loss": 0.3433, "step": 1477 }, { "epoch": 0.34288365618837724, "grad_norm": 10.779640440881222, "learning_rate": 2e-06, "loss": 0.26, "step": 1478 }, { "epoch": 0.34311564783667786, "grad_norm": 14.955799866697927, "learning_rate": 2e-06, "loss": 0.3575, "step": 1479 }, { "epoch": 0.34334763948497854, "grad_norm": 23.34240373648745, "learning_rate": 2e-06, "loss": 0.3543, "step": 1480 }, { "epoch": 0.3435796311332792, "grad_norm": 27.198043313140015, "learning_rate": 2e-06, "loss": 0.3609, "step": 1481 }, { "epoch": 0.34381162278157984, "grad_norm": 4.740917917219977, "learning_rate": 2e-06, "loss": 0.2064, "step": 1482 }, { "epoch": 0.3440436144298805, "grad_norm": 17.595434056508047, "learning_rate": 2e-06, "loss": 0.289, "step": 1483 }, { "epoch": 0.3442756060781812, "grad_norm": 9.572257970860454, "learning_rate": 2e-06, "loss": 0.3235, "step": 1484 }, { "epoch": 0.3445075977264819, "grad_norm": 21.185559442915206, "learning_rate": 2e-06, "loss": 0.3622, "step": 1485 }, { "epoch": 0.3447395893747825, "grad_norm": 19.125381912342608, "learning_rate": 2e-06, "loss": 0.3033, "step": 1486 }, { "epoch": 0.3449715810230832, "grad_norm": 14.398206313708432, "learning_rate": 2e-06, "loss": 0.3011, "step": 1487 }, { "epoch": 0.34520357267138385, "grad_norm": 20.4757543531186, "learning_rate": 2e-06, "loss": 0.3481, "step": 1488 }, { "epoch": 0.3454355643196845, "grad_norm": 13.521107412007973, "learning_rate": 2e-06, "loss": 0.3284, "step": 1489 }, { "epoch": 0.34566755596798515, "grad_norm": 12.82873921884128, "learning_rate": 2e-06, "loss": 0.333, "step": 1490 }, { "epoch": 0.34589954761628583, "grad_norm": 16.50102131285493, "learning_rate": 2e-06, "loss": 0.3682, "step": 1491 }, { "epoch": 0.34613153926458645, "grad_norm": 15.968468514516362, "learning_rate": 2e-06, "loss": 0.3645, "step": 1492 }, { "epoch": 0.34636353091288713, "grad_norm": 15.411384633795938, "learning_rate": 2e-06, "loss": 0.2859, "step": 1493 }, { "epoch": 0.3465955225611878, "grad_norm": 10.528121007020545, "learning_rate": 2e-06, "loss": 0.3602, "step": 1494 }, { "epoch": 0.3468275142094885, "grad_norm": 12.50351889191618, "learning_rate": 2e-06, "loss": 0.2303, "step": 1495 }, { "epoch": 0.3470595058577891, "grad_norm": 22.53265667953538, "learning_rate": 2e-06, "loss": 0.3537, "step": 1496 }, { "epoch": 0.3472914975060898, "grad_norm": 13.161514617319222, "learning_rate": 2e-06, "loss": 0.3082, "step": 1497 }, { "epoch": 0.34752348915439046, "grad_norm": 12.514909309899709, "learning_rate": 2e-06, "loss": 0.3135, "step": 1498 }, { "epoch": 0.3477554808026911, "grad_norm": 27.200429723788545, "learning_rate": 2e-06, "loss": 0.3255, "step": 1499 }, { "epoch": 0.34798747245099176, "grad_norm": 14.382791715885393, "learning_rate": 2e-06, "loss": 0.2845, "step": 1500 }, { "epoch": 0.34821946409929244, "grad_norm": 27.770489340950228, "learning_rate": 2e-06, "loss": 0.3061, "step": 1501 }, { "epoch": 0.34845145574759306, "grad_norm": 13.103877107565271, "learning_rate": 2e-06, "loss": 0.2417, "step": 1502 }, { "epoch": 0.34868344739589374, "grad_norm": 22.095341754977923, "learning_rate": 2e-06, "loss": 0.3161, "step": 1503 }, { "epoch": 0.3489154390441944, "grad_norm": 17.66831432896543, "learning_rate": 2e-06, "loss": 0.3733, "step": 1504 }, { "epoch": 0.3491474306924951, "grad_norm": 23.243882502598925, "learning_rate": 2e-06, "loss": 0.2508, "step": 1505 }, { "epoch": 0.3493794223407957, "grad_norm": 14.198012691546005, "learning_rate": 2e-06, "loss": 0.2499, "step": 1506 }, { "epoch": 0.3496114139890964, "grad_norm": 16.38842201576127, "learning_rate": 2e-06, "loss": 0.3034, "step": 1507 }, { "epoch": 0.3498434056373971, "grad_norm": 12.030502731601553, "learning_rate": 2e-06, "loss": 0.2351, "step": 1508 }, { "epoch": 0.3500753972856977, "grad_norm": 12.003382984655511, "learning_rate": 2e-06, "loss": 0.2391, "step": 1509 }, { "epoch": 0.3503073889339984, "grad_norm": 16.653026981636675, "learning_rate": 2e-06, "loss": 0.267, "step": 1510 }, { "epoch": 0.35053938058229905, "grad_norm": 26.30925965099654, "learning_rate": 2e-06, "loss": 0.2996, "step": 1511 }, { "epoch": 0.3507713722305997, "grad_norm": 8.268051727806895, "learning_rate": 2e-06, "loss": 0.3707, "step": 1512 }, { "epoch": 0.35100336387890035, "grad_norm": 16.492831306112947, "learning_rate": 2e-06, "loss": 0.3764, "step": 1513 }, { "epoch": 0.35123535552720103, "grad_norm": 12.953009967608708, "learning_rate": 2e-06, "loss": 0.3184, "step": 1514 }, { "epoch": 0.35146734717550165, "grad_norm": 13.868907702767919, "learning_rate": 2e-06, "loss": 0.275, "step": 1515 }, { "epoch": 0.35169933882380233, "grad_norm": 14.707904973561801, "learning_rate": 2e-06, "loss": 0.3275, "step": 1516 }, { "epoch": 0.351931330472103, "grad_norm": 18.927232123905426, "learning_rate": 2e-06, "loss": 0.4729, "step": 1517 }, { "epoch": 0.3521633221204037, "grad_norm": 26.683198978700762, "learning_rate": 2e-06, "loss": 0.3891, "step": 1518 }, { "epoch": 0.3523953137687043, "grad_norm": 23.200118728090455, "learning_rate": 2e-06, "loss": 0.3123, "step": 1519 }, { "epoch": 0.352627305417005, "grad_norm": 12.772671965285351, "learning_rate": 2e-06, "loss": 0.2973, "step": 1520 }, { "epoch": 0.35285929706530567, "grad_norm": 19.03558091042126, "learning_rate": 2e-06, "loss": 0.3802, "step": 1521 }, { "epoch": 0.3530912887136063, "grad_norm": 18.97245741876137, "learning_rate": 2e-06, "loss": 0.3188, "step": 1522 }, { "epoch": 0.35332328036190697, "grad_norm": 14.336059096837559, "learning_rate": 2e-06, "loss": 0.3388, "step": 1523 }, { "epoch": 0.35355527201020764, "grad_norm": 11.160440481969392, "learning_rate": 2e-06, "loss": 0.2824, "step": 1524 }, { "epoch": 0.35378726365850827, "grad_norm": 23.929660422992608, "learning_rate": 2e-06, "loss": 0.311, "step": 1525 }, { "epoch": 0.35401925530680894, "grad_norm": 23.916293169824055, "learning_rate": 2e-06, "loss": 0.4163, "step": 1526 }, { "epoch": 0.3542512469551096, "grad_norm": 20.043119923355555, "learning_rate": 2e-06, "loss": 0.329, "step": 1527 }, { "epoch": 0.3544832386034103, "grad_norm": 12.250980750726233, "learning_rate": 2e-06, "loss": 0.2934, "step": 1528 }, { "epoch": 0.3547152302517109, "grad_norm": 14.48483674657129, "learning_rate": 2e-06, "loss": 0.3425, "step": 1529 }, { "epoch": 0.3549472219000116, "grad_norm": 11.553582803477648, "learning_rate": 2e-06, "loss": 0.3147, "step": 1530 }, { "epoch": 0.3551792135483123, "grad_norm": 11.560989065944293, "learning_rate": 2e-06, "loss": 0.2615, "step": 1531 }, { "epoch": 0.3554112051966129, "grad_norm": 11.17415333216845, "learning_rate": 2e-06, "loss": 0.3311, "step": 1532 }, { "epoch": 0.3556431968449136, "grad_norm": 12.185737847018876, "learning_rate": 2e-06, "loss": 0.2516, "step": 1533 }, { "epoch": 0.35587518849321426, "grad_norm": 8.191288874419087, "learning_rate": 2e-06, "loss": 0.1495, "step": 1534 }, { "epoch": 0.3561071801415149, "grad_norm": 10.949054148249836, "learning_rate": 2e-06, "loss": 0.343, "step": 1535 }, { "epoch": 0.35633917178981556, "grad_norm": 12.278173030546192, "learning_rate": 2e-06, "loss": 0.3576, "step": 1536 }, { "epoch": 0.35657116343811623, "grad_norm": 13.977798272440518, "learning_rate": 2e-06, "loss": 0.2952, "step": 1537 }, { "epoch": 0.3568031550864169, "grad_norm": 11.83754990738849, "learning_rate": 2e-06, "loss": 0.2399, "step": 1538 }, { "epoch": 0.35703514673471753, "grad_norm": 16.49812571960583, "learning_rate": 2e-06, "loss": 0.3304, "step": 1539 }, { "epoch": 0.3572671383830182, "grad_norm": 17.13588560605484, "learning_rate": 2e-06, "loss": 0.3186, "step": 1540 }, { "epoch": 0.3574991300313189, "grad_norm": 18.403814271161163, "learning_rate": 2e-06, "loss": 0.2804, "step": 1541 }, { "epoch": 0.3577311216796195, "grad_norm": 12.827052629703486, "learning_rate": 2e-06, "loss": 0.2458, "step": 1542 }, { "epoch": 0.3579631133279202, "grad_norm": 12.760937396611903, "learning_rate": 2e-06, "loss": 0.2407, "step": 1543 }, { "epoch": 0.35819510497622087, "grad_norm": 16.539137967377687, "learning_rate": 2e-06, "loss": 0.383, "step": 1544 }, { "epoch": 0.3584270966245215, "grad_norm": 20.039107817355806, "learning_rate": 2e-06, "loss": 0.3159, "step": 1545 }, { "epoch": 0.35865908827282217, "grad_norm": 13.64428431793954, "learning_rate": 2e-06, "loss": 0.2665, "step": 1546 }, { "epoch": 0.35889107992112285, "grad_norm": 12.42057044851482, "learning_rate": 2e-06, "loss": 0.3102, "step": 1547 }, { "epoch": 0.3591230715694235, "grad_norm": 22.523044251326006, "learning_rate": 2e-06, "loss": 0.2732, "step": 1548 }, { "epoch": 0.35935506321772415, "grad_norm": 16.928365338505053, "learning_rate": 2e-06, "loss": 0.3164, "step": 1549 }, { "epoch": 0.3595870548660248, "grad_norm": 18.232182331487515, "learning_rate": 2e-06, "loss": 0.2678, "step": 1550 }, { "epoch": 0.3598190465143255, "grad_norm": 21.978434417091222, "learning_rate": 2e-06, "loss": 0.3644, "step": 1551 }, { "epoch": 0.3600510381626261, "grad_norm": 7.471359625072732, "learning_rate": 2e-06, "loss": 0.273, "step": 1552 }, { "epoch": 0.3602830298109268, "grad_norm": 19.050119083208457, "learning_rate": 2e-06, "loss": 0.2834, "step": 1553 }, { "epoch": 0.3605150214592275, "grad_norm": 8.666454101947402, "learning_rate": 2e-06, "loss": 0.2591, "step": 1554 }, { "epoch": 0.3607470131075281, "grad_norm": 14.71395010544609, "learning_rate": 2e-06, "loss": 0.329, "step": 1555 }, { "epoch": 0.3609790047558288, "grad_norm": 15.222939559224029, "learning_rate": 2e-06, "loss": 0.2663, "step": 1556 }, { "epoch": 0.36121099640412946, "grad_norm": 16.19223554787744, "learning_rate": 2e-06, "loss": 0.4093, "step": 1557 }, { "epoch": 0.36144298805243014, "grad_norm": 12.171363940214102, "learning_rate": 2e-06, "loss": 0.2952, "step": 1558 }, { "epoch": 0.36167497970073076, "grad_norm": 15.245125862878577, "learning_rate": 2e-06, "loss": 0.4228, "step": 1559 }, { "epoch": 0.36190697134903144, "grad_norm": 19.04913330454029, "learning_rate": 2e-06, "loss": 0.3924, "step": 1560 }, { "epoch": 0.3621389629973321, "grad_norm": 15.994627326757682, "learning_rate": 2e-06, "loss": 0.3107, "step": 1561 }, { "epoch": 0.36237095464563274, "grad_norm": 21.858725247811957, "learning_rate": 2e-06, "loss": 0.4131, "step": 1562 }, { "epoch": 0.3626029462939334, "grad_norm": 12.86548452333493, "learning_rate": 2e-06, "loss": 0.3945, "step": 1563 }, { "epoch": 0.3628349379422341, "grad_norm": 11.10614066767929, "learning_rate": 2e-06, "loss": 0.2737, "step": 1564 }, { "epoch": 0.3630669295905347, "grad_norm": 7.711392585957257, "learning_rate": 2e-06, "loss": 0.2551, "step": 1565 }, { "epoch": 0.3632989212388354, "grad_norm": 13.634176534870537, "learning_rate": 2e-06, "loss": 0.2395, "step": 1566 }, { "epoch": 0.36353091288713607, "grad_norm": 10.852789631053897, "learning_rate": 2e-06, "loss": 0.2562, "step": 1567 }, { "epoch": 0.36376290453543675, "grad_norm": 10.476346649167414, "learning_rate": 2e-06, "loss": 0.2577, "step": 1568 }, { "epoch": 0.36399489618373737, "grad_norm": 15.507037512786859, "learning_rate": 2e-06, "loss": 0.2622, "step": 1569 }, { "epoch": 0.36422688783203805, "grad_norm": 19.351470038814583, "learning_rate": 2e-06, "loss": 0.4442, "step": 1570 }, { "epoch": 0.3644588794803387, "grad_norm": 27.313074395348565, "learning_rate": 2e-06, "loss": 0.4793, "step": 1571 }, { "epoch": 0.36469087112863935, "grad_norm": 16.532665789399566, "learning_rate": 2e-06, "loss": 0.3345, "step": 1572 }, { "epoch": 0.36492286277694, "grad_norm": 13.405927824510398, "learning_rate": 2e-06, "loss": 0.3608, "step": 1573 }, { "epoch": 0.3651548544252407, "grad_norm": 10.875804546859849, "learning_rate": 2e-06, "loss": 0.3128, "step": 1574 }, { "epoch": 0.3653868460735413, "grad_norm": 18.823706038628313, "learning_rate": 2e-06, "loss": 0.3562, "step": 1575 }, { "epoch": 0.365618837721842, "grad_norm": 18.61416232947668, "learning_rate": 2e-06, "loss": 0.2618, "step": 1576 }, { "epoch": 0.3658508293701427, "grad_norm": 24.02212692496374, "learning_rate": 2e-06, "loss": 0.3201, "step": 1577 }, { "epoch": 0.36608282101844336, "grad_norm": 10.800817628224939, "learning_rate": 2e-06, "loss": 0.2754, "step": 1578 }, { "epoch": 0.366314812666744, "grad_norm": 9.361010948025115, "learning_rate": 2e-06, "loss": 0.2917, "step": 1579 }, { "epoch": 0.36654680431504466, "grad_norm": 24.186205696257296, "learning_rate": 2e-06, "loss": 0.3642, "step": 1580 }, { "epoch": 0.36677879596334534, "grad_norm": 18.65446560349194, "learning_rate": 2e-06, "loss": 0.3754, "step": 1581 }, { "epoch": 0.36701078761164596, "grad_norm": 24.572827718997146, "learning_rate": 2e-06, "loss": 0.3356, "step": 1582 }, { "epoch": 0.36724277925994664, "grad_norm": 25.30961911705546, "learning_rate": 2e-06, "loss": 0.3706, "step": 1583 }, { "epoch": 0.3674747709082473, "grad_norm": 19.183888435579004, "learning_rate": 2e-06, "loss": 0.391, "step": 1584 }, { "epoch": 0.36770676255654794, "grad_norm": 16.32160541185497, "learning_rate": 2e-06, "loss": 0.2556, "step": 1585 }, { "epoch": 0.3679387542048486, "grad_norm": 24.10960800511522, "learning_rate": 2e-06, "loss": 0.3104, "step": 1586 }, { "epoch": 0.3681707458531493, "grad_norm": 15.480985134797434, "learning_rate": 2e-06, "loss": 0.3862, "step": 1587 }, { "epoch": 0.36840273750145, "grad_norm": 12.142741471902808, "learning_rate": 2e-06, "loss": 0.3515, "step": 1588 }, { "epoch": 0.3686347291497506, "grad_norm": 11.702276399480478, "learning_rate": 2e-06, "loss": 0.311, "step": 1589 }, { "epoch": 0.3688667207980513, "grad_norm": 16.70719405848102, "learning_rate": 2e-06, "loss": 0.3269, "step": 1590 }, { "epoch": 0.36909871244635195, "grad_norm": 12.12225751120716, "learning_rate": 2e-06, "loss": 0.3404, "step": 1591 }, { "epoch": 0.3693307040946526, "grad_norm": 11.512749205669706, "learning_rate": 2e-06, "loss": 0.2705, "step": 1592 }, { "epoch": 0.36956269574295325, "grad_norm": 7.127614749150153, "learning_rate": 2e-06, "loss": 0.2072, "step": 1593 }, { "epoch": 0.36979468739125393, "grad_norm": 23.513456031877478, "learning_rate": 2e-06, "loss": 0.3113, "step": 1594 }, { "epoch": 0.37002667903955455, "grad_norm": 8.289546448002808, "learning_rate": 2e-06, "loss": 0.3195, "step": 1595 }, { "epoch": 0.37025867068785523, "grad_norm": 15.104930483599123, "learning_rate": 2e-06, "loss": 0.2434, "step": 1596 }, { "epoch": 0.3704906623361559, "grad_norm": 17.009568657067756, "learning_rate": 2e-06, "loss": 0.2678, "step": 1597 }, { "epoch": 0.3707226539844566, "grad_norm": 17.809610643285815, "learning_rate": 2e-06, "loss": 0.387, "step": 1598 }, { "epoch": 0.3709546456327572, "grad_norm": 20.129437927118126, "learning_rate": 2e-06, "loss": 0.3004, "step": 1599 }, { "epoch": 0.3711866372810579, "grad_norm": 15.245701649509975, "learning_rate": 2e-06, "loss": 0.3453, "step": 1600 }, { "epoch": 0.37141862892935856, "grad_norm": 12.497912883042815, "learning_rate": 2e-06, "loss": 0.2064, "step": 1601 }, { "epoch": 0.3716506205776592, "grad_norm": 12.763218076540506, "learning_rate": 2e-06, "loss": 0.3223, "step": 1602 }, { "epoch": 0.37188261222595986, "grad_norm": 14.76310202808672, "learning_rate": 2e-06, "loss": 0.4025, "step": 1603 }, { "epoch": 0.37211460387426054, "grad_norm": 10.854093941092742, "learning_rate": 2e-06, "loss": 0.2103, "step": 1604 }, { "epoch": 0.37234659552256116, "grad_norm": 20.010957585816257, "learning_rate": 2e-06, "loss": 0.502, "step": 1605 }, { "epoch": 0.37257858717086184, "grad_norm": 20.461827465123676, "learning_rate": 2e-06, "loss": 0.3678, "step": 1606 }, { "epoch": 0.3728105788191625, "grad_norm": 12.35407993340569, "learning_rate": 2e-06, "loss": 0.2872, "step": 1607 }, { "epoch": 0.3730425704674632, "grad_norm": 16.69775007977456, "learning_rate": 2e-06, "loss": 0.3112, "step": 1608 }, { "epoch": 0.3732745621157638, "grad_norm": 16.24608996111506, "learning_rate": 2e-06, "loss": 0.3128, "step": 1609 }, { "epoch": 0.3735065537640645, "grad_norm": 21.652530385792378, "learning_rate": 2e-06, "loss": 0.31, "step": 1610 }, { "epoch": 0.3737385454123652, "grad_norm": 12.622235889749975, "learning_rate": 2e-06, "loss": 0.3509, "step": 1611 }, { "epoch": 0.3739705370606658, "grad_norm": 17.157816059926006, "learning_rate": 2e-06, "loss": 0.3221, "step": 1612 }, { "epoch": 0.3742025287089665, "grad_norm": 9.928656920548773, "learning_rate": 2e-06, "loss": 0.2486, "step": 1613 }, { "epoch": 0.37443452035726715, "grad_norm": 7.878328610402713, "learning_rate": 2e-06, "loss": 0.2231, "step": 1614 }, { "epoch": 0.3746665120055678, "grad_norm": 21.179210126353382, "learning_rate": 2e-06, "loss": 0.3815, "step": 1615 }, { "epoch": 0.37489850365386845, "grad_norm": 19.483475307452156, "learning_rate": 2e-06, "loss": 0.2831, "step": 1616 }, { "epoch": 0.37513049530216913, "grad_norm": 18.918324334501875, "learning_rate": 2e-06, "loss": 0.3439, "step": 1617 }, { "epoch": 0.3753624869504698, "grad_norm": 13.974426869543807, "learning_rate": 2e-06, "loss": 0.3879, "step": 1618 }, { "epoch": 0.37559447859877043, "grad_norm": 26.12250512557313, "learning_rate": 2e-06, "loss": 0.4059, "step": 1619 }, { "epoch": 0.3758264702470711, "grad_norm": 16.61178037941796, "learning_rate": 2e-06, "loss": 0.4071, "step": 1620 }, { "epoch": 0.3760584618953718, "grad_norm": 11.416733585531539, "learning_rate": 2e-06, "loss": 0.2167, "step": 1621 }, { "epoch": 0.3762904535436724, "grad_norm": 13.319214185114532, "learning_rate": 2e-06, "loss": 0.3348, "step": 1622 }, { "epoch": 0.3765224451919731, "grad_norm": 13.655422759429916, "learning_rate": 2e-06, "loss": 0.389, "step": 1623 }, { "epoch": 0.37675443684027377, "grad_norm": 12.053182330123681, "learning_rate": 2e-06, "loss": 0.2747, "step": 1624 }, { "epoch": 0.3769864284885744, "grad_norm": 18.83496077772956, "learning_rate": 2e-06, "loss": 0.3359, "step": 1625 }, { "epoch": 0.37721842013687507, "grad_norm": 17.905370457467995, "learning_rate": 2e-06, "loss": 0.3528, "step": 1626 }, { "epoch": 0.37745041178517574, "grad_norm": 17.468656650631704, "learning_rate": 2e-06, "loss": 0.2534, "step": 1627 }, { "epoch": 0.3776824034334764, "grad_norm": 23.713256640121145, "learning_rate": 2e-06, "loss": 0.2424, "step": 1628 }, { "epoch": 0.37791439508177704, "grad_norm": 11.843935984726398, "learning_rate": 2e-06, "loss": 0.3334, "step": 1629 }, { "epoch": 0.3781463867300777, "grad_norm": 16.972846583250554, "learning_rate": 2e-06, "loss": 0.3979, "step": 1630 }, { "epoch": 0.3783783783783784, "grad_norm": 16.37480579398604, "learning_rate": 2e-06, "loss": 0.3796, "step": 1631 }, { "epoch": 0.378610370026679, "grad_norm": 19.360782354701076, "learning_rate": 2e-06, "loss": 0.3681, "step": 1632 }, { "epoch": 0.3788423616749797, "grad_norm": 10.154342633825937, "learning_rate": 2e-06, "loss": 0.2564, "step": 1633 }, { "epoch": 0.3790743533232804, "grad_norm": 22.689267404415812, "learning_rate": 2e-06, "loss": 0.3216, "step": 1634 }, { "epoch": 0.379306344971581, "grad_norm": 22.7895374079743, "learning_rate": 2e-06, "loss": 0.3923, "step": 1635 }, { "epoch": 0.3795383366198817, "grad_norm": 23.763833374699864, "learning_rate": 2e-06, "loss": 0.3587, "step": 1636 }, { "epoch": 0.37977032826818236, "grad_norm": 9.367475197108027, "learning_rate": 2e-06, "loss": 0.2917, "step": 1637 }, { "epoch": 0.38000231991648303, "grad_norm": 13.235342399140125, "learning_rate": 2e-06, "loss": 0.242, "step": 1638 }, { "epoch": 0.38023431156478366, "grad_norm": 16.83143948537525, "learning_rate": 2e-06, "loss": 0.3111, "step": 1639 }, { "epoch": 0.38046630321308433, "grad_norm": 13.590884945633038, "learning_rate": 2e-06, "loss": 0.3164, "step": 1640 }, { "epoch": 0.380698294861385, "grad_norm": 9.172619097320478, "learning_rate": 2e-06, "loss": 0.2266, "step": 1641 }, { "epoch": 0.38093028650968563, "grad_norm": 35.29786602689293, "learning_rate": 2e-06, "loss": 0.405, "step": 1642 }, { "epoch": 0.3811622781579863, "grad_norm": 21.620132837757843, "learning_rate": 2e-06, "loss": 0.3185, "step": 1643 }, { "epoch": 0.381394269806287, "grad_norm": 15.970781077578724, "learning_rate": 2e-06, "loss": 0.2512, "step": 1644 }, { "epoch": 0.3816262614545876, "grad_norm": 14.70064658110598, "learning_rate": 2e-06, "loss": 0.2931, "step": 1645 }, { "epoch": 0.3818582531028883, "grad_norm": 13.887474908567368, "learning_rate": 2e-06, "loss": 0.2872, "step": 1646 }, { "epoch": 0.38209024475118897, "grad_norm": 19.884616625123037, "learning_rate": 2e-06, "loss": 0.4227, "step": 1647 }, { "epoch": 0.38232223639948965, "grad_norm": 15.302936516623225, "learning_rate": 2e-06, "loss": 0.3547, "step": 1648 }, { "epoch": 0.38255422804779027, "grad_norm": 14.32883875962087, "learning_rate": 2e-06, "loss": 0.2966, "step": 1649 }, { "epoch": 0.38278621969609095, "grad_norm": 11.768538296099742, "learning_rate": 2e-06, "loss": 0.2556, "step": 1650 }, { "epoch": 0.3830182113443916, "grad_norm": 22.138118983802702, "learning_rate": 2e-06, "loss": 0.3406, "step": 1651 }, { "epoch": 0.38325020299269225, "grad_norm": 13.11426253348709, "learning_rate": 2e-06, "loss": 0.2848, "step": 1652 }, { "epoch": 0.3834821946409929, "grad_norm": 21.608807534604892, "learning_rate": 2e-06, "loss": 0.2958, "step": 1653 }, { "epoch": 0.3837141862892936, "grad_norm": 9.369420646181656, "learning_rate": 2e-06, "loss": 0.3086, "step": 1654 }, { "epoch": 0.3839461779375942, "grad_norm": 21.073996636387196, "learning_rate": 2e-06, "loss": 0.2979, "step": 1655 }, { "epoch": 0.3841781695858949, "grad_norm": 20.865881418922612, "learning_rate": 2e-06, "loss": 0.2507, "step": 1656 }, { "epoch": 0.3844101612341956, "grad_norm": 18.164322650255222, "learning_rate": 2e-06, "loss": 0.2604, "step": 1657 }, { "epoch": 0.38464215288249626, "grad_norm": 10.166821633179827, "learning_rate": 2e-06, "loss": 0.2436, "step": 1658 }, { "epoch": 0.3848741445307969, "grad_norm": 20.213626738684884, "learning_rate": 2e-06, "loss": 0.3739, "step": 1659 }, { "epoch": 0.38510613617909756, "grad_norm": 21.43928377483437, "learning_rate": 2e-06, "loss": 0.3391, "step": 1660 }, { "epoch": 0.38533812782739824, "grad_norm": 12.13247627608106, "learning_rate": 2e-06, "loss": 0.3681, "step": 1661 }, { "epoch": 0.38557011947569886, "grad_norm": 15.29816035374616, "learning_rate": 2e-06, "loss": 0.2788, "step": 1662 }, { "epoch": 0.38580211112399954, "grad_norm": 31.623005766886344, "learning_rate": 2e-06, "loss": 0.2836, "step": 1663 }, { "epoch": 0.3860341027723002, "grad_norm": 11.67738748278718, "learning_rate": 2e-06, "loss": 0.2403, "step": 1664 }, { "epoch": 0.38626609442060084, "grad_norm": 13.416501155518167, "learning_rate": 2e-06, "loss": 0.2573, "step": 1665 }, { "epoch": 0.3864980860689015, "grad_norm": 11.920384086621059, "learning_rate": 2e-06, "loss": 0.2362, "step": 1666 }, { "epoch": 0.3867300777172022, "grad_norm": 18.72931889505314, "learning_rate": 2e-06, "loss": 0.3919, "step": 1667 }, { "epoch": 0.3869620693655028, "grad_norm": 8.137213985487316, "learning_rate": 2e-06, "loss": 0.2741, "step": 1668 }, { "epoch": 0.3871940610138035, "grad_norm": 18.030404210623193, "learning_rate": 2e-06, "loss": 0.2867, "step": 1669 }, { "epoch": 0.38742605266210417, "grad_norm": 23.792439026114092, "learning_rate": 2e-06, "loss": 0.3914, "step": 1670 }, { "epoch": 0.38765804431040485, "grad_norm": 16.554147682332868, "learning_rate": 2e-06, "loss": 0.398, "step": 1671 }, { "epoch": 0.38789003595870547, "grad_norm": 18.15107420377198, "learning_rate": 2e-06, "loss": 0.3045, "step": 1672 }, { "epoch": 0.38812202760700615, "grad_norm": 16.492763041860865, "learning_rate": 2e-06, "loss": 0.3261, "step": 1673 }, { "epoch": 0.3883540192553068, "grad_norm": 19.950535800151258, "learning_rate": 2e-06, "loss": 0.3572, "step": 1674 }, { "epoch": 0.38858601090360745, "grad_norm": 12.332895478452894, "learning_rate": 2e-06, "loss": 0.2484, "step": 1675 }, { "epoch": 0.3888180025519081, "grad_norm": 18.816090795558736, "learning_rate": 2e-06, "loss": 0.4445, "step": 1676 }, { "epoch": 0.3890499942002088, "grad_norm": 11.172173233194435, "learning_rate": 2e-06, "loss": 0.2278, "step": 1677 }, { "epoch": 0.3892819858485094, "grad_norm": 22.84534502367574, "learning_rate": 2e-06, "loss": 0.4344, "step": 1678 }, { "epoch": 0.3895139774968101, "grad_norm": 9.327583213833936, "learning_rate": 2e-06, "loss": 0.1992, "step": 1679 }, { "epoch": 0.3897459691451108, "grad_norm": 11.737114391340048, "learning_rate": 2e-06, "loss": 0.2423, "step": 1680 }, { "epoch": 0.38997796079341146, "grad_norm": 12.417614516132264, "learning_rate": 2e-06, "loss": 0.3601, "step": 1681 }, { "epoch": 0.3902099524417121, "grad_norm": 20.0655001781096, "learning_rate": 2e-06, "loss": 0.3668, "step": 1682 }, { "epoch": 0.39044194409001276, "grad_norm": 11.796345412021118, "learning_rate": 2e-06, "loss": 0.319, "step": 1683 }, { "epoch": 0.39067393573831344, "grad_norm": 17.12049151202, "learning_rate": 2e-06, "loss": 0.3473, "step": 1684 }, { "epoch": 0.39090592738661406, "grad_norm": 13.345906672000439, "learning_rate": 2e-06, "loss": 0.2233, "step": 1685 }, { "epoch": 0.39113791903491474, "grad_norm": 13.240562612933, "learning_rate": 2e-06, "loss": 0.3004, "step": 1686 }, { "epoch": 0.3913699106832154, "grad_norm": 14.035831267305534, "learning_rate": 2e-06, "loss": 0.3141, "step": 1687 }, { "epoch": 0.39160190233151604, "grad_norm": 13.460145672208174, "learning_rate": 2e-06, "loss": 0.2406, "step": 1688 }, { "epoch": 0.3918338939798167, "grad_norm": 13.097576569454555, "learning_rate": 2e-06, "loss": 0.2964, "step": 1689 }, { "epoch": 0.3920658856281174, "grad_norm": 7.234995154351904, "learning_rate": 2e-06, "loss": 0.3318, "step": 1690 }, { "epoch": 0.39229787727641807, "grad_norm": 21.42498802164779, "learning_rate": 2e-06, "loss": 0.3629, "step": 1691 }, { "epoch": 0.3925298689247187, "grad_norm": 29.42585971810702, "learning_rate": 2e-06, "loss": 0.398, "step": 1692 }, { "epoch": 0.3927618605730194, "grad_norm": 14.67197715062144, "learning_rate": 2e-06, "loss": 0.2969, "step": 1693 }, { "epoch": 0.39299385222132005, "grad_norm": 26.94553900507181, "learning_rate": 2e-06, "loss": 0.4015, "step": 1694 }, { "epoch": 0.3932258438696207, "grad_norm": 14.144027051794579, "learning_rate": 2e-06, "loss": 0.3245, "step": 1695 }, { "epoch": 0.39345783551792135, "grad_norm": 13.121990072480953, "learning_rate": 2e-06, "loss": 0.3125, "step": 1696 }, { "epoch": 0.39368982716622203, "grad_norm": 14.27997838244899, "learning_rate": 2e-06, "loss": 0.34, "step": 1697 }, { "epoch": 0.39392181881452265, "grad_norm": 13.623194592867334, "learning_rate": 2e-06, "loss": 0.2952, "step": 1698 }, { "epoch": 0.39415381046282333, "grad_norm": 11.2125716282906, "learning_rate": 2e-06, "loss": 0.2761, "step": 1699 }, { "epoch": 0.394385802111124, "grad_norm": 12.100281502825506, "learning_rate": 2e-06, "loss": 0.3967, "step": 1700 }, { "epoch": 0.3946177937594247, "grad_norm": 16.621454147354402, "learning_rate": 2e-06, "loss": 0.3108, "step": 1701 }, { "epoch": 0.3948497854077253, "grad_norm": 10.874602148419658, "learning_rate": 2e-06, "loss": 0.288, "step": 1702 }, { "epoch": 0.395081777056026, "grad_norm": 9.863341650478107, "learning_rate": 2e-06, "loss": 0.2274, "step": 1703 }, { "epoch": 0.39531376870432666, "grad_norm": 12.068471550080465, "learning_rate": 2e-06, "loss": 0.2886, "step": 1704 }, { "epoch": 0.3955457603526273, "grad_norm": 12.144258760523668, "learning_rate": 2e-06, "loss": 0.2762, "step": 1705 }, { "epoch": 0.39577775200092796, "grad_norm": 14.432399080933754, "learning_rate": 2e-06, "loss": 0.378, "step": 1706 }, { "epoch": 0.39600974364922864, "grad_norm": 21.47189822882105, "learning_rate": 2e-06, "loss": 0.379, "step": 1707 }, { "epoch": 0.39624173529752926, "grad_norm": 14.139723328501157, "learning_rate": 2e-06, "loss": 0.2892, "step": 1708 }, { "epoch": 0.39647372694582994, "grad_norm": 12.874557329749697, "learning_rate": 2e-06, "loss": 0.25, "step": 1709 }, { "epoch": 0.3967057185941306, "grad_norm": 19.74261744906016, "learning_rate": 2e-06, "loss": 0.2884, "step": 1710 }, { "epoch": 0.3969377102424313, "grad_norm": 13.526740825328071, "learning_rate": 2e-06, "loss": 0.2469, "step": 1711 }, { "epoch": 0.3971697018907319, "grad_norm": 17.29573806918465, "learning_rate": 2e-06, "loss": 0.3666, "step": 1712 }, { "epoch": 0.3974016935390326, "grad_norm": 22.00074367090561, "learning_rate": 2e-06, "loss": 0.3315, "step": 1713 }, { "epoch": 0.3976336851873333, "grad_norm": 12.216104847810138, "learning_rate": 2e-06, "loss": 0.3578, "step": 1714 }, { "epoch": 0.3978656768356339, "grad_norm": 33.55844174882175, "learning_rate": 2e-06, "loss": 0.4602, "step": 1715 }, { "epoch": 0.3980976684839346, "grad_norm": 6.798634249805131, "learning_rate": 2e-06, "loss": 0.178, "step": 1716 }, { "epoch": 0.39832966013223525, "grad_norm": 17.31017228664811, "learning_rate": 2e-06, "loss": 0.202, "step": 1717 }, { "epoch": 0.3985616517805359, "grad_norm": 15.660089205257279, "learning_rate": 2e-06, "loss": 0.3996, "step": 1718 }, { "epoch": 0.39879364342883655, "grad_norm": 21.208381895451023, "learning_rate": 2e-06, "loss": 0.3099, "step": 1719 }, { "epoch": 0.39902563507713723, "grad_norm": 14.960757327235939, "learning_rate": 2e-06, "loss": 0.2789, "step": 1720 }, { "epoch": 0.3992576267254379, "grad_norm": 30.451286730187253, "learning_rate": 2e-06, "loss": 0.3973, "step": 1721 }, { "epoch": 0.39948961837373853, "grad_norm": 29.620181933264263, "learning_rate": 2e-06, "loss": 0.3298, "step": 1722 }, { "epoch": 0.3997216100220392, "grad_norm": 13.07179496764708, "learning_rate": 2e-06, "loss": 0.3204, "step": 1723 }, { "epoch": 0.3999536016703399, "grad_norm": 31.938037556895573, "learning_rate": 2e-06, "loss": 0.4647, "step": 1724 }, { "epoch": 0.4001855933186405, "grad_norm": 11.521405989449157, "learning_rate": 2e-06, "loss": 0.3678, "step": 1725 }, { "epoch": 0.4004175849669412, "grad_norm": 13.366109839116676, "learning_rate": 2e-06, "loss": 0.4173, "step": 1726 }, { "epoch": 0.40064957661524186, "grad_norm": 17.831226552760167, "learning_rate": 2e-06, "loss": 0.3475, "step": 1727 }, { "epoch": 0.4008815682635425, "grad_norm": 16.687838976166226, "learning_rate": 2e-06, "loss": 0.1913, "step": 1728 }, { "epoch": 0.40111355991184316, "grad_norm": 8.855862056448904, "learning_rate": 2e-06, "loss": 0.2669, "step": 1729 }, { "epoch": 0.40134555156014384, "grad_norm": 20.294045592533415, "learning_rate": 2e-06, "loss": 0.4138, "step": 1730 }, { "epoch": 0.4015775432084445, "grad_norm": 20.282307181816307, "learning_rate": 2e-06, "loss": 0.3926, "step": 1731 }, { "epoch": 0.40180953485674514, "grad_norm": 17.150362589886832, "learning_rate": 2e-06, "loss": 0.1616, "step": 1732 }, { "epoch": 0.4020415265050458, "grad_norm": 16.07804665946086, "learning_rate": 2e-06, "loss": 0.2544, "step": 1733 }, { "epoch": 0.4022735181533465, "grad_norm": 15.330586830512058, "learning_rate": 2e-06, "loss": 0.3177, "step": 1734 }, { "epoch": 0.4025055098016471, "grad_norm": 12.730645638537759, "learning_rate": 2e-06, "loss": 0.303, "step": 1735 }, { "epoch": 0.4027375014499478, "grad_norm": 16.595958589744118, "learning_rate": 2e-06, "loss": 0.3585, "step": 1736 }, { "epoch": 0.4029694930982485, "grad_norm": 8.706544673603664, "learning_rate": 2e-06, "loss": 0.311, "step": 1737 }, { "epoch": 0.4032014847465491, "grad_norm": 16.090059723709984, "learning_rate": 2e-06, "loss": 0.2455, "step": 1738 }, { "epoch": 0.4034334763948498, "grad_norm": 8.675772602038379, "learning_rate": 2e-06, "loss": 0.1731, "step": 1739 }, { "epoch": 0.40366546804315045, "grad_norm": 18.296364172981882, "learning_rate": 2e-06, "loss": 0.3908, "step": 1740 }, { "epoch": 0.40389745969145113, "grad_norm": 16.115322502756978, "learning_rate": 2e-06, "loss": 0.2754, "step": 1741 }, { "epoch": 0.40412945133975176, "grad_norm": 20.310432071130496, "learning_rate": 2e-06, "loss": 0.3214, "step": 1742 }, { "epoch": 0.40436144298805243, "grad_norm": 15.865946919832218, "learning_rate": 2e-06, "loss": 0.3414, "step": 1743 }, { "epoch": 0.4045934346363531, "grad_norm": 14.325433367552268, "learning_rate": 2e-06, "loss": 0.3477, "step": 1744 }, { "epoch": 0.40482542628465373, "grad_norm": 16.91522871754405, "learning_rate": 2e-06, "loss": 0.2881, "step": 1745 }, { "epoch": 0.4050574179329544, "grad_norm": 12.156285411915164, "learning_rate": 2e-06, "loss": 0.2757, "step": 1746 }, { "epoch": 0.4052894095812551, "grad_norm": 15.700298066784649, "learning_rate": 2e-06, "loss": 0.2911, "step": 1747 }, { "epoch": 0.4055214012295557, "grad_norm": 14.070074963571189, "learning_rate": 2e-06, "loss": 0.2735, "step": 1748 }, { "epoch": 0.4057533928778564, "grad_norm": 20.567228387763485, "learning_rate": 2e-06, "loss": 0.3966, "step": 1749 }, { "epoch": 0.40598538452615707, "grad_norm": 21.568521644903555, "learning_rate": 2e-06, "loss": 0.4069, "step": 1750 }, { "epoch": 0.40621737617445774, "grad_norm": 24.98305458957807, "learning_rate": 2e-06, "loss": 0.3, "step": 1751 }, { "epoch": 0.40644936782275837, "grad_norm": 11.182159916834218, "learning_rate": 2e-06, "loss": 0.2476, "step": 1752 }, { "epoch": 0.40668135947105905, "grad_norm": 17.16773776017756, "learning_rate": 2e-06, "loss": 0.2769, "step": 1753 }, { "epoch": 0.4069133511193597, "grad_norm": 8.087817209372279, "learning_rate": 2e-06, "loss": 0.2333, "step": 1754 }, { "epoch": 0.40714534276766035, "grad_norm": 9.737889285495338, "learning_rate": 2e-06, "loss": 0.3203, "step": 1755 }, { "epoch": 0.407377334415961, "grad_norm": 13.276234387033147, "learning_rate": 2e-06, "loss": 0.3975, "step": 1756 }, { "epoch": 0.4076093260642617, "grad_norm": 12.833120524941462, "learning_rate": 2e-06, "loss": 0.3393, "step": 1757 }, { "epoch": 0.4078413177125623, "grad_norm": 10.864122378055994, "learning_rate": 2e-06, "loss": 0.213, "step": 1758 }, { "epoch": 0.408073309360863, "grad_norm": 17.43651215931162, "learning_rate": 2e-06, "loss": 0.2571, "step": 1759 }, { "epoch": 0.4083053010091637, "grad_norm": 9.462835101636259, "learning_rate": 2e-06, "loss": 0.2363, "step": 1760 }, { "epoch": 0.40853729265746436, "grad_norm": 14.60179515190759, "learning_rate": 2e-06, "loss": 0.358, "step": 1761 }, { "epoch": 0.408769284305765, "grad_norm": 9.35250829968875, "learning_rate": 2e-06, "loss": 0.2229, "step": 1762 }, { "epoch": 0.40900127595406566, "grad_norm": 16.893224625393078, "learning_rate": 2e-06, "loss": 0.2516, "step": 1763 }, { "epoch": 0.40923326760236634, "grad_norm": 18.305246064820754, "learning_rate": 2e-06, "loss": 0.4163, "step": 1764 }, { "epoch": 0.40946525925066696, "grad_norm": 14.924968231349286, "learning_rate": 2e-06, "loss": 0.4418, "step": 1765 }, { "epoch": 0.40969725089896764, "grad_norm": 19.839080541303428, "learning_rate": 2e-06, "loss": 0.298, "step": 1766 }, { "epoch": 0.4099292425472683, "grad_norm": 11.601214327833912, "learning_rate": 2e-06, "loss": 0.2756, "step": 1767 }, { "epoch": 0.41016123419556894, "grad_norm": 10.968524968551193, "learning_rate": 2e-06, "loss": 0.276, "step": 1768 }, { "epoch": 0.4103932258438696, "grad_norm": 6.826904440619662, "learning_rate": 2e-06, "loss": 0.255, "step": 1769 }, { "epoch": 0.4106252174921703, "grad_norm": 17.323631423776984, "learning_rate": 2e-06, "loss": 0.3229, "step": 1770 }, { "epoch": 0.41085720914047097, "grad_norm": 13.28354425099362, "learning_rate": 2e-06, "loss": 0.3321, "step": 1771 }, { "epoch": 0.4110892007887716, "grad_norm": 10.03252119573095, "learning_rate": 2e-06, "loss": 0.308, "step": 1772 }, { "epoch": 0.41132119243707227, "grad_norm": 23.554278728776346, "learning_rate": 2e-06, "loss": 0.3286, "step": 1773 }, { "epoch": 0.41155318408537295, "grad_norm": 13.998688298441905, "learning_rate": 2e-06, "loss": 0.2936, "step": 1774 }, { "epoch": 0.41178517573367357, "grad_norm": 22.02491607538067, "learning_rate": 2e-06, "loss": 0.3387, "step": 1775 }, { "epoch": 0.41201716738197425, "grad_norm": 13.99636863261507, "learning_rate": 2e-06, "loss": 0.2389, "step": 1776 }, { "epoch": 0.4122491590302749, "grad_norm": 14.508439431491093, "learning_rate": 2e-06, "loss": 0.2818, "step": 1777 }, { "epoch": 0.41248115067857555, "grad_norm": 22.92519999043949, "learning_rate": 2e-06, "loss": 0.4278, "step": 1778 }, { "epoch": 0.4127131423268762, "grad_norm": 14.599442491978975, "learning_rate": 2e-06, "loss": 0.4189, "step": 1779 }, { "epoch": 0.4129451339751769, "grad_norm": 20.83978644941764, "learning_rate": 2e-06, "loss": 0.2988, "step": 1780 }, { "epoch": 0.4131771256234776, "grad_norm": 16.8812419501217, "learning_rate": 2e-06, "loss": 0.2913, "step": 1781 }, { "epoch": 0.4134091172717782, "grad_norm": 22.735272830579238, "learning_rate": 2e-06, "loss": 0.3513, "step": 1782 }, { "epoch": 0.4136411089200789, "grad_norm": 9.301251987696757, "learning_rate": 2e-06, "loss": 0.305, "step": 1783 }, { "epoch": 0.41387310056837956, "grad_norm": 18.72825758685982, "learning_rate": 2e-06, "loss": 0.3613, "step": 1784 }, { "epoch": 0.4141050922166802, "grad_norm": 33.62199677330242, "learning_rate": 2e-06, "loss": 0.3825, "step": 1785 }, { "epoch": 0.41433708386498086, "grad_norm": 9.097995766326937, "learning_rate": 2e-06, "loss": 0.3069, "step": 1786 }, { "epoch": 0.41456907551328154, "grad_norm": 5.898798150314532, "learning_rate": 2e-06, "loss": 0.2112, "step": 1787 }, { "epoch": 0.41480106716158216, "grad_norm": 16.7077409628407, "learning_rate": 2e-06, "loss": 0.2519, "step": 1788 }, { "epoch": 0.41503305880988284, "grad_norm": 14.29526419345486, "learning_rate": 2e-06, "loss": 0.2502, "step": 1789 }, { "epoch": 0.4152650504581835, "grad_norm": 18.288221743113432, "learning_rate": 2e-06, "loss": 0.3396, "step": 1790 }, { "epoch": 0.4154970421064842, "grad_norm": 8.845939616997287, "learning_rate": 2e-06, "loss": 0.2663, "step": 1791 }, { "epoch": 0.4157290337547848, "grad_norm": 12.965756776065295, "learning_rate": 2e-06, "loss": 0.3467, "step": 1792 }, { "epoch": 0.4159610254030855, "grad_norm": 15.914929213419684, "learning_rate": 2e-06, "loss": 0.3533, "step": 1793 }, { "epoch": 0.41619301705138617, "grad_norm": 14.67947860185235, "learning_rate": 2e-06, "loss": 0.337, "step": 1794 }, { "epoch": 0.4164250086996868, "grad_norm": 19.295061646529685, "learning_rate": 2e-06, "loss": 0.3472, "step": 1795 }, { "epoch": 0.41665700034798747, "grad_norm": 8.408293380915646, "learning_rate": 2e-06, "loss": 0.2147, "step": 1796 }, { "epoch": 0.41688899199628815, "grad_norm": 10.062098294402563, "learning_rate": 2e-06, "loss": 0.2418, "step": 1797 }, { "epoch": 0.41712098364458877, "grad_norm": 11.463553835251005, "learning_rate": 2e-06, "loss": 0.2798, "step": 1798 }, { "epoch": 0.41735297529288945, "grad_norm": 18.860146153362425, "learning_rate": 2e-06, "loss": 0.3719, "step": 1799 }, { "epoch": 0.4175849669411901, "grad_norm": 13.389079107989657, "learning_rate": 2e-06, "loss": 0.2659, "step": 1800 }, { "epoch": 0.4178169585894908, "grad_norm": 19.569806795295246, "learning_rate": 2e-06, "loss": 0.3267, "step": 1801 }, { "epoch": 0.41804895023779143, "grad_norm": 12.072125881404723, "learning_rate": 2e-06, "loss": 0.2558, "step": 1802 }, { "epoch": 0.4182809418860921, "grad_norm": 20.466583975370828, "learning_rate": 2e-06, "loss": 0.3409, "step": 1803 }, { "epoch": 0.4185129335343928, "grad_norm": 16.68337800654914, "learning_rate": 2e-06, "loss": 0.258, "step": 1804 }, { "epoch": 0.4187449251826934, "grad_norm": 13.062693644167947, "learning_rate": 2e-06, "loss": 0.2986, "step": 1805 }, { "epoch": 0.4189769168309941, "grad_norm": 14.091819481275975, "learning_rate": 2e-06, "loss": 0.2461, "step": 1806 }, { "epoch": 0.41920890847929476, "grad_norm": 18.249361092614027, "learning_rate": 2e-06, "loss": 0.3251, "step": 1807 }, { "epoch": 0.4194409001275954, "grad_norm": 18.720542222479533, "learning_rate": 2e-06, "loss": 0.2487, "step": 1808 }, { "epoch": 0.41967289177589606, "grad_norm": 6.835853742556412, "learning_rate": 2e-06, "loss": 0.1909, "step": 1809 }, { "epoch": 0.41990488342419674, "grad_norm": 21.313152980029592, "learning_rate": 2e-06, "loss": 0.2814, "step": 1810 }, { "epoch": 0.4201368750724974, "grad_norm": 19.15105976427836, "learning_rate": 2e-06, "loss": 0.383, "step": 1811 }, { "epoch": 0.42036886672079804, "grad_norm": 11.295461296397983, "learning_rate": 2e-06, "loss": 0.2857, "step": 1812 }, { "epoch": 0.4206008583690987, "grad_norm": 7.541556412734428, "learning_rate": 2e-06, "loss": 0.2447, "step": 1813 }, { "epoch": 0.4208328500173994, "grad_norm": 18.291856914356643, "learning_rate": 2e-06, "loss": 0.4423, "step": 1814 }, { "epoch": 0.4210648416657, "grad_norm": 8.321430630856858, "learning_rate": 2e-06, "loss": 0.2377, "step": 1815 }, { "epoch": 0.4212968333140007, "grad_norm": 8.615068388386895, "learning_rate": 2e-06, "loss": 0.2333, "step": 1816 }, { "epoch": 0.4215288249623014, "grad_norm": 18.587239540548637, "learning_rate": 2e-06, "loss": 0.4264, "step": 1817 }, { "epoch": 0.421760816610602, "grad_norm": 19.258406635069775, "learning_rate": 2e-06, "loss": 0.3658, "step": 1818 }, { "epoch": 0.4219928082589027, "grad_norm": 4.718414240134861, "learning_rate": 2e-06, "loss": 0.1482, "step": 1819 }, { "epoch": 0.42222479990720335, "grad_norm": 19.28316315364071, "learning_rate": 2e-06, "loss": 0.2905, "step": 1820 }, { "epoch": 0.422456791555504, "grad_norm": 12.782558855185856, "learning_rate": 2e-06, "loss": 0.3345, "step": 1821 }, { "epoch": 0.42268878320380465, "grad_norm": 21.96367145052797, "learning_rate": 2e-06, "loss": 0.339, "step": 1822 }, { "epoch": 0.42292077485210533, "grad_norm": 15.16140918689698, "learning_rate": 2e-06, "loss": 0.3114, "step": 1823 }, { "epoch": 0.423152766500406, "grad_norm": 10.192502648774663, "learning_rate": 2e-06, "loss": 0.1998, "step": 1824 }, { "epoch": 0.42338475814870663, "grad_norm": 16.432028981570053, "learning_rate": 2e-06, "loss": 0.2548, "step": 1825 }, { "epoch": 0.4236167497970073, "grad_norm": 12.767243541706549, "learning_rate": 2e-06, "loss": 0.2338, "step": 1826 }, { "epoch": 0.423848741445308, "grad_norm": 23.907500035971236, "learning_rate": 2e-06, "loss": 0.3951, "step": 1827 }, { "epoch": 0.4240807330936086, "grad_norm": 20.369900827476446, "learning_rate": 2e-06, "loss": 0.3829, "step": 1828 }, { "epoch": 0.4243127247419093, "grad_norm": 12.49686598283754, "learning_rate": 2e-06, "loss": 0.2322, "step": 1829 }, { "epoch": 0.42454471639020996, "grad_norm": 17.01708522916691, "learning_rate": 2e-06, "loss": 0.291, "step": 1830 }, { "epoch": 0.4247767080385106, "grad_norm": 8.448959698676287, "learning_rate": 2e-06, "loss": 0.2344, "step": 1831 }, { "epoch": 0.42500869968681126, "grad_norm": 13.391004267278555, "learning_rate": 2e-06, "loss": 0.2412, "step": 1832 }, { "epoch": 0.42524069133511194, "grad_norm": 32.59089985580288, "learning_rate": 2e-06, "loss": 0.4774, "step": 1833 }, { "epoch": 0.4254726829834126, "grad_norm": 14.407359323922236, "learning_rate": 2e-06, "loss": 0.3374, "step": 1834 }, { "epoch": 0.42570467463171324, "grad_norm": 12.317732601842495, "learning_rate": 2e-06, "loss": 0.2778, "step": 1835 }, { "epoch": 0.4259366662800139, "grad_norm": 19.8003091897629, "learning_rate": 2e-06, "loss": 0.418, "step": 1836 }, { "epoch": 0.4261686579283146, "grad_norm": 11.484951169175416, "learning_rate": 2e-06, "loss": 0.217, "step": 1837 }, { "epoch": 0.4264006495766152, "grad_norm": 13.45733271265659, "learning_rate": 2e-06, "loss": 0.31, "step": 1838 }, { "epoch": 0.4266326412249159, "grad_norm": 15.472653333707216, "learning_rate": 2e-06, "loss": 0.2737, "step": 1839 }, { "epoch": 0.4268646328732166, "grad_norm": 24.25670999804905, "learning_rate": 2e-06, "loss": 0.3363, "step": 1840 }, { "epoch": 0.4270966245215172, "grad_norm": 19.94460399795094, "learning_rate": 2e-06, "loss": 0.3361, "step": 1841 }, { "epoch": 0.4273286161698179, "grad_norm": 13.063517439435312, "learning_rate": 2e-06, "loss": 0.3786, "step": 1842 }, { "epoch": 0.42756060781811855, "grad_norm": 10.281433902687104, "learning_rate": 2e-06, "loss": 0.3698, "step": 1843 }, { "epoch": 0.42779259946641923, "grad_norm": 14.490270130967572, "learning_rate": 2e-06, "loss": 0.3, "step": 1844 }, { "epoch": 0.42802459111471985, "grad_norm": 12.412525094210439, "learning_rate": 2e-06, "loss": 0.3253, "step": 1845 }, { "epoch": 0.42825658276302053, "grad_norm": 20.377294425853872, "learning_rate": 2e-06, "loss": 0.3974, "step": 1846 }, { "epoch": 0.4284885744113212, "grad_norm": 18.736659411511127, "learning_rate": 2e-06, "loss": 0.4384, "step": 1847 }, { "epoch": 0.42872056605962183, "grad_norm": 12.415806850795308, "learning_rate": 2e-06, "loss": 0.3279, "step": 1848 }, { "epoch": 0.4289525577079225, "grad_norm": 23.342252148205862, "learning_rate": 2e-06, "loss": 0.2838, "step": 1849 }, { "epoch": 0.4291845493562232, "grad_norm": 13.276574562564289, "learning_rate": 2e-06, "loss": 0.2406, "step": 1850 }, { "epoch": 0.4294165410045238, "grad_norm": 10.567426494535969, "learning_rate": 2e-06, "loss": 0.2654, "step": 1851 }, { "epoch": 0.4296485326528245, "grad_norm": 8.000618948096822, "learning_rate": 2e-06, "loss": 0.2258, "step": 1852 }, { "epoch": 0.42988052430112517, "grad_norm": 8.852477043978167, "learning_rate": 2e-06, "loss": 0.292, "step": 1853 }, { "epoch": 0.43011251594942584, "grad_norm": 8.604882799928584, "learning_rate": 2e-06, "loss": 0.2531, "step": 1854 }, { "epoch": 0.43034450759772647, "grad_norm": 15.729433265309597, "learning_rate": 2e-06, "loss": 0.3744, "step": 1855 }, { "epoch": 0.43057649924602714, "grad_norm": 15.22527467684575, "learning_rate": 2e-06, "loss": 0.3011, "step": 1856 }, { "epoch": 0.4308084908943278, "grad_norm": 14.485139169410346, "learning_rate": 2e-06, "loss": 0.2902, "step": 1857 }, { "epoch": 0.43104048254262844, "grad_norm": 28.39814798556948, "learning_rate": 2e-06, "loss": 0.3888, "step": 1858 }, { "epoch": 0.4312724741909291, "grad_norm": 11.340683696031968, "learning_rate": 2e-06, "loss": 0.2898, "step": 1859 }, { "epoch": 0.4315044658392298, "grad_norm": 9.388704159058799, "learning_rate": 2e-06, "loss": 0.2385, "step": 1860 }, { "epoch": 0.4317364574875304, "grad_norm": 12.78526627182259, "learning_rate": 2e-06, "loss": 0.2516, "step": 1861 }, { "epoch": 0.4319684491358311, "grad_norm": 11.221452479221814, "learning_rate": 2e-06, "loss": 0.2505, "step": 1862 }, { "epoch": 0.4322004407841318, "grad_norm": 8.805058760349361, "learning_rate": 2e-06, "loss": 0.3661, "step": 1863 }, { "epoch": 0.43243243243243246, "grad_norm": 20.94913580794226, "learning_rate": 2e-06, "loss": 0.4231, "step": 1864 }, { "epoch": 0.4326644240807331, "grad_norm": 18.669289662216194, "learning_rate": 2e-06, "loss": 0.3046, "step": 1865 }, { "epoch": 0.43289641572903376, "grad_norm": 12.36670710729202, "learning_rate": 2e-06, "loss": 0.2493, "step": 1866 }, { "epoch": 0.43312840737733443, "grad_norm": 11.374741286569185, "learning_rate": 2e-06, "loss": 0.3335, "step": 1867 }, { "epoch": 0.43336039902563506, "grad_norm": 33.88185912182654, "learning_rate": 2e-06, "loss": 0.3517, "step": 1868 }, { "epoch": 0.43359239067393573, "grad_norm": 22.20508173062101, "learning_rate": 2e-06, "loss": 0.2955, "step": 1869 }, { "epoch": 0.4338243823222364, "grad_norm": 7.340339839461144, "learning_rate": 2e-06, "loss": 0.1892, "step": 1870 }, { "epoch": 0.43405637397053703, "grad_norm": 20.551325961108073, "learning_rate": 2e-06, "loss": 0.3139, "step": 1871 }, { "epoch": 0.4342883656188377, "grad_norm": 14.707610072766458, "learning_rate": 2e-06, "loss": 0.3444, "step": 1872 }, { "epoch": 0.4345203572671384, "grad_norm": 15.411962906532079, "learning_rate": 2e-06, "loss": 0.3017, "step": 1873 }, { "epoch": 0.43475234891543907, "grad_norm": 13.893129257683169, "learning_rate": 2e-06, "loss": 0.3638, "step": 1874 }, { "epoch": 0.4349843405637397, "grad_norm": 16.280513594402603, "learning_rate": 2e-06, "loss": 0.2568, "step": 1875 }, { "epoch": 0.43521633221204037, "grad_norm": 18.16785510645439, "learning_rate": 2e-06, "loss": 0.3422, "step": 1876 }, { "epoch": 0.43544832386034105, "grad_norm": 9.750925911740046, "learning_rate": 2e-06, "loss": 0.2206, "step": 1877 }, { "epoch": 0.43568031550864167, "grad_norm": 13.192130318022839, "learning_rate": 2e-06, "loss": 0.3196, "step": 1878 }, { "epoch": 0.43591230715694235, "grad_norm": 12.223261978383052, "learning_rate": 2e-06, "loss": 0.2715, "step": 1879 }, { "epoch": 0.436144298805243, "grad_norm": 11.059180241388743, "learning_rate": 2e-06, "loss": 0.2609, "step": 1880 }, { "epoch": 0.43637629045354365, "grad_norm": 15.662230117945928, "learning_rate": 2e-06, "loss": 0.2097, "step": 1881 }, { "epoch": 0.4366082821018443, "grad_norm": 7.331387077331177, "learning_rate": 2e-06, "loss": 0.2691, "step": 1882 }, { "epoch": 0.436840273750145, "grad_norm": 12.61809767481381, "learning_rate": 2e-06, "loss": 0.2374, "step": 1883 }, { "epoch": 0.4370722653984457, "grad_norm": 12.78740040484834, "learning_rate": 2e-06, "loss": 0.2293, "step": 1884 }, { "epoch": 0.4373042570467463, "grad_norm": 14.424100789930472, "learning_rate": 2e-06, "loss": 0.3617, "step": 1885 }, { "epoch": 0.437536248695047, "grad_norm": 80.96212131101774, "learning_rate": 2e-06, "loss": 0.4131, "step": 1886 }, { "epoch": 0.43776824034334766, "grad_norm": 15.011266652601709, "learning_rate": 2e-06, "loss": 0.3044, "step": 1887 }, { "epoch": 0.4380002319916483, "grad_norm": 11.718612332506902, "learning_rate": 2e-06, "loss": 0.2712, "step": 1888 }, { "epoch": 0.43823222363994896, "grad_norm": 9.69556958468024, "learning_rate": 2e-06, "loss": 0.2875, "step": 1889 }, { "epoch": 0.43846421528824964, "grad_norm": 13.37107182380002, "learning_rate": 2e-06, "loss": 0.2525, "step": 1890 }, { "epoch": 0.43869620693655026, "grad_norm": 25.52509004317602, "learning_rate": 2e-06, "loss": 0.2865, "step": 1891 }, { "epoch": 0.43892819858485094, "grad_norm": 25.557432614648956, "learning_rate": 2e-06, "loss": 0.2095, "step": 1892 }, { "epoch": 0.4391601902331516, "grad_norm": 10.227165672394735, "learning_rate": 2e-06, "loss": 0.1997, "step": 1893 }, { "epoch": 0.4393921818814523, "grad_norm": 10.833971024024654, "learning_rate": 2e-06, "loss": 0.3021, "step": 1894 }, { "epoch": 0.4396241735297529, "grad_norm": 6.8477809683100395, "learning_rate": 2e-06, "loss": 0.2074, "step": 1895 }, { "epoch": 0.4398561651780536, "grad_norm": 12.612097699683517, "learning_rate": 2e-06, "loss": 0.268, "step": 1896 }, { "epoch": 0.44008815682635427, "grad_norm": 14.93894889485441, "learning_rate": 2e-06, "loss": 0.295, "step": 1897 }, { "epoch": 0.4403201484746549, "grad_norm": 12.675335108037462, "learning_rate": 2e-06, "loss": 0.2563, "step": 1898 }, { "epoch": 0.44055214012295557, "grad_norm": 14.079067493345839, "learning_rate": 2e-06, "loss": 0.252, "step": 1899 }, { "epoch": 0.44078413177125625, "grad_norm": 22.713016972391877, "learning_rate": 2e-06, "loss": 0.3575, "step": 1900 }, { "epoch": 0.44101612341955687, "grad_norm": 16.909479818805984, "learning_rate": 2e-06, "loss": 0.2408, "step": 1901 }, { "epoch": 0.44124811506785755, "grad_norm": 26.53615495358387, "learning_rate": 2e-06, "loss": 0.3782, "step": 1902 }, { "epoch": 0.4414801067161582, "grad_norm": 11.637938103347429, "learning_rate": 2e-06, "loss": 0.2748, "step": 1903 }, { "epoch": 0.4417120983644589, "grad_norm": 13.790174253848365, "learning_rate": 2e-06, "loss": 0.3713, "step": 1904 }, { "epoch": 0.4419440900127595, "grad_norm": 12.622131028191134, "learning_rate": 2e-06, "loss": 0.2575, "step": 1905 }, { "epoch": 0.4421760816610602, "grad_norm": 11.39969737402925, "learning_rate": 2e-06, "loss": 0.2648, "step": 1906 }, { "epoch": 0.4424080733093609, "grad_norm": 16.675642271360555, "learning_rate": 2e-06, "loss": 0.4741, "step": 1907 }, { "epoch": 0.4426400649576615, "grad_norm": 10.78164946540168, "learning_rate": 2e-06, "loss": 0.2563, "step": 1908 }, { "epoch": 0.4428720566059622, "grad_norm": 19.68038839531077, "learning_rate": 2e-06, "loss": 0.4349, "step": 1909 }, { "epoch": 0.44310404825426286, "grad_norm": 14.168099510681532, "learning_rate": 2e-06, "loss": 0.3591, "step": 1910 }, { "epoch": 0.4433360399025635, "grad_norm": 13.257912219731839, "learning_rate": 2e-06, "loss": 0.2278, "step": 1911 }, { "epoch": 0.44356803155086416, "grad_norm": 28.111517131517505, "learning_rate": 2e-06, "loss": 0.2837, "step": 1912 }, { "epoch": 0.44380002319916484, "grad_norm": 25.10965649267154, "learning_rate": 2e-06, "loss": 0.3274, "step": 1913 }, { "epoch": 0.4440320148474655, "grad_norm": 14.93557483973632, "learning_rate": 2e-06, "loss": 0.2515, "step": 1914 }, { "epoch": 0.44426400649576614, "grad_norm": 16.642468175359035, "learning_rate": 2e-06, "loss": 0.3378, "step": 1915 }, { "epoch": 0.4444959981440668, "grad_norm": 6.84438606505179, "learning_rate": 2e-06, "loss": 0.3409, "step": 1916 }, { "epoch": 0.4447279897923675, "grad_norm": 8.477797064840939, "learning_rate": 2e-06, "loss": 0.3412, "step": 1917 }, { "epoch": 0.4449599814406681, "grad_norm": 12.99853283907464, "learning_rate": 2e-06, "loss": 0.2441, "step": 1918 }, { "epoch": 0.4451919730889688, "grad_norm": 10.977481652548592, "learning_rate": 2e-06, "loss": 0.3244, "step": 1919 }, { "epoch": 0.4454239647372695, "grad_norm": 10.570234822426068, "learning_rate": 2e-06, "loss": 0.2877, "step": 1920 }, { "epoch": 0.4456559563855701, "grad_norm": 6.860732361741255, "learning_rate": 2e-06, "loss": 0.2442, "step": 1921 }, { "epoch": 0.4458879480338708, "grad_norm": 19.302410055233295, "learning_rate": 2e-06, "loss": 0.4067, "step": 1922 }, { "epoch": 0.44611993968217145, "grad_norm": 18.20793820249486, "learning_rate": 2e-06, "loss": 0.3678, "step": 1923 }, { "epoch": 0.44635193133047213, "grad_norm": 8.97289273170762, "learning_rate": 2e-06, "loss": 0.2128, "step": 1924 }, { "epoch": 0.44658392297877275, "grad_norm": 6.3471643353963, "learning_rate": 2e-06, "loss": 0.1974, "step": 1925 }, { "epoch": 0.44681591462707343, "grad_norm": 12.638237055112246, "learning_rate": 2e-06, "loss": 0.3043, "step": 1926 }, { "epoch": 0.4470479062753741, "grad_norm": 16.3087905268165, "learning_rate": 2e-06, "loss": 0.3217, "step": 1927 }, { "epoch": 0.44727989792367473, "grad_norm": 6.312405813391441, "learning_rate": 2e-06, "loss": 0.2761, "step": 1928 }, { "epoch": 0.4475118895719754, "grad_norm": 9.42320757238294, "learning_rate": 2e-06, "loss": 0.178, "step": 1929 }, { "epoch": 0.4477438812202761, "grad_norm": 14.571508418966776, "learning_rate": 2e-06, "loss": 0.3671, "step": 1930 }, { "epoch": 0.4479758728685767, "grad_norm": 11.19866672252569, "learning_rate": 2e-06, "loss": 0.4218, "step": 1931 }, { "epoch": 0.4482078645168774, "grad_norm": 18.094999969369557, "learning_rate": 2e-06, "loss": 0.2873, "step": 1932 }, { "epoch": 0.44843985616517806, "grad_norm": 17.776408186188583, "learning_rate": 2e-06, "loss": 0.212, "step": 1933 }, { "epoch": 0.44867184781347874, "grad_norm": 13.319460218034406, "learning_rate": 2e-06, "loss": 0.4009, "step": 1934 }, { "epoch": 0.44890383946177936, "grad_norm": 20.49299261920701, "learning_rate": 2e-06, "loss": 0.3528, "step": 1935 }, { "epoch": 0.44913583111008004, "grad_norm": 12.207768363429762, "learning_rate": 2e-06, "loss": 0.2797, "step": 1936 }, { "epoch": 0.4493678227583807, "grad_norm": 13.241157529714169, "learning_rate": 2e-06, "loss": 0.2463, "step": 1937 }, { "epoch": 0.44959981440668134, "grad_norm": 11.964061797665712, "learning_rate": 2e-06, "loss": 0.3486, "step": 1938 }, { "epoch": 0.449831806054982, "grad_norm": 17.760405028259854, "learning_rate": 2e-06, "loss": 0.2826, "step": 1939 }, { "epoch": 0.4500637977032827, "grad_norm": 18.835286779046086, "learning_rate": 2e-06, "loss": 0.2983, "step": 1940 }, { "epoch": 0.4502957893515833, "grad_norm": 10.061056247616934, "learning_rate": 2e-06, "loss": 0.2066, "step": 1941 }, { "epoch": 0.450527780999884, "grad_norm": 15.190416553972092, "learning_rate": 2e-06, "loss": 0.2897, "step": 1942 }, { "epoch": 0.4507597726481847, "grad_norm": 18.100146573334026, "learning_rate": 2e-06, "loss": 0.2557, "step": 1943 }, { "epoch": 0.45099176429648535, "grad_norm": 12.145254242892683, "learning_rate": 2e-06, "loss": 0.2617, "step": 1944 }, { "epoch": 0.451223755944786, "grad_norm": 8.675074169684049, "learning_rate": 2e-06, "loss": 0.2641, "step": 1945 }, { "epoch": 0.45145574759308665, "grad_norm": 14.379004029242951, "learning_rate": 2e-06, "loss": 0.3171, "step": 1946 }, { "epoch": 0.45168773924138733, "grad_norm": 16.35450216910894, "learning_rate": 2e-06, "loss": 0.2428, "step": 1947 }, { "epoch": 0.45191973088968795, "grad_norm": 15.501856211045736, "learning_rate": 2e-06, "loss": 0.2962, "step": 1948 }, { "epoch": 0.45215172253798863, "grad_norm": 13.519337257061718, "learning_rate": 2e-06, "loss": 0.2869, "step": 1949 }, { "epoch": 0.4523837141862893, "grad_norm": 22.08178341082319, "learning_rate": 2e-06, "loss": 0.3634, "step": 1950 }, { "epoch": 0.45261570583458993, "grad_norm": 19.244466854707706, "learning_rate": 2e-06, "loss": 0.4142, "step": 1951 }, { "epoch": 0.4528476974828906, "grad_norm": 13.45744682479427, "learning_rate": 2e-06, "loss": 0.3247, "step": 1952 }, { "epoch": 0.4530796891311913, "grad_norm": 19.971997627098528, "learning_rate": 2e-06, "loss": 0.2832, "step": 1953 }, { "epoch": 0.45331168077949197, "grad_norm": 7.784071208122583, "learning_rate": 2e-06, "loss": 0.1705, "step": 1954 }, { "epoch": 0.4535436724277926, "grad_norm": 26.175747345649796, "learning_rate": 2e-06, "loss": 0.2603, "step": 1955 }, { "epoch": 0.45377566407609327, "grad_norm": 9.977944745048129, "learning_rate": 2e-06, "loss": 0.3606, "step": 1956 }, { "epoch": 0.45400765572439394, "grad_norm": 9.241863633870448, "learning_rate": 2e-06, "loss": 0.1683, "step": 1957 }, { "epoch": 0.45423964737269457, "grad_norm": 7.854276987996447, "learning_rate": 2e-06, "loss": 0.2218, "step": 1958 }, { "epoch": 0.45447163902099524, "grad_norm": 17.508549301128976, "learning_rate": 2e-06, "loss": 0.265, "step": 1959 }, { "epoch": 0.4547036306692959, "grad_norm": 14.495906592291902, "learning_rate": 2e-06, "loss": 0.2092, "step": 1960 }, { "epoch": 0.45493562231759654, "grad_norm": 11.396050739221131, "learning_rate": 2e-06, "loss": 0.2218, "step": 1961 }, { "epoch": 0.4551676139658972, "grad_norm": 17.21341794034456, "learning_rate": 2e-06, "loss": 0.4191, "step": 1962 }, { "epoch": 0.4553996056141979, "grad_norm": 39.71999834351291, "learning_rate": 2e-06, "loss": 0.4364, "step": 1963 }, { "epoch": 0.4556315972624986, "grad_norm": 14.344933276382463, "learning_rate": 2e-06, "loss": 0.2449, "step": 1964 }, { "epoch": 0.4558635889107992, "grad_norm": 15.083856831023695, "learning_rate": 2e-06, "loss": 0.2171, "step": 1965 }, { "epoch": 0.4560955805590999, "grad_norm": 30.1833620615398, "learning_rate": 2e-06, "loss": 0.3772, "step": 1966 }, { "epoch": 0.45632757220740056, "grad_norm": 5.694038865160228, "learning_rate": 2e-06, "loss": 0.1896, "step": 1967 }, { "epoch": 0.4565595638557012, "grad_norm": 7.238584483897142, "learning_rate": 2e-06, "loss": 0.2347, "step": 1968 }, { "epoch": 0.45679155550400186, "grad_norm": 17.40421564491547, "learning_rate": 2e-06, "loss": 0.3307, "step": 1969 }, { "epoch": 0.45702354715230253, "grad_norm": 11.75504415856113, "learning_rate": 2e-06, "loss": 0.2683, "step": 1970 }, { "epoch": 0.45725553880060316, "grad_norm": 12.08785488011168, "learning_rate": 2e-06, "loss": 0.2822, "step": 1971 }, { "epoch": 0.45748753044890383, "grad_norm": 5.746202672849507, "learning_rate": 2e-06, "loss": 0.2149, "step": 1972 }, { "epoch": 0.4577195220972045, "grad_norm": 10.556481746351935, "learning_rate": 2e-06, "loss": 0.2994, "step": 1973 }, { "epoch": 0.45795151374550513, "grad_norm": 13.933486628952526, "learning_rate": 2e-06, "loss": 0.2687, "step": 1974 }, { "epoch": 0.4581835053938058, "grad_norm": 19.368610008099733, "learning_rate": 2e-06, "loss": 0.2651, "step": 1975 }, { "epoch": 0.4584154970421065, "grad_norm": 7.269647480537984, "learning_rate": 2e-06, "loss": 0.2359, "step": 1976 }, { "epoch": 0.45864748869040717, "grad_norm": 14.717656516693904, "learning_rate": 2e-06, "loss": 0.3163, "step": 1977 }, { "epoch": 0.4588794803387078, "grad_norm": 13.554029836416927, "learning_rate": 2e-06, "loss": 0.3661, "step": 1978 }, { "epoch": 0.45911147198700847, "grad_norm": 14.72655286380002, "learning_rate": 2e-06, "loss": 0.2868, "step": 1979 }, { "epoch": 0.45934346363530915, "grad_norm": 19.754084297636542, "learning_rate": 2e-06, "loss": 0.4112, "step": 1980 }, { "epoch": 0.45957545528360977, "grad_norm": 18.040682007952025, "learning_rate": 2e-06, "loss": 0.3363, "step": 1981 }, { "epoch": 0.45980744693191045, "grad_norm": 23.563968224340655, "learning_rate": 2e-06, "loss": 0.2206, "step": 1982 }, { "epoch": 0.4600394385802111, "grad_norm": 14.553620166412069, "learning_rate": 2e-06, "loss": 0.2806, "step": 1983 }, { "epoch": 0.46027143022851175, "grad_norm": 7.424935719456725, "learning_rate": 2e-06, "loss": 0.2433, "step": 1984 }, { "epoch": 0.4605034218768124, "grad_norm": 12.02707486472308, "learning_rate": 2e-06, "loss": 0.1909, "step": 1985 }, { "epoch": 0.4607354135251131, "grad_norm": 26.349289928963117, "learning_rate": 2e-06, "loss": 0.288, "step": 1986 }, { "epoch": 0.4609674051734138, "grad_norm": 11.77000173436258, "learning_rate": 2e-06, "loss": 0.2468, "step": 1987 }, { "epoch": 0.4611993968217144, "grad_norm": 14.221580423288055, "learning_rate": 2e-06, "loss": 0.324, "step": 1988 }, { "epoch": 0.4614313884700151, "grad_norm": 19.491108377638728, "learning_rate": 2e-06, "loss": 0.413, "step": 1989 }, { "epoch": 0.46166338011831576, "grad_norm": 16.717819048175322, "learning_rate": 2e-06, "loss": 0.4165, "step": 1990 }, { "epoch": 0.4618953717666164, "grad_norm": 16.506916137038182, "learning_rate": 2e-06, "loss": 0.371, "step": 1991 }, { "epoch": 0.46212736341491706, "grad_norm": 12.255157261779837, "learning_rate": 2e-06, "loss": 0.3191, "step": 1992 }, { "epoch": 0.46235935506321774, "grad_norm": 10.690498300962876, "learning_rate": 2e-06, "loss": 0.2414, "step": 1993 }, { "epoch": 0.46259134671151836, "grad_norm": 9.877054825920622, "learning_rate": 2e-06, "loss": 0.3728, "step": 1994 }, { "epoch": 0.46282333835981904, "grad_norm": 15.340735025865651, "learning_rate": 2e-06, "loss": 0.4747, "step": 1995 }, { "epoch": 0.4630553300081197, "grad_norm": 15.341221735588649, "learning_rate": 2e-06, "loss": 0.3361, "step": 1996 }, { "epoch": 0.4632873216564204, "grad_norm": 10.945005995090467, "learning_rate": 2e-06, "loss": 0.2797, "step": 1997 }, { "epoch": 0.463519313304721, "grad_norm": 5.284853062707283, "learning_rate": 2e-06, "loss": 0.1841, "step": 1998 }, { "epoch": 0.4637513049530217, "grad_norm": 11.633188950114901, "learning_rate": 2e-06, "loss": 0.2856, "step": 1999 }, { "epoch": 0.46398329660132237, "grad_norm": 6.859718365049897, "learning_rate": 2e-06, "loss": 0.3048, "step": 2000 }, { "epoch": 0.464215288249623, "grad_norm": 16.589305902090143, "learning_rate": 2e-06, "loss": 0.4069, "step": 2001 }, { "epoch": 0.46444727989792367, "grad_norm": 10.342457937117073, "learning_rate": 2e-06, "loss": 0.3057, "step": 2002 }, { "epoch": 0.46467927154622435, "grad_norm": 12.333270314902068, "learning_rate": 2e-06, "loss": 0.2834, "step": 2003 }, { "epoch": 0.46491126319452497, "grad_norm": 22.423928853702186, "learning_rate": 2e-06, "loss": 0.3894, "step": 2004 }, { "epoch": 0.46514325484282565, "grad_norm": 13.766616278689142, "learning_rate": 2e-06, "loss": 0.375, "step": 2005 }, { "epoch": 0.4653752464911263, "grad_norm": 16.62716850114615, "learning_rate": 2e-06, "loss": 0.3018, "step": 2006 }, { "epoch": 0.465607238139427, "grad_norm": 18.43425031492219, "learning_rate": 2e-06, "loss": 0.3473, "step": 2007 }, { "epoch": 0.4658392297877276, "grad_norm": 8.281568359570956, "learning_rate": 2e-06, "loss": 0.2622, "step": 2008 }, { "epoch": 0.4660712214360283, "grad_norm": 14.635641990408606, "learning_rate": 2e-06, "loss": 0.2376, "step": 2009 }, { "epoch": 0.466303213084329, "grad_norm": 16.20241517427514, "learning_rate": 2e-06, "loss": 0.3306, "step": 2010 }, { "epoch": 0.4665352047326296, "grad_norm": 11.51127206202647, "learning_rate": 2e-06, "loss": 0.2747, "step": 2011 }, { "epoch": 0.4667671963809303, "grad_norm": 58.30128405919469, "learning_rate": 2e-06, "loss": 0.3029, "step": 2012 }, { "epoch": 0.46699918802923096, "grad_norm": 13.9317037243102, "learning_rate": 2e-06, "loss": 0.3309, "step": 2013 }, { "epoch": 0.4672311796775316, "grad_norm": 20.31824848079081, "learning_rate": 2e-06, "loss": 0.3761, "step": 2014 }, { "epoch": 0.46746317132583226, "grad_norm": 9.457890163970369, "learning_rate": 2e-06, "loss": 0.3379, "step": 2015 }, { "epoch": 0.46769516297413294, "grad_norm": 18.053400087811408, "learning_rate": 2e-06, "loss": 0.3155, "step": 2016 }, { "epoch": 0.4679271546224336, "grad_norm": 11.634119001512087, "learning_rate": 2e-06, "loss": 0.3429, "step": 2017 }, { "epoch": 0.46815914627073424, "grad_norm": 13.942355437046755, "learning_rate": 2e-06, "loss": 0.3103, "step": 2018 }, { "epoch": 0.4683911379190349, "grad_norm": 14.711577692597876, "learning_rate": 2e-06, "loss": 0.3242, "step": 2019 }, { "epoch": 0.4686231295673356, "grad_norm": 12.822970373558434, "learning_rate": 2e-06, "loss": 0.2824, "step": 2020 }, { "epoch": 0.4688551212156362, "grad_norm": 7.922564466961149, "learning_rate": 2e-06, "loss": 0.3574, "step": 2021 }, { "epoch": 0.4690871128639369, "grad_norm": 16.645591544402976, "learning_rate": 2e-06, "loss": 0.26, "step": 2022 }, { "epoch": 0.4693191045122376, "grad_norm": 17.150991755358596, "learning_rate": 2e-06, "loss": 0.3091, "step": 2023 }, { "epoch": 0.4695510961605382, "grad_norm": 19.355662866561232, "learning_rate": 2e-06, "loss": 0.3248, "step": 2024 }, { "epoch": 0.4697830878088389, "grad_norm": 8.98763782114109, "learning_rate": 2e-06, "loss": 0.2328, "step": 2025 }, { "epoch": 0.47001507945713955, "grad_norm": 10.750289888646794, "learning_rate": 2e-06, "loss": 0.2208, "step": 2026 }, { "epoch": 0.47024707110544023, "grad_norm": 15.590133390676085, "learning_rate": 2e-06, "loss": 0.2389, "step": 2027 }, { "epoch": 0.47047906275374085, "grad_norm": 16.63610849979652, "learning_rate": 2e-06, "loss": 0.3112, "step": 2028 }, { "epoch": 0.47071105440204153, "grad_norm": 10.792337381759852, "learning_rate": 2e-06, "loss": 0.3559, "step": 2029 }, { "epoch": 0.4709430460503422, "grad_norm": 14.816149884219211, "learning_rate": 2e-06, "loss": 0.3003, "step": 2030 }, { "epoch": 0.47117503769864283, "grad_norm": 10.341262243880099, "learning_rate": 2e-06, "loss": 0.3463, "step": 2031 }, { "epoch": 0.4714070293469435, "grad_norm": 17.72305433274037, "learning_rate": 2e-06, "loss": 0.4462, "step": 2032 }, { "epoch": 0.4716390209952442, "grad_norm": 16.83950046901211, "learning_rate": 2e-06, "loss": 0.2417, "step": 2033 }, { "epoch": 0.4718710126435448, "grad_norm": 18.10327281457699, "learning_rate": 2e-06, "loss": 0.3096, "step": 2034 }, { "epoch": 0.4721030042918455, "grad_norm": 14.254882642959036, "learning_rate": 2e-06, "loss": 0.3366, "step": 2035 }, { "epoch": 0.47233499594014616, "grad_norm": 21.676286798359246, "learning_rate": 2e-06, "loss": 0.3546, "step": 2036 }, { "epoch": 0.47256698758844684, "grad_norm": 12.720790909738758, "learning_rate": 2e-06, "loss": 0.3007, "step": 2037 }, { "epoch": 0.47279897923674746, "grad_norm": 11.498119426389403, "learning_rate": 2e-06, "loss": 0.2476, "step": 2038 }, { "epoch": 0.47303097088504814, "grad_norm": 20.816542225879648, "learning_rate": 2e-06, "loss": 0.4009, "step": 2039 }, { "epoch": 0.4732629625333488, "grad_norm": 16.82688865312617, "learning_rate": 2e-06, "loss": 0.2652, "step": 2040 }, { "epoch": 0.47349495418164944, "grad_norm": 9.70477771542857, "learning_rate": 2e-06, "loss": 0.2974, "step": 2041 }, { "epoch": 0.4737269458299501, "grad_norm": 13.296534871793732, "learning_rate": 2e-06, "loss": 0.2746, "step": 2042 }, { "epoch": 0.4739589374782508, "grad_norm": 14.065427392790259, "learning_rate": 2e-06, "loss": 0.3149, "step": 2043 }, { "epoch": 0.4741909291265514, "grad_norm": 22.38889426658264, "learning_rate": 2e-06, "loss": 0.433, "step": 2044 }, { "epoch": 0.4744229207748521, "grad_norm": 14.376475918423807, "learning_rate": 2e-06, "loss": 0.3366, "step": 2045 }, { "epoch": 0.4746549124231528, "grad_norm": 9.277440540725683, "learning_rate": 2e-06, "loss": 0.337, "step": 2046 }, { "epoch": 0.47488690407145345, "grad_norm": 17.340785257332662, "learning_rate": 2e-06, "loss": 0.2725, "step": 2047 }, { "epoch": 0.4751188957197541, "grad_norm": 9.985084330908725, "learning_rate": 2e-06, "loss": 0.2141, "step": 2048 }, { "epoch": 0.47535088736805475, "grad_norm": 10.633352809709951, "learning_rate": 2e-06, "loss": 0.2789, "step": 2049 }, { "epoch": 0.47558287901635543, "grad_norm": 23.121629632798786, "learning_rate": 2e-06, "loss": 0.2321, "step": 2050 }, { "epoch": 0.47581487066465605, "grad_norm": 10.891572671577103, "learning_rate": 2e-06, "loss": 0.3372, "step": 2051 }, { "epoch": 0.47604686231295673, "grad_norm": 23.9600752713284, "learning_rate": 2e-06, "loss": 0.3471, "step": 2052 }, { "epoch": 0.4762788539612574, "grad_norm": 15.048695431676713, "learning_rate": 2e-06, "loss": 0.4033, "step": 2053 }, { "epoch": 0.47651084560955803, "grad_norm": 19.555167465786873, "learning_rate": 2e-06, "loss": 0.3338, "step": 2054 }, { "epoch": 0.4767428372578587, "grad_norm": 27.78200124896194, "learning_rate": 2e-06, "loss": 0.4486, "step": 2055 }, { "epoch": 0.4769748289061594, "grad_norm": 12.272255303881282, "learning_rate": 2e-06, "loss": 0.3018, "step": 2056 }, { "epoch": 0.47720682055446006, "grad_norm": 15.556624908312893, "learning_rate": 2e-06, "loss": 0.5194, "step": 2057 }, { "epoch": 0.4774388122027607, "grad_norm": 16.338581145118493, "learning_rate": 2e-06, "loss": 0.3547, "step": 2058 }, { "epoch": 0.47767080385106137, "grad_norm": 9.074037753398741, "learning_rate": 2e-06, "loss": 0.2408, "step": 2059 }, { "epoch": 0.47790279549936204, "grad_norm": 10.66378411033167, "learning_rate": 2e-06, "loss": 0.3439, "step": 2060 }, { "epoch": 0.47813478714766267, "grad_norm": 15.797434529468434, "learning_rate": 2e-06, "loss": 0.2944, "step": 2061 }, { "epoch": 0.47836677879596334, "grad_norm": 13.227433292179827, "learning_rate": 2e-06, "loss": 0.339, "step": 2062 }, { "epoch": 0.478598770444264, "grad_norm": 12.03833396979913, "learning_rate": 2e-06, "loss": 0.3403, "step": 2063 }, { "epoch": 0.47883076209256464, "grad_norm": 24.553130563738854, "learning_rate": 2e-06, "loss": 0.3004, "step": 2064 }, { "epoch": 0.4790627537408653, "grad_norm": 20.737155578901096, "learning_rate": 2e-06, "loss": 0.4112, "step": 2065 }, { "epoch": 0.479294745389166, "grad_norm": 27.884478252430842, "learning_rate": 2e-06, "loss": 0.3297, "step": 2066 }, { "epoch": 0.4795267370374667, "grad_norm": 13.103378328133143, "learning_rate": 2e-06, "loss": 0.3001, "step": 2067 }, { "epoch": 0.4797587286857673, "grad_norm": 17.80927820776487, "learning_rate": 2e-06, "loss": 0.4377, "step": 2068 }, { "epoch": 0.479990720334068, "grad_norm": 20.661695218080254, "learning_rate": 2e-06, "loss": 0.3473, "step": 2069 }, { "epoch": 0.48022271198236866, "grad_norm": 28.217870160429282, "learning_rate": 2e-06, "loss": 0.3783, "step": 2070 }, { "epoch": 0.4804547036306693, "grad_norm": 17.477461536493266, "learning_rate": 2e-06, "loss": 0.3352, "step": 2071 }, { "epoch": 0.48068669527896996, "grad_norm": 23.889925987937147, "learning_rate": 2e-06, "loss": 0.3111, "step": 2072 }, { "epoch": 0.48091868692727063, "grad_norm": 7.9886444116729205, "learning_rate": 2e-06, "loss": 0.3253, "step": 2073 }, { "epoch": 0.48115067857557126, "grad_norm": 19.73259296094963, "learning_rate": 2e-06, "loss": 0.3367, "step": 2074 }, { "epoch": 0.48138267022387193, "grad_norm": 9.559685773496563, "learning_rate": 2e-06, "loss": 0.234, "step": 2075 }, { "epoch": 0.4816146618721726, "grad_norm": 9.743531669983685, "learning_rate": 2e-06, "loss": 0.2975, "step": 2076 }, { "epoch": 0.4818466535204733, "grad_norm": 16.81635360695747, "learning_rate": 2e-06, "loss": 0.2438, "step": 2077 }, { "epoch": 0.4820786451687739, "grad_norm": 11.790522180861018, "learning_rate": 2e-06, "loss": 0.2424, "step": 2078 }, { "epoch": 0.4823106368170746, "grad_norm": 14.632744832313552, "learning_rate": 2e-06, "loss": 0.3295, "step": 2079 }, { "epoch": 0.48254262846537527, "grad_norm": 11.717947162424096, "learning_rate": 2e-06, "loss": 0.3074, "step": 2080 }, { "epoch": 0.4827746201136759, "grad_norm": 8.98069286186794, "learning_rate": 2e-06, "loss": 0.2319, "step": 2081 }, { "epoch": 0.48300661176197657, "grad_norm": 10.350196437648169, "learning_rate": 2e-06, "loss": 0.3483, "step": 2082 }, { "epoch": 0.48323860341027725, "grad_norm": 18.872461989879593, "learning_rate": 2e-06, "loss": 0.3595, "step": 2083 }, { "epoch": 0.48347059505857787, "grad_norm": 18.968088081083174, "learning_rate": 2e-06, "loss": 0.3471, "step": 2084 }, { "epoch": 0.48370258670687855, "grad_norm": 14.399667825466722, "learning_rate": 2e-06, "loss": 0.3329, "step": 2085 }, { "epoch": 0.4839345783551792, "grad_norm": 13.474511688092948, "learning_rate": 2e-06, "loss": 0.3144, "step": 2086 }, { "epoch": 0.4841665700034799, "grad_norm": 11.216722839581688, "learning_rate": 2e-06, "loss": 0.2157, "step": 2087 }, { "epoch": 0.4843985616517805, "grad_norm": 14.86844187814411, "learning_rate": 2e-06, "loss": 0.4169, "step": 2088 }, { "epoch": 0.4846305533000812, "grad_norm": 7.8447621775911545, "learning_rate": 2e-06, "loss": 0.3091, "step": 2089 }, { "epoch": 0.4848625449483819, "grad_norm": 10.254262575831111, "learning_rate": 2e-06, "loss": 0.359, "step": 2090 }, { "epoch": 0.4850945365966825, "grad_norm": 11.876234611995246, "learning_rate": 2e-06, "loss": 0.269, "step": 2091 }, { "epoch": 0.4853265282449832, "grad_norm": 14.067135704743569, "learning_rate": 2e-06, "loss": 0.3312, "step": 2092 }, { "epoch": 0.48555851989328386, "grad_norm": 7.128865969322003, "learning_rate": 2e-06, "loss": 0.235, "step": 2093 }, { "epoch": 0.4857905115415845, "grad_norm": 26.351599690207127, "learning_rate": 2e-06, "loss": 0.3259, "step": 2094 }, { "epoch": 0.48602250318988516, "grad_norm": 16.241201805268105, "learning_rate": 2e-06, "loss": 0.2984, "step": 2095 }, { "epoch": 0.48625449483818584, "grad_norm": 14.767969302463698, "learning_rate": 2e-06, "loss": 0.3041, "step": 2096 }, { "epoch": 0.4864864864864865, "grad_norm": 15.7565988823627, "learning_rate": 2e-06, "loss": 0.329, "step": 2097 }, { "epoch": 0.48671847813478714, "grad_norm": 16.203714255793574, "learning_rate": 2e-06, "loss": 0.2604, "step": 2098 }, { "epoch": 0.4869504697830878, "grad_norm": 10.42518604548034, "learning_rate": 2e-06, "loss": 0.3288, "step": 2099 }, { "epoch": 0.4871824614313885, "grad_norm": 22.287666620106165, "learning_rate": 2e-06, "loss": 0.3003, "step": 2100 }, { "epoch": 0.4874144530796891, "grad_norm": 15.199158506152191, "learning_rate": 2e-06, "loss": 0.2466, "step": 2101 }, { "epoch": 0.4876464447279898, "grad_norm": 17.558767455624956, "learning_rate": 2e-06, "loss": 0.3145, "step": 2102 }, { "epoch": 0.48787843637629047, "grad_norm": 18.285953331339602, "learning_rate": 2e-06, "loss": 0.4843, "step": 2103 }, { "epoch": 0.4881104280245911, "grad_norm": 15.432844223638583, "learning_rate": 2e-06, "loss": 0.2451, "step": 2104 }, { "epoch": 0.48834241967289177, "grad_norm": 11.767138467996935, "learning_rate": 2e-06, "loss": 0.2904, "step": 2105 }, { "epoch": 0.48857441132119245, "grad_norm": 14.707804934793018, "learning_rate": 2e-06, "loss": 0.3309, "step": 2106 }, { "epoch": 0.4888064029694931, "grad_norm": 12.873460858804348, "learning_rate": 2e-06, "loss": 0.2538, "step": 2107 }, { "epoch": 0.48903839461779375, "grad_norm": 15.551365719566405, "learning_rate": 2e-06, "loss": 0.3376, "step": 2108 }, { "epoch": 0.4892703862660944, "grad_norm": 22.354249058364896, "learning_rate": 2e-06, "loss": 0.3482, "step": 2109 }, { "epoch": 0.4895023779143951, "grad_norm": 12.13426092309433, "learning_rate": 2e-06, "loss": 0.2767, "step": 2110 }, { "epoch": 0.4897343695626957, "grad_norm": 6.378416323954354, "learning_rate": 2e-06, "loss": 0.1642, "step": 2111 }, { "epoch": 0.4899663612109964, "grad_norm": 12.172120073384693, "learning_rate": 2e-06, "loss": 0.2751, "step": 2112 }, { "epoch": 0.4901983528592971, "grad_norm": 8.006798240440455, "learning_rate": 2e-06, "loss": 0.2359, "step": 2113 }, { "epoch": 0.4904303445075977, "grad_norm": 15.544069438253967, "learning_rate": 2e-06, "loss": 0.3287, "step": 2114 }, { "epoch": 0.4906623361558984, "grad_norm": 7.0112959850423735, "learning_rate": 2e-06, "loss": 0.1766, "step": 2115 }, { "epoch": 0.49089432780419906, "grad_norm": 15.549511728081491, "learning_rate": 2e-06, "loss": 0.3081, "step": 2116 }, { "epoch": 0.49112631945249974, "grad_norm": 11.256547217001314, "learning_rate": 2e-06, "loss": 0.3567, "step": 2117 }, { "epoch": 0.49135831110080036, "grad_norm": 14.704389837873286, "learning_rate": 2e-06, "loss": 0.2985, "step": 2118 }, { "epoch": 0.49159030274910104, "grad_norm": 17.727161833868298, "learning_rate": 2e-06, "loss": 0.3379, "step": 2119 }, { "epoch": 0.4918222943974017, "grad_norm": 13.301220933708462, "learning_rate": 2e-06, "loss": 0.2558, "step": 2120 }, { "epoch": 0.49205428604570234, "grad_norm": 6.108889993757156, "learning_rate": 2e-06, "loss": 0.2027, "step": 2121 }, { "epoch": 0.492286277694003, "grad_norm": 10.868425584914485, "learning_rate": 2e-06, "loss": 0.2438, "step": 2122 }, { "epoch": 0.4925182693423037, "grad_norm": 17.635453142218786, "learning_rate": 2e-06, "loss": 0.4297, "step": 2123 }, { "epoch": 0.4927502609906043, "grad_norm": 17.445941519621602, "learning_rate": 2e-06, "loss": 0.436, "step": 2124 }, { "epoch": 0.492982252638905, "grad_norm": 16.86257932055074, "learning_rate": 2e-06, "loss": 0.334, "step": 2125 }, { "epoch": 0.49321424428720567, "grad_norm": 14.557790324443094, "learning_rate": 2e-06, "loss": 0.3895, "step": 2126 }, { "epoch": 0.4934462359355063, "grad_norm": 15.365709948457615, "learning_rate": 2e-06, "loss": 0.2738, "step": 2127 }, { "epoch": 0.49367822758380697, "grad_norm": 11.727258104871728, "learning_rate": 2e-06, "loss": 0.1987, "step": 2128 }, { "epoch": 0.49391021923210765, "grad_norm": 13.763772152669365, "learning_rate": 2e-06, "loss": 0.2981, "step": 2129 }, { "epoch": 0.49414221088040833, "grad_norm": 12.891289421629347, "learning_rate": 2e-06, "loss": 0.241, "step": 2130 }, { "epoch": 0.49437420252870895, "grad_norm": 15.264107197755068, "learning_rate": 2e-06, "loss": 0.319, "step": 2131 }, { "epoch": 0.49460619417700963, "grad_norm": 19.049486690093694, "learning_rate": 2e-06, "loss": 0.2844, "step": 2132 }, { "epoch": 0.4948381858253103, "grad_norm": 20.289091498324936, "learning_rate": 2e-06, "loss": 0.3354, "step": 2133 }, { "epoch": 0.49507017747361093, "grad_norm": 8.846363557878725, "learning_rate": 2e-06, "loss": 0.2455, "step": 2134 }, { "epoch": 0.4953021691219116, "grad_norm": 19.68036789620874, "learning_rate": 2e-06, "loss": 0.3254, "step": 2135 }, { "epoch": 0.4955341607702123, "grad_norm": 19.01167373777457, "learning_rate": 2e-06, "loss": 0.3668, "step": 2136 }, { "epoch": 0.4957661524185129, "grad_norm": 20.271394808053454, "learning_rate": 2e-06, "loss": 0.3329, "step": 2137 }, { "epoch": 0.4959981440668136, "grad_norm": 13.713450589308374, "learning_rate": 2e-06, "loss": 0.3172, "step": 2138 }, { "epoch": 0.49623013571511426, "grad_norm": 15.681288599458853, "learning_rate": 2e-06, "loss": 0.2464, "step": 2139 }, { "epoch": 0.49646212736341494, "grad_norm": 22.489577644012517, "learning_rate": 2e-06, "loss": 0.466, "step": 2140 }, { "epoch": 0.49669411901171556, "grad_norm": 11.929339914832337, "learning_rate": 2e-06, "loss": 0.2438, "step": 2141 }, { "epoch": 0.49692611066001624, "grad_norm": 6.908041019643261, "learning_rate": 2e-06, "loss": 0.1916, "step": 2142 }, { "epoch": 0.4971581023083169, "grad_norm": 16.87423395285013, "learning_rate": 2e-06, "loss": 0.3377, "step": 2143 }, { "epoch": 0.49739009395661754, "grad_norm": 21.484609766510122, "learning_rate": 2e-06, "loss": 0.3187, "step": 2144 }, { "epoch": 0.4976220856049182, "grad_norm": 10.005933510006772, "learning_rate": 2e-06, "loss": 0.2997, "step": 2145 }, { "epoch": 0.4978540772532189, "grad_norm": 20.390856830943978, "learning_rate": 2e-06, "loss": 0.2964, "step": 2146 }, { "epoch": 0.4980860689015195, "grad_norm": 20.54252638356709, "learning_rate": 2e-06, "loss": 0.3739, "step": 2147 }, { "epoch": 0.4983180605498202, "grad_norm": 6.440030667693158, "learning_rate": 2e-06, "loss": 0.213, "step": 2148 }, { "epoch": 0.4985500521981209, "grad_norm": 13.909759042917415, "learning_rate": 2e-06, "loss": 0.1993, "step": 2149 }, { "epoch": 0.49878204384642155, "grad_norm": 13.442729423292715, "learning_rate": 2e-06, "loss": 0.1918, "step": 2150 }, { "epoch": 0.4990140354947222, "grad_norm": 9.777832167761424, "learning_rate": 2e-06, "loss": 0.1691, "step": 2151 }, { "epoch": 0.49924602714302285, "grad_norm": 7.528993896065364, "learning_rate": 2e-06, "loss": 0.2213, "step": 2152 }, { "epoch": 0.49947801879132353, "grad_norm": 25.537326149047804, "learning_rate": 2e-06, "loss": 0.4174, "step": 2153 }, { "epoch": 0.49971001043962415, "grad_norm": 13.598357491155793, "learning_rate": 2e-06, "loss": 0.3848, "step": 2154 }, { "epoch": 0.49994200208792483, "grad_norm": 22.388810531659942, "learning_rate": 2e-06, "loss": 0.3423, "step": 2155 }, { "epoch": 0.5001739937362255, "grad_norm": 13.93373343416442, "learning_rate": 2e-06, "loss": 0.2189, "step": 2156 }, { "epoch": 0.5004059853845262, "grad_norm": 14.328986795854332, "learning_rate": 2e-06, "loss": 0.3242, "step": 2157 }, { "epoch": 0.5006379770328269, "grad_norm": 8.904671756440253, "learning_rate": 2e-06, "loss": 0.2041, "step": 2158 }, { "epoch": 0.5008699686811274, "grad_norm": 23.515968807666702, "learning_rate": 2e-06, "loss": 0.4471, "step": 2159 }, { "epoch": 0.5011019603294281, "grad_norm": 21.827464695996884, "learning_rate": 2e-06, "loss": 0.3769, "step": 2160 }, { "epoch": 0.5013339519777288, "grad_norm": 18.861402954271224, "learning_rate": 2e-06, "loss": 0.372, "step": 2161 }, { "epoch": 0.5015659436260295, "grad_norm": 16.535871849830876, "learning_rate": 2e-06, "loss": 0.2858, "step": 2162 }, { "epoch": 0.5017979352743301, "grad_norm": 10.303277065169574, "learning_rate": 2e-06, "loss": 0.1672, "step": 2163 }, { "epoch": 0.5020299269226308, "grad_norm": 15.957852373614422, "learning_rate": 2e-06, "loss": 0.2696, "step": 2164 }, { "epoch": 0.5022619185709315, "grad_norm": 26.862222528082594, "learning_rate": 2e-06, "loss": 0.3697, "step": 2165 }, { "epoch": 0.5024939102192321, "grad_norm": 13.53436358948289, "learning_rate": 2e-06, "loss": 0.3213, "step": 2166 }, { "epoch": 0.5027259018675327, "grad_norm": 29.75890831496249, "learning_rate": 2e-06, "loss": 0.3747, "step": 2167 }, { "epoch": 0.5029578935158334, "grad_norm": 17.201119948928586, "learning_rate": 2e-06, "loss": 0.4157, "step": 2168 }, { "epoch": 0.5031898851641341, "grad_norm": 20.322957627453476, "learning_rate": 2e-06, "loss": 0.3425, "step": 2169 }, { "epoch": 0.5034218768124348, "grad_norm": 7.242148992234226, "learning_rate": 2e-06, "loss": 0.2237, "step": 2170 }, { "epoch": 0.5036538684607355, "grad_norm": 11.3995466559775, "learning_rate": 2e-06, "loss": 0.2041, "step": 2171 }, { "epoch": 0.503885860109036, "grad_norm": 13.692661532868897, "learning_rate": 2e-06, "loss": 0.2766, "step": 2172 }, { "epoch": 0.5041178517573367, "grad_norm": 13.045830152901408, "learning_rate": 2e-06, "loss": 0.2802, "step": 2173 }, { "epoch": 0.5043498434056374, "grad_norm": 13.19375863975848, "learning_rate": 2e-06, "loss": 0.3715, "step": 2174 }, { "epoch": 0.504581835053938, "grad_norm": 12.549473738148507, "learning_rate": 2e-06, "loss": 0.2536, "step": 2175 }, { "epoch": 0.5048138267022387, "grad_norm": 18.270592559002413, "learning_rate": 2e-06, "loss": 0.2249, "step": 2176 }, { "epoch": 0.5050458183505394, "grad_norm": 22.483451955144304, "learning_rate": 2e-06, "loss": 0.3166, "step": 2177 }, { "epoch": 0.5052778099988401, "grad_norm": 17.316063660340365, "learning_rate": 2e-06, "loss": 0.2714, "step": 2178 }, { "epoch": 0.5055098016471407, "grad_norm": 10.947393037556699, "learning_rate": 2e-06, "loss": 0.3, "step": 2179 }, { "epoch": 0.5057417932954413, "grad_norm": 22.039873971526152, "learning_rate": 2e-06, "loss": 0.4411, "step": 2180 }, { "epoch": 0.505973784943742, "grad_norm": 10.035311774289827, "learning_rate": 2e-06, "loss": 0.2457, "step": 2181 }, { "epoch": 0.5062057765920427, "grad_norm": 20.400488855842067, "learning_rate": 2e-06, "loss": 0.3309, "step": 2182 }, { "epoch": 0.5064377682403434, "grad_norm": 11.139661608372032, "learning_rate": 2e-06, "loss": 0.2716, "step": 2183 }, { "epoch": 0.506669759888644, "grad_norm": 49.22644449344963, "learning_rate": 2e-06, "loss": 0.413, "step": 2184 }, { "epoch": 0.5069017515369447, "grad_norm": 19.282249953371107, "learning_rate": 2e-06, "loss": 0.2559, "step": 2185 }, { "epoch": 0.5071337431852453, "grad_norm": 9.96155712975433, "learning_rate": 2e-06, "loss": 0.2553, "step": 2186 }, { "epoch": 0.507365734833546, "grad_norm": 11.389788449201504, "learning_rate": 2e-06, "loss": 0.2749, "step": 2187 }, { "epoch": 0.5075977264818466, "grad_norm": 9.573367203354213, "learning_rate": 2e-06, "loss": 0.325, "step": 2188 }, { "epoch": 0.5078297181301473, "grad_norm": 17.638556783160844, "learning_rate": 2e-06, "loss": 0.3245, "step": 2189 }, { "epoch": 0.508061709778448, "grad_norm": 25.24343689451894, "learning_rate": 2e-06, "loss": 0.4962, "step": 2190 }, { "epoch": 0.5082937014267487, "grad_norm": 15.432898553377086, "learning_rate": 2e-06, "loss": 0.2843, "step": 2191 }, { "epoch": 0.5085256930750492, "grad_norm": 18.526829636362876, "learning_rate": 2e-06, "loss": 0.3193, "step": 2192 }, { "epoch": 0.5087576847233499, "grad_norm": 20.829626240692, "learning_rate": 2e-06, "loss": 0.2898, "step": 2193 }, { "epoch": 0.5089896763716506, "grad_norm": 12.080813626427616, "learning_rate": 2e-06, "loss": 0.2743, "step": 2194 }, { "epoch": 0.5092216680199513, "grad_norm": 8.755550213350691, "learning_rate": 2e-06, "loss": 0.2772, "step": 2195 }, { "epoch": 0.509453659668252, "grad_norm": 17.579316445335433, "learning_rate": 2e-06, "loss": 0.3266, "step": 2196 }, { "epoch": 0.5096856513165526, "grad_norm": 17.325365704374104, "learning_rate": 2e-06, "loss": 0.2805, "step": 2197 }, { "epoch": 0.5099176429648533, "grad_norm": 14.492615581490334, "learning_rate": 2e-06, "loss": 0.3094, "step": 2198 }, { "epoch": 0.5101496346131539, "grad_norm": 14.234190346875504, "learning_rate": 2e-06, "loss": 0.2348, "step": 2199 }, { "epoch": 0.5103816262614546, "grad_norm": 7.5398495123313465, "learning_rate": 2e-06, "loss": 0.1718, "step": 2200 }, { "epoch": 0.5106136179097552, "grad_norm": 6.97541242627694, "learning_rate": 2e-06, "loss": 0.3266, "step": 2201 }, { "epoch": 0.5108456095580559, "grad_norm": 12.721195042383458, "learning_rate": 2e-06, "loss": 0.253, "step": 2202 }, { "epoch": 0.5110776012063566, "grad_norm": 7.105303434910561, "learning_rate": 2e-06, "loss": 0.2085, "step": 2203 }, { "epoch": 0.5113095928546573, "grad_norm": 16.65842865369534, "learning_rate": 2e-06, "loss": 0.326, "step": 2204 }, { "epoch": 0.511541584502958, "grad_norm": 24.447691272915243, "learning_rate": 2e-06, "loss": 0.3282, "step": 2205 }, { "epoch": 0.5117735761512585, "grad_norm": 7.472587814339079, "learning_rate": 2e-06, "loss": 0.1978, "step": 2206 }, { "epoch": 0.5120055677995592, "grad_norm": 27.452162551642193, "learning_rate": 2e-06, "loss": 0.2873, "step": 2207 }, { "epoch": 0.5122375594478599, "grad_norm": 12.531644069360812, "learning_rate": 2e-06, "loss": 0.2842, "step": 2208 }, { "epoch": 0.5124695510961605, "grad_norm": 13.022291960747255, "learning_rate": 2e-06, "loss": 0.3008, "step": 2209 }, { "epoch": 0.5127015427444612, "grad_norm": 11.436705149111399, "learning_rate": 2e-06, "loss": 0.2965, "step": 2210 }, { "epoch": 0.5129335343927619, "grad_norm": 9.325082731789037, "learning_rate": 2e-06, "loss": 0.2812, "step": 2211 }, { "epoch": 0.5131655260410625, "grad_norm": 15.090660618989261, "learning_rate": 2e-06, "loss": 0.2271, "step": 2212 }, { "epoch": 0.5133975176893631, "grad_norm": 12.092657076627773, "learning_rate": 2e-06, "loss": 0.2794, "step": 2213 }, { "epoch": 0.5136295093376638, "grad_norm": 16.494372122641952, "learning_rate": 2e-06, "loss": 0.3281, "step": 2214 }, { "epoch": 0.5138615009859645, "grad_norm": 12.449203842828988, "learning_rate": 2e-06, "loss": 0.2594, "step": 2215 }, { "epoch": 0.5140934926342652, "grad_norm": 15.52348629468538, "learning_rate": 2e-06, "loss": 0.3694, "step": 2216 }, { "epoch": 0.5143254842825659, "grad_norm": 17.681642491435536, "learning_rate": 2e-06, "loss": 0.3573, "step": 2217 }, { "epoch": 0.5145574759308665, "grad_norm": 6.604458338708483, "learning_rate": 2e-06, "loss": 0.2758, "step": 2218 }, { "epoch": 0.5147894675791671, "grad_norm": 13.732407906659214, "learning_rate": 2e-06, "loss": 0.304, "step": 2219 }, { "epoch": 0.5150214592274678, "grad_norm": 19.435164902984656, "learning_rate": 2e-06, "loss": 0.2704, "step": 2220 }, { "epoch": 0.5152534508757685, "grad_norm": 17.545281421409236, "learning_rate": 2e-06, "loss": 0.3262, "step": 2221 }, { "epoch": 0.5154854425240691, "grad_norm": 9.242074142541009, "learning_rate": 2e-06, "loss": 0.2567, "step": 2222 }, { "epoch": 0.5157174341723698, "grad_norm": 15.786352449218043, "learning_rate": 2e-06, "loss": 0.3375, "step": 2223 }, { "epoch": 0.5159494258206705, "grad_norm": 5.905837709522254, "learning_rate": 2e-06, "loss": 0.2207, "step": 2224 }, { "epoch": 0.5161814174689712, "grad_norm": 14.735766964453777, "learning_rate": 2e-06, "loss": 0.3925, "step": 2225 }, { "epoch": 0.5164134091172717, "grad_norm": 15.461827147246586, "learning_rate": 2e-06, "loss": 0.3894, "step": 2226 }, { "epoch": 0.5166454007655724, "grad_norm": 10.058630282691166, "learning_rate": 2e-06, "loss": 0.2264, "step": 2227 }, { "epoch": 0.5168773924138731, "grad_norm": 6.832754603678109, "learning_rate": 2e-06, "loss": 0.281, "step": 2228 }, { "epoch": 0.5171093840621738, "grad_norm": 16.511843498691217, "learning_rate": 2e-06, "loss": 0.3108, "step": 2229 }, { "epoch": 0.5173413757104744, "grad_norm": 19.925078422841, "learning_rate": 2e-06, "loss": 0.3226, "step": 2230 }, { "epoch": 0.5175733673587751, "grad_norm": 23.279579301825684, "learning_rate": 2e-06, "loss": 0.3833, "step": 2231 }, { "epoch": 0.5178053590070757, "grad_norm": 10.204446780260694, "learning_rate": 2e-06, "loss": 0.2996, "step": 2232 }, { "epoch": 0.5180373506553764, "grad_norm": 12.123237316873128, "learning_rate": 2e-06, "loss": 0.2906, "step": 2233 }, { "epoch": 0.518269342303677, "grad_norm": 11.329122471507537, "learning_rate": 2e-06, "loss": 0.3399, "step": 2234 }, { "epoch": 0.5185013339519777, "grad_norm": 24.975527522311257, "learning_rate": 2e-06, "loss": 0.4095, "step": 2235 }, { "epoch": 0.5187333256002784, "grad_norm": 15.940577202895128, "learning_rate": 2e-06, "loss": 0.3415, "step": 2236 }, { "epoch": 0.5189653172485791, "grad_norm": 20.14661131715556, "learning_rate": 2e-06, "loss": 0.3974, "step": 2237 }, { "epoch": 0.5191973088968798, "grad_norm": 25.747100784967884, "learning_rate": 2e-06, "loss": 0.5037, "step": 2238 }, { "epoch": 0.5194293005451803, "grad_norm": 14.208535073812218, "learning_rate": 2e-06, "loss": 0.2438, "step": 2239 }, { "epoch": 0.519661292193481, "grad_norm": 17.03550596170188, "learning_rate": 2e-06, "loss": 0.3653, "step": 2240 }, { "epoch": 0.5198932838417817, "grad_norm": 20.93553667412997, "learning_rate": 2e-06, "loss": 0.2732, "step": 2241 }, { "epoch": 0.5201252754900824, "grad_norm": 10.008111793167586, "learning_rate": 2e-06, "loss": 0.2699, "step": 2242 }, { "epoch": 0.520357267138383, "grad_norm": 20.26414491317248, "learning_rate": 2e-06, "loss": 0.3475, "step": 2243 }, { "epoch": 0.5205892587866837, "grad_norm": 24.230542480582827, "learning_rate": 2e-06, "loss": 0.3996, "step": 2244 }, { "epoch": 0.5208212504349844, "grad_norm": 20.463428916265258, "learning_rate": 2e-06, "loss": 0.2895, "step": 2245 }, { "epoch": 0.521053242083285, "grad_norm": 12.477672191195685, "learning_rate": 2e-06, "loss": 0.2641, "step": 2246 }, { "epoch": 0.5212852337315856, "grad_norm": 100.35080063444947, "learning_rate": 2e-06, "loss": 0.2666, "step": 2247 }, { "epoch": 0.5215172253798863, "grad_norm": 24.628890125223037, "learning_rate": 2e-06, "loss": 0.3212, "step": 2248 }, { "epoch": 0.521749217028187, "grad_norm": 9.204998287585962, "learning_rate": 2e-06, "loss": 0.2658, "step": 2249 }, { "epoch": 0.5219812086764877, "grad_norm": 18.863388131065673, "learning_rate": 2e-06, "loss": 0.3096, "step": 2250 }, { "epoch": 0.5222132003247884, "grad_norm": 15.814258917614072, "learning_rate": 2e-06, "loss": 0.2506, "step": 2251 }, { "epoch": 0.5224451919730889, "grad_norm": 12.02163164782188, "learning_rate": 2e-06, "loss": 0.2721, "step": 2252 }, { "epoch": 0.5226771836213896, "grad_norm": 13.910898698958496, "learning_rate": 2e-06, "loss": 0.2853, "step": 2253 }, { "epoch": 0.5229091752696903, "grad_norm": 17.072635763484605, "learning_rate": 2e-06, "loss": 0.2623, "step": 2254 }, { "epoch": 0.523141166917991, "grad_norm": 14.665093774873432, "learning_rate": 2e-06, "loss": 0.3196, "step": 2255 }, { "epoch": 0.5233731585662916, "grad_norm": 17.038607429232925, "learning_rate": 2e-06, "loss": 0.3338, "step": 2256 }, { "epoch": 0.5236051502145923, "grad_norm": 8.32717226665899, "learning_rate": 2e-06, "loss": 0.2404, "step": 2257 }, { "epoch": 0.523837141862893, "grad_norm": 16.883098831842254, "learning_rate": 2e-06, "loss": 0.2216, "step": 2258 }, { "epoch": 0.5240691335111936, "grad_norm": 20.377352713674426, "learning_rate": 2e-06, "loss": 0.3238, "step": 2259 }, { "epoch": 0.5243011251594942, "grad_norm": 12.458863951225556, "learning_rate": 2e-06, "loss": 0.3009, "step": 2260 }, { "epoch": 0.5245331168077949, "grad_norm": 11.239890354739732, "learning_rate": 2e-06, "loss": 0.3214, "step": 2261 }, { "epoch": 0.5247651084560956, "grad_norm": 18.620507924392726, "learning_rate": 2e-06, "loss": 0.3723, "step": 2262 }, { "epoch": 0.5249971001043963, "grad_norm": 13.254486794790614, "learning_rate": 2e-06, "loss": 0.2997, "step": 2263 }, { "epoch": 0.5252290917526969, "grad_norm": 13.22770783264009, "learning_rate": 2e-06, "loss": 0.3, "step": 2264 }, { "epoch": 0.5254610834009976, "grad_norm": 17.009437904290014, "learning_rate": 2e-06, "loss": 0.3899, "step": 2265 }, { "epoch": 0.5256930750492982, "grad_norm": 20.946786476780414, "learning_rate": 2e-06, "loss": 0.3711, "step": 2266 }, { "epoch": 0.5259250666975989, "grad_norm": 13.43035616771182, "learning_rate": 2e-06, "loss": 0.2795, "step": 2267 }, { "epoch": 0.5261570583458995, "grad_norm": 15.701373292627125, "learning_rate": 2e-06, "loss": 0.34, "step": 2268 }, { "epoch": 0.5263890499942002, "grad_norm": 11.788398579381099, "learning_rate": 2e-06, "loss": 0.2095, "step": 2269 }, { "epoch": 0.5266210416425009, "grad_norm": 21.228215030045135, "learning_rate": 2e-06, "loss": 0.3226, "step": 2270 }, { "epoch": 0.5268530332908016, "grad_norm": 13.830399196968724, "learning_rate": 2e-06, "loss": 0.2479, "step": 2271 }, { "epoch": 0.5270850249391021, "grad_norm": 19.86319616283615, "learning_rate": 2e-06, "loss": 0.3438, "step": 2272 }, { "epoch": 0.5273170165874028, "grad_norm": 11.956746996575461, "learning_rate": 2e-06, "loss": 0.2824, "step": 2273 }, { "epoch": 0.5275490082357035, "grad_norm": 11.180178543415238, "learning_rate": 2e-06, "loss": 0.2802, "step": 2274 }, { "epoch": 0.5277809998840042, "grad_norm": 9.880631130726139, "learning_rate": 2e-06, "loss": 0.3052, "step": 2275 }, { "epoch": 0.5280129915323049, "grad_norm": 9.766750451583386, "learning_rate": 2e-06, "loss": 0.2437, "step": 2276 }, { "epoch": 0.5282449831806055, "grad_norm": 18.584827551294715, "learning_rate": 2e-06, "loss": 0.3032, "step": 2277 }, { "epoch": 0.5284769748289062, "grad_norm": 13.19289878347082, "learning_rate": 2e-06, "loss": 0.2042, "step": 2278 }, { "epoch": 0.5287089664772068, "grad_norm": 8.841165204840243, "learning_rate": 2e-06, "loss": 0.1651, "step": 2279 }, { "epoch": 0.5289409581255075, "grad_norm": 12.913411705213157, "learning_rate": 2e-06, "loss": 0.2826, "step": 2280 }, { "epoch": 0.5291729497738081, "grad_norm": 16.76037343633817, "learning_rate": 2e-06, "loss": 0.2525, "step": 2281 }, { "epoch": 0.5294049414221088, "grad_norm": 15.153622033605663, "learning_rate": 2e-06, "loss": 0.2516, "step": 2282 }, { "epoch": 0.5296369330704095, "grad_norm": 9.680270585277633, "learning_rate": 2e-06, "loss": 0.2359, "step": 2283 }, { "epoch": 0.5298689247187102, "grad_norm": 22.396002284150175, "learning_rate": 2e-06, "loss": 0.3231, "step": 2284 }, { "epoch": 0.5301009163670107, "grad_norm": 16.939145166831942, "learning_rate": 2e-06, "loss": 0.4175, "step": 2285 }, { "epoch": 0.5303329080153114, "grad_norm": 13.878641540351467, "learning_rate": 2e-06, "loss": 0.2965, "step": 2286 }, { "epoch": 0.5305648996636121, "grad_norm": 14.818456238172777, "learning_rate": 2e-06, "loss": 0.2961, "step": 2287 }, { "epoch": 0.5307968913119128, "grad_norm": 12.767136730539981, "learning_rate": 2e-06, "loss": 0.258, "step": 2288 }, { "epoch": 0.5310288829602134, "grad_norm": 10.171811680648132, "learning_rate": 2e-06, "loss": 0.2368, "step": 2289 }, { "epoch": 0.5312608746085141, "grad_norm": 16.018420281641706, "learning_rate": 2e-06, "loss": 0.3154, "step": 2290 }, { "epoch": 0.5314928662568148, "grad_norm": 13.07454574100996, "learning_rate": 2e-06, "loss": 0.2373, "step": 2291 }, { "epoch": 0.5317248579051154, "grad_norm": 13.353026905275584, "learning_rate": 2e-06, "loss": 0.4516, "step": 2292 }, { "epoch": 0.531956849553416, "grad_norm": 12.39102054984825, "learning_rate": 2e-06, "loss": 0.2753, "step": 2293 }, { "epoch": 0.5321888412017167, "grad_norm": 17.34864838714827, "learning_rate": 2e-06, "loss": 0.3422, "step": 2294 }, { "epoch": 0.5324208328500174, "grad_norm": 16.64109635116588, "learning_rate": 2e-06, "loss": 0.1882, "step": 2295 }, { "epoch": 0.5326528244983181, "grad_norm": 14.168599271666439, "learning_rate": 2e-06, "loss": 0.4041, "step": 2296 }, { "epoch": 0.5328848161466188, "grad_norm": 16.024561602681622, "learning_rate": 2e-06, "loss": 0.2647, "step": 2297 }, { "epoch": 0.5331168077949194, "grad_norm": 15.266324319466408, "learning_rate": 2e-06, "loss": 0.3007, "step": 2298 }, { "epoch": 0.53334879944322, "grad_norm": 14.756981269977882, "learning_rate": 2e-06, "loss": 0.2312, "step": 2299 }, { "epoch": 0.5335807910915207, "grad_norm": 20.247224134109825, "learning_rate": 2e-06, "loss": 0.302, "step": 2300 }, { "epoch": 0.5338127827398214, "grad_norm": 18.21501807524069, "learning_rate": 2e-06, "loss": 0.2802, "step": 2301 }, { "epoch": 0.534044774388122, "grad_norm": 9.884233780654693, "learning_rate": 2e-06, "loss": 0.2565, "step": 2302 }, { "epoch": 0.5342767660364227, "grad_norm": 17.423012073490213, "learning_rate": 2e-06, "loss": 0.3299, "step": 2303 }, { "epoch": 0.5345087576847234, "grad_norm": 18.25976551054708, "learning_rate": 2e-06, "loss": 0.4491, "step": 2304 }, { "epoch": 0.534740749333024, "grad_norm": 11.64769360043185, "learning_rate": 2e-06, "loss": 0.2243, "step": 2305 }, { "epoch": 0.5349727409813246, "grad_norm": 14.89009167595676, "learning_rate": 2e-06, "loss": 0.2694, "step": 2306 }, { "epoch": 0.5352047326296253, "grad_norm": 17.75693597667897, "learning_rate": 2e-06, "loss": 0.4001, "step": 2307 }, { "epoch": 0.535436724277926, "grad_norm": 17.159122883056842, "learning_rate": 2e-06, "loss": 0.3234, "step": 2308 }, { "epoch": 0.5356687159262267, "grad_norm": 13.326925118558393, "learning_rate": 2e-06, "loss": 0.2136, "step": 2309 }, { "epoch": 0.5359007075745273, "grad_norm": 24.72149462549094, "learning_rate": 2e-06, "loss": 0.3685, "step": 2310 }, { "epoch": 0.536132699222828, "grad_norm": 8.387532478488598, "learning_rate": 2e-06, "loss": 0.2289, "step": 2311 }, { "epoch": 0.5363646908711286, "grad_norm": 18.357082381108455, "learning_rate": 2e-06, "loss": 0.3841, "step": 2312 }, { "epoch": 0.5365966825194293, "grad_norm": 12.95410078049241, "learning_rate": 2e-06, "loss": 0.3858, "step": 2313 }, { "epoch": 0.53682867416773, "grad_norm": 13.089473379625638, "learning_rate": 2e-06, "loss": 0.2664, "step": 2314 }, { "epoch": 0.5370606658160306, "grad_norm": 19.433935514249765, "learning_rate": 2e-06, "loss": 0.2824, "step": 2315 }, { "epoch": 0.5372926574643313, "grad_norm": 10.02339829476107, "learning_rate": 2e-06, "loss": 0.2437, "step": 2316 }, { "epoch": 0.537524649112632, "grad_norm": 12.552941976553708, "learning_rate": 2e-06, "loss": 0.3056, "step": 2317 }, { "epoch": 0.5377566407609327, "grad_norm": 7.083892332328677, "learning_rate": 2e-06, "loss": 0.1957, "step": 2318 }, { "epoch": 0.5379886324092332, "grad_norm": 20.42922099874839, "learning_rate": 2e-06, "loss": 0.4354, "step": 2319 }, { "epoch": 0.5382206240575339, "grad_norm": 20.31467380319267, "learning_rate": 2e-06, "loss": 0.3671, "step": 2320 }, { "epoch": 0.5384526157058346, "grad_norm": 14.633241874519927, "learning_rate": 2e-06, "loss": 0.2382, "step": 2321 }, { "epoch": 0.5386846073541353, "grad_norm": 12.98039438440369, "learning_rate": 2e-06, "loss": 0.2664, "step": 2322 }, { "epoch": 0.5389165990024359, "grad_norm": 16.565699553079266, "learning_rate": 2e-06, "loss": 0.2974, "step": 2323 }, { "epoch": 0.5391485906507366, "grad_norm": 15.238803129232114, "learning_rate": 2e-06, "loss": 0.343, "step": 2324 }, { "epoch": 0.5393805822990372, "grad_norm": 6.829254929370642, "learning_rate": 2e-06, "loss": 0.1979, "step": 2325 }, { "epoch": 0.5396125739473379, "grad_norm": 9.858620413535052, "learning_rate": 2e-06, "loss": 0.2957, "step": 2326 }, { "epoch": 0.5398445655956385, "grad_norm": 6.932270398065999, "learning_rate": 2e-06, "loss": 0.2907, "step": 2327 }, { "epoch": 0.5400765572439392, "grad_norm": 12.710146071255126, "learning_rate": 2e-06, "loss": 0.2605, "step": 2328 }, { "epoch": 0.5403085488922399, "grad_norm": 7.804004164747687, "learning_rate": 2e-06, "loss": 0.1733, "step": 2329 }, { "epoch": 0.5405405405405406, "grad_norm": 21.81106908709857, "learning_rate": 2e-06, "loss": 0.3851, "step": 2330 }, { "epoch": 0.5407725321888412, "grad_norm": 5.8748588911926145, "learning_rate": 2e-06, "loss": 0.2951, "step": 2331 }, { "epoch": 0.5410045238371418, "grad_norm": 16.2691670015431, "learning_rate": 2e-06, "loss": 0.2875, "step": 2332 }, { "epoch": 0.5412365154854425, "grad_norm": 17.236361159889725, "learning_rate": 2e-06, "loss": 0.2956, "step": 2333 }, { "epoch": 0.5414685071337432, "grad_norm": 10.273390136146526, "learning_rate": 2e-06, "loss": 0.3004, "step": 2334 }, { "epoch": 0.5417004987820438, "grad_norm": 20.87612590795775, "learning_rate": 2e-06, "loss": 0.3434, "step": 2335 }, { "epoch": 0.5419324904303445, "grad_norm": 27.08238944500166, "learning_rate": 2e-06, "loss": 0.5375, "step": 2336 }, { "epoch": 0.5421644820786452, "grad_norm": 6.215206436525487, "learning_rate": 2e-06, "loss": 0.1769, "step": 2337 }, { "epoch": 0.5423964737269459, "grad_norm": 15.724186262250118, "learning_rate": 2e-06, "loss": 0.3055, "step": 2338 }, { "epoch": 0.5426284653752464, "grad_norm": 11.368510674204973, "learning_rate": 2e-06, "loss": 0.2766, "step": 2339 }, { "epoch": 0.5428604570235471, "grad_norm": 18.357210746849432, "learning_rate": 2e-06, "loss": 0.3577, "step": 2340 }, { "epoch": 0.5430924486718478, "grad_norm": 12.542863291662933, "learning_rate": 2e-06, "loss": 0.2229, "step": 2341 }, { "epoch": 0.5433244403201485, "grad_norm": 12.147225659096481, "learning_rate": 2e-06, "loss": 0.3056, "step": 2342 }, { "epoch": 0.5435564319684492, "grad_norm": 12.837094753441027, "learning_rate": 2e-06, "loss": 0.3689, "step": 2343 }, { "epoch": 0.5437884236167498, "grad_norm": 14.999446526361522, "learning_rate": 2e-06, "loss": 0.2976, "step": 2344 }, { "epoch": 0.5440204152650504, "grad_norm": 26.954049953958133, "learning_rate": 2e-06, "loss": 0.4802, "step": 2345 }, { "epoch": 0.5442524069133511, "grad_norm": 15.525353341388612, "learning_rate": 2e-06, "loss": 0.3723, "step": 2346 }, { "epoch": 0.5444843985616518, "grad_norm": 18.84480370516095, "learning_rate": 2e-06, "loss": 0.4144, "step": 2347 }, { "epoch": 0.5447163902099524, "grad_norm": 9.673118895122698, "learning_rate": 2e-06, "loss": 0.1825, "step": 2348 }, { "epoch": 0.5449483818582531, "grad_norm": 6.578330514551692, "learning_rate": 2e-06, "loss": 0.228, "step": 2349 }, { "epoch": 0.5451803735065538, "grad_norm": 21.222832338767525, "learning_rate": 2e-06, "loss": 0.3208, "step": 2350 }, { "epoch": 0.5454123651548545, "grad_norm": 19.578665813588188, "learning_rate": 2e-06, "loss": 0.3288, "step": 2351 }, { "epoch": 0.545644356803155, "grad_norm": 17.952013710643097, "learning_rate": 2e-06, "loss": 0.2957, "step": 2352 }, { "epoch": 0.5458763484514557, "grad_norm": 16.066508436001325, "learning_rate": 2e-06, "loss": 0.3123, "step": 2353 }, { "epoch": 0.5461083400997564, "grad_norm": 8.080325358675177, "learning_rate": 2e-06, "loss": 0.267, "step": 2354 }, { "epoch": 0.5463403317480571, "grad_norm": 16.236401419663363, "learning_rate": 2e-06, "loss": 0.3246, "step": 2355 }, { "epoch": 0.5465723233963578, "grad_norm": 23.272151572351987, "learning_rate": 2e-06, "loss": 0.353, "step": 2356 }, { "epoch": 0.5468043150446584, "grad_norm": 13.538462553357316, "learning_rate": 2e-06, "loss": 0.2789, "step": 2357 }, { "epoch": 0.5470363066929591, "grad_norm": 16.407723112546826, "learning_rate": 2e-06, "loss": 0.2905, "step": 2358 }, { "epoch": 0.5472682983412597, "grad_norm": 8.124259163275745, "learning_rate": 2e-06, "loss": 0.2108, "step": 2359 }, { "epoch": 0.5475002899895604, "grad_norm": 20.691277150417193, "learning_rate": 2e-06, "loss": 0.3986, "step": 2360 }, { "epoch": 0.547732281637861, "grad_norm": 24.34822370897118, "learning_rate": 2e-06, "loss": 0.3407, "step": 2361 }, { "epoch": 0.5479642732861617, "grad_norm": 14.216553246002624, "learning_rate": 2e-06, "loss": 0.3413, "step": 2362 }, { "epoch": 0.5481962649344624, "grad_norm": 14.793477667414159, "learning_rate": 2e-06, "loss": 0.3285, "step": 2363 }, { "epoch": 0.5484282565827631, "grad_norm": 12.641379310724114, "learning_rate": 2e-06, "loss": 0.2678, "step": 2364 }, { "epoch": 0.5486602482310636, "grad_norm": 13.317895196477977, "learning_rate": 2e-06, "loss": 0.325, "step": 2365 }, { "epoch": 0.5488922398793643, "grad_norm": 16.161930551381584, "learning_rate": 2e-06, "loss": 0.3113, "step": 2366 }, { "epoch": 0.549124231527665, "grad_norm": 7.385801035359576, "learning_rate": 2e-06, "loss": 0.2321, "step": 2367 }, { "epoch": 0.5493562231759657, "grad_norm": 11.702660325886935, "learning_rate": 2e-06, "loss": 0.3183, "step": 2368 }, { "epoch": 0.5495882148242663, "grad_norm": 14.062392473354258, "learning_rate": 2e-06, "loss": 0.254, "step": 2369 }, { "epoch": 0.549820206472567, "grad_norm": 18.748060159797518, "learning_rate": 2e-06, "loss": 0.3368, "step": 2370 }, { "epoch": 0.5500521981208677, "grad_norm": 14.300497850364994, "learning_rate": 2e-06, "loss": 0.3072, "step": 2371 }, { "epoch": 0.5502841897691683, "grad_norm": 10.418969804626075, "learning_rate": 2e-06, "loss": 0.2342, "step": 2372 }, { "epoch": 0.5505161814174689, "grad_norm": 20.44987186445462, "learning_rate": 2e-06, "loss": 0.3585, "step": 2373 }, { "epoch": 0.5507481730657696, "grad_norm": 20.05953857845853, "learning_rate": 2e-06, "loss": 0.2701, "step": 2374 }, { "epoch": 0.5509801647140703, "grad_norm": 16.11962443192865, "learning_rate": 2e-06, "loss": 0.2922, "step": 2375 }, { "epoch": 0.551212156362371, "grad_norm": 10.646811964940431, "learning_rate": 2e-06, "loss": 0.2887, "step": 2376 }, { "epoch": 0.5514441480106717, "grad_norm": 18.017398046246253, "learning_rate": 2e-06, "loss": 0.3291, "step": 2377 }, { "epoch": 0.5516761396589723, "grad_norm": 9.988883639517411, "learning_rate": 2e-06, "loss": 0.2606, "step": 2378 }, { "epoch": 0.5519081313072729, "grad_norm": 12.064044391011736, "learning_rate": 2e-06, "loss": 0.3694, "step": 2379 }, { "epoch": 0.5521401229555736, "grad_norm": 11.821170069247135, "learning_rate": 2e-06, "loss": 0.1949, "step": 2380 }, { "epoch": 0.5523721146038743, "grad_norm": 9.066163899738006, "learning_rate": 2e-06, "loss": 0.1487, "step": 2381 }, { "epoch": 0.5526041062521749, "grad_norm": 11.322561241704996, "learning_rate": 2e-06, "loss": 0.3068, "step": 2382 }, { "epoch": 0.5528360979004756, "grad_norm": 7.512021724930349, "learning_rate": 2e-06, "loss": 0.2746, "step": 2383 }, { "epoch": 0.5530680895487763, "grad_norm": 9.300021551053998, "learning_rate": 2e-06, "loss": 0.2483, "step": 2384 }, { "epoch": 0.5533000811970769, "grad_norm": 18.989662991902502, "learning_rate": 2e-06, "loss": 0.3323, "step": 2385 }, { "epoch": 0.5535320728453775, "grad_norm": 10.986524529354641, "learning_rate": 2e-06, "loss": 0.1893, "step": 2386 }, { "epoch": 0.5537640644936782, "grad_norm": 10.924830004619258, "learning_rate": 2e-06, "loss": 0.311, "step": 2387 }, { "epoch": 0.5539960561419789, "grad_norm": 14.54093682002165, "learning_rate": 2e-06, "loss": 0.2469, "step": 2388 }, { "epoch": 0.5542280477902796, "grad_norm": 23.32331970457638, "learning_rate": 2e-06, "loss": 0.3154, "step": 2389 }, { "epoch": 0.5544600394385802, "grad_norm": 14.801663169850901, "learning_rate": 2e-06, "loss": 0.2924, "step": 2390 }, { "epoch": 0.5546920310868809, "grad_norm": 12.918355697187497, "learning_rate": 2e-06, "loss": 0.2827, "step": 2391 }, { "epoch": 0.5549240227351815, "grad_norm": 17.089038419122126, "learning_rate": 2e-06, "loss": 0.321, "step": 2392 }, { "epoch": 0.5551560143834822, "grad_norm": 9.67915384286139, "learning_rate": 2e-06, "loss": 0.2727, "step": 2393 }, { "epoch": 0.5553880060317828, "grad_norm": 7.766141239959042, "learning_rate": 2e-06, "loss": 0.191, "step": 2394 }, { "epoch": 0.5556199976800835, "grad_norm": 18.090735119556687, "learning_rate": 2e-06, "loss": 0.3184, "step": 2395 }, { "epoch": 0.5558519893283842, "grad_norm": 8.505508622938232, "learning_rate": 2e-06, "loss": 0.2925, "step": 2396 }, { "epoch": 0.5560839809766849, "grad_norm": 14.387996447013048, "learning_rate": 2e-06, "loss": 0.3296, "step": 2397 }, { "epoch": 0.5563159726249856, "grad_norm": 16.06393041702412, "learning_rate": 2e-06, "loss": 0.3835, "step": 2398 }, { "epoch": 0.5565479642732861, "grad_norm": 13.847206072754162, "learning_rate": 2e-06, "loss": 0.2664, "step": 2399 }, { "epoch": 0.5567799559215868, "grad_norm": 6.8657066601555625, "learning_rate": 2e-06, "loss": 0.2501, "step": 2400 }, { "epoch": 0.5570119475698875, "grad_norm": 19.52261317249476, "learning_rate": 2e-06, "loss": 0.3469, "step": 2401 }, { "epoch": 0.5572439392181882, "grad_norm": 8.778377305253947, "learning_rate": 2e-06, "loss": 0.3689, "step": 2402 }, { "epoch": 0.5574759308664888, "grad_norm": 16.378735216573, "learning_rate": 2e-06, "loss": 0.3538, "step": 2403 }, { "epoch": 0.5577079225147895, "grad_norm": 11.787808238531637, "learning_rate": 2e-06, "loss": 0.2358, "step": 2404 }, { "epoch": 0.5579399141630901, "grad_norm": 9.171065652456917, "learning_rate": 2e-06, "loss": 0.2658, "step": 2405 }, { "epoch": 0.5581719058113908, "grad_norm": 10.920307577783632, "learning_rate": 2e-06, "loss": 0.3377, "step": 2406 }, { "epoch": 0.5584038974596914, "grad_norm": 16.384068560006575, "learning_rate": 2e-06, "loss": 0.245, "step": 2407 }, { "epoch": 0.5586358891079921, "grad_norm": 15.407554795589347, "learning_rate": 2e-06, "loss": 0.316, "step": 2408 }, { "epoch": 0.5588678807562928, "grad_norm": 8.255536852752579, "learning_rate": 2e-06, "loss": 0.3018, "step": 2409 }, { "epoch": 0.5590998724045935, "grad_norm": 15.595870272913924, "learning_rate": 2e-06, "loss": 0.2373, "step": 2410 }, { "epoch": 0.5593318640528941, "grad_norm": 10.573620346477986, "learning_rate": 2e-06, "loss": 0.2634, "step": 2411 }, { "epoch": 0.5595638557011947, "grad_norm": 13.44498052742146, "learning_rate": 2e-06, "loss": 0.2921, "step": 2412 }, { "epoch": 0.5597958473494954, "grad_norm": 18.226161572351923, "learning_rate": 2e-06, "loss": 0.3855, "step": 2413 }, { "epoch": 0.5600278389977961, "grad_norm": 26.04602255991392, "learning_rate": 2e-06, "loss": 0.3388, "step": 2414 }, { "epoch": 0.5602598306460967, "grad_norm": 11.40825468917101, "learning_rate": 2e-06, "loss": 0.2476, "step": 2415 }, { "epoch": 0.5604918222943974, "grad_norm": 8.047637255378097, "learning_rate": 2e-06, "loss": 0.2142, "step": 2416 }, { "epoch": 0.5607238139426981, "grad_norm": 9.763552278815421, "learning_rate": 2e-06, "loss": 0.2166, "step": 2417 }, { "epoch": 0.5609558055909988, "grad_norm": 15.426847438028386, "learning_rate": 2e-06, "loss": 0.3242, "step": 2418 }, { "epoch": 0.5611877972392993, "grad_norm": 10.019695584582262, "learning_rate": 2e-06, "loss": 0.2315, "step": 2419 }, { "epoch": 0.5614197888876, "grad_norm": 11.950391616471022, "learning_rate": 2e-06, "loss": 0.3032, "step": 2420 }, { "epoch": 0.5616517805359007, "grad_norm": 9.821360624161986, "learning_rate": 2e-06, "loss": 0.2753, "step": 2421 }, { "epoch": 0.5618837721842014, "grad_norm": 13.779153309516882, "learning_rate": 2e-06, "loss": 0.2562, "step": 2422 }, { "epoch": 0.5621157638325021, "grad_norm": 20.55957101204331, "learning_rate": 2e-06, "loss": 0.348, "step": 2423 }, { "epoch": 0.5623477554808027, "grad_norm": 10.717575945681206, "learning_rate": 2e-06, "loss": 0.3057, "step": 2424 }, { "epoch": 0.5625797471291033, "grad_norm": 9.5656433019114, "learning_rate": 2e-06, "loss": 0.2073, "step": 2425 }, { "epoch": 0.562811738777404, "grad_norm": 14.943321776155047, "learning_rate": 2e-06, "loss": 0.2825, "step": 2426 }, { "epoch": 0.5630437304257047, "grad_norm": 14.854031581718427, "learning_rate": 2e-06, "loss": 0.4032, "step": 2427 }, { "epoch": 0.5632757220740053, "grad_norm": 19.945968462228393, "learning_rate": 2e-06, "loss": 0.2607, "step": 2428 }, { "epoch": 0.563507713722306, "grad_norm": 21.348685688736698, "learning_rate": 2e-06, "loss": 0.2983, "step": 2429 }, { "epoch": 0.5637397053706067, "grad_norm": 8.603273140615295, "learning_rate": 2e-06, "loss": 0.296, "step": 2430 }, { "epoch": 0.5639716970189074, "grad_norm": 18.347215498971828, "learning_rate": 2e-06, "loss": 0.3555, "step": 2431 }, { "epoch": 0.5642036886672079, "grad_norm": 11.246191700500239, "learning_rate": 2e-06, "loss": 0.2004, "step": 2432 }, { "epoch": 0.5644356803155086, "grad_norm": 14.682817929588682, "learning_rate": 2e-06, "loss": 0.3376, "step": 2433 }, { "epoch": 0.5646676719638093, "grad_norm": 20.22286878096074, "learning_rate": 2e-06, "loss": 0.2333, "step": 2434 }, { "epoch": 0.56489966361211, "grad_norm": 16.49444735633445, "learning_rate": 2e-06, "loss": 0.2472, "step": 2435 }, { "epoch": 0.5651316552604106, "grad_norm": 6.68551153484024, "learning_rate": 2e-06, "loss": 0.225, "step": 2436 }, { "epoch": 0.5653636469087113, "grad_norm": 9.686076248914613, "learning_rate": 2e-06, "loss": 0.3447, "step": 2437 }, { "epoch": 0.5655956385570119, "grad_norm": 17.76599719791514, "learning_rate": 2e-06, "loss": 0.2478, "step": 2438 }, { "epoch": 0.5658276302053126, "grad_norm": 21.389768804288273, "learning_rate": 2e-06, "loss": 0.3174, "step": 2439 }, { "epoch": 0.5660596218536132, "grad_norm": 14.049780815178298, "learning_rate": 2e-06, "loss": 0.3321, "step": 2440 }, { "epoch": 0.5662916135019139, "grad_norm": 14.033113374604143, "learning_rate": 2e-06, "loss": 0.3, "step": 2441 }, { "epoch": 0.5665236051502146, "grad_norm": 22.448007786902185, "learning_rate": 2e-06, "loss": 0.259, "step": 2442 }, { "epoch": 0.5667555967985153, "grad_norm": 14.619950092423217, "learning_rate": 2e-06, "loss": 0.3202, "step": 2443 }, { "epoch": 0.566987588446816, "grad_norm": 19.221172213265596, "learning_rate": 2e-06, "loss": 0.4239, "step": 2444 }, { "epoch": 0.5672195800951165, "grad_norm": 10.266028401673237, "learning_rate": 2e-06, "loss": 0.2292, "step": 2445 }, { "epoch": 0.5674515717434172, "grad_norm": 16.665276447968612, "learning_rate": 2e-06, "loss": 0.2707, "step": 2446 }, { "epoch": 0.5676835633917179, "grad_norm": 10.6274177866923, "learning_rate": 2e-06, "loss": 0.3535, "step": 2447 }, { "epoch": 0.5679155550400186, "grad_norm": 11.8099997640705, "learning_rate": 2e-06, "loss": 0.366, "step": 2448 }, { "epoch": 0.5681475466883192, "grad_norm": 12.555980498186655, "learning_rate": 2e-06, "loss": 0.3625, "step": 2449 }, { "epoch": 0.5683795383366199, "grad_norm": 19.410923811816254, "learning_rate": 2e-06, "loss": 0.2926, "step": 2450 }, { "epoch": 0.5686115299849206, "grad_norm": 2.8525262146093486, "learning_rate": 2e-06, "loss": 0.1367, "step": 2451 }, { "epoch": 0.5688435216332212, "grad_norm": 10.782898016379185, "learning_rate": 2e-06, "loss": 0.3504, "step": 2452 }, { "epoch": 0.5690755132815218, "grad_norm": 16.295736831717836, "learning_rate": 2e-06, "loss": 0.3744, "step": 2453 }, { "epoch": 0.5693075049298225, "grad_norm": 12.536042109056044, "learning_rate": 2e-06, "loss": 0.2732, "step": 2454 }, { "epoch": 0.5695394965781232, "grad_norm": 14.381777859735644, "learning_rate": 2e-06, "loss": 0.3513, "step": 2455 }, { "epoch": 0.5697714882264239, "grad_norm": 18.1258827344621, "learning_rate": 2e-06, "loss": 0.2716, "step": 2456 }, { "epoch": 0.5700034798747246, "grad_norm": 10.674561459760259, "learning_rate": 2e-06, "loss": 0.314, "step": 2457 }, { "epoch": 0.5702354715230251, "grad_norm": 16.690016743500063, "learning_rate": 2e-06, "loss": 0.3153, "step": 2458 }, { "epoch": 0.5704674631713258, "grad_norm": 17.419336891896343, "learning_rate": 2e-06, "loss": 0.2838, "step": 2459 }, { "epoch": 0.5706994548196265, "grad_norm": 9.199259539316124, "learning_rate": 2e-06, "loss": 0.315, "step": 2460 }, { "epoch": 0.5709314464679272, "grad_norm": 11.35509051682607, "learning_rate": 2e-06, "loss": 0.3286, "step": 2461 }, { "epoch": 0.5711634381162278, "grad_norm": 13.829657807396838, "learning_rate": 2e-06, "loss": 0.2636, "step": 2462 }, { "epoch": 0.5713954297645285, "grad_norm": 13.03723236578468, "learning_rate": 2e-06, "loss": 0.3142, "step": 2463 }, { "epoch": 0.5716274214128292, "grad_norm": 14.110551716191281, "learning_rate": 2e-06, "loss": 0.323, "step": 2464 }, { "epoch": 0.5718594130611298, "grad_norm": 9.653588565786267, "learning_rate": 2e-06, "loss": 0.2312, "step": 2465 }, { "epoch": 0.5720914047094304, "grad_norm": 9.622618204933014, "learning_rate": 2e-06, "loss": 0.3371, "step": 2466 }, { "epoch": 0.5723233963577311, "grad_norm": 26.944013997098477, "learning_rate": 2e-06, "loss": 0.3133, "step": 2467 }, { "epoch": 0.5725553880060318, "grad_norm": 22.606898378576506, "learning_rate": 2e-06, "loss": 0.4094, "step": 2468 }, { "epoch": 0.5727873796543325, "grad_norm": 14.59816805685614, "learning_rate": 2e-06, "loss": 0.4009, "step": 2469 }, { "epoch": 0.5730193713026331, "grad_norm": 16.673847044715497, "learning_rate": 2e-06, "loss": 0.3406, "step": 2470 }, { "epoch": 0.5732513629509338, "grad_norm": 14.847720782363652, "learning_rate": 2e-06, "loss": 0.2871, "step": 2471 }, { "epoch": 0.5734833545992344, "grad_norm": 18.607494659772954, "learning_rate": 2e-06, "loss": 0.4925, "step": 2472 }, { "epoch": 0.5737153462475351, "grad_norm": 14.499260801159384, "learning_rate": 2e-06, "loss": 0.3471, "step": 2473 }, { "epoch": 0.5739473378958357, "grad_norm": 10.426182616781508, "learning_rate": 2e-06, "loss": 0.2871, "step": 2474 }, { "epoch": 0.5741793295441364, "grad_norm": 11.674093773224836, "learning_rate": 2e-06, "loss": 0.2291, "step": 2475 }, { "epoch": 0.5744113211924371, "grad_norm": 17.47014738833898, "learning_rate": 2e-06, "loss": 0.2749, "step": 2476 }, { "epoch": 0.5746433128407378, "grad_norm": 11.770554342989735, "learning_rate": 2e-06, "loss": 0.264, "step": 2477 }, { "epoch": 0.5748753044890383, "grad_norm": 20.94092375824433, "learning_rate": 2e-06, "loss": 0.3868, "step": 2478 }, { "epoch": 0.575107296137339, "grad_norm": 13.262166143545379, "learning_rate": 2e-06, "loss": 0.2915, "step": 2479 }, { "epoch": 0.5753392877856397, "grad_norm": 8.214286704099807, "learning_rate": 2e-06, "loss": 0.3086, "step": 2480 }, { "epoch": 0.5755712794339404, "grad_norm": 8.507786708511688, "learning_rate": 2e-06, "loss": 0.2798, "step": 2481 }, { "epoch": 0.575803271082241, "grad_norm": 18.642647129706745, "learning_rate": 2e-06, "loss": 0.3305, "step": 2482 }, { "epoch": 0.5760352627305417, "grad_norm": 15.689231004790807, "learning_rate": 2e-06, "loss": 0.2448, "step": 2483 }, { "epoch": 0.5762672543788424, "grad_norm": 12.418802098698727, "learning_rate": 2e-06, "loss": 0.2637, "step": 2484 }, { "epoch": 0.576499246027143, "grad_norm": 11.158407856020904, "learning_rate": 2e-06, "loss": 0.3159, "step": 2485 }, { "epoch": 0.5767312376754437, "grad_norm": 19.61483635744511, "learning_rate": 2e-06, "loss": 0.294, "step": 2486 }, { "epoch": 0.5769632293237443, "grad_norm": 14.865542445369957, "learning_rate": 2e-06, "loss": 0.2789, "step": 2487 }, { "epoch": 0.577195220972045, "grad_norm": 15.83240384681972, "learning_rate": 2e-06, "loss": 0.3269, "step": 2488 }, { "epoch": 0.5774272126203457, "grad_norm": 15.78410172720427, "learning_rate": 2e-06, "loss": 0.3453, "step": 2489 }, { "epoch": 0.5776592042686464, "grad_norm": 8.634452953530715, "learning_rate": 2e-06, "loss": 0.2287, "step": 2490 }, { "epoch": 0.577891195916947, "grad_norm": 12.732603792922383, "learning_rate": 2e-06, "loss": 0.2653, "step": 2491 }, { "epoch": 0.5781231875652476, "grad_norm": 16.61088777946793, "learning_rate": 2e-06, "loss": 0.2511, "step": 2492 }, { "epoch": 0.5783551792135483, "grad_norm": 15.248936766635833, "learning_rate": 2e-06, "loss": 0.2938, "step": 2493 }, { "epoch": 0.578587170861849, "grad_norm": 22.75939149932287, "learning_rate": 2e-06, "loss": 0.4973, "step": 2494 }, { "epoch": 0.5788191625101496, "grad_norm": 13.169262362496072, "learning_rate": 2e-06, "loss": 0.3417, "step": 2495 }, { "epoch": 0.5790511541584503, "grad_norm": 14.869556808260299, "learning_rate": 2e-06, "loss": 0.3089, "step": 2496 }, { "epoch": 0.579283145806751, "grad_norm": 7.219403236004867, "learning_rate": 2e-06, "loss": 0.1704, "step": 2497 }, { "epoch": 0.5795151374550516, "grad_norm": 20.08537920234524, "learning_rate": 2e-06, "loss": 0.2664, "step": 2498 }, { "epoch": 0.5797471291033522, "grad_norm": 6.917849407953844, "learning_rate": 2e-06, "loss": 0.2856, "step": 2499 }, { "epoch": 0.5799791207516529, "grad_norm": 17.85469764329847, "learning_rate": 2e-06, "loss": 0.3051, "step": 2500 }, { "epoch": 0.5802111123999536, "grad_norm": 9.285602566523718, "learning_rate": 2e-06, "loss": 0.1979, "step": 2501 }, { "epoch": 0.5804431040482543, "grad_norm": 12.654586179547152, "learning_rate": 2e-06, "loss": 0.2821, "step": 2502 }, { "epoch": 0.580675095696555, "grad_norm": 20.484107836541714, "learning_rate": 2e-06, "loss": 0.4086, "step": 2503 }, { "epoch": 0.5809070873448556, "grad_norm": 30.810539020797318, "learning_rate": 2e-06, "loss": 0.4463, "step": 2504 }, { "epoch": 0.5811390789931562, "grad_norm": 8.239820096389755, "learning_rate": 2e-06, "loss": 0.1982, "step": 2505 }, { "epoch": 0.5813710706414569, "grad_norm": 6.987799616443373, "learning_rate": 2e-06, "loss": 0.2571, "step": 2506 }, { "epoch": 0.5816030622897576, "grad_norm": 9.824023287155521, "learning_rate": 2e-06, "loss": 0.2848, "step": 2507 }, { "epoch": 0.5818350539380582, "grad_norm": 19.77791150461645, "learning_rate": 2e-06, "loss": 0.3736, "step": 2508 }, { "epoch": 0.5820670455863589, "grad_norm": 35.11127353541142, "learning_rate": 2e-06, "loss": 0.4481, "step": 2509 }, { "epoch": 0.5822990372346596, "grad_norm": 11.140002417533, "learning_rate": 2e-06, "loss": 0.3415, "step": 2510 }, { "epoch": 0.5825310288829603, "grad_norm": 17.981016984708337, "learning_rate": 2e-06, "loss": 0.3271, "step": 2511 }, { "epoch": 0.5827630205312608, "grad_norm": 11.72871904063151, "learning_rate": 2e-06, "loss": 0.2438, "step": 2512 }, { "epoch": 0.5829950121795615, "grad_norm": 5.025027934513667, "learning_rate": 2e-06, "loss": 0.1639, "step": 2513 }, { "epoch": 0.5832270038278622, "grad_norm": 11.810865289588344, "learning_rate": 2e-06, "loss": 0.2174, "step": 2514 }, { "epoch": 0.5834589954761629, "grad_norm": 24.26560656264537, "learning_rate": 2e-06, "loss": 0.2419, "step": 2515 }, { "epoch": 0.5836909871244635, "grad_norm": 18.46991140854295, "learning_rate": 2e-06, "loss": 0.2605, "step": 2516 }, { "epoch": 0.5839229787727642, "grad_norm": 15.03403044306349, "learning_rate": 2e-06, "loss": 0.3359, "step": 2517 }, { "epoch": 0.5841549704210648, "grad_norm": 11.645891200490585, "learning_rate": 2e-06, "loss": 0.2115, "step": 2518 }, { "epoch": 0.5843869620693655, "grad_norm": 14.340377431021409, "learning_rate": 2e-06, "loss": 0.2448, "step": 2519 }, { "epoch": 0.5846189537176661, "grad_norm": 14.147283918955994, "learning_rate": 2e-06, "loss": 0.2075, "step": 2520 }, { "epoch": 0.5848509453659668, "grad_norm": 17.865648426932434, "learning_rate": 2e-06, "loss": 0.2673, "step": 2521 }, { "epoch": 0.5850829370142675, "grad_norm": 14.55836248361842, "learning_rate": 2e-06, "loss": 0.2814, "step": 2522 }, { "epoch": 0.5853149286625682, "grad_norm": 16.81136985208734, "learning_rate": 2e-06, "loss": 0.3298, "step": 2523 }, { "epoch": 0.5855469203108689, "grad_norm": 20.565540320996366, "learning_rate": 2e-06, "loss": 0.3327, "step": 2524 }, { "epoch": 0.5857789119591694, "grad_norm": 24.14059008951668, "learning_rate": 2e-06, "loss": 0.337, "step": 2525 }, { "epoch": 0.5860109036074701, "grad_norm": 15.991993948265032, "learning_rate": 2e-06, "loss": 0.4677, "step": 2526 }, { "epoch": 0.5862428952557708, "grad_norm": 12.974831364373912, "learning_rate": 2e-06, "loss": 0.3675, "step": 2527 }, { "epoch": 0.5864748869040715, "grad_norm": 15.676183243588898, "learning_rate": 2e-06, "loss": 0.3662, "step": 2528 }, { "epoch": 0.5867068785523721, "grad_norm": 20.18352831596817, "learning_rate": 2e-06, "loss": 0.3298, "step": 2529 }, { "epoch": 0.5869388702006728, "grad_norm": 11.742845399116613, "learning_rate": 2e-06, "loss": 0.2105, "step": 2530 }, { "epoch": 0.5871708618489735, "grad_norm": 15.036929135487856, "learning_rate": 2e-06, "loss": 0.3134, "step": 2531 }, { "epoch": 0.5874028534972741, "grad_norm": 10.653932848203507, "learning_rate": 2e-06, "loss": 0.224, "step": 2532 }, { "epoch": 0.5876348451455747, "grad_norm": 16.1086614736084, "learning_rate": 2e-06, "loss": 0.3744, "step": 2533 }, { "epoch": 0.5878668367938754, "grad_norm": 29.101406200313374, "learning_rate": 2e-06, "loss": 0.3329, "step": 2534 }, { "epoch": 0.5880988284421761, "grad_norm": 38.350231530496075, "learning_rate": 2e-06, "loss": 0.4513, "step": 2535 }, { "epoch": 0.5883308200904768, "grad_norm": 23.724065742515208, "learning_rate": 2e-06, "loss": 0.3881, "step": 2536 }, { "epoch": 0.5885628117387774, "grad_norm": 15.37795572922142, "learning_rate": 2e-06, "loss": 0.2551, "step": 2537 }, { "epoch": 0.588794803387078, "grad_norm": 16.865307773422217, "learning_rate": 2e-06, "loss": 0.315, "step": 2538 }, { "epoch": 0.5890267950353787, "grad_norm": 17.793328192027396, "learning_rate": 2e-06, "loss": 0.2581, "step": 2539 }, { "epoch": 0.5892587866836794, "grad_norm": 17.195738504318005, "learning_rate": 2e-06, "loss": 0.2906, "step": 2540 }, { "epoch": 0.58949077833198, "grad_norm": 10.37882031859556, "learning_rate": 2e-06, "loss": 0.3523, "step": 2541 }, { "epoch": 0.5897227699802807, "grad_norm": 8.259590272826024, "learning_rate": 2e-06, "loss": 0.2488, "step": 2542 }, { "epoch": 0.5899547616285814, "grad_norm": 19.193523289503357, "learning_rate": 2e-06, "loss": 0.338, "step": 2543 }, { "epoch": 0.5901867532768821, "grad_norm": 14.629504299379278, "learning_rate": 2e-06, "loss": 0.254, "step": 2544 }, { "epoch": 0.5904187449251826, "grad_norm": 8.224400622005064, "learning_rate": 2e-06, "loss": 0.2536, "step": 2545 }, { "epoch": 0.5906507365734833, "grad_norm": 15.86256323018292, "learning_rate": 2e-06, "loss": 0.2646, "step": 2546 }, { "epoch": 0.590882728221784, "grad_norm": 18.072618750754682, "learning_rate": 2e-06, "loss": 0.2477, "step": 2547 }, { "epoch": 0.5911147198700847, "grad_norm": 13.436673286426196, "learning_rate": 2e-06, "loss": 0.299, "step": 2548 }, { "epoch": 0.5913467115183854, "grad_norm": 6.801983823427113, "learning_rate": 2e-06, "loss": 0.3031, "step": 2549 }, { "epoch": 0.591578703166686, "grad_norm": 14.892621067621787, "learning_rate": 2e-06, "loss": 0.3393, "step": 2550 }, { "epoch": 0.5918106948149867, "grad_norm": 15.883135619779361, "learning_rate": 2e-06, "loss": 0.3129, "step": 2551 }, { "epoch": 0.5920426864632873, "grad_norm": 17.51304692590423, "learning_rate": 2e-06, "loss": 0.3318, "step": 2552 }, { "epoch": 0.592274678111588, "grad_norm": 13.23396182628087, "learning_rate": 2e-06, "loss": 0.2345, "step": 2553 }, { "epoch": 0.5925066697598886, "grad_norm": 16.088956436374215, "learning_rate": 2e-06, "loss": 0.3597, "step": 2554 }, { "epoch": 0.5927386614081893, "grad_norm": 19.618846840750198, "learning_rate": 2e-06, "loss": 0.3431, "step": 2555 }, { "epoch": 0.59297065305649, "grad_norm": 16.66909534596031, "learning_rate": 2e-06, "loss": 0.2486, "step": 2556 }, { "epoch": 0.5932026447047907, "grad_norm": 10.560779229199174, "learning_rate": 2e-06, "loss": 0.2887, "step": 2557 }, { "epoch": 0.5934346363530912, "grad_norm": 9.21356550066053, "learning_rate": 2e-06, "loss": 0.1793, "step": 2558 }, { "epoch": 0.5936666280013919, "grad_norm": 14.076285578466297, "learning_rate": 2e-06, "loss": 0.3869, "step": 2559 }, { "epoch": 0.5938986196496926, "grad_norm": 12.847638105175127, "learning_rate": 2e-06, "loss": 0.2315, "step": 2560 }, { "epoch": 0.5941306112979933, "grad_norm": 14.765732513424698, "learning_rate": 2e-06, "loss": 0.2799, "step": 2561 }, { "epoch": 0.594362602946294, "grad_norm": 9.942317670085435, "learning_rate": 2e-06, "loss": 0.2714, "step": 2562 }, { "epoch": 0.5945945945945946, "grad_norm": 6.0178161019588945, "learning_rate": 2e-06, "loss": 0.2055, "step": 2563 }, { "epoch": 0.5948265862428953, "grad_norm": 16.221366734512934, "learning_rate": 2e-06, "loss": 0.3824, "step": 2564 }, { "epoch": 0.5950585778911959, "grad_norm": 13.889351920592597, "learning_rate": 2e-06, "loss": 0.303, "step": 2565 }, { "epoch": 0.5952905695394966, "grad_norm": 23.308845437497098, "learning_rate": 2e-06, "loss": 0.3687, "step": 2566 }, { "epoch": 0.5955225611877972, "grad_norm": 16.15487734503701, "learning_rate": 2e-06, "loss": 0.2385, "step": 2567 }, { "epoch": 0.5957545528360979, "grad_norm": 14.492592621249058, "learning_rate": 2e-06, "loss": 0.2203, "step": 2568 }, { "epoch": 0.5959865444843986, "grad_norm": 14.475746535168714, "learning_rate": 2e-06, "loss": 0.3553, "step": 2569 }, { "epoch": 0.5962185361326993, "grad_norm": 14.198491742040812, "learning_rate": 2e-06, "loss": 0.2305, "step": 2570 }, { "epoch": 0.5964505277809999, "grad_norm": 17.549194217988287, "learning_rate": 2e-06, "loss": 0.3198, "step": 2571 }, { "epoch": 0.5966825194293005, "grad_norm": 13.338985893175025, "learning_rate": 2e-06, "loss": 0.3296, "step": 2572 }, { "epoch": 0.5969145110776012, "grad_norm": 17.512050505262632, "learning_rate": 2e-06, "loss": 0.2729, "step": 2573 }, { "epoch": 0.5971465027259019, "grad_norm": 6.628241469541592, "learning_rate": 2e-06, "loss": 0.2512, "step": 2574 }, { "epoch": 0.5973784943742025, "grad_norm": 21.899371256267624, "learning_rate": 2e-06, "loss": 0.418, "step": 2575 }, { "epoch": 0.5976104860225032, "grad_norm": 21.141754928031396, "learning_rate": 2e-06, "loss": 0.321, "step": 2576 }, { "epoch": 0.5978424776708039, "grad_norm": 21.29611023083716, "learning_rate": 2e-06, "loss": 0.3016, "step": 2577 }, { "epoch": 0.5980744693191045, "grad_norm": 9.439127869148606, "learning_rate": 2e-06, "loss": 0.2466, "step": 2578 }, { "epoch": 0.5983064609674051, "grad_norm": 12.79725874640436, "learning_rate": 2e-06, "loss": 0.2972, "step": 2579 }, { "epoch": 0.5985384526157058, "grad_norm": 13.59370150264999, "learning_rate": 2e-06, "loss": 0.2991, "step": 2580 }, { "epoch": 0.5987704442640065, "grad_norm": 14.290613413702845, "learning_rate": 2e-06, "loss": 0.2741, "step": 2581 }, { "epoch": 0.5990024359123072, "grad_norm": 14.739111004523165, "learning_rate": 2e-06, "loss": 0.299, "step": 2582 }, { "epoch": 0.5992344275606079, "grad_norm": 18.43042050151962, "learning_rate": 2e-06, "loss": 0.3581, "step": 2583 }, { "epoch": 0.5994664192089085, "grad_norm": 16.742572811047022, "learning_rate": 2e-06, "loss": 0.405, "step": 2584 }, { "epoch": 0.5996984108572091, "grad_norm": 13.803051539755291, "learning_rate": 2e-06, "loss": 0.2549, "step": 2585 }, { "epoch": 0.5999304025055098, "grad_norm": 18.181730133497524, "learning_rate": 2e-06, "loss": 0.3734, "step": 2586 }, { "epoch": 0.6001623941538105, "grad_norm": 11.635904384068603, "learning_rate": 2e-06, "loss": 0.3005, "step": 2587 }, { "epoch": 0.6003943858021111, "grad_norm": 15.347115874466267, "learning_rate": 2e-06, "loss": 0.3835, "step": 2588 }, { "epoch": 0.6006263774504118, "grad_norm": 8.825379842280917, "learning_rate": 2e-06, "loss": 0.2267, "step": 2589 }, { "epoch": 0.6008583690987125, "grad_norm": 9.577537283110104, "learning_rate": 2e-06, "loss": 0.3363, "step": 2590 }, { "epoch": 0.601090360747013, "grad_norm": 27.429490879194134, "learning_rate": 2e-06, "loss": 0.3661, "step": 2591 }, { "epoch": 0.6013223523953137, "grad_norm": 22.96181063077117, "learning_rate": 2e-06, "loss": 0.3991, "step": 2592 }, { "epoch": 0.6015543440436144, "grad_norm": 20.831944745474473, "learning_rate": 2e-06, "loss": 0.3794, "step": 2593 }, { "epoch": 0.6017863356919151, "grad_norm": 24.6377518098682, "learning_rate": 2e-06, "loss": 0.3596, "step": 2594 }, { "epoch": 0.6020183273402158, "grad_norm": 16.685655535948804, "learning_rate": 2e-06, "loss": 0.3078, "step": 2595 }, { "epoch": 0.6022503189885164, "grad_norm": 16.949976752704817, "learning_rate": 2e-06, "loss": 0.3985, "step": 2596 }, { "epoch": 0.6024823106368171, "grad_norm": 10.333889262423241, "learning_rate": 2e-06, "loss": 0.2667, "step": 2597 }, { "epoch": 0.6027143022851177, "grad_norm": 14.510670341351082, "learning_rate": 2e-06, "loss": 0.3442, "step": 2598 }, { "epoch": 0.6029462939334184, "grad_norm": 17.69640920886926, "learning_rate": 2e-06, "loss": 0.2401, "step": 2599 }, { "epoch": 0.603178285581719, "grad_norm": 23.660783062967415, "learning_rate": 2e-06, "loss": 0.3295, "step": 2600 }, { "epoch": 0.6034102772300197, "grad_norm": 18.057275005324275, "learning_rate": 2e-06, "loss": 0.3407, "step": 2601 }, { "epoch": 0.6036422688783204, "grad_norm": 24.069099929213795, "learning_rate": 2e-06, "loss": 0.3268, "step": 2602 }, { "epoch": 0.6038742605266211, "grad_norm": 10.642561428651906, "learning_rate": 2e-06, "loss": 0.3963, "step": 2603 }, { "epoch": 0.6041062521749218, "grad_norm": 20.174001034192678, "learning_rate": 2e-06, "loss": 0.2681, "step": 2604 }, { "epoch": 0.6043382438232223, "grad_norm": 9.175946684897225, "learning_rate": 2e-06, "loss": 0.2181, "step": 2605 }, { "epoch": 0.604570235471523, "grad_norm": 8.166771981839268, "learning_rate": 2e-06, "loss": 0.2716, "step": 2606 }, { "epoch": 0.6048022271198237, "grad_norm": 16.899229840133128, "learning_rate": 2e-06, "loss": 0.2287, "step": 2607 }, { "epoch": 0.6050342187681244, "grad_norm": 9.44602089006785, "learning_rate": 2e-06, "loss": 0.2403, "step": 2608 }, { "epoch": 0.605266210416425, "grad_norm": 13.783306010662567, "learning_rate": 2e-06, "loss": 0.3988, "step": 2609 }, { "epoch": 0.6054982020647257, "grad_norm": 10.320072678209218, "learning_rate": 2e-06, "loss": 0.2224, "step": 2610 }, { "epoch": 0.6057301937130263, "grad_norm": 15.997254592009337, "learning_rate": 2e-06, "loss": 0.3589, "step": 2611 }, { "epoch": 0.605962185361327, "grad_norm": 8.183158067579095, "learning_rate": 2e-06, "loss": 0.168, "step": 2612 }, { "epoch": 0.6061941770096276, "grad_norm": 10.745161014883454, "learning_rate": 2e-06, "loss": 0.2714, "step": 2613 }, { "epoch": 0.6064261686579283, "grad_norm": 11.462258671047206, "learning_rate": 2e-06, "loss": 0.2523, "step": 2614 }, { "epoch": 0.606658160306229, "grad_norm": 12.016709149335806, "learning_rate": 2e-06, "loss": 0.314, "step": 2615 }, { "epoch": 0.6068901519545297, "grad_norm": 17.155313994573596, "learning_rate": 2e-06, "loss": 0.408, "step": 2616 }, { "epoch": 0.6071221436028303, "grad_norm": 11.915214369441893, "learning_rate": 2e-06, "loss": 0.3494, "step": 2617 }, { "epoch": 0.6073541352511309, "grad_norm": 9.603980367813907, "learning_rate": 2e-06, "loss": 0.3015, "step": 2618 }, { "epoch": 0.6075861268994316, "grad_norm": 14.152671605729422, "learning_rate": 2e-06, "loss": 0.2807, "step": 2619 }, { "epoch": 0.6078181185477323, "grad_norm": 9.895894107038899, "learning_rate": 2e-06, "loss": 0.2248, "step": 2620 }, { "epoch": 0.608050110196033, "grad_norm": 21.33095069897468, "learning_rate": 2e-06, "loss": 0.3436, "step": 2621 }, { "epoch": 0.6082821018443336, "grad_norm": 7.935644587182702, "learning_rate": 2e-06, "loss": 0.3164, "step": 2622 }, { "epoch": 0.6085140934926343, "grad_norm": 19.81730932480036, "learning_rate": 2e-06, "loss": 0.3868, "step": 2623 }, { "epoch": 0.608746085140935, "grad_norm": 15.406381840658463, "learning_rate": 2e-06, "loss": 0.2037, "step": 2624 }, { "epoch": 0.6089780767892355, "grad_norm": 15.622610047217227, "learning_rate": 2e-06, "loss": 0.353, "step": 2625 }, { "epoch": 0.6092100684375362, "grad_norm": 19.054233656211007, "learning_rate": 2e-06, "loss": 0.3748, "step": 2626 }, { "epoch": 0.6094420600858369, "grad_norm": 17.01955389711876, "learning_rate": 2e-06, "loss": 0.3669, "step": 2627 }, { "epoch": 0.6096740517341376, "grad_norm": 13.138403748635863, "learning_rate": 2e-06, "loss": 0.3121, "step": 2628 }, { "epoch": 0.6099060433824383, "grad_norm": 7.430059477695104, "learning_rate": 2e-06, "loss": 0.2589, "step": 2629 }, { "epoch": 0.6101380350307389, "grad_norm": 12.017745160492542, "learning_rate": 2e-06, "loss": 0.3337, "step": 2630 }, { "epoch": 0.6103700266790395, "grad_norm": 21.893420801946824, "learning_rate": 2e-06, "loss": 0.3723, "step": 2631 }, { "epoch": 0.6106020183273402, "grad_norm": 7.716883732515496, "learning_rate": 2e-06, "loss": 0.246, "step": 2632 }, { "epoch": 0.6108340099756409, "grad_norm": 12.394263618488978, "learning_rate": 2e-06, "loss": 0.2482, "step": 2633 }, { "epoch": 0.6110660016239415, "grad_norm": 13.85844630687673, "learning_rate": 2e-06, "loss": 0.3278, "step": 2634 }, { "epoch": 0.6112979932722422, "grad_norm": 15.23846624738061, "learning_rate": 2e-06, "loss": 0.3521, "step": 2635 }, { "epoch": 0.6115299849205429, "grad_norm": 17.00492197768118, "learning_rate": 2e-06, "loss": 0.2893, "step": 2636 }, { "epoch": 0.6117619765688436, "grad_norm": 13.586664525927867, "learning_rate": 2e-06, "loss": 0.2788, "step": 2637 }, { "epoch": 0.6119939682171441, "grad_norm": 15.014978243660334, "learning_rate": 2e-06, "loss": 0.3178, "step": 2638 }, { "epoch": 0.6122259598654448, "grad_norm": 13.84349412358929, "learning_rate": 2e-06, "loss": 0.2344, "step": 2639 }, { "epoch": 0.6124579515137455, "grad_norm": 22.096753673828697, "learning_rate": 2e-06, "loss": 0.3522, "step": 2640 }, { "epoch": 0.6126899431620462, "grad_norm": 14.059995206076655, "learning_rate": 2e-06, "loss": 0.3093, "step": 2641 }, { "epoch": 0.6129219348103468, "grad_norm": 8.321542567832232, "learning_rate": 2e-06, "loss": 0.3574, "step": 2642 }, { "epoch": 0.6131539264586475, "grad_norm": 13.750629896271956, "learning_rate": 2e-06, "loss": 0.1864, "step": 2643 }, { "epoch": 0.6133859181069482, "grad_norm": 22.7628000844624, "learning_rate": 2e-06, "loss": 0.4686, "step": 2644 }, { "epoch": 0.6136179097552488, "grad_norm": 9.845866081722727, "learning_rate": 2e-06, "loss": 0.2652, "step": 2645 }, { "epoch": 0.6138499014035494, "grad_norm": 13.935750749172811, "learning_rate": 2e-06, "loss": 0.3633, "step": 2646 }, { "epoch": 0.6140818930518501, "grad_norm": 20.494770762962357, "learning_rate": 2e-06, "loss": 0.3229, "step": 2647 }, { "epoch": 0.6143138847001508, "grad_norm": 16.60093010574271, "learning_rate": 2e-06, "loss": 0.3424, "step": 2648 }, { "epoch": 0.6145458763484515, "grad_norm": 20.04060203813773, "learning_rate": 2e-06, "loss": 0.4419, "step": 2649 }, { "epoch": 0.6147778679967522, "grad_norm": 11.737435793683828, "learning_rate": 2e-06, "loss": 0.2781, "step": 2650 }, { "epoch": 0.6150098596450527, "grad_norm": 13.725621163731567, "learning_rate": 2e-06, "loss": 0.2673, "step": 2651 }, { "epoch": 0.6152418512933534, "grad_norm": 15.800607780056476, "learning_rate": 2e-06, "loss": 0.2469, "step": 2652 }, { "epoch": 0.6154738429416541, "grad_norm": 20.152900288357856, "learning_rate": 2e-06, "loss": 0.2142, "step": 2653 }, { "epoch": 0.6157058345899548, "grad_norm": 16.55339980838737, "learning_rate": 2e-06, "loss": 0.3571, "step": 2654 }, { "epoch": 0.6159378262382554, "grad_norm": 10.524688730312457, "learning_rate": 2e-06, "loss": 0.3035, "step": 2655 }, { "epoch": 0.6161698178865561, "grad_norm": 10.31213022805119, "learning_rate": 2e-06, "loss": 0.272, "step": 2656 }, { "epoch": 0.6164018095348568, "grad_norm": 9.45210475666712, "learning_rate": 2e-06, "loss": 0.2183, "step": 2657 }, { "epoch": 0.6166338011831574, "grad_norm": 21.582429259346284, "learning_rate": 2e-06, "loss": 0.2551, "step": 2658 }, { "epoch": 0.616865792831458, "grad_norm": 26.47571112560527, "learning_rate": 2e-06, "loss": 0.3598, "step": 2659 }, { "epoch": 0.6170977844797587, "grad_norm": 14.710721928558407, "learning_rate": 2e-06, "loss": 0.3941, "step": 2660 }, { "epoch": 0.6173297761280594, "grad_norm": 10.122986352634644, "learning_rate": 2e-06, "loss": 0.2817, "step": 2661 }, { "epoch": 0.6175617677763601, "grad_norm": 17.1002915041208, "learning_rate": 2e-06, "loss": 0.2976, "step": 2662 }, { "epoch": 0.6177937594246607, "grad_norm": 15.228871856863515, "learning_rate": 2e-06, "loss": 0.3329, "step": 2663 }, { "epoch": 0.6180257510729614, "grad_norm": 10.804501166132074, "learning_rate": 2e-06, "loss": 0.2286, "step": 2664 }, { "epoch": 0.618257742721262, "grad_norm": 19.23509206707099, "learning_rate": 2e-06, "loss": 0.2952, "step": 2665 }, { "epoch": 0.6184897343695627, "grad_norm": 19.918949097059865, "learning_rate": 2e-06, "loss": 0.3427, "step": 2666 }, { "epoch": 0.6187217260178633, "grad_norm": 11.805634939444555, "learning_rate": 2e-06, "loss": 0.3066, "step": 2667 }, { "epoch": 0.618953717666164, "grad_norm": 15.470640332499393, "learning_rate": 2e-06, "loss": 0.1898, "step": 2668 }, { "epoch": 0.6191857093144647, "grad_norm": 9.398799386754078, "learning_rate": 2e-06, "loss": 0.2322, "step": 2669 }, { "epoch": 0.6194177009627654, "grad_norm": 13.784983840873174, "learning_rate": 2e-06, "loss": 0.3268, "step": 2670 }, { "epoch": 0.619649692611066, "grad_norm": 12.30965234954036, "learning_rate": 2e-06, "loss": 0.2877, "step": 2671 }, { "epoch": 0.6198816842593666, "grad_norm": 11.794091327712746, "learning_rate": 2e-06, "loss": 0.2644, "step": 2672 }, { "epoch": 0.6201136759076673, "grad_norm": 21.585226462652304, "learning_rate": 2e-06, "loss": 0.4381, "step": 2673 }, { "epoch": 0.620345667555968, "grad_norm": 10.362497530620518, "learning_rate": 2e-06, "loss": 0.2232, "step": 2674 }, { "epoch": 0.6205776592042687, "grad_norm": 12.525941047158943, "learning_rate": 2e-06, "loss": 0.2882, "step": 2675 }, { "epoch": 0.6208096508525693, "grad_norm": 16.138277764285274, "learning_rate": 2e-06, "loss": 0.3669, "step": 2676 }, { "epoch": 0.62104164250087, "grad_norm": 15.780768061964427, "learning_rate": 2e-06, "loss": 0.3749, "step": 2677 }, { "epoch": 0.6212736341491706, "grad_norm": 20.30659677577186, "learning_rate": 2e-06, "loss": 0.3917, "step": 2678 }, { "epoch": 0.6215056257974713, "grad_norm": 12.25187953805943, "learning_rate": 2e-06, "loss": 0.2449, "step": 2679 }, { "epoch": 0.6217376174457719, "grad_norm": 12.850671799957505, "learning_rate": 2e-06, "loss": 0.2107, "step": 2680 }, { "epoch": 0.6219696090940726, "grad_norm": 21.096912590816856, "learning_rate": 2e-06, "loss": 0.2952, "step": 2681 }, { "epoch": 0.6222016007423733, "grad_norm": 25.320626047184888, "learning_rate": 2e-06, "loss": 0.389, "step": 2682 }, { "epoch": 0.622433592390674, "grad_norm": 22.577550122053413, "learning_rate": 2e-06, "loss": 0.3165, "step": 2683 }, { "epoch": 0.6226655840389747, "grad_norm": 12.988787747440956, "learning_rate": 2e-06, "loss": 0.2689, "step": 2684 }, { "epoch": 0.6228975756872752, "grad_norm": 9.692038170370148, "learning_rate": 2e-06, "loss": 0.2945, "step": 2685 }, { "epoch": 0.6231295673355759, "grad_norm": 15.284503787530403, "learning_rate": 2e-06, "loss": 0.3389, "step": 2686 }, { "epoch": 0.6233615589838766, "grad_norm": 23.334498764386655, "learning_rate": 2e-06, "loss": 0.3517, "step": 2687 }, { "epoch": 0.6235935506321773, "grad_norm": 10.922055258467221, "learning_rate": 2e-06, "loss": 0.2828, "step": 2688 }, { "epoch": 0.6238255422804779, "grad_norm": 10.065528194947968, "learning_rate": 2e-06, "loss": 0.2002, "step": 2689 }, { "epoch": 0.6240575339287786, "grad_norm": 18.114588707791558, "learning_rate": 2e-06, "loss": 0.199, "step": 2690 }, { "epoch": 0.6242895255770792, "grad_norm": 25.19740551477204, "learning_rate": 2e-06, "loss": 0.3786, "step": 2691 }, { "epoch": 0.6245215172253799, "grad_norm": 12.660265258094135, "learning_rate": 2e-06, "loss": 0.2891, "step": 2692 }, { "epoch": 0.6247535088736805, "grad_norm": 12.01025971009369, "learning_rate": 2e-06, "loss": 0.218, "step": 2693 }, { "epoch": 0.6249855005219812, "grad_norm": 19.57290722929422, "learning_rate": 2e-06, "loss": 0.4168, "step": 2694 }, { "epoch": 0.6252174921702819, "grad_norm": 18.333835413311697, "learning_rate": 2e-06, "loss": 0.3802, "step": 2695 }, { "epoch": 0.6254494838185826, "grad_norm": 9.452348955554905, "learning_rate": 2e-06, "loss": 0.3192, "step": 2696 }, { "epoch": 0.6256814754668832, "grad_norm": 19.413813530652142, "learning_rate": 2e-06, "loss": 0.3486, "step": 2697 }, { "epoch": 0.6259134671151838, "grad_norm": 8.14297216131884, "learning_rate": 2e-06, "loss": 0.1745, "step": 2698 }, { "epoch": 0.6261454587634845, "grad_norm": 16.460858927880107, "learning_rate": 2e-06, "loss": 0.2898, "step": 2699 }, { "epoch": 0.6263774504117852, "grad_norm": 23.27528134870079, "learning_rate": 2e-06, "loss": 0.3116, "step": 2700 }, { "epoch": 0.6266094420600858, "grad_norm": 21.682972803443782, "learning_rate": 2e-06, "loss": 0.3774, "step": 2701 }, { "epoch": 0.6268414337083865, "grad_norm": 11.274293799473982, "learning_rate": 2e-06, "loss": 0.2252, "step": 2702 }, { "epoch": 0.6270734253566872, "grad_norm": 14.373437442014854, "learning_rate": 2e-06, "loss": 0.2802, "step": 2703 }, { "epoch": 0.6273054170049879, "grad_norm": 27.98556682726606, "learning_rate": 2e-06, "loss": 0.3984, "step": 2704 }, { "epoch": 0.6275374086532884, "grad_norm": 8.125770832169447, "learning_rate": 2e-06, "loss": 0.2483, "step": 2705 }, { "epoch": 0.6277694003015891, "grad_norm": 14.280645487072892, "learning_rate": 2e-06, "loss": 0.3523, "step": 2706 }, { "epoch": 0.6280013919498898, "grad_norm": 13.604607657906875, "learning_rate": 2e-06, "loss": 0.2502, "step": 2707 }, { "epoch": 0.6282333835981905, "grad_norm": 15.215810695283421, "learning_rate": 2e-06, "loss": 0.2886, "step": 2708 }, { "epoch": 0.6284653752464912, "grad_norm": 10.526388538487529, "learning_rate": 2e-06, "loss": 0.2921, "step": 2709 }, { "epoch": 0.6286973668947918, "grad_norm": 11.578951665205539, "learning_rate": 2e-06, "loss": 0.2166, "step": 2710 }, { "epoch": 0.6289293585430924, "grad_norm": 26.76025334672509, "learning_rate": 2e-06, "loss": 0.4258, "step": 2711 }, { "epoch": 0.6291613501913931, "grad_norm": 5.960315113925062, "learning_rate": 2e-06, "loss": 0.203, "step": 2712 }, { "epoch": 0.6293933418396938, "grad_norm": 19.098755755319814, "learning_rate": 2e-06, "loss": 0.2265, "step": 2713 }, { "epoch": 0.6296253334879944, "grad_norm": 11.279047664736554, "learning_rate": 2e-06, "loss": 0.2733, "step": 2714 }, { "epoch": 0.6298573251362951, "grad_norm": 11.042710355062926, "learning_rate": 2e-06, "loss": 0.2478, "step": 2715 }, { "epoch": 0.6300893167845958, "grad_norm": 21.186114628597807, "learning_rate": 2e-06, "loss": 0.3966, "step": 2716 }, { "epoch": 0.6303213084328965, "grad_norm": 12.535985711797327, "learning_rate": 2e-06, "loss": 0.3958, "step": 2717 }, { "epoch": 0.630553300081197, "grad_norm": 9.076805066853897, "learning_rate": 2e-06, "loss": 0.2573, "step": 2718 }, { "epoch": 0.6307852917294977, "grad_norm": 19.664137316963803, "learning_rate": 2e-06, "loss": 0.2859, "step": 2719 }, { "epoch": 0.6310172833777984, "grad_norm": 20.192281362626595, "learning_rate": 2e-06, "loss": 0.3911, "step": 2720 }, { "epoch": 0.6312492750260991, "grad_norm": 9.34365143008957, "learning_rate": 2e-06, "loss": 0.2553, "step": 2721 }, { "epoch": 0.6314812666743997, "grad_norm": 24.48111614902375, "learning_rate": 2e-06, "loss": 0.4094, "step": 2722 }, { "epoch": 0.6317132583227004, "grad_norm": 9.707702530939931, "learning_rate": 2e-06, "loss": 0.2276, "step": 2723 }, { "epoch": 0.631945249971001, "grad_norm": 10.989348115827445, "learning_rate": 2e-06, "loss": 0.3009, "step": 2724 }, { "epoch": 0.6321772416193017, "grad_norm": 11.048989765456342, "learning_rate": 2e-06, "loss": 0.2741, "step": 2725 }, { "epoch": 0.6324092332676023, "grad_norm": 14.44663365989049, "learning_rate": 2e-06, "loss": 0.3041, "step": 2726 }, { "epoch": 0.632641224915903, "grad_norm": 20.803849251140264, "learning_rate": 2e-06, "loss": 0.335, "step": 2727 }, { "epoch": 0.6328732165642037, "grad_norm": 37.679247145652994, "learning_rate": 2e-06, "loss": 0.415, "step": 2728 }, { "epoch": 0.6331052082125044, "grad_norm": 20.184213761179738, "learning_rate": 2e-06, "loss": 0.3163, "step": 2729 }, { "epoch": 0.6333371998608051, "grad_norm": 11.551312967773589, "learning_rate": 2e-06, "loss": 0.3722, "step": 2730 }, { "epoch": 0.6335691915091056, "grad_norm": 12.706340951891182, "learning_rate": 2e-06, "loss": 0.262, "step": 2731 }, { "epoch": 0.6338011831574063, "grad_norm": 7.8792482539519835, "learning_rate": 2e-06, "loss": 0.2088, "step": 2732 }, { "epoch": 0.634033174805707, "grad_norm": 20.92190277740192, "learning_rate": 2e-06, "loss": 0.4063, "step": 2733 }, { "epoch": 0.6342651664540077, "grad_norm": 23.1379632657036, "learning_rate": 2e-06, "loss": 0.3856, "step": 2734 }, { "epoch": 0.6344971581023083, "grad_norm": 10.469166151059296, "learning_rate": 2e-06, "loss": 0.3344, "step": 2735 }, { "epoch": 0.634729149750609, "grad_norm": 7.87302230405102, "learning_rate": 2e-06, "loss": 0.2252, "step": 2736 }, { "epoch": 0.6349611413989097, "grad_norm": 10.936525755895664, "learning_rate": 2e-06, "loss": 0.2936, "step": 2737 }, { "epoch": 0.6351931330472103, "grad_norm": 10.375866159081577, "learning_rate": 2e-06, "loss": 0.2014, "step": 2738 }, { "epoch": 0.6354251246955109, "grad_norm": 19.10009840905328, "learning_rate": 2e-06, "loss": 0.2933, "step": 2739 }, { "epoch": 0.6356571163438116, "grad_norm": 10.686041317032329, "learning_rate": 2e-06, "loss": 0.1862, "step": 2740 }, { "epoch": 0.6358891079921123, "grad_norm": 8.286894335356871, "learning_rate": 2e-06, "loss": 0.2741, "step": 2741 }, { "epoch": 0.636121099640413, "grad_norm": 14.205426749807465, "learning_rate": 2e-06, "loss": 0.2385, "step": 2742 }, { "epoch": 0.6363530912887136, "grad_norm": 17.240692772281445, "learning_rate": 2e-06, "loss": 0.2993, "step": 2743 }, { "epoch": 0.6365850829370142, "grad_norm": 11.042937107655254, "learning_rate": 2e-06, "loss": 0.2914, "step": 2744 }, { "epoch": 0.6368170745853149, "grad_norm": 11.676379857964525, "learning_rate": 2e-06, "loss": 0.2883, "step": 2745 }, { "epoch": 0.6370490662336156, "grad_norm": 14.066457991386905, "learning_rate": 2e-06, "loss": 0.3843, "step": 2746 }, { "epoch": 0.6372810578819162, "grad_norm": 9.493809790836558, "learning_rate": 2e-06, "loss": 0.2383, "step": 2747 }, { "epoch": 0.6375130495302169, "grad_norm": 18.954488884963048, "learning_rate": 2e-06, "loss": 0.3952, "step": 2748 }, { "epoch": 0.6377450411785176, "grad_norm": 10.592996727713146, "learning_rate": 2e-06, "loss": 0.2398, "step": 2749 }, { "epoch": 0.6379770328268183, "grad_norm": 12.26499262139558, "learning_rate": 2e-06, "loss": 0.2301, "step": 2750 }, { "epoch": 0.6382090244751188, "grad_norm": 16.25474051381953, "learning_rate": 2e-06, "loss": 0.27, "step": 2751 }, { "epoch": 0.6384410161234195, "grad_norm": 16.255107607009478, "learning_rate": 2e-06, "loss": 0.218, "step": 2752 }, { "epoch": 0.6386730077717202, "grad_norm": 14.197750750330924, "learning_rate": 2e-06, "loss": 0.3227, "step": 2753 }, { "epoch": 0.6389049994200209, "grad_norm": 10.311882448604072, "learning_rate": 2e-06, "loss": 0.1804, "step": 2754 }, { "epoch": 0.6391369910683216, "grad_norm": 19.917667184339937, "learning_rate": 2e-06, "loss": 0.2387, "step": 2755 }, { "epoch": 0.6393689827166222, "grad_norm": 23.591561126533815, "learning_rate": 2e-06, "loss": 0.2956, "step": 2756 }, { "epoch": 0.6396009743649229, "grad_norm": 6.848883359950225, "learning_rate": 2e-06, "loss": 0.1989, "step": 2757 }, { "epoch": 0.6398329660132235, "grad_norm": 30.575828724428405, "learning_rate": 2e-06, "loss": 0.447, "step": 2758 }, { "epoch": 0.6400649576615242, "grad_norm": 22.80247466822758, "learning_rate": 2e-06, "loss": 0.3561, "step": 2759 }, { "epoch": 0.6402969493098248, "grad_norm": 16.36344492473047, "learning_rate": 2e-06, "loss": 0.231, "step": 2760 }, { "epoch": 0.6405289409581255, "grad_norm": 12.587730209883201, "learning_rate": 2e-06, "loss": 0.3164, "step": 2761 }, { "epoch": 0.6407609326064262, "grad_norm": 10.992985260887487, "learning_rate": 2e-06, "loss": 0.3116, "step": 2762 }, { "epoch": 0.6409929242547269, "grad_norm": 21.5416745374942, "learning_rate": 2e-06, "loss": 0.363, "step": 2763 }, { "epoch": 0.6412249159030274, "grad_norm": 10.748047530819289, "learning_rate": 2e-06, "loss": 0.2674, "step": 2764 }, { "epoch": 0.6414569075513281, "grad_norm": 27.600547677388484, "learning_rate": 2e-06, "loss": 0.4441, "step": 2765 }, { "epoch": 0.6416888991996288, "grad_norm": 6.163204912631635, "learning_rate": 2e-06, "loss": 0.1463, "step": 2766 }, { "epoch": 0.6419208908479295, "grad_norm": 21.152287218285384, "learning_rate": 2e-06, "loss": 0.3676, "step": 2767 }, { "epoch": 0.6421528824962301, "grad_norm": 14.472285140393597, "learning_rate": 2e-06, "loss": 0.2761, "step": 2768 }, { "epoch": 0.6423848741445308, "grad_norm": 18.049548403993875, "learning_rate": 2e-06, "loss": 0.2679, "step": 2769 }, { "epoch": 0.6426168657928315, "grad_norm": 18.11985935626689, "learning_rate": 2e-06, "loss": 0.3918, "step": 2770 }, { "epoch": 0.6428488574411321, "grad_norm": 26.11802174798973, "learning_rate": 2e-06, "loss": 0.3145, "step": 2771 }, { "epoch": 0.6430808490894327, "grad_norm": 26.13363087877274, "learning_rate": 2e-06, "loss": 0.407, "step": 2772 }, { "epoch": 0.6433128407377334, "grad_norm": 12.383254109783655, "learning_rate": 2e-06, "loss": 0.2272, "step": 2773 }, { "epoch": 0.6435448323860341, "grad_norm": 14.884215874916432, "learning_rate": 2e-06, "loss": 0.2583, "step": 2774 }, { "epoch": 0.6437768240343348, "grad_norm": 18.511544627989096, "learning_rate": 2e-06, "loss": 0.2846, "step": 2775 }, { "epoch": 0.6440088156826355, "grad_norm": 8.920952494029857, "learning_rate": 2e-06, "loss": 0.2197, "step": 2776 }, { "epoch": 0.6442408073309361, "grad_norm": 16.965335181403468, "learning_rate": 2e-06, "loss": 0.213, "step": 2777 }, { "epoch": 0.6444727989792367, "grad_norm": 17.37240735011543, "learning_rate": 2e-06, "loss": 0.2977, "step": 2778 }, { "epoch": 0.6447047906275374, "grad_norm": 13.824478272806385, "learning_rate": 2e-06, "loss": 0.2514, "step": 2779 }, { "epoch": 0.6449367822758381, "grad_norm": 12.1812911628817, "learning_rate": 2e-06, "loss": 0.2624, "step": 2780 }, { "epoch": 0.6451687739241387, "grad_norm": 11.874202270954608, "learning_rate": 2e-06, "loss": 0.3416, "step": 2781 }, { "epoch": 0.6454007655724394, "grad_norm": 15.154437415027664, "learning_rate": 2e-06, "loss": 0.192, "step": 2782 }, { "epoch": 0.6456327572207401, "grad_norm": 20.87825469178961, "learning_rate": 2e-06, "loss": 0.3041, "step": 2783 }, { "epoch": 0.6458647488690407, "grad_norm": 19.52264631822083, "learning_rate": 2e-06, "loss": 0.3867, "step": 2784 }, { "epoch": 0.6460967405173413, "grad_norm": 9.487212028916371, "learning_rate": 2e-06, "loss": 0.2096, "step": 2785 }, { "epoch": 0.646328732165642, "grad_norm": 9.733036060848567, "learning_rate": 2e-06, "loss": 0.2419, "step": 2786 }, { "epoch": 0.6465607238139427, "grad_norm": 14.859036415557348, "learning_rate": 2e-06, "loss": 0.2464, "step": 2787 }, { "epoch": 0.6467927154622434, "grad_norm": 8.02322631614885, "learning_rate": 2e-06, "loss": 0.1726, "step": 2788 }, { "epoch": 0.647024707110544, "grad_norm": 16.839870153267437, "learning_rate": 2e-06, "loss": 0.2765, "step": 2789 }, { "epoch": 0.6472566987588447, "grad_norm": 12.64538932340377, "learning_rate": 2e-06, "loss": 0.233, "step": 2790 }, { "epoch": 0.6474886904071453, "grad_norm": 8.404400295247754, "learning_rate": 2e-06, "loss": 0.2357, "step": 2791 }, { "epoch": 0.647720682055446, "grad_norm": 26.048227186813318, "learning_rate": 2e-06, "loss": 0.3968, "step": 2792 }, { "epoch": 0.6479526737037467, "grad_norm": 21.542478177411446, "learning_rate": 2e-06, "loss": 0.3382, "step": 2793 }, { "epoch": 0.6481846653520473, "grad_norm": 11.151029842131523, "learning_rate": 2e-06, "loss": 0.3172, "step": 2794 }, { "epoch": 0.648416657000348, "grad_norm": 11.659646165299003, "learning_rate": 2e-06, "loss": 0.2767, "step": 2795 }, { "epoch": 0.6486486486486487, "grad_norm": 10.997059886039864, "learning_rate": 2e-06, "loss": 0.2589, "step": 2796 }, { "epoch": 0.6488806402969494, "grad_norm": 18.666221337499135, "learning_rate": 2e-06, "loss": 0.387, "step": 2797 }, { "epoch": 0.6491126319452499, "grad_norm": 7.395206819823027, "learning_rate": 2e-06, "loss": 0.2303, "step": 2798 }, { "epoch": 0.6493446235935506, "grad_norm": 11.972980545484056, "learning_rate": 2e-06, "loss": 0.2554, "step": 2799 }, { "epoch": 0.6495766152418513, "grad_norm": 18.15435582309048, "learning_rate": 2e-06, "loss": 0.2819, "step": 2800 }, { "epoch": 0.649808606890152, "grad_norm": 14.607467175131468, "learning_rate": 2e-06, "loss": 0.3821, "step": 2801 }, { "epoch": 0.6500405985384526, "grad_norm": 15.679649143266406, "learning_rate": 2e-06, "loss": 0.2644, "step": 2802 }, { "epoch": 0.6502725901867533, "grad_norm": 14.006099163322526, "learning_rate": 2e-06, "loss": 0.4045, "step": 2803 }, { "epoch": 0.6505045818350539, "grad_norm": 18.225064796926716, "learning_rate": 2e-06, "loss": 0.286, "step": 2804 }, { "epoch": 0.6507365734833546, "grad_norm": 11.06047957203517, "learning_rate": 2e-06, "loss": 0.3596, "step": 2805 }, { "epoch": 0.6509685651316552, "grad_norm": 16.095596929410195, "learning_rate": 2e-06, "loss": 0.255, "step": 2806 }, { "epoch": 0.6512005567799559, "grad_norm": 27.95225192803792, "learning_rate": 2e-06, "loss": 0.2987, "step": 2807 }, { "epoch": 0.6514325484282566, "grad_norm": 19.666700272868894, "learning_rate": 2e-06, "loss": 0.3157, "step": 2808 }, { "epoch": 0.6516645400765573, "grad_norm": 18.21122294525583, "learning_rate": 2e-06, "loss": 0.2825, "step": 2809 }, { "epoch": 0.651896531724858, "grad_norm": 8.459447025413976, "learning_rate": 2e-06, "loss": 0.208, "step": 2810 }, { "epoch": 0.6521285233731585, "grad_norm": 15.6268310899083, "learning_rate": 2e-06, "loss": 0.3198, "step": 2811 }, { "epoch": 0.6523605150214592, "grad_norm": 28.45664865289904, "learning_rate": 2e-06, "loss": 0.4413, "step": 2812 }, { "epoch": 0.6525925066697599, "grad_norm": 9.57936608488626, "learning_rate": 2e-06, "loss": 0.1847, "step": 2813 }, { "epoch": 0.6528244983180606, "grad_norm": 27.51762187086346, "learning_rate": 2e-06, "loss": 0.3609, "step": 2814 }, { "epoch": 0.6530564899663612, "grad_norm": 10.913821665408513, "learning_rate": 2e-06, "loss": 0.3048, "step": 2815 }, { "epoch": 0.6532884816146619, "grad_norm": 12.152772036034538, "learning_rate": 2e-06, "loss": 0.2951, "step": 2816 }, { "epoch": 0.6535204732629626, "grad_norm": 16.723214077755184, "learning_rate": 2e-06, "loss": 0.3622, "step": 2817 }, { "epoch": 0.6537524649112632, "grad_norm": 16.129821665506423, "learning_rate": 2e-06, "loss": 0.3142, "step": 2818 }, { "epoch": 0.6539844565595638, "grad_norm": 20.693612400230375, "learning_rate": 2e-06, "loss": 0.3765, "step": 2819 }, { "epoch": 0.6542164482078645, "grad_norm": 12.490854876318215, "learning_rate": 2e-06, "loss": 0.3044, "step": 2820 }, { "epoch": 0.6544484398561652, "grad_norm": 14.731393132025028, "learning_rate": 2e-06, "loss": 0.3037, "step": 2821 }, { "epoch": 0.6546804315044659, "grad_norm": 18.17261097086871, "learning_rate": 2e-06, "loss": 0.3278, "step": 2822 }, { "epoch": 0.6549124231527665, "grad_norm": 11.349964778383697, "learning_rate": 2e-06, "loss": 0.2254, "step": 2823 }, { "epoch": 0.6551444148010671, "grad_norm": 11.71263807129237, "learning_rate": 2e-06, "loss": 0.2592, "step": 2824 }, { "epoch": 0.6553764064493678, "grad_norm": 18.08318611215625, "learning_rate": 2e-06, "loss": 0.2325, "step": 2825 }, { "epoch": 0.6556083980976685, "grad_norm": 8.138291970407442, "learning_rate": 2e-06, "loss": 0.2515, "step": 2826 }, { "epoch": 0.6558403897459691, "grad_norm": 19.53165707849859, "learning_rate": 2e-06, "loss": 0.4315, "step": 2827 }, { "epoch": 0.6560723813942698, "grad_norm": 12.143351381067472, "learning_rate": 2e-06, "loss": 0.2126, "step": 2828 }, { "epoch": 0.6563043730425705, "grad_norm": 7.720914695591568, "learning_rate": 2e-06, "loss": 0.2538, "step": 2829 }, { "epoch": 0.6565363646908712, "grad_norm": 21.317140640865798, "learning_rate": 2e-06, "loss": 0.2426, "step": 2830 }, { "epoch": 0.6567683563391717, "grad_norm": 10.113067962324731, "learning_rate": 2e-06, "loss": 0.2468, "step": 2831 }, { "epoch": 0.6570003479874724, "grad_norm": 14.102113177194823, "learning_rate": 2e-06, "loss": 0.3143, "step": 2832 }, { "epoch": 0.6572323396357731, "grad_norm": 13.987038194596758, "learning_rate": 2e-06, "loss": 0.2907, "step": 2833 }, { "epoch": 0.6574643312840738, "grad_norm": 6.112736996816027, "learning_rate": 2e-06, "loss": 0.2774, "step": 2834 }, { "epoch": 0.6576963229323745, "grad_norm": 10.449699832658547, "learning_rate": 2e-06, "loss": 0.2477, "step": 2835 }, { "epoch": 0.6579283145806751, "grad_norm": 11.472365495587425, "learning_rate": 2e-06, "loss": 0.1979, "step": 2836 }, { "epoch": 0.6581603062289758, "grad_norm": 19.22641768300949, "learning_rate": 2e-06, "loss": 0.4411, "step": 2837 }, { "epoch": 0.6583922978772764, "grad_norm": 14.055238753422904, "learning_rate": 2e-06, "loss": 0.2558, "step": 2838 }, { "epoch": 0.6586242895255771, "grad_norm": 10.984608588095922, "learning_rate": 2e-06, "loss": 0.1894, "step": 2839 }, { "epoch": 0.6588562811738777, "grad_norm": 10.434615531979285, "learning_rate": 2e-06, "loss": 0.3205, "step": 2840 }, { "epoch": 0.6590882728221784, "grad_norm": 14.644889870932198, "learning_rate": 2e-06, "loss": 0.2957, "step": 2841 }, { "epoch": 0.6593202644704791, "grad_norm": 14.774480239943642, "learning_rate": 2e-06, "loss": 0.302, "step": 2842 }, { "epoch": 0.6595522561187798, "grad_norm": 14.71393716092559, "learning_rate": 2e-06, "loss": 0.3118, "step": 2843 }, { "epoch": 0.6597842477670803, "grad_norm": 11.242429895186634, "learning_rate": 2e-06, "loss": 0.2235, "step": 2844 }, { "epoch": 0.660016239415381, "grad_norm": 17.995923113107896, "learning_rate": 2e-06, "loss": 0.2863, "step": 2845 }, { "epoch": 0.6602482310636817, "grad_norm": 17.729162434604135, "learning_rate": 2e-06, "loss": 0.3424, "step": 2846 }, { "epoch": 0.6604802227119824, "grad_norm": 20.243422288385347, "learning_rate": 2e-06, "loss": 0.3789, "step": 2847 }, { "epoch": 0.660712214360283, "grad_norm": 18.64732313406746, "learning_rate": 2e-06, "loss": 0.387, "step": 2848 }, { "epoch": 0.6609442060085837, "grad_norm": 20.458465132834842, "learning_rate": 2e-06, "loss": 0.3071, "step": 2849 }, { "epoch": 0.6611761976568844, "grad_norm": 11.585734721403155, "learning_rate": 2e-06, "loss": 0.2617, "step": 2850 }, { "epoch": 0.661408189305185, "grad_norm": 13.461835104531968, "learning_rate": 2e-06, "loss": 0.335, "step": 2851 }, { "epoch": 0.6616401809534856, "grad_norm": 15.673380657407227, "learning_rate": 2e-06, "loss": 0.2673, "step": 2852 }, { "epoch": 0.6618721726017863, "grad_norm": 9.921530213879748, "learning_rate": 2e-06, "loss": 0.2912, "step": 2853 }, { "epoch": 0.662104164250087, "grad_norm": 14.238714967293898, "learning_rate": 2e-06, "loss": 0.3132, "step": 2854 }, { "epoch": 0.6623361558983877, "grad_norm": 10.974948622348556, "learning_rate": 2e-06, "loss": 0.3454, "step": 2855 }, { "epoch": 0.6625681475466884, "grad_norm": 14.885775876370817, "learning_rate": 2e-06, "loss": 0.2967, "step": 2856 }, { "epoch": 0.662800139194989, "grad_norm": 10.72842736132594, "learning_rate": 2e-06, "loss": 0.2827, "step": 2857 }, { "epoch": 0.6630321308432896, "grad_norm": 14.921435245690917, "learning_rate": 2e-06, "loss": 0.2179, "step": 2858 }, { "epoch": 0.6632641224915903, "grad_norm": 7.573869874381404, "learning_rate": 2e-06, "loss": 0.2186, "step": 2859 }, { "epoch": 0.663496114139891, "grad_norm": 20.056549707412877, "learning_rate": 2e-06, "loss": 0.2833, "step": 2860 }, { "epoch": 0.6637281057881916, "grad_norm": 14.888681037702106, "learning_rate": 2e-06, "loss": 0.2875, "step": 2861 }, { "epoch": 0.6639600974364923, "grad_norm": 17.421706372364103, "learning_rate": 2e-06, "loss": 0.4869, "step": 2862 }, { "epoch": 0.664192089084793, "grad_norm": 13.282211399520385, "learning_rate": 2e-06, "loss": 0.3432, "step": 2863 }, { "epoch": 0.6644240807330936, "grad_norm": 24.679787184942278, "learning_rate": 2e-06, "loss": 0.3197, "step": 2864 }, { "epoch": 0.6646560723813942, "grad_norm": 19.27889281747182, "learning_rate": 2e-06, "loss": 0.3208, "step": 2865 }, { "epoch": 0.6648880640296949, "grad_norm": 13.070453246451402, "learning_rate": 2e-06, "loss": 0.2411, "step": 2866 }, { "epoch": 0.6651200556779956, "grad_norm": 25.468659736645378, "learning_rate": 2e-06, "loss": 0.2217, "step": 2867 }, { "epoch": 0.6653520473262963, "grad_norm": 13.516564164744764, "learning_rate": 2e-06, "loss": 0.2786, "step": 2868 }, { "epoch": 0.665584038974597, "grad_norm": 46.16793049596266, "learning_rate": 2e-06, "loss": 0.2839, "step": 2869 }, { "epoch": 0.6658160306228976, "grad_norm": 15.8472120552767, "learning_rate": 2e-06, "loss": 0.2784, "step": 2870 }, { "epoch": 0.6660480222711982, "grad_norm": 9.499928057186223, "learning_rate": 2e-06, "loss": 0.2202, "step": 2871 }, { "epoch": 0.6662800139194989, "grad_norm": 15.961784571679711, "learning_rate": 2e-06, "loss": 0.3505, "step": 2872 }, { "epoch": 0.6665120055677995, "grad_norm": 14.863888557389888, "learning_rate": 2e-06, "loss": 0.2517, "step": 2873 }, { "epoch": 0.6667439972161002, "grad_norm": 12.114822736748357, "learning_rate": 2e-06, "loss": 0.266, "step": 2874 }, { "epoch": 0.6669759888644009, "grad_norm": 18.791475979494606, "learning_rate": 2e-06, "loss": 0.2813, "step": 2875 }, { "epoch": 0.6672079805127016, "grad_norm": 13.241630128223022, "learning_rate": 2e-06, "loss": 0.1811, "step": 2876 }, { "epoch": 0.6674399721610021, "grad_norm": 12.684844598060202, "learning_rate": 2e-06, "loss": 0.358, "step": 2877 }, { "epoch": 0.6676719638093028, "grad_norm": 27.649226549284123, "learning_rate": 2e-06, "loss": 0.3875, "step": 2878 }, { "epoch": 0.6679039554576035, "grad_norm": 11.450368103403596, "learning_rate": 2e-06, "loss": 0.2175, "step": 2879 }, { "epoch": 0.6681359471059042, "grad_norm": 15.313099377924349, "learning_rate": 2e-06, "loss": 0.3333, "step": 2880 }, { "epoch": 0.6683679387542049, "grad_norm": 17.75810877085882, "learning_rate": 2e-06, "loss": 0.2723, "step": 2881 }, { "epoch": 0.6685999304025055, "grad_norm": 28.161703152994132, "learning_rate": 2e-06, "loss": 0.3317, "step": 2882 }, { "epoch": 0.6688319220508062, "grad_norm": 9.307605731845863, "learning_rate": 2e-06, "loss": 0.2734, "step": 2883 }, { "epoch": 0.6690639136991068, "grad_norm": 18.674716530336838, "learning_rate": 2e-06, "loss": 0.3547, "step": 2884 }, { "epoch": 0.6692959053474075, "grad_norm": 17.731430790847497, "learning_rate": 2e-06, "loss": 0.2739, "step": 2885 }, { "epoch": 0.6695278969957081, "grad_norm": 15.38332735147208, "learning_rate": 2e-06, "loss": 0.3064, "step": 2886 }, { "epoch": 0.6697598886440088, "grad_norm": 14.647155898537573, "learning_rate": 2e-06, "loss": 0.1938, "step": 2887 }, { "epoch": 0.6699918802923095, "grad_norm": 21.958953694103528, "learning_rate": 2e-06, "loss": 0.2576, "step": 2888 }, { "epoch": 0.6702238719406102, "grad_norm": 19.13698687348818, "learning_rate": 2e-06, "loss": 0.2704, "step": 2889 }, { "epoch": 0.6704558635889108, "grad_norm": 16.02891183410766, "learning_rate": 2e-06, "loss": 0.3191, "step": 2890 }, { "epoch": 0.6706878552372114, "grad_norm": 11.89398814623729, "learning_rate": 2e-06, "loss": 0.2183, "step": 2891 }, { "epoch": 0.6709198468855121, "grad_norm": 27.27437455145165, "learning_rate": 2e-06, "loss": 0.3317, "step": 2892 }, { "epoch": 0.6711518385338128, "grad_norm": 17.494392318806085, "learning_rate": 2e-06, "loss": 0.2895, "step": 2893 }, { "epoch": 0.6713838301821135, "grad_norm": 8.532745967737572, "learning_rate": 2e-06, "loss": 0.3024, "step": 2894 }, { "epoch": 0.6716158218304141, "grad_norm": 15.784287286887945, "learning_rate": 2e-06, "loss": 0.3662, "step": 2895 }, { "epoch": 0.6718478134787148, "grad_norm": 19.885911995754892, "learning_rate": 2e-06, "loss": 0.2912, "step": 2896 }, { "epoch": 0.6720798051270154, "grad_norm": 11.502699311807573, "learning_rate": 2e-06, "loss": 0.3606, "step": 2897 }, { "epoch": 0.672311796775316, "grad_norm": 18.332904766435426, "learning_rate": 2e-06, "loss": 0.2782, "step": 2898 }, { "epoch": 0.6725437884236167, "grad_norm": 11.192278874423675, "learning_rate": 2e-06, "loss": 0.2895, "step": 2899 }, { "epoch": 0.6727757800719174, "grad_norm": 10.25836407488176, "learning_rate": 2e-06, "loss": 0.2621, "step": 2900 }, { "epoch": 0.6730077717202181, "grad_norm": 15.680650605945743, "learning_rate": 2e-06, "loss": 0.3704, "step": 2901 }, { "epoch": 0.6732397633685188, "grad_norm": 24.10371429779154, "learning_rate": 2e-06, "loss": 0.3392, "step": 2902 }, { "epoch": 0.6734717550168194, "grad_norm": 8.479237937207667, "learning_rate": 2e-06, "loss": 0.198, "step": 2903 }, { "epoch": 0.67370374666512, "grad_norm": 14.174717355055936, "learning_rate": 2e-06, "loss": 0.3281, "step": 2904 }, { "epoch": 0.6739357383134207, "grad_norm": 14.082257018873104, "learning_rate": 2e-06, "loss": 0.2516, "step": 2905 }, { "epoch": 0.6741677299617214, "grad_norm": 13.382312420174081, "learning_rate": 2e-06, "loss": 0.295, "step": 2906 }, { "epoch": 0.674399721610022, "grad_norm": 24.6062414245324, "learning_rate": 2e-06, "loss": 0.3857, "step": 2907 }, { "epoch": 0.6746317132583227, "grad_norm": 27.594212344103287, "learning_rate": 2e-06, "loss": 0.3976, "step": 2908 }, { "epoch": 0.6748637049066234, "grad_norm": 22.15561527386861, "learning_rate": 2e-06, "loss": 0.4427, "step": 2909 }, { "epoch": 0.6750956965549241, "grad_norm": 9.280812151914011, "learning_rate": 2e-06, "loss": 0.2409, "step": 2910 }, { "epoch": 0.6753276882032246, "grad_norm": 14.739297859742182, "learning_rate": 2e-06, "loss": 0.2851, "step": 2911 }, { "epoch": 0.6755596798515253, "grad_norm": 17.43973773723807, "learning_rate": 2e-06, "loss": 0.2358, "step": 2912 }, { "epoch": 0.675791671499826, "grad_norm": 21.123065539604433, "learning_rate": 2e-06, "loss": 0.2731, "step": 2913 }, { "epoch": 0.6760236631481267, "grad_norm": 18.18026849345217, "learning_rate": 2e-06, "loss": 0.2914, "step": 2914 }, { "epoch": 0.6762556547964274, "grad_norm": 22.75544500204419, "learning_rate": 2e-06, "loss": 0.3933, "step": 2915 }, { "epoch": 0.676487646444728, "grad_norm": 19.37742185783371, "learning_rate": 2e-06, "loss": 0.2525, "step": 2916 }, { "epoch": 0.6767196380930286, "grad_norm": 16.69819067505101, "learning_rate": 2e-06, "loss": 0.2691, "step": 2917 }, { "epoch": 0.6769516297413293, "grad_norm": 16.50659193036346, "learning_rate": 2e-06, "loss": 0.2146, "step": 2918 }, { "epoch": 0.67718362138963, "grad_norm": 7.484957257065774, "learning_rate": 2e-06, "loss": 0.2779, "step": 2919 }, { "epoch": 0.6774156130379306, "grad_norm": 15.034466055515287, "learning_rate": 2e-06, "loss": 0.2166, "step": 2920 }, { "epoch": 0.6776476046862313, "grad_norm": 16.417816190692054, "learning_rate": 2e-06, "loss": 0.359, "step": 2921 }, { "epoch": 0.677879596334532, "grad_norm": 22.66480068571077, "learning_rate": 2e-06, "loss": 0.2557, "step": 2922 }, { "epoch": 0.6781115879828327, "grad_norm": 9.93097539810531, "learning_rate": 2e-06, "loss": 0.2954, "step": 2923 }, { "epoch": 0.6783435796311332, "grad_norm": 15.193296735844845, "learning_rate": 2e-06, "loss": 0.2506, "step": 2924 }, { "epoch": 0.6785755712794339, "grad_norm": 17.52679971654726, "learning_rate": 2e-06, "loss": 0.3571, "step": 2925 }, { "epoch": 0.6788075629277346, "grad_norm": 7.445586192344902, "learning_rate": 2e-06, "loss": 0.1966, "step": 2926 }, { "epoch": 0.6790395545760353, "grad_norm": 8.416210239673962, "learning_rate": 2e-06, "loss": 0.2133, "step": 2927 }, { "epoch": 0.6792715462243359, "grad_norm": 14.313635294079827, "learning_rate": 2e-06, "loss": 0.3515, "step": 2928 }, { "epoch": 0.6795035378726366, "grad_norm": 24.603948921934705, "learning_rate": 2e-06, "loss": 0.3901, "step": 2929 }, { "epoch": 0.6797355295209373, "grad_norm": 15.833755249534436, "learning_rate": 2e-06, "loss": 0.3374, "step": 2930 }, { "epoch": 0.6799675211692379, "grad_norm": 16.04070634218248, "learning_rate": 2e-06, "loss": 0.3736, "step": 2931 }, { "epoch": 0.6801995128175385, "grad_norm": 20.682720901172534, "learning_rate": 2e-06, "loss": 0.3394, "step": 2932 }, { "epoch": 0.6804315044658392, "grad_norm": 21.697405182973313, "learning_rate": 2e-06, "loss": 0.3276, "step": 2933 }, { "epoch": 0.6806634961141399, "grad_norm": 18.865819341538803, "learning_rate": 2e-06, "loss": 0.3264, "step": 2934 }, { "epoch": 0.6808954877624406, "grad_norm": 10.423027096808157, "learning_rate": 2e-06, "loss": 0.3037, "step": 2935 }, { "epoch": 0.6811274794107413, "grad_norm": 21.677979146013957, "learning_rate": 2e-06, "loss": 0.367, "step": 2936 }, { "epoch": 0.6813594710590418, "grad_norm": 26.4302768785951, "learning_rate": 2e-06, "loss": 0.3841, "step": 2937 }, { "epoch": 0.6815914627073425, "grad_norm": 10.069866494626753, "learning_rate": 2e-06, "loss": 0.2875, "step": 2938 }, { "epoch": 0.6818234543556432, "grad_norm": 9.805489612875316, "learning_rate": 2e-06, "loss": 0.1803, "step": 2939 }, { "epoch": 0.6820554460039439, "grad_norm": 13.082756265772101, "learning_rate": 2e-06, "loss": 0.2856, "step": 2940 }, { "epoch": 0.6822874376522445, "grad_norm": 21.214294775844788, "learning_rate": 2e-06, "loss": 0.3278, "step": 2941 }, { "epoch": 0.6825194293005452, "grad_norm": 18.98402642369575, "learning_rate": 2e-06, "loss": 0.3717, "step": 2942 }, { "epoch": 0.6827514209488459, "grad_norm": 18.28594400987843, "learning_rate": 2e-06, "loss": 0.2966, "step": 2943 }, { "epoch": 0.6829834125971465, "grad_norm": 15.955264733597344, "learning_rate": 2e-06, "loss": 0.2649, "step": 2944 }, { "epoch": 0.6832154042454471, "grad_norm": 12.143853436389726, "learning_rate": 2e-06, "loss": 0.3043, "step": 2945 }, { "epoch": 0.6834473958937478, "grad_norm": 33.10313809595552, "learning_rate": 2e-06, "loss": 0.4111, "step": 2946 }, { "epoch": 0.6836793875420485, "grad_norm": 15.49237897542982, "learning_rate": 2e-06, "loss": 0.3257, "step": 2947 }, { "epoch": 0.6839113791903492, "grad_norm": 15.234371131142845, "learning_rate": 2e-06, "loss": 0.2847, "step": 2948 }, { "epoch": 0.6841433708386498, "grad_norm": 14.394745150033899, "learning_rate": 2e-06, "loss": 0.2325, "step": 2949 }, { "epoch": 0.6843753624869505, "grad_norm": 7.697439348929282, "learning_rate": 2e-06, "loss": 0.2083, "step": 2950 }, { "epoch": 0.6846073541352511, "grad_norm": 8.347270088398039, "learning_rate": 2e-06, "loss": 0.1849, "step": 2951 }, { "epoch": 0.6848393457835518, "grad_norm": 8.528135185478353, "learning_rate": 2e-06, "loss": 0.2895, "step": 2952 }, { "epoch": 0.6850713374318524, "grad_norm": 11.440085523850653, "learning_rate": 2e-06, "loss": 0.3013, "step": 2953 }, { "epoch": 0.6853033290801531, "grad_norm": 14.240300923564252, "learning_rate": 2e-06, "loss": 0.2868, "step": 2954 }, { "epoch": 0.6855353207284538, "grad_norm": 15.298243190826206, "learning_rate": 2e-06, "loss": 0.2526, "step": 2955 }, { "epoch": 0.6857673123767545, "grad_norm": 16.287029088480868, "learning_rate": 2e-06, "loss": 0.2477, "step": 2956 }, { "epoch": 0.685999304025055, "grad_norm": 16.330072651646685, "learning_rate": 2e-06, "loss": 0.2338, "step": 2957 }, { "epoch": 0.6862312956733557, "grad_norm": 10.144289299404209, "learning_rate": 2e-06, "loss": 0.2366, "step": 2958 }, { "epoch": 0.6864632873216564, "grad_norm": 15.970485036072468, "learning_rate": 2e-06, "loss": 0.2597, "step": 2959 }, { "epoch": 0.6866952789699571, "grad_norm": 20.059300974805478, "learning_rate": 2e-06, "loss": 0.2755, "step": 2960 }, { "epoch": 0.6869272706182578, "grad_norm": 13.123594011226873, "learning_rate": 2e-06, "loss": 0.2808, "step": 2961 }, { "epoch": 0.6871592622665584, "grad_norm": 11.967794532068869, "learning_rate": 2e-06, "loss": 0.2214, "step": 2962 }, { "epoch": 0.6873912539148591, "grad_norm": 13.279811333305938, "learning_rate": 2e-06, "loss": 0.2894, "step": 2963 }, { "epoch": 0.6876232455631597, "grad_norm": 15.978601053972245, "learning_rate": 2e-06, "loss": 0.3233, "step": 2964 }, { "epoch": 0.6878552372114604, "grad_norm": 19.482813281061436, "learning_rate": 2e-06, "loss": 0.3722, "step": 2965 }, { "epoch": 0.688087228859761, "grad_norm": 16.022926928632646, "learning_rate": 2e-06, "loss": 0.4637, "step": 2966 }, { "epoch": 0.6883192205080617, "grad_norm": 25.90689576178664, "learning_rate": 2e-06, "loss": 0.3036, "step": 2967 }, { "epoch": 0.6885512121563624, "grad_norm": 15.81243335582368, "learning_rate": 2e-06, "loss": 0.257, "step": 2968 }, { "epoch": 0.6887832038046631, "grad_norm": 18.661608358707515, "learning_rate": 2e-06, "loss": 0.3841, "step": 2969 }, { "epoch": 0.6890151954529637, "grad_norm": 20.789753229496526, "learning_rate": 2e-06, "loss": 0.3484, "step": 2970 }, { "epoch": 0.6892471871012643, "grad_norm": 11.855384758128736, "learning_rate": 2e-06, "loss": 0.2881, "step": 2971 }, { "epoch": 0.689479178749565, "grad_norm": 17.159193927611167, "learning_rate": 2e-06, "loss": 0.2717, "step": 2972 }, { "epoch": 0.6897111703978657, "grad_norm": 13.992237879814896, "learning_rate": 2e-06, "loss": 0.4286, "step": 2973 }, { "epoch": 0.6899431620461663, "grad_norm": 24.35252878929027, "learning_rate": 2e-06, "loss": 0.3378, "step": 2974 }, { "epoch": 0.690175153694467, "grad_norm": 16.71016085700087, "learning_rate": 2e-06, "loss": 0.2689, "step": 2975 }, { "epoch": 0.6904071453427677, "grad_norm": 9.784394283410494, "learning_rate": 2e-06, "loss": 0.2627, "step": 2976 }, { "epoch": 0.6906391369910683, "grad_norm": 8.624407375319688, "learning_rate": 2e-06, "loss": 0.2334, "step": 2977 }, { "epoch": 0.690871128639369, "grad_norm": 6.348981967459156, "learning_rate": 2e-06, "loss": 0.2202, "step": 2978 }, { "epoch": 0.6911031202876696, "grad_norm": 19.035352735373152, "learning_rate": 2e-06, "loss": 0.3631, "step": 2979 }, { "epoch": 0.6913351119359703, "grad_norm": 7.198738788917573, "learning_rate": 2e-06, "loss": 0.2179, "step": 2980 }, { "epoch": 0.691567103584271, "grad_norm": 18.931518041058506, "learning_rate": 2e-06, "loss": 0.3189, "step": 2981 }, { "epoch": 0.6917990952325717, "grad_norm": 23.18489676907909, "learning_rate": 2e-06, "loss": 0.2713, "step": 2982 }, { "epoch": 0.6920310868808723, "grad_norm": 15.316381457085425, "learning_rate": 2e-06, "loss": 0.3127, "step": 2983 }, { "epoch": 0.6922630785291729, "grad_norm": 11.503904532399071, "learning_rate": 2e-06, "loss": 0.2112, "step": 2984 }, { "epoch": 0.6924950701774736, "grad_norm": 23.105103132210573, "learning_rate": 2e-06, "loss": 0.35, "step": 2985 }, { "epoch": 0.6927270618257743, "grad_norm": 8.934461565490222, "learning_rate": 2e-06, "loss": 0.1694, "step": 2986 }, { "epoch": 0.6929590534740749, "grad_norm": 15.460634718345247, "learning_rate": 2e-06, "loss": 0.327, "step": 2987 }, { "epoch": 0.6931910451223756, "grad_norm": 10.533426557156625, "learning_rate": 2e-06, "loss": 0.2937, "step": 2988 }, { "epoch": 0.6934230367706763, "grad_norm": 12.54384265691961, "learning_rate": 2e-06, "loss": 0.2716, "step": 2989 }, { "epoch": 0.693655028418977, "grad_norm": 17.7973153093463, "learning_rate": 2e-06, "loss": 0.2359, "step": 2990 }, { "epoch": 0.6938870200672775, "grad_norm": 7.855860871585307, "learning_rate": 2e-06, "loss": 0.214, "step": 2991 }, { "epoch": 0.6941190117155782, "grad_norm": 23.44117120120443, "learning_rate": 2e-06, "loss": 0.3826, "step": 2992 }, { "epoch": 0.6943510033638789, "grad_norm": 21.587237505595454, "learning_rate": 2e-06, "loss": 0.3726, "step": 2993 }, { "epoch": 0.6945829950121796, "grad_norm": 8.966649034172018, "learning_rate": 2e-06, "loss": 0.3061, "step": 2994 }, { "epoch": 0.6948149866604802, "grad_norm": 10.394999748177568, "learning_rate": 2e-06, "loss": 0.2836, "step": 2995 }, { "epoch": 0.6950469783087809, "grad_norm": 16.36252544489369, "learning_rate": 2e-06, "loss": 0.2719, "step": 2996 }, { "epoch": 0.6952789699570815, "grad_norm": 15.73753046229693, "learning_rate": 2e-06, "loss": 0.2106, "step": 2997 }, { "epoch": 0.6955109616053822, "grad_norm": 10.482261401826955, "learning_rate": 2e-06, "loss": 0.2225, "step": 2998 }, { "epoch": 0.6957429532536828, "grad_norm": 16.162687708016666, "learning_rate": 2e-06, "loss": 0.3063, "step": 2999 }, { "epoch": 0.6959749449019835, "grad_norm": 10.163603053519628, "learning_rate": 2e-06, "loss": 0.2532, "step": 3000 }, { "epoch": 0.6962069365502842, "grad_norm": 13.518454150870886, "learning_rate": 2e-06, "loss": 0.3016, "step": 3001 }, { "epoch": 0.6964389281985849, "grad_norm": 18.165892745018137, "learning_rate": 2e-06, "loss": 0.3698, "step": 3002 }, { "epoch": 0.6966709198468856, "grad_norm": 17.90105884966254, "learning_rate": 2e-06, "loss": 0.3063, "step": 3003 }, { "epoch": 0.6969029114951861, "grad_norm": 12.788468740614176, "learning_rate": 2e-06, "loss": 0.3079, "step": 3004 }, { "epoch": 0.6971349031434868, "grad_norm": 15.957661767789833, "learning_rate": 2e-06, "loss": 0.2346, "step": 3005 }, { "epoch": 0.6973668947917875, "grad_norm": 18.210358640908733, "learning_rate": 2e-06, "loss": 0.2909, "step": 3006 }, { "epoch": 0.6975988864400882, "grad_norm": 13.050628004493788, "learning_rate": 2e-06, "loss": 0.3312, "step": 3007 }, { "epoch": 0.6978308780883888, "grad_norm": 13.177363294059413, "learning_rate": 2e-06, "loss": 0.2819, "step": 3008 }, { "epoch": 0.6980628697366895, "grad_norm": 8.645162144450342, "learning_rate": 2e-06, "loss": 0.2241, "step": 3009 }, { "epoch": 0.6982948613849902, "grad_norm": 12.456599894548935, "learning_rate": 2e-06, "loss": 0.2404, "step": 3010 }, { "epoch": 0.6985268530332908, "grad_norm": 16.152022904207286, "learning_rate": 2e-06, "loss": 0.2267, "step": 3011 }, { "epoch": 0.6987588446815914, "grad_norm": 15.475853041190083, "learning_rate": 2e-06, "loss": 0.2555, "step": 3012 }, { "epoch": 0.6989908363298921, "grad_norm": 17.594340371945172, "learning_rate": 2e-06, "loss": 0.3743, "step": 3013 }, { "epoch": 0.6992228279781928, "grad_norm": 13.891850067454452, "learning_rate": 2e-06, "loss": 0.333, "step": 3014 }, { "epoch": 0.6994548196264935, "grad_norm": 9.746005875818053, "learning_rate": 2e-06, "loss": 0.2134, "step": 3015 }, { "epoch": 0.6996868112747942, "grad_norm": 13.990318327075137, "learning_rate": 2e-06, "loss": 0.3343, "step": 3016 }, { "epoch": 0.6999188029230947, "grad_norm": 23.025914502524685, "learning_rate": 2e-06, "loss": 0.4144, "step": 3017 }, { "epoch": 0.7001507945713954, "grad_norm": 22.778332248585592, "learning_rate": 2e-06, "loss": 0.1918, "step": 3018 }, { "epoch": 0.7003827862196961, "grad_norm": 15.430801616526118, "learning_rate": 2e-06, "loss": 0.3679, "step": 3019 }, { "epoch": 0.7006147778679968, "grad_norm": 16.207361770057013, "learning_rate": 2e-06, "loss": 0.2704, "step": 3020 }, { "epoch": 0.7008467695162974, "grad_norm": 16.574744397317918, "learning_rate": 2e-06, "loss": 0.3118, "step": 3021 }, { "epoch": 0.7010787611645981, "grad_norm": 9.771455601212924, "learning_rate": 2e-06, "loss": 0.2042, "step": 3022 }, { "epoch": 0.7013107528128988, "grad_norm": 16.352935531849965, "learning_rate": 2e-06, "loss": 0.3103, "step": 3023 }, { "epoch": 0.7015427444611994, "grad_norm": 16.012828911522448, "learning_rate": 2e-06, "loss": 0.3218, "step": 3024 }, { "epoch": 0.7017747361095, "grad_norm": 31.336384254208152, "learning_rate": 2e-06, "loss": 0.3786, "step": 3025 }, { "epoch": 0.7020067277578007, "grad_norm": 11.076684492126956, "learning_rate": 2e-06, "loss": 0.2038, "step": 3026 }, { "epoch": 0.7022387194061014, "grad_norm": 19.254274788008026, "learning_rate": 2e-06, "loss": 0.3597, "step": 3027 }, { "epoch": 0.7024707110544021, "grad_norm": 22.845025573712128, "learning_rate": 2e-06, "loss": 0.3286, "step": 3028 }, { "epoch": 0.7027027027027027, "grad_norm": 10.265590830213393, "learning_rate": 2e-06, "loss": 0.2768, "step": 3029 }, { "epoch": 0.7029346943510033, "grad_norm": 13.635127225963343, "learning_rate": 2e-06, "loss": 0.2435, "step": 3030 }, { "epoch": 0.703166685999304, "grad_norm": 21.43039852701912, "learning_rate": 2e-06, "loss": 0.3317, "step": 3031 }, { "epoch": 0.7033986776476047, "grad_norm": 20.249225557418523, "learning_rate": 2e-06, "loss": 0.3132, "step": 3032 }, { "epoch": 0.7036306692959053, "grad_norm": 10.437849106321048, "learning_rate": 2e-06, "loss": 0.2831, "step": 3033 }, { "epoch": 0.703862660944206, "grad_norm": 14.921942445808748, "learning_rate": 2e-06, "loss": 0.3055, "step": 3034 }, { "epoch": 0.7040946525925067, "grad_norm": 13.427974898300857, "learning_rate": 2e-06, "loss": 0.2818, "step": 3035 }, { "epoch": 0.7043266442408074, "grad_norm": 18.31794009982437, "learning_rate": 2e-06, "loss": 0.3405, "step": 3036 }, { "epoch": 0.7045586358891079, "grad_norm": 31.05526528803719, "learning_rate": 2e-06, "loss": 0.5122, "step": 3037 }, { "epoch": 0.7047906275374086, "grad_norm": 12.254876345659827, "learning_rate": 2e-06, "loss": 0.3137, "step": 3038 }, { "epoch": 0.7050226191857093, "grad_norm": 15.370854458054378, "learning_rate": 2e-06, "loss": 0.3237, "step": 3039 }, { "epoch": 0.70525461083401, "grad_norm": 18.637823728977246, "learning_rate": 2e-06, "loss": 0.2775, "step": 3040 }, { "epoch": 0.7054866024823107, "grad_norm": 12.873597948913517, "learning_rate": 2e-06, "loss": 0.3299, "step": 3041 }, { "epoch": 0.7057185941306113, "grad_norm": 17.18921170254753, "learning_rate": 2e-06, "loss": 0.3152, "step": 3042 }, { "epoch": 0.705950585778912, "grad_norm": 15.409492736211421, "learning_rate": 2e-06, "loss": 0.2607, "step": 3043 }, { "epoch": 0.7061825774272126, "grad_norm": 9.975829678915247, "learning_rate": 2e-06, "loss": 0.3616, "step": 3044 }, { "epoch": 0.7064145690755133, "grad_norm": 15.207969039612548, "learning_rate": 2e-06, "loss": 0.3872, "step": 3045 }, { "epoch": 0.7066465607238139, "grad_norm": 12.70874179865131, "learning_rate": 2e-06, "loss": 0.2803, "step": 3046 }, { "epoch": 0.7068785523721146, "grad_norm": 10.777788004616392, "learning_rate": 2e-06, "loss": 0.3013, "step": 3047 }, { "epoch": 0.7071105440204153, "grad_norm": 9.71067971397703, "learning_rate": 2e-06, "loss": 0.231, "step": 3048 }, { "epoch": 0.707342535668716, "grad_norm": 16.182320295977373, "learning_rate": 2e-06, "loss": 0.2541, "step": 3049 }, { "epoch": 0.7075745273170165, "grad_norm": 14.24194784140107, "learning_rate": 2e-06, "loss": 0.3133, "step": 3050 }, { "epoch": 0.7078065189653172, "grad_norm": 8.116297025441758, "learning_rate": 2e-06, "loss": 0.2765, "step": 3051 }, { "epoch": 0.7080385106136179, "grad_norm": 15.408449590028416, "learning_rate": 2e-06, "loss": 0.3464, "step": 3052 }, { "epoch": 0.7082705022619186, "grad_norm": 10.126091809211045, "learning_rate": 2e-06, "loss": 0.2563, "step": 3053 }, { "epoch": 0.7085024939102192, "grad_norm": 15.76931845699468, "learning_rate": 2e-06, "loss": 0.3641, "step": 3054 }, { "epoch": 0.7087344855585199, "grad_norm": 14.3181566502542, "learning_rate": 2e-06, "loss": 0.2404, "step": 3055 }, { "epoch": 0.7089664772068206, "grad_norm": 11.518059207296266, "learning_rate": 2e-06, "loss": 0.3092, "step": 3056 }, { "epoch": 0.7091984688551212, "grad_norm": 11.075548644625707, "learning_rate": 2e-06, "loss": 0.2368, "step": 3057 }, { "epoch": 0.7094304605034218, "grad_norm": 8.931630979263996, "learning_rate": 2e-06, "loss": 0.2094, "step": 3058 }, { "epoch": 0.7096624521517225, "grad_norm": 10.015903169009626, "learning_rate": 2e-06, "loss": 0.3026, "step": 3059 }, { "epoch": 0.7098944438000232, "grad_norm": 9.001640545817724, "learning_rate": 2e-06, "loss": 0.2358, "step": 3060 }, { "epoch": 0.7101264354483239, "grad_norm": 9.884731171167504, "learning_rate": 2e-06, "loss": 0.2066, "step": 3061 }, { "epoch": 0.7103584270966246, "grad_norm": 8.79796649865471, "learning_rate": 2e-06, "loss": 0.303, "step": 3062 }, { "epoch": 0.7105904187449252, "grad_norm": 12.035283919663504, "learning_rate": 2e-06, "loss": 0.3186, "step": 3063 }, { "epoch": 0.7108224103932258, "grad_norm": 5.610850717951971, "learning_rate": 2e-06, "loss": 0.1643, "step": 3064 }, { "epoch": 0.7110544020415265, "grad_norm": 20.315813498268504, "learning_rate": 2e-06, "loss": 0.2786, "step": 3065 }, { "epoch": 0.7112863936898272, "grad_norm": 15.024208154539762, "learning_rate": 2e-06, "loss": 0.358, "step": 3066 }, { "epoch": 0.7115183853381278, "grad_norm": 18.763448531490877, "learning_rate": 2e-06, "loss": 0.3619, "step": 3067 }, { "epoch": 0.7117503769864285, "grad_norm": 8.850772694364398, "learning_rate": 2e-06, "loss": 0.3079, "step": 3068 }, { "epoch": 0.7119823686347292, "grad_norm": 12.558505567041202, "learning_rate": 2e-06, "loss": 0.2539, "step": 3069 }, { "epoch": 0.7122143602830298, "grad_norm": 9.413887213263452, "learning_rate": 2e-06, "loss": 0.2578, "step": 3070 }, { "epoch": 0.7124463519313304, "grad_norm": 8.268247015403908, "learning_rate": 2e-06, "loss": 0.3173, "step": 3071 }, { "epoch": 0.7126783435796311, "grad_norm": 8.897679021367162, "learning_rate": 2e-06, "loss": 0.2035, "step": 3072 }, { "epoch": 0.7129103352279318, "grad_norm": 6.774172208125166, "learning_rate": 2e-06, "loss": 0.2567, "step": 3073 }, { "epoch": 0.7131423268762325, "grad_norm": 15.784809665803815, "learning_rate": 2e-06, "loss": 0.3427, "step": 3074 }, { "epoch": 0.7133743185245331, "grad_norm": 9.490956131277786, "learning_rate": 2e-06, "loss": 0.305, "step": 3075 }, { "epoch": 0.7136063101728338, "grad_norm": 12.768478698546604, "learning_rate": 2e-06, "loss": 0.2892, "step": 3076 }, { "epoch": 0.7138383018211344, "grad_norm": 14.733261060077966, "learning_rate": 2e-06, "loss": 0.2683, "step": 3077 }, { "epoch": 0.7140702934694351, "grad_norm": 16.361146756468674, "learning_rate": 2e-06, "loss": 0.2763, "step": 3078 }, { "epoch": 0.7143022851177357, "grad_norm": 20.56834735004541, "learning_rate": 2e-06, "loss": 0.3263, "step": 3079 }, { "epoch": 0.7145342767660364, "grad_norm": 19.139723749178586, "learning_rate": 2e-06, "loss": 0.337, "step": 3080 }, { "epoch": 0.7147662684143371, "grad_norm": 6.061627605458326, "learning_rate": 2e-06, "loss": 0.1807, "step": 3081 }, { "epoch": 0.7149982600626378, "grad_norm": 17.17408580688916, "learning_rate": 2e-06, "loss": 0.2639, "step": 3082 }, { "epoch": 0.7152302517109385, "grad_norm": 21.46351281558989, "learning_rate": 2e-06, "loss": 0.3246, "step": 3083 }, { "epoch": 0.715462243359239, "grad_norm": 15.413561020002579, "learning_rate": 2e-06, "loss": 0.2781, "step": 3084 }, { "epoch": 0.7156942350075397, "grad_norm": 17.441447535031255, "learning_rate": 2e-06, "loss": 0.345, "step": 3085 }, { "epoch": 0.7159262266558404, "grad_norm": 14.122717322632662, "learning_rate": 2e-06, "loss": 0.2305, "step": 3086 }, { "epoch": 0.7161582183041411, "grad_norm": 11.784198991709928, "learning_rate": 2e-06, "loss": 0.31, "step": 3087 }, { "epoch": 0.7163902099524417, "grad_norm": 16.435399537046283, "learning_rate": 2e-06, "loss": 0.2253, "step": 3088 }, { "epoch": 0.7166222016007424, "grad_norm": 20.087543297417806, "learning_rate": 2e-06, "loss": 0.3557, "step": 3089 }, { "epoch": 0.716854193249043, "grad_norm": 9.136332789225563, "learning_rate": 2e-06, "loss": 0.2596, "step": 3090 }, { "epoch": 0.7170861848973437, "grad_norm": 17.3524058908081, "learning_rate": 2e-06, "loss": 0.2313, "step": 3091 }, { "epoch": 0.7173181765456443, "grad_norm": 17.29438646410513, "learning_rate": 2e-06, "loss": 0.2101, "step": 3092 }, { "epoch": 0.717550168193945, "grad_norm": 12.382294379053754, "learning_rate": 2e-06, "loss": 0.2556, "step": 3093 }, { "epoch": 0.7177821598422457, "grad_norm": 13.213939787408052, "learning_rate": 2e-06, "loss": 0.2625, "step": 3094 }, { "epoch": 0.7180141514905464, "grad_norm": 8.476230812288442, "learning_rate": 2e-06, "loss": 0.2527, "step": 3095 }, { "epoch": 0.718246143138847, "grad_norm": 19.212254605560727, "learning_rate": 2e-06, "loss": 0.2904, "step": 3096 }, { "epoch": 0.7184781347871476, "grad_norm": 10.343472922864036, "learning_rate": 2e-06, "loss": 0.3471, "step": 3097 }, { "epoch": 0.7187101264354483, "grad_norm": 13.725070104455183, "learning_rate": 2e-06, "loss": 0.361, "step": 3098 }, { "epoch": 0.718942118083749, "grad_norm": 16.651969325920845, "learning_rate": 2e-06, "loss": 0.3317, "step": 3099 }, { "epoch": 0.7191741097320496, "grad_norm": 9.55939511575531, "learning_rate": 2e-06, "loss": 0.3012, "step": 3100 }, { "epoch": 0.7194061013803503, "grad_norm": 15.3184086976544, "learning_rate": 2e-06, "loss": 0.2286, "step": 3101 }, { "epoch": 0.719638093028651, "grad_norm": 12.566094977158508, "learning_rate": 2e-06, "loss": 0.2944, "step": 3102 }, { "epoch": 0.7198700846769517, "grad_norm": 6.97831787840303, "learning_rate": 2e-06, "loss": 0.1917, "step": 3103 }, { "epoch": 0.7201020763252522, "grad_norm": 16.899575716149464, "learning_rate": 2e-06, "loss": 0.2448, "step": 3104 }, { "epoch": 0.7203340679735529, "grad_norm": 17.976110532970768, "learning_rate": 2e-06, "loss": 0.3163, "step": 3105 }, { "epoch": 0.7205660596218536, "grad_norm": 18.232864963298365, "learning_rate": 2e-06, "loss": 0.4656, "step": 3106 }, { "epoch": 0.7207980512701543, "grad_norm": 9.588755633507239, "learning_rate": 2e-06, "loss": 0.321, "step": 3107 }, { "epoch": 0.721030042918455, "grad_norm": 9.184369414779091, "learning_rate": 2e-06, "loss": 0.185, "step": 3108 }, { "epoch": 0.7212620345667556, "grad_norm": 8.18229606602893, "learning_rate": 2e-06, "loss": 0.2636, "step": 3109 }, { "epoch": 0.7214940262150562, "grad_norm": 9.565619460509582, "learning_rate": 2e-06, "loss": 0.3021, "step": 3110 }, { "epoch": 0.7217260178633569, "grad_norm": 8.042176091017213, "learning_rate": 2e-06, "loss": 0.1867, "step": 3111 }, { "epoch": 0.7219580095116576, "grad_norm": 13.345881386328118, "learning_rate": 2e-06, "loss": 0.2084, "step": 3112 }, { "epoch": 0.7221900011599582, "grad_norm": 15.26049198288833, "learning_rate": 2e-06, "loss": 0.3182, "step": 3113 }, { "epoch": 0.7224219928082589, "grad_norm": 12.405214439820826, "learning_rate": 2e-06, "loss": 0.3215, "step": 3114 }, { "epoch": 0.7226539844565596, "grad_norm": 8.279027687701165, "learning_rate": 2e-06, "loss": 0.2029, "step": 3115 }, { "epoch": 0.7228859761048603, "grad_norm": 9.23346549546795, "learning_rate": 2e-06, "loss": 0.2842, "step": 3116 }, { "epoch": 0.7231179677531608, "grad_norm": 13.90968760840073, "learning_rate": 2e-06, "loss": 0.2085, "step": 3117 }, { "epoch": 0.7233499594014615, "grad_norm": 13.869798608630939, "learning_rate": 2e-06, "loss": 0.2016, "step": 3118 }, { "epoch": 0.7235819510497622, "grad_norm": 7.716027129845056, "learning_rate": 2e-06, "loss": 0.2188, "step": 3119 }, { "epoch": 0.7238139426980629, "grad_norm": 10.440026382109156, "learning_rate": 2e-06, "loss": 0.2281, "step": 3120 }, { "epoch": 0.7240459343463636, "grad_norm": 17.4379176105839, "learning_rate": 2e-06, "loss": 0.3783, "step": 3121 }, { "epoch": 0.7242779259946642, "grad_norm": 17.53811782802477, "learning_rate": 2e-06, "loss": 0.2852, "step": 3122 }, { "epoch": 0.7245099176429649, "grad_norm": 12.403149884465117, "learning_rate": 2e-06, "loss": 0.2562, "step": 3123 }, { "epoch": 0.7247419092912655, "grad_norm": 7.189055185689566, "learning_rate": 2e-06, "loss": 0.2395, "step": 3124 }, { "epoch": 0.7249739009395662, "grad_norm": 13.366602934084645, "learning_rate": 2e-06, "loss": 0.3757, "step": 3125 }, { "epoch": 0.7252058925878668, "grad_norm": 20.723002926799648, "learning_rate": 2e-06, "loss": 0.3032, "step": 3126 }, { "epoch": 0.7254378842361675, "grad_norm": 14.345531287053802, "learning_rate": 2e-06, "loss": 0.2848, "step": 3127 }, { "epoch": 0.7256698758844682, "grad_norm": 15.277830898750224, "learning_rate": 2e-06, "loss": 0.2698, "step": 3128 }, { "epoch": 0.7259018675327689, "grad_norm": 14.343649187551526, "learning_rate": 2e-06, "loss": 0.2137, "step": 3129 }, { "epoch": 0.7261338591810694, "grad_norm": 11.927520687935251, "learning_rate": 2e-06, "loss": 0.2584, "step": 3130 }, { "epoch": 0.7263658508293701, "grad_norm": 7.569851105062665, "learning_rate": 2e-06, "loss": 0.2569, "step": 3131 }, { "epoch": 0.7265978424776708, "grad_norm": 15.411236027620829, "learning_rate": 2e-06, "loss": 0.2988, "step": 3132 }, { "epoch": 0.7268298341259715, "grad_norm": 17.315685638615545, "learning_rate": 2e-06, "loss": 0.3518, "step": 3133 }, { "epoch": 0.7270618257742721, "grad_norm": 12.187758686052286, "learning_rate": 2e-06, "loss": 0.2202, "step": 3134 }, { "epoch": 0.7272938174225728, "grad_norm": 9.516509125547941, "learning_rate": 2e-06, "loss": 0.2373, "step": 3135 }, { "epoch": 0.7275258090708735, "grad_norm": 13.633348693158915, "learning_rate": 2e-06, "loss": 0.244, "step": 3136 }, { "epoch": 0.7277578007191741, "grad_norm": 18.169940194464743, "learning_rate": 2e-06, "loss": 0.295, "step": 3137 }, { "epoch": 0.7279897923674747, "grad_norm": 16.600460469573456, "learning_rate": 2e-06, "loss": 0.236, "step": 3138 }, { "epoch": 0.7282217840157754, "grad_norm": 13.243093105767455, "learning_rate": 2e-06, "loss": 0.2635, "step": 3139 }, { "epoch": 0.7284537756640761, "grad_norm": 18.433591061729665, "learning_rate": 2e-06, "loss": 0.3301, "step": 3140 }, { "epoch": 0.7286857673123768, "grad_norm": 9.66065462790196, "learning_rate": 2e-06, "loss": 0.2138, "step": 3141 }, { "epoch": 0.7289177589606775, "grad_norm": 13.209312050697344, "learning_rate": 2e-06, "loss": 0.2658, "step": 3142 }, { "epoch": 0.7291497506089781, "grad_norm": 19.082935008032695, "learning_rate": 2e-06, "loss": 0.291, "step": 3143 }, { "epoch": 0.7293817422572787, "grad_norm": 7.89564746805161, "learning_rate": 2e-06, "loss": 0.223, "step": 3144 }, { "epoch": 0.7296137339055794, "grad_norm": 16.304437940242224, "learning_rate": 2e-06, "loss": 0.2943, "step": 3145 }, { "epoch": 0.72984572555388, "grad_norm": 22.612646825330938, "learning_rate": 2e-06, "loss": 0.421, "step": 3146 }, { "epoch": 0.7300777172021807, "grad_norm": 10.784007437530533, "learning_rate": 2e-06, "loss": 0.2252, "step": 3147 }, { "epoch": 0.7303097088504814, "grad_norm": 22.407086484125312, "learning_rate": 2e-06, "loss": 0.3782, "step": 3148 }, { "epoch": 0.7305417004987821, "grad_norm": 22.865257080954436, "learning_rate": 2e-06, "loss": 0.3711, "step": 3149 }, { "epoch": 0.7307736921470827, "grad_norm": 16.632205181733372, "learning_rate": 2e-06, "loss": 0.2656, "step": 3150 }, { "epoch": 0.7310056837953833, "grad_norm": 12.773540189237295, "learning_rate": 2e-06, "loss": 0.2561, "step": 3151 }, { "epoch": 0.731237675443684, "grad_norm": 19.910063057592886, "learning_rate": 2e-06, "loss": 0.3787, "step": 3152 }, { "epoch": 0.7314696670919847, "grad_norm": 11.60355812504079, "learning_rate": 2e-06, "loss": 0.248, "step": 3153 }, { "epoch": 0.7317016587402854, "grad_norm": 11.24794631426084, "learning_rate": 2e-06, "loss": 0.2551, "step": 3154 }, { "epoch": 0.731933650388586, "grad_norm": 17.127338422807483, "learning_rate": 2e-06, "loss": 0.3598, "step": 3155 }, { "epoch": 0.7321656420368867, "grad_norm": 21.97871303967765, "learning_rate": 2e-06, "loss": 0.4009, "step": 3156 }, { "epoch": 0.7323976336851873, "grad_norm": 15.615835043718105, "learning_rate": 2e-06, "loss": 0.3075, "step": 3157 }, { "epoch": 0.732629625333488, "grad_norm": 17.522596850256367, "learning_rate": 2e-06, "loss": 0.2784, "step": 3158 }, { "epoch": 0.7328616169817886, "grad_norm": 18.739187983179765, "learning_rate": 2e-06, "loss": 0.3305, "step": 3159 }, { "epoch": 0.7330936086300893, "grad_norm": 6.660399942455831, "learning_rate": 2e-06, "loss": 0.2451, "step": 3160 }, { "epoch": 0.73332560027839, "grad_norm": 15.690373574466072, "learning_rate": 2e-06, "loss": 0.2543, "step": 3161 }, { "epoch": 0.7335575919266907, "grad_norm": 10.188035897846122, "learning_rate": 2e-06, "loss": 0.2615, "step": 3162 }, { "epoch": 0.7337895835749914, "grad_norm": 23.18149272189231, "learning_rate": 2e-06, "loss": 0.2897, "step": 3163 }, { "epoch": 0.7340215752232919, "grad_norm": 14.122180502228332, "learning_rate": 2e-06, "loss": 0.2234, "step": 3164 }, { "epoch": 0.7342535668715926, "grad_norm": 7.541936206004345, "learning_rate": 2e-06, "loss": 0.2265, "step": 3165 }, { "epoch": 0.7344855585198933, "grad_norm": 12.774313254828565, "learning_rate": 2e-06, "loss": 0.2341, "step": 3166 }, { "epoch": 0.734717550168194, "grad_norm": 17.641684894723078, "learning_rate": 2e-06, "loss": 0.2906, "step": 3167 }, { "epoch": 0.7349495418164946, "grad_norm": 19.12104028237049, "learning_rate": 2e-06, "loss": 0.3829, "step": 3168 }, { "epoch": 0.7351815334647953, "grad_norm": 22.529089396316817, "learning_rate": 2e-06, "loss": 0.2992, "step": 3169 }, { "epoch": 0.7354135251130959, "grad_norm": 19.771390540790247, "learning_rate": 2e-06, "loss": 0.276, "step": 3170 }, { "epoch": 0.7356455167613966, "grad_norm": 16.61134813442355, "learning_rate": 2e-06, "loss": 0.2635, "step": 3171 }, { "epoch": 0.7358775084096972, "grad_norm": 18.759166483704387, "learning_rate": 2e-06, "loss": 0.3144, "step": 3172 }, { "epoch": 0.7361095000579979, "grad_norm": 10.461120170280791, "learning_rate": 2e-06, "loss": 0.326, "step": 3173 }, { "epoch": 0.7363414917062986, "grad_norm": 11.874710306707069, "learning_rate": 2e-06, "loss": 0.3141, "step": 3174 }, { "epoch": 0.7365734833545993, "grad_norm": 24.942525916485767, "learning_rate": 2e-06, "loss": 0.2926, "step": 3175 }, { "epoch": 0.7368054750029, "grad_norm": 15.934880410013454, "learning_rate": 2e-06, "loss": 0.3051, "step": 3176 }, { "epoch": 0.7370374666512005, "grad_norm": 9.61878494372916, "learning_rate": 2e-06, "loss": 0.2273, "step": 3177 }, { "epoch": 0.7372694582995012, "grad_norm": 17.32211608727347, "learning_rate": 2e-06, "loss": 0.2992, "step": 3178 }, { "epoch": 0.7375014499478019, "grad_norm": 19.998321939059572, "learning_rate": 2e-06, "loss": 0.1923, "step": 3179 }, { "epoch": 0.7377334415961025, "grad_norm": 10.703880883805544, "learning_rate": 2e-06, "loss": 0.2353, "step": 3180 }, { "epoch": 0.7379654332444032, "grad_norm": 19.34032775586904, "learning_rate": 2e-06, "loss": 0.3466, "step": 3181 }, { "epoch": 0.7381974248927039, "grad_norm": 9.371863932024695, "learning_rate": 2e-06, "loss": 0.2022, "step": 3182 }, { "epoch": 0.7384294165410045, "grad_norm": 12.088369944261673, "learning_rate": 2e-06, "loss": 0.3356, "step": 3183 }, { "epoch": 0.7386614081893051, "grad_norm": 13.208820592992916, "learning_rate": 2e-06, "loss": 0.3584, "step": 3184 }, { "epoch": 0.7388933998376058, "grad_norm": 14.48019733506012, "learning_rate": 2e-06, "loss": 0.2866, "step": 3185 }, { "epoch": 0.7391253914859065, "grad_norm": 9.297669852631856, "learning_rate": 2e-06, "loss": 0.285, "step": 3186 }, { "epoch": 0.7393573831342072, "grad_norm": 13.59255319093113, "learning_rate": 2e-06, "loss": 0.2217, "step": 3187 }, { "epoch": 0.7395893747825079, "grad_norm": 9.63516912508189, "learning_rate": 2e-06, "loss": 0.2913, "step": 3188 }, { "epoch": 0.7398213664308085, "grad_norm": 12.207878815290089, "learning_rate": 2e-06, "loss": 0.1737, "step": 3189 }, { "epoch": 0.7400533580791091, "grad_norm": 7.723048500107733, "learning_rate": 2e-06, "loss": 0.1719, "step": 3190 }, { "epoch": 0.7402853497274098, "grad_norm": 15.372306937561344, "learning_rate": 2e-06, "loss": 0.3438, "step": 3191 }, { "epoch": 0.7405173413757105, "grad_norm": 10.693950673258227, "learning_rate": 2e-06, "loss": 0.2514, "step": 3192 }, { "epoch": 0.7407493330240111, "grad_norm": 14.196738573156889, "learning_rate": 2e-06, "loss": 0.3165, "step": 3193 }, { "epoch": 0.7409813246723118, "grad_norm": 266.23074189192454, "learning_rate": 2e-06, "loss": 0.3074, "step": 3194 }, { "epoch": 0.7412133163206125, "grad_norm": 11.658793115573301, "learning_rate": 2e-06, "loss": 0.2975, "step": 3195 }, { "epoch": 0.7414453079689132, "grad_norm": 18.86350667547308, "learning_rate": 2e-06, "loss": 0.4451, "step": 3196 }, { "epoch": 0.7416772996172137, "grad_norm": 10.675248689307919, "learning_rate": 2e-06, "loss": 0.2638, "step": 3197 }, { "epoch": 0.7419092912655144, "grad_norm": 10.46535396830821, "learning_rate": 2e-06, "loss": 0.1983, "step": 3198 }, { "epoch": 0.7421412829138151, "grad_norm": 14.436103478643547, "learning_rate": 2e-06, "loss": 0.2665, "step": 3199 }, { "epoch": 0.7423732745621158, "grad_norm": 18.35508453929755, "learning_rate": 2e-06, "loss": 0.3326, "step": 3200 }, { "epoch": 0.7426052662104164, "grad_norm": 13.92213201139075, "learning_rate": 2e-06, "loss": 0.3001, "step": 3201 }, { "epoch": 0.7428372578587171, "grad_norm": 24.057651507458285, "learning_rate": 2e-06, "loss": 0.4314, "step": 3202 }, { "epoch": 0.7430692495070177, "grad_norm": 13.864093476181436, "learning_rate": 2e-06, "loss": 0.2738, "step": 3203 }, { "epoch": 0.7433012411553184, "grad_norm": 12.054702304883937, "learning_rate": 2e-06, "loss": 0.3038, "step": 3204 }, { "epoch": 0.743533232803619, "grad_norm": 12.60991002298068, "learning_rate": 2e-06, "loss": 0.196, "step": 3205 }, { "epoch": 0.7437652244519197, "grad_norm": 12.127707235247618, "learning_rate": 2e-06, "loss": 0.1946, "step": 3206 }, { "epoch": 0.7439972161002204, "grad_norm": 23.30100209188892, "learning_rate": 2e-06, "loss": 0.273, "step": 3207 }, { "epoch": 0.7442292077485211, "grad_norm": 13.99161322820209, "learning_rate": 2e-06, "loss": 0.2144, "step": 3208 }, { "epoch": 0.7444611993968218, "grad_norm": 13.887700930199868, "learning_rate": 2e-06, "loss": 0.2683, "step": 3209 }, { "epoch": 0.7446931910451223, "grad_norm": 26.819487750849106, "learning_rate": 2e-06, "loss": 0.4258, "step": 3210 }, { "epoch": 0.744925182693423, "grad_norm": 9.733351998525075, "learning_rate": 2e-06, "loss": 0.2155, "step": 3211 }, { "epoch": 0.7451571743417237, "grad_norm": 14.340214523192815, "learning_rate": 2e-06, "loss": 0.2755, "step": 3212 }, { "epoch": 0.7453891659900244, "grad_norm": 11.54245757950112, "learning_rate": 2e-06, "loss": 0.3347, "step": 3213 }, { "epoch": 0.745621157638325, "grad_norm": 23.06041982558424, "learning_rate": 2e-06, "loss": 0.3852, "step": 3214 }, { "epoch": 0.7458531492866257, "grad_norm": 15.579761828443129, "learning_rate": 2e-06, "loss": 0.1818, "step": 3215 }, { "epoch": 0.7460851409349264, "grad_norm": 27.921307898526475, "learning_rate": 2e-06, "loss": 0.3919, "step": 3216 }, { "epoch": 0.746317132583227, "grad_norm": 10.451501045988298, "learning_rate": 2e-06, "loss": 0.2935, "step": 3217 }, { "epoch": 0.7465491242315276, "grad_norm": 18.411670307096102, "learning_rate": 2e-06, "loss": 0.3829, "step": 3218 }, { "epoch": 0.7467811158798283, "grad_norm": 17.096497754766254, "learning_rate": 2e-06, "loss": 0.2315, "step": 3219 }, { "epoch": 0.747013107528129, "grad_norm": 19.05129410304447, "learning_rate": 2e-06, "loss": 0.3125, "step": 3220 }, { "epoch": 0.7472450991764297, "grad_norm": 7.424645567567219, "learning_rate": 2e-06, "loss": 0.2718, "step": 3221 }, { "epoch": 0.7474770908247304, "grad_norm": 11.603587562641335, "learning_rate": 2e-06, "loss": 0.2976, "step": 3222 }, { "epoch": 0.7477090824730309, "grad_norm": 9.346124673462597, "learning_rate": 2e-06, "loss": 0.2669, "step": 3223 }, { "epoch": 0.7479410741213316, "grad_norm": 13.97160979948989, "learning_rate": 2e-06, "loss": 0.2398, "step": 3224 }, { "epoch": 0.7481730657696323, "grad_norm": 25.922000700573506, "learning_rate": 2e-06, "loss": 0.3631, "step": 3225 }, { "epoch": 0.748405057417933, "grad_norm": 10.308629661553015, "learning_rate": 2e-06, "loss": 0.2849, "step": 3226 }, { "epoch": 0.7486370490662336, "grad_norm": 16.028103541976417, "learning_rate": 2e-06, "loss": 0.2732, "step": 3227 }, { "epoch": 0.7488690407145343, "grad_norm": 11.867317699477844, "learning_rate": 2e-06, "loss": 0.3506, "step": 3228 }, { "epoch": 0.749101032362835, "grad_norm": 27.591524237265336, "learning_rate": 2e-06, "loss": 0.4672, "step": 3229 }, { "epoch": 0.7493330240111356, "grad_norm": 16.75662670018917, "learning_rate": 2e-06, "loss": 0.3969, "step": 3230 }, { "epoch": 0.7495650156594362, "grad_norm": 21.443112852719207, "learning_rate": 2e-06, "loss": 0.3879, "step": 3231 }, { "epoch": 0.7497970073077369, "grad_norm": 18.57519074076872, "learning_rate": 2e-06, "loss": 0.3253, "step": 3232 }, { "epoch": 0.7500289989560376, "grad_norm": 9.228484999772808, "learning_rate": 2e-06, "loss": 0.2678, "step": 3233 }, { "epoch": 0.7502609906043383, "grad_norm": 20.858727268557686, "learning_rate": 2e-06, "loss": 0.2909, "step": 3234 }, { "epoch": 0.7504929822526389, "grad_norm": 26.914560888617398, "learning_rate": 2e-06, "loss": 0.3719, "step": 3235 }, { "epoch": 0.7507249739009396, "grad_norm": 15.709843888605175, "learning_rate": 2e-06, "loss": 0.2176, "step": 3236 }, { "epoch": 0.7509569655492402, "grad_norm": 19.16131357159073, "learning_rate": 2e-06, "loss": 0.2419, "step": 3237 }, { "epoch": 0.7511889571975409, "grad_norm": 9.892834755241338, "learning_rate": 2e-06, "loss": 0.2641, "step": 3238 }, { "epoch": 0.7514209488458415, "grad_norm": 12.710909928586199, "learning_rate": 2e-06, "loss": 0.3617, "step": 3239 }, { "epoch": 0.7516529404941422, "grad_norm": 14.861685648929491, "learning_rate": 2e-06, "loss": 0.2903, "step": 3240 }, { "epoch": 0.7518849321424429, "grad_norm": 10.636791599844129, "learning_rate": 2e-06, "loss": 0.2154, "step": 3241 }, { "epoch": 0.7521169237907436, "grad_norm": 10.824860251051467, "learning_rate": 2e-06, "loss": 0.3095, "step": 3242 }, { "epoch": 0.7523489154390441, "grad_norm": 11.258032137291424, "learning_rate": 2e-06, "loss": 0.2784, "step": 3243 }, { "epoch": 0.7525809070873448, "grad_norm": 12.216580409438913, "learning_rate": 2e-06, "loss": 0.3381, "step": 3244 }, { "epoch": 0.7528128987356455, "grad_norm": 16.46023841396873, "learning_rate": 2e-06, "loss": 0.2966, "step": 3245 }, { "epoch": 0.7530448903839462, "grad_norm": 10.773669247612482, "learning_rate": 2e-06, "loss": 0.2822, "step": 3246 }, { "epoch": 0.7532768820322469, "grad_norm": 15.47427621377556, "learning_rate": 2e-06, "loss": 0.3528, "step": 3247 }, { "epoch": 0.7535088736805475, "grad_norm": 9.437936233561318, "learning_rate": 2e-06, "loss": 0.3515, "step": 3248 }, { "epoch": 0.7537408653288482, "grad_norm": 15.060815916367174, "learning_rate": 2e-06, "loss": 0.369, "step": 3249 }, { "epoch": 0.7539728569771488, "grad_norm": 10.051667383454044, "learning_rate": 2e-06, "loss": 0.2607, "step": 3250 }, { "epoch": 0.7542048486254495, "grad_norm": 15.210897124764472, "learning_rate": 2e-06, "loss": 0.3493, "step": 3251 }, { "epoch": 0.7544368402737501, "grad_norm": 16.097661441306023, "learning_rate": 2e-06, "loss": 0.2453, "step": 3252 }, { "epoch": 0.7546688319220508, "grad_norm": 9.117636788720601, "learning_rate": 2e-06, "loss": 0.2432, "step": 3253 }, { "epoch": 0.7549008235703515, "grad_norm": 9.98191377832381, "learning_rate": 2e-06, "loss": 0.2017, "step": 3254 }, { "epoch": 0.7551328152186522, "grad_norm": 7.908774046591722, "learning_rate": 2e-06, "loss": 0.2378, "step": 3255 }, { "epoch": 0.7553648068669528, "grad_norm": 23.628388777108437, "learning_rate": 2e-06, "loss": 0.3264, "step": 3256 }, { "epoch": 0.7555967985152534, "grad_norm": 13.1515153464434, "learning_rate": 2e-06, "loss": 0.28, "step": 3257 }, { "epoch": 0.7558287901635541, "grad_norm": 16.609710434365738, "learning_rate": 2e-06, "loss": 0.2632, "step": 3258 }, { "epoch": 0.7560607818118548, "grad_norm": 19.25268556355, "learning_rate": 2e-06, "loss": 0.2485, "step": 3259 }, { "epoch": 0.7562927734601554, "grad_norm": 8.798902479176105, "learning_rate": 2e-06, "loss": 0.2291, "step": 3260 }, { "epoch": 0.7565247651084561, "grad_norm": 15.102364203693176, "learning_rate": 2e-06, "loss": 0.3814, "step": 3261 }, { "epoch": 0.7567567567567568, "grad_norm": 12.251104054670868, "learning_rate": 2e-06, "loss": 0.205, "step": 3262 }, { "epoch": 0.7569887484050574, "grad_norm": 17.830770306606684, "learning_rate": 2e-06, "loss": 0.3803, "step": 3263 }, { "epoch": 0.757220740053358, "grad_norm": 9.898043465896796, "learning_rate": 2e-06, "loss": 0.2717, "step": 3264 }, { "epoch": 0.7574527317016587, "grad_norm": 10.869198655689024, "learning_rate": 2e-06, "loss": 0.2438, "step": 3265 }, { "epoch": 0.7576847233499594, "grad_norm": 9.569985886548965, "learning_rate": 2e-06, "loss": 0.2298, "step": 3266 }, { "epoch": 0.7579167149982601, "grad_norm": 8.895433458214619, "learning_rate": 2e-06, "loss": 0.2976, "step": 3267 }, { "epoch": 0.7581487066465608, "grad_norm": 10.838387016596009, "learning_rate": 2e-06, "loss": 0.1739, "step": 3268 }, { "epoch": 0.7583806982948614, "grad_norm": 15.143001781351865, "learning_rate": 2e-06, "loss": 0.2297, "step": 3269 }, { "epoch": 0.758612689943162, "grad_norm": 15.826963259618264, "learning_rate": 2e-06, "loss": 0.2124, "step": 3270 }, { "epoch": 0.7588446815914627, "grad_norm": 18.73429903645318, "learning_rate": 2e-06, "loss": 0.3329, "step": 3271 }, { "epoch": 0.7590766732397634, "grad_norm": 8.054226271696935, "learning_rate": 2e-06, "loss": 0.2594, "step": 3272 }, { "epoch": 0.759308664888064, "grad_norm": 18.537441641456375, "learning_rate": 2e-06, "loss": 0.4262, "step": 3273 }, { "epoch": 0.7595406565363647, "grad_norm": 18.060962169878092, "learning_rate": 2e-06, "loss": 0.329, "step": 3274 }, { "epoch": 0.7597726481846654, "grad_norm": 19.176654897756123, "learning_rate": 2e-06, "loss": 0.2127, "step": 3275 }, { "epoch": 0.7600046398329661, "grad_norm": 40.19397033267575, "learning_rate": 2e-06, "loss": 0.4408, "step": 3276 }, { "epoch": 0.7602366314812666, "grad_norm": 14.183460089274515, "learning_rate": 2e-06, "loss": 0.2787, "step": 3277 }, { "epoch": 0.7604686231295673, "grad_norm": 12.614565362170005, "learning_rate": 2e-06, "loss": 0.2865, "step": 3278 }, { "epoch": 0.760700614777868, "grad_norm": 15.294760785557783, "learning_rate": 2e-06, "loss": 0.3313, "step": 3279 }, { "epoch": 0.7609326064261687, "grad_norm": 14.678094198469996, "learning_rate": 2e-06, "loss": 0.2752, "step": 3280 }, { "epoch": 0.7611645980744693, "grad_norm": 16.512603314388123, "learning_rate": 2e-06, "loss": 0.2736, "step": 3281 }, { "epoch": 0.76139658972277, "grad_norm": 21.798372206513797, "learning_rate": 2e-06, "loss": 0.3324, "step": 3282 }, { "epoch": 0.7616285813710706, "grad_norm": 13.556144644099309, "learning_rate": 2e-06, "loss": 0.313, "step": 3283 }, { "epoch": 0.7618605730193713, "grad_norm": 12.0913929886167, "learning_rate": 2e-06, "loss": 0.2542, "step": 3284 }, { "epoch": 0.762092564667672, "grad_norm": 8.532666772364651, "learning_rate": 2e-06, "loss": 0.2037, "step": 3285 }, { "epoch": 0.7623245563159726, "grad_norm": 15.608666752534837, "learning_rate": 2e-06, "loss": 0.277, "step": 3286 }, { "epoch": 0.7625565479642733, "grad_norm": 12.648901474399997, "learning_rate": 2e-06, "loss": 0.403, "step": 3287 }, { "epoch": 0.762788539612574, "grad_norm": 15.6219832778702, "learning_rate": 2e-06, "loss": 0.3407, "step": 3288 }, { "epoch": 0.7630205312608747, "grad_norm": 25.88285292749361, "learning_rate": 2e-06, "loss": 0.4021, "step": 3289 }, { "epoch": 0.7632525229091752, "grad_norm": 14.51290497078572, "learning_rate": 2e-06, "loss": 0.3171, "step": 3290 }, { "epoch": 0.7634845145574759, "grad_norm": 15.373419614063504, "learning_rate": 2e-06, "loss": 0.3018, "step": 3291 }, { "epoch": 0.7637165062057766, "grad_norm": 7.465941023334532, "learning_rate": 2e-06, "loss": 0.1827, "step": 3292 }, { "epoch": 0.7639484978540773, "grad_norm": 22.68478128905687, "learning_rate": 2e-06, "loss": 0.3353, "step": 3293 }, { "epoch": 0.7641804895023779, "grad_norm": 12.719013375955988, "learning_rate": 2e-06, "loss": 0.2751, "step": 3294 }, { "epoch": 0.7644124811506786, "grad_norm": 12.201266141777696, "learning_rate": 2e-06, "loss": 0.2502, "step": 3295 }, { "epoch": 0.7646444727989793, "grad_norm": 12.401332055108211, "learning_rate": 2e-06, "loss": 0.3048, "step": 3296 }, { "epoch": 0.7648764644472799, "grad_norm": 9.101908144365112, "learning_rate": 2e-06, "loss": 0.2507, "step": 3297 }, { "epoch": 0.7651084560955805, "grad_norm": 15.446972836287449, "learning_rate": 2e-06, "loss": 0.3506, "step": 3298 }, { "epoch": 0.7653404477438812, "grad_norm": 10.270501438242315, "learning_rate": 2e-06, "loss": 0.2518, "step": 3299 }, { "epoch": 0.7655724393921819, "grad_norm": 10.880237503429663, "learning_rate": 2e-06, "loss": 0.2826, "step": 3300 }, { "epoch": 0.7658044310404826, "grad_norm": 10.411867222827498, "learning_rate": 2e-06, "loss": 0.2021, "step": 3301 }, { "epoch": 0.7660364226887832, "grad_norm": 6.641121711480695, "learning_rate": 2e-06, "loss": 0.1922, "step": 3302 }, { "epoch": 0.7662684143370838, "grad_norm": 16.986816691328375, "learning_rate": 2e-06, "loss": 0.3354, "step": 3303 }, { "epoch": 0.7665004059853845, "grad_norm": 7.048691651075567, "learning_rate": 2e-06, "loss": 0.227, "step": 3304 }, { "epoch": 0.7667323976336852, "grad_norm": 10.118835944738485, "learning_rate": 2e-06, "loss": 0.1769, "step": 3305 }, { "epoch": 0.7669643892819858, "grad_norm": 20.612598747313648, "learning_rate": 2e-06, "loss": 0.3865, "step": 3306 }, { "epoch": 0.7671963809302865, "grad_norm": 11.739597678261555, "learning_rate": 2e-06, "loss": 0.1937, "step": 3307 }, { "epoch": 0.7674283725785872, "grad_norm": 13.091498954890323, "learning_rate": 2e-06, "loss": 0.3293, "step": 3308 }, { "epoch": 0.7676603642268879, "grad_norm": 12.450404437779614, "learning_rate": 2e-06, "loss": 0.2431, "step": 3309 }, { "epoch": 0.7678923558751884, "grad_norm": 12.381029099949977, "learning_rate": 2e-06, "loss": 0.3628, "step": 3310 }, { "epoch": 0.7681243475234891, "grad_norm": 12.71570898337696, "learning_rate": 2e-06, "loss": 0.1727, "step": 3311 }, { "epoch": 0.7683563391717898, "grad_norm": 19.630121385148918, "learning_rate": 2e-06, "loss": 0.3437, "step": 3312 }, { "epoch": 0.7685883308200905, "grad_norm": 15.42711658110915, "learning_rate": 2e-06, "loss": 0.2281, "step": 3313 }, { "epoch": 0.7688203224683912, "grad_norm": 6.885955436112426, "learning_rate": 2e-06, "loss": 0.1984, "step": 3314 }, { "epoch": 0.7690523141166918, "grad_norm": 20.38462034150911, "learning_rate": 2e-06, "loss": 0.3107, "step": 3315 }, { "epoch": 0.7692843057649925, "grad_norm": 14.158397888232825, "learning_rate": 2e-06, "loss": 0.2736, "step": 3316 }, { "epoch": 0.7695162974132931, "grad_norm": 18.52903730581829, "learning_rate": 2e-06, "loss": 0.2503, "step": 3317 }, { "epoch": 0.7697482890615938, "grad_norm": 24.06934932448499, "learning_rate": 2e-06, "loss": 0.3461, "step": 3318 }, { "epoch": 0.7699802807098944, "grad_norm": 11.039495942718224, "learning_rate": 2e-06, "loss": 0.3537, "step": 3319 }, { "epoch": 0.7702122723581951, "grad_norm": 17.35805561822558, "learning_rate": 2e-06, "loss": 0.3575, "step": 3320 }, { "epoch": 0.7704442640064958, "grad_norm": 14.312621737355462, "learning_rate": 2e-06, "loss": 0.3117, "step": 3321 }, { "epoch": 0.7706762556547965, "grad_norm": 14.30444248042403, "learning_rate": 2e-06, "loss": 0.3536, "step": 3322 }, { "epoch": 0.770908247303097, "grad_norm": 21.15912940860675, "learning_rate": 2e-06, "loss": 0.433, "step": 3323 }, { "epoch": 0.7711402389513977, "grad_norm": 12.043006519589039, "learning_rate": 2e-06, "loss": 0.2539, "step": 3324 }, { "epoch": 0.7713722305996984, "grad_norm": 8.225323237276257, "learning_rate": 2e-06, "loss": 0.1483, "step": 3325 }, { "epoch": 0.7716042222479991, "grad_norm": 12.721679821297943, "learning_rate": 2e-06, "loss": 0.266, "step": 3326 }, { "epoch": 0.7718362138962997, "grad_norm": 18.161270535697973, "learning_rate": 2e-06, "loss": 0.2539, "step": 3327 }, { "epoch": 0.7720682055446004, "grad_norm": 16.406188851303185, "learning_rate": 2e-06, "loss": 0.275, "step": 3328 }, { "epoch": 0.7723001971929011, "grad_norm": 12.093543887689135, "learning_rate": 2e-06, "loss": 0.2418, "step": 3329 }, { "epoch": 0.7725321888412017, "grad_norm": 17.715551132036744, "learning_rate": 2e-06, "loss": 0.3897, "step": 3330 }, { "epoch": 0.7727641804895024, "grad_norm": 12.83841974804794, "learning_rate": 2e-06, "loss": 0.3194, "step": 3331 }, { "epoch": 0.772996172137803, "grad_norm": 10.728654064148865, "learning_rate": 2e-06, "loss": 0.2545, "step": 3332 }, { "epoch": 0.7732281637861037, "grad_norm": 10.098533183865884, "learning_rate": 2e-06, "loss": 0.354, "step": 3333 }, { "epoch": 0.7734601554344044, "grad_norm": 15.94271340760764, "learning_rate": 2e-06, "loss": 0.2849, "step": 3334 }, { "epoch": 0.7736921470827051, "grad_norm": 15.753903381136727, "learning_rate": 2e-06, "loss": 0.3928, "step": 3335 }, { "epoch": 0.7739241387310056, "grad_norm": 13.636682401677106, "learning_rate": 2e-06, "loss": 0.2834, "step": 3336 }, { "epoch": 0.7741561303793063, "grad_norm": 21.367904185410392, "learning_rate": 2e-06, "loss": 0.4015, "step": 3337 }, { "epoch": 0.774388122027607, "grad_norm": 15.817210585659566, "learning_rate": 2e-06, "loss": 0.3041, "step": 3338 }, { "epoch": 0.7746201136759077, "grad_norm": 10.298483366955015, "learning_rate": 2e-06, "loss": 0.2287, "step": 3339 }, { "epoch": 0.7748521053242083, "grad_norm": 19.063689770985306, "learning_rate": 2e-06, "loss": 0.4006, "step": 3340 }, { "epoch": 0.775084096972509, "grad_norm": 15.485253568225815, "learning_rate": 2e-06, "loss": 0.3251, "step": 3341 }, { "epoch": 0.7753160886208097, "grad_norm": 8.78843640841125, "learning_rate": 2e-06, "loss": 0.4089, "step": 3342 }, { "epoch": 0.7755480802691103, "grad_norm": 10.77846730692596, "learning_rate": 2e-06, "loss": 0.2857, "step": 3343 }, { "epoch": 0.7757800719174109, "grad_norm": 13.253812636990132, "learning_rate": 2e-06, "loss": 0.1985, "step": 3344 }, { "epoch": 0.7760120635657116, "grad_norm": 10.455512361721096, "learning_rate": 2e-06, "loss": 0.3522, "step": 3345 }, { "epoch": 0.7762440552140123, "grad_norm": 20.698183046351787, "learning_rate": 2e-06, "loss": 0.3496, "step": 3346 }, { "epoch": 0.776476046862313, "grad_norm": 7.3385363802913535, "learning_rate": 2e-06, "loss": 0.2692, "step": 3347 }, { "epoch": 0.7767080385106137, "grad_norm": 14.080623288768594, "learning_rate": 2e-06, "loss": 0.2952, "step": 3348 }, { "epoch": 0.7769400301589143, "grad_norm": 7.9727325221498155, "learning_rate": 2e-06, "loss": 0.2831, "step": 3349 }, { "epoch": 0.7771720218072149, "grad_norm": 19.27180239056616, "learning_rate": 2e-06, "loss": 0.2993, "step": 3350 }, { "epoch": 0.7774040134555156, "grad_norm": 17.39930094442428, "learning_rate": 2e-06, "loss": 0.4456, "step": 3351 }, { "epoch": 0.7776360051038163, "grad_norm": 8.830695174435625, "learning_rate": 2e-06, "loss": 0.2363, "step": 3352 }, { "epoch": 0.7778679967521169, "grad_norm": 12.427463571267143, "learning_rate": 2e-06, "loss": 0.2699, "step": 3353 }, { "epoch": 0.7780999884004176, "grad_norm": 14.715522465947913, "learning_rate": 2e-06, "loss": 0.2928, "step": 3354 }, { "epoch": 0.7783319800487183, "grad_norm": 9.18348706874619, "learning_rate": 2e-06, "loss": 0.333, "step": 3355 }, { "epoch": 0.7785639716970189, "grad_norm": 11.394303641825907, "learning_rate": 2e-06, "loss": 0.3114, "step": 3356 }, { "epoch": 0.7787959633453195, "grad_norm": 8.11159833896116, "learning_rate": 2e-06, "loss": 0.2845, "step": 3357 }, { "epoch": 0.7790279549936202, "grad_norm": 21.21505875288533, "learning_rate": 2e-06, "loss": 0.2917, "step": 3358 }, { "epoch": 0.7792599466419209, "grad_norm": 14.613621762182552, "learning_rate": 2e-06, "loss": 0.2917, "step": 3359 }, { "epoch": 0.7794919382902216, "grad_norm": 8.027194232523648, "learning_rate": 2e-06, "loss": 0.1708, "step": 3360 }, { "epoch": 0.7797239299385222, "grad_norm": 15.20719004188822, "learning_rate": 2e-06, "loss": 0.3193, "step": 3361 }, { "epoch": 0.7799559215868229, "grad_norm": 17.25766357716426, "learning_rate": 2e-06, "loss": 0.2882, "step": 3362 }, { "epoch": 0.7801879132351235, "grad_norm": 11.739053354244797, "learning_rate": 2e-06, "loss": 0.1788, "step": 3363 }, { "epoch": 0.7804199048834242, "grad_norm": 12.986409445697113, "learning_rate": 2e-06, "loss": 0.2961, "step": 3364 }, { "epoch": 0.7806518965317248, "grad_norm": 10.988482359288097, "learning_rate": 2e-06, "loss": 0.1862, "step": 3365 }, { "epoch": 0.7808838881800255, "grad_norm": 15.64859142014504, "learning_rate": 2e-06, "loss": 0.2246, "step": 3366 }, { "epoch": 0.7811158798283262, "grad_norm": 16.32751338857044, "learning_rate": 2e-06, "loss": 0.2849, "step": 3367 }, { "epoch": 0.7813478714766269, "grad_norm": 13.76485487728567, "learning_rate": 2e-06, "loss": 0.2635, "step": 3368 }, { "epoch": 0.7815798631249276, "grad_norm": 10.305451819845539, "learning_rate": 2e-06, "loss": 0.2086, "step": 3369 }, { "epoch": 0.7818118547732281, "grad_norm": 8.387552087751217, "learning_rate": 2e-06, "loss": 0.2816, "step": 3370 }, { "epoch": 0.7820438464215288, "grad_norm": 17.442401998483444, "learning_rate": 2e-06, "loss": 0.387, "step": 3371 }, { "epoch": 0.7822758380698295, "grad_norm": 16.706704163997404, "learning_rate": 2e-06, "loss": 0.3759, "step": 3372 }, { "epoch": 0.7825078297181302, "grad_norm": 13.969522490290837, "learning_rate": 2e-06, "loss": 0.2574, "step": 3373 }, { "epoch": 0.7827398213664308, "grad_norm": 7.801153663110549, "learning_rate": 2e-06, "loss": 0.2363, "step": 3374 }, { "epoch": 0.7829718130147315, "grad_norm": 16.480451323794128, "learning_rate": 2e-06, "loss": 0.2805, "step": 3375 }, { "epoch": 0.7832038046630321, "grad_norm": 14.186735457312171, "learning_rate": 2e-06, "loss": 0.2595, "step": 3376 }, { "epoch": 0.7834357963113328, "grad_norm": 11.646776179140373, "learning_rate": 2e-06, "loss": 0.2839, "step": 3377 }, { "epoch": 0.7836677879596334, "grad_norm": 11.631828438973786, "learning_rate": 2e-06, "loss": 0.2361, "step": 3378 }, { "epoch": 0.7838997796079341, "grad_norm": 14.183205529064336, "learning_rate": 2e-06, "loss": 0.3969, "step": 3379 }, { "epoch": 0.7841317712562348, "grad_norm": 10.892634668274297, "learning_rate": 2e-06, "loss": 0.2694, "step": 3380 }, { "epoch": 0.7843637629045355, "grad_norm": 11.98108815181279, "learning_rate": 2e-06, "loss": 0.273, "step": 3381 }, { "epoch": 0.7845957545528361, "grad_norm": 8.18435288456861, "learning_rate": 2e-06, "loss": 0.2099, "step": 3382 }, { "epoch": 0.7848277462011367, "grad_norm": 12.308328485458324, "learning_rate": 2e-06, "loss": 0.2194, "step": 3383 }, { "epoch": 0.7850597378494374, "grad_norm": 14.88690096912976, "learning_rate": 2e-06, "loss": 0.3388, "step": 3384 }, { "epoch": 0.7852917294977381, "grad_norm": 23.606043344795978, "learning_rate": 2e-06, "loss": 0.3365, "step": 3385 }, { "epoch": 0.7855237211460387, "grad_norm": 17.447552640446613, "learning_rate": 2e-06, "loss": 0.3042, "step": 3386 }, { "epoch": 0.7857557127943394, "grad_norm": 15.865962850844356, "learning_rate": 2e-06, "loss": 0.3659, "step": 3387 }, { "epoch": 0.7859877044426401, "grad_norm": 11.920441370957631, "learning_rate": 2e-06, "loss": 0.2215, "step": 3388 }, { "epoch": 0.7862196960909408, "grad_norm": 16.597232700936654, "learning_rate": 2e-06, "loss": 0.3375, "step": 3389 }, { "epoch": 0.7864516877392413, "grad_norm": 18.77556256589631, "learning_rate": 2e-06, "loss": 0.2961, "step": 3390 }, { "epoch": 0.786683679387542, "grad_norm": 12.901281129823335, "learning_rate": 2e-06, "loss": 0.2619, "step": 3391 }, { "epoch": 0.7869156710358427, "grad_norm": 12.44922517671116, "learning_rate": 2e-06, "loss": 0.1882, "step": 3392 }, { "epoch": 0.7871476626841434, "grad_norm": 14.76842717352895, "learning_rate": 2e-06, "loss": 0.2978, "step": 3393 }, { "epoch": 0.7873796543324441, "grad_norm": 14.152124730026602, "learning_rate": 2e-06, "loss": 0.2882, "step": 3394 }, { "epoch": 0.7876116459807447, "grad_norm": 18.40012721570145, "learning_rate": 2e-06, "loss": 0.3215, "step": 3395 }, { "epoch": 0.7878436376290453, "grad_norm": 11.181603808392303, "learning_rate": 2e-06, "loss": 0.2649, "step": 3396 }, { "epoch": 0.788075629277346, "grad_norm": 18.509419869703052, "learning_rate": 2e-06, "loss": 0.3095, "step": 3397 }, { "epoch": 0.7883076209256467, "grad_norm": 14.184333048504568, "learning_rate": 2e-06, "loss": 0.2602, "step": 3398 }, { "epoch": 0.7885396125739473, "grad_norm": 12.587887359899812, "learning_rate": 2e-06, "loss": 0.2427, "step": 3399 }, { "epoch": 0.788771604222248, "grad_norm": 24.507560493039886, "learning_rate": 2e-06, "loss": 0.335, "step": 3400 }, { "epoch": 0.7890035958705487, "grad_norm": 4.819863587109384, "learning_rate": 2e-06, "loss": 0.1711, "step": 3401 }, { "epoch": 0.7892355875188494, "grad_norm": 22.111328616473372, "learning_rate": 2e-06, "loss": 0.3374, "step": 3402 }, { "epoch": 0.7894675791671499, "grad_norm": 11.906781767275788, "learning_rate": 2e-06, "loss": 0.207, "step": 3403 }, { "epoch": 0.7896995708154506, "grad_norm": 18.47361798133603, "learning_rate": 2e-06, "loss": 0.3547, "step": 3404 }, { "epoch": 0.7899315624637513, "grad_norm": 15.688065948218464, "learning_rate": 2e-06, "loss": 0.2639, "step": 3405 }, { "epoch": 0.790163554112052, "grad_norm": 26.116619607587307, "learning_rate": 2e-06, "loss": 0.3287, "step": 3406 }, { "epoch": 0.7903955457603526, "grad_norm": 8.882349560205764, "learning_rate": 2e-06, "loss": 0.2585, "step": 3407 }, { "epoch": 0.7906275374086533, "grad_norm": 9.446665087382272, "learning_rate": 2e-06, "loss": 0.2615, "step": 3408 }, { "epoch": 0.790859529056954, "grad_norm": 18.048231253616922, "learning_rate": 2e-06, "loss": 0.3177, "step": 3409 }, { "epoch": 0.7910915207052546, "grad_norm": 14.29683313119664, "learning_rate": 2e-06, "loss": 0.2627, "step": 3410 }, { "epoch": 0.7913235123535552, "grad_norm": 10.825488918220556, "learning_rate": 2e-06, "loss": 0.2825, "step": 3411 }, { "epoch": 0.7915555040018559, "grad_norm": 8.743391123103203, "learning_rate": 2e-06, "loss": 0.3564, "step": 3412 }, { "epoch": 0.7917874956501566, "grad_norm": 8.390341529464372, "learning_rate": 2e-06, "loss": 0.2535, "step": 3413 }, { "epoch": 0.7920194872984573, "grad_norm": 11.615387497887594, "learning_rate": 2e-06, "loss": 0.2495, "step": 3414 }, { "epoch": 0.792251478946758, "grad_norm": 10.339780478199742, "learning_rate": 2e-06, "loss": 0.2124, "step": 3415 }, { "epoch": 0.7924834705950585, "grad_norm": 19.814645086976675, "learning_rate": 2e-06, "loss": 0.3676, "step": 3416 }, { "epoch": 0.7927154622433592, "grad_norm": 11.85489858812581, "learning_rate": 2e-06, "loss": 0.2406, "step": 3417 }, { "epoch": 0.7929474538916599, "grad_norm": 16.299759592564307, "learning_rate": 2e-06, "loss": 0.2692, "step": 3418 }, { "epoch": 0.7931794455399606, "grad_norm": 13.872791910011749, "learning_rate": 2e-06, "loss": 0.305, "step": 3419 }, { "epoch": 0.7934114371882612, "grad_norm": 19.750414611149914, "learning_rate": 2e-06, "loss": 0.2335, "step": 3420 }, { "epoch": 0.7936434288365619, "grad_norm": 19.956545241264873, "learning_rate": 2e-06, "loss": 0.3085, "step": 3421 }, { "epoch": 0.7938754204848626, "grad_norm": 8.149993746924585, "learning_rate": 2e-06, "loss": 0.2059, "step": 3422 }, { "epoch": 0.7941074121331632, "grad_norm": 10.028973054761785, "learning_rate": 2e-06, "loss": 0.2432, "step": 3423 }, { "epoch": 0.7943394037814638, "grad_norm": 18.778972827943914, "learning_rate": 2e-06, "loss": 0.3628, "step": 3424 }, { "epoch": 0.7945713954297645, "grad_norm": 13.930116536144318, "learning_rate": 2e-06, "loss": 0.2445, "step": 3425 }, { "epoch": 0.7948033870780652, "grad_norm": 14.63089032989506, "learning_rate": 2e-06, "loss": 0.2343, "step": 3426 }, { "epoch": 0.7950353787263659, "grad_norm": 15.72743340478428, "learning_rate": 2e-06, "loss": 0.3289, "step": 3427 }, { "epoch": 0.7952673703746665, "grad_norm": 12.471700775785143, "learning_rate": 2e-06, "loss": 0.2463, "step": 3428 }, { "epoch": 0.7954993620229672, "grad_norm": 15.784691045568465, "learning_rate": 2e-06, "loss": 0.3693, "step": 3429 }, { "epoch": 0.7957313536712678, "grad_norm": 11.487372968049451, "learning_rate": 2e-06, "loss": 0.2886, "step": 3430 }, { "epoch": 0.7959633453195685, "grad_norm": 23.592938069770618, "learning_rate": 2e-06, "loss": 0.413, "step": 3431 }, { "epoch": 0.7961953369678691, "grad_norm": 13.804563113914236, "learning_rate": 2e-06, "loss": 0.2107, "step": 3432 }, { "epoch": 0.7964273286161698, "grad_norm": 11.229386196040094, "learning_rate": 2e-06, "loss": 0.3498, "step": 3433 }, { "epoch": 0.7966593202644705, "grad_norm": 11.510512004956807, "learning_rate": 2e-06, "loss": 0.2175, "step": 3434 }, { "epoch": 0.7968913119127712, "grad_norm": 16.424219271937943, "learning_rate": 2e-06, "loss": 0.2835, "step": 3435 }, { "epoch": 0.7971233035610717, "grad_norm": 11.513067182158752, "learning_rate": 2e-06, "loss": 0.2623, "step": 3436 }, { "epoch": 0.7973552952093724, "grad_norm": 9.572539910406347, "learning_rate": 2e-06, "loss": 0.1982, "step": 3437 }, { "epoch": 0.7975872868576731, "grad_norm": 17.580798746144982, "learning_rate": 2e-06, "loss": 0.2625, "step": 3438 }, { "epoch": 0.7978192785059738, "grad_norm": 12.457497591236477, "learning_rate": 2e-06, "loss": 0.2578, "step": 3439 }, { "epoch": 0.7980512701542745, "grad_norm": 14.722234583713858, "learning_rate": 2e-06, "loss": 0.3125, "step": 3440 }, { "epoch": 0.7982832618025751, "grad_norm": 8.417471859774857, "learning_rate": 2e-06, "loss": 0.279, "step": 3441 }, { "epoch": 0.7985152534508758, "grad_norm": 13.0848253183779, "learning_rate": 2e-06, "loss": 0.3323, "step": 3442 }, { "epoch": 0.7987472450991764, "grad_norm": 13.163029759028204, "learning_rate": 2e-06, "loss": 0.2724, "step": 3443 }, { "epoch": 0.7989792367474771, "grad_norm": 7.058717519417359, "learning_rate": 2e-06, "loss": 0.157, "step": 3444 }, { "epoch": 0.7992112283957777, "grad_norm": 15.644201443704182, "learning_rate": 2e-06, "loss": 0.3231, "step": 3445 }, { "epoch": 0.7994432200440784, "grad_norm": 16.464898783661045, "learning_rate": 2e-06, "loss": 0.3016, "step": 3446 }, { "epoch": 0.7996752116923791, "grad_norm": 13.152454899489452, "learning_rate": 2e-06, "loss": 0.2524, "step": 3447 }, { "epoch": 0.7999072033406798, "grad_norm": 16.487316256533344, "learning_rate": 2e-06, "loss": 0.3709, "step": 3448 }, { "epoch": 0.8001391949889805, "grad_norm": 9.283139468973133, "learning_rate": 2e-06, "loss": 0.2256, "step": 3449 }, { "epoch": 0.800371186637281, "grad_norm": 18.27553759166784, "learning_rate": 2e-06, "loss": 0.309, "step": 3450 }, { "epoch": 0.8006031782855817, "grad_norm": 17.61707193996193, "learning_rate": 2e-06, "loss": 0.2662, "step": 3451 }, { "epoch": 0.8008351699338824, "grad_norm": 13.457161435100732, "learning_rate": 2e-06, "loss": 0.2381, "step": 3452 }, { "epoch": 0.801067161582183, "grad_norm": 13.991405109514043, "learning_rate": 2e-06, "loss": 0.2798, "step": 3453 }, { "epoch": 0.8012991532304837, "grad_norm": 18.77836921343787, "learning_rate": 2e-06, "loss": 0.3454, "step": 3454 }, { "epoch": 0.8015311448787844, "grad_norm": 24.73440836115384, "learning_rate": 2e-06, "loss": 0.3345, "step": 3455 }, { "epoch": 0.801763136527085, "grad_norm": 19.19064961801089, "learning_rate": 2e-06, "loss": 0.2585, "step": 3456 }, { "epoch": 0.8019951281753857, "grad_norm": 10.14125178150145, "learning_rate": 2e-06, "loss": 0.2333, "step": 3457 }, { "epoch": 0.8022271198236863, "grad_norm": 7.075252080603709, "learning_rate": 2e-06, "loss": 0.1803, "step": 3458 }, { "epoch": 0.802459111471987, "grad_norm": 15.228216202553677, "learning_rate": 2e-06, "loss": 0.2749, "step": 3459 }, { "epoch": 0.8026911031202877, "grad_norm": 15.90276123435102, "learning_rate": 2e-06, "loss": 0.3938, "step": 3460 }, { "epoch": 0.8029230947685884, "grad_norm": 12.67302214729661, "learning_rate": 2e-06, "loss": 0.2562, "step": 3461 }, { "epoch": 0.803155086416889, "grad_norm": 14.535054771013746, "learning_rate": 2e-06, "loss": 0.3209, "step": 3462 }, { "epoch": 0.8033870780651896, "grad_norm": 10.919756951774275, "learning_rate": 2e-06, "loss": 0.2377, "step": 3463 }, { "epoch": 0.8036190697134903, "grad_norm": 14.211186237722966, "learning_rate": 2e-06, "loss": 0.303, "step": 3464 }, { "epoch": 0.803851061361791, "grad_norm": 10.324439146983549, "learning_rate": 2e-06, "loss": 0.2724, "step": 3465 }, { "epoch": 0.8040830530100916, "grad_norm": 18.212276946809332, "learning_rate": 2e-06, "loss": 0.2857, "step": 3466 }, { "epoch": 0.8043150446583923, "grad_norm": 9.08491619843411, "learning_rate": 2e-06, "loss": 0.3044, "step": 3467 }, { "epoch": 0.804547036306693, "grad_norm": 13.555167646674509, "learning_rate": 2e-06, "loss": 0.3019, "step": 3468 }, { "epoch": 0.8047790279549937, "grad_norm": 14.173546709054419, "learning_rate": 2e-06, "loss": 0.2405, "step": 3469 }, { "epoch": 0.8050110196032942, "grad_norm": 13.906666800937307, "learning_rate": 2e-06, "loss": 0.293, "step": 3470 }, { "epoch": 0.8052430112515949, "grad_norm": 20.61968237180272, "learning_rate": 2e-06, "loss": 0.3068, "step": 3471 }, { "epoch": 0.8054750028998956, "grad_norm": 7.016349103407426, "learning_rate": 2e-06, "loss": 0.2895, "step": 3472 }, { "epoch": 0.8057069945481963, "grad_norm": 9.27839396191916, "learning_rate": 2e-06, "loss": 0.259, "step": 3473 }, { "epoch": 0.805938986196497, "grad_norm": 9.333666595921253, "learning_rate": 2e-06, "loss": 0.2843, "step": 3474 }, { "epoch": 0.8061709778447976, "grad_norm": 12.925200076949773, "learning_rate": 2e-06, "loss": 0.2524, "step": 3475 }, { "epoch": 0.8064029694930982, "grad_norm": 23.82557788438642, "learning_rate": 2e-06, "loss": 0.3742, "step": 3476 }, { "epoch": 0.8066349611413989, "grad_norm": 12.30874586445278, "learning_rate": 2e-06, "loss": 0.2475, "step": 3477 }, { "epoch": 0.8068669527896996, "grad_norm": 17.45344333498659, "learning_rate": 2e-06, "loss": 0.2469, "step": 3478 }, { "epoch": 0.8070989444380002, "grad_norm": 20.44619451931843, "learning_rate": 2e-06, "loss": 0.4079, "step": 3479 }, { "epoch": 0.8073309360863009, "grad_norm": 16.5515046014371, "learning_rate": 2e-06, "loss": 0.3359, "step": 3480 }, { "epoch": 0.8075629277346016, "grad_norm": 24.519338165710717, "learning_rate": 2e-06, "loss": 0.3352, "step": 3481 }, { "epoch": 0.8077949193829023, "grad_norm": 14.495647774820247, "learning_rate": 2e-06, "loss": 0.2525, "step": 3482 }, { "epoch": 0.8080269110312028, "grad_norm": 35.31943301373369, "learning_rate": 2e-06, "loss": 0.5115, "step": 3483 }, { "epoch": 0.8082589026795035, "grad_norm": 13.811477132735103, "learning_rate": 2e-06, "loss": 0.3339, "step": 3484 }, { "epoch": 0.8084908943278042, "grad_norm": 11.395040916060081, "learning_rate": 2e-06, "loss": 0.3566, "step": 3485 }, { "epoch": 0.8087228859761049, "grad_norm": 6.689994663714095, "learning_rate": 2e-06, "loss": 0.2268, "step": 3486 }, { "epoch": 0.8089548776244055, "grad_norm": 7.131946165209261, "learning_rate": 2e-06, "loss": 0.2644, "step": 3487 }, { "epoch": 0.8091868692727062, "grad_norm": 13.33439706490419, "learning_rate": 2e-06, "loss": 0.3359, "step": 3488 }, { "epoch": 0.8094188609210068, "grad_norm": 7.313970577613089, "learning_rate": 2e-06, "loss": 0.2157, "step": 3489 }, { "epoch": 0.8096508525693075, "grad_norm": 8.979295973783886, "learning_rate": 2e-06, "loss": 0.2594, "step": 3490 }, { "epoch": 0.8098828442176081, "grad_norm": 7.810909631076036, "learning_rate": 2e-06, "loss": 0.2591, "step": 3491 }, { "epoch": 0.8101148358659088, "grad_norm": 12.246774385443148, "learning_rate": 2e-06, "loss": 0.3403, "step": 3492 }, { "epoch": 0.8103468275142095, "grad_norm": 9.947901848029089, "learning_rate": 2e-06, "loss": 0.2852, "step": 3493 }, { "epoch": 0.8105788191625102, "grad_norm": 11.094416640136869, "learning_rate": 2e-06, "loss": 0.2473, "step": 3494 }, { "epoch": 0.8108108108108109, "grad_norm": 11.774081114699563, "learning_rate": 2e-06, "loss": 0.3085, "step": 3495 }, { "epoch": 0.8110428024591114, "grad_norm": 10.698679440962588, "learning_rate": 2e-06, "loss": 0.2418, "step": 3496 }, { "epoch": 0.8112747941074121, "grad_norm": 15.764409636004656, "learning_rate": 2e-06, "loss": 0.2487, "step": 3497 }, { "epoch": 0.8115067857557128, "grad_norm": 15.765164065297412, "learning_rate": 2e-06, "loss": 0.3738, "step": 3498 }, { "epoch": 0.8117387774040135, "grad_norm": 13.187289856086876, "learning_rate": 2e-06, "loss": 0.3151, "step": 3499 }, { "epoch": 0.8119707690523141, "grad_norm": 13.801830481066148, "learning_rate": 2e-06, "loss": 0.2247, "step": 3500 }, { "epoch": 0.8122027607006148, "grad_norm": 11.524118770924476, "learning_rate": 2e-06, "loss": 0.2742, "step": 3501 }, { "epoch": 0.8124347523489155, "grad_norm": 20.522591212945343, "learning_rate": 2e-06, "loss": 0.3449, "step": 3502 }, { "epoch": 0.8126667439972161, "grad_norm": 11.509315641920494, "learning_rate": 2e-06, "loss": 0.2796, "step": 3503 }, { "epoch": 0.8128987356455167, "grad_norm": 5.488506283368135, "learning_rate": 2e-06, "loss": 0.1703, "step": 3504 }, { "epoch": 0.8131307272938174, "grad_norm": 11.409597576924604, "learning_rate": 2e-06, "loss": 0.2738, "step": 3505 }, { "epoch": 0.8133627189421181, "grad_norm": 14.455097672576871, "learning_rate": 2e-06, "loss": 0.3249, "step": 3506 }, { "epoch": 0.8135947105904188, "grad_norm": 15.071206169953461, "learning_rate": 2e-06, "loss": 0.3254, "step": 3507 }, { "epoch": 0.8138267022387194, "grad_norm": 10.482927998182806, "learning_rate": 2e-06, "loss": 0.2376, "step": 3508 }, { "epoch": 0.81405869388702, "grad_norm": 25.693406081649588, "learning_rate": 2e-06, "loss": 0.3781, "step": 3509 }, { "epoch": 0.8142906855353207, "grad_norm": 13.608869586175004, "learning_rate": 2e-06, "loss": 0.2643, "step": 3510 }, { "epoch": 0.8145226771836214, "grad_norm": 12.004444029483697, "learning_rate": 2e-06, "loss": 0.2325, "step": 3511 }, { "epoch": 0.814754668831922, "grad_norm": 12.940201273993516, "learning_rate": 2e-06, "loss": 0.2498, "step": 3512 }, { "epoch": 0.8149866604802227, "grad_norm": 18.105886390268846, "learning_rate": 2e-06, "loss": 0.2966, "step": 3513 }, { "epoch": 0.8152186521285234, "grad_norm": 9.755817402936005, "learning_rate": 2e-06, "loss": 0.2896, "step": 3514 }, { "epoch": 0.8154506437768241, "grad_norm": 20.685876534208933, "learning_rate": 2e-06, "loss": 0.2989, "step": 3515 }, { "epoch": 0.8156826354251246, "grad_norm": 10.772642056094304, "learning_rate": 2e-06, "loss": 0.2951, "step": 3516 }, { "epoch": 0.8159146270734253, "grad_norm": 22.421591893301528, "learning_rate": 2e-06, "loss": 0.3378, "step": 3517 }, { "epoch": 0.816146618721726, "grad_norm": 15.838181455870886, "learning_rate": 2e-06, "loss": 0.3841, "step": 3518 }, { "epoch": 0.8163786103700267, "grad_norm": 10.795058033101041, "learning_rate": 2e-06, "loss": 0.2946, "step": 3519 }, { "epoch": 0.8166106020183274, "grad_norm": 10.703899141780543, "learning_rate": 2e-06, "loss": 0.3325, "step": 3520 }, { "epoch": 0.816842593666628, "grad_norm": 8.945582349508102, "learning_rate": 2e-06, "loss": 0.2288, "step": 3521 }, { "epoch": 0.8170745853149287, "grad_norm": 16.457137415765683, "learning_rate": 2e-06, "loss": 0.3724, "step": 3522 }, { "epoch": 0.8173065769632293, "grad_norm": 14.915024776601184, "learning_rate": 2e-06, "loss": 0.2437, "step": 3523 }, { "epoch": 0.81753856861153, "grad_norm": 19.130665774689742, "learning_rate": 2e-06, "loss": 0.42, "step": 3524 }, { "epoch": 0.8177705602598306, "grad_norm": 11.793326568443652, "learning_rate": 2e-06, "loss": 0.2295, "step": 3525 }, { "epoch": 0.8180025519081313, "grad_norm": 6.473724516177291, "learning_rate": 2e-06, "loss": 0.1777, "step": 3526 }, { "epoch": 0.818234543556432, "grad_norm": 13.25425715941778, "learning_rate": 2e-06, "loss": 0.26, "step": 3527 }, { "epoch": 0.8184665352047327, "grad_norm": 10.245529294799168, "learning_rate": 2e-06, "loss": 0.3425, "step": 3528 }, { "epoch": 0.8186985268530332, "grad_norm": 6.669639477632707, "learning_rate": 2e-06, "loss": 0.2318, "step": 3529 }, { "epoch": 0.8189305185013339, "grad_norm": 18.227102749489706, "learning_rate": 2e-06, "loss": 0.2112, "step": 3530 }, { "epoch": 0.8191625101496346, "grad_norm": 9.03996463876371, "learning_rate": 2e-06, "loss": 0.296, "step": 3531 }, { "epoch": 0.8193945017979353, "grad_norm": 14.128603749034852, "learning_rate": 2e-06, "loss": 0.228, "step": 3532 }, { "epoch": 0.819626493446236, "grad_norm": 19.581050800333305, "learning_rate": 2e-06, "loss": 0.2736, "step": 3533 }, { "epoch": 0.8198584850945366, "grad_norm": 10.830024818169084, "learning_rate": 2e-06, "loss": 0.4177, "step": 3534 }, { "epoch": 0.8200904767428373, "grad_norm": 10.547522500539598, "learning_rate": 2e-06, "loss": 0.2547, "step": 3535 }, { "epoch": 0.8203224683911379, "grad_norm": 32.3173599240258, "learning_rate": 2e-06, "loss": 0.3983, "step": 3536 }, { "epoch": 0.8205544600394385, "grad_norm": 13.226067374404431, "learning_rate": 2e-06, "loss": 0.2506, "step": 3537 }, { "epoch": 0.8207864516877392, "grad_norm": 10.462376223619785, "learning_rate": 2e-06, "loss": 0.2232, "step": 3538 }, { "epoch": 0.8210184433360399, "grad_norm": 32.82054766950882, "learning_rate": 2e-06, "loss": 0.3654, "step": 3539 }, { "epoch": 0.8212504349843406, "grad_norm": 10.860846241032947, "learning_rate": 2e-06, "loss": 0.2525, "step": 3540 }, { "epoch": 0.8214824266326413, "grad_norm": 8.995251473292827, "learning_rate": 2e-06, "loss": 0.3209, "step": 3541 }, { "epoch": 0.8217144182809419, "grad_norm": 15.77422954469678, "learning_rate": 2e-06, "loss": 0.3464, "step": 3542 }, { "epoch": 0.8219464099292425, "grad_norm": 10.039797919380941, "learning_rate": 2e-06, "loss": 0.3015, "step": 3543 }, { "epoch": 0.8221784015775432, "grad_norm": 9.594735943914506, "learning_rate": 2e-06, "loss": 0.2629, "step": 3544 }, { "epoch": 0.8224103932258439, "grad_norm": 13.127890377106954, "learning_rate": 2e-06, "loss": 0.2683, "step": 3545 }, { "epoch": 0.8226423848741445, "grad_norm": 6.018401443740743, "learning_rate": 2e-06, "loss": 0.1822, "step": 3546 }, { "epoch": 0.8228743765224452, "grad_norm": 10.239555624879175, "learning_rate": 2e-06, "loss": 0.2981, "step": 3547 }, { "epoch": 0.8231063681707459, "grad_norm": 8.44416471941474, "learning_rate": 2e-06, "loss": 0.1853, "step": 3548 }, { "epoch": 0.8233383598190465, "grad_norm": 11.946867911578675, "learning_rate": 2e-06, "loss": 0.2552, "step": 3549 }, { "epoch": 0.8235703514673471, "grad_norm": 17.157645834778513, "learning_rate": 2e-06, "loss": 0.3434, "step": 3550 }, { "epoch": 0.8238023431156478, "grad_norm": 15.446833564674439, "learning_rate": 2e-06, "loss": 0.2505, "step": 3551 }, { "epoch": 0.8240343347639485, "grad_norm": 16.49241567046502, "learning_rate": 2e-06, "loss": 0.2732, "step": 3552 }, { "epoch": 0.8242663264122492, "grad_norm": 15.670109551955258, "learning_rate": 2e-06, "loss": 0.3211, "step": 3553 }, { "epoch": 0.8244983180605499, "grad_norm": 16.140530370283813, "learning_rate": 2e-06, "loss": 0.2206, "step": 3554 }, { "epoch": 0.8247303097088505, "grad_norm": 10.511695486802429, "learning_rate": 2e-06, "loss": 0.2428, "step": 3555 }, { "epoch": 0.8249623013571511, "grad_norm": 10.306828334456238, "learning_rate": 2e-06, "loss": 0.2865, "step": 3556 }, { "epoch": 0.8251942930054518, "grad_norm": 15.412563379487153, "learning_rate": 2e-06, "loss": 0.2525, "step": 3557 }, { "epoch": 0.8254262846537525, "grad_norm": 14.469061271601475, "learning_rate": 2e-06, "loss": 0.3926, "step": 3558 }, { "epoch": 0.8256582763020531, "grad_norm": 12.314557820247007, "learning_rate": 2e-06, "loss": 0.285, "step": 3559 }, { "epoch": 0.8258902679503538, "grad_norm": 20.344101930227662, "learning_rate": 2e-06, "loss": 0.2161, "step": 3560 }, { "epoch": 0.8261222595986545, "grad_norm": 14.321188193323712, "learning_rate": 2e-06, "loss": 0.282, "step": 3561 }, { "epoch": 0.8263542512469552, "grad_norm": 10.369535133540206, "learning_rate": 2e-06, "loss": 0.2756, "step": 3562 }, { "epoch": 0.8265862428952557, "grad_norm": 14.405162531020206, "learning_rate": 2e-06, "loss": 0.2895, "step": 3563 }, { "epoch": 0.8268182345435564, "grad_norm": 27.51068067463199, "learning_rate": 2e-06, "loss": 0.3162, "step": 3564 }, { "epoch": 0.8270502261918571, "grad_norm": 9.47044995543265, "learning_rate": 2e-06, "loss": 0.1824, "step": 3565 }, { "epoch": 0.8272822178401578, "grad_norm": 13.374539645458363, "learning_rate": 2e-06, "loss": 0.2139, "step": 3566 }, { "epoch": 0.8275142094884584, "grad_norm": 17.735043818984096, "learning_rate": 2e-06, "loss": 0.2542, "step": 3567 }, { "epoch": 0.8277462011367591, "grad_norm": 9.242686802461979, "learning_rate": 2e-06, "loss": 0.1826, "step": 3568 }, { "epoch": 0.8279781927850597, "grad_norm": 5.677185399446114, "learning_rate": 2e-06, "loss": 0.1462, "step": 3569 }, { "epoch": 0.8282101844333604, "grad_norm": 12.844155490278478, "learning_rate": 2e-06, "loss": 0.4044, "step": 3570 }, { "epoch": 0.828442176081661, "grad_norm": 15.018609163790327, "learning_rate": 2e-06, "loss": 0.4075, "step": 3571 }, { "epoch": 0.8286741677299617, "grad_norm": 28.60480917901526, "learning_rate": 2e-06, "loss": 0.3256, "step": 3572 }, { "epoch": 0.8289061593782624, "grad_norm": 11.797828274897043, "learning_rate": 2e-06, "loss": 0.2946, "step": 3573 }, { "epoch": 0.8291381510265631, "grad_norm": 12.151158127436506, "learning_rate": 2e-06, "loss": 0.3089, "step": 3574 }, { "epoch": 0.8293701426748638, "grad_norm": 10.69731287572852, "learning_rate": 2e-06, "loss": 0.1956, "step": 3575 }, { "epoch": 0.8296021343231643, "grad_norm": 15.209091661657826, "learning_rate": 2e-06, "loss": 0.3467, "step": 3576 }, { "epoch": 0.829834125971465, "grad_norm": 9.48025969073124, "learning_rate": 2e-06, "loss": 0.3034, "step": 3577 }, { "epoch": 0.8300661176197657, "grad_norm": 22.156690040220127, "learning_rate": 2e-06, "loss": 0.2798, "step": 3578 }, { "epoch": 0.8302981092680664, "grad_norm": 17.120131547477015, "learning_rate": 2e-06, "loss": 0.4078, "step": 3579 }, { "epoch": 0.830530100916367, "grad_norm": 13.727506476081903, "learning_rate": 2e-06, "loss": 0.2804, "step": 3580 }, { "epoch": 0.8307620925646677, "grad_norm": 9.66740188676226, "learning_rate": 2e-06, "loss": 0.2391, "step": 3581 }, { "epoch": 0.8309940842129684, "grad_norm": 17.59160175368584, "learning_rate": 2e-06, "loss": 0.3679, "step": 3582 }, { "epoch": 0.831226075861269, "grad_norm": 17.56972129711204, "learning_rate": 2e-06, "loss": 0.3698, "step": 3583 }, { "epoch": 0.8314580675095696, "grad_norm": 13.197156971473062, "learning_rate": 2e-06, "loss": 0.2521, "step": 3584 }, { "epoch": 0.8316900591578703, "grad_norm": 14.553013737894982, "learning_rate": 2e-06, "loss": 0.2363, "step": 3585 }, { "epoch": 0.831922050806171, "grad_norm": 8.26424690417724, "learning_rate": 2e-06, "loss": 0.1732, "step": 3586 }, { "epoch": 0.8321540424544717, "grad_norm": 16.971467765915932, "learning_rate": 2e-06, "loss": 0.2755, "step": 3587 }, { "epoch": 0.8323860341027723, "grad_norm": 9.547199083280193, "learning_rate": 2e-06, "loss": 0.2821, "step": 3588 }, { "epoch": 0.8326180257510729, "grad_norm": 8.19466444893716, "learning_rate": 2e-06, "loss": 0.1939, "step": 3589 }, { "epoch": 0.8328500173993736, "grad_norm": 13.773792857129889, "learning_rate": 2e-06, "loss": 0.2627, "step": 3590 }, { "epoch": 0.8330820090476743, "grad_norm": 25.164178842438982, "learning_rate": 2e-06, "loss": 0.1979, "step": 3591 }, { "epoch": 0.8333140006959749, "grad_norm": 19.67131566725128, "learning_rate": 2e-06, "loss": 0.4116, "step": 3592 }, { "epoch": 0.8335459923442756, "grad_norm": 11.797629439099808, "learning_rate": 2e-06, "loss": 0.2482, "step": 3593 }, { "epoch": 0.8337779839925763, "grad_norm": 13.672341117810163, "learning_rate": 2e-06, "loss": 0.26, "step": 3594 }, { "epoch": 0.834009975640877, "grad_norm": 13.89021157924789, "learning_rate": 2e-06, "loss": 0.3536, "step": 3595 }, { "epoch": 0.8342419672891775, "grad_norm": 12.734349467657891, "learning_rate": 2e-06, "loss": 0.3681, "step": 3596 }, { "epoch": 0.8344739589374782, "grad_norm": 18.167801879618423, "learning_rate": 2e-06, "loss": 0.2395, "step": 3597 }, { "epoch": 0.8347059505857789, "grad_norm": 15.522534985390315, "learning_rate": 2e-06, "loss": 0.3314, "step": 3598 }, { "epoch": 0.8349379422340796, "grad_norm": 14.490683555228062, "learning_rate": 2e-06, "loss": 0.2978, "step": 3599 }, { "epoch": 0.8351699338823803, "grad_norm": 29.838603940900047, "learning_rate": 2e-06, "loss": 0.3134, "step": 3600 }, { "epoch": 0.8354019255306809, "grad_norm": 13.116967226198804, "learning_rate": 2e-06, "loss": 0.2744, "step": 3601 }, { "epoch": 0.8356339171789816, "grad_norm": 25.33370211615091, "learning_rate": 2e-06, "loss": 0.4356, "step": 3602 }, { "epoch": 0.8358659088272822, "grad_norm": 16.302885312128048, "learning_rate": 2e-06, "loss": 0.3364, "step": 3603 }, { "epoch": 0.8360979004755829, "grad_norm": 12.653664618715865, "learning_rate": 2e-06, "loss": 0.2252, "step": 3604 }, { "epoch": 0.8363298921238835, "grad_norm": 11.957415500851111, "learning_rate": 2e-06, "loss": 0.2408, "step": 3605 }, { "epoch": 0.8365618837721842, "grad_norm": 26.471764142537726, "learning_rate": 2e-06, "loss": 0.3768, "step": 3606 }, { "epoch": 0.8367938754204849, "grad_norm": 17.280954916011552, "learning_rate": 2e-06, "loss": 0.3322, "step": 3607 }, { "epoch": 0.8370258670687856, "grad_norm": 12.912623624553863, "learning_rate": 2e-06, "loss": 0.2971, "step": 3608 }, { "epoch": 0.8372578587170861, "grad_norm": 33.409709385748855, "learning_rate": 2e-06, "loss": 0.3724, "step": 3609 }, { "epoch": 0.8374898503653868, "grad_norm": 13.12016105283151, "learning_rate": 2e-06, "loss": 0.3083, "step": 3610 }, { "epoch": 0.8377218420136875, "grad_norm": 21.16503577999807, "learning_rate": 2e-06, "loss": 0.4303, "step": 3611 }, { "epoch": 0.8379538336619882, "grad_norm": 10.40878601387677, "learning_rate": 2e-06, "loss": 0.2138, "step": 3612 }, { "epoch": 0.8381858253102888, "grad_norm": 19.2833065138601, "learning_rate": 2e-06, "loss": 0.269, "step": 3613 }, { "epoch": 0.8384178169585895, "grad_norm": 17.850717804608752, "learning_rate": 2e-06, "loss": 0.3181, "step": 3614 }, { "epoch": 0.8386498086068902, "grad_norm": 13.76177943878717, "learning_rate": 2e-06, "loss": 0.2724, "step": 3615 }, { "epoch": 0.8388818002551908, "grad_norm": 13.036284766357339, "learning_rate": 2e-06, "loss": 0.2476, "step": 3616 }, { "epoch": 0.8391137919034914, "grad_norm": 17.38148826319962, "learning_rate": 2e-06, "loss": 0.2757, "step": 3617 }, { "epoch": 0.8393457835517921, "grad_norm": 10.903695405275533, "learning_rate": 2e-06, "loss": 0.2743, "step": 3618 }, { "epoch": 0.8395777752000928, "grad_norm": 11.87283401552707, "learning_rate": 2e-06, "loss": 0.292, "step": 3619 }, { "epoch": 0.8398097668483935, "grad_norm": 16.771758918256218, "learning_rate": 2e-06, "loss": 0.3851, "step": 3620 }, { "epoch": 0.8400417584966942, "grad_norm": 13.719924046998493, "learning_rate": 2e-06, "loss": 0.2639, "step": 3621 }, { "epoch": 0.8402737501449948, "grad_norm": 12.03787222608202, "learning_rate": 2e-06, "loss": 0.2875, "step": 3622 }, { "epoch": 0.8405057417932954, "grad_norm": 12.36045663543364, "learning_rate": 2e-06, "loss": 0.199, "step": 3623 }, { "epoch": 0.8407377334415961, "grad_norm": 11.3673385044902, "learning_rate": 2e-06, "loss": 0.2191, "step": 3624 }, { "epoch": 0.8409697250898968, "grad_norm": 12.709416450778644, "learning_rate": 2e-06, "loss": 0.1901, "step": 3625 }, { "epoch": 0.8412017167381974, "grad_norm": 9.564064452472941, "learning_rate": 2e-06, "loss": 0.2829, "step": 3626 }, { "epoch": 0.8414337083864981, "grad_norm": 11.667053315828442, "learning_rate": 2e-06, "loss": 0.3535, "step": 3627 }, { "epoch": 0.8416657000347988, "grad_norm": 18.12528031498164, "learning_rate": 2e-06, "loss": 0.2946, "step": 3628 }, { "epoch": 0.8418976916830994, "grad_norm": 23.08275506339047, "learning_rate": 2e-06, "loss": 0.4012, "step": 3629 }, { "epoch": 0.8421296833314, "grad_norm": 9.646394367795933, "learning_rate": 2e-06, "loss": 0.2934, "step": 3630 }, { "epoch": 0.8423616749797007, "grad_norm": 20.793361796856317, "learning_rate": 2e-06, "loss": 0.3444, "step": 3631 }, { "epoch": 0.8425936666280014, "grad_norm": 14.113988911611878, "learning_rate": 2e-06, "loss": 0.2877, "step": 3632 }, { "epoch": 0.8428256582763021, "grad_norm": 8.786341654974642, "learning_rate": 2e-06, "loss": 0.2987, "step": 3633 }, { "epoch": 0.8430576499246027, "grad_norm": 18.030693899144588, "learning_rate": 2e-06, "loss": 0.3672, "step": 3634 }, { "epoch": 0.8432896415729034, "grad_norm": 13.25992214773615, "learning_rate": 2e-06, "loss": 0.3816, "step": 3635 }, { "epoch": 0.843521633221204, "grad_norm": 12.693332638873756, "learning_rate": 2e-06, "loss": 0.2074, "step": 3636 }, { "epoch": 0.8437536248695047, "grad_norm": 17.1773251818126, "learning_rate": 2e-06, "loss": 0.3135, "step": 3637 }, { "epoch": 0.8439856165178053, "grad_norm": 16.218314383885915, "learning_rate": 2e-06, "loss": 0.288, "step": 3638 }, { "epoch": 0.844217608166106, "grad_norm": 16.041706947386142, "learning_rate": 2e-06, "loss": 0.2734, "step": 3639 }, { "epoch": 0.8444495998144067, "grad_norm": 9.535551188493375, "learning_rate": 2e-06, "loss": 0.248, "step": 3640 }, { "epoch": 0.8446815914627074, "grad_norm": 18.716928330818007, "learning_rate": 2e-06, "loss": 0.3895, "step": 3641 }, { "epoch": 0.844913583111008, "grad_norm": 11.504353512182753, "learning_rate": 2e-06, "loss": 0.2513, "step": 3642 }, { "epoch": 0.8451455747593086, "grad_norm": 18.269519731683634, "learning_rate": 2e-06, "loss": 0.303, "step": 3643 }, { "epoch": 0.8453775664076093, "grad_norm": 15.2935008617942, "learning_rate": 2e-06, "loss": 0.349, "step": 3644 }, { "epoch": 0.84560955805591, "grad_norm": 11.958395285026274, "learning_rate": 2e-06, "loss": 0.2466, "step": 3645 }, { "epoch": 0.8458415497042107, "grad_norm": 11.5462305491056, "learning_rate": 2e-06, "loss": 0.2421, "step": 3646 }, { "epoch": 0.8460735413525113, "grad_norm": 9.021273192696786, "learning_rate": 2e-06, "loss": 0.2478, "step": 3647 }, { "epoch": 0.846305533000812, "grad_norm": 18.029921292402488, "learning_rate": 2e-06, "loss": 0.3454, "step": 3648 }, { "epoch": 0.8465375246491126, "grad_norm": 9.117078760672142, "learning_rate": 2e-06, "loss": 0.256, "step": 3649 }, { "epoch": 0.8467695162974133, "grad_norm": 8.027104481169884, "learning_rate": 2e-06, "loss": 0.2244, "step": 3650 }, { "epoch": 0.8470015079457139, "grad_norm": 18.36707474383036, "learning_rate": 2e-06, "loss": 0.3189, "step": 3651 }, { "epoch": 0.8472334995940146, "grad_norm": 11.373520346001401, "learning_rate": 2e-06, "loss": 0.2491, "step": 3652 }, { "epoch": 0.8474654912423153, "grad_norm": 9.023667037281498, "learning_rate": 2e-06, "loss": 0.236, "step": 3653 }, { "epoch": 0.847697482890616, "grad_norm": 12.47557691705421, "learning_rate": 2e-06, "loss": 0.3199, "step": 3654 }, { "epoch": 0.8479294745389166, "grad_norm": 11.849709139019977, "learning_rate": 2e-06, "loss": 0.312, "step": 3655 }, { "epoch": 0.8481614661872172, "grad_norm": 16.08275240994812, "learning_rate": 2e-06, "loss": 0.2989, "step": 3656 }, { "epoch": 0.8483934578355179, "grad_norm": 15.974269786013867, "learning_rate": 2e-06, "loss": 0.3295, "step": 3657 }, { "epoch": 0.8486254494838186, "grad_norm": 12.325930745153997, "learning_rate": 2e-06, "loss": 0.2483, "step": 3658 }, { "epoch": 0.8488574411321193, "grad_norm": 15.839887071663428, "learning_rate": 2e-06, "loss": 0.31, "step": 3659 }, { "epoch": 0.8490894327804199, "grad_norm": 15.718046583011358, "learning_rate": 2e-06, "loss": 0.3434, "step": 3660 }, { "epoch": 0.8493214244287206, "grad_norm": 21.360025144175662, "learning_rate": 2e-06, "loss": 0.3237, "step": 3661 }, { "epoch": 0.8495534160770212, "grad_norm": 21.407456834013093, "learning_rate": 2e-06, "loss": 0.3511, "step": 3662 }, { "epoch": 0.8497854077253219, "grad_norm": 17.516957487751252, "learning_rate": 2e-06, "loss": 0.2002, "step": 3663 }, { "epoch": 0.8500173993736225, "grad_norm": 10.00063509585583, "learning_rate": 2e-06, "loss": 0.3101, "step": 3664 }, { "epoch": 0.8502493910219232, "grad_norm": 16.61146615224927, "learning_rate": 2e-06, "loss": 0.2991, "step": 3665 }, { "epoch": 0.8504813826702239, "grad_norm": 14.226566986205597, "learning_rate": 2e-06, "loss": 0.217, "step": 3666 }, { "epoch": 0.8507133743185246, "grad_norm": 13.941877038153388, "learning_rate": 2e-06, "loss": 0.4054, "step": 3667 }, { "epoch": 0.8509453659668252, "grad_norm": 20.923056394448633, "learning_rate": 2e-06, "loss": 0.2737, "step": 3668 }, { "epoch": 0.8511773576151258, "grad_norm": 11.327595266683401, "learning_rate": 2e-06, "loss": 0.2025, "step": 3669 }, { "epoch": 0.8514093492634265, "grad_norm": 13.082863132805368, "learning_rate": 2e-06, "loss": 0.2532, "step": 3670 }, { "epoch": 0.8516413409117272, "grad_norm": 11.390712446630442, "learning_rate": 2e-06, "loss": 0.254, "step": 3671 }, { "epoch": 0.8518733325600278, "grad_norm": 21.27549243664026, "learning_rate": 2e-06, "loss": 0.3924, "step": 3672 }, { "epoch": 0.8521053242083285, "grad_norm": 21.3979323864136, "learning_rate": 2e-06, "loss": 0.2268, "step": 3673 }, { "epoch": 0.8523373158566292, "grad_norm": 8.612003270380857, "learning_rate": 2e-06, "loss": 0.1924, "step": 3674 }, { "epoch": 0.8525693075049299, "grad_norm": 19.820763966955873, "learning_rate": 2e-06, "loss": 0.4264, "step": 3675 }, { "epoch": 0.8528012991532304, "grad_norm": 20.045730155740298, "learning_rate": 2e-06, "loss": 0.3346, "step": 3676 }, { "epoch": 0.8530332908015311, "grad_norm": 13.650897189920228, "learning_rate": 2e-06, "loss": 0.3416, "step": 3677 }, { "epoch": 0.8532652824498318, "grad_norm": 10.14256766403005, "learning_rate": 2e-06, "loss": 0.2986, "step": 3678 }, { "epoch": 0.8534972740981325, "grad_norm": 10.037657539343313, "learning_rate": 2e-06, "loss": 0.2429, "step": 3679 }, { "epoch": 0.8537292657464332, "grad_norm": 12.66315273931505, "learning_rate": 2e-06, "loss": 0.2382, "step": 3680 }, { "epoch": 0.8539612573947338, "grad_norm": 9.312231073579792, "learning_rate": 2e-06, "loss": 0.2034, "step": 3681 }, { "epoch": 0.8541932490430344, "grad_norm": 8.761478461055901, "learning_rate": 2e-06, "loss": 0.216, "step": 3682 }, { "epoch": 0.8544252406913351, "grad_norm": 13.743741778322043, "learning_rate": 2e-06, "loss": 0.2603, "step": 3683 }, { "epoch": 0.8546572323396358, "grad_norm": 8.56315291962295, "learning_rate": 2e-06, "loss": 0.1987, "step": 3684 }, { "epoch": 0.8548892239879364, "grad_norm": 9.797116980529367, "learning_rate": 2e-06, "loss": 0.3647, "step": 3685 }, { "epoch": 0.8551212156362371, "grad_norm": 13.622610342375989, "learning_rate": 2e-06, "loss": 0.2278, "step": 3686 }, { "epoch": 0.8553532072845378, "grad_norm": 9.724405842358887, "learning_rate": 2e-06, "loss": 0.2339, "step": 3687 }, { "epoch": 0.8555851989328385, "grad_norm": 9.427475360484458, "learning_rate": 2e-06, "loss": 0.1833, "step": 3688 }, { "epoch": 0.855817190581139, "grad_norm": 23.056746456756965, "learning_rate": 2e-06, "loss": 0.3746, "step": 3689 }, { "epoch": 0.8560491822294397, "grad_norm": 8.420394491891004, "learning_rate": 2e-06, "loss": 0.2763, "step": 3690 }, { "epoch": 0.8562811738777404, "grad_norm": 17.380813276857875, "learning_rate": 2e-06, "loss": 0.2846, "step": 3691 }, { "epoch": 0.8565131655260411, "grad_norm": 13.749892547660071, "learning_rate": 2e-06, "loss": 0.2235, "step": 3692 }, { "epoch": 0.8567451571743417, "grad_norm": 13.50948624508527, "learning_rate": 2e-06, "loss": 0.3027, "step": 3693 }, { "epoch": 0.8569771488226424, "grad_norm": 16.78402828019823, "learning_rate": 2e-06, "loss": 0.361, "step": 3694 }, { "epoch": 0.8572091404709431, "grad_norm": 10.7775988342208, "learning_rate": 2e-06, "loss": 0.2344, "step": 3695 }, { "epoch": 0.8574411321192437, "grad_norm": 17.91160614351676, "learning_rate": 2e-06, "loss": 0.4287, "step": 3696 }, { "epoch": 0.8576731237675443, "grad_norm": 10.208952391466227, "learning_rate": 2e-06, "loss": 0.1963, "step": 3697 }, { "epoch": 0.857905115415845, "grad_norm": 12.492443606261487, "learning_rate": 2e-06, "loss": 0.1823, "step": 3698 }, { "epoch": 0.8581371070641457, "grad_norm": 7.964969511028492, "learning_rate": 2e-06, "loss": 0.1687, "step": 3699 }, { "epoch": 0.8583690987124464, "grad_norm": 7.738033256851267, "learning_rate": 2e-06, "loss": 0.1877, "step": 3700 }, { "epoch": 0.858601090360747, "grad_norm": 13.255320400154975, "learning_rate": 2e-06, "loss": 0.3013, "step": 3701 }, { "epoch": 0.8588330820090476, "grad_norm": 12.671403201642306, "learning_rate": 2e-06, "loss": 0.2109, "step": 3702 }, { "epoch": 0.8590650736573483, "grad_norm": 11.814382103135062, "learning_rate": 2e-06, "loss": 0.1372, "step": 3703 }, { "epoch": 0.859297065305649, "grad_norm": 13.982761604963033, "learning_rate": 2e-06, "loss": 0.32, "step": 3704 }, { "epoch": 0.8595290569539497, "grad_norm": 14.133755063373435, "learning_rate": 2e-06, "loss": 0.2347, "step": 3705 }, { "epoch": 0.8597610486022503, "grad_norm": 8.462385181045686, "learning_rate": 2e-06, "loss": 0.2493, "step": 3706 }, { "epoch": 0.859993040250551, "grad_norm": 14.085965020035298, "learning_rate": 2e-06, "loss": 0.3511, "step": 3707 }, { "epoch": 0.8602250318988517, "grad_norm": 58.65048537341041, "learning_rate": 2e-06, "loss": 0.3468, "step": 3708 }, { "epoch": 0.8604570235471523, "grad_norm": 13.616616688476421, "learning_rate": 2e-06, "loss": 0.2393, "step": 3709 }, { "epoch": 0.8606890151954529, "grad_norm": 12.441324389887878, "learning_rate": 2e-06, "loss": 0.2537, "step": 3710 }, { "epoch": 0.8609210068437536, "grad_norm": 14.305380497619064, "learning_rate": 2e-06, "loss": 0.2861, "step": 3711 }, { "epoch": 0.8611529984920543, "grad_norm": 16.613861586038418, "learning_rate": 2e-06, "loss": 0.2142, "step": 3712 }, { "epoch": 0.861384990140355, "grad_norm": 10.199542141395073, "learning_rate": 2e-06, "loss": 0.31, "step": 3713 }, { "epoch": 0.8616169817886556, "grad_norm": 12.55464480155109, "learning_rate": 2e-06, "loss": 0.1862, "step": 3714 }, { "epoch": 0.8618489734369563, "grad_norm": 16.10525020587473, "learning_rate": 2e-06, "loss": 0.3392, "step": 3715 }, { "epoch": 0.8620809650852569, "grad_norm": 13.438956564440527, "learning_rate": 2e-06, "loss": 0.2419, "step": 3716 }, { "epoch": 0.8623129567335576, "grad_norm": 9.808457425512904, "learning_rate": 2e-06, "loss": 0.2457, "step": 3717 }, { "epoch": 0.8625449483818582, "grad_norm": 14.98954005495591, "learning_rate": 2e-06, "loss": 0.2695, "step": 3718 }, { "epoch": 0.8627769400301589, "grad_norm": 14.064247216948907, "learning_rate": 2e-06, "loss": 0.2397, "step": 3719 }, { "epoch": 0.8630089316784596, "grad_norm": 31.850410637664915, "learning_rate": 2e-06, "loss": 0.5098, "step": 3720 }, { "epoch": 0.8632409233267603, "grad_norm": 27.13004682039094, "learning_rate": 2e-06, "loss": 0.2238, "step": 3721 }, { "epoch": 0.8634729149750608, "grad_norm": 12.81414870338563, "learning_rate": 2e-06, "loss": 0.28, "step": 3722 }, { "epoch": 0.8637049066233615, "grad_norm": 14.876898239075308, "learning_rate": 2e-06, "loss": 0.3276, "step": 3723 }, { "epoch": 0.8639368982716622, "grad_norm": 13.039596946514822, "learning_rate": 2e-06, "loss": 0.3539, "step": 3724 }, { "epoch": 0.8641688899199629, "grad_norm": 11.520663900550497, "learning_rate": 2e-06, "loss": 0.2403, "step": 3725 }, { "epoch": 0.8644008815682636, "grad_norm": 20.113972396436466, "learning_rate": 2e-06, "loss": 0.2682, "step": 3726 }, { "epoch": 0.8646328732165642, "grad_norm": 9.768363407171256, "learning_rate": 2e-06, "loss": 0.3367, "step": 3727 }, { "epoch": 0.8648648648648649, "grad_norm": 9.754462011322826, "learning_rate": 2e-06, "loss": 0.1976, "step": 3728 }, { "epoch": 0.8650968565131655, "grad_norm": 15.897468817722482, "learning_rate": 2e-06, "loss": 0.3516, "step": 3729 }, { "epoch": 0.8653288481614662, "grad_norm": 12.539672362894047, "learning_rate": 2e-06, "loss": 0.2137, "step": 3730 }, { "epoch": 0.8655608398097668, "grad_norm": 22.675675973677986, "learning_rate": 2e-06, "loss": 0.3773, "step": 3731 }, { "epoch": 0.8657928314580675, "grad_norm": 21.084484439088442, "learning_rate": 2e-06, "loss": 0.2725, "step": 3732 }, { "epoch": 0.8660248231063682, "grad_norm": 12.945612595748198, "learning_rate": 2e-06, "loss": 0.2171, "step": 3733 }, { "epoch": 0.8662568147546689, "grad_norm": 10.168197656317433, "learning_rate": 2e-06, "loss": 0.2676, "step": 3734 }, { "epoch": 0.8664888064029695, "grad_norm": 12.095214911594292, "learning_rate": 2e-06, "loss": 0.2778, "step": 3735 }, { "epoch": 0.8667207980512701, "grad_norm": 17.38157046620352, "learning_rate": 2e-06, "loss": 0.3717, "step": 3736 }, { "epoch": 0.8669527896995708, "grad_norm": 21.62919673063253, "learning_rate": 2e-06, "loss": 0.2156, "step": 3737 }, { "epoch": 0.8671847813478715, "grad_norm": 11.91250814736637, "learning_rate": 2e-06, "loss": 0.2699, "step": 3738 }, { "epoch": 0.8674167729961721, "grad_norm": 14.675029900401139, "learning_rate": 2e-06, "loss": 0.1935, "step": 3739 }, { "epoch": 0.8676487646444728, "grad_norm": 15.028454890183673, "learning_rate": 2e-06, "loss": 0.2753, "step": 3740 }, { "epoch": 0.8678807562927735, "grad_norm": 12.685941107430299, "learning_rate": 2e-06, "loss": 0.3368, "step": 3741 }, { "epoch": 0.8681127479410741, "grad_norm": 14.086959911298255, "learning_rate": 2e-06, "loss": 0.3123, "step": 3742 }, { "epoch": 0.8683447395893747, "grad_norm": 29.59069253850969, "learning_rate": 2e-06, "loss": 0.3462, "step": 3743 }, { "epoch": 0.8685767312376754, "grad_norm": 15.70500076917416, "learning_rate": 2e-06, "loss": 0.2672, "step": 3744 }, { "epoch": 0.8688087228859761, "grad_norm": 10.885835318229043, "learning_rate": 2e-06, "loss": 0.2552, "step": 3745 }, { "epoch": 0.8690407145342768, "grad_norm": 36.85881948138301, "learning_rate": 2e-06, "loss": 0.3901, "step": 3746 }, { "epoch": 0.8692727061825775, "grad_norm": 16.53745340862496, "learning_rate": 2e-06, "loss": 0.2888, "step": 3747 }, { "epoch": 0.8695046978308781, "grad_norm": 16.261524994880595, "learning_rate": 2e-06, "loss": 0.4434, "step": 3748 }, { "epoch": 0.8697366894791787, "grad_norm": 15.966881686175501, "learning_rate": 2e-06, "loss": 0.2976, "step": 3749 }, { "epoch": 0.8699686811274794, "grad_norm": 24.370815496080688, "learning_rate": 2e-06, "loss": 0.3686, "step": 3750 }, { "epoch": 0.8702006727757801, "grad_norm": 14.66934237706017, "learning_rate": 2e-06, "loss": 0.2677, "step": 3751 }, { "epoch": 0.8704326644240807, "grad_norm": 13.86784165636144, "learning_rate": 2e-06, "loss": 0.2662, "step": 3752 }, { "epoch": 0.8706646560723814, "grad_norm": 22.526612910044197, "learning_rate": 2e-06, "loss": 0.3681, "step": 3753 }, { "epoch": 0.8708966477206821, "grad_norm": 10.304405082274162, "learning_rate": 2e-06, "loss": 0.3004, "step": 3754 }, { "epoch": 0.8711286393689828, "grad_norm": 19.197447189792538, "learning_rate": 2e-06, "loss": 0.3221, "step": 3755 }, { "epoch": 0.8713606310172833, "grad_norm": 12.457718761366353, "learning_rate": 2e-06, "loss": 0.2205, "step": 3756 }, { "epoch": 0.871592622665584, "grad_norm": 12.306067228478893, "learning_rate": 2e-06, "loss": 0.3971, "step": 3757 }, { "epoch": 0.8718246143138847, "grad_norm": 18.56590997063515, "learning_rate": 2e-06, "loss": 0.2905, "step": 3758 }, { "epoch": 0.8720566059621854, "grad_norm": 18.02352978933951, "learning_rate": 2e-06, "loss": 0.3457, "step": 3759 }, { "epoch": 0.872288597610486, "grad_norm": 14.420142710853485, "learning_rate": 2e-06, "loss": 0.2826, "step": 3760 }, { "epoch": 0.8725205892587867, "grad_norm": 6.153364462060262, "learning_rate": 2e-06, "loss": 0.2808, "step": 3761 }, { "epoch": 0.8727525809070873, "grad_norm": 7.722426387420128, "learning_rate": 2e-06, "loss": 0.2435, "step": 3762 }, { "epoch": 0.872984572555388, "grad_norm": 12.084046865494983, "learning_rate": 2e-06, "loss": 0.297, "step": 3763 }, { "epoch": 0.8732165642036886, "grad_norm": 14.772204290356598, "learning_rate": 2e-06, "loss": 0.2052, "step": 3764 }, { "epoch": 0.8734485558519893, "grad_norm": 18.298133333640326, "learning_rate": 2e-06, "loss": 0.3385, "step": 3765 }, { "epoch": 0.87368054750029, "grad_norm": 8.033540187695454, "learning_rate": 2e-06, "loss": 0.2035, "step": 3766 }, { "epoch": 0.8739125391485907, "grad_norm": 14.501022464289226, "learning_rate": 2e-06, "loss": 0.3331, "step": 3767 }, { "epoch": 0.8741445307968914, "grad_norm": 8.580235421829364, "learning_rate": 2e-06, "loss": 0.2644, "step": 3768 }, { "epoch": 0.8743765224451919, "grad_norm": 8.855536509852117, "learning_rate": 2e-06, "loss": 0.2153, "step": 3769 }, { "epoch": 0.8746085140934926, "grad_norm": 11.36016770749889, "learning_rate": 2e-06, "loss": 0.2991, "step": 3770 }, { "epoch": 0.8748405057417933, "grad_norm": 13.635951353515276, "learning_rate": 2e-06, "loss": 0.3342, "step": 3771 }, { "epoch": 0.875072497390094, "grad_norm": 7.944159507447317, "learning_rate": 2e-06, "loss": 0.253, "step": 3772 }, { "epoch": 0.8753044890383946, "grad_norm": 11.172510325456205, "learning_rate": 2e-06, "loss": 0.2876, "step": 3773 }, { "epoch": 0.8755364806866953, "grad_norm": 18.73017899852579, "learning_rate": 2e-06, "loss": 0.3334, "step": 3774 }, { "epoch": 0.875768472334996, "grad_norm": 10.47454924116766, "learning_rate": 2e-06, "loss": 0.2581, "step": 3775 }, { "epoch": 0.8760004639832966, "grad_norm": 7.591698553448666, "learning_rate": 2e-06, "loss": 0.2232, "step": 3776 }, { "epoch": 0.8762324556315972, "grad_norm": 16.266482505616455, "learning_rate": 2e-06, "loss": 0.3319, "step": 3777 }, { "epoch": 0.8764644472798979, "grad_norm": 7.2554270371657354, "learning_rate": 2e-06, "loss": 0.1495, "step": 3778 }, { "epoch": 0.8766964389281986, "grad_norm": 19.825695220731603, "learning_rate": 2e-06, "loss": 0.3339, "step": 3779 }, { "epoch": 0.8769284305764993, "grad_norm": 13.50315788016503, "learning_rate": 2e-06, "loss": 0.2265, "step": 3780 }, { "epoch": 0.8771604222248, "grad_norm": 9.889392871091157, "learning_rate": 2e-06, "loss": 0.3026, "step": 3781 }, { "epoch": 0.8773924138731005, "grad_norm": 15.48773175623725, "learning_rate": 2e-06, "loss": 0.2651, "step": 3782 }, { "epoch": 0.8776244055214012, "grad_norm": 14.822191847325213, "learning_rate": 2e-06, "loss": 0.2641, "step": 3783 }, { "epoch": 0.8778563971697019, "grad_norm": 13.547907543836468, "learning_rate": 2e-06, "loss": 0.3234, "step": 3784 }, { "epoch": 0.8780883888180026, "grad_norm": 14.875134126943225, "learning_rate": 2e-06, "loss": 0.2694, "step": 3785 }, { "epoch": 0.8783203804663032, "grad_norm": 20.39451897252101, "learning_rate": 2e-06, "loss": 0.2972, "step": 3786 }, { "epoch": 0.8785523721146039, "grad_norm": 17.90206206999193, "learning_rate": 2e-06, "loss": 0.3046, "step": 3787 }, { "epoch": 0.8787843637629046, "grad_norm": 12.566470812398759, "learning_rate": 2e-06, "loss": 0.2508, "step": 3788 }, { "epoch": 0.8790163554112052, "grad_norm": 8.893414440354205, "learning_rate": 2e-06, "loss": 0.294, "step": 3789 }, { "epoch": 0.8792483470595058, "grad_norm": 19.14245934946466, "learning_rate": 2e-06, "loss": 0.3494, "step": 3790 }, { "epoch": 0.8794803387078065, "grad_norm": 15.10092811690474, "learning_rate": 2e-06, "loss": 0.3559, "step": 3791 }, { "epoch": 0.8797123303561072, "grad_norm": 10.508681437132152, "learning_rate": 2e-06, "loss": 0.2268, "step": 3792 }, { "epoch": 0.8799443220044079, "grad_norm": 21.07249450103903, "learning_rate": 2e-06, "loss": 0.245, "step": 3793 }, { "epoch": 0.8801763136527085, "grad_norm": 17.65752075276022, "learning_rate": 2e-06, "loss": 0.269, "step": 3794 }, { "epoch": 0.8804083053010091, "grad_norm": 17.466559161419354, "learning_rate": 2e-06, "loss": 0.2853, "step": 3795 }, { "epoch": 0.8806402969493098, "grad_norm": 17.14933191795175, "learning_rate": 2e-06, "loss": 0.2767, "step": 3796 }, { "epoch": 0.8808722885976105, "grad_norm": 14.78273941840289, "learning_rate": 2e-06, "loss": 0.3052, "step": 3797 }, { "epoch": 0.8811042802459111, "grad_norm": 12.53725963656686, "learning_rate": 2e-06, "loss": 0.2303, "step": 3798 }, { "epoch": 0.8813362718942118, "grad_norm": 11.924772982478647, "learning_rate": 2e-06, "loss": 0.3213, "step": 3799 }, { "epoch": 0.8815682635425125, "grad_norm": 11.76335039639468, "learning_rate": 2e-06, "loss": 0.2632, "step": 3800 }, { "epoch": 0.8818002551908132, "grad_norm": 19.85232422623424, "learning_rate": 2e-06, "loss": 0.3021, "step": 3801 }, { "epoch": 0.8820322468391137, "grad_norm": 23.65456243506742, "learning_rate": 2e-06, "loss": 0.3246, "step": 3802 }, { "epoch": 0.8822642384874144, "grad_norm": 5.716607208165604, "learning_rate": 2e-06, "loss": 0.2176, "step": 3803 }, { "epoch": 0.8824962301357151, "grad_norm": 6.022036608263883, "learning_rate": 2e-06, "loss": 0.1139, "step": 3804 }, { "epoch": 0.8827282217840158, "grad_norm": 16.350486842879306, "learning_rate": 2e-06, "loss": 0.3059, "step": 3805 }, { "epoch": 0.8829602134323165, "grad_norm": 15.069408373253298, "learning_rate": 2e-06, "loss": 0.2275, "step": 3806 }, { "epoch": 0.8831922050806171, "grad_norm": 10.384160567512737, "learning_rate": 2e-06, "loss": 0.2738, "step": 3807 }, { "epoch": 0.8834241967289178, "grad_norm": 16.660880271426983, "learning_rate": 2e-06, "loss": 0.2554, "step": 3808 }, { "epoch": 0.8836561883772184, "grad_norm": 18.374192009215328, "learning_rate": 2e-06, "loss": 0.3591, "step": 3809 }, { "epoch": 0.883888180025519, "grad_norm": 12.872310982621023, "learning_rate": 2e-06, "loss": 0.2772, "step": 3810 }, { "epoch": 0.8841201716738197, "grad_norm": 16.019644899210274, "learning_rate": 2e-06, "loss": 0.5156, "step": 3811 }, { "epoch": 0.8843521633221204, "grad_norm": 12.442141234475963, "learning_rate": 2e-06, "loss": 0.2239, "step": 3812 }, { "epoch": 0.8845841549704211, "grad_norm": 22.525745572070075, "learning_rate": 2e-06, "loss": 0.3765, "step": 3813 }, { "epoch": 0.8848161466187218, "grad_norm": 16.304838685551868, "learning_rate": 2e-06, "loss": 0.4056, "step": 3814 }, { "epoch": 0.8850481382670223, "grad_norm": 7.3709776680881705, "learning_rate": 2e-06, "loss": 0.1974, "step": 3815 }, { "epoch": 0.885280129915323, "grad_norm": 13.210776449554263, "learning_rate": 2e-06, "loss": 0.225, "step": 3816 }, { "epoch": 0.8855121215636237, "grad_norm": 16.99200282899661, "learning_rate": 2e-06, "loss": 0.2187, "step": 3817 }, { "epoch": 0.8857441132119244, "grad_norm": 14.484274148768987, "learning_rate": 2e-06, "loss": 0.2841, "step": 3818 }, { "epoch": 0.885976104860225, "grad_norm": 13.690872050877267, "learning_rate": 2e-06, "loss": 0.3322, "step": 3819 }, { "epoch": 0.8862080965085257, "grad_norm": 15.21118667258607, "learning_rate": 2e-06, "loss": 0.3085, "step": 3820 }, { "epoch": 0.8864400881568264, "grad_norm": 11.234375561403914, "learning_rate": 2e-06, "loss": 0.221, "step": 3821 }, { "epoch": 0.886672079805127, "grad_norm": 11.309069358024804, "learning_rate": 2e-06, "loss": 0.2239, "step": 3822 }, { "epoch": 0.8869040714534276, "grad_norm": 9.480559997389527, "learning_rate": 2e-06, "loss": 0.4035, "step": 3823 }, { "epoch": 0.8871360631017283, "grad_norm": 12.818376026225353, "learning_rate": 2e-06, "loss": 0.2866, "step": 3824 }, { "epoch": 0.887368054750029, "grad_norm": 13.600698803419629, "learning_rate": 2e-06, "loss": 0.2284, "step": 3825 }, { "epoch": 0.8876000463983297, "grad_norm": 12.323299185200682, "learning_rate": 2e-06, "loss": 0.2643, "step": 3826 }, { "epoch": 0.8878320380466304, "grad_norm": 16.262470843816768, "learning_rate": 2e-06, "loss": 0.3144, "step": 3827 }, { "epoch": 0.888064029694931, "grad_norm": 15.8268984032703, "learning_rate": 2e-06, "loss": 0.2297, "step": 3828 }, { "epoch": 0.8882960213432316, "grad_norm": 15.451969149107263, "learning_rate": 2e-06, "loss": 0.1857, "step": 3829 }, { "epoch": 0.8885280129915323, "grad_norm": 13.444047438579203, "learning_rate": 2e-06, "loss": 0.2971, "step": 3830 }, { "epoch": 0.888760004639833, "grad_norm": 11.468701419479576, "learning_rate": 2e-06, "loss": 0.2492, "step": 3831 }, { "epoch": 0.8889919962881336, "grad_norm": 21.788790413025257, "learning_rate": 2e-06, "loss": 0.2497, "step": 3832 }, { "epoch": 0.8892239879364343, "grad_norm": 8.534053601552444, "learning_rate": 2e-06, "loss": 0.2748, "step": 3833 }, { "epoch": 0.889455979584735, "grad_norm": 17.14812910368447, "learning_rate": 2e-06, "loss": 0.3216, "step": 3834 }, { "epoch": 0.8896879712330356, "grad_norm": 10.518702926059186, "learning_rate": 2e-06, "loss": 0.1831, "step": 3835 }, { "epoch": 0.8899199628813362, "grad_norm": 15.381986422849188, "learning_rate": 2e-06, "loss": 0.2893, "step": 3836 }, { "epoch": 0.8901519545296369, "grad_norm": 21.466599789047734, "learning_rate": 2e-06, "loss": 0.4283, "step": 3837 }, { "epoch": 0.8903839461779376, "grad_norm": 18.883265852737733, "learning_rate": 2e-06, "loss": 0.3159, "step": 3838 }, { "epoch": 0.8906159378262383, "grad_norm": 9.604683531122912, "learning_rate": 2e-06, "loss": 0.293, "step": 3839 }, { "epoch": 0.890847929474539, "grad_norm": 10.466214318640201, "learning_rate": 2e-06, "loss": 0.2093, "step": 3840 }, { "epoch": 0.8910799211228396, "grad_norm": 30.045014770740092, "learning_rate": 2e-06, "loss": 0.3721, "step": 3841 }, { "epoch": 0.8913119127711402, "grad_norm": 17.520621297975964, "learning_rate": 2e-06, "loss": 0.3136, "step": 3842 }, { "epoch": 0.8915439044194409, "grad_norm": 10.430427280788878, "learning_rate": 2e-06, "loss": 0.3048, "step": 3843 }, { "epoch": 0.8917758960677415, "grad_norm": 16.52127291757037, "learning_rate": 2e-06, "loss": 0.2781, "step": 3844 }, { "epoch": 0.8920078877160422, "grad_norm": 14.03793238580409, "learning_rate": 2e-06, "loss": 0.3499, "step": 3845 }, { "epoch": 0.8922398793643429, "grad_norm": 13.465497188671403, "learning_rate": 2e-06, "loss": 0.2369, "step": 3846 }, { "epoch": 0.8924718710126436, "grad_norm": 16.203513528553657, "learning_rate": 2e-06, "loss": 0.3037, "step": 3847 }, { "epoch": 0.8927038626609443, "grad_norm": 16.91252227614217, "learning_rate": 2e-06, "loss": 0.3807, "step": 3848 }, { "epoch": 0.8929358543092448, "grad_norm": 15.13921064541899, "learning_rate": 2e-06, "loss": 0.2367, "step": 3849 }, { "epoch": 0.8931678459575455, "grad_norm": 17.470809701462834, "learning_rate": 2e-06, "loss": 0.4561, "step": 3850 }, { "epoch": 0.8933998376058462, "grad_norm": 17.55558022042597, "learning_rate": 2e-06, "loss": 0.2008, "step": 3851 }, { "epoch": 0.8936318292541469, "grad_norm": 15.181061039277026, "learning_rate": 2e-06, "loss": 0.312, "step": 3852 }, { "epoch": 0.8938638209024475, "grad_norm": 16.306178080300985, "learning_rate": 2e-06, "loss": 0.3254, "step": 3853 }, { "epoch": 0.8940958125507482, "grad_norm": 13.315390126795844, "learning_rate": 2e-06, "loss": 0.1952, "step": 3854 }, { "epoch": 0.8943278041990488, "grad_norm": 14.600753372967226, "learning_rate": 2e-06, "loss": 0.2792, "step": 3855 }, { "epoch": 0.8945597958473495, "grad_norm": 25.411581518678336, "learning_rate": 2e-06, "loss": 0.3914, "step": 3856 }, { "epoch": 0.8947917874956501, "grad_norm": 14.153992756140097, "learning_rate": 2e-06, "loss": 0.3199, "step": 3857 }, { "epoch": 0.8950237791439508, "grad_norm": 30.60125428323226, "learning_rate": 2e-06, "loss": 0.3962, "step": 3858 }, { "epoch": 0.8952557707922515, "grad_norm": 8.730276553996347, "learning_rate": 2e-06, "loss": 0.2931, "step": 3859 }, { "epoch": 0.8954877624405522, "grad_norm": 23.977248564892673, "learning_rate": 2e-06, "loss": 0.3746, "step": 3860 }, { "epoch": 0.8957197540888528, "grad_norm": 12.272894635641682, "learning_rate": 2e-06, "loss": 0.1781, "step": 3861 }, { "epoch": 0.8959517457371534, "grad_norm": 10.266127690588375, "learning_rate": 2e-06, "loss": 0.198, "step": 3862 }, { "epoch": 0.8961837373854541, "grad_norm": 28.403463540976567, "learning_rate": 2e-06, "loss": 0.3536, "step": 3863 }, { "epoch": 0.8964157290337548, "grad_norm": 11.053863738445942, "learning_rate": 2e-06, "loss": 0.2466, "step": 3864 }, { "epoch": 0.8966477206820554, "grad_norm": 9.177659242675198, "learning_rate": 2e-06, "loss": 0.2325, "step": 3865 }, { "epoch": 0.8968797123303561, "grad_norm": 9.30944371626104, "learning_rate": 2e-06, "loss": 0.2986, "step": 3866 }, { "epoch": 0.8971117039786568, "grad_norm": 12.882120895306459, "learning_rate": 2e-06, "loss": 0.3005, "step": 3867 }, { "epoch": 0.8973436956269575, "grad_norm": 8.934266590413841, "learning_rate": 2e-06, "loss": 0.1734, "step": 3868 }, { "epoch": 0.897575687275258, "grad_norm": 5.311613108640557, "learning_rate": 2e-06, "loss": 0.2151, "step": 3869 }, { "epoch": 0.8978076789235587, "grad_norm": 24.19912099032846, "learning_rate": 2e-06, "loss": 0.3605, "step": 3870 }, { "epoch": 0.8980396705718594, "grad_norm": 17.4060760039238, "learning_rate": 2e-06, "loss": 0.36, "step": 3871 }, { "epoch": 0.8982716622201601, "grad_norm": 9.24006750676526, "learning_rate": 2e-06, "loss": 0.2765, "step": 3872 }, { "epoch": 0.8985036538684608, "grad_norm": 12.557047617052072, "learning_rate": 2e-06, "loss": 0.3256, "step": 3873 }, { "epoch": 0.8987356455167614, "grad_norm": 16.55716231109906, "learning_rate": 2e-06, "loss": 0.3307, "step": 3874 }, { "epoch": 0.898967637165062, "grad_norm": 14.257984695745476, "learning_rate": 2e-06, "loss": 0.2903, "step": 3875 }, { "epoch": 0.8991996288133627, "grad_norm": 6.765606224454405, "learning_rate": 2e-06, "loss": 0.1147, "step": 3876 }, { "epoch": 0.8994316204616634, "grad_norm": 19.732475611334035, "learning_rate": 2e-06, "loss": 0.3762, "step": 3877 }, { "epoch": 0.899663612109964, "grad_norm": 10.076354836054715, "learning_rate": 2e-06, "loss": 0.2457, "step": 3878 }, { "epoch": 0.8998956037582647, "grad_norm": 22.692936353918086, "learning_rate": 2e-06, "loss": 0.3286, "step": 3879 }, { "epoch": 0.9001275954065654, "grad_norm": 18.87531924416284, "learning_rate": 2e-06, "loss": 0.3505, "step": 3880 }, { "epoch": 0.9003595870548661, "grad_norm": 21.461426922667258, "learning_rate": 2e-06, "loss": 0.4371, "step": 3881 }, { "epoch": 0.9005915787031666, "grad_norm": 12.672233116120237, "learning_rate": 2e-06, "loss": 0.2552, "step": 3882 }, { "epoch": 0.9008235703514673, "grad_norm": 14.92757839909907, "learning_rate": 2e-06, "loss": 0.2595, "step": 3883 }, { "epoch": 0.901055561999768, "grad_norm": 9.864217379715534, "learning_rate": 2e-06, "loss": 0.2253, "step": 3884 }, { "epoch": 0.9012875536480687, "grad_norm": 22.137721500164403, "learning_rate": 2e-06, "loss": 0.316, "step": 3885 }, { "epoch": 0.9015195452963694, "grad_norm": 8.633971401134554, "learning_rate": 2e-06, "loss": 0.2195, "step": 3886 }, { "epoch": 0.90175153694467, "grad_norm": 11.99123929014511, "learning_rate": 2e-06, "loss": 0.2543, "step": 3887 }, { "epoch": 0.9019835285929707, "grad_norm": 23.05153162087007, "learning_rate": 2e-06, "loss": 0.2595, "step": 3888 }, { "epoch": 0.9022155202412713, "grad_norm": 15.378579184780568, "learning_rate": 2e-06, "loss": 0.2613, "step": 3889 }, { "epoch": 0.902447511889572, "grad_norm": 9.358759181548711, "learning_rate": 2e-06, "loss": 0.1977, "step": 3890 }, { "epoch": 0.9026795035378726, "grad_norm": 14.072218217487547, "learning_rate": 2e-06, "loss": 0.314, "step": 3891 }, { "epoch": 0.9029114951861733, "grad_norm": 8.650515094354374, "learning_rate": 2e-06, "loss": 0.2392, "step": 3892 }, { "epoch": 0.903143486834474, "grad_norm": 13.973869182199863, "learning_rate": 2e-06, "loss": 0.2385, "step": 3893 }, { "epoch": 0.9033754784827747, "grad_norm": 28.02108106068224, "learning_rate": 2e-06, "loss": 0.4868, "step": 3894 }, { "epoch": 0.9036074701310752, "grad_norm": 10.126726222892133, "learning_rate": 2e-06, "loss": 0.2712, "step": 3895 }, { "epoch": 0.9038394617793759, "grad_norm": 14.446733796997036, "learning_rate": 2e-06, "loss": 0.2759, "step": 3896 }, { "epoch": 0.9040714534276766, "grad_norm": 9.142437995803487, "learning_rate": 2e-06, "loss": 0.2935, "step": 3897 }, { "epoch": 0.9043034450759773, "grad_norm": 16.347152291076053, "learning_rate": 2e-06, "loss": 0.3192, "step": 3898 }, { "epoch": 0.9045354367242779, "grad_norm": 10.663833545657894, "learning_rate": 2e-06, "loss": 0.2734, "step": 3899 }, { "epoch": 0.9047674283725786, "grad_norm": 18.334678168407894, "learning_rate": 2e-06, "loss": 0.2272, "step": 3900 }, { "epoch": 0.9049994200208793, "grad_norm": 9.98864088999583, "learning_rate": 2e-06, "loss": 0.2761, "step": 3901 }, { "epoch": 0.9052314116691799, "grad_norm": 16.88195086485774, "learning_rate": 2e-06, "loss": 0.2828, "step": 3902 }, { "epoch": 0.9054634033174805, "grad_norm": 14.193321843242153, "learning_rate": 2e-06, "loss": 0.3519, "step": 3903 }, { "epoch": 0.9056953949657812, "grad_norm": 24.72235772205411, "learning_rate": 2e-06, "loss": 0.3367, "step": 3904 }, { "epoch": 0.9059273866140819, "grad_norm": 20.661583880476023, "learning_rate": 2e-06, "loss": 0.3861, "step": 3905 }, { "epoch": 0.9061593782623826, "grad_norm": 10.309625510352685, "learning_rate": 2e-06, "loss": 0.2169, "step": 3906 }, { "epoch": 0.9063913699106833, "grad_norm": 15.189679441464554, "learning_rate": 2e-06, "loss": 0.1656, "step": 3907 }, { "epoch": 0.9066233615589839, "grad_norm": 7.763584174957461, "learning_rate": 2e-06, "loss": 0.2384, "step": 3908 }, { "epoch": 0.9068553532072845, "grad_norm": 32.802394565514405, "learning_rate": 2e-06, "loss": 0.418, "step": 3909 }, { "epoch": 0.9070873448555852, "grad_norm": 19.839585714773417, "learning_rate": 2e-06, "loss": 0.2461, "step": 3910 }, { "epoch": 0.9073193365038859, "grad_norm": 13.767551143679269, "learning_rate": 2e-06, "loss": 0.2116, "step": 3911 }, { "epoch": 0.9075513281521865, "grad_norm": 20.153545824967566, "learning_rate": 2e-06, "loss": 0.3376, "step": 3912 }, { "epoch": 0.9077833198004872, "grad_norm": 8.939747154687687, "learning_rate": 2e-06, "loss": 0.2546, "step": 3913 }, { "epoch": 0.9080153114487879, "grad_norm": 32.43756975476725, "learning_rate": 2e-06, "loss": 0.3785, "step": 3914 }, { "epoch": 0.9082473030970885, "grad_norm": 11.848078031401903, "learning_rate": 2e-06, "loss": 0.2395, "step": 3915 }, { "epoch": 0.9084792947453891, "grad_norm": 11.22352972732074, "learning_rate": 2e-06, "loss": 0.215, "step": 3916 }, { "epoch": 0.9087112863936898, "grad_norm": 17.184953012684105, "learning_rate": 2e-06, "loss": 0.3557, "step": 3917 }, { "epoch": 0.9089432780419905, "grad_norm": 8.829476518236506, "learning_rate": 2e-06, "loss": 0.2368, "step": 3918 }, { "epoch": 0.9091752696902912, "grad_norm": 25.986430156365554, "learning_rate": 2e-06, "loss": 0.2737, "step": 3919 }, { "epoch": 0.9094072613385918, "grad_norm": 8.631894893578531, "learning_rate": 2e-06, "loss": 0.2415, "step": 3920 }, { "epoch": 0.9096392529868925, "grad_norm": 14.692330088194996, "learning_rate": 2e-06, "loss": 0.2917, "step": 3921 }, { "epoch": 0.9098712446351931, "grad_norm": 10.057081616113766, "learning_rate": 2e-06, "loss": 0.3515, "step": 3922 }, { "epoch": 0.9101032362834938, "grad_norm": 18.242861421349204, "learning_rate": 2e-06, "loss": 0.3514, "step": 3923 }, { "epoch": 0.9103352279317944, "grad_norm": 12.00313556221313, "learning_rate": 2e-06, "loss": 0.3548, "step": 3924 }, { "epoch": 0.9105672195800951, "grad_norm": 8.759182341190806, "learning_rate": 2e-06, "loss": 0.2854, "step": 3925 }, { "epoch": 0.9107992112283958, "grad_norm": 21.635245268730696, "learning_rate": 2e-06, "loss": 0.261, "step": 3926 }, { "epoch": 0.9110312028766965, "grad_norm": 14.39027864566995, "learning_rate": 2e-06, "loss": 0.2331, "step": 3927 }, { "epoch": 0.9112631945249972, "grad_norm": 17.651616795395558, "learning_rate": 2e-06, "loss": 0.3345, "step": 3928 }, { "epoch": 0.9114951861732977, "grad_norm": 17.026843525740297, "learning_rate": 2e-06, "loss": 0.2664, "step": 3929 }, { "epoch": 0.9117271778215984, "grad_norm": 15.703856377194578, "learning_rate": 2e-06, "loss": 0.3038, "step": 3930 }, { "epoch": 0.9119591694698991, "grad_norm": 19.510509020521962, "learning_rate": 2e-06, "loss": 0.3946, "step": 3931 }, { "epoch": 0.9121911611181998, "grad_norm": 21.184958616085613, "learning_rate": 2e-06, "loss": 0.2226, "step": 3932 }, { "epoch": 0.9124231527665004, "grad_norm": 22.910390703702443, "learning_rate": 2e-06, "loss": 0.3747, "step": 3933 }, { "epoch": 0.9126551444148011, "grad_norm": 21.79356223730189, "learning_rate": 2e-06, "loss": 0.2648, "step": 3934 }, { "epoch": 0.9128871360631017, "grad_norm": 16.719709200320167, "learning_rate": 2e-06, "loss": 0.3129, "step": 3935 }, { "epoch": 0.9131191277114024, "grad_norm": 15.75587390438053, "learning_rate": 2e-06, "loss": 0.2771, "step": 3936 }, { "epoch": 0.913351119359703, "grad_norm": 20.445984418989443, "learning_rate": 2e-06, "loss": 0.3287, "step": 3937 }, { "epoch": 0.9135831110080037, "grad_norm": 9.295243164429655, "learning_rate": 2e-06, "loss": 0.2542, "step": 3938 }, { "epoch": 0.9138151026563044, "grad_norm": 16.194199891364974, "learning_rate": 2e-06, "loss": 0.2801, "step": 3939 }, { "epoch": 0.9140470943046051, "grad_norm": 20.491671606951503, "learning_rate": 2e-06, "loss": 0.2353, "step": 3940 }, { "epoch": 0.9142790859529057, "grad_norm": 10.452287640229645, "learning_rate": 2e-06, "loss": 0.2174, "step": 3941 }, { "epoch": 0.9145110776012063, "grad_norm": 15.182715386927697, "learning_rate": 2e-06, "loss": 0.2636, "step": 3942 }, { "epoch": 0.914743069249507, "grad_norm": 14.727316457709057, "learning_rate": 2e-06, "loss": 0.2375, "step": 3943 }, { "epoch": 0.9149750608978077, "grad_norm": 14.991906354041783, "learning_rate": 2e-06, "loss": 0.2735, "step": 3944 }, { "epoch": 0.9152070525461083, "grad_norm": 20.877355785804873, "learning_rate": 2e-06, "loss": 0.3779, "step": 3945 }, { "epoch": 0.915439044194409, "grad_norm": 15.027199298330427, "learning_rate": 2e-06, "loss": 0.2824, "step": 3946 }, { "epoch": 0.9156710358427097, "grad_norm": 8.425894451610421, "learning_rate": 2e-06, "loss": 0.1924, "step": 3947 }, { "epoch": 0.9159030274910103, "grad_norm": 25.349503788018012, "learning_rate": 2e-06, "loss": 0.3346, "step": 3948 }, { "epoch": 0.916135019139311, "grad_norm": 10.193314799347108, "learning_rate": 2e-06, "loss": 0.2686, "step": 3949 }, { "epoch": 0.9163670107876116, "grad_norm": 18.574048790419745, "learning_rate": 2e-06, "loss": 0.4334, "step": 3950 }, { "epoch": 0.9165990024359123, "grad_norm": 6.773484630130486, "learning_rate": 2e-06, "loss": 0.1877, "step": 3951 }, { "epoch": 0.916830994084213, "grad_norm": 13.643189811756903, "learning_rate": 2e-06, "loss": 0.3131, "step": 3952 }, { "epoch": 0.9170629857325137, "grad_norm": 19.924213913482326, "learning_rate": 2e-06, "loss": 0.3825, "step": 3953 }, { "epoch": 0.9172949773808143, "grad_norm": 12.958783220911148, "learning_rate": 2e-06, "loss": 0.3044, "step": 3954 }, { "epoch": 0.9175269690291149, "grad_norm": 12.0595184813058, "learning_rate": 2e-06, "loss": 0.2276, "step": 3955 }, { "epoch": 0.9177589606774156, "grad_norm": 19.21193616308311, "learning_rate": 2e-06, "loss": 0.2676, "step": 3956 }, { "epoch": 0.9179909523257163, "grad_norm": 11.476061295294725, "learning_rate": 2e-06, "loss": 0.2979, "step": 3957 }, { "epoch": 0.9182229439740169, "grad_norm": 19.28669197764272, "learning_rate": 2e-06, "loss": 0.4399, "step": 3958 }, { "epoch": 0.9184549356223176, "grad_norm": 7.265364825229757, "learning_rate": 2e-06, "loss": 0.2276, "step": 3959 }, { "epoch": 0.9186869272706183, "grad_norm": 15.89433824195223, "learning_rate": 2e-06, "loss": 0.2828, "step": 3960 }, { "epoch": 0.918918918918919, "grad_norm": 9.074347573516029, "learning_rate": 2e-06, "loss": 0.2016, "step": 3961 }, { "epoch": 0.9191509105672195, "grad_norm": 16.415884335639657, "learning_rate": 2e-06, "loss": 0.2457, "step": 3962 }, { "epoch": 0.9193829022155202, "grad_norm": 26.616894388131367, "learning_rate": 2e-06, "loss": 0.2135, "step": 3963 }, { "epoch": 0.9196148938638209, "grad_norm": 6.792574370355875, "learning_rate": 2e-06, "loss": 0.215, "step": 3964 }, { "epoch": 0.9198468855121216, "grad_norm": 14.404411259491736, "learning_rate": 2e-06, "loss": 0.3734, "step": 3965 }, { "epoch": 0.9200788771604222, "grad_norm": 28.16018432451622, "learning_rate": 2e-06, "loss": 0.4642, "step": 3966 }, { "epoch": 0.9203108688087229, "grad_norm": 19.233118360271835, "learning_rate": 2e-06, "loss": 0.3211, "step": 3967 }, { "epoch": 0.9205428604570235, "grad_norm": 13.944355764448762, "learning_rate": 2e-06, "loss": 0.2451, "step": 3968 }, { "epoch": 0.9207748521053242, "grad_norm": 12.641243085346089, "learning_rate": 2e-06, "loss": 0.2858, "step": 3969 }, { "epoch": 0.9210068437536248, "grad_norm": 18.809098112566154, "learning_rate": 2e-06, "loss": 0.3246, "step": 3970 }, { "epoch": 0.9212388354019255, "grad_norm": 21.782444054054192, "learning_rate": 2e-06, "loss": 0.3959, "step": 3971 }, { "epoch": 0.9214708270502262, "grad_norm": 13.814108123062622, "learning_rate": 2e-06, "loss": 0.4264, "step": 3972 }, { "epoch": 0.9217028186985269, "grad_norm": 21.81885611322097, "learning_rate": 2e-06, "loss": 0.3552, "step": 3973 }, { "epoch": 0.9219348103468276, "grad_norm": 14.101735582385354, "learning_rate": 2e-06, "loss": 0.3584, "step": 3974 }, { "epoch": 0.9221668019951281, "grad_norm": 11.043363753344781, "learning_rate": 2e-06, "loss": 0.2681, "step": 3975 }, { "epoch": 0.9223987936434288, "grad_norm": 13.593291215016803, "learning_rate": 2e-06, "loss": 0.2515, "step": 3976 }, { "epoch": 0.9226307852917295, "grad_norm": 12.362049676839284, "learning_rate": 2e-06, "loss": 0.257, "step": 3977 }, { "epoch": 0.9228627769400302, "grad_norm": 13.270066216692763, "learning_rate": 2e-06, "loss": 0.3117, "step": 3978 }, { "epoch": 0.9230947685883308, "grad_norm": 14.579060042594897, "learning_rate": 2e-06, "loss": 0.3214, "step": 3979 }, { "epoch": 0.9233267602366315, "grad_norm": 14.720997537346065, "learning_rate": 2e-06, "loss": 0.2896, "step": 3980 }, { "epoch": 0.9235587518849322, "grad_norm": 27.34929272148167, "learning_rate": 2e-06, "loss": 0.3681, "step": 3981 }, { "epoch": 0.9237907435332328, "grad_norm": 6.8723669925715605, "learning_rate": 2e-06, "loss": 0.2386, "step": 3982 }, { "epoch": 0.9240227351815334, "grad_norm": 15.46405296753594, "learning_rate": 2e-06, "loss": 0.2891, "step": 3983 }, { "epoch": 0.9242547268298341, "grad_norm": 21.133738829314755, "learning_rate": 2e-06, "loss": 0.4005, "step": 3984 }, { "epoch": 0.9244867184781348, "grad_norm": 23.69066601838177, "learning_rate": 2e-06, "loss": 0.2624, "step": 3985 }, { "epoch": 0.9247187101264355, "grad_norm": 11.928606304200803, "learning_rate": 2e-06, "loss": 0.2461, "step": 3986 }, { "epoch": 0.9249507017747362, "grad_norm": 7.763964912210666, "learning_rate": 2e-06, "loss": 0.299, "step": 3987 }, { "epoch": 0.9251826934230367, "grad_norm": 14.448936450722345, "learning_rate": 2e-06, "loss": 0.2497, "step": 3988 }, { "epoch": 0.9254146850713374, "grad_norm": 6.527725086557093, "learning_rate": 2e-06, "loss": 0.231, "step": 3989 }, { "epoch": 0.9256466767196381, "grad_norm": 22.37946721451997, "learning_rate": 2e-06, "loss": 0.3802, "step": 3990 }, { "epoch": 0.9258786683679388, "grad_norm": 12.629177421202952, "learning_rate": 2e-06, "loss": 0.3255, "step": 3991 }, { "epoch": 0.9261106600162394, "grad_norm": 15.587491757728799, "learning_rate": 2e-06, "loss": 0.2531, "step": 3992 }, { "epoch": 0.9263426516645401, "grad_norm": 11.847664403920094, "learning_rate": 2e-06, "loss": 0.269, "step": 3993 }, { "epoch": 0.9265746433128408, "grad_norm": 13.322943271190882, "learning_rate": 2e-06, "loss": 0.278, "step": 3994 }, { "epoch": 0.9268066349611414, "grad_norm": 14.495165691483393, "learning_rate": 2e-06, "loss": 0.3801, "step": 3995 }, { "epoch": 0.927038626609442, "grad_norm": 14.3677196682021, "learning_rate": 2e-06, "loss": 0.2334, "step": 3996 }, { "epoch": 0.9272706182577427, "grad_norm": 10.218329276484413, "learning_rate": 2e-06, "loss": 0.2553, "step": 3997 }, { "epoch": 0.9275026099060434, "grad_norm": 34.637262702418646, "learning_rate": 2e-06, "loss": 0.4718, "step": 3998 }, { "epoch": 0.9277346015543441, "grad_norm": 9.237413082217557, "learning_rate": 2e-06, "loss": 0.2473, "step": 3999 }, { "epoch": 0.9279665932026447, "grad_norm": 16.090490051135355, "learning_rate": 2e-06, "loss": 0.2933, "step": 4000 }, { "epoch": 0.9281985848509454, "grad_norm": 18.131571394205444, "learning_rate": 2e-06, "loss": 0.3476, "step": 4001 }, { "epoch": 0.928430576499246, "grad_norm": 13.298876519414382, "learning_rate": 2e-06, "loss": 0.2335, "step": 4002 }, { "epoch": 0.9286625681475467, "grad_norm": 13.195268495451675, "learning_rate": 2e-06, "loss": 0.2911, "step": 4003 }, { "epoch": 0.9288945597958473, "grad_norm": 8.619850603564261, "learning_rate": 2e-06, "loss": 0.1825, "step": 4004 }, { "epoch": 0.929126551444148, "grad_norm": 9.168772392177162, "learning_rate": 2e-06, "loss": 0.2824, "step": 4005 }, { "epoch": 0.9293585430924487, "grad_norm": 18.2102243935687, "learning_rate": 2e-06, "loss": 0.2815, "step": 4006 }, { "epoch": 0.9295905347407494, "grad_norm": 10.502444860681994, "learning_rate": 2e-06, "loss": 0.3784, "step": 4007 }, { "epoch": 0.9298225263890499, "grad_norm": 19.634937250194543, "learning_rate": 2e-06, "loss": 0.3284, "step": 4008 }, { "epoch": 0.9300545180373506, "grad_norm": 17.777781446560336, "learning_rate": 2e-06, "loss": 0.3177, "step": 4009 }, { "epoch": 0.9302865096856513, "grad_norm": 15.66459952879304, "learning_rate": 2e-06, "loss": 0.3439, "step": 4010 }, { "epoch": 0.930518501333952, "grad_norm": 12.296224082963853, "learning_rate": 2e-06, "loss": 0.2705, "step": 4011 }, { "epoch": 0.9307504929822527, "grad_norm": 13.958049366132693, "learning_rate": 2e-06, "loss": 0.2072, "step": 4012 }, { "epoch": 0.9309824846305533, "grad_norm": 11.377406989138178, "learning_rate": 2e-06, "loss": 0.2086, "step": 4013 }, { "epoch": 0.931214476278854, "grad_norm": 18.73659133228136, "learning_rate": 2e-06, "loss": 0.3641, "step": 4014 }, { "epoch": 0.9314464679271546, "grad_norm": 9.579161039043823, "learning_rate": 2e-06, "loss": 0.1496, "step": 4015 }, { "epoch": 0.9316784595754553, "grad_norm": 20.95385075541004, "learning_rate": 2e-06, "loss": 0.4974, "step": 4016 }, { "epoch": 0.9319104512237559, "grad_norm": 15.665155596272864, "learning_rate": 2e-06, "loss": 0.3812, "step": 4017 }, { "epoch": 0.9321424428720566, "grad_norm": 10.504501557374716, "learning_rate": 2e-06, "loss": 0.1418, "step": 4018 }, { "epoch": 0.9323744345203573, "grad_norm": 10.85342891308318, "learning_rate": 2e-06, "loss": 0.319, "step": 4019 }, { "epoch": 0.932606426168658, "grad_norm": 8.10601244212646, "learning_rate": 2e-06, "loss": 0.2395, "step": 4020 }, { "epoch": 0.9328384178169586, "grad_norm": 13.022680006869349, "learning_rate": 2e-06, "loss": 0.2419, "step": 4021 }, { "epoch": 0.9330704094652592, "grad_norm": 12.13270254820567, "learning_rate": 2e-06, "loss": 0.2383, "step": 4022 }, { "epoch": 0.9333024011135599, "grad_norm": 16.32468558517697, "learning_rate": 2e-06, "loss": 0.2664, "step": 4023 }, { "epoch": 0.9335343927618606, "grad_norm": 12.246989823743979, "learning_rate": 2e-06, "loss": 0.3144, "step": 4024 }, { "epoch": 0.9337663844101612, "grad_norm": 15.224846029260464, "learning_rate": 2e-06, "loss": 0.2838, "step": 4025 }, { "epoch": 0.9339983760584619, "grad_norm": 15.018031125518513, "learning_rate": 2e-06, "loss": 0.2506, "step": 4026 }, { "epoch": 0.9342303677067626, "grad_norm": 7.434044658596272, "learning_rate": 2e-06, "loss": 0.2986, "step": 4027 }, { "epoch": 0.9344623593550632, "grad_norm": 20.494236364736892, "learning_rate": 2e-06, "loss": 0.2571, "step": 4028 }, { "epoch": 0.9346943510033638, "grad_norm": 6.418952262961799, "learning_rate": 2e-06, "loss": 0.2598, "step": 4029 }, { "epoch": 0.9349263426516645, "grad_norm": 21.616245646302445, "learning_rate": 2e-06, "loss": 0.2834, "step": 4030 }, { "epoch": 0.9351583342999652, "grad_norm": 18.3306232616933, "learning_rate": 2e-06, "loss": 0.3145, "step": 4031 }, { "epoch": 0.9353903259482659, "grad_norm": 10.352480111360936, "learning_rate": 2e-06, "loss": 0.2459, "step": 4032 }, { "epoch": 0.9356223175965666, "grad_norm": 11.204629468614156, "learning_rate": 2e-06, "loss": 0.2544, "step": 4033 }, { "epoch": 0.9358543092448672, "grad_norm": 16.492577310753635, "learning_rate": 2e-06, "loss": 0.2445, "step": 4034 }, { "epoch": 0.9360863008931678, "grad_norm": 22.371945371319974, "learning_rate": 2e-06, "loss": 0.2821, "step": 4035 }, { "epoch": 0.9363182925414685, "grad_norm": 8.859132258593608, "learning_rate": 2e-06, "loss": 0.3099, "step": 4036 }, { "epoch": 0.9365502841897692, "grad_norm": 17.014776481789077, "learning_rate": 2e-06, "loss": 0.3262, "step": 4037 }, { "epoch": 0.9367822758380698, "grad_norm": 10.625597532800239, "learning_rate": 2e-06, "loss": 0.2873, "step": 4038 }, { "epoch": 0.9370142674863705, "grad_norm": 14.122822330175744, "learning_rate": 2e-06, "loss": 0.38, "step": 4039 }, { "epoch": 0.9372462591346712, "grad_norm": 10.328734624379049, "learning_rate": 2e-06, "loss": 0.2192, "step": 4040 }, { "epoch": 0.9374782507829719, "grad_norm": 9.09386762470707, "learning_rate": 2e-06, "loss": 0.2266, "step": 4041 }, { "epoch": 0.9377102424312724, "grad_norm": 18.684735772439982, "learning_rate": 2e-06, "loss": 0.2231, "step": 4042 }, { "epoch": 0.9379422340795731, "grad_norm": 9.957849633530989, "learning_rate": 2e-06, "loss": 0.2933, "step": 4043 }, { "epoch": 0.9381742257278738, "grad_norm": 13.49504899297702, "learning_rate": 2e-06, "loss": 0.1928, "step": 4044 }, { "epoch": 0.9384062173761745, "grad_norm": 11.66253981574796, "learning_rate": 2e-06, "loss": 0.2601, "step": 4045 }, { "epoch": 0.9386382090244751, "grad_norm": 10.106589206845713, "learning_rate": 2e-06, "loss": 0.2958, "step": 4046 }, { "epoch": 0.9388702006727758, "grad_norm": 12.192365381188443, "learning_rate": 2e-06, "loss": 0.2948, "step": 4047 }, { "epoch": 0.9391021923210764, "grad_norm": 11.08645100908848, "learning_rate": 2e-06, "loss": 0.2181, "step": 4048 }, { "epoch": 0.9393341839693771, "grad_norm": 10.461564499933814, "learning_rate": 2e-06, "loss": 0.2554, "step": 4049 }, { "epoch": 0.9395661756176777, "grad_norm": 12.516633499592912, "learning_rate": 2e-06, "loss": 0.3163, "step": 4050 }, { "epoch": 0.9397981672659784, "grad_norm": 17.879976322757418, "learning_rate": 2e-06, "loss": 0.3279, "step": 4051 }, { "epoch": 0.9400301589142791, "grad_norm": 10.951916309028428, "learning_rate": 2e-06, "loss": 0.2341, "step": 4052 }, { "epoch": 0.9402621505625798, "grad_norm": 8.976366702731776, "learning_rate": 2e-06, "loss": 0.1948, "step": 4053 }, { "epoch": 0.9404941422108805, "grad_norm": 9.076599604883869, "learning_rate": 2e-06, "loss": 0.3378, "step": 4054 }, { "epoch": 0.940726133859181, "grad_norm": 15.382474393870892, "learning_rate": 2e-06, "loss": 0.2488, "step": 4055 }, { "epoch": 0.9409581255074817, "grad_norm": 14.329098715179613, "learning_rate": 2e-06, "loss": 0.3046, "step": 4056 }, { "epoch": 0.9411901171557824, "grad_norm": 9.195493737277706, "learning_rate": 2e-06, "loss": 0.2264, "step": 4057 }, { "epoch": 0.9414221088040831, "grad_norm": 10.882441981983053, "learning_rate": 2e-06, "loss": 0.2679, "step": 4058 }, { "epoch": 0.9416541004523837, "grad_norm": 7.640288467837494, "learning_rate": 2e-06, "loss": 0.1782, "step": 4059 }, { "epoch": 0.9418860921006844, "grad_norm": 7.9402748004771295, "learning_rate": 2e-06, "loss": 0.2199, "step": 4060 }, { "epoch": 0.9421180837489851, "grad_norm": 14.183099572940574, "learning_rate": 2e-06, "loss": 0.197, "step": 4061 }, { "epoch": 0.9423500753972857, "grad_norm": 14.253871053319683, "learning_rate": 2e-06, "loss": 0.2358, "step": 4062 }, { "epoch": 0.9425820670455863, "grad_norm": 14.87910110838926, "learning_rate": 2e-06, "loss": 0.249, "step": 4063 }, { "epoch": 0.942814058693887, "grad_norm": 18.414766910385318, "learning_rate": 2e-06, "loss": 0.2946, "step": 4064 }, { "epoch": 0.9430460503421877, "grad_norm": 19.484331898866966, "learning_rate": 2e-06, "loss": 0.3497, "step": 4065 }, { "epoch": 0.9432780419904884, "grad_norm": 7.831919172941622, "learning_rate": 2e-06, "loss": 0.2108, "step": 4066 }, { "epoch": 0.943510033638789, "grad_norm": 13.970992782476834, "learning_rate": 2e-06, "loss": 0.2523, "step": 4067 }, { "epoch": 0.9437420252870896, "grad_norm": 15.572274581276728, "learning_rate": 2e-06, "loss": 0.3366, "step": 4068 }, { "epoch": 0.9439740169353903, "grad_norm": 18.73910628418021, "learning_rate": 2e-06, "loss": 0.3037, "step": 4069 }, { "epoch": 0.944206008583691, "grad_norm": 14.560548536813151, "learning_rate": 2e-06, "loss": 0.2343, "step": 4070 }, { "epoch": 0.9444380002319916, "grad_norm": 13.014495432933003, "learning_rate": 2e-06, "loss": 0.2704, "step": 4071 }, { "epoch": 0.9446699918802923, "grad_norm": 16.70216711953269, "learning_rate": 2e-06, "loss": 0.2593, "step": 4072 }, { "epoch": 0.944901983528593, "grad_norm": 12.274314755806715, "learning_rate": 2e-06, "loss": 0.334, "step": 4073 }, { "epoch": 0.9451339751768937, "grad_norm": 12.610739079859846, "learning_rate": 2e-06, "loss": 0.261, "step": 4074 }, { "epoch": 0.9453659668251942, "grad_norm": 7.652569686562883, "learning_rate": 2e-06, "loss": 0.253, "step": 4075 }, { "epoch": 0.9455979584734949, "grad_norm": 9.82109278599335, "learning_rate": 2e-06, "loss": 0.3167, "step": 4076 }, { "epoch": 0.9458299501217956, "grad_norm": 15.045805903494685, "learning_rate": 2e-06, "loss": 0.391, "step": 4077 }, { "epoch": 0.9460619417700963, "grad_norm": 20.756814333126798, "learning_rate": 2e-06, "loss": 0.3532, "step": 4078 }, { "epoch": 0.946293933418397, "grad_norm": 11.520740221476641, "learning_rate": 2e-06, "loss": 0.2224, "step": 4079 }, { "epoch": 0.9465259250666976, "grad_norm": 20.332249191152876, "learning_rate": 2e-06, "loss": 0.3074, "step": 4080 }, { "epoch": 0.9467579167149983, "grad_norm": 9.704537942099549, "learning_rate": 2e-06, "loss": 0.2567, "step": 4081 }, { "epoch": 0.9469899083632989, "grad_norm": 12.313089663784114, "learning_rate": 2e-06, "loss": 0.3028, "step": 4082 }, { "epoch": 0.9472219000115996, "grad_norm": 21.2792596586522, "learning_rate": 2e-06, "loss": 0.2978, "step": 4083 }, { "epoch": 0.9474538916599002, "grad_norm": 10.101328052308142, "learning_rate": 2e-06, "loss": 0.2412, "step": 4084 }, { "epoch": 0.9476858833082009, "grad_norm": 17.19211419820723, "learning_rate": 2e-06, "loss": 0.3151, "step": 4085 }, { "epoch": 0.9479178749565016, "grad_norm": 16.302835135561683, "learning_rate": 2e-06, "loss": 0.2125, "step": 4086 }, { "epoch": 0.9481498666048023, "grad_norm": 11.946190807136137, "learning_rate": 2e-06, "loss": 0.3241, "step": 4087 }, { "epoch": 0.9483818582531028, "grad_norm": 13.151658952123395, "learning_rate": 2e-06, "loss": 0.2848, "step": 4088 }, { "epoch": 0.9486138499014035, "grad_norm": 23.02715333790368, "learning_rate": 2e-06, "loss": 0.2935, "step": 4089 }, { "epoch": 0.9488458415497042, "grad_norm": 8.503461364810859, "learning_rate": 2e-06, "loss": 0.1989, "step": 4090 }, { "epoch": 0.9490778331980049, "grad_norm": 18.427826665139552, "learning_rate": 2e-06, "loss": 0.2229, "step": 4091 }, { "epoch": 0.9493098248463055, "grad_norm": 14.303047249938743, "learning_rate": 2e-06, "loss": 0.2439, "step": 4092 }, { "epoch": 0.9495418164946062, "grad_norm": 19.143796564075636, "learning_rate": 2e-06, "loss": 0.2807, "step": 4093 }, { "epoch": 0.9497738081429069, "grad_norm": 13.488732980205729, "learning_rate": 2e-06, "loss": 0.257, "step": 4094 }, { "epoch": 0.9500057997912075, "grad_norm": 8.526817701556933, "learning_rate": 2e-06, "loss": 0.193, "step": 4095 }, { "epoch": 0.9502377914395082, "grad_norm": 15.559353236992328, "learning_rate": 2e-06, "loss": 0.3272, "step": 4096 }, { "epoch": 0.9504697830878088, "grad_norm": 24.587515021857655, "learning_rate": 2e-06, "loss": 0.3034, "step": 4097 }, { "epoch": 0.9507017747361095, "grad_norm": 13.200634807361052, "learning_rate": 2e-06, "loss": 0.2902, "step": 4098 }, { "epoch": 0.9509337663844102, "grad_norm": 20.434562905867345, "learning_rate": 2e-06, "loss": 0.3055, "step": 4099 }, { "epoch": 0.9511657580327109, "grad_norm": 18.417630148667733, "learning_rate": 2e-06, "loss": 0.3549, "step": 4100 }, { "epoch": 0.9513977496810114, "grad_norm": 13.31683770770985, "learning_rate": 2e-06, "loss": 0.2553, "step": 4101 }, { "epoch": 0.9516297413293121, "grad_norm": 11.843045183770629, "learning_rate": 2e-06, "loss": 0.252, "step": 4102 }, { "epoch": 0.9518617329776128, "grad_norm": 11.666980315192783, "learning_rate": 2e-06, "loss": 0.2539, "step": 4103 }, { "epoch": 0.9520937246259135, "grad_norm": 19.913932697088207, "learning_rate": 2e-06, "loss": 0.3337, "step": 4104 }, { "epoch": 0.9523257162742141, "grad_norm": 13.36391426480302, "learning_rate": 2e-06, "loss": 0.2737, "step": 4105 }, { "epoch": 0.9525577079225148, "grad_norm": 20.073464429920577, "learning_rate": 2e-06, "loss": 0.4001, "step": 4106 }, { "epoch": 0.9527896995708155, "grad_norm": 15.891488742906676, "learning_rate": 2e-06, "loss": 0.3502, "step": 4107 }, { "epoch": 0.9530216912191161, "grad_norm": 21.231285427185863, "learning_rate": 2e-06, "loss": 0.3465, "step": 4108 }, { "epoch": 0.9532536828674167, "grad_norm": 13.94812781849407, "learning_rate": 2e-06, "loss": 0.2747, "step": 4109 }, { "epoch": 0.9534856745157174, "grad_norm": 7.340569324720874, "learning_rate": 2e-06, "loss": 0.2401, "step": 4110 }, { "epoch": 0.9537176661640181, "grad_norm": 15.352599805848575, "learning_rate": 2e-06, "loss": 0.338, "step": 4111 }, { "epoch": 0.9539496578123188, "grad_norm": 12.305729093870848, "learning_rate": 2e-06, "loss": 0.2499, "step": 4112 }, { "epoch": 0.9541816494606195, "grad_norm": 9.690251495539805, "learning_rate": 2e-06, "loss": 0.2528, "step": 4113 }, { "epoch": 0.9544136411089201, "grad_norm": 15.161578406160716, "learning_rate": 2e-06, "loss": 0.3333, "step": 4114 }, { "epoch": 0.9546456327572207, "grad_norm": 28.285554380523788, "learning_rate": 2e-06, "loss": 0.2778, "step": 4115 }, { "epoch": 0.9548776244055214, "grad_norm": 16.19964193709801, "learning_rate": 2e-06, "loss": 0.2748, "step": 4116 }, { "epoch": 0.955109616053822, "grad_norm": 12.83960278879911, "learning_rate": 2e-06, "loss": 0.2952, "step": 4117 }, { "epoch": 0.9553416077021227, "grad_norm": 8.782977231717306, "learning_rate": 2e-06, "loss": 0.2084, "step": 4118 }, { "epoch": 0.9555735993504234, "grad_norm": 15.30528731331944, "learning_rate": 2e-06, "loss": 0.326, "step": 4119 }, { "epoch": 0.9558055909987241, "grad_norm": 13.290100729515132, "learning_rate": 2e-06, "loss": 0.272, "step": 4120 }, { "epoch": 0.9560375826470247, "grad_norm": 10.406690132651857, "learning_rate": 2e-06, "loss": 0.2514, "step": 4121 }, { "epoch": 0.9562695742953253, "grad_norm": 26.04862314136393, "learning_rate": 2e-06, "loss": 0.3575, "step": 4122 }, { "epoch": 0.956501565943626, "grad_norm": 7.815336659949993, "learning_rate": 2e-06, "loss": 0.1821, "step": 4123 }, { "epoch": 0.9567335575919267, "grad_norm": 9.946114361385957, "learning_rate": 2e-06, "loss": 0.2606, "step": 4124 }, { "epoch": 0.9569655492402274, "grad_norm": 8.008143895713363, "learning_rate": 2e-06, "loss": 0.258, "step": 4125 }, { "epoch": 0.957197540888528, "grad_norm": 12.47252896155183, "learning_rate": 2e-06, "loss": 0.2618, "step": 4126 }, { "epoch": 0.9574295325368287, "grad_norm": 8.102956281530695, "learning_rate": 2e-06, "loss": 0.3792, "step": 4127 }, { "epoch": 0.9576615241851293, "grad_norm": 8.2663318549362, "learning_rate": 2e-06, "loss": 0.3004, "step": 4128 }, { "epoch": 0.95789351583343, "grad_norm": 12.246000066279436, "learning_rate": 2e-06, "loss": 0.2644, "step": 4129 }, { "epoch": 0.9581255074817306, "grad_norm": 11.872467722008956, "learning_rate": 2e-06, "loss": 0.2984, "step": 4130 }, { "epoch": 0.9583574991300313, "grad_norm": 21.614820482071732, "learning_rate": 2e-06, "loss": 0.2752, "step": 4131 }, { "epoch": 0.958589490778332, "grad_norm": 14.068048318367484, "learning_rate": 2e-06, "loss": 0.3651, "step": 4132 }, { "epoch": 0.9588214824266327, "grad_norm": 15.711678066427929, "learning_rate": 2e-06, "loss": 0.2752, "step": 4133 }, { "epoch": 0.9590534740749334, "grad_norm": 5.923996766497805, "learning_rate": 2e-06, "loss": 0.2064, "step": 4134 }, { "epoch": 0.9592854657232339, "grad_norm": 9.744245529377244, "learning_rate": 2e-06, "loss": 0.3068, "step": 4135 }, { "epoch": 0.9595174573715346, "grad_norm": 8.475805972629109, "learning_rate": 2e-06, "loss": 0.2627, "step": 4136 }, { "epoch": 0.9597494490198353, "grad_norm": 17.447438317238976, "learning_rate": 2e-06, "loss": 0.3531, "step": 4137 }, { "epoch": 0.959981440668136, "grad_norm": 10.248204605683624, "learning_rate": 2e-06, "loss": 0.3174, "step": 4138 }, { "epoch": 0.9602134323164366, "grad_norm": 9.349030328082655, "learning_rate": 2e-06, "loss": 0.2251, "step": 4139 }, { "epoch": 0.9604454239647373, "grad_norm": 10.040785549242996, "learning_rate": 2e-06, "loss": 0.3646, "step": 4140 }, { "epoch": 0.9606774156130379, "grad_norm": 17.99881429560681, "learning_rate": 2e-06, "loss": 0.3179, "step": 4141 }, { "epoch": 0.9609094072613386, "grad_norm": 23.15821161884971, "learning_rate": 2e-06, "loss": 0.3449, "step": 4142 }, { "epoch": 0.9611413989096392, "grad_norm": 11.559981381311555, "learning_rate": 2e-06, "loss": 0.2579, "step": 4143 }, { "epoch": 0.9613733905579399, "grad_norm": 16.29055174473012, "learning_rate": 2e-06, "loss": 0.2503, "step": 4144 }, { "epoch": 0.9616053822062406, "grad_norm": 11.7947484697283, "learning_rate": 2e-06, "loss": 0.2859, "step": 4145 }, { "epoch": 0.9618373738545413, "grad_norm": 19.286142596191475, "learning_rate": 2e-06, "loss": 0.3261, "step": 4146 }, { "epoch": 0.9620693655028419, "grad_norm": 9.755826226950822, "learning_rate": 2e-06, "loss": 0.2265, "step": 4147 }, { "epoch": 0.9623013571511425, "grad_norm": 16.999414195147992, "learning_rate": 2e-06, "loss": 0.2519, "step": 4148 }, { "epoch": 0.9625333487994432, "grad_norm": 9.47299950537098, "learning_rate": 2e-06, "loss": 0.1947, "step": 4149 }, { "epoch": 0.9627653404477439, "grad_norm": 7.826711210558996, "learning_rate": 2e-06, "loss": 0.2812, "step": 4150 }, { "epoch": 0.9629973320960445, "grad_norm": 10.426702042786996, "learning_rate": 2e-06, "loss": 0.2236, "step": 4151 }, { "epoch": 0.9632293237443452, "grad_norm": 15.108961521422483, "learning_rate": 2e-06, "loss": 0.3395, "step": 4152 }, { "epoch": 0.9634613153926459, "grad_norm": 9.802328672252278, "learning_rate": 2e-06, "loss": 0.2706, "step": 4153 }, { "epoch": 0.9636933070409466, "grad_norm": 18.037924430831286, "learning_rate": 2e-06, "loss": 0.2694, "step": 4154 }, { "epoch": 0.9639252986892471, "grad_norm": 8.017284974062889, "learning_rate": 2e-06, "loss": 0.2999, "step": 4155 }, { "epoch": 0.9641572903375478, "grad_norm": 12.34805634741516, "learning_rate": 2e-06, "loss": 0.255, "step": 4156 }, { "epoch": 0.9643892819858485, "grad_norm": 16.752460181250132, "learning_rate": 2e-06, "loss": 0.2908, "step": 4157 }, { "epoch": 0.9646212736341492, "grad_norm": 16.975203492078528, "learning_rate": 2e-06, "loss": 0.3314, "step": 4158 }, { "epoch": 0.9648532652824499, "grad_norm": 18.677633298719076, "learning_rate": 2e-06, "loss": 0.2746, "step": 4159 }, { "epoch": 0.9650852569307505, "grad_norm": 16.169374674291078, "learning_rate": 2e-06, "loss": 0.3081, "step": 4160 }, { "epoch": 0.9653172485790511, "grad_norm": 8.690820570400597, "learning_rate": 2e-06, "loss": 0.2812, "step": 4161 }, { "epoch": 0.9655492402273518, "grad_norm": 18.24990491642374, "learning_rate": 2e-06, "loss": 0.2228, "step": 4162 }, { "epoch": 0.9657812318756525, "grad_norm": 7.2511972849280015, "learning_rate": 2e-06, "loss": 0.294, "step": 4163 }, { "epoch": 0.9660132235239531, "grad_norm": 14.82011859195889, "learning_rate": 2e-06, "loss": 0.2779, "step": 4164 }, { "epoch": 0.9662452151722538, "grad_norm": 8.815089700662222, "learning_rate": 2e-06, "loss": 0.1979, "step": 4165 }, { "epoch": 0.9664772068205545, "grad_norm": 11.440596644880184, "learning_rate": 2e-06, "loss": 0.2995, "step": 4166 }, { "epoch": 0.9667091984688552, "grad_norm": 21.152456518163454, "learning_rate": 2e-06, "loss": 0.3035, "step": 4167 }, { "epoch": 0.9669411901171557, "grad_norm": 22.36876306470267, "learning_rate": 2e-06, "loss": 0.3341, "step": 4168 }, { "epoch": 0.9671731817654564, "grad_norm": 9.240470420717966, "learning_rate": 2e-06, "loss": 0.2977, "step": 4169 }, { "epoch": 0.9674051734137571, "grad_norm": 12.167277516648443, "learning_rate": 2e-06, "loss": 0.2749, "step": 4170 }, { "epoch": 0.9676371650620578, "grad_norm": 5.054791610066648, "learning_rate": 2e-06, "loss": 0.2408, "step": 4171 }, { "epoch": 0.9678691567103584, "grad_norm": 6.7646904021890455, "learning_rate": 2e-06, "loss": 0.2495, "step": 4172 }, { "epoch": 0.9681011483586591, "grad_norm": 16.408927035270644, "learning_rate": 2e-06, "loss": 0.2613, "step": 4173 }, { "epoch": 0.9683331400069598, "grad_norm": 14.037063486759456, "learning_rate": 2e-06, "loss": 0.3029, "step": 4174 }, { "epoch": 0.9685651316552604, "grad_norm": 13.446123483781111, "learning_rate": 2e-06, "loss": 0.2195, "step": 4175 }, { "epoch": 0.968797123303561, "grad_norm": 12.642700280341838, "learning_rate": 2e-06, "loss": 0.2894, "step": 4176 }, { "epoch": 0.9690291149518617, "grad_norm": 10.132367629156734, "learning_rate": 2e-06, "loss": 0.253, "step": 4177 }, { "epoch": 0.9692611066001624, "grad_norm": 8.498917805369992, "learning_rate": 2e-06, "loss": 0.3635, "step": 4178 }, { "epoch": 0.9694930982484631, "grad_norm": 13.209622449831079, "learning_rate": 2e-06, "loss": 0.2891, "step": 4179 }, { "epoch": 0.9697250898967638, "grad_norm": 12.29051439378758, "learning_rate": 2e-06, "loss": 0.3119, "step": 4180 }, { "epoch": 0.9699570815450643, "grad_norm": 21.12653821260995, "learning_rate": 2e-06, "loss": 0.3552, "step": 4181 }, { "epoch": 0.970189073193365, "grad_norm": 5.3595942600030115, "learning_rate": 2e-06, "loss": 0.1905, "step": 4182 }, { "epoch": 0.9704210648416657, "grad_norm": 10.134794135814035, "learning_rate": 2e-06, "loss": 0.2624, "step": 4183 }, { "epoch": 0.9706530564899664, "grad_norm": 21.330682910796718, "learning_rate": 2e-06, "loss": 0.311, "step": 4184 }, { "epoch": 0.970885048138267, "grad_norm": 14.964958740540329, "learning_rate": 2e-06, "loss": 0.2662, "step": 4185 }, { "epoch": 0.9711170397865677, "grad_norm": 11.493977780236726, "learning_rate": 2e-06, "loss": 0.2423, "step": 4186 }, { "epoch": 0.9713490314348684, "grad_norm": 16.30578846347033, "learning_rate": 2e-06, "loss": 0.2295, "step": 4187 }, { "epoch": 0.971581023083169, "grad_norm": 21.62575639230587, "learning_rate": 2e-06, "loss": 0.2544, "step": 4188 }, { "epoch": 0.9718130147314696, "grad_norm": 14.537087718010312, "learning_rate": 2e-06, "loss": 0.2882, "step": 4189 }, { "epoch": 0.9720450063797703, "grad_norm": 13.378301810685581, "learning_rate": 2e-06, "loss": 0.326, "step": 4190 }, { "epoch": 0.972276998028071, "grad_norm": 14.564670167831924, "learning_rate": 2e-06, "loss": 0.2357, "step": 4191 }, { "epoch": 0.9725089896763717, "grad_norm": 11.895665121896728, "learning_rate": 2e-06, "loss": 0.2513, "step": 4192 }, { "epoch": 0.9727409813246723, "grad_norm": 29.173504877419095, "learning_rate": 2e-06, "loss": 0.2729, "step": 4193 }, { "epoch": 0.972972972972973, "grad_norm": 18.29224069723669, "learning_rate": 2e-06, "loss": 0.2807, "step": 4194 }, { "epoch": 0.9732049646212736, "grad_norm": 10.68192300084348, "learning_rate": 2e-06, "loss": 0.2298, "step": 4195 }, { "epoch": 0.9734369562695743, "grad_norm": 21.485573797924964, "learning_rate": 2e-06, "loss": 0.3444, "step": 4196 }, { "epoch": 0.973668947917875, "grad_norm": 7.255065307915454, "learning_rate": 2e-06, "loss": 0.2094, "step": 4197 }, { "epoch": 0.9739009395661756, "grad_norm": 11.98053928489005, "learning_rate": 2e-06, "loss": 0.2398, "step": 4198 }, { "epoch": 0.9741329312144763, "grad_norm": 16.306959457810798, "learning_rate": 2e-06, "loss": 0.2924, "step": 4199 }, { "epoch": 0.974364922862777, "grad_norm": 15.066723005130221, "learning_rate": 2e-06, "loss": 0.2981, "step": 4200 }, { "epoch": 0.9745969145110776, "grad_norm": 17.8311141003004, "learning_rate": 2e-06, "loss": 0.2809, "step": 4201 }, { "epoch": 0.9748289061593782, "grad_norm": 17.330070730465206, "learning_rate": 2e-06, "loss": 0.3275, "step": 4202 }, { "epoch": 0.9750608978076789, "grad_norm": 13.899796439940532, "learning_rate": 2e-06, "loss": 0.2049, "step": 4203 }, { "epoch": 0.9752928894559796, "grad_norm": 12.515907162025195, "learning_rate": 2e-06, "loss": 0.2258, "step": 4204 }, { "epoch": 0.9755248811042803, "grad_norm": 14.070388956916347, "learning_rate": 2e-06, "loss": 0.3706, "step": 4205 }, { "epoch": 0.9757568727525809, "grad_norm": 9.998273054313804, "learning_rate": 2e-06, "loss": 0.2585, "step": 4206 }, { "epoch": 0.9759888644008816, "grad_norm": 17.59697288259026, "learning_rate": 2e-06, "loss": 0.2893, "step": 4207 }, { "epoch": 0.9762208560491822, "grad_norm": 10.29582133296254, "learning_rate": 2e-06, "loss": 0.1864, "step": 4208 }, { "epoch": 0.9764528476974829, "grad_norm": 15.152312481568451, "learning_rate": 2e-06, "loss": 0.2662, "step": 4209 }, { "epoch": 0.9766848393457835, "grad_norm": 13.491237354588291, "learning_rate": 2e-06, "loss": 0.2564, "step": 4210 }, { "epoch": 0.9769168309940842, "grad_norm": 22.77241362575545, "learning_rate": 2e-06, "loss": 0.3766, "step": 4211 }, { "epoch": 0.9771488226423849, "grad_norm": 24.502660636251704, "learning_rate": 2e-06, "loss": 0.3272, "step": 4212 }, { "epoch": 0.9773808142906856, "grad_norm": 24.914401016942282, "learning_rate": 2e-06, "loss": 0.288, "step": 4213 }, { "epoch": 0.9776128059389863, "grad_norm": 11.111214277761023, "learning_rate": 2e-06, "loss": 0.2549, "step": 4214 }, { "epoch": 0.9778447975872868, "grad_norm": 11.49523120194123, "learning_rate": 2e-06, "loss": 0.317, "step": 4215 }, { "epoch": 0.9780767892355875, "grad_norm": 9.055681111603517, "learning_rate": 2e-06, "loss": 0.2275, "step": 4216 }, { "epoch": 0.9783087808838882, "grad_norm": 15.174180810545446, "learning_rate": 2e-06, "loss": 0.2826, "step": 4217 }, { "epoch": 0.9785407725321889, "grad_norm": 13.401080527534248, "learning_rate": 2e-06, "loss": 0.2857, "step": 4218 }, { "epoch": 0.9787727641804895, "grad_norm": 6.164648447458631, "learning_rate": 2e-06, "loss": 0.1799, "step": 4219 }, { "epoch": 0.9790047558287902, "grad_norm": 27.775235280358498, "learning_rate": 2e-06, "loss": 0.3474, "step": 4220 }, { "epoch": 0.9792367474770908, "grad_norm": 18.37992954725875, "learning_rate": 2e-06, "loss": 0.229, "step": 4221 }, { "epoch": 0.9794687391253915, "grad_norm": 13.19515503304382, "learning_rate": 2e-06, "loss": 0.3051, "step": 4222 }, { "epoch": 0.9797007307736921, "grad_norm": 11.132635370674684, "learning_rate": 2e-06, "loss": 0.2385, "step": 4223 }, { "epoch": 0.9799327224219928, "grad_norm": 8.405337802905594, "learning_rate": 2e-06, "loss": 0.1608, "step": 4224 }, { "epoch": 0.9801647140702935, "grad_norm": 13.917574725560614, "learning_rate": 2e-06, "loss": 0.3205, "step": 4225 }, { "epoch": 0.9803967057185942, "grad_norm": 6.015763156205712, "learning_rate": 2e-06, "loss": 0.1753, "step": 4226 }, { "epoch": 0.9806286973668948, "grad_norm": 15.86064516096754, "learning_rate": 2e-06, "loss": 0.3311, "step": 4227 }, { "epoch": 0.9808606890151954, "grad_norm": 13.07809649383298, "learning_rate": 2e-06, "loss": 0.3762, "step": 4228 }, { "epoch": 0.9810926806634961, "grad_norm": 22.12385581572482, "learning_rate": 2e-06, "loss": 0.3983, "step": 4229 }, { "epoch": 0.9813246723117968, "grad_norm": 21.61215045671309, "learning_rate": 2e-06, "loss": 0.2543, "step": 4230 }, { "epoch": 0.9815566639600974, "grad_norm": 11.469648361559175, "learning_rate": 2e-06, "loss": 0.2123, "step": 4231 }, { "epoch": 0.9817886556083981, "grad_norm": 22.827779221927173, "learning_rate": 2e-06, "loss": 0.4152, "step": 4232 }, { "epoch": 0.9820206472566988, "grad_norm": 16.671441093942356, "learning_rate": 2e-06, "loss": 0.2653, "step": 4233 }, { "epoch": 0.9822526389049995, "grad_norm": 15.742088488552337, "learning_rate": 2e-06, "loss": 0.1953, "step": 4234 }, { "epoch": 0.9824846305533, "grad_norm": 6.266727832106582, "learning_rate": 2e-06, "loss": 0.1748, "step": 4235 }, { "epoch": 0.9827166222016007, "grad_norm": 15.052647218778917, "learning_rate": 2e-06, "loss": 0.2619, "step": 4236 }, { "epoch": 0.9829486138499014, "grad_norm": 11.529712668140277, "learning_rate": 2e-06, "loss": 0.2717, "step": 4237 }, { "epoch": 0.9831806054982021, "grad_norm": 11.626580721336351, "learning_rate": 2e-06, "loss": 0.3329, "step": 4238 }, { "epoch": 0.9834125971465028, "grad_norm": 17.82590025916798, "learning_rate": 2e-06, "loss": 0.3335, "step": 4239 }, { "epoch": 0.9836445887948034, "grad_norm": 13.926504229230394, "learning_rate": 2e-06, "loss": 0.3854, "step": 4240 }, { "epoch": 0.983876580443104, "grad_norm": 7.309050762390744, "learning_rate": 2e-06, "loss": 0.2475, "step": 4241 }, { "epoch": 0.9841085720914047, "grad_norm": 10.44914747577233, "learning_rate": 2e-06, "loss": 0.1732, "step": 4242 }, { "epoch": 0.9843405637397054, "grad_norm": 17.92649994514198, "learning_rate": 2e-06, "loss": 0.2721, "step": 4243 }, { "epoch": 0.984572555388006, "grad_norm": 12.515934818615193, "learning_rate": 2e-06, "loss": 0.2515, "step": 4244 }, { "epoch": 0.9848045470363067, "grad_norm": 11.349447658048096, "learning_rate": 2e-06, "loss": 0.2519, "step": 4245 }, { "epoch": 0.9850365386846074, "grad_norm": 17.278232190175704, "learning_rate": 2e-06, "loss": 0.2798, "step": 4246 }, { "epoch": 0.9852685303329081, "grad_norm": 19.564700818775094, "learning_rate": 2e-06, "loss": 0.3114, "step": 4247 }, { "epoch": 0.9855005219812086, "grad_norm": 11.075353127881275, "learning_rate": 2e-06, "loss": 0.2909, "step": 4248 }, { "epoch": 0.9857325136295093, "grad_norm": 12.89124213988945, "learning_rate": 2e-06, "loss": 0.1592, "step": 4249 }, { "epoch": 0.98596450527781, "grad_norm": 16.98800196949275, "learning_rate": 2e-06, "loss": 0.3969, "step": 4250 }, { "epoch": 0.9861964969261107, "grad_norm": 13.578942902506654, "learning_rate": 2e-06, "loss": 0.2777, "step": 4251 }, { "epoch": 0.9864284885744113, "grad_norm": 8.1871697074597, "learning_rate": 2e-06, "loss": 0.203, "step": 4252 }, { "epoch": 0.986660480222712, "grad_norm": 7.040253276385697, "learning_rate": 2e-06, "loss": 0.224, "step": 4253 }, { "epoch": 0.9868924718710126, "grad_norm": 14.405757347393758, "learning_rate": 2e-06, "loss": 0.2295, "step": 4254 }, { "epoch": 0.9871244635193133, "grad_norm": 9.033447010628546, "learning_rate": 2e-06, "loss": 0.1497, "step": 4255 }, { "epoch": 0.9873564551676139, "grad_norm": 20.97446599435015, "learning_rate": 2e-06, "loss": 0.1794, "step": 4256 }, { "epoch": 0.9875884468159146, "grad_norm": 6.904763491483926, "learning_rate": 2e-06, "loss": 0.1891, "step": 4257 }, { "epoch": 0.9878204384642153, "grad_norm": 18.688009180195195, "learning_rate": 2e-06, "loss": 0.3001, "step": 4258 }, { "epoch": 0.988052430112516, "grad_norm": 13.364486799681748, "learning_rate": 2e-06, "loss": 0.2617, "step": 4259 }, { "epoch": 0.9882844217608167, "grad_norm": 15.973372169182674, "learning_rate": 2e-06, "loss": 0.2303, "step": 4260 }, { "epoch": 0.9885164134091172, "grad_norm": 10.556847505773627, "learning_rate": 2e-06, "loss": 0.3017, "step": 4261 }, { "epoch": 0.9887484050574179, "grad_norm": 23.634222130250084, "learning_rate": 2e-06, "loss": 0.4199, "step": 4262 }, { "epoch": 0.9889803967057186, "grad_norm": 16.572984667619835, "learning_rate": 2e-06, "loss": 0.3701, "step": 4263 }, { "epoch": 0.9892123883540193, "grad_norm": 14.404180757195148, "learning_rate": 2e-06, "loss": 0.332, "step": 4264 }, { "epoch": 0.9894443800023199, "grad_norm": 13.811304035079404, "learning_rate": 2e-06, "loss": 0.2003, "step": 4265 }, { "epoch": 0.9896763716506206, "grad_norm": 11.8826651967571, "learning_rate": 2e-06, "loss": 0.333, "step": 4266 }, { "epoch": 0.9899083632989213, "grad_norm": 14.923848968766313, "learning_rate": 2e-06, "loss": 0.4666, "step": 4267 }, { "epoch": 0.9901403549472219, "grad_norm": 19.934166664100598, "learning_rate": 2e-06, "loss": 0.3621, "step": 4268 }, { "epoch": 0.9903723465955225, "grad_norm": 10.548852262130172, "learning_rate": 2e-06, "loss": 0.2605, "step": 4269 }, { "epoch": 0.9906043382438232, "grad_norm": 17.044978616656284, "learning_rate": 2e-06, "loss": 0.2738, "step": 4270 }, { "epoch": 0.9908363298921239, "grad_norm": 7.3678955030967, "learning_rate": 2e-06, "loss": 0.1474, "step": 4271 }, { "epoch": 0.9910683215404246, "grad_norm": 18.214716715347542, "learning_rate": 2e-06, "loss": 0.4379, "step": 4272 }, { "epoch": 0.9913003131887252, "grad_norm": 12.945968711053286, "learning_rate": 2e-06, "loss": 0.2377, "step": 4273 }, { "epoch": 0.9915323048370258, "grad_norm": 17.728922143117316, "learning_rate": 2e-06, "loss": 0.3382, "step": 4274 }, { "epoch": 0.9917642964853265, "grad_norm": 18.646348539918346, "learning_rate": 2e-06, "loss": 0.3737, "step": 4275 }, { "epoch": 0.9919962881336272, "grad_norm": 17.22778162656779, "learning_rate": 2e-06, "loss": 0.281, "step": 4276 }, { "epoch": 0.9922282797819278, "grad_norm": 12.540238388456121, "learning_rate": 2e-06, "loss": 0.3496, "step": 4277 }, { "epoch": 0.9924602714302285, "grad_norm": 9.732713018493623, "learning_rate": 2e-06, "loss": 0.2186, "step": 4278 }, { "epoch": 0.9926922630785292, "grad_norm": 7.953560031755493, "learning_rate": 2e-06, "loss": 0.2391, "step": 4279 }, { "epoch": 0.9929242547268299, "grad_norm": 11.722711090296285, "learning_rate": 2e-06, "loss": 0.2226, "step": 4280 }, { "epoch": 0.9931562463751304, "grad_norm": 18.63265751918661, "learning_rate": 2e-06, "loss": 0.2999, "step": 4281 }, { "epoch": 0.9933882380234311, "grad_norm": 10.19449522121469, "learning_rate": 2e-06, "loss": 0.2203, "step": 4282 }, { "epoch": 0.9936202296717318, "grad_norm": 18.11672937369853, "learning_rate": 2e-06, "loss": 0.3204, "step": 4283 }, { "epoch": 0.9938522213200325, "grad_norm": 16.01830303614881, "learning_rate": 2e-06, "loss": 0.3197, "step": 4284 }, { "epoch": 0.9940842129683332, "grad_norm": 17.3963087347634, "learning_rate": 2e-06, "loss": 0.3376, "step": 4285 }, { "epoch": 0.9943162046166338, "grad_norm": 17.667883565032273, "learning_rate": 2e-06, "loss": 0.3051, "step": 4286 }, { "epoch": 0.9945481962649345, "grad_norm": 8.313509916883348, "learning_rate": 2e-06, "loss": 0.2424, "step": 4287 }, { "epoch": 0.9947801879132351, "grad_norm": 21.853369792188385, "learning_rate": 2e-06, "loss": 0.3898, "step": 4288 }, { "epoch": 0.9950121795615358, "grad_norm": 12.909500013417489, "learning_rate": 2e-06, "loss": 0.2194, "step": 4289 }, { "epoch": 0.9952441712098364, "grad_norm": 15.270128959373558, "learning_rate": 2e-06, "loss": 0.2795, "step": 4290 }, { "epoch": 0.9954761628581371, "grad_norm": 21.79462981369358, "learning_rate": 2e-06, "loss": 0.339, "step": 4291 }, { "epoch": 0.9957081545064378, "grad_norm": 19.88564636383995, "learning_rate": 2e-06, "loss": 0.3024, "step": 4292 }, { "epoch": 0.9959401461547385, "grad_norm": 14.399655142947225, "learning_rate": 2e-06, "loss": 0.2895, "step": 4293 }, { "epoch": 0.996172137803039, "grad_norm": 19.85949576833494, "learning_rate": 2e-06, "loss": 0.3431, "step": 4294 }, { "epoch": 0.9964041294513397, "grad_norm": 11.683854205528311, "learning_rate": 2e-06, "loss": 0.2807, "step": 4295 }, { "epoch": 0.9966361210996404, "grad_norm": 17.951758210842055, "learning_rate": 2e-06, "loss": 0.3739, "step": 4296 }, { "epoch": 0.9968681127479411, "grad_norm": 21.711123070741547, "learning_rate": 2e-06, "loss": 0.2775, "step": 4297 }, { "epoch": 0.9971001043962417, "grad_norm": 12.616016808487865, "learning_rate": 2e-06, "loss": 0.2993, "step": 4298 }, { "epoch": 0.9973320960445424, "grad_norm": 20.280139049312588, "learning_rate": 2e-06, "loss": 0.2819, "step": 4299 }, { "epoch": 0.9975640876928431, "grad_norm": 12.354640230223774, "learning_rate": 2e-06, "loss": 0.264, "step": 4300 }, { "epoch": 0.9977960793411437, "grad_norm": 8.04778189340334, "learning_rate": 2e-06, "loss": 0.3385, "step": 4301 }, { "epoch": 0.9980280709894443, "grad_norm": 13.08075846529473, "learning_rate": 2e-06, "loss": 0.1973, "step": 4302 }, { "epoch": 0.998260062637745, "grad_norm": 12.520305141646622, "learning_rate": 2e-06, "loss": 0.3255, "step": 4303 }, { "epoch": 0.9984920542860457, "grad_norm": 11.281683034711, "learning_rate": 2e-06, "loss": 0.2641, "step": 4304 }, { "epoch": 0.9987240459343464, "grad_norm": 9.467801053894949, "learning_rate": 2e-06, "loss": 0.2862, "step": 4305 }, { "epoch": 0.9989560375826471, "grad_norm": 10.738481898576978, "learning_rate": 2e-06, "loss": 0.2807, "step": 4306 }, { "epoch": 0.9991880292309477, "grad_norm": 12.40324963497283, "learning_rate": 2e-06, "loss": 0.2784, "step": 4307 }, { "epoch": 0.9994200208792483, "grad_norm": 17.279743062019485, "learning_rate": 2e-06, "loss": 0.3601, "step": 4308 }, { "epoch": 0.999652012527549, "grad_norm": 12.254255842119658, "learning_rate": 2e-06, "loss": 0.2333, "step": 4309 }, { "epoch": 0.9998840041758497, "grad_norm": 15.644995614708913, "learning_rate": 2e-06, "loss": 0.301, "step": 4310 }, { "epoch": 1.0001159958241503, "grad_norm": 16.718302350093527, "learning_rate": 2e-06, "loss": 0.3431, "step": 4311 }, { "epoch": 1.000347987472451, "grad_norm": 5.981456267975129, "learning_rate": 2e-06, "loss": 0.278, "step": 4312 }, { "epoch": 1.0005799791207517, "grad_norm": 11.01508633788959, "learning_rate": 2e-06, "loss": 0.2755, "step": 4313 }, { "epoch": 1.0008119707690524, "grad_norm": 10.28879898371332, "learning_rate": 2e-06, "loss": 0.2578, "step": 4314 }, { "epoch": 1.001043962417353, "grad_norm": 19.773665483140853, "learning_rate": 2e-06, "loss": 0.427, "step": 4315 }, { "epoch": 1.0012759540656537, "grad_norm": 14.035676093485598, "learning_rate": 2e-06, "loss": 0.3006, "step": 4316 }, { "epoch": 1.0015079457139544, "grad_norm": 13.1091865357948, "learning_rate": 2e-06, "loss": 0.2829, "step": 4317 }, { "epoch": 1.0017399373622549, "grad_norm": 19.786693874665463, "learning_rate": 2e-06, "loss": 0.319, "step": 4318 }, { "epoch": 1.0019719290105555, "grad_norm": 7.270669487731423, "learning_rate": 2e-06, "loss": 0.2108, "step": 4319 }, { "epoch": 1.0022039206588562, "grad_norm": 20.17286026961784, "learning_rate": 2e-06, "loss": 0.3011, "step": 4320 }, { "epoch": 1.002435912307157, "grad_norm": 12.587046558200004, "learning_rate": 2e-06, "loss": 0.2785, "step": 4321 }, { "epoch": 1.0026679039554576, "grad_norm": 8.869916800554623, "learning_rate": 2e-06, "loss": 0.2481, "step": 4322 }, { "epoch": 1.0028998956037583, "grad_norm": 12.136751295143798, "learning_rate": 2e-06, "loss": 0.264, "step": 4323 }, { "epoch": 1.003131887252059, "grad_norm": 8.667913267118324, "learning_rate": 2e-06, "loss": 0.1941, "step": 4324 }, { "epoch": 1.0033638789003596, "grad_norm": 14.403161655669786, "learning_rate": 2e-06, "loss": 0.2535, "step": 4325 }, { "epoch": 1.0035958705486603, "grad_norm": 18.612318169636975, "learning_rate": 2e-06, "loss": 0.2766, "step": 4326 }, { "epoch": 1.003827862196961, "grad_norm": 16.99645141292192, "learning_rate": 2e-06, "loss": 0.363, "step": 4327 }, { "epoch": 1.0040598538452616, "grad_norm": 13.751830433854773, "learning_rate": 2e-06, "loss": 0.155, "step": 4328 }, { "epoch": 1.0042918454935623, "grad_norm": 13.671771418605278, "learning_rate": 2e-06, "loss": 0.2716, "step": 4329 }, { "epoch": 1.004523837141863, "grad_norm": 18.752642833061604, "learning_rate": 2e-06, "loss": 0.3661, "step": 4330 }, { "epoch": 1.0047558287901635, "grad_norm": 14.847783747948847, "learning_rate": 2e-06, "loss": 0.2702, "step": 4331 }, { "epoch": 1.0049878204384641, "grad_norm": 17.885233787267723, "learning_rate": 2e-06, "loss": 0.3698, "step": 4332 }, { "epoch": 1.0052198120867648, "grad_norm": 27.13789320974805, "learning_rate": 2e-06, "loss": 0.3145, "step": 4333 }, { "epoch": 1.0054518037350655, "grad_norm": 13.304508645892248, "learning_rate": 2e-06, "loss": 0.2326, "step": 4334 }, { "epoch": 1.0056837953833662, "grad_norm": 9.862556153578181, "learning_rate": 2e-06, "loss": 0.1547, "step": 4335 }, { "epoch": 1.0059157870316668, "grad_norm": 10.683912066124044, "learning_rate": 2e-06, "loss": 0.3168, "step": 4336 }, { "epoch": 1.0061477786799675, "grad_norm": 12.134907278341815, "learning_rate": 2e-06, "loss": 0.2847, "step": 4337 }, { "epoch": 1.0063797703282682, "grad_norm": 16.990495829795368, "learning_rate": 2e-06, "loss": 0.1884, "step": 4338 }, { "epoch": 1.0066117619765689, "grad_norm": 11.49411437654733, "learning_rate": 2e-06, "loss": 0.2447, "step": 4339 }, { "epoch": 1.0068437536248696, "grad_norm": 14.995630381719709, "learning_rate": 2e-06, "loss": 0.2222, "step": 4340 }, { "epoch": 1.0070757452731702, "grad_norm": 13.971606657098535, "learning_rate": 2e-06, "loss": 0.2872, "step": 4341 }, { "epoch": 1.007307736921471, "grad_norm": 20.96371526573696, "learning_rate": 2e-06, "loss": 0.2905, "step": 4342 }, { "epoch": 1.0075397285697716, "grad_norm": 9.266262013323754, "learning_rate": 2e-06, "loss": 0.1939, "step": 4343 }, { "epoch": 1.007771720218072, "grad_norm": 19.77717418896224, "learning_rate": 2e-06, "loss": 0.3568, "step": 4344 }, { "epoch": 1.0080037118663727, "grad_norm": 12.26230275832593, "learning_rate": 2e-06, "loss": 0.3573, "step": 4345 }, { "epoch": 1.0082357035146734, "grad_norm": 14.19253024029564, "learning_rate": 2e-06, "loss": 0.3354, "step": 4346 }, { "epoch": 1.008467695162974, "grad_norm": 10.803220157659359, "learning_rate": 2e-06, "loss": 0.27, "step": 4347 }, { "epoch": 1.0086996868112748, "grad_norm": 23.124982254178747, "learning_rate": 2e-06, "loss": 0.3364, "step": 4348 }, { "epoch": 1.0089316784595754, "grad_norm": 9.731324037618686, "learning_rate": 2e-06, "loss": 0.1988, "step": 4349 }, { "epoch": 1.009163670107876, "grad_norm": 7.93294592056329, "learning_rate": 2e-06, "loss": 0.2416, "step": 4350 }, { "epoch": 1.0093956617561768, "grad_norm": 14.057267162792947, "learning_rate": 2e-06, "loss": 0.32, "step": 4351 }, { "epoch": 1.0096276534044775, "grad_norm": 13.900748666564924, "learning_rate": 2e-06, "loss": 0.2171, "step": 4352 }, { "epoch": 1.0098596450527781, "grad_norm": 8.245172013413677, "learning_rate": 2e-06, "loss": 0.2147, "step": 4353 }, { "epoch": 1.0100916367010788, "grad_norm": 10.271433355787496, "learning_rate": 2e-06, "loss": 0.1919, "step": 4354 }, { "epoch": 1.0103236283493795, "grad_norm": 8.194394625100914, "learning_rate": 2e-06, "loss": 0.1986, "step": 4355 }, { "epoch": 1.0105556199976802, "grad_norm": 15.079051609451382, "learning_rate": 2e-06, "loss": 0.2572, "step": 4356 }, { "epoch": 1.0107876116459809, "grad_norm": 15.291543050990853, "learning_rate": 2e-06, "loss": 0.4196, "step": 4357 }, { "epoch": 1.0110196032942813, "grad_norm": 17.82830434225328, "learning_rate": 2e-06, "loss": 0.2908, "step": 4358 }, { "epoch": 1.011251594942582, "grad_norm": 12.837214688662794, "learning_rate": 2e-06, "loss": 0.3404, "step": 4359 }, { "epoch": 1.0114835865908827, "grad_norm": 11.901047019390361, "learning_rate": 2e-06, "loss": 0.2712, "step": 4360 }, { "epoch": 1.0117155782391833, "grad_norm": 23.758163883570887, "learning_rate": 2e-06, "loss": 0.3621, "step": 4361 }, { "epoch": 1.011947569887484, "grad_norm": 11.482949711438708, "learning_rate": 2e-06, "loss": 0.147, "step": 4362 }, { "epoch": 1.0121795615357847, "grad_norm": 10.940010722924, "learning_rate": 2e-06, "loss": 0.317, "step": 4363 }, { "epoch": 1.0124115531840854, "grad_norm": 9.369929809574218, "learning_rate": 2e-06, "loss": 0.1951, "step": 4364 }, { "epoch": 1.012643544832386, "grad_norm": 9.403910759231197, "learning_rate": 2e-06, "loss": 0.2646, "step": 4365 }, { "epoch": 1.0128755364806867, "grad_norm": 11.168686934520977, "learning_rate": 2e-06, "loss": 0.2605, "step": 4366 }, { "epoch": 1.0131075281289874, "grad_norm": 11.041500841883662, "learning_rate": 2e-06, "loss": 0.3602, "step": 4367 }, { "epoch": 1.013339519777288, "grad_norm": 9.369178993030669, "learning_rate": 2e-06, "loss": 0.2316, "step": 4368 }, { "epoch": 1.0135715114255888, "grad_norm": 12.33230547811149, "learning_rate": 2e-06, "loss": 0.1931, "step": 4369 }, { "epoch": 1.0138035030738894, "grad_norm": 13.487586445637827, "learning_rate": 2e-06, "loss": 0.3596, "step": 4370 }, { "epoch": 1.01403549472219, "grad_norm": 15.733338825367728, "learning_rate": 2e-06, "loss": 0.214, "step": 4371 }, { "epoch": 1.0142674863704906, "grad_norm": 14.042948766828657, "learning_rate": 2e-06, "loss": 0.2801, "step": 4372 }, { "epoch": 1.0144994780187913, "grad_norm": 10.908660927650285, "learning_rate": 2e-06, "loss": 0.2215, "step": 4373 }, { "epoch": 1.014731469667092, "grad_norm": 12.053128384428641, "learning_rate": 2e-06, "loss": 0.2202, "step": 4374 }, { "epoch": 1.0149634613153926, "grad_norm": 15.94150176034634, "learning_rate": 2e-06, "loss": 0.34, "step": 4375 }, { "epoch": 1.0151954529636933, "grad_norm": 30.488268098804266, "learning_rate": 2e-06, "loss": 0.3275, "step": 4376 }, { "epoch": 1.015427444611994, "grad_norm": 16.418756536831573, "learning_rate": 2e-06, "loss": 0.2529, "step": 4377 }, { "epoch": 1.0156594362602946, "grad_norm": 26.929167976037675, "learning_rate": 2e-06, "loss": 0.3632, "step": 4378 }, { "epoch": 1.0158914279085953, "grad_norm": 13.390221016403952, "learning_rate": 2e-06, "loss": 0.1912, "step": 4379 }, { "epoch": 1.016123419556896, "grad_norm": 11.176161477331638, "learning_rate": 2e-06, "loss": 0.2135, "step": 4380 }, { "epoch": 1.0163554112051967, "grad_norm": 19.321719328090147, "learning_rate": 2e-06, "loss": 0.38, "step": 4381 }, { "epoch": 1.0165874028534974, "grad_norm": 8.73358529268335, "learning_rate": 2e-06, "loss": 0.2182, "step": 4382 }, { "epoch": 1.016819394501798, "grad_norm": 11.346499255352734, "learning_rate": 2e-06, "loss": 0.2417, "step": 4383 }, { "epoch": 1.0170513861500985, "grad_norm": 9.501895122254266, "learning_rate": 2e-06, "loss": 0.2326, "step": 4384 }, { "epoch": 1.0172833777983992, "grad_norm": 12.229807304921541, "learning_rate": 2e-06, "loss": 0.2692, "step": 4385 }, { "epoch": 1.0175153694466998, "grad_norm": 11.847609278185644, "learning_rate": 2e-06, "loss": 0.2968, "step": 4386 }, { "epoch": 1.0177473610950005, "grad_norm": 9.549411930106775, "learning_rate": 2e-06, "loss": 0.1805, "step": 4387 }, { "epoch": 1.0179793527433012, "grad_norm": 9.267107386625788, "learning_rate": 2e-06, "loss": 0.2132, "step": 4388 }, { "epoch": 1.0182113443916019, "grad_norm": 8.690027982071577, "learning_rate": 2e-06, "loss": 0.2782, "step": 4389 }, { "epoch": 1.0184433360399026, "grad_norm": 12.795072421966484, "learning_rate": 2e-06, "loss": 0.274, "step": 4390 }, { "epoch": 1.0186753276882032, "grad_norm": 5.842035700725516, "learning_rate": 2e-06, "loss": 0.1685, "step": 4391 }, { "epoch": 1.018907319336504, "grad_norm": 14.051283930385816, "learning_rate": 2e-06, "loss": 0.254, "step": 4392 }, { "epoch": 1.0191393109848046, "grad_norm": 13.348897262207288, "learning_rate": 2e-06, "loss": 0.2763, "step": 4393 }, { "epoch": 1.0193713026331053, "grad_norm": 22.540889105334582, "learning_rate": 2e-06, "loss": 0.3685, "step": 4394 }, { "epoch": 1.019603294281406, "grad_norm": 11.525100788667038, "learning_rate": 2e-06, "loss": 0.2691, "step": 4395 }, { "epoch": 1.0198352859297066, "grad_norm": 18.453868704349077, "learning_rate": 2e-06, "loss": 0.3714, "step": 4396 }, { "epoch": 1.0200672775780073, "grad_norm": 14.36172001747481, "learning_rate": 2e-06, "loss": 0.3594, "step": 4397 }, { "epoch": 1.0202992692263078, "grad_norm": 11.360356324324796, "learning_rate": 2e-06, "loss": 0.2799, "step": 4398 }, { "epoch": 1.0205312608746084, "grad_norm": 15.46130705982928, "learning_rate": 2e-06, "loss": 0.2868, "step": 4399 }, { "epoch": 1.0207632525229091, "grad_norm": 18.75112773586128, "learning_rate": 2e-06, "loss": 0.2558, "step": 4400 }, { "epoch": 1.0209952441712098, "grad_norm": 14.047846349720592, "learning_rate": 2e-06, "loss": 0.2228, "step": 4401 }, { "epoch": 1.0212272358195105, "grad_norm": 8.51175892802764, "learning_rate": 2e-06, "loss": 0.2604, "step": 4402 }, { "epoch": 1.0214592274678111, "grad_norm": 10.553828364794468, "learning_rate": 2e-06, "loss": 0.1464, "step": 4403 }, { "epoch": 1.0216912191161118, "grad_norm": 16.277207470334066, "learning_rate": 2e-06, "loss": 0.2669, "step": 4404 }, { "epoch": 1.0219232107644125, "grad_norm": 14.650722863677906, "learning_rate": 2e-06, "loss": 0.4093, "step": 4405 }, { "epoch": 1.0221552024127132, "grad_norm": 20.686933586265983, "learning_rate": 2e-06, "loss": 0.3219, "step": 4406 }, { "epoch": 1.0223871940610139, "grad_norm": 18.149022562966458, "learning_rate": 2e-06, "loss": 0.2369, "step": 4407 }, { "epoch": 1.0226191857093145, "grad_norm": 13.441727627483365, "learning_rate": 2e-06, "loss": 0.2794, "step": 4408 }, { "epoch": 1.0228511773576152, "grad_norm": 9.002417763935158, "learning_rate": 2e-06, "loss": 0.1919, "step": 4409 }, { "epoch": 1.023083169005916, "grad_norm": 9.604553834199644, "learning_rate": 2e-06, "loss": 0.1538, "step": 4410 }, { "epoch": 1.0233151606542163, "grad_norm": 7.693559744151872, "learning_rate": 2e-06, "loss": 0.1894, "step": 4411 }, { "epoch": 1.023547152302517, "grad_norm": 13.679783666333623, "learning_rate": 2e-06, "loss": 0.2397, "step": 4412 }, { "epoch": 1.0237791439508177, "grad_norm": 16.107777090519455, "learning_rate": 2e-06, "loss": 0.2707, "step": 4413 }, { "epoch": 1.0240111355991184, "grad_norm": 22.498111337105726, "learning_rate": 2e-06, "loss": 0.235, "step": 4414 }, { "epoch": 1.024243127247419, "grad_norm": 11.17453992197758, "learning_rate": 2e-06, "loss": 0.3602, "step": 4415 }, { "epoch": 1.0244751188957197, "grad_norm": 13.490648517081917, "learning_rate": 2e-06, "loss": 0.2619, "step": 4416 }, { "epoch": 1.0247071105440204, "grad_norm": 14.101245578112804, "learning_rate": 2e-06, "loss": 0.3315, "step": 4417 }, { "epoch": 1.024939102192321, "grad_norm": 14.941819487142078, "learning_rate": 2e-06, "loss": 0.2541, "step": 4418 }, { "epoch": 1.0251710938406218, "grad_norm": 15.85648435924746, "learning_rate": 2e-06, "loss": 0.3405, "step": 4419 }, { "epoch": 1.0254030854889224, "grad_norm": 17.142409104258302, "learning_rate": 2e-06, "loss": 0.2756, "step": 4420 }, { "epoch": 1.0256350771372231, "grad_norm": 14.136208692909497, "learning_rate": 2e-06, "loss": 0.3226, "step": 4421 }, { "epoch": 1.0258670687855238, "grad_norm": 11.199660197653973, "learning_rate": 2e-06, "loss": 0.1979, "step": 4422 }, { "epoch": 1.0260990604338245, "grad_norm": 27.70601825277928, "learning_rate": 2e-06, "loss": 0.374, "step": 4423 }, { "epoch": 1.026331052082125, "grad_norm": 20.308926895318287, "learning_rate": 2e-06, "loss": 0.2835, "step": 4424 }, { "epoch": 1.0265630437304256, "grad_norm": 11.668644087020201, "learning_rate": 2e-06, "loss": 0.159, "step": 4425 }, { "epoch": 1.0267950353787263, "grad_norm": 14.05495343536885, "learning_rate": 2e-06, "loss": 0.2125, "step": 4426 }, { "epoch": 1.027027027027027, "grad_norm": 9.009071699795452, "learning_rate": 2e-06, "loss": 0.28, "step": 4427 }, { "epoch": 1.0272590186753277, "grad_norm": 24.15482898661228, "learning_rate": 2e-06, "loss": 0.2167, "step": 4428 }, { "epoch": 1.0274910103236283, "grad_norm": 12.582332412328576, "learning_rate": 2e-06, "loss": 0.1751, "step": 4429 }, { "epoch": 1.027723001971929, "grad_norm": 21.538185705261743, "learning_rate": 2e-06, "loss": 0.3409, "step": 4430 }, { "epoch": 1.0279549936202297, "grad_norm": 8.712311921794157, "learning_rate": 2e-06, "loss": 0.1466, "step": 4431 }, { "epoch": 1.0281869852685304, "grad_norm": 16.378555006915374, "learning_rate": 2e-06, "loss": 0.3026, "step": 4432 }, { "epoch": 1.028418976916831, "grad_norm": 16.769818815480654, "learning_rate": 2e-06, "loss": 0.2525, "step": 4433 }, { "epoch": 1.0286509685651317, "grad_norm": 7.228007866660501, "learning_rate": 2e-06, "loss": 0.2106, "step": 4434 }, { "epoch": 1.0288829602134324, "grad_norm": 8.775747363226587, "learning_rate": 2e-06, "loss": 0.2117, "step": 4435 }, { "epoch": 1.029114951861733, "grad_norm": 9.435564557301497, "learning_rate": 2e-06, "loss": 0.2511, "step": 4436 }, { "epoch": 1.0293469435100335, "grad_norm": 9.178260857291992, "learning_rate": 2e-06, "loss": 0.2053, "step": 4437 }, { "epoch": 1.0295789351583342, "grad_norm": 15.10714105935708, "learning_rate": 2e-06, "loss": 0.312, "step": 4438 }, { "epoch": 1.0298109268066349, "grad_norm": 19.037191167720213, "learning_rate": 2e-06, "loss": 0.2424, "step": 4439 }, { "epoch": 1.0300429184549356, "grad_norm": 13.052108536860008, "learning_rate": 2e-06, "loss": 0.2326, "step": 4440 }, { "epoch": 1.0302749101032362, "grad_norm": 7.880553741749966, "learning_rate": 2e-06, "loss": 0.1743, "step": 4441 }, { "epoch": 1.030506901751537, "grad_norm": 18.99204868125294, "learning_rate": 2e-06, "loss": 0.2647, "step": 4442 }, { "epoch": 1.0307388933998376, "grad_norm": 41.273427411034746, "learning_rate": 2e-06, "loss": 0.2691, "step": 4443 }, { "epoch": 1.0309708850481383, "grad_norm": 11.052697660150518, "learning_rate": 2e-06, "loss": 0.1936, "step": 4444 }, { "epoch": 1.031202876696439, "grad_norm": 13.15387981143392, "learning_rate": 2e-06, "loss": 0.3272, "step": 4445 }, { "epoch": 1.0314348683447396, "grad_norm": 16.851744488684126, "learning_rate": 2e-06, "loss": 0.3239, "step": 4446 }, { "epoch": 1.0316668599930403, "grad_norm": 24.65220325923243, "learning_rate": 2e-06, "loss": 0.3975, "step": 4447 }, { "epoch": 1.031898851641341, "grad_norm": 10.587443256446537, "learning_rate": 2e-06, "loss": 0.2074, "step": 4448 }, { "epoch": 1.0321308432896417, "grad_norm": 9.47223283101745, "learning_rate": 2e-06, "loss": 0.1383, "step": 4449 }, { "epoch": 1.0323628349379423, "grad_norm": 19.69363174664466, "learning_rate": 2e-06, "loss": 0.348, "step": 4450 }, { "epoch": 1.0325948265862428, "grad_norm": 17.418878994857508, "learning_rate": 2e-06, "loss": 0.3025, "step": 4451 }, { "epoch": 1.0328268182345435, "grad_norm": 17.068050882970642, "learning_rate": 2e-06, "loss": 0.1959, "step": 4452 }, { "epoch": 1.0330588098828442, "grad_norm": 14.348268274620187, "learning_rate": 2e-06, "loss": 0.2605, "step": 4453 }, { "epoch": 1.0332908015311448, "grad_norm": 9.200927968182237, "learning_rate": 2e-06, "loss": 0.3117, "step": 4454 }, { "epoch": 1.0335227931794455, "grad_norm": 11.880208685210507, "learning_rate": 2e-06, "loss": 0.3102, "step": 4455 }, { "epoch": 1.0337547848277462, "grad_norm": 13.195463192921046, "learning_rate": 2e-06, "loss": 0.3442, "step": 4456 }, { "epoch": 1.0339867764760469, "grad_norm": 16.39948374883616, "learning_rate": 2e-06, "loss": 0.2593, "step": 4457 }, { "epoch": 1.0342187681243475, "grad_norm": 21.695075440702873, "learning_rate": 2e-06, "loss": 0.2773, "step": 4458 }, { "epoch": 1.0344507597726482, "grad_norm": 23.236654747422598, "learning_rate": 2e-06, "loss": 0.3134, "step": 4459 }, { "epoch": 1.034682751420949, "grad_norm": 12.113159851456805, "learning_rate": 2e-06, "loss": 0.2446, "step": 4460 }, { "epoch": 1.0349147430692496, "grad_norm": 16.68370613230402, "learning_rate": 2e-06, "loss": 0.2297, "step": 4461 }, { "epoch": 1.0351467347175503, "grad_norm": 14.844254905487963, "learning_rate": 2e-06, "loss": 0.2473, "step": 4462 }, { "epoch": 1.035378726365851, "grad_norm": 20.220611429267375, "learning_rate": 2e-06, "loss": 0.1801, "step": 4463 }, { "epoch": 1.0356107180141514, "grad_norm": 14.251304825494103, "learning_rate": 2e-06, "loss": 0.263, "step": 4464 }, { "epoch": 1.035842709662452, "grad_norm": 22.99719982100096, "learning_rate": 2e-06, "loss": 0.2855, "step": 4465 }, { "epoch": 1.0360747013107527, "grad_norm": 8.585584935855628, "learning_rate": 2e-06, "loss": 0.2178, "step": 4466 }, { "epoch": 1.0363066929590534, "grad_norm": 18.387469782533767, "learning_rate": 2e-06, "loss": 0.21, "step": 4467 }, { "epoch": 1.036538684607354, "grad_norm": 11.401769727716564, "learning_rate": 2e-06, "loss": 0.2416, "step": 4468 }, { "epoch": 1.0367706762556548, "grad_norm": 15.94523695286945, "learning_rate": 2e-06, "loss": 0.311, "step": 4469 }, { "epoch": 1.0370026679039555, "grad_norm": 11.196828766891255, "learning_rate": 2e-06, "loss": 0.2191, "step": 4470 }, { "epoch": 1.0372346595522561, "grad_norm": 11.540823035696267, "learning_rate": 2e-06, "loss": 0.1927, "step": 4471 }, { "epoch": 1.0374666512005568, "grad_norm": 14.536796413435079, "learning_rate": 2e-06, "loss": 0.3304, "step": 4472 }, { "epoch": 1.0376986428488575, "grad_norm": 14.343576899291822, "learning_rate": 2e-06, "loss": 0.2313, "step": 4473 }, { "epoch": 1.0379306344971582, "grad_norm": 16.922838282153773, "learning_rate": 2e-06, "loss": 0.3408, "step": 4474 }, { "epoch": 1.0381626261454588, "grad_norm": 11.707108143377337, "learning_rate": 2e-06, "loss": 0.2202, "step": 4475 }, { "epoch": 1.0383946177937595, "grad_norm": 8.517094184401891, "learning_rate": 2e-06, "loss": 0.2071, "step": 4476 }, { "epoch": 1.0386266094420602, "grad_norm": 12.389320998949996, "learning_rate": 2e-06, "loss": 0.2455, "step": 4477 }, { "epoch": 1.0388586010903607, "grad_norm": 13.645512621397236, "learning_rate": 2e-06, "loss": 0.2399, "step": 4478 }, { "epoch": 1.0390905927386613, "grad_norm": 20.161252864133314, "learning_rate": 2e-06, "loss": 0.2882, "step": 4479 }, { "epoch": 1.039322584386962, "grad_norm": 19.924490724699734, "learning_rate": 2e-06, "loss": 0.2941, "step": 4480 }, { "epoch": 1.0395545760352627, "grad_norm": 17.642672690575644, "learning_rate": 2e-06, "loss": 0.2028, "step": 4481 }, { "epoch": 1.0397865676835634, "grad_norm": 11.183339327538803, "learning_rate": 2e-06, "loss": 0.216, "step": 4482 }, { "epoch": 1.040018559331864, "grad_norm": 16.449886451041284, "learning_rate": 2e-06, "loss": 0.2215, "step": 4483 }, { "epoch": 1.0402505509801647, "grad_norm": 23.901687440717772, "learning_rate": 2e-06, "loss": 0.3176, "step": 4484 }, { "epoch": 1.0404825426284654, "grad_norm": 10.718689618673862, "learning_rate": 2e-06, "loss": 0.2306, "step": 4485 }, { "epoch": 1.040714534276766, "grad_norm": 13.623712550846152, "learning_rate": 2e-06, "loss": 0.2108, "step": 4486 }, { "epoch": 1.0409465259250668, "grad_norm": 16.330073437541824, "learning_rate": 2e-06, "loss": 0.3593, "step": 4487 }, { "epoch": 1.0411785175733674, "grad_norm": 12.825110261820797, "learning_rate": 2e-06, "loss": 0.2464, "step": 4488 }, { "epoch": 1.0414105092216681, "grad_norm": 7.216701052005836, "learning_rate": 2e-06, "loss": 0.1575, "step": 4489 }, { "epoch": 1.0416425008699686, "grad_norm": 22.15548889662668, "learning_rate": 2e-06, "loss": 0.1809, "step": 4490 }, { "epoch": 1.0418744925182692, "grad_norm": 9.994987316571109, "learning_rate": 2e-06, "loss": 0.1943, "step": 4491 }, { "epoch": 1.04210648416657, "grad_norm": 8.941695765183466, "learning_rate": 2e-06, "loss": 0.2319, "step": 4492 }, { "epoch": 1.0423384758148706, "grad_norm": 15.189764942546732, "learning_rate": 2e-06, "loss": 0.2488, "step": 4493 }, { "epoch": 1.0425704674631713, "grad_norm": 13.35000274072905, "learning_rate": 2e-06, "loss": 0.2118, "step": 4494 }, { "epoch": 1.042802459111472, "grad_norm": 13.057315814446655, "learning_rate": 2e-06, "loss": 0.2257, "step": 4495 }, { "epoch": 1.0430344507597726, "grad_norm": 12.011177476914142, "learning_rate": 2e-06, "loss": 0.2612, "step": 4496 }, { "epoch": 1.0432664424080733, "grad_norm": 18.472444083720685, "learning_rate": 2e-06, "loss": 0.2278, "step": 4497 }, { "epoch": 1.043498434056374, "grad_norm": 11.947161039265346, "learning_rate": 2e-06, "loss": 0.2028, "step": 4498 }, { "epoch": 1.0437304257046747, "grad_norm": 11.683468527357304, "learning_rate": 2e-06, "loss": 0.2638, "step": 4499 }, { "epoch": 1.0439624173529753, "grad_norm": 14.343236326278143, "learning_rate": 2e-06, "loss": 0.302, "step": 4500 }, { "epoch": 1.044194409001276, "grad_norm": 10.710904819129617, "learning_rate": 2e-06, "loss": 0.2985, "step": 4501 }, { "epoch": 1.0444264006495767, "grad_norm": 12.13213015738718, "learning_rate": 2e-06, "loss": 0.242, "step": 4502 }, { "epoch": 1.0446583922978774, "grad_norm": 18.94819133437833, "learning_rate": 2e-06, "loss": 0.3138, "step": 4503 }, { "epoch": 1.0448903839461778, "grad_norm": 12.539901478278948, "learning_rate": 2e-06, "loss": 0.2392, "step": 4504 }, { "epoch": 1.0451223755944785, "grad_norm": 24.18437820339448, "learning_rate": 2e-06, "loss": 0.2755, "step": 4505 }, { "epoch": 1.0453543672427792, "grad_norm": 23.383318703382532, "learning_rate": 2e-06, "loss": 0.3919, "step": 4506 }, { "epoch": 1.0455863588910799, "grad_norm": 9.67869670349899, "learning_rate": 2e-06, "loss": 0.2123, "step": 4507 }, { "epoch": 1.0458183505393805, "grad_norm": 14.588716396765122, "learning_rate": 2e-06, "loss": 0.2996, "step": 4508 }, { "epoch": 1.0460503421876812, "grad_norm": 12.840467825576411, "learning_rate": 2e-06, "loss": 0.2757, "step": 4509 }, { "epoch": 1.046282333835982, "grad_norm": 14.272494811690157, "learning_rate": 2e-06, "loss": 0.2401, "step": 4510 }, { "epoch": 1.0465143254842826, "grad_norm": 20.01918249140347, "learning_rate": 2e-06, "loss": 0.3383, "step": 4511 }, { "epoch": 1.0467463171325833, "grad_norm": 7.556423609497603, "learning_rate": 2e-06, "loss": 0.1243, "step": 4512 }, { "epoch": 1.046978308780884, "grad_norm": 8.6970267586175, "learning_rate": 2e-06, "loss": 0.1976, "step": 4513 }, { "epoch": 1.0472103004291846, "grad_norm": 24.096318328090813, "learning_rate": 2e-06, "loss": 0.2707, "step": 4514 }, { "epoch": 1.0474422920774853, "grad_norm": 7.8467329542157565, "learning_rate": 2e-06, "loss": 0.1836, "step": 4515 }, { "epoch": 1.047674283725786, "grad_norm": 29.928182855435207, "learning_rate": 2e-06, "loss": 0.3649, "step": 4516 }, { "epoch": 1.0479062753740864, "grad_norm": 13.927439113839654, "learning_rate": 2e-06, "loss": 0.3017, "step": 4517 }, { "epoch": 1.048138267022387, "grad_norm": 18.88795094602496, "learning_rate": 2e-06, "loss": 0.225, "step": 4518 }, { "epoch": 1.0483702586706878, "grad_norm": 9.620669829192538, "learning_rate": 2e-06, "loss": 0.2321, "step": 4519 }, { "epoch": 1.0486022503189885, "grad_norm": 17.223106187454317, "learning_rate": 2e-06, "loss": 0.2882, "step": 4520 }, { "epoch": 1.0488342419672891, "grad_norm": 8.646155394897107, "learning_rate": 2e-06, "loss": 0.3076, "step": 4521 }, { "epoch": 1.0490662336155898, "grad_norm": 8.778649493758728, "learning_rate": 2e-06, "loss": 0.2392, "step": 4522 }, { "epoch": 1.0492982252638905, "grad_norm": 7.918122268225447, "learning_rate": 2e-06, "loss": 0.1816, "step": 4523 }, { "epoch": 1.0495302169121912, "grad_norm": 11.01843669150523, "learning_rate": 2e-06, "loss": 0.2619, "step": 4524 }, { "epoch": 1.0497622085604918, "grad_norm": 13.875284633960788, "learning_rate": 2e-06, "loss": 0.2297, "step": 4525 }, { "epoch": 1.0499942002087925, "grad_norm": 20.84523992516674, "learning_rate": 2e-06, "loss": 0.2333, "step": 4526 }, { "epoch": 1.0502261918570932, "grad_norm": 7.755997981352427, "learning_rate": 2e-06, "loss": 0.1873, "step": 4527 }, { "epoch": 1.0504581835053939, "grad_norm": 9.333137819737173, "learning_rate": 2e-06, "loss": 0.2753, "step": 4528 }, { "epoch": 1.0506901751536946, "grad_norm": 39.160017340477, "learning_rate": 2e-06, "loss": 0.2399, "step": 4529 }, { "epoch": 1.0509221668019952, "grad_norm": 25.282757342372953, "learning_rate": 2e-06, "loss": 0.2774, "step": 4530 }, { "epoch": 1.0511541584502957, "grad_norm": 12.916061156883767, "learning_rate": 2e-06, "loss": 0.1881, "step": 4531 }, { "epoch": 1.0513861500985964, "grad_norm": 17.357923519315666, "learning_rate": 2e-06, "loss": 0.3112, "step": 4532 }, { "epoch": 1.051618141746897, "grad_norm": 8.057295108120414, "learning_rate": 2e-06, "loss": 0.1589, "step": 4533 }, { "epoch": 1.0518501333951977, "grad_norm": 11.473880621998514, "learning_rate": 2e-06, "loss": 0.1511, "step": 4534 }, { "epoch": 1.0520821250434984, "grad_norm": 7.7417751611696835, "learning_rate": 2e-06, "loss": 0.1299, "step": 4535 }, { "epoch": 1.052314116691799, "grad_norm": 15.442864167699556, "learning_rate": 2e-06, "loss": 0.3142, "step": 4536 }, { "epoch": 1.0525461083400998, "grad_norm": 17.60484150290361, "learning_rate": 2e-06, "loss": 0.2372, "step": 4537 }, { "epoch": 1.0527780999884004, "grad_norm": 16.936105884989708, "learning_rate": 2e-06, "loss": 0.289, "step": 4538 }, { "epoch": 1.0530100916367011, "grad_norm": 11.843293598367202, "learning_rate": 2e-06, "loss": 0.2139, "step": 4539 }, { "epoch": 1.0532420832850018, "grad_norm": 23.64743039681951, "learning_rate": 2e-06, "loss": 0.3383, "step": 4540 }, { "epoch": 1.0534740749333025, "grad_norm": 17.546494934902448, "learning_rate": 2e-06, "loss": 0.2541, "step": 4541 }, { "epoch": 1.0537060665816032, "grad_norm": 15.785905089669802, "learning_rate": 2e-06, "loss": 0.2919, "step": 4542 }, { "epoch": 1.0539380582299038, "grad_norm": 15.552562721537294, "learning_rate": 2e-06, "loss": 0.313, "step": 4543 }, { "epoch": 1.0541700498782043, "grad_norm": 13.38274443962515, "learning_rate": 2e-06, "loss": 0.2333, "step": 4544 }, { "epoch": 1.054402041526505, "grad_norm": 14.364837287011094, "learning_rate": 2e-06, "loss": 0.3176, "step": 4545 }, { "epoch": 1.0546340331748056, "grad_norm": 10.675992961826456, "learning_rate": 2e-06, "loss": 0.1771, "step": 4546 }, { "epoch": 1.0548660248231063, "grad_norm": 25.91576359257879, "learning_rate": 2e-06, "loss": 0.2628, "step": 4547 }, { "epoch": 1.055098016471407, "grad_norm": 10.117754686145902, "learning_rate": 2e-06, "loss": 0.2334, "step": 4548 }, { "epoch": 1.0553300081197077, "grad_norm": 10.059227091582052, "learning_rate": 2e-06, "loss": 0.1506, "step": 4549 }, { "epoch": 1.0555619997680084, "grad_norm": 16.908727808786647, "learning_rate": 2e-06, "loss": 0.2702, "step": 4550 }, { "epoch": 1.055793991416309, "grad_norm": 14.386589699765908, "learning_rate": 2e-06, "loss": 0.2534, "step": 4551 }, { "epoch": 1.0560259830646097, "grad_norm": 10.248441451994353, "learning_rate": 2e-06, "loss": 0.2022, "step": 4552 }, { "epoch": 1.0562579747129104, "grad_norm": 7.8765393725949, "learning_rate": 2e-06, "loss": 0.1391, "step": 4553 }, { "epoch": 1.056489966361211, "grad_norm": 13.745768749508281, "learning_rate": 2e-06, "loss": 0.3445, "step": 4554 }, { "epoch": 1.0567219580095117, "grad_norm": 20.73572027129585, "learning_rate": 2e-06, "loss": 0.2545, "step": 4555 }, { "epoch": 1.0569539496578124, "grad_norm": 19.14940776893297, "learning_rate": 2e-06, "loss": 0.239, "step": 4556 }, { "epoch": 1.0571859413061129, "grad_norm": 12.890430025290328, "learning_rate": 2e-06, "loss": 0.29, "step": 4557 }, { "epoch": 1.0574179329544136, "grad_norm": 18.20419221939733, "learning_rate": 2e-06, "loss": 0.3279, "step": 4558 }, { "epoch": 1.0576499246027142, "grad_norm": 12.980910847550414, "learning_rate": 2e-06, "loss": 0.2111, "step": 4559 }, { "epoch": 1.057881916251015, "grad_norm": 8.458796386653953, "learning_rate": 2e-06, "loss": 0.1938, "step": 4560 }, { "epoch": 1.0581139078993156, "grad_norm": 21.75891206290162, "learning_rate": 2e-06, "loss": 0.2288, "step": 4561 }, { "epoch": 1.0583458995476163, "grad_norm": 13.117360758513612, "learning_rate": 2e-06, "loss": 0.2496, "step": 4562 }, { "epoch": 1.058577891195917, "grad_norm": 18.092807169325763, "learning_rate": 2e-06, "loss": 0.2883, "step": 4563 }, { "epoch": 1.0588098828442176, "grad_norm": 28.11071353474641, "learning_rate": 2e-06, "loss": 0.2404, "step": 4564 }, { "epoch": 1.0590418744925183, "grad_norm": 11.093359793460808, "learning_rate": 2e-06, "loss": 0.2871, "step": 4565 }, { "epoch": 1.059273866140819, "grad_norm": 5.708558446753598, "learning_rate": 2e-06, "loss": 0.1646, "step": 4566 }, { "epoch": 1.0595058577891197, "grad_norm": 8.007983606895225, "learning_rate": 2e-06, "loss": 0.1235, "step": 4567 }, { "epoch": 1.0597378494374203, "grad_norm": 23.691691410023708, "learning_rate": 2e-06, "loss": 0.4476, "step": 4568 }, { "epoch": 1.059969841085721, "grad_norm": 14.342884710913854, "learning_rate": 2e-06, "loss": 0.2121, "step": 4569 }, { "epoch": 1.0602018327340215, "grad_norm": 20.421082262288795, "learning_rate": 2e-06, "loss": 0.2337, "step": 4570 }, { "epoch": 1.0604338243823221, "grad_norm": 14.283943578587204, "learning_rate": 2e-06, "loss": 0.3167, "step": 4571 }, { "epoch": 1.0606658160306228, "grad_norm": 14.32307432911022, "learning_rate": 2e-06, "loss": 0.3219, "step": 4572 }, { "epoch": 1.0608978076789235, "grad_norm": 23.713879420441952, "learning_rate": 2e-06, "loss": 0.42, "step": 4573 }, { "epoch": 1.0611297993272242, "grad_norm": 14.388879223470608, "learning_rate": 2e-06, "loss": 0.2093, "step": 4574 }, { "epoch": 1.0613617909755249, "grad_norm": 14.770522362129077, "learning_rate": 2e-06, "loss": 0.2532, "step": 4575 }, { "epoch": 1.0615937826238255, "grad_norm": 9.670132297957418, "learning_rate": 2e-06, "loss": 0.2032, "step": 4576 }, { "epoch": 1.0618257742721262, "grad_norm": 12.174822545988004, "learning_rate": 2e-06, "loss": 0.1677, "step": 4577 }, { "epoch": 1.0620577659204269, "grad_norm": 16.409913639531826, "learning_rate": 2e-06, "loss": 0.305, "step": 4578 }, { "epoch": 1.0622897575687276, "grad_norm": 13.679220521114875, "learning_rate": 2e-06, "loss": 0.3606, "step": 4579 }, { "epoch": 1.0625217492170282, "grad_norm": 15.351767353163767, "learning_rate": 2e-06, "loss": 0.2492, "step": 4580 }, { "epoch": 1.062753740865329, "grad_norm": 14.55264537341593, "learning_rate": 2e-06, "loss": 0.194, "step": 4581 }, { "epoch": 1.0629857325136296, "grad_norm": 23.70433697778693, "learning_rate": 2e-06, "loss": 0.2432, "step": 4582 }, { "epoch": 1.0632177241619303, "grad_norm": 13.494892360681849, "learning_rate": 2e-06, "loss": 0.2695, "step": 4583 }, { "epoch": 1.0634497158102307, "grad_norm": 14.626691010991978, "learning_rate": 2e-06, "loss": 0.2345, "step": 4584 }, { "epoch": 1.0636817074585314, "grad_norm": 13.807386499526016, "learning_rate": 2e-06, "loss": 0.1773, "step": 4585 }, { "epoch": 1.063913699106832, "grad_norm": 14.377411897301746, "learning_rate": 2e-06, "loss": 0.206, "step": 4586 }, { "epoch": 1.0641456907551328, "grad_norm": 10.717646496233396, "learning_rate": 2e-06, "loss": 0.1543, "step": 4587 }, { "epoch": 1.0643776824034334, "grad_norm": 12.121576229930133, "learning_rate": 2e-06, "loss": 0.1937, "step": 4588 }, { "epoch": 1.0646096740517341, "grad_norm": 10.377540319043618, "learning_rate": 2e-06, "loss": 0.2044, "step": 4589 }, { "epoch": 1.0648416657000348, "grad_norm": 16.025433031691097, "learning_rate": 2e-06, "loss": 0.2279, "step": 4590 }, { "epoch": 1.0650736573483355, "grad_norm": 28.09748111844974, "learning_rate": 2e-06, "loss": 0.271, "step": 4591 }, { "epoch": 1.0653056489966362, "grad_norm": 15.233408719058405, "learning_rate": 2e-06, "loss": 0.2557, "step": 4592 }, { "epoch": 1.0655376406449368, "grad_norm": 11.688531867610594, "learning_rate": 2e-06, "loss": 0.2213, "step": 4593 }, { "epoch": 1.0657696322932375, "grad_norm": 12.478963044020029, "learning_rate": 2e-06, "loss": 0.3794, "step": 4594 }, { "epoch": 1.0660016239415382, "grad_norm": 12.255769188520668, "learning_rate": 2e-06, "loss": 0.1958, "step": 4595 }, { "epoch": 1.0662336155898389, "grad_norm": 14.479545376110917, "learning_rate": 2e-06, "loss": 0.2721, "step": 4596 }, { "epoch": 1.0664656072381393, "grad_norm": 9.375689405742289, "learning_rate": 2e-06, "loss": 0.2932, "step": 4597 }, { "epoch": 1.06669759888644, "grad_norm": 10.315314477724502, "learning_rate": 2e-06, "loss": 0.2008, "step": 4598 }, { "epoch": 1.0669295905347407, "grad_norm": 21.631875638569465, "learning_rate": 2e-06, "loss": 0.3366, "step": 4599 }, { "epoch": 1.0671615821830414, "grad_norm": 27.067384794430104, "learning_rate": 2e-06, "loss": 0.4603, "step": 4600 }, { "epoch": 1.067393573831342, "grad_norm": 9.802424258760192, "learning_rate": 2e-06, "loss": 0.1782, "step": 4601 }, { "epoch": 1.0676255654796427, "grad_norm": 10.320769127337003, "learning_rate": 2e-06, "loss": 0.1942, "step": 4602 }, { "epoch": 1.0678575571279434, "grad_norm": 12.10330234840912, "learning_rate": 2e-06, "loss": 0.2389, "step": 4603 }, { "epoch": 1.068089548776244, "grad_norm": 9.482829401573873, "learning_rate": 2e-06, "loss": 0.2014, "step": 4604 }, { "epoch": 1.0683215404245447, "grad_norm": 13.2444842153338, "learning_rate": 2e-06, "loss": 0.275, "step": 4605 }, { "epoch": 1.0685535320728454, "grad_norm": 9.059155667239775, "learning_rate": 2e-06, "loss": 0.1656, "step": 4606 }, { "epoch": 1.068785523721146, "grad_norm": 22.107718337840467, "learning_rate": 2e-06, "loss": 0.3723, "step": 4607 }, { "epoch": 1.0690175153694468, "grad_norm": 16.721630962015563, "learning_rate": 2e-06, "loss": 0.2691, "step": 4608 }, { "epoch": 1.0692495070177475, "grad_norm": 14.552520243084661, "learning_rate": 2e-06, "loss": 0.2485, "step": 4609 }, { "epoch": 1.0694814986660481, "grad_norm": 16.363779480701567, "learning_rate": 2e-06, "loss": 0.2091, "step": 4610 }, { "epoch": 1.0697134903143486, "grad_norm": 6.404394903208128, "learning_rate": 2e-06, "loss": 0.2208, "step": 4611 }, { "epoch": 1.0699454819626493, "grad_norm": 20.95385822012562, "learning_rate": 2e-06, "loss": 0.2749, "step": 4612 }, { "epoch": 1.07017747361095, "grad_norm": 8.509211364750067, "learning_rate": 2e-06, "loss": 0.1986, "step": 4613 }, { "epoch": 1.0704094652592506, "grad_norm": 9.683183528937615, "learning_rate": 2e-06, "loss": 0.1457, "step": 4614 }, { "epoch": 1.0706414569075513, "grad_norm": 9.248345401803073, "learning_rate": 2e-06, "loss": 0.2679, "step": 4615 }, { "epoch": 1.070873448555852, "grad_norm": 10.304306796998121, "learning_rate": 2e-06, "loss": 0.2017, "step": 4616 }, { "epoch": 1.0711054402041527, "grad_norm": 12.830372489140107, "learning_rate": 2e-06, "loss": 0.2048, "step": 4617 }, { "epoch": 1.0713374318524533, "grad_norm": 15.16407227138631, "learning_rate": 2e-06, "loss": 0.3194, "step": 4618 }, { "epoch": 1.071569423500754, "grad_norm": 19.579768580627913, "learning_rate": 2e-06, "loss": 0.2052, "step": 4619 }, { "epoch": 1.0718014151490547, "grad_norm": 18.4708758331504, "learning_rate": 2e-06, "loss": 0.2765, "step": 4620 }, { "epoch": 1.0720334067973554, "grad_norm": 16.340610589492915, "learning_rate": 2e-06, "loss": 0.1827, "step": 4621 }, { "epoch": 1.072265398445656, "grad_norm": 15.476040559442902, "learning_rate": 2e-06, "loss": 0.3229, "step": 4622 }, { "epoch": 1.0724973900939565, "grad_norm": 11.731130456848682, "learning_rate": 2e-06, "loss": 0.2831, "step": 4623 }, { "epoch": 1.0727293817422572, "grad_norm": 14.988365226355235, "learning_rate": 2e-06, "loss": 0.3421, "step": 4624 }, { "epoch": 1.0729613733905579, "grad_norm": 16.246876392051842, "learning_rate": 2e-06, "loss": 0.2171, "step": 4625 }, { "epoch": 1.0731933650388585, "grad_norm": 16.63092189706823, "learning_rate": 2e-06, "loss": 0.2903, "step": 4626 }, { "epoch": 1.0734253566871592, "grad_norm": 9.829098076638097, "learning_rate": 2e-06, "loss": 0.1791, "step": 4627 }, { "epoch": 1.07365734833546, "grad_norm": 12.44972854207715, "learning_rate": 2e-06, "loss": 0.1847, "step": 4628 }, { "epoch": 1.0738893399837606, "grad_norm": 15.653560086600205, "learning_rate": 2e-06, "loss": 0.2672, "step": 4629 }, { "epoch": 1.0741213316320612, "grad_norm": 6.496762545985574, "learning_rate": 2e-06, "loss": 0.1861, "step": 4630 }, { "epoch": 1.074353323280362, "grad_norm": 8.684805225175854, "learning_rate": 2e-06, "loss": 0.1961, "step": 4631 }, { "epoch": 1.0745853149286626, "grad_norm": 13.618572813623743, "learning_rate": 2e-06, "loss": 0.2212, "step": 4632 }, { "epoch": 1.0748173065769633, "grad_norm": 11.440261368799872, "learning_rate": 2e-06, "loss": 0.2728, "step": 4633 }, { "epoch": 1.075049298225264, "grad_norm": 8.376117985507744, "learning_rate": 2e-06, "loss": 0.2127, "step": 4634 }, { "epoch": 1.0752812898735646, "grad_norm": 10.788704052608766, "learning_rate": 2e-06, "loss": 0.2423, "step": 4635 }, { "epoch": 1.0755132815218653, "grad_norm": 12.356605240160762, "learning_rate": 2e-06, "loss": 0.2149, "step": 4636 }, { "epoch": 1.0757452731701658, "grad_norm": 8.646560020577828, "learning_rate": 2e-06, "loss": 0.1408, "step": 4637 }, { "epoch": 1.0759772648184665, "grad_norm": 22.873044231235614, "learning_rate": 2e-06, "loss": 0.3564, "step": 4638 }, { "epoch": 1.0762092564667671, "grad_norm": 13.115236584747734, "learning_rate": 2e-06, "loss": 0.327, "step": 4639 }, { "epoch": 1.0764412481150678, "grad_norm": 16.990624415839985, "learning_rate": 2e-06, "loss": 0.2237, "step": 4640 }, { "epoch": 1.0766732397633685, "grad_norm": 24.284509006768378, "learning_rate": 2e-06, "loss": 0.3058, "step": 4641 }, { "epoch": 1.0769052314116692, "grad_norm": 13.264059901096275, "learning_rate": 2e-06, "loss": 0.2127, "step": 4642 }, { "epoch": 1.0771372230599698, "grad_norm": 17.962152197713916, "learning_rate": 2e-06, "loss": 0.4182, "step": 4643 }, { "epoch": 1.0773692147082705, "grad_norm": 15.13566581385218, "learning_rate": 2e-06, "loss": 0.3004, "step": 4644 }, { "epoch": 1.0776012063565712, "grad_norm": 9.107776851142189, "learning_rate": 2e-06, "loss": 0.1799, "step": 4645 }, { "epoch": 1.0778331980048719, "grad_norm": 15.416609074726267, "learning_rate": 2e-06, "loss": 0.2687, "step": 4646 }, { "epoch": 1.0780651896531726, "grad_norm": 60.35641994256627, "learning_rate": 2e-06, "loss": 0.2461, "step": 4647 }, { "epoch": 1.0782971813014732, "grad_norm": 11.96075666327412, "learning_rate": 2e-06, "loss": 0.1971, "step": 4648 }, { "epoch": 1.078529172949774, "grad_norm": 13.424063532979817, "learning_rate": 2e-06, "loss": 0.2106, "step": 4649 }, { "epoch": 1.0787611645980744, "grad_norm": 12.773540979848594, "learning_rate": 2e-06, "loss": 0.1968, "step": 4650 }, { "epoch": 1.078993156246375, "grad_norm": 19.417735202497628, "learning_rate": 2e-06, "loss": 0.2292, "step": 4651 }, { "epoch": 1.0792251478946757, "grad_norm": 6.308682768305041, "learning_rate": 2e-06, "loss": 0.1197, "step": 4652 }, { "epoch": 1.0794571395429764, "grad_norm": 7.2959765587506205, "learning_rate": 2e-06, "loss": 0.2321, "step": 4653 }, { "epoch": 1.079689131191277, "grad_norm": 15.44589618064587, "learning_rate": 2e-06, "loss": 0.2507, "step": 4654 }, { "epoch": 1.0799211228395778, "grad_norm": 9.29934262830167, "learning_rate": 2e-06, "loss": 0.2287, "step": 4655 }, { "epoch": 1.0801531144878784, "grad_norm": 15.286437259258648, "learning_rate": 2e-06, "loss": 0.3175, "step": 4656 }, { "epoch": 1.080385106136179, "grad_norm": 13.27664930644032, "learning_rate": 2e-06, "loss": 0.2383, "step": 4657 }, { "epoch": 1.0806170977844798, "grad_norm": 12.263893963668028, "learning_rate": 2e-06, "loss": 0.2594, "step": 4658 }, { "epoch": 1.0808490894327805, "grad_norm": 6.0357490100565006, "learning_rate": 2e-06, "loss": 0.1549, "step": 4659 }, { "epoch": 1.0810810810810811, "grad_norm": 16.18608723157087, "learning_rate": 2e-06, "loss": 0.2984, "step": 4660 }, { "epoch": 1.0813130727293818, "grad_norm": 9.181713104002375, "learning_rate": 2e-06, "loss": 0.1386, "step": 4661 }, { "epoch": 1.0815450643776825, "grad_norm": 5.644022388995841, "learning_rate": 2e-06, "loss": 0.1688, "step": 4662 }, { "epoch": 1.0817770560259832, "grad_norm": 14.739106617725085, "learning_rate": 2e-06, "loss": 0.2945, "step": 4663 }, { "epoch": 1.0820090476742836, "grad_norm": 11.676188528472048, "learning_rate": 2e-06, "loss": 0.1579, "step": 4664 }, { "epoch": 1.0822410393225843, "grad_norm": 9.657311316004925, "learning_rate": 2e-06, "loss": 0.1834, "step": 4665 }, { "epoch": 1.082473030970885, "grad_norm": 31.149815682668027, "learning_rate": 2e-06, "loss": 0.5156, "step": 4666 }, { "epoch": 1.0827050226191857, "grad_norm": 22.49755979397703, "learning_rate": 2e-06, "loss": 0.3144, "step": 4667 }, { "epoch": 1.0829370142674863, "grad_norm": 16.400850906073345, "learning_rate": 2e-06, "loss": 0.24, "step": 4668 }, { "epoch": 1.083169005915787, "grad_norm": 14.898844803167906, "learning_rate": 2e-06, "loss": 0.2425, "step": 4669 }, { "epoch": 1.0834009975640877, "grad_norm": 15.375770635265186, "learning_rate": 2e-06, "loss": 0.235, "step": 4670 }, { "epoch": 1.0836329892123884, "grad_norm": 8.903667496388419, "learning_rate": 2e-06, "loss": 0.1767, "step": 4671 }, { "epoch": 1.083864980860689, "grad_norm": 20.36801862973905, "learning_rate": 2e-06, "loss": 0.34, "step": 4672 }, { "epoch": 1.0840969725089897, "grad_norm": 16.623785198060162, "learning_rate": 2e-06, "loss": 0.3051, "step": 4673 }, { "epoch": 1.0843289641572904, "grad_norm": 21.712004714291528, "learning_rate": 2e-06, "loss": 0.2214, "step": 4674 }, { "epoch": 1.084560955805591, "grad_norm": 18.64164274149171, "learning_rate": 2e-06, "loss": 0.3313, "step": 4675 }, { "epoch": 1.0847929474538915, "grad_norm": 15.021075091917751, "learning_rate": 2e-06, "loss": 0.2372, "step": 4676 }, { "epoch": 1.0850249391021922, "grad_norm": 13.172486147361289, "learning_rate": 2e-06, "loss": 0.2282, "step": 4677 }, { "epoch": 1.085256930750493, "grad_norm": 14.952373836405085, "learning_rate": 2e-06, "loss": 0.2649, "step": 4678 }, { "epoch": 1.0854889223987936, "grad_norm": 12.057002528751237, "learning_rate": 2e-06, "loss": 0.2557, "step": 4679 }, { "epoch": 1.0857209140470943, "grad_norm": 14.097720996300128, "learning_rate": 2e-06, "loss": 0.1673, "step": 4680 }, { "epoch": 1.085952905695395, "grad_norm": 13.424131645799678, "learning_rate": 2e-06, "loss": 0.2547, "step": 4681 }, { "epoch": 1.0861848973436956, "grad_norm": 26.119273464189042, "learning_rate": 2e-06, "loss": 0.2301, "step": 4682 }, { "epoch": 1.0864168889919963, "grad_norm": 10.85903450705642, "learning_rate": 2e-06, "loss": 0.3203, "step": 4683 }, { "epoch": 1.086648880640297, "grad_norm": 12.192959640288787, "learning_rate": 2e-06, "loss": 0.2221, "step": 4684 }, { "epoch": 1.0868808722885976, "grad_norm": 12.991969981697782, "learning_rate": 2e-06, "loss": 0.2164, "step": 4685 }, { "epoch": 1.0871128639368983, "grad_norm": 13.438897594700979, "learning_rate": 2e-06, "loss": 0.2253, "step": 4686 }, { "epoch": 1.087344855585199, "grad_norm": 21.185991926389537, "learning_rate": 2e-06, "loss": 0.2621, "step": 4687 }, { "epoch": 1.0875768472334997, "grad_norm": 7.49060315068765, "learning_rate": 2e-06, "loss": 0.152, "step": 4688 }, { "epoch": 1.0878088388818004, "grad_norm": 10.692675079619084, "learning_rate": 2e-06, "loss": 0.206, "step": 4689 }, { "epoch": 1.088040830530101, "grad_norm": 12.085593295785767, "learning_rate": 2e-06, "loss": 0.2123, "step": 4690 }, { "epoch": 1.0882728221784015, "grad_norm": 15.658009069301887, "learning_rate": 2e-06, "loss": 0.2292, "step": 4691 }, { "epoch": 1.0885048138267022, "grad_norm": 21.839791385577954, "learning_rate": 2e-06, "loss": 0.2165, "step": 4692 }, { "epoch": 1.0887368054750028, "grad_norm": 20.168667031089573, "learning_rate": 2e-06, "loss": 0.4257, "step": 4693 }, { "epoch": 1.0889687971233035, "grad_norm": 13.532395866046311, "learning_rate": 2e-06, "loss": 0.3163, "step": 4694 }, { "epoch": 1.0892007887716042, "grad_norm": 7.037441919712227, "learning_rate": 2e-06, "loss": 0.1576, "step": 4695 }, { "epoch": 1.0894327804199049, "grad_norm": 6.080678326111714, "learning_rate": 2e-06, "loss": 0.1554, "step": 4696 }, { "epoch": 1.0896647720682056, "grad_norm": 11.62312585625033, "learning_rate": 2e-06, "loss": 0.2072, "step": 4697 }, { "epoch": 1.0898967637165062, "grad_norm": 25.07140286851281, "learning_rate": 2e-06, "loss": 0.3817, "step": 4698 }, { "epoch": 1.090128755364807, "grad_norm": 14.454005153102496, "learning_rate": 2e-06, "loss": 0.237, "step": 4699 }, { "epoch": 1.0903607470131076, "grad_norm": 33.65593280593324, "learning_rate": 2e-06, "loss": 0.3603, "step": 4700 }, { "epoch": 1.0905927386614083, "grad_norm": 24.21819465836278, "learning_rate": 2e-06, "loss": 0.3173, "step": 4701 }, { "epoch": 1.090824730309709, "grad_norm": 6.002003352997946, "learning_rate": 2e-06, "loss": 0.1649, "step": 4702 }, { "epoch": 1.0910567219580094, "grad_norm": 15.363931384625337, "learning_rate": 2e-06, "loss": 0.3317, "step": 4703 }, { "epoch": 1.09128871360631, "grad_norm": 19.767472015606725, "learning_rate": 2e-06, "loss": 0.3107, "step": 4704 }, { "epoch": 1.0915207052546108, "grad_norm": 15.38698131756278, "learning_rate": 2e-06, "loss": 0.2003, "step": 4705 }, { "epoch": 1.0917526969029114, "grad_norm": 12.01919931645781, "learning_rate": 2e-06, "loss": 0.2028, "step": 4706 }, { "epoch": 1.0919846885512121, "grad_norm": 15.942894539948433, "learning_rate": 2e-06, "loss": 0.2649, "step": 4707 }, { "epoch": 1.0922166801995128, "grad_norm": 14.38917599038256, "learning_rate": 2e-06, "loss": 0.2061, "step": 4708 }, { "epoch": 1.0924486718478135, "grad_norm": 11.833878380303931, "learning_rate": 2e-06, "loss": 0.2203, "step": 4709 }, { "epoch": 1.0926806634961141, "grad_norm": 16.914912934828145, "learning_rate": 2e-06, "loss": 0.2783, "step": 4710 }, { "epoch": 1.0929126551444148, "grad_norm": 17.266074010543782, "learning_rate": 2e-06, "loss": 0.3055, "step": 4711 }, { "epoch": 1.0931446467927155, "grad_norm": 18.75284359060675, "learning_rate": 2e-06, "loss": 0.283, "step": 4712 }, { "epoch": 1.0933766384410162, "grad_norm": 12.43982576036956, "learning_rate": 2e-06, "loss": 0.1862, "step": 4713 }, { "epoch": 1.0936086300893169, "grad_norm": 14.80054201183015, "learning_rate": 2e-06, "loss": 0.2475, "step": 4714 }, { "epoch": 1.0938406217376175, "grad_norm": 13.096084263417465, "learning_rate": 2e-06, "loss": 0.2297, "step": 4715 }, { "epoch": 1.0940726133859182, "grad_norm": 23.74250380776062, "learning_rate": 2e-06, "loss": 0.2798, "step": 4716 }, { "epoch": 1.0943046050342187, "grad_norm": 8.639370985491647, "learning_rate": 2e-06, "loss": 0.1993, "step": 4717 }, { "epoch": 1.0945365966825193, "grad_norm": 8.52413219768701, "learning_rate": 2e-06, "loss": 0.1727, "step": 4718 }, { "epoch": 1.09476858833082, "grad_norm": 11.6044749708807, "learning_rate": 2e-06, "loss": 0.2995, "step": 4719 }, { "epoch": 1.0950005799791207, "grad_norm": 15.624858689431957, "learning_rate": 2e-06, "loss": 0.2698, "step": 4720 }, { "epoch": 1.0952325716274214, "grad_norm": 13.945303085396459, "learning_rate": 2e-06, "loss": 0.2106, "step": 4721 }, { "epoch": 1.095464563275722, "grad_norm": 10.769513824946431, "learning_rate": 2e-06, "loss": 0.1963, "step": 4722 }, { "epoch": 1.0956965549240227, "grad_norm": 7.428153969726964, "learning_rate": 2e-06, "loss": 0.148, "step": 4723 }, { "epoch": 1.0959285465723234, "grad_norm": 6.88998868347155, "learning_rate": 2e-06, "loss": 0.1802, "step": 4724 }, { "epoch": 1.096160538220624, "grad_norm": 14.252947491435332, "learning_rate": 2e-06, "loss": 0.2731, "step": 4725 }, { "epoch": 1.0963925298689248, "grad_norm": 18.767434337446932, "learning_rate": 2e-06, "loss": 0.1984, "step": 4726 }, { "epoch": 1.0966245215172254, "grad_norm": 25.407062038419664, "learning_rate": 2e-06, "loss": 0.3428, "step": 4727 }, { "epoch": 1.0968565131655261, "grad_norm": 19.46092075545016, "learning_rate": 2e-06, "loss": 0.2915, "step": 4728 }, { "epoch": 1.0970885048138268, "grad_norm": 20.120073541123094, "learning_rate": 2e-06, "loss": 0.3822, "step": 4729 }, { "epoch": 1.0973204964621273, "grad_norm": 22.039775692665884, "learning_rate": 2e-06, "loss": 0.3699, "step": 4730 }, { "epoch": 1.097552488110428, "grad_norm": 19.658684197724533, "learning_rate": 2e-06, "loss": 0.2764, "step": 4731 }, { "epoch": 1.0977844797587286, "grad_norm": 12.168061926618066, "learning_rate": 2e-06, "loss": 0.1944, "step": 4732 }, { "epoch": 1.0980164714070293, "grad_norm": 24.83924510420318, "learning_rate": 2e-06, "loss": 0.3434, "step": 4733 }, { "epoch": 1.09824846305533, "grad_norm": 12.746618260356046, "learning_rate": 2e-06, "loss": 0.2208, "step": 4734 }, { "epoch": 1.0984804547036306, "grad_norm": 13.560829849978433, "learning_rate": 2e-06, "loss": 0.2237, "step": 4735 }, { "epoch": 1.0987124463519313, "grad_norm": 16.147177241511702, "learning_rate": 2e-06, "loss": 0.2865, "step": 4736 }, { "epoch": 1.098944438000232, "grad_norm": 15.593195676890463, "learning_rate": 2e-06, "loss": 0.1484, "step": 4737 }, { "epoch": 1.0991764296485327, "grad_norm": 17.79741115414023, "learning_rate": 2e-06, "loss": 0.2794, "step": 4738 }, { "epoch": 1.0994084212968334, "grad_norm": 12.267696363765928, "learning_rate": 2e-06, "loss": 0.213, "step": 4739 }, { "epoch": 1.099640412945134, "grad_norm": 21.270542895126816, "learning_rate": 2e-06, "loss": 0.3346, "step": 4740 }, { "epoch": 1.0998724045934347, "grad_norm": 13.268440838091314, "learning_rate": 2e-06, "loss": 0.2893, "step": 4741 }, { "epoch": 1.1001043962417354, "grad_norm": 5.773451254069926, "learning_rate": 2e-06, "loss": 0.1248, "step": 4742 }, { "epoch": 1.100336387890036, "grad_norm": 13.38291685039027, "learning_rate": 2e-06, "loss": 0.2305, "step": 4743 }, { "epoch": 1.1005683795383365, "grad_norm": 17.201303310278664, "learning_rate": 2e-06, "loss": 0.3608, "step": 4744 }, { "epoch": 1.1008003711866372, "grad_norm": 14.377630959205913, "learning_rate": 2e-06, "loss": 0.2518, "step": 4745 }, { "epoch": 1.1010323628349379, "grad_norm": 14.809347676129226, "learning_rate": 2e-06, "loss": 0.2533, "step": 4746 }, { "epoch": 1.1012643544832386, "grad_norm": 9.986337062899754, "learning_rate": 2e-06, "loss": 0.2014, "step": 4747 }, { "epoch": 1.1014963461315392, "grad_norm": 15.922279096775062, "learning_rate": 2e-06, "loss": 0.2832, "step": 4748 }, { "epoch": 1.10172833777984, "grad_norm": 12.216428482913866, "learning_rate": 2e-06, "loss": 0.2412, "step": 4749 }, { "epoch": 1.1019603294281406, "grad_norm": 8.778545487508715, "learning_rate": 2e-06, "loss": 0.213, "step": 4750 }, { "epoch": 1.1021923210764413, "grad_norm": 13.367201916070908, "learning_rate": 2e-06, "loss": 0.2322, "step": 4751 }, { "epoch": 1.102424312724742, "grad_norm": 17.779014580733584, "learning_rate": 2e-06, "loss": 0.2878, "step": 4752 }, { "epoch": 1.1026563043730426, "grad_norm": 15.27352344365719, "learning_rate": 2e-06, "loss": 0.3188, "step": 4753 }, { "epoch": 1.1028882960213433, "grad_norm": 16.461117593032657, "learning_rate": 2e-06, "loss": 0.2435, "step": 4754 }, { "epoch": 1.103120287669644, "grad_norm": 14.525277461670653, "learning_rate": 2e-06, "loss": 0.2675, "step": 4755 }, { "epoch": 1.1033522793179444, "grad_norm": 16.976506099386146, "learning_rate": 2e-06, "loss": 0.1877, "step": 4756 }, { "epoch": 1.1035842709662451, "grad_norm": 10.436412155903202, "learning_rate": 2e-06, "loss": 0.2611, "step": 4757 }, { "epoch": 1.1038162626145458, "grad_norm": 17.45570996860586, "learning_rate": 2e-06, "loss": 0.2223, "step": 4758 }, { "epoch": 1.1040482542628465, "grad_norm": 9.11640215787863, "learning_rate": 2e-06, "loss": 0.1965, "step": 4759 }, { "epoch": 1.1042802459111472, "grad_norm": 18.53955266775229, "learning_rate": 2e-06, "loss": 0.2927, "step": 4760 }, { "epoch": 1.1045122375594478, "grad_norm": 11.813660934324743, "learning_rate": 2e-06, "loss": 0.2288, "step": 4761 }, { "epoch": 1.1047442292077485, "grad_norm": 28.113973632222756, "learning_rate": 2e-06, "loss": 0.4137, "step": 4762 }, { "epoch": 1.1049762208560492, "grad_norm": 13.059036772244967, "learning_rate": 2e-06, "loss": 0.2059, "step": 4763 }, { "epoch": 1.1052082125043499, "grad_norm": 26.718482609331993, "learning_rate": 2e-06, "loss": 0.5283, "step": 4764 }, { "epoch": 1.1054402041526505, "grad_norm": 21.388850421340358, "learning_rate": 2e-06, "loss": 0.2686, "step": 4765 }, { "epoch": 1.1056721958009512, "grad_norm": 13.831933938166646, "learning_rate": 2e-06, "loss": 0.2863, "step": 4766 }, { "epoch": 1.105904187449252, "grad_norm": 14.716751435725632, "learning_rate": 2e-06, "loss": 0.2895, "step": 4767 }, { "epoch": 1.1061361790975526, "grad_norm": 28.502863024905714, "learning_rate": 2e-06, "loss": 0.2986, "step": 4768 }, { "epoch": 1.1063681707458533, "grad_norm": 11.712588324651502, "learning_rate": 2e-06, "loss": 0.2583, "step": 4769 }, { "epoch": 1.1066001623941537, "grad_norm": 14.2455947813304, "learning_rate": 2e-06, "loss": 0.3269, "step": 4770 }, { "epoch": 1.1068321540424544, "grad_norm": 20.12904263154396, "learning_rate": 2e-06, "loss": 0.3651, "step": 4771 }, { "epoch": 1.107064145690755, "grad_norm": 10.495923075726504, "learning_rate": 2e-06, "loss": 0.2407, "step": 4772 }, { "epoch": 1.1072961373390557, "grad_norm": 13.363417014459092, "learning_rate": 2e-06, "loss": 0.3103, "step": 4773 }, { "epoch": 1.1075281289873564, "grad_norm": 9.752561848539205, "learning_rate": 2e-06, "loss": 0.1802, "step": 4774 }, { "epoch": 1.107760120635657, "grad_norm": 10.545529316723636, "learning_rate": 2e-06, "loss": 0.2058, "step": 4775 }, { "epoch": 1.1079921122839578, "grad_norm": 11.52069645016782, "learning_rate": 2e-06, "loss": 0.2783, "step": 4776 }, { "epoch": 1.1082241039322585, "grad_norm": 11.148106046786955, "learning_rate": 2e-06, "loss": 0.1324, "step": 4777 }, { "epoch": 1.1084560955805591, "grad_norm": 11.319629458424366, "learning_rate": 2e-06, "loss": 0.3652, "step": 4778 }, { "epoch": 1.1086880872288598, "grad_norm": 13.690492530236053, "learning_rate": 2e-06, "loss": 0.2366, "step": 4779 }, { "epoch": 1.1089200788771605, "grad_norm": 9.632568788720638, "learning_rate": 2e-06, "loss": 0.2421, "step": 4780 }, { "epoch": 1.1091520705254612, "grad_norm": 12.249528407753822, "learning_rate": 2e-06, "loss": 0.2144, "step": 4781 }, { "epoch": 1.1093840621737618, "grad_norm": 14.319966268970228, "learning_rate": 2e-06, "loss": 0.2755, "step": 4782 }, { "epoch": 1.1096160538220623, "grad_norm": 25.04977436131108, "learning_rate": 2e-06, "loss": 0.2159, "step": 4783 }, { "epoch": 1.109848045470363, "grad_norm": 13.118374923070153, "learning_rate": 2e-06, "loss": 0.2431, "step": 4784 }, { "epoch": 1.1100800371186637, "grad_norm": 13.918332645947876, "learning_rate": 2e-06, "loss": 0.3029, "step": 4785 }, { "epoch": 1.1103120287669643, "grad_norm": 17.479860616245222, "learning_rate": 2e-06, "loss": 0.3775, "step": 4786 }, { "epoch": 1.110544020415265, "grad_norm": 14.610461992797951, "learning_rate": 2e-06, "loss": 0.2226, "step": 4787 }, { "epoch": 1.1107760120635657, "grad_norm": 18.220638020090497, "learning_rate": 2e-06, "loss": 0.2144, "step": 4788 }, { "epoch": 1.1110080037118664, "grad_norm": 11.609900531076313, "learning_rate": 2e-06, "loss": 0.2249, "step": 4789 }, { "epoch": 1.111239995360167, "grad_norm": 11.198809304515796, "learning_rate": 2e-06, "loss": 0.2243, "step": 4790 }, { "epoch": 1.1114719870084677, "grad_norm": 17.489913518711848, "learning_rate": 2e-06, "loss": 0.2504, "step": 4791 }, { "epoch": 1.1117039786567684, "grad_norm": 12.78910159612698, "learning_rate": 2e-06, "loss": 0.3694, "step": 4792 }, { "epoch": 1.111935970305069, "grad_norm": 15.772843023314104, "learning_rate": 2e-06, "loss": 0.3384, "step": 4793 }, { "epoch": 1.1121679619533698, "grad_norm": 11.195851794747387, "learning_rate": 2e-06, "loss": 0.2107, "step": 4794 }, { "epoch": 1.1123999536016704, "grad_norm": 9.780183781534575, "learning_rate": 2e-06, "loss": 0.1513, "step": 4795 }, { "epoch": 1.112631945249971, "grad_norm": 11.903280414707714, "learning_rate": 2e-06, "loss": 0.2401, "step": 4796 }, { "epoch": 1.1128639368982716, "grad_norm": 12.375824587748216, "learning_rate": 2e-06, "loss": 0.3704, "step": 4797 }, { "epoch": 1.1130959285465722, "grad_norm": 15.841434537765656, "learning_rate": 2e-06, "loss": 0.267, "step": 4798 }, { "epoch": 1.113327920194873, "grad_norm": 16.05445964383955, "learning_rate": 2e-06, "loss": 0.2876, "step": 4799 }, { "epoch": 1.1135599118431736, "grad_norm": 16.170941162145347, "learning_rate": 2e-06, "loss": 0.4808, "step": 4800 }, { "epoch": 1.1137919034914743, "grad_norm": 7.983958612287163, "learning_rate": 2e-06, "loss": 0.2322, "step": 4801 }, { "epoch": 1.114023895139775, "grad_norm": 17.947298006751954, "learning_rate": 2e-06, "loss": 0.2394, "step": 4802 }, { "epoch": 1.1142558867880756, "grad_norm": 14.904696965127243, "learning_rate": 2e-06, "loss": 0.2698, "step": 4803 }, { "epoch": 1.1144878784363763, "grad_norm": 24.716054456606248, "learning_rate": 2e-06, "loss": 0.3391, "step": 4804 }, { "epoch": 1.114719870084677, "grad_norm": 17.012778768945804, "learning_rate": 2e-06, "loss": 0.2561, "step": 4805 }, { "epoch": 1.1149518617329777, "grad_norm": 11.77157842120241, "learning_rate": 2e-06, "loss": 0.2345, "step": 4806 }, { "epoch": 1.1151838533812783, "grad_norm": 17.869538222418846, "learning_rate": 2e-06, "loss": 0.2186, "step": 4807 }, { "epoch": 1.115415845029579, "grad_norm": 7.464176480789736, "learning_rate": 2e-06, "loss": 0.1725, "step": 4808 }, { "epoch": 1.1156478366778795, "grad_norm": 5.421341474728665, "learning_rate": 2e-06, "loss": 0.209, "step": 4809 }, { "epoch": 1.1158798283261802, "grad_norm": 10.89685000856278, "learning_rate": 2e-06, "loss": 0.1944, "step": 4810 }, { "epoch": 1.1161118199744808, "grad_norm": 13.971761511853458, "learning_rate": 2e-06, "loss": 0.3304, "step": 4811 }, { "epoch": 1.1163438116227815, "grad_norm": 34.40954485204369, "learning_rate": 2e-06, "loss": 0.394, "step": 4812 }, { "epoch": 1.1165758032710822, "grad_norm": 15.488875948943251, "learning_rate": 2e-06, "loss": 0.2046, "step": 4813 }, { "epoch": 1.1168077949193829, "grad_norm": 12.132601391775486, "learning_rate": 2e-06, "loss": 0.2219, "step": 4814 }, { "epoch": 1.1170397865676835, "grad_norm": 13.424740585546829, "learning_rate": 2e-06, "loss": 0.221, "step": 4815 }, { "epoch": 1.1172717782159842, "grad_norm": 8.659678873098684, "learning_rate": 2e-06, "loss": 0.2093, "step": 4816 }, { "epoch": 1.117503769864285, "grad_norm": 11.50236687791197, "learning_rate": 2e-06, "loss": 0.2311, "step": 4817 }, { "epoch": 1.1177357615125856, "grad_norm": 18.164120226051143, "learning_rate": 2e-06, "loss": 0.2499, "step": 4818 }, { "epoch": 1.1179677531608863, "grad_norm": 17.800646370079132, "learning_rate": 2e-06, "loss": 0.223, "step": 4819 }, { "epoch": 1.118199744809187, "grad_norm": 9.323484868669501, "learning_rate": 2e-06, "loss": 0.1765, "step": 4820 }, { "epoch": 1.1184317364574876, "grad_norm": 16.824993656613074, "learning_rate": 2e-06, "loss": 0.312, "step": 4821 }, { "epoch": 1.1186637281057883, "grad_norm": 12.203168704174818, "learning_rate": 2e-06, "loss": 0.2344, "step": 4822 }, { "epoch": 1.118895719754089, "grad_norm": 31.56245938171965, "learning_rate": 2e-06, "loss": 0.3754, "step": 4823 }, { "epoch": 1.1191277114023894, "grad_norm": 8.968222747142645, "learning_rate": 2e-06, "loss": 0.1932, "step": 4824 }, { "epoch": 1.11935970305069, "grad_norm": 16.992866517419166, "learning_rate": 2e-06, "loss": 0.2995, "step": 4825 }, { "epoch": 1.1195916946989908, "grad_norm": 10.459429594675886, "learning_rate": 2e-06, "loss": 0.352, "step": 4826 }, { "epoch": 1.1198236863472915, "grad_norm": 13.449766556141963, "learning_rate": 2e-06, "loss": 0.2041, "step": 4827 }, { "epoch": 1.1200556779955921, "grad_norm": 12.05557560578355, "learning_rate": 2e-06, "loss": 0.2017, "step": 4828 }, { "epoch": 1.1202876696438928, "grad_norm": 9.367438870463518, "learning_rate": 2e-06, "loss": 0.2387, "step": 4829 }, { "epoch": 1.1205196612921935, "grad_norm": 17.76365816605677, "learning_rate": 2e-06, "loss": 0.1864, "step": 4830 }, { "epoch": 1.1207516529404942, "grad_norm": 15.404675931816326, "learning_rate": 2e-06, "loss": 0.2811, "step": 4831 }, { "epoch": 1.1209836445887948, "grad_norm": 15.29386673652393, "learning_rate": 2e-06, "loss": 0.2481, "step": 4832 }, { "epoch": 1.1212156362370955, "grad_norm": 8.263621968845886, "learning_rate": 2e-06, "loss": 0.2153, "step": 4833 }, { "epoch": 1.1214476278853962, "grad_norm": 11.598973711992286, "learning_rate": 2e-06, "loss": 0.2205, "step": 4834 }, { "epoch": 1.1216796195336969, "grad_norm": 12.121246868598893, "learning_rate": 2e-06, "loss": 0.2727, "step": 4835 }, { "epoch": 1.1219116111819973, "grad_norm": 21.845578092591122, "learning_rate": 2e-06, "loss": 0.247, "step": 4836 }, { "epoch": 1.122143602830298, "grad_norm": 26.244677083522305, "learning_rate": 2e-06, "loss": 0.4809, "step": 4837 }, { "epoch": 1.1223755944785987, "grad_norm": 14.525950272406515, "learning_rate": 2e-06, "loss": 0.2579, "step": 4838 }, { "epoch": 1.1226075861268994, "grad_norm": 8.921911193753202, "learning_rate": 2e-06, "loss": 0.2388, "step": 4839 }, { "epoch": 1.1228395777752, "grad_norm": 18.573858193690356, "learning_rate": 2e-06, "loss": 0.2852, "step": 4840 }, { "epoch": 1.1230715694235007, "grad_norm": 15.231148127881918, "learning_rate": 2e-06, "loss": 0.3048, "step": 4841 }, { "epoch": 1.1233035610718014, "grad_norm": 14.311189724319316, "learning_rate": 2e-06, "loss": 0.174, "step": 4842 }, { "epoch": 1.123535552720102, "grad_norm": 9.759725795906759, "learning_rate": 2e-06, "loss": 0.2344, "step": 4843 }, { "epoch": 1.1237675443684028, "grad_norm": 16.32478232769143, "learning_rate": 2e-06, "loss": 0.3097, "step": 4844 }, { "epoch": 1.1239995360167034, "grad_norm": 11.181963687751907, "learning_rate": 2e-06, "loss": 0.2216, "step": 4845 }, { "epoch": 1.1242315276650041, "grad_norm": 10.353225268015038, "learning_rate": 2e-06, "loss": 0.2954, "step": 4846 }, { "epoch": 1.1244635193133048, "grad_norm": 10.309147818411686, "learning_rate": 2e-06, "loss": 0.1935, "step": 4847 }, { "epoch": 1.1246955109616055, "grad_norm": 14.911985063186178, "learning_rate": 2e-06, "loss": 0.228, "step": 4848 }, { "epoch": 1.1249275026099061, "grad_norm": 21.648534920152432, "learning_rate": 2e-06, "loss": 0.3459, "step": 4849 }, { "epoch": 1.1251594942582068, "grad_norm": 12.10124114368933, "learning_rate": 2e-06, "loss": 0.2225, "step": 4850 }, { "epoch": 1.1253914859065073, "grad_norm": 15.01947620297207, "learning_rate": 2e-06, "loss": 0.2392, "step": 4851 }, { "epoch": 1.125623477554808, "grad_norm": 22.524618954163767, "learning_rate": 2e-06, "loss": 0.2461, "step": 4852 }, { "epoch": 1.1258554692031086, "grad_norm": 20.873088264065554, "learning_rate": 2e-06, "loss": 0.3608, "step": 4853 }, { "epoch": 1.1260874608514093, "grad_norm": 13.904202955837375, "learning_rate": 2e-06, "loss": 0.2254, "step": 4854 }, { "epoch": 1.12631945249971, "grad_norm": 26.509242877229937, "learning_rate": 2e-06, "loss": 0.3359, "step": 4855 }, { "epoch": 1.1265514441480107, "grad_norm": 13.028136016976488, "learning_rate": 2e-06, "loss": 0.2455, "step": 4856 }, { "epoch": 1.1267834357963114, "grad_norm": 7.319785950351981, "learning_rate": 2e-06, "loss": 0.1814, "step": 4857 }, { "epoch": 1.127015427444612, "grad_norm": 20.918159000298974, "learning_rate": 2e-06, "loss": 0.3011, "step": 4858 }, { "epoch": 1.1272474190929127, "grad_norm": 9.220278402493546, "learning_rate": 2e-06, "loss": 0.2005, "step": 4859 }, { "epoch": 1.1274794107412134, "grad_norm": 7.8435657373328445, "learning_rate": 2e-06, "loss": 0.1529, "step": 4860 }, { "epoch": 1.127711402389514, "grad_norm": 12.331950357133152, "learning_rate": 2e-06, "loss": 0.2644, "step": 4861 }, { "epoch": 1.1279433940378145, "grad_norm": 11.468091056813932, "learning_rate": 2e-06, "loss": 0.2825, "step": 4862 }, { "epoch": 1.1281753856861152, "grad_norm": 9.125446465633535, "learning_rate": 2e-06, "loss": 0.1779, "step": 4863 }, { "epoch": 1.1284073773344159, "grad_norm": 16.561095932862465, "learning_rate": 2e-06, "loss": 0.3371, "step": 4864 }, { "epoch": 1.1286393689827166, "grad_norm": 13.373293675830446, "learning_rate": 2e-06, "loss": 0.3443, "step": 4865 }, { "epoch": 1.1288713606310172, "grad_norm": 20.618005722280156, "learning_rate": 2e-06, "loss": 0.302, "step": 4866 }, { "epoch": 1.129103352279318, "grad_norm": 20.43437800180757, "learning_rate": 2e-06, "loss": 0.4071, "step": 4867 }, { "epoch": 1.1293353439276186, "grad_norm": 23.09409807583735, "learning_rate": 2e-06, "loss": 0.3352, "step": 4868 }, { "epoch": 1.1295673355759193, "grad_norm": 21.548952088035673, "learning_rate": 2e-06, "loss": 0.2033, "step": 4869 }, { "epoch": 1.12979932722422, "grad_norm": 13.179588125487964, "learning_rate": 2e-06, "loss": 0.1926, "step": 4870 }, { "epoch": 1.1300313188725206, "grad_norm": 17.560998374400327, "learning_rate": 2e-06, "loss": 0.3046, "step": 4871 }, { "epoch": 1.1302633105208213, "grad_norm": 15.359237858726829, "learning_rate": 2e-06, "loss": 0.2663, "step": 4872 }, { "epoch": 1.130495302169122, "grad_norm": 19.264672448088973, "learning_rate": 2e-06, "loss": 0.311, "step": 4873 }, { "epoch": 1.1307272938174227, "grad_norm": 18.49971199105671, "learning_rate": 2e-06, "loss": 0.2726, "step": 4874 }, { "epoch": 1.1309592854657233, "grad_norm": 16.811942294775953, "learning_rate": 2e-06, "loss": 0.3512, "step": 4875 }, { "epoch": 1.131191277114024, "grad_norm": 10.463711626604983, "learning_rate": 2e-06, "loss": 0.2304, "step": 4876 }, { "epoch": 1.1314232687623245, "grad_norm": 12.918641341385287, "learning_rate": 2e-06, "loss": 0.2987, "step": 4877 }, { "epoch": 1.1316552604106251, "grad_norm": 7.4286140248356825, "learning_rate": 2e-06, "loss": 0.1508, "step": 4878 }, { "epoch": 1.1318872520589258, "grad_norm": 20.665785145642293, "learning_rate": 2e-06, "loss": 0.365, "step": 4879 }, { "epoch": 1.1321192437072265, "grad_norm": 12.166636770416769, "learning_rate": 2e-06, "loss": 0.2197, "step": 4880 }, { "epoch": 1.1323512353555272, "grad_norm": 15.393645357245871, "learning_rate": 2e-06, "loss": 0.3478, "step": 4881 }, { "epoch": 1.1325832270038279, "grad_norm": 9.480527037233777, "learning_rate": 2e-06, "loss": 0.1504, "step": 4882 }, { "epoch": 1.1328152186521285, "grad_norm": 10.986611255263721, "learning_rate": 2e-06, "loss": 0.2744, "step": 4883 }, { "epoch": 1.1330472103004292, "grad_norm": 8.984061788935852, "learning_rate": 2e-06, "loss": 0.2151, "step": 4884 }, { "epoch": 1.1332792019487299, "grad_norm": 19.65763052847948, "learning_rate": 2e-06, "loss": 0.2741, "step": 4885 }, { "epoch": 1.1335111935970306, "grad_norm": 10.736693671979038, "learning_rate": 2e-06, "loss": 0.1959, "step": 4886 }, { "epoch": 1.1337431852453312, "grad_norm": 14.183338903446597, "learning_rate": 2e-06, "loss": 0.281, "step": 4887 }, { "epoch": 1.133975176893632, "grad_norm": 9.740995781048545, "learning_rate": 2e-06, "loss": 0.1822, "step": 4888 }, { "epoch": 1.1342071685419324, "grad_norm": 21.632445425240174, "learning_rate": 2e-06, "loss": 0.3031, "step": 4889 }, { "epoch": 1.134439160190233, "grad_norm": 24.816556530453955, "learning_rate": 2e-06, "loss": 0.3508, "step": 4890 }, { "epoch": 1.1346711518385337, "grad_norm": 13.454497821313662, "learning_rate": 2e-06, "loss": 0.196, "step": 4891 }, { "epoch": 1.1349031434868344, "grad_norm": 6.912301352121711, "learning_rate": 2e-06, "loss": 0.18, "step": 4892 }, { "epoch": 1.135135135135135, "grad_norm": 20.439880621178403, "learning_rate": 2e-06, "loss": 0.2886, "step": 4893 }, { "epoch": 1.1353671267834358, "grad_norm": 19.2432781790515, "learning_rate": 2e-06, "loss": 0.3268, "step": 4894 }, { "epoch": 1.1355991184317364, "grad_norm": 12.769986602500271, "learning_rate": 2e-06, "loss": 0.2096, "step": 4895 }, { "epoch": 1.1358311100800371, "grad_norm": 11.600290293189488, "learning_rate": 2e-06, "loss": 0.1359, "step": 4896 }, { "epoch": 1.1360631017283378, "grad_norm": 15.612197475543613, "learning_rate": 2e-06, "loss": 0.2858, "step": 4897 }, { "epoch": 1.1362950933766385, "grad_norm": 8.466076316561889, "learning_rate": 2e-06, "loss": 0.1672, "step": 4898 }, { "epoch": 1.1365270850249392, "grad_norm": 14.876673188857252, "learning_rate": 2e-06, "loss": 0.3044, "step": 4899 }, { "epoch": 1.1367590766732398, "grad_norm": 11.133115228172553, "learning_rate": 2e-06, "loss": 0.1983, "step": 4900 }, { "epoch": 1.1369910683215405, "grad_norm": 8.143066059932629, "learning_rate": 2e-06, "loss": 0.1884, "step": 4901 }, { "epoch": 1.1372230599698412, "grad_norm": 17.890975380490254, "learning_rate": 2e-06, "loss": 0.2398, "step": 4902 }, { "epoch": 1.1374550516181419, "grad_norm": 15.559934780180642, "learning_rate": 2e-06, "loss": 0.2069, "step": 4903 }, { "epoch": 1.1376870432664423, "grad_norm": 21.19586601560053, "learning_rate": 2e-06, "loss": 0.3452, "step": 4904 }, { "epoch": 1.137919034914743, "grad_norm": 18.280108012771613, "learning_rate": 2e-06, "loss": 0.3406, "step": 4905 }, { "epoch": 1.1381510265630437, "grad_norm": 13.982547036255285, "learning_rate": 2e-06, "loss": 0.2576, "step": 4906 }, { "epoch": 1.1383830182113444, "grad_norm": 11.804514631444446, "learning_rate": 2e-06, "loss": 0.229, "step": 4907 }, { "epoch": 1.138615009859645, "grad_norm": 19.541278639122623, "learning_rate": 2e-06, "loss": 0.2622, "step": 4908 }, { "epoch": 1.1388470015079457, "grad_norm": 14.760808289312461, "learning_rate": 2e-06, "loss": 0.2904, "step": 4909 }, { "epoch": 1.1390789931562464, "grad_norm": 19.011030729462316, "learning_rate": 2e-06, "loss": 0.2777, "step": 4910 }, { "epoch": 1.139310984804547, "grad_norm": 6.137233877763751, "learning_rate": 2e-06, "loss": 0.1433, "step": 4911 }, { "epoch": 1.1395429764528477, "grad_norm": 14.918947462083826, "learning_rate": 2e-06, "loss": 0.2392, "step": 4912 }, { "epoch": 1.1397749681011484, "grad_norm": 14.772137907592853, "learning_rate": 2e-06, "loss": 0.1439, "step": 4913 }, { "epoch": 1.140006959749449, "grad_norm": 14.608881128373772, "learning_rate": 2e-06, "loss": 0.2408, "step": 4914 }, { "epoch": 1.1402389513977498, "grad_norm": 9.58689032170824, "learning_rate": 2e-06, "loss": 0.1699, "step": 4915 }, { "epoch": 1.1404709430460502, "grad_norm": 11.368371089686674, "learning_rate": 2e-06, "loss": 0.1777, "step": 4916 }, { "epoch": 1.140702934694351, "grad_norm": 14.5766053931017, "learning_rate": 2e-06, "loss": 0.1859, "step": 4917 }, { "epoch": 1.1409349263426516, "grad_norm": 9.122800332401326, "learning_rate": 2e-06, "loss": 0.1833, "step": 4918 }, { "epoch": 1.1411669179909523, "grad_norm": 9.61504687863536, "learning_rate": 2e-06, "loss": 0.1986, "step": 4919 }, { "epoch": 1.141398909639253, "grad_norm": 21.523746206148463, "learning_rate": 2e-06, "loss": 0.3443, "step": 4920 }, { "epoch": 1.1416309012875536, "grad_norm": 14.444470032653033, "learning_rate": 2e-06, "loss": 0.1923, "step": 4921 }, { "epoch": 1.1418628929358543, "grad_norm": 18.236309948457443, "learning_rate": 2e-06, "loss": 0.2983, "step": 4922 }, { "epoch": 1.142094884584155, "grad_norm": 13.92540097571774, "learning_rate": 2e-06, "loss": 0.193, "step": 4923 }, { "epoch": 1.1423268762324557, "grad_norm": 24.180937619439668, "learning_rate": 2e-06, "loss": 0.3859, "step": 4924 }, { "epoch": 1.1425588678807563, "grad_norm": 15.437820412405143, "learning_rate": 2e-06, "loss": 0.268, "step": 4925 }, { "epoch": 1.142790859529057, "grad_norm": 13.582142530612812, "learning_rate": 2e-06, "loss": 0.2587, "step": 4926 }, { "epoch": 1.1430228511773577, "grad_norm": 15.043145299085626, "learning_rate": 2e-06, "loss": 0.2439, "step": 4927 }, { "epoch": 1.1432548428256584, "grad_norm": 16.86067192142225, "learning_rate": 2e-06, "loss": 0.2741, "step": 4928 }, { "epoch": 1.143486834473959, "grad_norm": 18.408278721917398, "learning_rate": 2e-06, "loss": 0.2853, "step": 4929 }, { "epoch": 1.1437188261222595, "grad_norm": 11.034525892499884, "learning_rate": 2e-06, "loss": 0.2126, "step": 4930 }, { "epoch": 1.1439508177705602, "grad_norm": 8.690261559663353, "learning_rate": 2e-06, "loss": 0.1887, "step": 4931 }, { "epoch": 1.1441828094188609, "grad_norm": 10.915617366321268, "learning_rate": 2e-06, "loss": 0.2147, "step": 4932 }, { "epoch": 1.1444148010671615, "grad_norm": 6.999836918162795, "learning_rate": 2e-06, "loss": 0.2213, "step": 4933 }, { "epoch": 1.1446467927154622, "grad_norm": 12.861398225532644, "learning_rate": 2e-06, "loss": 0.2748, "step": 4934 }, { "epoch": 1.144878784363763, "grad_norm": 21.73627815965974, "learning_rate": 2e-06, "loss": 0.2468, "step": 4935 }, { "epoch": 1.1451107760120636, "grad_norm": 5.840407410752199, "learning_rate": 2e-06, "loss": 0.1333, "step": 4936 }, { "epoch": 1.1453427676603642, "grad_norm": 13.66647412745888, "learning_rate": 2e-06, "loss": 0.254, "step": 4937 }, { "epoch": 1.145574759308665, "grad_norm": 10.955800205327387, "learning_rate": 2e-06, "loss": 0.2701, "step": 4938 }, { "epoch": 1.1458067509569656, "grad_norm": 18.279641821552023, "learning_rate": 2e-06, "loss": 0.3168, "step": 4939 }, { "epoch": 1.1460387426052663, "grad_norm": 13.678123638896912, "learning_rate": 2e-06, "loss": 0.1945, "step": 4940 }, { "epoch": 1.146270734253567, "grad_norm": 20.690435466350603, "learning_rate": 2e-06, "loss": 0.3321, "step": 4941 }, { "epoch": 1.1465027259018674, "grad_norm": 11.352089339007339, "learning_rate": 2e-06, "loss": 0.2042, "step": 4942 }, { "epoch": 1.146734717550168, "grad_norm": 15.102901486629841, "learning_rate": 2e-06, "loss": 0.1913, "step": 4943 }, { "epoch": 1.1469667091984688, "grad_norm": 16.465122437217744, "learning_rate": 2e-06, "loss": 0.2494, "step": 4944 }, { "epoch": 1.1471987008467694, "grad_norm": 7.706423255130726, "learning_rate": 2e-06, "loss": 0.1918, "step": 4945 }, { "epoch": 1.1474306924950701, "grad_norm": 13.249898609853025, "learning_rate": 2e-06, "loss": 0.2462, "step": 4946 }, { "epoch": 1.1476626841433708, "grad_norm": 21.2608644464412, "learning_rate": 2e-06, "loss": 0.3209, "step": 4947 }, { "epoch": 1.1478946757916715, "grad_norm": 8.674472393729499, "learning_rate": 2e-06, "loss": 0.1809, "step": 4948 }, { "epoch": 1.1481266674399722, "grad_norm": 12.503974947717918, "learning_rate": 2e-06, "loss": 0.3836, "step": 4949 }, { "epoch": 1.1483586590882728, "grad_norm": 20.12930606562257, "learning_rate": 2e-06, "loss": 0.1928, "step": 4950 }, { "epoch": 1.1485906507365735, "grad_norm": 20.94522005014584, "learning_rate": 2e-06, "loss": 0.2479, "step": 4951 }, { "epoch": 1.1488226423848742, "grad_norm": 24.26873660368573, "learning_rate": 2e-06, "loss": 0.2137, "step": 4952 }, { "epoch": 1.1490546340331749, "grad_norm": 10.406117471367917, "learning_rate": 2e-06, "loss": 0.2668, "step": 4953 }, { "epoch": 1.1492866256814755, "grad_norm": 11.010379737891737, "learning_rate": 2e-06, "loss": 0.1913, "step": 4954 }, { "epoch": 1.1495186173297762, "grad_norm": 12.299171427398806, "learning_rate": 2e-06, "loss": 0.1321, "step": 4955 }, { "epoch": 1.149750608978077, "grad_norm": 17.37082274280718, "learning_rate": 2e-06, "loss": 0.3217, "step": 4956 }, { "epoch": 1.1499826006263774, "grad_norm": 14.921465025425027, "learning_rate": 2e-06, "loss": 0.3425, "step": 4957 }, { "epoch": 1.150214592274678, "grad_norm": 17.521596392801534, "learning_rate": 2e-06, "loss": 0.3958, "step": 4958 }, { "epoch": 1.1504465839229787, "grad_norm": 15.05574840704271, "learning_rate": 2e-06, "loss": 0.2477, "step": 4959 }, { "epoch": 1.1506785755712794, "grad_norm": 9.486800586659157, "learning_rate": 2e-06, "loss": 0.2234, "step": 4960 }, { "epoch": 1.15091056721958, "grad_norm": 10.712302390953361, "learning_rate": 2e-06, "loss": 0.2072, "step": 4961 }, { "epoch": 1.1511425588678807, "grad_norm": 10.90126140152789, "learning_rate": 2e-06, "loss": 0.3454, "step": 4962 }, { "epoch": 1.1513745505161814, "grad_norm": 20.62003729182546, "learning_rate": 2e-06, "loss": 0.3847, "step": 4963 }, { "epoch": 1.151606542164482, "grad_norm": 5.952093748397747, "learning_rate": 2e-06, "loss": 0.1854, "step": 4964 }, { "epoch": 1.1518385338127828, "grad_norm": 21.558466694925713, "learning_rate": 2e-06, "loss": 0.3083, "step": 4965 }, { "epoch": 1.1520705254610835, "grad_norm": 16.098757853215655, "learning_rate": 2e-06, "loss": 0.2644, "step": 4966 }, { "epoch": 1.1523025171093841, "grad_norm": 10.083478843015879, "learning_rate": 2e-06, "loss": 0.293, "step": 4967 }, { "epoch": 1.1525345087576848, "grad_norm": 14.425534089566499, "learning_rate": 2e-06, "loss": 0.2881, "step": 4968 }, { "epoch": 1.1527665004059853, "grad_norm": 18.679608904856032, "learning_rate": 2e-06, "loss": 0.3362, "step": 4969 }, { "epoch": 1.152998492054286, "grad_norm": 21.405848462538046, "learning_rate": 2e-06, "loss": 0.2952, "step": 4970 }, { "epoch": 1.1532304837025866, "grad_norm": 18.534218012077417, "learning_rate": 2e-06, "loss": 0.2789, "step": 4971 }, { "epoch": 1.1534624753508873, "grad_norm": 16.90363676355196, "learning_rate": 2e-06, "loss": 0.2342, "step": 4972 }, { "epoch": 1.153694466999188, "grad_norm": 20.269954202409547, "learning_rate": 2e-06, "loss": 0.2938, "step": 4973 }, { "epoch": 1.1539264586474887, "grad_norm": 14.655589208363546, "learning_rate": 2e-06, "loss": 0.2959, "step": 4974 }, { "epoch": 1.1541584502957893, "grad_norm": 16.179437731609507, "learning_rate": 2e-06, "loss": 0.3986, "step": 4975 }, { "epoch": 1.15439044194409, "grad_norm": 17.471580867845255, "learning_rate": 2e-06, "loss": 0.3414, "step": 4976 }, { "epoch": 1.1546224335923907, "grad_norm": 15.168744371031757, "learning_rate": 2e-06, "loss": 0.2526, "step": 4977 }, { "epoch": 1.1548544252406914, "grad_norm": 14.814605235511708, "learning_rate": 2e-06, "loss": 0.297, "step": 4978 }, { "epoch": 1.155086416888992, "grad_norm": 5.064559732869771, "learning_rate": 2e-06, "loss": 0.1687, "step": 4979 }, { "epoch": 1.1553184085372927, "grad_norm": 22.91198371044645, "learning_rate": 2e-06, "loss": 0.3917, "step": 4980 }, { "epoch": 1.1555504001855934, "grad_norm": 10.770969937868816, "learning_rate": 2e-06, "loss": 0.2501, "step": 4981 }, { "epoch": 1.155782391833894, "grad_norm": 15.982737134996356, "learning_rate": 2e-06, "loss": 0.2392, "step": 4982 }, { "epoch": 1.1560143834821948, "grad_norm": 16.99469144176511, "learning_rate": 2e-06, "loss": 0.3181, "step": 4983 }, { "epoch": 1.1562463751304952, "grad_norm": 25.361483749720282, "learning_rate": 2e-06, "loss": 0.2793, "step": 4984 }, { "epoch": 1.156478366778796, "grad_norm": 11.43453219225867, "learning_rate": 2e-06, "loss": 0.2394, "step": 4985 }, { "epoch": 1.1567103584270966, "grad_norm": 17.527726479771758, "learning_rate": 2e-06, "loss": 0.3313, "step": 4986 }, { "epoch": 1.1569423500753973, "grad_norm": 23.029281340142415, "learning_rate": 2e-06, "loss": 0.2906, "step": 4987 }, { "epoch": 1.157174341723698, "grad_norm": 15.859396445352443, "learning_rate": 2e-06, "loss": 0.1847, "step": 4988 }, { "epoch": 1.1574063333719986, "grad_norm": 18.52207039945924, "learning_rate": 2e-06, "loss": 0.232, "step": 4989 }, { "epoch": 1.1576383250202993, "grad_norm": 18.5323290413136, "learning_rate": 2e-06, "loss": 0.2638, "step": 4990 }, { "epoch": 1.1578703166686, "grad_norm": 11.726869560068051, "learning_rate": 2e-06, "loss": 0.2262, "step": 4991 }, { "epoch": 1.1581023083169006, "grad_norm": 10.325741206233513, "learning_rate": 2e-06, "loss": 0.2112, "step": 4992 }, { "epoch": 1.1583342999652013, "grad_norm": 18.185472349818276, "learning_rate": 2e-06, "loss": 0.2729, "step": 4993 }, { "epoch": 1.158566291613502, "grad_norm": 16.719998948739807, "learning_rate": 2e-06, "loss": 0.2897, "step": 4994 }, { "epoch": 1.1587982832618025, "grad_norm": 16.404411179286353, "learning_rate": 2e-06, "loss": 0.2828, "step": 4995 }, { "epoch": 1.1590302749101031, "grad_norm": 15.826079148596243, "learning_rate": 2e-06, "loss": 0.1987, "step": 4996 }, { "epoch": 1.1592622665584038, "grad_norm": 23.52084532680064, "learning_rate": 2e-06, "loss": 0.3933, "step": 4997 }, { "epoch": 1.1594942582067045, "grad_norm": 19.727342270029716, "learning_rate": 2e-06, "loss": 0.3705, "step": 4998 }, { "epoch": 1.1597262498550052, "grad_norm": 8.532466128418308, "learning_rate": 2e-06, "loss": 0.2316, "step": 4999 }, { "epoch": 1.1599582415033058, "grad_norm": 18.545541497081892, "learning_rate": 2e-06, "loss": 0.2471, "step": 5000 }, { "epoch": 1.1601902331516065, "grad_norm": 7.39760627147255, "learning_rate": 2e-06, "loss": 0.1612, "step": 5001 }, { "epoch": 1.1604222247999072, "grad_norm": 10.978935659260085, "learning_rate": 2e-06, "loss": 0.2346, "step": 5002 }, { "epoch": 1.1606542164482079, "grad_norm": 11.515475457268554, "learning_rate": 2e-06, "loss": 0.2053, "step": 5003 }, { "epoch": 1.1608862080965086, "grad_norm": 6.859243508484112, "learning_rate": 2e-06, "loss": 0.1754, "step": 5004 }, { "epoch": 1.1611181997448092, "grad_norm": 22.30127623099719, "learning_rate": 2e-06, "loss": 0.2655, "step": 5005 }, { "epoch": 1.16135019139311, "grad_norm": 18.018850253614218, "learning_rate": 2e-06, "loss": 0.2342, "step": 5006 }, { "epoch": 1.1615821830414106, "grad_norm": 14.185858142945714, "learning_rate": 2e-06, "loss": 0.2357, "step": 5007 }, { "epoch": 1.1618141746897113, "grad_norm": 13.850770859563564, "learning_rate": 2e-06, "loss": 0.2282, "step": 5008 }, { "epoch": 1.162046166338012, "grad_norm": 20.168032983436802, "learning_rate": 2e-06, "loss": 0.3488, "step": 5009 }, { "epoch": 1.1622781579863124, "grad_norm": 14.213162878346074, "learning_rate": 2e-06, "loss": 0.1906, "step": 5010 }, { "epoch": 1.162510149634613, "grad_norm": 18.527199878180998, "learning_rate": 2e-06, "loss": 0.2876, "step": 5011 }, { "epoch": 1.1627421412829138, "grad_norm": 21.948041045089763, "learning_rate": 2e-06, "loss": 0.2023, "step": 5012 }, { "epoch": 1.1629741329312144, "grad_norm": 18.20419217348716, "learning_rate": 2e-06, "loss": 0.2678, "step": 5013 }, { "epoch": 1.163206124579515, "grad_norm": 7.939425692141144, "learning_rate": 2e-06, "loss": 0.2372, "step": 5014 }, { "epoch": 1.1634381162278158, "grad_norm": 12.672603130303628, "learning_rate": 2e-06, "loss": 0.2832, "step": 5015 }, { "epoch": 1.1636701078761165, "grad_norm": 10.981135126370553, "learning_rate": 2e-06, "loss": 0.3249, "step": 5016 }, { "epoch": 1.1639020995244171, "grad_norm": 15.315053555166035, "learning_rate": 2e-06, "loss": 0.2706, "step": 5017 }, { "epoch": 1.1641340911727178, "grad_norm": 7.395346913250289, "learning_rate": 2e-06, "loss": 0.1825, "step": 5018 }, { "epoch": 1.1643660828210185, "grad_norm": 25.789706744104237, "learning_rate": 2e-06, "loss": 0.2779, "step": 5019 }, { "epoch": 1.1645980744693192, "grad_norm": 13.929896223441505, "learning_rate": 2e-06, "loss": 0.3325, "step": 5020 }, { "epoch": 1.1648300661176199, "grad_norm": 20.150725102614853, "learning_rate": 2e-06, "loss": 0.2426, "step": 5021 }, { "epoch": 1.1650620577659203, "grad_norm": 16.45818500299967, "learning_rate": 2e-06, "loss": 0.1771, "step": 5022 }, { "epoch": 1.165294049414221, "grad_norm": 15.739444570190075, "learning_rate": 2e-06, "loss": 0.2365, "step": 5023 }, { "epoch": 1.1655260410625217, "grad_norm": 10.570517956343688, "learning_rate": 2e-06, "loss": 0.2052, "step": 5024 }, { "epoch": 1.1657580327108223, "grad_norm": 13.3629168062596, "learning_rate": 2e-06, "loss": 0.2241, "step": 5025 }, { "epoch": 1.165990024359123, "grad_norm": 18.754419599758915, "learning_rate": 2e-06, "loss": 0.2735, "step": 5026 }, { "epoch": 1.1662220160074237, "grad_norm": 19.664815170672313, "learning_rate": 2e-06, "loss": 0.2899, "step": 5027 }, { "epoch": 1.1664540076557244, "grad_norm": 9.366801142256714, "learning_rate": 2e-06, "loss": 0.23, "step": 5028 }, { "epoch": 1.166685999304025, "grad_norm": 14.935637719397329, "learning_rate": 2e-06, "loss": 0.2281, "step": 5029 }, { "epoch": 1.1669179909523257, "grad_norm": 24.75933529551093, "learning_rate": 2e-06, "loss": 0.2572, "step": 5030 }, { "epoch": 1.1671499826006264, "grad_norm": 14.730901640082049, "learning_rate": 2e-06, "loss": 0.3131, "step": 5031 }, { "epoch": 1.167381974248927, "grad_norm": 5.845621968031191, "learning_rate": 2e-06, "loss": 0.103, "step": 5032 }, { "epoch": 1.1676139658972278, "grad_norm": 15.167261756957625, "learning_rate": 2e-06, "loss": 0.2014, "step": 5033 }, { "epoch": 1.1678459575455284, "grad_norm": 16.816500981299157, "learning_rate": 2e-06, "loss": 0.3123, "step": 5034 }, { "epoch": 1.1680779491938291, "grad_norm": 12.555567932873625, "learning_rate": 2e-06, "loss": 0.209, "step": 5035 }, { "epoch": 1.1683099408421298, "grad_norm": 15.661823316978516, "learning_rate": 2e-06, "loss": 0.3003, "step": 5036 }, { "epoch": 1.1685419324904303, "grad_norm": 7.488072913478131, "learning_rate": 2e-06, "loss": 0.1905, "step": 5037 }, { "epoch": 1.168773924138731, "grad_norm": 15.02359869467387, "learning_rate": 2e-06, "loss": 0.2309, "step": 5038 }, { "epoch": 1.1690059157870316, "grad_norm": 14.78971080478935, "learning_rate": 2e-06, "loss": 0.2297, "step": 5039 }, { "epoch": 1.1692379074353323, "grad_norm": 20.143819338315392, "learning_rate": 2e-06, "loss": 0.3366, "step": 5040 }, { "epoch": 1.169469899083633, "grad_norm": 20.31800677163891, "learning_rate": 2e-06, "loss": 0.3499, "step": 5041 }, { "epoch": 1.1697018907319336, "grad_norm": 18.584130400204625, "learning_rate": 2e-06, "loss": 0.2986, "step": 5042 }, { "epoch": 1.1699338823802343, "grad_norm": 9.014805971449672, "learning_rate": 2e-06, "loss": 0.2106, "step": 5043 }, { "epoch": 1.170165874028535, "grad_norm": 15.855667851271633, "learning_rate": 2e-06, "loss": 0.3955, "step": 5044 }, { "epoch": 1.1703978656768357, "grad_norm": 16.573583773977177, "learning_rate": 2e-06, "loss": 0.3397, "step": 5045 }, { "epoch": 1.1706298573251364, "grad_norm": 38.35167617624872, "learning_rate": 2e-06, "loss": 0.2736, "step": 5046 }, { "epoch": 1.170861848973437, "grad_norm": 22.72179482763607, "learning_rate": 2e-06, "loss": 0.3405, "step": 5047 }, { "epoch": 1.1710938406217377, "grad_norm": 12.249486716090715, "learning_rate": 2e-06, "loss": 0.1978, "step": 5048 }, { "epoch": 1.1713258322700382, "grad_norm": 9.266429827972644, "learning_rate": 2e-06, "loss": 0.2376, "step": 5049 }, { "epoch": 1.1715578239183388, "grad_norm": 18.193981352814255, "learning_rate": 2e-06, "loss": 0.3892, "step": 5050 }, { "epoch": 1.1717898155666395, "grad_norm": 15.126893755477953, "learning_rate": 2e-06, "loss": 0.2697, "step": 5051 }, { "epoch": 1.1720218072149402, "grad_norm": 11.528059200038399, "learning_rate": 2e-06, "loss": 0.2112, "step": 5052 }, { "epoch": 1.1722537988632409, "grad_norm": 5.696819551783051, "learning_rate": 2e-06, "loss": 0.1733, "step": 5053 }, { "epoch": 1.1724857905115416, "grad_norm": 21.68939285127513, "learning_rate": 2e-06, "loss": 0.238, "step": 5054 }, { "epoch": 1.1727177821598422, "grad_norm": 11.54426577532738, "learning_rate": 2e-06, "loss": 0.2835, "step": 5055 }, { "epoch": 1.172949773808143, "grad_norm": 16.36253134447032, "learning_rate": 2e-06, "loss": 0.3236, "step": 5056 }, { "epoch": 1.1731817654564436, "grad_norm": 8.142562005217773, "learning_rate": 2e-06, "loss": 0.212, "step": 5057 }, { "epoch": 1.1734137571047443, "grad_norm": 12.615320983816575, "learning_rate": 2e-06, "loss": 0.2282, "step": 5058 }, { "epoch": 1.173645748753045, "grad_norm": 12.514326523956044, "learning_rate": 2e-06, "loss": 0.1682, "step": 5059 }, { "epoch": 1.1738777404013456, "grad_norm": 12.633760376909049, "learning_rate": 2e-06, "loss": 0.2375, "step": 5060 }, { "epoch": 1.1741097320496463, "grad_norm": 18.721158363810936, "learning_rate": 2e-06, "loss": 0.3598, "step": 5061 }, { "epoch": 1.174341723697947, "grad_norm": 19.54568283304831, "learning_rate": 2e-06, "loss": 0.3662, "step": 5062 }, { "epoch": 1.1745737153462477, "grad_norm": 56.5685419514945, "learning_rate": 2e-06, "loss": 0.1762, "step": 5063 }, { "epoch": 1.1748057069945481, "grad_norm": 20.023831440292504, "learning_rate": 2e-06, "loss": 0.2206, "step": 5064 }, { "epoch": 1.1750376986428488, "grad_norm": 11.468361701197614, "learning_rate": 2e-06, "loss": 0.2195, "step": 5065 }, { "epoch": 1.1752696902911495, "grad_norm": 12.883223148041303, "learning_rate": 2e-06, "loss": 0.2525, "step": 5066 }, { "epoch": 1.1755016819394501, "grad_norm": 12.738172382017016, "learning_rate": 2e-06, "loss": 0.3331, "step": 5067 }, { "epoch": 1.1757336735877508, "grad_norm": 15.362041684758266, "learning_rate": 2e-06, "loss": 0.3555, "step": 5068 }, { "epoch": 1.1759656652360515, "grad_norm": 12.414739283145341, "learning_rate": 2e-06, "loss": 0.1942, "step": 5069 }, { "epoch": 1.1761976568843522, "grad_norm": 17.904270630927922, "learning_rate": 2e-06, "loss": 0.2559, "step": 5070 }, { "epoch": 1.1764296485326529, "grad_norm": 13.538195283396922, "learning_rate": 2e-06, "loss": 0.2464, "step": 5071 }, { "epoch": 1.1766616401809535, "grad_norm": 13.422021738421487, "learning_rate": 2e-06, "loss": 0.2749, "step": 5072 }, { "epoch": 1.1768936318292542, "grad_norm": 22.53580094618496, "learning_rate": 2e-06, "loss": 0.243, "step": 5073 }, { "epoch": 1.177125623477555, "grad_norm": 11.55881963636086, "learning_rate": 2e-06, "loss": 0.2796, "step": 5074 }, { "epoch": 1.1773576151258554, "grad_norm": 10.049045185397313, "learning_rate": 2e-06, "loss": 0.1981, "step": 5075 }, { "epoch": 1.177589606774156, "grad_norm": 22.65546336238449, "learning_rate": 2e-06, "loss": 0.2988, "step": 5076 }, { "epoch": 1.1778215984224567, "grad_norm": 13.048630873201166, "learning_rate": 2e-06, "loss": 0.2681, "step": 5077 }, { "epoch": 1.1780535900707574, "grad_norm": 13.900332711139294, "learning_rate": 2e-06, "loss": 0.2521, "step": 5078 }, { "epoch": 1.178285581719058, "grad_norm": 11.32114169357351, "learning_rate": 2e-06, "loss": 0.2488, "step": 5079 }, { "epoch": 1.1785175733673587, "grad_norm": 24.06534770278926, "learning_rate": 2e-06, "loss": 0.4219, "step": 5080 }, { "epoch": 1.1787495650156594, "grad_norm": 20.324166681833166, "learning_rate": 2e-06, "loss": 0.249, "step": 5081 }, { "epoch": 1.17898155666396, "grad_norm": 10.903753798413435, "learning_rate": 2e-06, "loss": 0.2103, "step": 5082 }, { "epoch": 1.1792135483122608, "grad_norm": 29.388568864614136, "learning_rate": 2e-06, "loss": 0.4449, "step": 5083 }, { "epoch": 1.1794455399605615, "grad_norm": 12.80463042132413, "learning_rate": 2e-06, "loss": 0.2395, "step": 5084 }, { "epoch": 1.1796775316088621, "grad_norm": 21.144388392722167, "learning_rate": 2e-06, "loss": 0.3401, "step": 5085 }, { "epoch": 1.1799095232571628, "grad_norm": 12.57877912456603, "learning_rate": 2e-06, "loss": 0.2803, "step": 5086 }, { "epoch": 1.1801415149054635, "grad_norm": 12.110198711063557, "learning_rate": 2e-06, "loss": 0.192, "step": 5087 }, { "epoch": 1.1803735065537642, "grad_norm": 9.479556009422616, "learning_rate": 2e-06, "loss": 0.2175, "step": 5088 }, { "epoch": 1.1806054982020648, "grad_norm": 9.087753336527165, "learning_rate": 2e-06, "loss": 0.1777, "step": 5089 }, { "epoch": 1.1808374898503653, "grad_norm": 16.533214839801566, "learning_rate": 2e-06, "loss": 0.2729, "step": 5090 }, { "epoch": 1.181069481498666, "grad_norm": 20.021190380675282, "learning_rate": 2e-06, "loss": 0.3022, "step": 5091 }, { "epoch": 1.1813014731469667, "grad_norm": 19.108612020028357, "learning_rate": 2e-06, "loss": 0.3042, "step": 5092 }, { "epoch": 1.1815334647952673, "grad_norm": 13.03954712661226, "learning_rate": 2e-06, "loss": 0.1961, "step": 5093 }, { "epoch": 1.181765456443568, "grad_norm": 13.956540532981451, "learning_rate": 2e-06, "loss": 0.277, "step": 5094 }, { "epoch": 1.1819974480918687, "grad_norm": 16.498525718064315, "learning_rate": 2e-06, "loss": 0.1667, "step": 5095 }, { "epoch": 1.1822294397401694, "grad_norm": 24.182252375905538, "learning_rate": 2e-06, "loss": 0.3496, "step": 5096 }, { "epoch": 1.18246143138847, "grad_norm": 15.170383802626947, "learning_rate": 2e-06, "loss": 0.308, "step": 5097 }, { "epoch": 1.1826934230367707, "grad_norm": 12.57381961517264, "learning_rate": 2e-06, "loss": 0.2234, "step": 5098 }, { "epoch": 1.1829254146850714, "grad_norm": 17.774575808972216, "learning_rate": 2e-06, "loss": 0.3411, "step": 5099 }, { "epoch": 1.183157406333372, "grad_norm": 12.241022832388117, "learning_rate": 2e-06, "loss": 0.3423, "step": 5100 }, { "epoch": 1.1833893979816728, "grad_norm": 13.921320685454633, "learning_rate": 2e-06, "loss": 0.1748, "step": 5101 }, { "epoch": 1.1836213896299732, "grad_norm": 11.853467367859825, "learning_rate": 2e-06, "loss": 0.17, "step": 5102 }, { "epoch": 1.1838533812782739, "grad_norm": 14.718381089627021, "learning_rate": 2e-06, "loss": 0.2199, "step": 5103 }, { "epoch": 1.1840853729265746, "grad_norm": 14.749166426120494, "learning_rate": 2e-06, "loss": 0.277, "step": 5104 }, { "epoch": 1.1843173645748752, "grad_norm": 11.982207117697797, "learning_rate": 2e-06, "loss": 0.2446, "step": 5105 }, { "epoch": 1.184549356223176, "grad_norm": 10.943136557718498, "learning_rate": 2e-06, "loss": 0.284, "step": 5106 }, { "epoch": 1.1847813478714766, "grad_norm": 12.564714406437187, "learning_rate": 2e-06, "loss": 0.1975, "step": 5107 }, { "epoch": 1.1850133395197773, "grad_norm": 16.674037184874972, "learning_rate": 2e-06, "loss": 0.2302, "step": 5108 }, { "epoch": 1.185245331168078, "grad_norm": 21.112893619492684, "learning_rate": 2e-06, "loss": 0.4166, "step": 5109 }, { "epoch": 1.1854773228163786, "grad_norm": 16.847327105118747, "learning_rate": 2e-06, "loss": 0.259, "step": 5110 }, { "epoch": 1.1857093144646793, "grad_norm": 15.396429450961502, "learning_rate": 2e-06, "loss": 0.2732, "step": 5111 }, { "epoch": 1.18594130611298, "grad_norm": 15.041230182302556, "learning_rate": 2e-06, "loss": 0.2603, "step": 5112 }, { "epoch": 1.1861732977612807, "grad_norm": 11.679276575944929, "learning_rate": 2e-06, "loss": 0.2414, "step": 5113 }, { "epoch": 1.1864052894095813, "grad_norm": 17.23626301583967, "learning_rate": 2e-06, "loss": 0.2465, "step": 5114 }, { "epoch": 1.186637281057882, "grad_norm": 10.387629871060861, "learning_rate": 2e-06, "loss": 0.2685, "step": 5115 }, { "epoch": 1.1868692727061827, "grad_norm": 13.583743595609374, "learning_rate": 2e-06, "loss": 0.2684, "step": 5116 }, { "epoch": 1.1871012643544832, "grad_norm": 18.0518869085287, "learning_rate": 2e-06, "loss": 0.1706, "step": 5117 }, { "epoch": 1.1873332560027838, "grad_norm": 8.292611403006696, "learning_rate": 2e-06, "loss": 0.1672, "step": 5118 }, { "epoch": 1.1875652476510845, "grad_norm": 9.030629924098347, "learning_rate": 2e-06, "loss": 0.2098, "step": 5119 }, { "epoch": 1.1877972392993852, "grad_norm": 22.190945386452597, "learning_rate": 2e-06, "loss": 0.4378, "step": 5120 }, { "epoch": 1.1880292309476859, "grad_norm": 14.062678153543576, "learning_rate": 2e-06, "loss": 0.2778, "step": 5121 }, { "epoch": 1.1882612225959865, "grad_norm": 9.700629503739886, "learning_rate": 2e-06, "loss": 0.3183, "step": 5122 }, { "epoch": 1.1884932142442872, "grad_norm": 8.273146258477006, "learning_rate": 2e-06, "loss": 0.2306, "step": 5123 }, { "epoch": 1.188725205892588, "grad_norm": 11.615305784614497, "learning_rate": 2e-06, "loss": 0.2241, "step": 5124 }, { "epoch": 1.1889571975408886, "grad_norm": 9.89918448079035, "learning_rate": 2e-06, "loss": 0.1862, "step": 5125 }, { "epoch": 1.1891891891891893, "grad_norm": 16.405532140936124, "learning_rate": 2e-06, "loss": 0.2831, "step": 5126 }, { "epoch": 1.18942118083749, "grad_norm": 25.228975723271233, "learning_rate": 2e-06, "loss": 0.3914, "step": 5127 }, { "epoch": 1.1896531724857904, "grad_norm": 18.17180925808904, "learning_rate": 2e-06, "loss": 0.2368, "step": 5128 }, { "epoch": 1.189885164134091, "grad_norm": 8.534850939568784, "learning_rate": 2e-06, "loss": 0.2791, "step": 5129 }, { "epoch": 1.1901171557823917, "grad_norm": 20.27568382310745, "learning_rate": 2e-06, "loss": 0.3322, "step": 5130 }, { "epoch": 1.1903491474306924, "grad_norm": 14.323277855853348, "learning_rate": 2e-06, "loss": 0.3306, "step": 5131 }, { "epoch": 1.190581139078993, "grad_norm": 7.469000808934431, "learning_rate": 2e-06, "loss": 0.1893, "step": 5132 }, { "epoch": 1.1908131307272938, "grad_norm": 12.340451919803156, "learning_rate": 2e-06, "loss": 0.2664, "step": 5133 }, { "epoch": 1.1910451223755945, "grad_norm": 15.508648743070854, "learning_rate": 2e-06, "loss": 0.2753, "step": 5134 }, { "epoch": 1.1912771140238951, "grad_norm": 23.80622709464657, "learning_rate": 2e-06, "loss": 0.3063, "step": 5135 }, { "epoch": 1.1915091056721958, "grad_norm": 16.937254153485966, "learning_rate": 2e-06, "loss": 0.2666, "step": 5136 }, { "epoch": 1.1917410973204965, "grad_norm": 9.110017246897536, "learning_rate": 2e-06, "loss": 0.17, "step": 5137 }, { "epoch": 1.1919730889687972, "grad_norm": 13.043804133984896, "learning_rate": 2e-06, "loss": 0.3281, "step": 5138 }, { "epoch": 1.1922050806170978, "grad_norm": 8.90342892280623, "learning_rate": 2e-06, "loss": 0.1846, "step": 5139 }, { "epoch": 1.1924370722653985, "grad_norm": 7.969223964850742, "learning_rate": 2e-06, "loss": 0.1504, "step": 5140 }, { "epoch": 1.1926690639136992, "grad_norm": 13.442988145448872, "learning_rate": 2e-06, "loss": 0.2254, "step": 5141 }, { "epoch": 1.1929010555619999, "grad_norm": 18.190046733000564, "learning_rate": 2e-06, "loss": 0.148, "step": 5142 }, { "epoch": 1.1931330472103003, "grad_norm": 11.28922782319458, "learning_rate": 2e-06, "loss": 0.2086, "step": 5143 }, { "epoch": 1.193365038858601, "grad_norm": 22.17112011918985, "learning_rate": 2e-06, "loss": 0.3036, "step": 5144 }, { "epoch": 1.1935970305069017, "grad_norm": 14.839295424960572, "learning_rate": 2e-06, "loss": 0.2228, "step": 5145 }, { "epoch": 1.1938290221552024, "grad_norm": 12.869616829651251, "learning_rate": 2e-06, "loss": 0.189, "step": 5146 }, { "epoch": 1.194061013803503, "grad_norm": 15.443605579905961, "learning_rate": 2e-06, "loss": 0.2418, "step": 5147 }, { "epoch": 1.1942930054518037, "grad_norm": 21.87299521722901, "learning_rate": 2e-06, "loss": 0.2444, "step": 5148 }, { "epoch": 1.1945249971001044, "grad_norm": 9.004184331024451, "learning_rate": 2e-06, "loss": 0.1728, "step": 5149 }, { "epoch": 1.194756988748405, "grad_norm": 12.551599524000297, "learning_rate": 2e-06, "loss": 0.2258, "step": 5150 }, { "epoch": 1.1949889803967058, "grad_norm": 29.857523497550893, "learning_rate": 2e-06, "loss": 0.3028, "step": 5151 }, { "epoch": 1.1952209720450064, "grad_norm": 12.085422933856577, "learning_rate": 2e-06, "loss": 0.2088, "step": 5152 }, { "epoch": 1.1954529636933071, "grad_norm": 17.68757686122845, "learning_rate": 2e-06, "loss": 0.3159, "step": 5153 }, { "epoch": 1.1956849553416078, "grad_norm": 18.730269953915307, "learning_rate": 2e-06, "loss": 0.2206, "step": 5154 }, { "epoch": 1.1959169469899082, "grad_norm": 21.30782403888487, "learning_rate": 2e-06, "loss": 0.2301, "step": 5155 }, { "epoch": 1.196148938638209, "grad_norm": 22.47097003616981, "learning_rate": 2e-06, "loss": 0.3451, "step": 5156 }, { "epoch": 1.1963809302865096, "grad_norm": 15.510565862769823, "learning_rate": 2e-06, "loss": 0.3132, "step": 5157 }, { "epoch": 1.1966129219348103, "grad_norm": 15.18217102573386, "learning_rate": 2e-06, "loss": 0.2763, "step": 5158 }, { "epoch": 1.196844913583111, "grad_norm": 8.803567230647891, "learning_rate": 2e-06, "loss": 0.1541, "step": 5159 }, { "epoch": 1.1970769052314116, "grad_norm": 13.327744557640425, "learning_rate": 2e-06, "loss": 0.1522, "step": 5160 }, { "epoch": 1.1973088968797123, "grad_norm": 18.90690605913973, "learning_rate": 2e-06, "loss": 0.2968, "step": 5161 }, { "epoch": 1.197540888528013, "grad_norm": 12.32255655411487, "learning_rate": 2e-06, "loss": 0.2587, "step": 5162 }, { "epoch": 1.1977728801763137, "grad_norm": 11.8652432225982, "learning_rate": 2e-06, "loss": 0.1902, "step": 5163 }, { "epoch": 1.1980048718246143, "grad_norm": 16.73378327149659, "learning_rate": 2e-06, "loss": 0.2478, "step": 5164 }, { "epoch": 1.198236863472915, "grad_norm": 11.562678414791206, "learning_rate": 2e-06, "loss": 0.1989, "step": 5165 }, { "epoch": 1.1984688551212157, "grad_norm": 10.397222114298854, "learning_rate": 2e-06, "loss": 0.2023, "step": 5166 }, { "epoch": 1.1987008467695164, "grad_norm": 42.61136099362814, "learning_rate": 2e-06, "loss": 0.3812, "step": 5167 }, { "epoch": 1.198932838417817, "grad_norm": 7.762564277411292, "learning_rate": 2e-06, "loss": 0.1743, "step": 5168 }, { "epoch": 1.1991648300661177, "grad_norm": 19.320174208023595, "learning_rate": 2e-06, "loss": 0.2735, "step": 5169 }, { "epoch": 1.1993968217144182, "grad_norm": 9.33982545876157, "learning_rate": 2e-06, "loss": 0.1582, "step": 5170 }, { "epoch": 1.1996288133627189, "grad_norm": 24.12096917597885, "learning_rate": 2e-06, "loss": 0.269, "step": 5171 }, { "epoch": 1.1998608050110195, "grad_norm": 14.189468767579216, "learning_rate": 2e-06, "loss": 0.2206, "step": 5172 }, { "epoch": 1.2000927966593202, "grad_norm": 15.64630259236317, "learning_rate": 2e-06, "loss": 0.2856, "step": 5173 }, { "epoch": 1.200324788307621, "grad_norm": 11.43412902266011, "learning_rate": 2e-06, "loss": 0.1606, "step": 5174 }, { "epoch": 1.2005567799559216, "grad_norm": 17.885516797052404, "learning_rate": 2e-06, "loss": 0.2469, "step": 5175 }, { "epoch": 1.2007887716042223, "grad_norm": 13.97343455823514, "learning_rate": 2e-06, "loss": 0.2441, "step": 5176 }, { "epoch": 1.201020763252523, "grad_norm": 21.132176964097027, "learning_rate": 2e-06, "loss": 0.1858, "step": 5177 }, { "epoch": 1.2012527549008236, "grad_norm": 16.177779257561756, "learning_rate": 2e-06, "loss": 0.2804, "step": 5178 }, { "epoch": 1.2014847465491243, "grad_norm": 15.237446241231124, "learning_rate": 2e-06, "loss": 0.3138, "step": 5179 }, { "epoch": 1.201716738197425, "grad_norm": 15.410025662753815, "learning_rate": 2e-06, "loss": 0.272, "step": 5180 }, { "epoch": 1.2019487298457256, "grad_norm": 24.874371348199457, "learning_rate": 2e-06, "loss": 0.2788, "step": 5181 }, { "epoch": 1.202180721494026, "grad_norm": 18.610897292084417, "learning_rate": 2e-06, "loss": 0.2966, "step": 5182 }, { "epoch": 1.2024127131423268, "grad_norm": 18.36018014980279, "learning_rate": 2e-06, "loss": 0.2466, "step": 5183 }, { "epoch": 1.2026447047906275, "grad_norm": 20.29533914354666, "learning_rate": 2e-06, "loss": 0.279, "step": 5184 }, { "epoch": 1.2028766964389281, "grad_norm": 15.975613795013066, "learning_rate": 2e-06, "loss": 0.2532, "step": 5185 }, { "epoch": 1.2031086880872288, "grad_norm": 10.660371213800708, "learning_rate": 2e-06, "loss": 0.2077, "step": 5186 }, { "epoch": 1.2033406797355295, "grad_norm": 12.96893264451673, "learning_rate": 2e-06, "loss": 0.2392, "step": 5187 }, { "epoch": 1.2035726713838302, "grad_norm": 15.938185909799879, "learning_rate": 2e-06, "loss": 0.2569, "step": 5188 }, { "epoch": 1.2038046630321309, "grad_norm": 15.88553069087415, "learning_rate": 2e-06, "loss": 0.2742, "step": 5189 }, { "epoch": 1.2040366546804315, "grad_norm": 17.130342950677043, "learning_rate": 2e-06, "loss": 0.2267, "step": 5190 }, { "epoch": 1.2042686463287322, "grad_norm": 14.692193200428905, "learning_rate": 2e-06, "loss": 0.2488, "step": 5191 }, { "epoch": 1.2045006379770329, "grad_norm": 20.5376558559371, "learning_rate": 2e-06, "loss": 0.2569, "step": 5192 }, { "epoch": 1.2047326296253336, "grad_norm": 12.547370979534392, "learning_rate": 2e-06, "loss": 0.1669, "step": 5193 }, { "epoch": 1.2049646212736342, "grad_norm": 20.732742991471042, "learning_rate": 2e-06, "loss": 0.4357, "step": 5194 }, { "epoch": 1.205196612921935, "grad_norm": 16.429927193275017, "learning_rate": 2e-06, "loss": 0.2503, "step": 5195 }, { "epoch": 1.2054286045702356, "grad_norm": 11.049573146865242, "learning_rate": 2e-06, "loss": 0.1848, "step": 5196 }, { "epoch": 1.205660596218536, "grad_norm": 14.675234242172602, "learning_rate": 2e-06, "loss": 0.2755, "step": 5197 }, { "epoch": 1.2058925878668367, "grad_norm": 15.300604270336887, "learning_rate": 2e-06, "loss": 0.317, "step": 5198 }, { "epoch": 1.2061245795151374, "grad_norm": 9.036597914764565, "learning_rate": 2e-06, "loss": 0.1548, "step": 5199 }, { "epoch": 1.206356571163438, "grad_norm": 12.011136214657867, "learning_rate": 2e-06, "loss": 0.1626, "step": 5200 }, { "epoch": 1.2065885628117388, "grad_norm": 16.154875873107986, "learning_rate": 2e-06, "loss": 0.2903, "step": 5201 }, { "epoch": 1.2068205544600394, "grad_norm": 17.69607289312409, "learning_rate": 2e-06, "loss": 0.2915, "step": 5202 }, { "epoch": 1.2070525461083401, "grad_norm": 17.132895546179085, "learning_rate": 2e-06, "loss": 0.2851, "step": 5203 }, { "epoch": 1.2072845377566408, "grad_norm": 17.23651791207007, "learning_rate": 2e-06, "loss": 0.2503, "step": 5204 }, { "epoch": 1.2075165294049415, "grad_norm": 13.061251148229335, "learning_rate": 2e-06, "loss": 0.2728, "step": 5205 }, { "epoch": 1.2077485210532422, "grad_norm": 9.275794904296339, "learning_rate": 2e-06, "loss": 0.1567, "step": 5206 }, { "epoch": 1.2079805127015428, "grad_norm": 10.008621925169576, "learning_rate": 2e-06, "loss": 0.2422, "step": 5207 }, { "epoch": 1.2082125043498433, "grad_norm": 7.5606010156297, "learning_rate": 2e-06, "loss": 0.1861, "step": 5208 }, { "epoch": 1.208444495998144, "grad_norm": 13.35580662177829, "learning_rate": 2e-06, "loss": 0.3143, "step": 5209 }, { "epoch": 1.2086764876464446, "grad_norm": 13.10733565592071, "learning_rate": 2e-06, "loss": 0.1783, "step": 5210 }, { "epoch": 1.2089084792947453, "grad_norm": 7.70315944281134, "learning_rate": 2e-06, "loss": 0.131, "step": 5211 }, { "epoch": 1.209140470943046, "grad_norm": 11.866743411131484, "learning_rate": 2e-06, "loss": 0.2679, "step": 5212 }, { "epoch": 1.2093724625913467, "grad_norm": 22.510614560943893, "learning_rate": 2e-06, "loss": 0.2429, "step": 5213 }, { "epoch": 1.2096044542396474, "grad_norm": 10.310466738956148, "learning_rate": 2e-06, "loss": 0.2073, "step": 5214 }, { "epoch": 1.209836445887948, "grad_norm": 20.320785351962254, "learning_rate": 2e-06, "loss": 0.3057, "step": 5215 }, { "epoch": 1.2100684375362487, "grad_norm": 9.40601990023123, "learning_rate": 2e-06, "loss": 0.182, "step": 5216 }, { "epoch": 1.2103004291845494, "grad_norm": 13.741165780691722, "learning_rate": 2e-06, "loss": 0.2182, "step": 5217 }, { "epoch": 1.21053242083285, "grad_norm": 7.508991657284485, "learning_rate": 2e-06, "loss": 0.1946, "step": 5218 }, { "epoch": 1.2107644124811507, "grad_norm": 15.514854424391363, "learning_rate": 2e-06, "loss": 0.2248, "step": 5219 }, { "epoch": 1.2109964041294514, "grad_norm": 10.895204473041584, "learning_rate": 2e-06, "loss": 0.1865, "step": 5220 }, { "epoch": 1.211228395777752, "grad_norm": 19.450039982901867, "learning_rate": 2e-06, "loss": 0.219, "step": 5221 }, { "epoch": 1.2114603874260528, "grad_norm": 13.302386848635217, "learning_rate": 2e-06, "loss": 0.1976, "step": 5222 }, { "epoch": 1.2116923790743532, "grad_norm": 22.35231992069119, "learning_rate": 2e-06, "loss": 0.4383, "step": 5223 }, { "epoch": 1.211924370722654, "grad_norm": 16.850637258242692, "learning_rate": 2e-06, "loss": 0.1745, "step": 5224 }, { "epoch": 1.2121563623709546, "grad_norm": 7.775618491069661, "learning_rate": 2e-06, "loss": 0.1796, "step": 5225 }, { "epoch": 1.2123883540192553, "grad_norm": 26.089677514709958, "learning_rate": 2e-06, "loss": 0.3559, "step": 5226 }, { "epoch": 1.212620345667556, "grad_norm": 21.937358674413783, "learning_rate": 2e-06, "loss": 0.2087, "step": 5227 }, { "epoch": 1.2128523373158566, "grad_norm": 8.702241396578975, "learning_rate": 2e-06, "loss": 0.2015, "step": 5228 }, { "epoch": 1.2130843289641573, "grad_norm": 7.409840180631241, "learning_rate": 2e-06, "loss": 0.1414, "step": 5229 }, { "epoch": 1.213316320612458, "grad_norm": 19.95628813984114, "learning_rate": 2e-06, "loss": 0.3464, "step": 5230 }, { "epoch": 1.2135483122607587, "grad_norm": 17.23062502380466, "learning_rate": 2e-06, "loss": 0.2711, "step": 5231 }, { "epoch": 1.2137803039090593, "grad_norm": 20.833991635692122, "learning_rate": 2e-06, "loss": 0.4338, "step": 5232 }, { "epoch": 1.21401229555736, "grad_norm": 15.283066556761923, "learning_rate": 2e-06, "loss": 0.3238, "step": 5233 }, { "epoch": 1.2142442872056607, "grad_norm": 14.901162875963417, "learning_rate": 2e-06, "loss": 0.1931, "step": 5234 }, { "epoch": 1.2144762788539611, "grad_norm": 21.469509336173594, "learning_rate": 2e-06, "loss": 0.2316, "step": 5235 }, { "epoch": 1.2147082705022618, "grad_norm": 17.957005529775234, "learning_rate": 2e-06, "loss": 0.3315, "step": 5236 }, { "epoch": 1.2149402621505625, "grad_norm": 18.140735392836362, "learning_rate": 2e-06, "loss": 0.2248, "step": 5237 }, { "epoch": 1.2151722537988632, "grad_norm": 26.17420284310708, "learning_rate": 2e-06, "loss": 0.3931, "step": 5238 }, { "epoch": 1.2154042454471639, "grad_norm": 7.8289304917804055, "learning_rate": 2e-06, "loss": 0.1446, "step": 5239 }, { "epoch": 1.2156362370954645, "grad_norm": 18.23081021770153, "learning_rate": 2e-06, "loss": 0.1974, "step": 5240 }, { "epoch": 1.2158682287437652, "grad_norm": 18.589950781673117, "learning_rate": 2e-06, "loss": 0.2152, "step": 5241 }, { "epoch": 1.216100220392066, "grad_norm": 15.53438228175382, "learning_rate": 2e-06, "loss": 0.2284, "step": 5242 }, { "epoch": 1.2163322120403666, "grad_norm": 17.21491317910429, "learning_rate": 2e-06, "loss": 0.2799, "step": 5243 }, { "epoch": 1.2165642036886672, "grad_norm": 17.67007580606666, "learning_rate": 2e-06, "loss": 0.4263, "step": 5244 }, { "epoch": 1.216796195336968, "grad_norm": 16.006138323506924, "learning_rate": 2e-06, "loss": 0.2022, "step": 5245 }, { "epoch": 1.2170281869852686, "grad_norm": 12.50912584709491, "learning_rate": 2e-06, "loss": 0.2468, "step": 5246 }, { "epoch": 1.2172601786335693, "grad_norm": 19.46136469945236, "learning_rate": 2e-06, "loss": 0.3507, "step": 5247 }, { "epoch": 1.21749217028187, "grad_norm": 14.635269609314967, "learning_rate": 2e-06, "loss": 0.2371, "step": 5248 }, { "epoch": 1.2177241619301706, "grad_norm": 13.798822892634078, "learning_rate": 2e-06, "loss": 0.218, "step": 5249 }, { "epoch": 1.217956153578471, "grad_norm": 27.779776858612273, "learning_rate": 2e-06, "loss": 0.5181, "step": 5250 }, { "epoch": 1.2181881452267718, "grad_norm": 15.34299875845018, "learning_rate": 2e-06, "loss": 0.3343, "step": 5251 }, { "epoch": 1.2184201368750724, "grad_norm": 18.417075663930053, "learning_rate": 2e-06, "loss": 0.2751, "step": 5252 }, { "epoch": 1.2186521285233731, "grad_norm": 18.78961744953273, "learning_rate": 2e-06, "loss": 0.3231, "step": 5253 }, { "epoch": 1.2188841201716738, "grad_norm": 13.46130141911124, "learning_rate": 2e-06, "loss": 0.2129, "step": 5254 }, { "epoch": 1.2191161118199745, "grad_norm": 11.833100567305175, "learning_rate": 2e-06, "loss": 0.3017, "step": 5255 }, { "epoch": 1.2193481034682752, "grad_norm": 7.404813410538326, "learning_rate": 2e-06, "loss": 0.2085, "step": 5256 }, { "epoch": 1.2195800951165758, "grad_norm": 27.538389014789562, "learning_rate": 2e-06, "loss": 0.3444, "step": 5257 }, { "epoch": 1.2198120867648765, "grad_norm": 14.48501764302651, "learning_rate": 2e-06, "loss": 0.265, "step": 5258 }, { "epoch": 1.2200440784131772, "grad_norm": 15.734888066600165, "learning_rate": 2e-06, "loss": 0.2839, "step": 5259 }, { "epoch": 1.2202760700614779, "grad_norm": 14.022442780422809, "learning_rate": 2e-06, "loss": 0.3132, "step": 5260 }, { "epoch": 1.2205080617097783, "grad_norm": 15.731696974707647, "learning_rate": 2e-06, "loss": 0.1934, "step": 5261 }, { "epoch": 1.220740053358079, "grad_norm": 12.422595153554099, "learning_rate": 2e-06, "loss": 0.2521, "step": 5262 }, { "epoch": 1.2209720450063797, "grad_norm": 15.45177514885588, "learning_rate": 2e-06, "loss": 0.2583, "step": 5263 }, { "epoch": 1.2212040366546804, "grad_norm": 14.29565801586227, "learning_rate": 2e-06, "loss": 0.2793, "step": 5264 }, { "epoch": 1.221436028302981, "grad_norm": 15.599363130704816, "learning_rate": 2e-06, "loss": 0.2823, "step": 5265 }, { "epoch": 1.2216680199512817, "grad_norm": 15.440296781675615, "learning_rate": 2e-06, "loss": 0.3655, "step": 5266 }, { "epoch": 1.2219000115995824, "grad_norm": 13.407628310415342, "learning_rate": 2e-06, "loss": 0.2767, "step": 5267 }, { "epoch": 1.222132003247883, "grad_norm": 10.727707106075549, "learning_rate": 2e-06, "loss": 0.1716, "step": 5268 }, { "epoch": 1.2223639948961837, "grad_norm": 10.130188590250924, "learning_rate": 2e-06, "loss": 0.2332, "step": 5269 }, { "epoch": 1.2225959865444844, "grad_norm": 14.498875997948055, "learning_rate": 2e-06, "loss": 0.278, "step": 5270 }, { "epoch": 1.222827978192785, "grad_norm": 8.002068878752675, "learning_rate": 2e-06, "loss": 0.2471, "step": 5271 }, { "epoch": 1.2230599698410858, "grad_norm": 11.603943189818388, "learning_rate": 2e-06, "loss": 0.2822, "step": 5272 }, { "epoch": 1.2232919614893865, "grad_norm": 8.812245906502385, "learning_rate": 2e-06, "loss": 0.173, "step": 5273 }, { "epoch": 1.2235239531376871, "grad_norm": 21.6674746111459, "learning_rate": 2e-06, "loss": 0.2749, "step": 5274 }, { "epoch": 1.2237559447859878, "grad_norm": 21.08493131129624, "learning_rate": 2e-06, "loss": 0.3471, "step": 5275 }, { "epoch": 1.2239879364342883, "grad_norm": 16.69822828663652, "learning_rate": 2e-06, "loss": 0.2287, "step": 5276 }, { "epoch": 1.224219928082589, "grad_norm": 10.561775512795705, "learning_rate": 2e-06, "loss": 0.1804, "step": 5277 }, { "epoch": 1.2244519197308896, "grad_norm": 28.678018135887456, "learning_rate": 2e-06, "loss": 0.3214, "step": 5278 }, { "epoch": 1.2246839113791903, "grad_norm": 28.824433534080182, "learning_rate": 2e-06, "loss": 0.2695, "step": 5279 }, { "epoch": 1.224915903027491, "grad_norm": 14.031124142089942, "learning_rate": 2e-06, "loss": 0.2114, "step": 5280 }, { "epoch": 1.2251478946757917, "grad_norm": 17.859482295756116, "learning_rate": 2e-06, "loss": 0.226, "step": 5281 }, { "epoch": 1.2253798863240923, "grad_norm": 16.643741572523084, "learning_rate": 2e-06, "loss": 0.1829, "step": 5282 }, { "epoch": 1.225611877972393, "grad_norm": 37.07114976670886, "learning_rate": 2e-06, "loss": 0.232, "step": 5283 }, { "epoch": 1.2258438696206937, "grad_norm": 20.04040061838349, "learning_rate": 2e-06, "loss": 0.4911, "step": 5284 }, { "epoch": 1.2260758612689944, "grad_norm": 11.54246641036162, "learning_rate": 2e-06, "loss": 0.2101, "step": 5285 }, { "epoch": 1.226307852917295, "grad_norm": 16.36165634216338, "learning_rate": 2e-06, "loss": 0.2945, "step": 5286 }, { "epoch": 1.2265398445655957, "grad_norm": 19.920970194021628, "learning_rate": 2e-06, "loss": 0.2737, "step": 5287 }, { "epoch": 1.2267718362138962, "grad_norm": 17.9952410931923, "learning_rate": 2e-06, "loss": 0.3436, "step": 5288 }, { "epoch": 1.2270038278621969, "grad_norm": 11.289988956344413, "learning_rate": 2e-06, "loss": 0.2206, "step": 5289 }, { "epoch": 1.2272358195104975, "grad_norm": 13.383557975275709, "learning_rate": 2e-06, "loss": 0.2419, "step": 5290 }, { "epoch": 1.2274678111587982, "grad_norm": 22.369959716146592, "learning_rate": 2e-06, "loss": 0.2851, "step": 5291 }, { "epoch": 1.227699802807099, "grad_norm": 12.843101286087245, "learning_rate": 2e-06, "loss": 0.2112, "step": 5292 }, { "epoch": 1.2279317944553996, "grad_norm": 9.344779922844397, "learning_rate": 2e-06, "loss": 0.267, "step": 5293 }, { "epoch": 1.2281637861037003, "grad_norm": 12.59966525810847, "learning_rate": 2e-06, "loss": 0.295, "step": 5294 }, { "epoch": 1.228395777752001, "grad_norm": 14.793820877515458, "learning_rate": 2e-06, "loss": 0.2337, "step": 5295 }, { "epoch": 1.2286277694003016, "grad_norm": 9.697869595422336, "learning_rate": 2e-06, "loss": 0.1847, "step": 5296 }, { "epoch": 1.2288597610486023, "grad_norm": 12.580748125443968, "learning_rate": 2e-06, "loss": 0.211, "step": 5297 }, { "epoch": 1.229091752696903, "grad_norm": 8.40386513019791, "learning_rate": 2e-06, "loss": 0.1698, "step": 5298 }, { "epoch": 1.2293237443452036, "grad_norm": 7.596042516068372, "learning_rate": 2e-06, "loss": 0.2155, "step": 5299 }, { "epoch": 1.2295557359935043, "grad_norm": 17.352822181947214, "learning_rate": 2e-06, "loss": 0.3248, "step": 5300 }, { "epoch": 1.229787727641805, "grad_norm": 14.722065240431393, "learning_rate": 2e-06, "loss": 0.2228, "step": 5301 }, { "epoch": 1.2300197192901057, "grad_norm": 8.723387036606349, "learning_rate": 2e-06, "loss": 0.1781, "step": 5302 }, { "epoch": 1.2302517109384061, "grad_norm": 15.040416722840469, "learning_rate": 2e-06, "loss": 0.2241, "step": 5303 }, { "epoch": 1.2304837025867068, "grad_norm": 13.198481853039846, "learning_rate": 2e-06, "loss": 0.234, "step": 5304 }, { "epoch": 1.2307156942350075, "grad_norm": 9.054704316573181, "learning_rate": 2e-06, "loss": 0.2446, "step": 5305 }, { "epoch": 1.2309476858833082, "grad_norm": 16.080836154342443, "learning_rate": 2e-06, "loss": 0.2978, "step": 5306 }, { "epoch": 1.2311796775316088, "grad_norm": 9.419831444878099, "learning_rate": 2e-06, "loss": 0.1617, "step": 5307 }, { "epoch": 1.2314116691799095, "grad_norm": 13.487378036543648, "learning_rate": 2e-06, "loss": 0.1953, "step": 5308 }, { "epoch": 1.2316436608282102, "grad_norm": 13.178197224965563, "learning_rate": 2e-06, "loss": 0.3204, "step": 5309 }, { "epoch": 1.2318756524765109, "grad_norm": 15.681660473782781, "learning_rate": 2e-06, "loss": 0.3323, "step": 5310 }, { "epoch": 1.2321076441248116, "grad_norm": 25.239516257597224, "learning_rate": 2e-06, "loss": 0.3799, "step": 5311 }, { "epoch": 1.2323396357731122, "grad_norm": 8.244385037922948, "learning_rate": 2e-06, "loss": 0.1961, "step": 5312 }, { "epoch": 1.232571627421413, "grad_norm": 12.190565719041365, "learning_rate": 2e-06, "loss": 0.311, "step": 5313 }, { "epoch": 1.2328036190697136, "grad_norm": 9.850539308026018, "learning_rate": 2e-06, "loss": 0.2206, "step": 5314 }, { "epoch": 1.233035610718014, "grad_norm": 14.153920435151814, "learning_rate": 2e-06, "loss": 0.3262, "step": 5315 }, { "epoch": 1.2332676023663147, "grad_norm": 13.520522643805842, "learning_rate": 2e-06, "loss": 0.1809, "step": 5316 }, { "epoch": 1.2334995940146154, "grad_norm": 19.47076432215645, "learning_rate": 2e-06, "loss": 0.2154, "step": 5317 }, { "epoch": 1.233731585662916, "grad_norm": 11.485832864111378, "learning_rate": 2e-06, "loss": 0.2006, "step": 5318 }, { "epoch": 1.2339635773112168, "grad_norm": 20.080611120412676, "learning_rate": 2e-06, "loss": 0.316, "step": 5319 }, { "epoch": 1.2341955689595174, "grad_norm": 8.779411883202771, "learning_rate": 2e-06, "loss": 0.1902, "step": 5320 }, { "epoch": 1.234427560607818, "grad_norm": 16.488788887028306, "learning_rate": 2e-06, "loss": 0.2165, "step": 5321 }, { "epoch": 1.2346595522561188, "grad_norm": 11.51692899731653, "learning_rate": 2e-06, "loss": 0.258, "step": 5322 }, { "epoch": 1.2348915439044195, "grad_norm": 17.678905519432654, "learning_rate": 2e-06, "loss": 0.3973, "step": 5323 }, { "epoch": 1.2351235355527201, "grad_norm": 11.581841331604895, "learning_rate": 2e-06, "loss": 0.2456, "step": 5324 }, { "epoch": 1.2353555272010208, "grad_norm": 18.37405604865033, "learning_rate": 2e-06, "loss": 0.3861, "step": 5325 }, { "epoch": 1.2355875188493215, "grad_norm": 15.130367359648647, "learning_rate": 2e-06, "loss": 0.4564, "step": 5326 }, { "epoch": 1.2358195104976222, "grad_norm": 14.957660846372246, "learning_rate": 2e-06, "loss": 0.2367, "step": 5327 }, { "epoch": 1.2360515021459229, "grad_norm": 10.56472444321113, "learning_rate": 2e-06, "loss": 0.197, "step": 5328 }, { "epoch": 1.2362834937942235, "grad_norm": 13.940692025178311, "learning_rate": 2e-06, "loss": 0.1999, "step": 5329 }, { "epoch": 1.236515485442524, "grad_norm": 11.270627604067421, "learning_rate": 2e-06, "loss": 0.278, "step": 5330 }, { "epoch": 1.2367474770908247, "grad_norm": 16.92028277449169, "learning_rate": 2e-06, "loss": 0.2766, "step": 5331 }, { "epoch": 1.2369794687391253, "grad_norm": 26.307067518093277, "learning_rate": 2e-06, "loss": 0.3855, "step": 5332 }, { "epoch": 1.237211460387426, "grad_norm": 21.92979348271037, "learning_rate": 2e-06, "loss": 0.3515, "step": 5333 }, { "epoch": 1.2374434520357267, "grad_norm": 11.581362836366731, "learning_rate": 2e-06, "loss": 0.2065, "step": 5334 }, { "epoch": 1.2376754436840274, "grad_norm": 11.930440029646041, "learning_rate": 2e-06, "loss": 0.1782, "step": 5335 }, { "epoch": 1.237907435332328, "grad_norm": 17.013879424895844, "learning_rate": 2e-06, "loss": 0.2203, "step": 5336 }, { "epoch": 1.2381394269806287, "grad_norm": 7.1496544046508195, "learning_rate": 2e-06, "loss": 0.1477, "step": 5337 }, { "epoch": 1.2383714186289294, "grad_norm": 15.173156812522874, "learning_rate": 2e-06, "loss": 0.253, "step": 5338 }, { "epoch": 1.23860341027723, "grad_norm": 13.382184341323773, "learning_rate": 2e-06, "loss": 0.2531, "step": 5339 }, { "epoch": 1.2388354019255308, "grad_norm": 10.946003794488881, "learning_rate": 2e-06, "loss": 0.2282, "step": 5340 }, { "epoch": 1.2390673935738312, "grad_norm": 10.11429496594036, "learning_rate": 2e-06, "loss": 0.2194, "step": 5341 }, { "epoch": 1.239299385222132, "grad_norm": 12.266409181966331, "learning_rate": 2e-06, "loss": 0.1854, "step": 5342 }, { "epoch": 1.2395313768704326, "grad_norm": 14.34239606199122, "learning_rate": 2e-06, "loss": 0.2947, "step": 5343 }, { "epoch": 1.2397633685187333, "grad_norm": 17.562564624110667, "learning_rate": 2e-06, "loss": 0.1894, "step": 5344 }, { "epoch": 1.239995360167034, "grad_norm": 13.432435067887818, "learning_rate": 2e-06, "loss": 0.2164, "step": 5345 }, { "epoch": 1.2402273518153346, "grad_norm": 22.792988260322883, "learning_rate": 2e-06, "loss": 0.3237, "step": 5346 }, { "epoch": 1.2404593434636353, "grad_norm": 11.917086324715964, "learning_rate": 2e-06, "loss": 0.2028, "step": 5347 }, { "epoch": 1.240691335111936, "grad_norm": 18.645434561808916, "learning_rate": 2e-06, "loss": 0.3638, "step": 5348 }, { "epoch": 1.2409233267602366, "grad_norm": 14.115886033627724, "learning_rate": 2e-06, "loss": 0.1957, "step": 5349 }, { "epoch": 1.2411553184085373, "grad_norm": 11.39626873072166, "learning_rate": 2e-06, "loss": 0.2367, "step": 5350 }, { "epoch": 1.241387310056838, "grad_norm": 18.343971576177992, "learning_rate": 2e-06, "loss": 0.301, "step": 5351 }, { "epoch": 1.2416193017051387, "grad_norm": 14.328369898087471, "learning_rate": 2e-06, "loss": 0.196, "step": 5352 }, { "epoch": 1.2418512933534394, "grad_norm": 23.31079139215547, "learning_rate": 2e-06, "loss": 0.3009, "step": 5353 }, { "epoch": 1.24208328500174, "grad_norm": 15.595543149206021, "learning_rate": 2e-06, "loss": 0.2458, "step": 5354 }, { "epoch": 1.2423152766500407, "grad_norm": 12.699706007266634, "learning_rate": 2e-06, "loss": 0.2639, "step": 5355 }, { "epoch": 1.2425472682983412, "grad_norm": 17.694766975284093, "learning_rate": 2e-06, "loss": 0.3025, "step": 5356 }, { "epoch": 1.2427792599466418, "grad_norm": 19.481656997033788, "learning_rate": 2e-06, "loss": 0.2393, "step": 5357 }, { "epoch": 1.2430112515949425, "grad_norm": 26.897648558409387, "learning_rate": 2e-06, "loss": 0.2656, "step": 5358 }, { "epoch": 1.2432432432432432, "grad_norm": 17.579246825669056, "learning_rate": 2e-06, "loss": 0.2459, "step": 5359 }, { "epoch": 1.2434752348915439, "grad_norm": 15.492054232320067, "learning_rate": 2e-06, "loss": 0.194, "step": 5360 }, { "epoch": 1.2437072265398446, "grad_norm": 10.026590173467095, "learning_rate": 2e-06, "loss": 0.1695, "step": 5361 }, { "epoch": 1.2439392181881452, "grad_norm": 16.692618658838594, "learning_rate": 2e-06, "loss": 0.2551, "step": 5362 }, { "epoch": 1.244171209836446, "grad_norm": 15.049609606356615, "learning_rate": 2e-06, "loss": 0.2791, "step": 5363 }, { "epoch": 1.2444032014847466, "grad_norm": 11.389033854712501, "learning_rate": 2e-06, "loss": 0.3525, "step": 5364 }, { "epoch": 1.2446351931330473, "grad_norm": 6.8271310257402815, "learning_rate": 2e-06, "loss": 0.1401, "step": 5365 }, { "epoch": 1.244867184781348, "grad_norm": 24.358447016584893, "learning_rate": 2e-06, "loss": 0.2946, "step": 5366 }, { "epoch": 1.2450991764296486, "grad_norm": 15.490606812265609, "learning_rate": 2e-06, "loss": 0.3321, "step": 5367 }, { "epoch": 1.245331168077949, "grad_norm": 17.53430950849937, "learning_rate": 2e-06, "loss": 0.2033, "step": 5368 }, { "epoch": 1.2455631597262498, "grad_norm": 6.766701685141638, "learning_rate": 2e-06, "loss": 0.1528, "step": 5369 }, { "epoch": 1.2457951513745504, "grad_norm": 21.917208890331153, "learning_rate": 2e-06, "loss": 0.3338, "step": 5370 }, { "epoch": 1.2460271430228511, "grad_norm": 9.526048106428112, "learning_rate": 2e-06, "loss": 0.1639, "step": 5371 }, { "epoch": 1.2462591346711518, "grad_norm": 11.603815223248493, "learning_rate": 2e-06, "loss": 0.2868, "step": 5372 }, { "epoch": 1.2464911263194525, "grad_norm": 13.988597939735435, "learning_rate": 2e-06, "loss": 0.259, "step": 5373 }, { "epoch": 1.2467231179677531, "grad_norm": 8.844100357062398, "learning_rate": 2e-06, "loss": 0.1971, "step": 5374 }, { "epoch": 1.2469551096160538, "grad_norm": 15.231375274391446, "learning_rate": 2e-06, "loss": 0.2439, "step": 5375 }, { "epoch": 1.2471871012643545, "grad_norm": 13.556775542243084, "learning_rate": 2e-06, "loss": 0.2513, "step": 5376 }, { "epoch": 1.2474190929126552, "grad_norm": 11.837907914182448, "learning_rate": 2e-06, "loss": 0.1514, "step": 5377 }, { "epoch": 1.2476510845609559, "grad_norm": 11.071337144240893, "learning_rate": 2e-06, "loss": 0.1871, "step": 5378 }, { "epoch": 1.2478830762092565, "grad_norm": 9.181267980204264, "learning_rate": 2e-06, "loss": 0.1558, "step": 5379 }, { "epoch": 1.2481150678575572, "grad_norm": 27.662846838543977, "learning_rate": 2e-06, "loss": 0.4109, "step": 5380 }, { "epoch": 1.248347059505858, "grad_norm": 7.471747957628051, "learning_rate": 2e-06, "loss": 0.1225, "step": 5381 }, { "epoch": 1.2485790511541586, "grad_norm": 11.818739316598782, "learning_rate": 2e-06, "loss": 0.2175, "step": 5382 }, { "epoch": 1.248811042802459, "grad_norm": 13.116379005884413, "learning_rate": 2e-06, "loss": 0.2719, "step": 5383 }, { "epoch": 1.2490430344507597, "grad_norm": 15.287798792169049, "learning_rate": 2e-06, "loss": 0.1912, "step": 5384 }, { "epoch": 1.2492750260990604, "grad_norm": 21.297823361697418, "learning_rate": 2e-06, "loss": 0.3731, "step": 5385 }, { "epoch": 1.249507017747361, "grad_norm": 14.772441145934714, "learning_rate": 2e-06, "loss": 0.2237, "step": 5386 }, { "epoch": 1.2497390093956617, "grad_norm": 22.743960581731905, "learning_rate": 2e-06, "loss": 0.2809, "step": 5387 }, { "epoch": 1.2499710010439624, "grad_norm": 8.798316896561857, "learning_rate": 2e-06, "loss": 0.1979, "step": 5388 }, { "epoch": 1.250202992692263, "grad_norm": 17.322801821705692, "learning_rate": 2e-06, "loss": 0.2744, "step": 5389 }, { "epoch": 1.2504349843405638, "grad_norm": 22.401139532469738, "learning_rate": 2e-06, "loss": 0.2902, "step": 5390 }, { "epoch": 1.2506669759888644, "grad_norm": 12.33349075041886, "learning_rate": 2e-06, "loss": 0.2465, "step": 5391 }, { "epoch": 1.2508989676371651, "grad_norm": 12.554184765560295, "learning_rate": 2e-06, "loss": 0.2667, "step": 5392 }, { "epoch": 1.2511309592854658, "grad_norm": 19.246451335744148, "learning_rate": 2e-06, "loss": 0.268, "step": 5393 }, { "epoch": 1.2513629509337663, "grad_norm": 15.757899753835336, "learning_rate": 2e-06, "loss": 0.3866, "step": 5394 }, { "epoch": 1.251594942582067, "grad_norm": 11.682845962868761, "learning_rate": 2e-06, "loss": 0.1962, "step": 5395 }, { "epoch": 1.2518269342303676, "grad_norm": 8.234530902056399, "learning_rate": 2e-06, "loss": 0.1765, "step": 5396 }, { "epoch": 1.2520589258786683, "grad_norm": 9.16550352575417, "learning_rate": 2e-06, "loss": 0.2197, "step": 5397 }, { "epoch": 1.252290917526969, "grad_norm": 13.833352896694109, "learning_rate": 2e-06, "loss": 0.2307, "step": 5398 }, { "epoch": 1.2525229091752696, "grad_norm": 13.330435337641362, "learning_rate": 2e-06, "loss": 0.3172, "step": 5399 }, { "epoch": 1.2527549008235703, "grad_norm": 13.575020941419025, "learning_rate": 2e-06, "loss": 0.3236, "step": 5400 }, { "epoch": 1.252986892471871, "grad_norm": 15.24233503470825, "learning_rate": 2e-06, "loss": 0.1454, "step": 5401 }, { "epoch": 1.2532188841201717, "grad_norm": 7.776532329048835, "learning_rate": 2e-06, "loss": 0.1922, "step": 5402 }, { "epoch": 1.2534508757684724, "grad_norm": 10.196711226595042, "learning_rate": 2e-06, "loss": 0.2424, "step": 5403 }, { "epoch": 1.253682867416773, "grad_norm": 14.61397139989457, "learning_rate": 2e-06, "loss": 0.205, "step": 5404 }, { "epoch": 1.2539148590650737, "grad_norm": 10.510717889247282, "learning_rate": 2e-06, "loss": 0.2287, "step": 5405 }, { "epoch": 1.2541468507133744, "grad_norm": 6.766161353966618, "learning_rate": 2e-06, "loss": 0.2261, "step": 5406 }, { "epoch": 1.254378842361675, "grad_norm": 11.411526589754951, "learning_rate": 2e-06, "loss": 0.2431, "step": 5407 }, { "epoch": 1.2546108340099758, "grad_norm": 12.28838509585074, "learning_rate": 2e-06, "loss": 0.1855, "step": 5408 }, { "epoch": 1.2548428256582764, "grad_norm": 7.946790336662799, "learning_rate": 2e-06, "loss": 0.1554, "step": 5409 }, { "epoch": 1.2550748173065769, "grad_norm": 12.619103129116512, "learning_rate": 2e-06, "loss": 0.1923, "step": 5410 }, { "epoch": 1.2553068089548776, "grad_norm": 27.705684365921115, "learning_rate": 2e-06, "loss": 0.5062, "step": 5411 }, { "epoch": 1.2555388006031782, "grad_norm": 4.933290693333517, "learning_rate": 2e-06, "loss": 0.1248, "step": 5412 }, { "epoch": 1.255770792251479, "grad_norm": 24.902814613229886, "learning_rate": 2e-06, "loss": 0.337, "step": 5413 }, { "epoch": 1.2560027838997796, "grad_norm": 10.21067892420391, "learning_rate": 2e-06, "loss": 0.2146, "step": 5414 }, { "epoch": 1.2562347755480803, "grad_norm": 6.780520081375767, "learning_rate": 2e-06, "loss": 0.2351, "step": 5415 }, { "epoch": 1.256466767196381, "grad_norm": 13.71098978847903, "learning_rate": 2e-06, "loss": 0.2876, "step": 5416 }, { "epoch": 1.2566987588446816, "grad_norm": 10.855468980185774, "learning_rate": 2e-06, "loss": 0.1825, "step": 5417 }, { "epoch": 1.2569307504929823, "grad_norm": 21.315008502857644, "learning_rate": 2e-06, "loss": 0.3071, "step": 5418 }, { "epoch": 1.257162742141283, "grad_norm": 14.134423743002685, "learning_rate": 2e-06, "loss": 0.1695, "step": 5419 }, { "epoch": 1.2573947337895834, "grad_norm": 15.135285920332297, "learning_rate": 2e-06, "loss": 0.1712, "step": 5420 }, { "epoch": 1.2576267254378841, "grad_norm": 13.025679352989053, "learning_rate": 2e-06, "loss": 0.1629, "step": 5421 }, { "epoch": 1.2578587170861848, "grad_norm": 15.418028878233873, "learning_rate": 2e-06, "loss": 0.1973, "step": 5422 }, { "epoch": 1.2580907087344855, "grad_norm": 14.90544663133779, "learning_rate": 2e-06, "loss": 0.2366, "step": 5423 }, { "epoch": 1.2583227003827862, "grad_norm": 17.13177952043397, "learning_rate": 2e-06, "loss": 0.2224, "step": 5424 }, { "epoch": 1.2585546920310868, "grad_norm": 15.768336904910315, "learning_rate": 2e-06, "loss": 0.1723, "step": 5425 }, { "epoch": 1.2587866836793875, "grad_norm": 16.784910745172407, "learning_rate": 2e-06, "loss": 0.2809, "step": 5426 }, { "epoch": 1.2590186753276882, "grad_norm": 14.877686438432477, "learning_rate": 2e-06, "loss": 0.2759, "step": 5427 }, { "epoch": 1.2592506669759889, "grad_norm": 9.201621848621404, "learning_rate": 2e-06, "loss": 0.1827, "step": 5428 }, { "epoch": 1.2594826586242895, "grad_norm": 14.700888196853745, "learning_rate": 2e-06, "loss": 0.1968, "step": 5429 }, { "epoch": 1.2597146502725902, "grad_norm": 18.81228873623378, "learning_rate": 2e-06, "loss": 0.3326, "step": 5430 }, { "epoch": 1.259946641920891, "grad_norm": 12.9065843099499, "learning_rate": 2e-06, "loss": 0.2946, "step": 5431 }, { "epoch": 1.2601786335691916, "grad_norm": 9.30669535994372, "learning_rate": 2e-06, "loss": 0.2333, "step": 5432 }, { "epoch": 1.2604106252174923, "grad_norm": 9.267711029265648, "learning_rate": 2e-06, "loss": 0.2008, "step": 5433 }, { "epoch": 1.260642616865793, "grad_norm": 26.074534924266477, "learning_rate": 2e-06, "loss": 0.3929, "step": 5434 }, { "epoch": 1.2608746085140936, "grad_norm": 13.773153795926959, "learning_rate": 2e-06, "loss": 0.1561, "step": 5435 }, { "epoch": 1.2611066001623943, "grad_norm": 20.51695528132908, "learning_rate": 2e-06, "loss": 0.1994, "step": 5436 }, { "epoch": 1.2613385918106947, "grad_norm": 13.866096902646714, "learning_rate": 2e-06, "loss": 0.25, "step": 5437 }, { "epoch": 1.2615705834589954, "grad_norm": 15.601652672968376, "learning_rate": 2e-06, "loss": 0.1956, "step": 5438 }, { "epoch": 1.261802575107296, "grad_norm": 9.407943419133732, "learning_rate": 2e-06, "loss": 0.2155, "step": 5439 }, { "epoch": 1.2620345667555968, "grad_norm": 21.304630727887947, "learning_rate": 2e-06, "loss": 0.2215, "step": 5440 }, { "epoch": 1.2622665584038975, "grad_norm": 18.56505166670855, "learning_rate": 2e-06, "loss": 0.3319, "step": 5441 }, { "epoch": 1.2624985500521981, "grad_norm": 11.90138217449211, "learning_rate": 2e-06, "loss": 0.2573, "step": 5442 }, { "epoch": 1.2627305417004988, "grad_norm": 15.059252412285858, "learning_rate": 2e-06, "loss": 0.2218, "step": 5443 }, { "epoch": 1.2629625333487995, "grad_norm": 8.775996304124295, "learning_rate": 2e-06, "loss": 0.1364, "step": 5444 }, { "epoch": 1.2631945249971002, "grad_norm": 12.641643012310846, "learning_rate": 2e-06, "loss": 0.2702, "step": 5445 }, { "epoch": 1.2634265166454008, "grad_norm": 20.388734064223414, "learning_rate": 2e-06, "loss": 0.3179, "step": 5446 }, { "epoch": 1.2636585082937013, "grad_norm": 17.263471978052475, "learning_rate": 2e-06, "loss": 0.3227, "step": 5447 }, { "epoch": 1.263890499942002, "grad_norm": 13.912072866064792, "learning_rate": 2e-06, "loss": 0.2209, "step": 5448 }, { "epoch": 1.2641224915903027, "grad_norm": 17.87351186270327, "learning_rate": 2e-06, "loss": 0.347, "step": 5449 }, { "epoch": 1.2643544832386033, "grad_norm": 20.927462227177482, "learning_rate": 2e-06, "loss": 0.3443, "step": 5450 }, { "epoch": 1.264586474886904, "grad_norm": 14.92230797263229, "learning_rate": 2e-06, "loss": 0.2529, "step": 5451 }, { "epoch": 1.2648184665352047, "grad_norm": 11.658954372633668, "learning_rate": 2e-06, "loss": 0.1356, "step": 5452 }, { "epoch": 1.2650504581835054, "grad_norm": 16.889816040298218, "learning_rate": 2e-06, "loss": 0.2544, "step": 5453 }, { "epoch": 1.265282449831806, "grad_norm": 17.986590759970184, "learning_rate": 2e-06, "loss": 0.2652, "step": 5454 }, { "epoch": 1.2655144414801067, "grad_norm": 15.375217352482847, "learning_rate": 2e-06, "loss": 0.2162, "step": 5455 }, { "epoch": 1.2657464331284074, "grad_norm": 13.563201322950821, "learning_rate": 2e-06, "loss": 0.2364, "step": 5456 }, { "epoch": 1.265978424776708, "grad_norm": 14.413978406555218, "learning_rate": 2e-06, "loss": 0.2424, "step": 5457 }, { "epoch": 1.2662104164250088, "grad_norm": 16.31505711897065, "learning_rate": 2e-06, "loss": 0.2526, "step": 5458 }, { "epoch": 1.2664424080733094, "grad_norm": 8.752232702803143, "learning_rate": 2e-06, "loss": 0.1601, "step": 5459 }, { "epoch": 1.2666743997216101, "grad_norm": 9.794706568555943, "learning_rate": 2e-06, "loss": 0.1836, "step": 5460 }, { "epoch": 1.2669063913699108, "grad_norm": 13.647159948468177, "learning_rate": 2e-06, "loss": 0.2018, "step": 5461 }, { "epoch": 1.2671383830182115, "grad_norm": 14.812271457713734, "learning_rate": 2e-06, "loss": 0.2389, "step": 5462 }, { "epoch": 1.267370374666512, "grad_norm": 8.905656894633575, "learning_rate": 2e-06, "loss": 0.1496, "step": 5463 }, { "epoch": 1.2676023663148126, "grad_norm": 9.256200576462955, "learning_rate": 2e-06, "loss": 0.1749, "step": 5464 }, { "epoch": 1.2678343579631133, "grad_norm": 12.138821016330132, "learning_rate": 2e-06, "loss": 0.3476, "step": 5465 }, { "epoch": 1.268066349611414, "grad_norm": 14.589430451668205, "learning_rate": 2e-06, "loss": 0.1956, "step": 5466 }, { "epoch": 1.2682983412597146, "grad_norm": 18.385243783916064, "learning_rate": 2e-06, "loss": 0.2236, "step": 5467 }, { "epoch": 1.2685303329080153, "grad_norm": 15.912545380757068, "learning_rate": 2e-06, "loss": 0.2797, "step": 5468 }, { "epoch": 1.268762324556316, "grad_norm": 24.290486722780003, "learning_rate": 2e-06, "loss": 0.336, "step": 5469 }, { "epoch": 1.2689943162046167, "grad_norm": 17.68733338274814, "learning_rate": 2e-06, "loss": 0.3079, "step": 5470 }, { "epoch": 1.2692263078529173, "grad_norm": 29.58373573181717, "learning_rate": 2e-06, "loss": 0.2578, "step": 5471 }, { "epoch": 1.269458299501218, "grad_norm": 21.49698383474083, "learning_rate": 2e-06, "loss": 0.2823, "step": 5472 }, { "epoch": 1.2696902911495185, "grad_norm": 20.99655313594657, "learning_rate": 2e-06, "loss": 0.3464, "step": 5473 }, { "epoch": 1.2699222827978192, "grad_norm": 12.60122524250526, "learning_rate": 2e-06, "loss": 0.404, "step": 5474 }, { "epoch": 1.2701542744461198, "grad_norm": 11.14334254645051, "learning_rate": 2e-06, "loss": 0.259, "step": 5475 }, { "epoch": 1.2703862660944205, "grad_norm": 6.256460629933859, "learning_rate": 2e-06, "loss": 0.1994, "step": 5476 }, { "epoch": 1.2706182577427212, "grad_norm": 15.74119024883826, "learning_rate": 2e-06, "loss": 0.2372, "step": 5477 }, { "epoch": 1.2708502493910219, "grad_norm": 22.71832340845438, "learning_rate": 2e-06, "loss": 0.3303, "step": 5478 }, { "epoch": 1.2710822410393225, "grad_norm": 9.136667866436637, "learning_rate": 2e-06, "loss": 0.214, "step": 5479 }, { "epoch": 1.2713142326876232, "grad_norm": 22.00409773731631, "learning_rate": 2e-06, "loss": 0.2791, "step": 5480 }, { "epoch": 1.271546224335924, "grad_norm": 14.514457099733832, "learning_rate": 2e-06, "loss": 0.1993, "step": 5481 }, { "epoch": 1.2717782159842246, "grad_norm": 19.24414215082918, "learning_rate": 2e-06, "loss": 0.239, "step": 5482 }, { "epoch": 1.2720102076325253, "grad_norm": 10.895891454072098, "learning_rate": 2e-06, "loss": 0.1715, "step": 5483 }, { "epoch": 1.272242199280826, "grad_norm": 9.632069525604589, "learning_rate": 2e-06, "loss": 0.328, "step": 5484 }, { "epoch": 1.2724741909291266, "grad_norm": 9.230036573343929, "learning_rate": 2e-06, "loss": 0.1802, "step": 5485 }, { "epoch": 1.2727061825774273, "grad_norm": 8.919987934719751, "learning_rate": 2e-06, "loss": 0.1919, "step": 5486 }, { "epoch": 1.272938174225728, "grad_norm": 21.874171401796, "learning_rate": 2e-06, "loss": 0.4171, "step": 5487 }, { "epoch": 1.2731701658740286, "grad_norm": 21.073806899512824, "learning_rate": 2e-06, "loss": 0.2203, "step": 5488 }, { "epoch": 1.2734021575223293, "grad_norm": 19.802684449859967, "learning_rate": 2e-06, "loss": 0.2651, "step": 5489 }, { "epoch": 1.2736341491706298, "grad_norm": 8.130642986566825, "learning_rate": 2e-06, "loss": 0.2091, "step": 5490 }, { "epoch": 1.2738661408189305, "grad_norm": 16.65568102886136, "learning_rate": 2e-06, "loss": 0.195, "step": 5491 }, { "epoch": 1.2740981324672311, "grad_norm": 46.93369863196454, "learning_rate": 2e-06, "loss": 0.2481, "step": 5492 }, { "epoch": 1.2743301241155318, "grad_norm": 19.882996928850734, "learning_rate": 2e-06, "loss": 0.3797, "step": 5493 }, { "epoch": 1.2745621157638325, "grad_norm": 6.786034128081566, "learning_rate": 2e-06, "loss": 0.1929, "step": 5494 }, { "epoch": 1.2747941074121332, "grad_norm": 10.954183622735066, "learning_rate": 2e-06, "loss": 0.1846, "step": 5495 }, { "epoch": 1.2750260990604338, "grad_norm": 17.289913825248284, "learning_rate": 2e-06, "loss": 0.2575, "step": 5496 }, { "epoch": 1.2752580907087345, "grad_norm": 9.900332056828477, "learning_rate": 2e-06, "loss": 0.1919, "step": 5497 }, { "epoch": 1.2754900823570352, "grad_norm": 11.216172941552085, "learning_rate": 2e-06, "loss": 0.2311, "step": 5498 }, { "epoch": 1.2757220740053359, "grad_norm": 18.067500250176654, "learning_rate": 2e-06, "loss": 0.2861, "step": 5499 }, { "epoch": 1.2759540656536363, "grad_norm": 9.544040849535888, "learning_rate": 2e-06, "loss": 0.2313, "step": 5500 }, { "epoch": 1.276186057301937, "grad_norm": 19.109610262028383, "learning_rate": 2e-06, "loss": 0.2947, "step": 5501 }, { "epoch": 1.2764180489502377, "grad_norm": 8.074254510862858, "learning_rate": 2e-06, "loss": 0.1712, "step": 5502 }, { "epoch": 1.2766500405985384, "grad_norm": 16.010980414431298, "learning_rate": 2e-06, "loss": 0.3747, "step": 5503 }, { "epoch": 1.276882032246839, "grad_norm": 13.51093579441853, "learning_rate": 2e-06, "loss": 0.2592, "step": 5504 }, { "epoch": 1.2771140238951397, "grad_norm": 14.669785165177656, "learning_rate": 2e-06, "loss": 0.2628, "step": 5505 }, { "epoch": 1.2773460155434404, "grad_norm": 7.660559763630664, "learning_rate": 2e-06, "loss": 0.1481, "step": 5506 }, { "epoch": 1.277578007191741, "grad_norm": 20.24682997014625, "learning_rate": 2e-06, "loss": 0.3538, "step": 5507 }, { "epoch": 1.2778099988400418, "grad_norm": 13.362958359090564, "learning_rate": 2e-06, "loss": 0.2271, "step": 5508 }, { "epoch": 1.2780419904883424, "grad_norm": 8.164473390361263, "learning_rate": 2e-06, "loss": 0.1568, "step": 5509 }, { "epoch": 1.2782739821366431, "grad_norm": 6.75118446837882, "learning_rate": 2e-06, "loss": 0.1275, "step": 5510 }, { "epoch": 1.2785059737849438, "grad_norm": 8.801135729909866, "learning_rate": 2e-06, "loss": 0.1721, "step": 5511 }, { "epoch": 1.2787379654332445, "grad_norm": 19.006232049977932, "learning_rate": 2e-06, "loss": 0.3433, "step": 5512 }, { "epoch": 1.2789699570815452, "grad_norm": 8.830534900711758, "learning_rate": 2e-06, "loss": 0.1636, "step": 5513 }, { "epoch": 1.2792019487298458, "grad_norm": 3.593403289783236, "learning_rate": 2e-06, "loss": 0.1181, "step": 5514 }, { "epoch": 1.2794339403781465, "grad_norm": 20.464133931736466, "learning_rate": 2e-06, "loss": 0.3697, "step": 5515 }, { "epoch": 1.2796659320264472, "grad_norm": 22.020806574162382, "learning_rate": 2e-06, "loss": 0.2337, "step": 5516 }, { "epoch": 1.2798979236747476, "grad_norm": 11.288169129492667, "learning_rate": 2e-06, "loss": 0.2503, "step": 5517 }, { "epoch": 1.2801299153230483, "grad_norm": 12.874188117641705, "learning_rate": 2e-06, "loss": 0.3134, "step": 5518 }, { "epoch": 1.280361906971349, "grad_norm": 12.527139165639522, "learning_rate": 2e-06, "loss": 0.3227, "step": 5519 }, { "epoch": 1.2805938986196497, "grad_norm": 14.050860916922632, "learning_rate": 2e-06, "loss": 0.4138, "step": 5520 }, { "epoch": 1.2808258902679504, "grad_norm": 17.575926637021528, "learning_rate": 2e-06, "loss": 0.4182, "step": 5521 }, { "epoch": 1.281057881916251, "grad_norm": 38.30070940939618, "learning_rate": 2e-06, "loss": 0.2344, "step": 5522 }, { "epoch": 1.2812898735645517, "grad_norm": 15.721424393613805, "learning_rate": 2e-06, "loss": 0.2039, "step": 5523 }, { "epoch": 1.2815218652128524, "grad_norm": 14.438274765990126, "learning_rate": 2e-06, "loss": 0.2811, "step": 5524 }, { "epoch": 1.281753856861153, "grad_norm": 11.964371077643447, "learning_rate": 2e-06, "loss": 0.1871, "step": 5525 }, { "epoch": 1.2819858485094537, "grad_norm": 11.192127859492148, "learning_rate": 2e-06, "loss": 0.2232, "step": 5526 }, { "epoch": 1.2822178401577542, "grad_norm": 16.55720382812037, "learning_rate": 2e-06, "loss": 0.3419, "step": 5527 }, { "epoch": 1.2824498318060549, "grad_norm": 11.859714499129243, "learning_rate": 2e-06, "loss": 0.3109, "step": 5528 }, { "epoch": 1.2826818234543556, "grad_norm": 12.773674037445563, "learning_rate": 2e-06, "loss": 0.2546, "step": 5529 }, { "epoch": 1.2829138151026562, "grad_norm": 20.39897546810617, "learning_rate": 2e-06, "loss": 0.3194, "step": 5530 }, { "epoch": 1.283145806750957, "grad_norm": 16.604649378816013, "learning_rate": 2e-06, "loss": 0.2979, "step": 5531 }, { "epoch": 1.2833777983992576, "grad_norm": 10.145216773558865, "learning_rate": 2e-06, "loss": 0.1958, "step": 5532 }, { "epoch": 1.2836097900475583, "grad_norm": 12.156091156673625, "learning_rate": 2e-06, "loss": 0.1559, "step": 5533 }, { "epoch": 1.283841781695859, "grad_norm": 14.025709421254515, "learning_rate": 2e-06, "loss": 0.1721, "step": 5534 }, { "epoch": 1.2840737733441596, "grad_norm": 11.107260139493441, "learning_rate": 2e-06, "loss": 0.266, "step": 5535 }, { "epoch": 1.2843057649924603, "grad_norm": 11.576453611507793, "learning_rate": 2e-06, "loss": 0.2343, "step": 5536 }, { "epoch": 1.284537756640761, "grad_norm": 8.007601321516523, "learning_rate": 2e-06, "loss": 0.1732, "step": 5537 }, { "epoch": 1.2847697482890617, "grad_norm": 11.928698418916918, "learning_rate": 2e-06, "loss": 0.1612, "step": 5538 }, { "epoch": 1.2850017399373623, "grad_norm": 17.465166018146665, "learning_rate": 2e-06, "loss": 0.3162, "step": 5539 }, { "epoch": 1.285233731585663, "grad_norm": 5.783970569015845, "learning_rate": 2e-06, "loss": 0.1845, "step": 5540 }, { "epoch": 1.2854657232339637, "grad_norm": 9.157428800986871, "learning_rate": 2e-06, "loss": 0.2112, "step": 5541 }, { "epoch": 1.2856977148822644, "grad_norm": 4.1076278120894, "learning_rate": 2e-06, "loss": 0.1123, "step": 5542 }, { "epoch": 1.2859297065305648, "grad_norm": 10.11312397969375, "learning_rate": 2e-06, "loss": 0.1765, "step": 5543 }, { "epoch": 1.2861616981788655, "grad_norm": 11.274897323273564, "learning_rate": 2e-06, "loss": 0.1913, "step": 5544 }, { "epoch": 1.2863936898271662, "grad_norm": 19.606507289753136, "learning_rate": 2e-06, "loss": 0.3079, "step": 5545 }, { "epoch": 1.2866256814754669, "grad_norm": 9.684645461882754, "learning_rate": 2e-06, "loss": 0.1835, "step": 5546 }, { "epoch": 1.2868576731237675, "grad_norm": 9.737675214521724, "learning_rate": 2e-06, "loss": 0.2299, "step": 5547 }, { "epoch": 1.2870896647720682, "grad_norm": 10.599090403823574, "learning_rate": 2e-06, "loss": 0.2373, "step": 5548 }, { "epoch": 1.2873216564203689, "grad_norm": 16.630432627012777, "learning_rate": 2e-06, "loss": 0.3984, "step": 5549 }, { "epoch": 1.2875536480686696, "grad_norm": 17.729293437700733, "learning_rate": 2e-06, "loss": 0.2671, "step": 5550 }, { "epoch": 1.2877856397169702, "grad_norm": 12.05835176586732, "learning_rate": 2e-06, "loss": 0.2906, "step": 5551 }, { "epoch": 1.288017631365271, "grad_norm": 14.440985815174287, "learning_rate": 2e-06, "loss": 0.2058, "step": 5552 }, { "epoch": 1.2882496230135714, "grad_norm": 16.84003641847929, "learning_rate": 2e-06, "loss": 0.32, "step": 5553 }, { "epoch": 1.288481614661872, "grad_norm": 20.495041375308226, "learning_rate": 2e-06, "loss": 0.1994, "step": 5554 }, { "epoch": 1.2887136063101727, "grad_norm": 21.208082266804475, "learning_rate": 2e-06, "loss": 0.4303, "step": 5555 }, { "epoch": 1.2889455979584734, "grad_norm": 10.462591620428805, "learning_rate": 2e-06, "loss": 0.1862, "step": 5556 }, { "epoch": 1.289177589606774, "grad_norm": 33.57346108492076, "learning_rate": 2e-06, "loss": 0.4521, "step": 5557 }, { "epoch": 1.2894095812550748, "grad_norm": 9.690303304788598, "learning_rate": 2e-06, "loss": 0.1659, "step": 5558 }, { "epoch": 1.2896415729033754, "grad_norm": 26.12477597482978, "learning_rate": 2e-06, "loss": 0.3697, "step": 5559 }, { "epoch": 1.2898735645516761, "grad_norm": 10.753843000487956, "learning_rate": 2e-06, "loss": 0.1845, "step": 5560 }, { "epoch": 1.2901055561999768, "grad_norm": 11.94320212794658, "learning_rate": 2e-06, "loss": 0.3165, "step": 5561 }, { "epoch": 1.2903375478482775, "grad_norm": 8.074090848780601, "learning_rate": 2e-06, "loss": 0.1782, "step": 5562 }, { "epoch": 1.2905695394965782, "grad_norm": 9.986874563274212, "learning_rate": 2e-06, "loss": 0.2196, "step": 5563 }, { "epoch": 1.2908015311448788, "grad_norm": 13.88506300781901, "learning_rate": 2e-06, "loss": 0.2352, "step": 5564 }, { "epoch": 1.2910335227931795, "grad_norm": 12.780056004774648, "learning_rate": 2e-06, "loss": 0.206, "step": 5565 }, { "epoch": 1.2912655144414802, "grad_norm": 17.948578222912435, "learning_rate": 2e-06, "loss": 0.3228, "step": 5566 }, { "epoch": 1.2914975060897809, "grad_norm": 8.911005690791237, "learning_rate": 2e-06, "loss": 0.2329, "step": 5567 }, { "epoch": 1.2917294977380815, "grad_norm": 9.354454545488181, "learning_rate": 2e-06, "loss": 0.2418, "step": 5568 }, { "epoch": 1.2919614893863822, "grad_norm": 21.323098522700068, "learning_rate": 2e-06, "loss": 0.2735, "step": 5569 }, { "epoch": 1.2921934810346827, "grad_norm": 15.446059925915666, "learning_rate": 2e-06, "loss": 0.2856, "step": 5570 }, { "epoch": 1.2924254726829834, "grad_norm": 8.774450735495995, "learning_rate": 2e-06, "loss": 0.2065, "step": 5571 }, { "epoch": 1.292657464331284, "grad_norm": 8.875266415022931, "learning_rate": 2e-06, "loss": 0.1516, "step": 5572 }, { "epoch": 1.2928894559795847, "grad_norm": 25.699395396889404, "learning_rate": 2e-06, "loss": 0.1989, "step": 5573 }, { "epoch": 1.2931214476278854, "grad_norm": 13.780637111179585, "learning_rate": 2e-06, "loss": 0.1974, "step": 5574 }, { "epoch": 1.293353439276186, "grad_norm": 10.596894116088386, "learning_rate": 2e-06, "loss": 0.1873, "step": 5575 }, { "epoch": 1.2935854309244867, "grad_norm": 15.882696742741262, "learning_rate": 2e-06, "loss": 0.275, "step": 5576 }, { "epoch": 1.2938174225727874, "grad_norm": 16.749738561496294, "learning_rate": 2e-06, "loss": 0.2766, "step": 5577 }, { "epoch": 1.294049414221088, "grad_norm": 22.970839761942415, "learning_rate": 2e-06, "loss": 0.303, "step": 5578 }, { "epoch": 1.2942814058693888, "grad_norm": 10.956263232344353, "learning_rate": 2e-06, "loss": 0.2567, "step": 5579 }, { "epoch": 1.2945133975176892, "grad_norm": 12.000426780774758, "learning_rate": 2e-06, "loss": 0.2221, "step": 5580 }, { "epoch": 1.29474538916599, "grad_norm": 11.959888501070827, "learning_rate": 2e-06, "loss": 0.2105, "step": 5581 }, { "epoch": 1.2949773808142906, "grad_norm": 13.004029224978572, "learning_rate": 2e-06, "loss": 0.2816, "step": 5582 }, { "epoch": 1.2952093724625913, "grad_norm": 16.552747214762597, "learning_rate": 2e-06, "loss": 0.2822, "step": 5583 }, { "epoch": 1.295441364110892, "grad_norm": 7.828758165100487, "learning_rate": 2e-06, "loss": 0.1445, "step": 5584 }, { "epoch": 1.2956733557591926, "grad_norm": 9.69840028310193, "learning_rate": 2e-06, "loss": 0.192, "step": 5585 }, { "epoch": 1.2959053474074933, "grad_norm": 8.594676831269815, "learning_rate": 2e-06, "loss": 0.2646, "step": 5586 }, { "epoch": 1.296137339055794, "grad_norm": 19.72568385506288, "learning_rate": 2e-06, "loss": 0.4113, "step": 5587 }, { "epoch": 1.2963693307040947, "grad_norm": 11.60141821509818, "learning_rate": 2e-06, "loss": 0.2368, "step": 5588 }, { "epoch": 1.2966013223523953, "grad_norm": 11.360073503368694, "learning_rate": 2e-06, "loss": 0.1508, "step": 5589 }, { "epoch": 1.296833314000696, "grad_norm": 11.466245032578335, "learning_rate": 2e-06, "loss": 0.1911, "step": 5590 }, { "epoch": 1.2970653056489967, "grad_norm": 11.352394051290077, "learning_rate": 2e-06, "loss": 0.1763, "step": 5591 }, { "epoch": 1.2972972972972974, "grad_norm": 16.11533083404958, "learning_rate": 2e-06, "loss": 0.2902, "step": 5592 }, { "epoch": 1.297529288945598, "grad_norm": 12.36723660219495, "learning_rate": 2e-06, "loss": 0.2096, "step": 5593 }, { "epoch": 1.2977612805938987, "grad_norm": 30.89625173241812, "learning_rate": 2e-06, "loss": 0.2666, "step": 5594 }, { "epoch": 1.2979932722421994, "grad_norm": 19.514344172121138, "learning_rate": 2e-06, "loss": 0.2418, "step": 5595 }, { "epoch": 1.2982252638905, "grad_norm": 10.788689229972393, "learning_rate": 2e-06, "loss": 0.1786, "step": 5596 }, { "epoch": 1.2984572555388005, "grad_norm": 15.45400603168222, "learning_rate": 2e-06, "loss": 0.2733, "step": 5597 }, { "epoch": 1.2986892471871012, "grad_norm": 13.722921586738387, "learning_rate": 2e-06, "loss": 0.2962, "step": 5598 }, { "epoch": 1.298921238835402, "grad_norm": 19.39647911275564, "learning_rate": 2e-06, "loss": 0.3514, "step": 5599 }, { "epoch": 1.2991532304837026, "grad_norm": 15.274235619440297, "learning_rate": 2e-06, "loss": 0.2525, "step": 5600 }, { "epoch": 1.2993852221320032, "grad_norm": 16.11752315764069, "learning_rate": 2e-06, "loss": 0.2394, "step": 5601 }, { "epoch": 1.299617213780304, "grad_norm": 24.618352423068448, "learning_rate": 2e-06, "loss": 0.3625, "step": 5602 }, { "epoch": 1.2998492054286046, "grad_norm": 24.593954311058177, "learning_rate": 2e-06, "loss": 0.2486, "step": 5603 }, { "epoch": 1.3000811970769053, "grad_norm": 23.962619857204736, "learning_rate": 2e-06, "loss": 0.2788, "step": 5604 }, { "epoch": 1.300313188725206, "grad_norm": 13.745233766397064, "learning_rate": 2e-06, "loss": 0.292, "step": 5605 }, { "epoch": 1.3005451803735064, "grad_norm": 184.99953181781865, "learning_rate": 2e-06, "loss": 0.2061, "step": 5606 }, { "epoch": 1.300777172021807, "grad_norm": 11.009695789207703, "learning_rate": 2e-06, "loss": 0.2157, "step": 5607 }, { "epoch": 1.3010091636701078, "grad_norm": 15.03713330803401, "learning_rate": 2e-06, "loss": 0.2397, "step": 5608 }, { "epoch": 1.3012411553184084, "grad_norm": 17.794040596623752, "learning_rate": 2e-06, "loss": 0.2294, "step": 5609 }, { "epoch": 1.3014731469667091, "grad_norm": 12.91998866491081, "learning_rate": 2e-06, "loss": 0.2401, "step": 5610 }, { "epoch": 1.3017051386150098, "grad_norm": 15.712347452714706, "learning_rate": 2e-06, "loss": 0.1369, "step": 5611 }, { "epoch": 1.3019371302633105, "grad_norm": 18.194023250326826, "learning_rate": 2e-06, "loss": 0.2016, "step": 5612 }, { "epoch": 1.3021691219116112, "grad_norm": 10.73759855968218, "learning_rate": 2e-06, "loss": 0.1686, "step": 5613 }, { "epoch": 1.3024011135599118, "grad_norm": 12.442952329564145, "learning_rate": 2e-06, "loss": 0.2391, "step": 5614 }, { "epoch": 1.3026331052082125, "grad_norm": 23.27162006463831, "learning_rate": 2e-06, "loss": 0.2731, "step": 5615 }, { "epoch": 1.3028650968565132, "grad_norm": 15.22373633972883, "learning_rate": 2e-06, "loss": 0.3134, "step": 5616 }, { "epoch": 1.3030970885048139, "grad_norm": 8.903182393289645, "learning_rate": 2e-06, "loss": 0.1805, "step": 5617 }, { "epoch": 1.3033290801531145, "grad_norm": 25.937526008653386, "learning_rate": 2e-06, "loss": 0.2313, "step": 5618 }, { "epoch": 1.3035610718014152, "grad_norm": 17.577990312401784, "learning_rate": 2e-06, "loss": 0.3776, "step": 5619 }, { "epoch": 1.303793063449716, "grad_norm": 22.427564565601266, "learning_rate": 2e-06, "loss": 0.256, "step": 5620 }, { "epoch": 1.3040250550980166, "grad_norm": 15.660251500511174, "learning_rate": 2e-06, "loss": 0.2918, "step": 5621 }, { "epoch": 1.3042570467463173, "grad_norm": 28.10834557005505, "learning_rate": 2e-06, "loss": 0.3871, "step": 5622 }, { "epoch": 1.3044890383946177, "grad_norm": 8.673493743204125, "learning_rate": 2e-06, "loss": 0.2583, "step": 5623 }, { "epoch": 1.3047210300429184, "grad_norm": 21.417289777883838, "learning_rate": 2e-06, "loss": 0.3396, "step": 5624 }, { "epoch": 1.304953021691219, "grad_norm": 16.89284063484694, "learning_rate": 2e-06, "loss": 0.3283, "step": 5625 }, { "epoch": 1.3051850133395198, "grad_norm": 29.120207682173884, "learning_rate": 2e-06, "loss": 0.3296, "step": 5626 }, { "epoch": 1.3054170049878204, "grad_norm": 8.919708053307183, "learning_rate": 2e-06, "loss": 0.1461, "step": 5627 }, { "epoch": 1.305648996636121, "grad_norm": 19.78253962837575, "learning_rate": 2e-06, "loss": 0.2993, "step": 5628 }, { "epoch": 1.3058809882844218, "grad_norm": 17.402054742095746, "learning_rate": 2e-06, "loss": 0.3022, "step": 5629 }, { "epoch": 1.3061129799327225, "grad_norm": 11.4823499978865, "learning_rate": 2e-06, "loss": 0.2225, "step": 5630 }, { "epoch": 1.3063449715810231, "grad_norm": 7.3361505909982005, "learning_rate": 2e-06, "loss": 0.1902, "step": 5631 }, { "epoch": 1.3065769632293238, "grad_norm": 27.559188119699623, "learning_rate": 2e-06, "loss": 0.2572, "step": 5632 }, { "epoch": 1.3068089548776243, "grad_norm": 15.273929151441568, "learning_rate": 2e-06, "loss": 0.2725, "step": 5633 }, { "epoch": 1.307040946525925, "grad_norm": 22.424000462796233, "learning_rate": 2e-06, "loss": 0.3106, "step": 5634 }, { "epoch": 1.3072729381742256, "grad_norm": 14.713171143247271, "learning_rate": 2e-06, "loss": 0.1986, "step": 5635 }, { "epoch": 1.3075049298225263, "grad_norm": 19.197749791692836, "learning_rate": 2e-06, "loss": 0.2414, "step": 5636 }, { "epoch": 1.307736921470827, "grad_norm": 9.70171890144793, "learning_rate": 2e-06, "loss": 0.2229, "step": 5637 }, { "epoch": 1.3079689131191277, "grad_norm": 11.136763234840572, "learning_rate": 2e-06, "loss": 0.1674, "step": 5638 }, { "epoch": 1.3082009047674283, "grad_norm": 26.03212305103045, "learning_rate": 2e-06, "loss": 0.3436, "step": 5639 }, { "epoch": 1.308432896415729, "grad_norm": 21.30713547042178, "learning_rate": 2e-06, "loss": 0.2795, "step": 5640 }, { "epoch": 1.3086648880640297, "grad_norm": 10.993965951343093, "learning_rate": 2e-06, "loss": 0.1959, "step": 5641 }, { "epoch": 1.3088968797123304, "grad_norm": 17.769077676138195, "learning_rate": 2e-06, "loss": 0.2873, "step": 5642 }, { "epoch": 1.309128871360631, "grad_norm": 18.506312434168304, "learning_rate": 2e-06, "loss": 0.255, "step": 5643 }, { "epoch": 1.3093608630089317, "grad_norm": 6.793232387244837, "learning_rate": 2e-06, "loss": 0.174, "step": 5644 }, { "epoch": 1.3095928546572324, "grad_norm": 32.49566834679004, "learning_rate": 2e-06, "loss": 0.2574, "step": 5645 }, { "epoch": 1.309824846305533, "grad_norm": 17.02251550801001, "learning_rate": 2e-06, "loss": 0.3174, "step": 5646 }, { "epoch": 1.3100568379538338, "grad_norm": 12.841461665684056, "learning_rate": 2e-06, "loss": 0.3524, "step": 5647 }, { "epoch": 1.3102888296021344, "grad_norm": 19.076546400110157, "learning_rate": 2e-06, "loss": 0.2798, "step": 5648 }, { "epoch": 1.3105208212504351, "grad_norm": 13.18433741810789, "learning_rate": 2e-06, "loss": 0.3233, "step": 5649 }, { "epoch": 1.3107528128987356, "grad_norm": 11.135069470099385, "learning_rate": 2e-06, "loss": 0.1642, "step": 5650 }, { "epoch": 1.3109848045470363, "grad_norm": 15.315677523471457, "learning_rate": 2e-06, "loss": 0.2652, "step": 5651 }, { "epoch": 1.311216796195337, "grad_norm": 10.62905135472249, "learning_rate": 2e-06, "loss": 0.2165, "step": 5652 }, { "epoch": 1.3114487878436376, "grad_norm": 11.231634605332838, "learning_rate": 2e-06, "loss": 0.2613, "step": 5653 }, { "epoch": 1.3116807794919383, "grad_norm": 9.078649052887393, "learning_rate": 2e-06, "loss": 0.2143, "step": 5654 }, { "epoch": 1.311912771140239, "grad_norm": 21.39401032753167, "learning_rate": 2e-06, "loss": 0.3495, "step": 5655 }, { "epoch": 1.3121447627885396, "grad_norm": 17.904213558896597, "learning_rate": 2e-06, "loss": 0.2527, "step": 5656 }, { "epoch": 1.3123767544368403, "grad_norm": 15.558548274464785, "learning_rate": 2e-06, "loss": 0.1924, "step": 5657 }, { "epoch": 1.312608746085141, "grad_norm": 12.029762240958295, "learning_rate": 2e-06, "loss": 0.2173, "step": 5658 }, { "epoch": 1.3128407377334417, "grad_norm": 25.30959589435329, "learning_rate": 2e-06, "loss": 0.2702, "step": 5659 }, { "epoch": 1.3130727293817421, "grad_norm": 10.092299250446088, "learning_rate": 2e-06, "loss": 0.1949, "step": 5660 }, { "epoch": 1.3133047210300428, "grad_norm": 13.306741014180796, "learning_rate": 2e-06, "loss": 0.2432, "step": 5661 }, { "epoch": 1.3135367126783435, "grad_norm": 17.398002407527894, "learning_rate": 2e-06, "loss": 0.2507, "step": 5662 }, { "epoch": 1.3137687043266442, "grad_norm": 13.958106857172252, "learning_rate": 2e-06, "loss": 0.2637, "step": 5663 }, { "epoch": 1.3140006959749448, "grad_norm": 11.373230060748023, "learning_rate": 2e-06, "loss": 0.2399, "step": 5664 }, { "epoch": 1.3142326876232455, "grad_norm": 15.86847179090274, "learning_rate": 2e-06, "loss": 0.2329, "step": 5665 }, { "epoch": 1.3144646792715462, "grad_norm": 15.508532456260358, "learning_rate": 2e-06, "loss": 0.3161, "step": 5666 }, { "epoch": 1.3146966709198469, "grad_norm": 15.532683378784451, "learning_rate": 2e-06, "loss": 0.3558, "step": 5667 }, { "epoch": 1.3149286625681476, "grad_norm": 19.714299042898194, "learning_rate": 2e-06, "loss": 0.3513, "step": 5668 }, { "epoch": 1.3151606542164482, "grad_norm": 13.958754944833625, "learning_rate": 2e-06, "loss": 0.2168, "step": 5669 }, { "epoch": 1.315392645864749, "grad_norm": 30.644144100112875, "learning_rate": 2e-06, "loss": 0.3604, "step": 5670 }, { "epoch": 1.3156246375130496, "grad_norm": 16.352621053156582, "learning_rate": 2e-06, "loss": 0.2741, "step": 5671 }, { "epoch": 1.3158566291613503, "grad_norm": 19.73007160153037, "learning_rate": 2e-06, "loss": 0.261, "step": 5672 }, { "epoch": 1.316088620809651, "grad_norm": 14.995894794886874, "learning_rate": 2e-06, "loss": 0.2534, "step": 5673 }, { "epoch": 1.3163206124579516, "grad_norm": 15.945047875864542, "learning_rate": 2e-06, "loss": 0.3808, "step": 5674 }, { "epoch": 1.3165526041062523, "grad_norm": 14.018774776451082, "learning_rate": 2e-06, "loss": 0.2702, "step": 5675 }, { "epoch": 1.3167845957545528, "grad_norm": 12.70859105222736, "learning_rate": 2e-06, "loss": 0.24, "step": 5676 }, { "epoch": 1.3170165874028534, "grad_norm": 9.678998173041654, "learning_rate": 2e-06, "loss": 0.1932, "step": 5677 }, { "epoch": 1.3172485790511541, "grad_norm": 15.736690719275137, "learning_rate": 2e-06, "loss": 0.2398, "step": 5678 }, { "epoch": 1.3174805706994548, "grad_norm": 13.60977667580227, "learning_rate": 2e-06, "loss": 0.2472, "step": 5679 }, { "epoch": 1.3177125623477555, "grad_norm": 11.102773871126534, "learning_rate": 2e-06, "loss": 0.2326, "step": 5680 }, { "epoch": 1.3179445539960561, "grad_norm": 17.291796040037262, "learning_rate": 2e-06, "loss": 0.346, "step": 5681 }, { "epoch": 1.3181765456443568, "grad_norm": 18.166601354990185, "learning_rate": 2e-06, "loss": 0.2617, "step": 5682 }, { "epoch": 1.3184085372926575, "grad_norm": 10.95806449961031, "learning_rate": 2e-06, "loss": 0.237, "step": 5683 }, { "epoch": 1.3186405289409582, "grad_norm": 160.91351095779942, "learning_rate": 2e-06, "loss": 0.2951, "step": 5684 }, { "epoch": 1.3188725205892589, "grad_norm": 10.603170771079542, "learning_rate": 2e-06, "loss": 0.1857, "step": 5685 }, { "epoch": 1.3191045122375593, "grad_norm": 6.870903833321501, "learning_rate": 2e-06, "loss": 0.1642, "step": 5686 }, { "epoch": 1.31933650388586, "grad_norm": 13.412277495806073, "learning_rate": 2e-06, "loss": 0.2572, "step": 5687 }, { "epoch": 1.3195684955341607, "grad_norm": 16.00296720744652, "learning_rate": 2e-06, "loss": 0.2616, "step": 5688 }, { "epoch": 1.3198004871824613, "grad_norm": 14.523893821884103, "learning_rate": 2e-06, "loss": 0.2931, "step": 5689 }, { "epoch": 1.320032478830762, "grad_norm": 8.344921740838892, "learning_rate": 2e-06, "loss": 0.1381, "step": 5690 }, { "epoch": 1.3202644704790627, "grad_norm": 18.55719833842272, "learning_rate": 2e-06, "loss": 0.2493, "step": 5691 }, { "epoch": 1.3204964621273634, "grad_norm": 14.12605618900697, "learning_rate": 2e-06, "loss": 0.1944, "step": 5692 }, { "epoch": 1.320728453775664, "grad_norm": 9.998184058061966, "learning_rate": 2e-06, "loss": 0.1542, "step": 5693 }, { "epoch": 1.3209604454239647, "grad_norm": 15.911168575838987, "learning_rate": 2e-06, "loss": 0.4191, "step": 5694 }, { "epoch": 1.3211924370722654, "grad_norm": 24.85883116892657, "learning_rate": 2e-06, "loss": 0.2957, "step": 5695 }, { "epoch": 1.321424428720566, "grad_norm": 17.623611354513486, "learning_rate": 2e-06, "loss": 0.3902, "step": 5696 }, { "epoch": 1.3216564203688668, "grad_norm": 16.48576591994104, "learning_rate": 2e-06, "loss": 0.3015, "step": 5697 }, { "epoch": 1.3218884120171674, "grad_norm": 15.575788365937843, "learning_rate": 2e-06, "loss": 0.2635, "step": 5698 }, { "epoch": 1.3221204036654681, "grad_norm": 20.85661933589363, "learning_rate": 2e-06, "loss": 0.2316, "step": 5699 }, { "epoch": 1.3223523953137688, "grad_norm": 16.061174354807644, "learning_rate": 2e-06, "loss": 0.1946, "step": 5700 }, { "epoch": 1.3225843869620695, "grad_norm": 17.84776593809947, "learning_rate": 2e-06, "loss": 0.2291, "step": 5701 }, { "epoch": 1.3228163786103702, "grad_norm": 18.804374385719715, "learning_rate": 2e-06, "loss": 0.3436, "step": 5702 }, { "epoch": 1.3230483702586706, "grad_norm": 13.939505997269132, "learning_rate": 2e-06, "loss": 0.2215, "step": 5703 }, { "epoch": 1.3232803619069713, "grad_norm": 22.54661147857366, "learning_rate": 2e-06, "loss": 0.4171, "step": 5704 }, { "epoch": 1.323512353555272, "grad_norm": 14.117182775507976, "learning_rate": 2e-06, "loss": 0.2152, "step": 5705 }, { "epoch": 1.3237443452035726, "grad_norm": 18.175557503429257, "learning_rate": 2e-06, "loss": 0.2274, "step": 5706 }, { "epoch": 1.3239763368518733, "grad_norm": 11.06563729243645, "learning_rate": 2e-06, "loss": 0.1603, "step": 5707 }, { "epoch": 1.324208328500174, "grad_norm": 11.94708026064263, "learning_rate": 2e-06, "loss": 0.2145, "step": 5708 }, { "epoch": 1.3244403201484747, "grad_norm": 19.632307362212533, "learning_rate": 2e-06, "loss": 0.3409, "step": 5709 }, { "epoch": 1.3246723117967754, "grad_norm": 12.854868599897623, "learning_rate": 2e-06, "loss": 0.2245, "step": 5710 }, { "epoch": 1.324904303445076, "grad_norm": 23.40658671310776, "learning_rate": 2e-06, "loss": 0.2682, "step": 5711 }, { "epoch": 1.3251362950933767, "grad_norm": 16.268358378842592, "learning_rate": 2e-06, "loss": 0.3602, "step": 5712 }, { "epoch": 1.3253682867416772, "grad_norm": 11.210694208149162, "learning_rate": 2e-06, "loss": 0.2451, "step": 5713 }, { "epoch": 1.3256002783899778, "grad_norm": 13.093366884959643, "learning_rate": 2e-06, "loss": 0.1851, "step": 5714 }, { "epoch": 1.3258322700382785, "grad_norm": 13.9470039807361, "learning_rate": 2e-06, "loss": 0.2528, "step": 5715 }, { "epoch": 1.3260642616865792, "grad_norm": 13.826574290698455, "learning_rate": 2e-06, "loss": 0.2275, "step": 5716 }, { "epoch": 1.3262962533348799, "grad_norm": 19.0989336579408, "learning_rate": 2e-06, "loss": 0.4041, "step": 5717 }, { "epoch": 1.3265282449831806, "grad_norm": 10.94421028576376, "learning_rate": 2e-06, "loss": 0.1777, "step": 5718 }, { "epoch": 1.3267602366314812, "grad_norm": 11.10383820485182, "learning_rate": 2e-06, "loss": 0.1856, "step": 5719 }, { "epoch": 1.326992228279782, "grad_norm": 11.421442448851714, "learning_rate": 2e-06, "loss": 0.215, "step": 5720 }, { "epoch": 1.3272242199280826, "grad_norm": 16.064869933616933, "learning_rate": 2e-06, "loss": 0.311, "step": 5721 }, { "epoch": 1.3274562115763833, "grad_norm": 18.75785660722465, "learning_rate": 2e-06, "loss": 0.4559, "step": 5722 }, { "epoch": 1.327688203224684, "grad_norm": 11.390146013990016, "learning_rate": 2e-06, "loss": 0.2577, "step": 5723 }, { "epoch": 1.3279201948729846, "grad_norm": 16.85163301480196, "learning_rate": 2e-06, "loss": 0.2651, "step": 5724 }, { "epoch": 1.3281521865212853, "grad_norm": 13.0317462251221, "learning_rate": 2e-06, "loss": 0.2271, "step": 5725 }, { "epoch": 1.328384178169586, "grad_norm": 14.351959885689283, "learning_rate": 2e-06, "loss": 0.226, "step": 5726 }, { "epoch": 1.3286161698178867, "grad_norm": 9.984793704893075, "learning_rate": 2e-06, "loss": 0.1668, "step": 5727 }, { "epoch": 1.3288481614661873, "grad_norm": 17.625833578442002, "learning_rate": 2e-06, "loss": 0.343, "step": 5728 }, { "epoch": 1.329080153114488, "grad_norm": 13.273409325079097, "learning_rate": 2e-06, "loss": 0.2235, "step": 5729 }, { "epoch": 1.3293121447627885, "grad_norm": 8.8052389353841, "learning_rate": 2e-06, "loss": 0.223, "step": 5730 }, { "epoch": 1.3295441364110892, "grad_norm": 15.961048639991073, "learning_rate": 2e-06, "loss": 0.1882, "step": 5731 }, { "epoch": 1.3297761280593898, "grad_norm": 15.980771784027198, "learning_rate": 2e-06, "loss": 0.2133, "step": 5732 }, { "epoch": 1.3300081197076905, "grad_norm": 34.87453938575482, "learning_rate": 2e-06, "loss": 0.3107, "step": 5733 }, { "epoch": 1.3302401113559912, "grad_norm": 7.971302638747583, "learning_rate": 2e-06, "loss": 0.1623, "step": 5734 }, { "epoch": 1.3304721030042919, "grad_norm": 13.732619994934568, "learning_rate": 2e-06, "loss": 0.3478, "step": 5735 }, { "epoch": 1.3307040946525925, "grad_norm": 13.81265807913985, "learning_rate": 2e-06, "loss": 0.2507, "step": 5736 }, { "epoch": 1.3309360863008932, "grad_norm": 17.587535840998314, "learning_rate": 2e-06, "loss": 0.299, "step": 5737 }, { "epoch": 1.331168077949194, "grad_norm": 18.227607371744853, "learning_rate": 2e-06, "loss": 0.3449, "step": 5738 }, { "epoch": 1.3314000695974946, "grad_norm": 12.651673644096768, "learning_rate": 2e-06, "loss": 0.2299, "step": 5739 }, { "epoch": 1.331632061245795, "grad_norm": 13.107582649881053, "learning_rate": 2e-06, "loss": 0.2587, "step": 5740 }, { "epoch": 1.3318640528940957, "grad_norm": 19.926768899159622, "learning_rate": 2e-06, "loss": 0.2526, "step": 5741 }, { "epoch": 1.3320960445423964, "grad_norm": 9.882153373560206, "learning_rate": 2e-06, "loss": 0.2351, "step": 5742 }, { "epoch": 1.332328036190697, "grad_norm": 15.401606571846399, "learning_rate": 2e-06, "loss": 0.2877, "step": 5743 }, { "epoch": 1.3325600278389977, "grad_norm": 8.051494076240969, "learning_rate": 2e-06, "loss": 0.1099, "step": 5744 }, { "epoch": 1.3327920194872984, "grad_norm": 12.485400896606112, "learning_rate": 2e-06, "loss": 0.2127, "step": 5745 }, { "epoch": 1.333024011135599, "grad_norm": 19.37818023128224, "learning_rate": 2e-06, "loss": 0.3585, "step": 5746 }, { "epoch": 1.3332560027838998, "grad_norm": 11.50542375491202, "learning_rate": 2e-06, "loss": 0.1986, "step": 5747 }, { "epoch": 1.3334879944322005, "grad_norm": 9.535853097525685, "learning_rate": 2e-06, "loss": 0.1785, "step": 5748 }, { "epoch": 1.3337199860805011, "grad_norm": 13.825585578950808, "learning_rate": 2e-06, "loss": 0.2251, "step": 5749 }, { "epoch": 1.3339519777288018, "grad_norm": 20.489808046635204, "learning_rate": 2e-06, "loss": 0.3068, "step": 5750 }, { "epoch": 1.3341839693771025, "grad_norm": 7.979473578310454, "learning_rate": 2e-06, "loss": 0.219, "step": 5751 }, { "epoch": 1.3344159610254032, "grad_norm": 20.741850635447804, "learning_rate": 2e-06, "loss": 0.2966, "step": 5752 }, { "epoch": 1.3346479526737038, "grad_norm": 14.487809285683062, "learning_rate": 2e-06, "loss": 0.2511, "step": 5753 }, { "epoch": 1.3348799443220045, "grad_norm": 13.953602449611058, "learning_rate": 2e-06, "loss": 0.2804, "step": 5754 }, { "epoch": 1.3351119359703052, "grad_norm": 17.240216254771312, "learning_rate": 2e-06, "loss": 0.1892, "step": 5755 }, { "epoch": 1.3353439276186057, "grad_norm": 10.47617287821396, "learning_rate": 2e-06, "loss": 0.1832, "step": 5756 }, { "epoch": 1.3355759192669063, "grad_norm": 17.52330908616922, "learning_rate": 2e-06, "loss": 0.2934, "step": 5757 }, { "epoch": 1.335807910915207, "grad_norm": 13.566061758419231, "learning_rate": 2e-06, "loss": 0.2273, "step": 5758 }, { "epoch": 1.3360399025635077, "grad_norm": 12.742318765067107, "learning_rate": 2e-06, "loss": 0.1997, "step": 5759 }, { "epoch": 1.3362718942118084, "grad_norm": 18.812041464643745, "learning_rate": 2e-06, "loss": 0.2981, "step": 5760 }, { "epoch": 1.336503885860109, "grad_norm": 7.438888234983939, "learning_rate": 2e-06, "loss": 0.1601, "step": 5761 }, { "epoch": 1.3367358775084097, "grad_norm": 16.70120584326876, "learning_rate": 2e-06, "loss": 0.2006, "step": 5762 }, { "epoch": 1.3369678691567104, "grad_norm": 18.450037872236187, "learning_rate": 2e-06, "loss": 0.2465, "step": 5763 }, { "epoch": 1.337199860805011, "grad_norm": 15.59582496370636, "learning_rate": 2e-06, "loss": 0.3092, "step": 5764 }, { "epoch": 1.3374318524533118, "grad_norm": 24.090622475541675, "learning_rate": 2e-06, "loss": 0.3752, "step": 5765 }, { "epoch": 1.3376638441016122, "grad_norm": 11.557989090391212, "learning_rate": 2e-06, "loss": 0.2061, "step": 5766 }, { "epoch": 1.3378958357499129, "grad_norm": 7.013857459223439, "learning_rate": 2e-06, "loss": 0.218, "step": 5767 }, { "epoch": 1.3381278273982136, "grad_norm": 10.23666384150274, "learning_rate": 2e-06, "loss": 0.1466, "step": 5768 }, { "epoch": 1.3383598190465142, "grad_norm": 11.148655360805588, "learning_rate": 2e-06, "loss": 0.1825, "step": 5769 }, { "epoch": 1.338591810694815, "grad_norm": 16.186729561676344, "learning_rate": 2e-06, "loss": 0.3968, "step": 5770 }, { "epoch": 1.3388238023431156, "grad_norm": 6.943969848029348, "learning_rate": 2e-06, "loss": 0.1478, "step": 5771 }, { "epoch": 1.3390557939914163, "grad_norm": 17.341373830915476, "learning_rate": 2e-06, "loss": 0.1947, "step": 5772 }, { "epoch": 1.339287785639717, "grad_norm": 24.482447956744732, "learning_rate": 2e-06, "loss": 0.3202, "step": 5773 }, { "epoch": 1.3395197772880176, "grad_norm": 21.39557609795782, "learning_rate": 2e-06, "loss": 0.3008, "step": 5774 }, { "epoch": 1.3397517689363183, "grad_norm": 6.321646081575424, "learning_rate": 2e-06, "loss": 0.18, "step": 5775 }, { "epoch": 1.339983760584619, "grad_norm": 14.757152685348238, "learning_rate": 2e-06, "loss": 0.3238, "step": 5776 }, { "epoch": 1.3402157522329197, "grad_norm": 12.278175383917478, "learning_rate": 2e-06, "loss": 0.2181, "step": 5777 }, { "epoch": 1.3404477438812203, "grad_norm": 12.23761349802958, "learning_rate": 2e-06, "loss": 0.3068, "step": 5778 }, { "epoch": 1.340679735529521, "grad_norm": 11.403333857518392, "learning_rate": 2e-06, "loss": 0.1405, "step": 5779 }, { "epoch": 1.3409117271778217, "grad_norm": 13.05117023683827, "learning_rate": 2e-06, "loss": 0.2465, "step": 5780 }, { "epoch": 1.3411437188261224, "grad_norm": 22.224875632817582, "learning_rate": 2e-06, "loss": 0.2594, "step": 5781 }, { "epoch": 1.341375710474423, "grad_norm": 20.353481270647716, "learning_rate": 2e-06, "loss": 0.3515, "step": 5782 }, { "epoch": 1.3416077021227235, "grad_norm": 12.682528250567298, "learning_rate": 2e-06, "loss": 0.1926, "step": 5783 }, { "epoch": 1.3418396937710242, "grad_norm": 28.57266479023652, "learning_rate": 2e-06, "loss": 0.2512, "step": 5784 }, { "epoch": 1.3420716854193249, "grad_norm": 16.909366137872915, "learning_rate": 2e-06, "loss": 0.2867, "step": 5785 }, { "epoch": 1.3423036770676255, "grad_norm": 13.171274976616166, "learning_rate": 2e-06, "loss": 0.2623, "step": 5786 }, { "epoch": 1.3425356687159262, "grad_norm": 8.080789224669203, "learning_rate": 2e-06, "loss": 0.1932, "step": 5787 }, { "epoch": 1.342767660364227, "grad_norm": 20.2668518242124, "learning_rate": 2e-06, "loss": 0.3434, "step": 5788 }, { "epoch": 1.3429996520125276, "grad_norm": 15.16901875665676, "learning_rate": 2e-06, "loss": 0.2457, "step": 5789 }, { "epoch": 1.3432316436608283, "grad_norm": 11.042566276296274, "learning_rate": 2e-06, "loss": 0.1578, "step": 5790 }, { "epoch": 1.343463635309129, "grad_norm": 10.584691156901858, "learning_rate": 2e-06, "loss": 0.2686, "step": 5791 }, { "epoch": 1.3436956269574296, "grad_norm": 13.434105792076489, "learning_rate": 2e-06, "loss": 0.3587, "step": 5792 }, { "epoch": 1.34392761860573, "grad_norm": 17.34269805708881, "learning_rate": 2e-06, "loss": 0.2988, "step": 5793 }, { "epoch": 1.3441596102540307, "grad_norm": 6.675450785331753, "learning_rate": 2e-06, "loss": 0.1739, "step": 5794 }, { "epoch": 1.3443916019023314, "grad_norm": 13.984086856770102, "learning_rate": 2e-06, "loss": 0.3053, "step": 5795 }, { "epoch": 1.344623593550632, "grad_norm": 19.1634289852142, "learning_rate": 2e-06, "loss": 0.3829, "step": 5796 }, { "epoch": 1.3448555851989328, "grad_norm": 11.212872422045528, "learning_rate": 2e-06, "loss": 0.2311, "step": 5797 }, { "epoch": 1.3450875768472335, "grad_norm": 8.339351003547979, "learning_rate": 2e-06, "loss": 0.186, "step": 5798 }, { "epoch": 1.3453195684955341, "grad_norm": 11.20007845000388, "learning_rate": 2e-06, "loss": 0.2126, "step": 5799 }, { "epoch": 1.3455515601438348, "grad_norm": 9.150632215768846, "learning_rate": 2e-06, "loss": 0.1566, "step": 5800 }, { "epoch": 1.3457835517921355, "grad_norm": 10.867733909146118, "learning_rate": 2e-06, "loss": 0.2175, "step": 5801 }, { "epoch": 1.3460155434404362, "grad_norm": 44.450382387130546, "learning_rate": 2e-06, "loss": 0.3324, "step": 5802 }, { "epoch": 1.3462475350887368, "grad_norm": 19.048516268670234, "learning_rate": 2e-06, "loss": 0.2678, "step": 5803 }, { "epoch": 1.3464795267370375, "grad_norm": 12.280476212517785, "learning_rate": 2e-06, "loss": 0.2765, "step": 5804 }, { "epoch": 1.3467115183853382, "grad_norm": 14.774793325255686, "learning_rate": 2e-06, "loss": 0.4255, "step": 5805 }, { "epoch": 1.3469435100336389, "grad_norm": 11.714254562758581, "learning_rate": 2e-06, "loss": 0.2319, "step": 5806 }, { "epoch": 1.3471755016819396, "grad_norm": 12.16881579986135, "learning_rate": 2e-06, "loss": 0.2334, "step": 5807 }, { "epoch": 1.3474074933302402, "grad_norm": 15.737049960037895, "learning_rate": 2e-06, "loss": 0.2389, "step": 5808 }, { "epoch": 1.3476394849785407, "grad_norm": 14.653130929599717, "learning_rate": 2e-06, "loss": 0.2888, "step": 5809 }, { "epoch": 1.3478714766268414, "grad_norm": 16.50927703121179, "learning_rate": 2e-06, "loss": 0.2738, "step": 5810 }, { "epoch": 1.348103468275142, "grad_norm": 13.745080652083896, "learning_rate": 2e-06, "loss": 0.2365, "step": 5811 }, { "epoch": 1.3483354599234427, "grad_norm": 18.49258799469895, "learning_rate": 2e-06, "loss": 0.3036, "step": 5812 }, { "epoch": 1.3485674515717434, "grad_norm": 18.51938083068741, "learning_rate": 2e-06, "loss": 0.2746, "step": 5813 }, { "epoch": 1.348799443220044, "grad_norm": 12.325727775873908, "learning_rate": 2e-06, "loss": 0.2631, "step": 5814 }, { "epoch": 1.3490314348683448, "grad_norm": 17.84369574907302, "learning_rate": 2e-06, "loss": 0.3388, "step": 5815 }, { "epoch": 1.3492634265166454, "grad_norm": 13.687209491713253, "learning_rate": 2e-06, "loss": 0.1935, "step": 5816 }, { "epoch": 1.3494954181649461, "grad_norm": 16.68203773438771, "learning_rate": 2e-06, "loss": 0.2844, "step": 5817 }, { "epoch": 1.3497274098132468, "grad_norm": 9.734625692979442, "learning_rate": 2e-06, "loss": 0.2316, "step": 5818 }, { "epoch": 1.3499594014615472, "grad_norm": 17.018315278088533, "learning_rate": 2e-06, "loss": 0.2562, "step": 5819 }, { "epoch": 1.350191393109848, "grad_norm": 5.075900093156938, "learning_rate": 2e-06, "loss": 0.1322, "step": 5820 }, { "epoch": 1.3504233847581486, "grad_norm": 12.15002342495086, "learning_rate": 2e-06, "loss": 0.1723, "step": 5821 }, { "epoch": 1.3506553764064493, "grad_norm": 13.438205754472456, "learning_rate": 2e-06, "loss": 0.2599, "step": 5822 }, { "epoch": 1.35088736805475, "grad_norm": 14.590596311024154, "learning_rate": 2e-06, "loss": 0.2037, "step": 5823 }, { "epoch": 1.3511193597030506, "grad_norm": 11.914815783448612, "learning_rate": 2e-06, "loss": 0.1507, "step": 5824 }, { "epoch": 1.3513513513513513, "grad_norm": 11.406277237176791, "learning_rate": 2e-06, "loss": 0.1968, "step": 5825 }, { "epoch": 1.351583342999652, "grad_norm": 23.409525749652285, "learning_rate": 2e-06, "loss": 0.2491, "step": 5826 }, { "epoch": 1.3518153346479527, "grad_norm": 10.095249207393666, "learning_rate": 2e-06, "loss": 0.1637, "step": 5827 }, { "epoch": 1.3520473262962533, "grad_norm": 11.506175779660916, "learning_rate": 2e-06, "loss": 0.2528, "step": 5828 }, { "epoch": 1.352279317944554, "grad_norm": 15.188116430508073, "learning_rate": 2e-06, "loss": 0.3157, "step": 5829 }, { "epoch": 1.3525113095928547, "grad_norm": 10.428369062323641, "learning_rate": 2e-06, "loss": 0.1897, "step": 5830 }, { "epoch": 1.3527433012411554, "grad_norm": 11.703161185958193, "learning_rate": 2e-06, "loss": 0.2059, "step": 5831 }, { "epoch": 1.352975292889456, "grad_norm": 18.495688046319522, "learning_rate": 2e-06, "loss": 0.2096, "step": 5832 }, { "epoch": 1.3532072845377567, "grad_norm": 15.077754673818836, "learning_rate": 2e-06, "loss": 0.1848, "step": 5833 }, { "epoch": 1.3534392761860574, "grad_norm": 8.695499011423294, "learning_rate": 2e-06, "loss": 0.1427, "step": 5834 }, { "epoch": 1.353671267834358, "grad_norm": 10.649967194948475, "learning_rate": 2e-06, "loss": 0.2084, "step": 5835 }, { "epoch": 1.3539032594826585, "grad_norm": 9.647015626245473, "learning_rate": 2e-06, "loss": 0.164, "step": 5836 }, { "epoch": 1.3541352511309592, "grad_norm": 18.321975887396846, "learning_rate": 2e-06, "loss": 0.2208, "step": 5837 }, { "epoch": 1.35436724277926, "grad_norm": 12.6264369995146, "learning_rate": 2e-06, "loss": 0.2561, "step": 5838 }, { "epoch": 1.3545992344275606, "grad_norm": 13.3140753106527, "learning_rate": 2e-06, "loss": 0.271, "step": 5839 }, { "epoch": 1.3548312260758613, "grad_norm": 18.567865143723125, "learning_rate": 2e-06, "loss": 0.3481, "step": 5840 }, { "epoch": 1.355063217724162, "grad_norm": 12.737075327613965, "learning_rate": 2e-06, "loss": 0.4176, "step": 5841 }, { "epoch": 1.3552952093724626, "grad_norm": 11.288649162226255, "learning_rate": 2e-06, "loss": 0.2044, "step": 5842 }, { "epoch": 1.3555272010207633, "grad_norm": 18.55176280025957, "learning_rate": 2e-06, "loss": 0.2101, "step": 5843 }, { "epoch": 1.355759192669064, "grad_norm": 17.54094405996478, "learning_rate": 2e-06, "loss": 0.2408, "step": 5844 }, { "epoch": 1.3559911843173647, "grad_norm": 16.9841331544675, "learning_rate": 2e-06, "loss": 0.3381, "step": 5845 }, { "epoch": 1.356223175965665, "grad_norm": 19.49676791705497, "learning_rate": 2e-06, "loss": 0.2199, "step": 5846 }, { "epoch": 1.3564551676139658, "grad_norm": 14.364415926692745, "learning_rate": 2e-06, "loss": 0.1998, "step": 5847 }, { "epoch": 1.3566871592622665, "grad_norm": 13.325930038440983, "learning_rate": 2e-06, "loss": 0.1628, "step": 5848 }, { "epoch": 1.3569191509105671, "grad_norm": 23.459217434270393, "learning_rate": 2e-06, "loss": 0.2282, "step": 5849 }, { "epoch": 1.3571511425588678, "grad_norm": 8.778681303386438, "learning_rate": 2e-06, "loss": 0.153, "step": 5850 }, { "epoch": 1.3573831342071685, "grad_norm": 18.54260424822745, "learning_rate": 2e-06, "loss": 0.309, "step": 5851 }, { "epoch": 1.3576151258554692, "grad_norm": 15.49191454628838, "learning_rate": 2e-06, "loss": 0.3286, "step": 5852 }, { "epoch": 1.3578471175037699, "grad_norm": 12.041053783427783, "learning_rate": 2e-06, "loss": 0.2258, "step": 5853 }, { "epoch": 1.3580791091520705, "grad_norm": 22.61898634773694, "learning_rate": 2e-06, "loss": 0.298, "step": 5854 }, { "epoch": 1.3583111008003712, "grad_norm": 4.671569771383828, "learning_rate": 2e-06, "loss": 0.1256, "step": 5855 }, { "epoch": 1.3585430924486719, "grad_norm": 28.314641736645186, "learning_rate": 2e-06, "loss": 0.2476, "step": 5856 }, { "epoch": 1.3587750840969726, "grad_norm": 17.632781361738424, "learning_rate": 2e-06, "loss": 0.2985, "step": 5857 }, { "epoch": 1.3590070757452732, "grad_norm": 8.707190057399295, "learning_rate": 2e-06, "loss": 0.2273, "step": 5858 }, { "epoch": 1.359239067393574, "grad_norm": 10.346660254268741, "learning_rate": 2e-06, "loss": 0.1795, "step": 5859 }, { "epoch": 1.3594710590418746, "grad_norm": 23.020950583167213, "learning_rate": 2e-06, "loss": 0.3505, "step": 5860 }, { "epoch": 1.3597030506901753, "grad_norm": 13.438837661679297, "learning_rate": 2e-06, "loss": 0.2115, "step": 5861 }, { "epoch": 1.359935042338476, "grad_norm": 11.591111693578771, "learning_rate": 2e-06, "loss": 0.1497, "step": 5862 }, { "epoch": 1.3601670339867764, "grad_norm": 13.92468760116834, "learning_rate": 2e-06, "loss": 0.3266, "step": 5863 }, { "epoch": 1.360399025635077, "grad_norm": 11.622340864961435, "learning_rate": 2e-06, "loss": 0.2338, "step": 5864 }, { "epoch": 1.3606310172833778, "grad_norm": 18.978722409368135, "learning_rate": 2e-06, "loss": 0.2449, "step": 5865 }, { "epoch": 1.3608630089316784, "grad_norm": 9.71805799882756, "learning_rate": 2e-06, "loss": 0.1905, "step": 5866 }, { "epoch": 1.3610950005799791, "grad_norm": 16.208738621497222, "learning_rate": 2e-06, "loss": 0.233, "step": 5867 }, { "epoch": 1.3613269922282798, "grad_norm": 13.70124124125462, "learning_rate": 2e-06, "loss": 0.2076, "step": 5868 }, { "epoch": 1.3615589838765805, "grad_norm": 13.063859874737823, "learning_rate": 2e-06, "loss": 0.2459, "step": 5869 }, { "epoch": 1.3617909755248812, "grad_norm": 8.165669539795408, "learning_rate": 2e-06, "loss": 0.1647, "step": 5870 }, { "epoch": 1.3620229671731818, "grad_norm": 11.781677616521952, "learning_rate": 2e-06, "loss": 0.274, "step": 5871 }, { "epoch": 1.3622549588214825, "grad_norm": 9.778774987540954, "learning_rate": 2e-06, "loss": 0.1572, "step": 5872 }, { "epoch": 1.362486950469783, "grad_norm": 20.766841055575046, "learning_rate": 2e-06, "loss": 0.2041, "step": 5873 }, { "epoch": 1.3627189421180836, "grad_norm": 11.59339540161316, "learning_rate": 2e-06, "loss": 0.2524, "step": 5874 }, { "epoch": 1.3629509337663843, "grad_norm": 12.47148805393732, "learning_rate": 2e-06, "loss": 0.242, "step": 5875 }, { "epoch": 1.363182925414685, "grad_norm": 10.960900356092315, "learning_rate": 2e-06, "loss": 0.1566, "step": 5876 }, { "epoch": 1.3634149170629857, "grad_norm": 14.772461356180653, "learning_rate": 2e-06, "loss": 0.2168, "step": 5877 }, { "epoch": 1.3636469087112864, "grad_norm": 17.572173076188516, "learning_rate": 2e-06, "loss": 0.2988, "step": 5878 }, { "epoch": 1.363878900359587, "grad_norm": 9.980374339519635, "learning_rate": 2e-06, "loss": 0.2, "step": 5879 }, { "epoch": 1.3641108920078877, "grad_norm": 16.310899671151198, "learning_rate": 2e-06, "loss": 0.157, "step": 5880 }, { "epoch": 1.3643428836561884, "grad_norm": 15.22384696178038, "learning_rate": 2e-06, "loss": 0.2674, "step": 5881 }, { "epoch": 1.364574875304489, "grad_norm": 113.19316199239812, "learning_rate": 2e-06, "loss": 0.3356, "step": 5882 }, { "epoch": 1.3648068669527897, "grad_norm": 14.031477531072245, "learning_rate": 2e-06, "loss": 0.3136, "step": 5883 }, { "epoch": 1.3650388586010904, "grad_norm": 22.121313316789323, "learning_rate": 2e-06, "loss": 0.1891, "step": 5884 }, { "epoch": 1.365270850249391, "grad_norm": 17.825567248921306, "learning_rate": 2e-06, "loss": 0.3541, "step": 5885 }, { "epoch": 1.3655028418976918, "grad_norm": 19.069180568099014, "learning_rate": 2e-06, "loss": 0.3158, "step": 5886 }, { "epoch": 1.3657348335459925, "grad_norm": 25.075437472856258, "learning_rate": 2e-06, "loss": 0.4052, "step": 5887 }, { "epoch": 1.3659668251942931, "grad_norm": 9.977701700428549, "learning_rate": 2e-06, "loss": 0.181, "step": 5888 }, { "epoch": 1.3661988168425936, "grad_norm": 17.59684848990075, "learning_rate": 2e-06, "loss": 0.2857, "step": 5889 }, { "epoch": 1.3664308084908943, "grad_norm": 15.09816100180224, "learning_rate": 2e-06, "loss": 0.3539, "step": 5890 }, { "epoch": 1.366662800139195, "grad_norm": 10.943892970975487, "learning_rate": 2e-06, "loss": 0.1357, "step": 5891 }, { "epoch": 1.3668947917874956, "grad_norm": 15.689614080268415, "learning_rate": 2e-06, "loss": 0.275, "step": 5892 }, { "epoch": 1.3671267834357963, "grad_norm": 17.654429430044164, "learning_rate": 2e-06, "loss": 0.2506, "step": 5893 }, { "epoch": 1.367358775084097, "grad_norm": 18.689276228543932, "learning_rate": 2e-06, "loss": 0.1976, "step": 5894 }, { "epoch": 1.3675907667323977, "grad_norm": 18.938131307283825, "learning_rate": 2e-06, "loss": 0.3159, "step": 5895 }, { "epoch": 1.3678227583806983, "grad_norm": 18.188342876250907, "learning_rate": 2e-06, "loss": 0.2705, "step": 5896 }, { "epoch": 1.368054750028999, "grad_norm": 19.57321528336182, "learning_rate": 2e-06, "loss": 0.263, "step": 5897 }, { "epoch": 1.3682867416772997, "grad_norm": 12.626901209943034, "learning_rate": 2e-06, "loss": 0.2686, "step": 5898 }, { "epoch": 1.3685187333256001, "grad_norm": 16.377096327836597, "learning_rate": 2e-06, "loss": 0.2324, "step": 5899 }, { "epoch": 1.3687507249739008, "grad_norm": 10.146682101900355, "learning_rate": 2e-06, "loss": 0.1951, "step": 5900 }, { "epoch": 1.3689827166222015, "grad_norm": 9.085444222802984, "learning_rate": 2e-06, "loss": 0.2103, "step": 5901 }, { "epoch": 1.3692147082705022, "grad_norm": 13.078879458322199, "learning_rate": 2e-06, "loss": 0.1458, "step": 5902 }, { "epoch": 1.3694466999188029, "grad_norm": 22.771698063467927, "learning_rate": 2e-06, "loss": 0.3165, "step": 5903 }, { "epoch": 1.3696786915671035, "grad_norm": 12.684698649225055, "learning_rate": 2e-06, "loss": 0.2419, "step": 5904 }, { "epoch": 1.3699106832154042, "grad_norm": 18.780053747838103, "learning_rate": 2e-06, "loss": 0.3131, "step": 5905 }, { "epoch": 1.370142674863705, "grad_norm": 13.218927833905115, "learning_rate": 2e-06, "loss": 0.1716, "step": 5906 }, { "epoch": 1.3703746665120056, "grad_norm": 12.99323013996737, "learning_rate": 2e-06, "loss": 0.3394, "step": 5907 }, { "epoch": 1.3706066581603062, "grad_norm": 14.975069838383702, "learning_rate": 2e-06, "loss": 0.2689, "step": 5908 }, { "epoch": 1.370838649808607, "grad_norm": 13.194842859328135, "learning_rate": 2e-06, "loss": 0.255, "step": 5909 }, { "epoch": 1.3710706414569076, "grad_norm": 13.572338888548854, "learning_rate": 2e-06, "loss": 0.2105, "step": 5910 }, { "epoch": 1.3713026331052083, "grad_norm": 9.995146313684502, "learning_rate": 2e-06, "loss": 0.2161, "step": 5911 }, { "epoch": 1.371534624753509, "grad_norm": 10.08923809195754, "learning_rate": 2e-06, "loss": 0.2171, "step": 5912 }, { "epoch": 1.3717666164018096, "grad_norm": 26.709924443854867, "learning_rate": 2e-06, "loss": 0.3202, "step": 5913 }, { "epoch": 1.3719986080501103, "grad_norm": 12.308014395977608, "learning_rate": 2e-06, "loss": 0.2215, "step": 5914 }, { "epoch": 1.372230599698411, "grad_norm": 13.05156012354374, "learning_rate": 2e-06, "loss": 0.2219, "step": 5915 }, { "epoch": 1.3724625913467114, "grad_norm": 15.342982310766626, "learning_rate": 2e-06, "loss": 0.2504, "step": 5916 }, { "epoch": 1.3726945829950121, "grad_norm": 17.12977581566977, "learning_rate": 2e-06, "loss": 0.3918, "step": 5917 }, { "epoch": 1.3729265746433128, "grad_norm": 10.564058055931197, "learning_rate": 2e-06, "loss": 0.2486, "step": 5918 }, { "epoch": 1.3731585662916135, "grad_norm": 14.801827927834955, "learning_rate": 2e-06, "loss": 0.2071, "step": 5919 }, { "epoch": 1.3733905579399142, "grad_norm": 19.601648310112644, "learning_rate": 2e-06, "loss": 0.3412, "step": 5920 }, { "epoch": 1.3736225495882148, "grad_norm": 14.214701362197474, "learning_rate": 2e-06, "loss": 0.1959, "step": 5921 }, { "epoch": 1.3738545412365155, "grad_norm": 12.208764144329555, "learning_rate": 2e-06, "loss": 0.3126, "step": 5922 }, { "epoch": 1.3740865328848162, "grad_norm": 17.16152183037491, "learning_rate": 2e-06, "loss": 0.3282, "step": 5923 }, { "epoch": 1.3743185245331169, "grad_norm": 15.279736880276335, "learning_rate": 2e-06, "loss": 0.2844, "step": 5924 }, { "epoch": 1.3745505161814175, "grad_norm": 12.674344871267387, "learning_rate": 2e-06, "loss": 0.2018, "step": 5925 }, { "epoch": 1.374782507829718, "grad_norm": 10.83384890181781, "learning_rate": 2e-06, "loss": 0.2479, "step": 5926 }, { "epoch": 1.3750144994780187, "grad_norm": 12.041522272764032, "learning_rate": 2e-06, "loss": 0.2226, "step": 5927 }, { "epoch": 1.3752464911263194, "grad_norm": 13.700646292901698, "learning_rate": 2e-06, "loss": 0.284, "step": 5928 }, { "epoch": 1.37547848277462, "grad_norm": 16.9939098026401, "learning_rate": 2e-06, "loss": 0.2319, "step": 5929 }, { "epoch": 1.3757104744229207, "grad_norm": 12.29850103313542, "learning_rate": 2e-06, "loss": 0.2211, "step": 5930 }, { "epoch": 1.3759424660712214, "grad_norm": 22.822665379374467, "learning_rate": 2e-06, "loss": 0.1982, "step": 5931 }, { "epoch": 1.376174457719522, "grad_norm": 13.545598501814286, "learning_rate": 2e-06, "loss": 0.187, "step": 5932 }, { "epoch": 1.3764064493678227, "grad_norm": 12.914616833005294, "learning_rate": 2e-06, "loss": 0.2556, "step": 5933 }, { "epoch": 1.3766384410161234, "grad_norm": 21.749768835773786, "learning_rate": 2e-06, "loss": 0.2858, "step": 5934 }, { "epoch": 1.376870432664424, "grad_norm": 22.098796702958314, "learning_rate": 2e-06, "loss": 0.3402, "step": 5935 }, { "epoch": 1.3771024243127248, "grad_norm": 21.733880085073434, "learning_rate": 2e-06, "loss": 0.3829, "step": 5936 }, { "epoch": 1.3773344159610255, "grad_norm": 8.488122175170245, "learning_rate": 2e-06, "loss": 0.1605, "step": 5937 }, { "epoch": 1.3775664076093261, "grad_norm": 12.911399378010156, "learning_rate": 2e-06, "loss": 0.2086, "step": 5938 }, { "epoch": 1.3777983992576268, "grad_norm": 28.886112837195807, "learning_rate": 2e-06, "loss": 0.4906, "step": 5939 }, { "epoch": 1.3780303909059275, "grad_norm": 21.59522435818442, "learning_rate": 2e-06, "loss": 0.2664, "step": 5940 }, { "epoch": 1.3782623825542282, "grad_norm": 10.076643720998616, "learning_rate": 2e-06, "loss": 0.2323, "step": 5941 }, { "epoch": 1.3784943742025286, "grad_norm": 13.516919113099314, "learning_rate": 2e-06, "loss": 0.3002, "step": 5942 }, { "epoch": 1.3787263658508293, "grad_norm": 8.643098284175489, "learning_rate": 2e-06, "loss": 0.216, "step": 5943 }, { "epoch": 1.37895835749913, "grad_norm": 14.300103934873006, "learning_rate": 2e-06, "loss": 0.2819, "step": 5944 }, { "epoch": 1.3791903491474307, "grad_norm": 12.466558909522385, "learning_rate": 2e-06, "loss": 0.2555, "step": 5945 }, { "epoch": 1.3794223407957313, "grad_norm": 8.138712893736352, "learning_rate": 2e-06, "loss": 0.2659, "step": 5946 }, { "epoch": 1.379654332444032, "grad_norm": 12.649915403183721, "learning_rate": 2e-06, "loss": 0.2405, "step": 5947 }, { "epoch": 1.3798863240923327, "grad_norm": 9.271014937152602, "learning_rate": 2e-06, "loss": 0.1644, "step": 5948 }, { "epoch": 1.3801183157406334, "grad_norm": 12.106163458210577, "learning_rate": 2e-06, "loss": 0.2074, "step": 5949 }, { "epoch": 1.380350307388934, "grad_norm": 14.840048188527057, "learning_rate": 2e-06, "loss": 0.227, "step": 5950 }, { "epoch": 1.3805822990372347, "grad_norm": 15.092469306811585, "learning_rate": 2e-06, "loss": 0.3458, "step": 5951 }, { "epoch": 1.3808142906855352, "grad_norm": 18.896819046822152, "learning_rate": 2e-06, "loss": 0.3011, "step": 5952 }, { "epoch": 1.3810462823338359, "grad_norm": 12.53153675981379, "learning_rate": 2e-06, "loss": 0.3175, "step": 5953 }, { "epoch": 1.3812782739821365, "grad_norm": 12.148576689161318, "learning_rate": 2e-06, "loss": 0.2515, "step": 5954 }, { "epoch": 1.3815102656304372, "grad_norm": 17.49484792170981, "learning_rate": 2e-06, "loss": 0.2591, "step": 5955 }, { "epoch": 1.381742257278738, "grad_norm": 10.394134652681377, "learning_rate": 2e-06, "loss": 0.2057, "step": 5956 }, { "epoch": 1.3819742489270386, "grad_norm": 29.127601288530386, "learning_rate": 2e-06, "loss": 0.4277, "step": 5957 }, { "epoch": 1.3822062405753393, "grad_norm": 22.843391511775554, "learning_rate": 2e-06, "loss": 0.3193, "step": 5958 }, { "epoch": 1.38243823222364, "grad_norm": 5.510142521551222, "learning_rate": 2e-06, "loss": 0.1484, "step": 5959 }, { "epoch": 1.3826702238719406, "grad_norm": 11.396703033934903, "learning_rate": 2e-06, "loss": 0.2945, "step": 5960 }, { "epoch": 1.3829022155202413, "grad_norm": 9.600664042933897, "learning_rate": 2e-06, "loss": 0.1599, "step": 5961 }, { "epoch": 1.383134207168542, "grad_norm": 5.892640207145712, "learning_rate": 2e-06, "loss": 0.1359, "step": 5962 }, { "epoch": 1.3833661988168426, "grad_norm": 20.277757054717135, "learning_rate": 2e-06, "loss": 0.2424, "step": 5963 }, { "epoch": 1.3835981904651433, "grad_norm": 10.898832193056212, "learning_rate": 2e-06, "loss": 0.1662, "step": 5964 }, { "epoch": 1.383830182113444, "grad_norm": 19.88410341780845, "learning_rate": 2e-06, "loss": 0.2381, "step": 5965 }, { "epoch": 1.3840621737617447, "grad_norm": 20.916354990874595, "learning_rate": 2e-06, "loss": 0.4176, "step": 5966 }, { "epoch": 1.3842941654100454, "grad_norm": 12.718636226829762, "learning_rate": 2e-06, "loss": 0.1831, "step": 5967 }, { "epoch": 1.384526157058346, "grad_norm": 15.40648006298018, "learning_rate": 2e-06, "loss": 0.2073, "step": 5968 }, { "epoch": 1.3847581487066465, "grad_norm": 14.03245463978515, "learning_rate": 2e-06, "loss": 0.2025, "step": 5969 }, { "epoch": 1.3849901403549472, "grad_norm": 23.08067780388722, "learning_rate": 2e-06, "loss": 0.3673, "step": 5970 }, { "epoch": 1.3852221320032478, "grad_norm": 8.710463900190048, "learning_rate": 2e-06, "loss": 0.1394, "step": 5971 }, { "epoch": 1.3854541236515485, "grad_norm": 19.634103965805803, "learning_rate": 2e-06, "loss": 0.3047, "step": 5972 }, { "epoch": 1.3856861152998492, "grad_norm": 9.507533107547227, "learning_rate": 2e-06, "loss": 0.1999, "step": 5973 }, { "epoch": 1.3859181069481499, "grad_norm": 17.26108100615538, "learning_rate": 2e-06, "loss": 0.3609, "step": 5974 }, { "epoch": 1.3861500985964506, "grad_norm": 11.200525491341423, "learning_rate": 2e-06, "loss": 0.2384, "step": 5975 }, { "epoch": 1.3863820902447512, "grad_norm": 26.638142154059025, "learning_rate": 2e-06, "loss": 0.4838, "step": 5976 }, { "epoch": 1.386614081893052, "grad_norm": 9.526430529695023, "learning_rate": 2e-06, "loss": 0.3479, "step": 5977 }, { "epoch": 1.3868460735413526, "grad_norm": 11.526083961069686, "learning_rate": 2e-06, "loss": 0.2981, "step": 5978 }, { "epoch": 1.387078065189653, "grad_norm": 18.60022661873739, "learning_rate": 2e-06, "loss": 0.2312, "step": 5979 }, { "epoch": 1.3873100568379537, "grad_norm": 12.776358544049172, "learning_rate": 2e-06, "loss": 0.2869, "step": 5980 }, { "epoch": 1.3875420484862544, "grad_norm": 6.80341080091959, "learning_rate": 2e-06, "loss": 0.1541, "step": 5981 }, { "epoch": 1.387774040134555, "grad_norm": 30.41245426392083, "learning_rate": 2e-06, "loss": 0.3814, "step": 5982 }, { "epoch": 1.3880060317828558, "grad_norm": 8.694938302800548, "learning_rate": 2e-06, "loss": 0.2095, "step": 5983 }, { "epoch": 1.3882380234311564, "grad_norm": 17.115266860722507, "learning_rate": 2e-06, "loss": 0.244, "step": 5984 }, { "epoch": 1.388470015079457, "grad_norm": 11.428928951768523, "learning_rate": 2e-06, "loss": 0.2025, "step": 5985 }, { "epoch": 1.3887020067277578, "grad_norm": 7.287599703203382, "learning_rate": 2e-06, "loss": 0.136, "step": 5986 }, { "epoch": 1.3889339983760585, "grad_norm": 22.706310929356366, "learning_rate": 2e-06, "loss": 0.2294, "step": 5987 }, { "epoch": 1.3891659900243591, "grad_norm": 13.099589997732332, "learning_rate": 2e-06, "loss": 0.2546, "step": 5988 }, { "epoch": 1.3893979816726598, "grad_norm": 7.785644395983943, "learning_rate": 2e-06, "loss": 0.191, "step": 5989 }, { "epoch": 1.3896299733209605, "grad_norm": 16.68842225821719, "learning_rate": 2e-06, "loss": 0.3427, "step": 5990 }, { "epoch": 1.3898619649692612, "grad_norm": 14.016036584118243, "learning_rate": 2e-06, "loss": 0.2749, "step": 5991 }, { "epoch": 1.3900939566175619, "grad_norm": 7.988120441251208, "learning_rate": 2e-06, "loss": 0.1942, "step": 5992 }, { "epoch": 1.3903259482658625, "grad_norm": 112.7683572465281, "learning_rate": 2e-06, "loss": 0.3115, "step": 5993 }, { "epoch": 1.3905579399141632, "grad_norm": 42.66441929587144, "learning_rate": 2e-06, "loss": 0.2085, "step": 5994 }, { "epoch": 1.3907899315624639, "grad_norm": 10.947839024667601, "learning_rate": 2e-06, "loss": 0.2289, "step": 5995 }, { "epoch": 1.3910219232107643, "grad_norm": 13.246445827515094, "learning_rate": 2e-06, "loss": 0.2489, "step": 5996 }, { "epoch": 1.391253914859065, "grad_norm": 17.01408986194027, "learning_rate": 2e-06, "loss": 0.2695, "step": 5997 }, { "epoch": 1.3914859065073657, "grad_norm": 9.757210001235624, "learning_rate": 2e-06, "loss": 0.215, "step": 5998 }, { "epoch": 1.3917178981556664, "grad_norm": 14.177056956242843, "learning_rate": 2e-06, "loss": 0.2778, "step": 5999 }, { "epoch": 1.391949889803967, "grad_norm": 5.241533037856061, "learning_rate": 2e-06, "loss": 0.149, "step": 6000 }, { "epoch": 1.3921818814522677, "grad_norm": 11.535395075466417, "learning_rate": 2e-06, "loss": 0.2143, "step": 6001 }, { "epoch": 1.3924138731005684, "grad_norm": 10.4332883563328, "learning_rate": 2e-06, "loss": 0.183, "step": 6002 }, { "epoch": 1.392645864748869, "grad_norm": 6.525121061567562, "learning_rate": 2e-06, "loss": 0.1406, "step": 6003 }, { "epoch": 1.3928778563971698, "grad_norm": 9.429201567332692, "learning_rate": 2e-06, "loss": 0.1821, "step": 6004 }, { "epoch": 1.3931098480454704, "grad_norm": 12.268882730046338, "learning_rate": 2e-06, "loss": 0.2193, "step": 6005 }, { "epoch": 1.393341839693771, "grad_norm": 21.003497758566507, "learning_rate": 2e-06, "loss": 0.3852, "step": 6006 }, { "epoch": 1.3935738313420716, "grad_norm": 17.83956602871701, "learning_rate": 2e-06, "loss": 0.323, "step": 6007 }, { "epoch": 1.3938058229903723, "grad_norm": 10.34881103448905, "learning_rate": 2e-06, "loss": 0.2825, "step": 6008 }, { "epoch": 1.394037814638673, "grad_norm": 14.011766669465839, "learning_rate": 2e-06, "loss": 0.2148, "step": 6009 }, { "epoch": 1.3942698062869736, "grad_norm": 19.869740415845733, "learning_rate": 2e-06, "loss": 0.394, "step": 6010 }, { "epoch": 1.3945017979352743, "grad_norm": 18.403769506198863, "learning_rate": 2e-06, "loss": 0.3615, "step": 6011 }, { "epoch": 1.394733789583575, "grad_norm": 7.7733595799105615, "learning_rate": 2e-06, "loss": 0.2009, "step": 6012 }, { "epoch": 1.3949657812318756, "grad_norm": 7.321632491742111, "learning_rate": 2e-06, "loss": 0.1419, "step": 6013 }, { "epoch": 1.3951977728801763, "grad_norm": 9.675069354228755, "learning_rate": 2e-06, "loss": 0.1768, "step": 6014 }, { "epoch": 1.395429764528477, "grad_norm": 17.28073435681396, "learning_rate": 2e-06, "loss": 0.2887, "step": 6015 }, { "epoch": 1.3956617561767777, "grad_norm": 9.356232891758404, "learning_rate": 2e-06, "loss": 0.2237, "step": 6016 }, { "epoch": 1.3958937478250784, "grad_norm": 12.157659270560018, "learning_rate": 2e-06, "loss": 0.1606, "step": 6017 }, { "epoch": 1.396125739473379, "grad_norm": 22.129004589054688, "learning_rate": 2e-06, "loss": 0.307, "step": 6018 }, { "epoch": 1.3963577311216797, "grad_norm": 32.36328986876033, "learning_rate": 2e-06, "loss": 0.4857, "step": 6019 }, { "epoch": 1.3965897227699804, "grad_norm": 19.052967549651004, "learning_rate": 2e-06, "loss": 0.3291, "step": 6020 }, { "epoch": 1.396821714418281, "grad_norm": 23.30684610747512, "learning_rate": 2e-06, "loss": 0.2642, "step": 6021 }, { "epoch": 1.3970537060665815, "grad_norm": 13.054309580640513, "learning_rate": 2e-06, "loss": 0.2567, "step": 6022 }, { "epoch": 1.3972856977148822, "grad_norm": 11.28301974377214, "learning_rate": 2e-06, "loss": 0.2084, "step": 6023 }, { "epoch": 1.3975176893631829, "grad_norm": 19.094865999095894, "learning_rate": 2e-06, "loss": 0.4077, "step": 6024 }, { "epoch": 1.3977496810114836, "grad_norm": 20.856946893139874, "learning_rate": 2e-06, "loss": 0.2084, "step": 6025 }, { "epoch": 1.3979816726597842, "grad_norm": 9.45025107913317, "learning_rate": 2e-06, "loss": 0.2486, "step": 6026 }, { "epoch": 1.398213664308085, "grad_norm": 15.203941180895525, "learning_rate": 2e-06, "loss": 0.1621, "step": 6027 }, { "epoch": 1.3984456559563856, "grad_norm": 6.03861772807484, "learning_rate": 2e-06, "loss": 0.1304, "step": 6028 }, { "epoch": 1.3986776476046863, "grad_norm": 17.018859853107013, "learning_rate": 2e-06, "loss": 0.3151, "step": 6029 }, { "epoch": 1.398909639252987, "grad_norm": 15.165568168648043, "learning_rate": 2e-06, "loss": 0.2561, "step": 6030 }, { "epoch": 1.3991416309012876, "grad_norm": 13.53577123662282, "learning_rate": 2e-06, "loss": 0.2988, "step": 6031 }, { "epoch": 1.399373622549588, "grad_norm": 14.900819174481283, "learning_rate": 2e-06, "loss": 0.2946, "step": 6032 }, { "epoch": 1.3996056141978888, "grad_norm": 7.993752940333517, "learning_rate": 2e-06, "loss": 0.2056, "step": 6033 }, { "epoch": 1.3998376058461894, "grad_norm": 22.356467342587003, "learning_rate": 2e-06, "loss": 0.1956, "step": 6034 }, { "epoch": 1.4000695974944901, "grad_norm": 14.683958717312493, "learning_rate": 2e-06, "loss": 0.1932, "step": 6035 }, { "epoch": 1.4003015891427908, "grad_norm": 12.143973408945179, "learning_rate": 2e-06, "loss": 0.2515, "step": 6036 }, { "epoch": 1.4005335807910915, "grad_norm": 11.753610537279906, "learning_rate": 2e-06, "loss": 0.2235, "step": 6037 }, { "epoch": 1.4007655724393921, "grad_norm": 22.482569340622312, "learning_rate": 2e-06, "loss": 0.2757, "step": 6038 }, { "epoch": 1.4009975640876928, "grad_norm": 10.124105869305984, "learning_rate": 2e-06, "loss": 0.2144, "step": 6039 }, { "epoch": 1.4012295557359935, "grad_norm": 9.48123463264121, "learning_rate": 2e-06, "loss": 0.1909, "step": 6040 }, { "epoch": 1.4014615473842942, "grad_norm": 6.375181245128169, "learning_rate": 2e-06, "loss": 0.1122, "step": 6041 }, { "epoch": 1.4016935390325949, "grad_norm": 25.529223668853717, "learning_rate": 2e-06, "loss": 0.3305, "step": 6042 }, { "epoch": 1.4019255306808955, "grad_norm": 15.821112574407275, "learning_rate": 2e-06, "loss": 0.3286, "step": 6043 }, { "epoch": 1.4021575223291962, "grad_norm": 9.7919902421017, "learning_rate": 2e-06, "loss": 0.1983, "step": 6044 }, { "epoch": 1.402389513977497, "grad_norm": 47.05440000845356, "learning_rate": 2e-06, "loss": 0.2881, "step": 6045 }, { "epoch": 1.4026215056257976, "grad_norm": 15.433693071590781, "learning_rate": 2e-06, "loss": 0.2866, "step": 6046 }, { "epoch": 1.4028534972740982, "grad_norm": 19.2840330067713, "learning_rate": 2e-06, "loss": 0.413, "step": 6047 }, { "epoch": 1.403085488922399, "grad_norm": 15.715282873660984, "learning_rate": 2e-06, "loss": 0.3135, "step": 6048 }, { "epoch": 1.4033174805706994, "grad_norm": 12.690754735658464, "learning_rate": 2e-06, "loss": 0.1997, "step": 6049 }, { "epoch": 1.403549472219, "grad_norm": 17.86836759439439, "learning_rate": 2e-06, "loss": 0.3227, "step": 6050 }, { "epoch": 1.4037814638673007, "grad_norm": 20.685699119431618, "learning_rate": 2e-06, "loss": 0.2731, "step": 6051 }, { "epoch": 1.4040134555156014, "grad_norm": 14.641062692101555, "learning_rate": 2e-06, "loss": 0.2976, "step": 6052 }, { "epoch": 1.404245447163902, "grad_norm": 7.732580108107947, "learning_rate": 2e-06, "loss": 0.1796, "step": 6053 }, { "epoch": 1.4044774388122028, "grad_norm": 18.032129481542793, "learning_rate": 2e-06, "loss": 0.3029, "step": 6054 }, { "epoch": 1.4047094304605034, "grad_norm": 14.180763869770052, "learning_rate": 2e-06, "loss": 0.2168, "step": 6055 }, { "epoch": 1.4049414221088041, "grad_norm": 13.529057984274884, "learning_rate": 2e-06, "loss": 0.2202, "step": 6056 }, { "epoch": 1.4051734137571048, "grad_norm": 19.91666727951003, "learning_rate": 2e-06, "loss": 0.1565, "step": 6057 }, { "epoch": 1.4054054054054055, "grad_norm": 20.439080791635305, "learning_rate": 2e-06, "loss": 0.3029, "step": 6058 }, { "epoch": 1.405637397053706, "grad_norm": 8.094859396497109, "learning_rate": 2e-06, "loss": 0.159, "step": 6059 }, { "epoch": 1.4058693887020066, "grad_norm": 17.211446044937162, "learning_rate": 2e-06, "loss": 0.2021, "step": 6060 }, { "epoch": 1.4061013803503073, "grad_norm": 16.984237415332583, "learning_rate": 2e-06, "loss": 0.3073, "step": 6061 }, { "epoch": 1.406333371998608, "grad_norm": 19.736131712976423, "learning_rate": 2e-06, "loss": 0.3034, "step": 6062 }, { "epoch": 1.4065653636469087, "grad_norm": 8.803289909229532, "learning_rate": 2e-06, "loss": 0.2266, "step": 6063 }, { "epoch": 1.4067973552952093, "grad_norm": 14.67567093094092, "learning_rate": 2e-06, "loss": 0.2873, "step": 6064 }, { "epoch": 1.40702934694351, "grad_norm": 21.111374577296598, "learning_rate": 2e-06, "loss": 0.3436, "step": 6065 }, { "epoch": 1.4072613385918107, "grad_norm": 23.285346279881995, "learning_rate": 2e-06, "loss": 0.353, "step": 6066 }, { "epoch": 1.4074933302401114, "grad_norm": 30.91209969978155, "learning_rate": 2e-06, "loss": 0.3959, "step": 6067 }, { "epoch": 1.407725321888412, "grad_norm": 23.50666321730142, "learning_rate": 2e-06, "loss": 0.3817, "step": 6068 }, { "epoch": 1.4079573135367127, "grad_norm": 24.887261676979207, "learning_rate": 2e-06, "loss": 0.265, "step": 6069 }, { "epoch": 1.4081893051850134, "grad_norm": 17.371120969303167, "learning_rate": 2e-06, "loss": 0.2875, "step": 6070 }, { "epoch": 1.408421296833314, "grad_norm": 17.01279131314068, "learning_rate": 2e-06, "loss": 0.2949, "step": 6071 }, { "epoch": 1.4086532884816148, "grad_norm": 8.158139020970319, "learning_rate": 2e-06, "loss": 0.2076, "step": 6072 }, { "epoch": 1.4088852801299154, "grad_norm": 7.818186686590393, "learning_rate": 2e-06, "loss": 0.1549, "step": 6073 }, { "epoch": 1.409117271778216, "grad_norm": 5.121188328909677, "learning_rate": 2e-06, "loss": 0.1714, "step": 6074 }, { "epoch": 1.4093492634265166, "grad_norm": 11.775903756067791, "learning_rate": 2e-06, "loss": 0.2023, "step": 6075 }, { "epoch": 1.4095812550748172, "grad_norm": 30.858707507154378, "learning_rate": 2e-06, "loss": 0.4582, "step": 6076 }, { "epoch": 1.409813246723118, "grad_norm": 15.755543306378247, "learning_rate": 2e-06, "loss": 0.2169, "step": 6077 }, { "epoch": 1.4100452383714186, "grad_norm": 18.69184492966843, "learning_rate": 2e-06, "loss": 0.2139, "step": 6078 }, { "epoch": 1.4102772300197193, "grad_norm": 27.398404718078048, "learning_rate": 2e-06, "loss": 0.337, "step": 6079 }, { "epoch": 1.41050922166802, "grad_norm": 25.75019073595999, "learning_rate": 2e-06, "loss": 0.3711, "step": 6080 }, { "epoch": 1.4107412133163206, "grad_norm": 9.892592852299064, "learning_rate": 2e-06, "loss": 0.2051, "step": 6081 }, { "epoch": 1.4109732049646213, "grad_norm": 7.408465030887885, "learning_rate": 2e-06, "loss": 0.1045, "step": 6082 }, { "epoch": 1.411205196612922, "grad_norm": 14.06067060739889, "learning_rate": 2e-06, "loss": 0.1927, "step": 6083 }, { "epoch": 1.4114371882612227, "grad_norm": 13.913663213295978, "learning_rate": 2e-06, "loss": 0.2588, "step": 6084 }, { "epoch": 1.4116691799095231, "grad_norm": 10.085229486493613, "learning_rate": 2e-06, "loss": 0.2227, "step": 6085 }, { "epoch": 1.4119011715578238, "grad_norm": 13.631124668880567, "learning_rate": 2e-06, "loss": 0.2588, "step": 6086 }, { "epoch": 1.4121331632061245, "grad_norm": 10.88601808887599, "learning_rate": 2e-06, "loss": 0.2068, "step": 6087 }, { "epoch": 1.4123651548544252, "grad_norm": 13.403020292657482, "learning_rate": 2e-06, "loss": 0.2976, "step": 6088 }, { "epoch": 1.4125971465027258, "grad_norm": 10.344911101143142, "learning_rate": 2e-06, "loss": 0.2509, "step": 6089 }, { "epoch": 1.4128291381510265, "grad_norm": 12.71640337739951, "learning_rate": 2e-06, "loss": 0.2407, "step": 6090 }, { "epoch": 1.4130611297993272, "grad_norm": 15.261059190133613, "learning_rate": 2e-06, "loss": 0.3193, "step": 6091 }, { "epoch": 1.4132931214476279, "grad_norm": 17.65399175249053, "learning_rate": 2e-06, "loss": 0.2981, "step": 6092 }, { "epoch": 1.4135251130959285, "grad_norm": 18.415558010960087, "learning_rate": 2e-06, "loss": 0.3171, "step": 6093 }, { "epoch": 1.4137571047442292, "grad_norm": 18.71760155254099, "learning_rate": 2e-06, "loss": 0.3432, "step": 6094 }, { "epoch": 1.41398909639253, "grad_norm": 12.08233981768872, "learning_rate": 2e-06, "loss": 0.1896, "step": 6095 }, { "epoch": 1.4142210880408306, "grad_norm": 12.924121884049468, "learning_rate": 2e-06, "loss": 0.2531, "step": 6096 }, { "epoch": 1.4144530796891313, "grad_norm": 15.51268648637336, "learning_rate": 2e-06, "loss": 0.2829, "step": 6097 }, { "epoch": 1.414685071337432, "grad_norm": 17.961260828460986, "learning_rate": 2e-06, "loss": 0.2108, "step": 6098 }, { "epoch": 1.4149170629857326, "grad_norm": 17.365521433935196, "learning_rate": 2e-06, "loss": 0.2831, "step": 6099 }, { "epoch": 1.4151490546340333, "grad_norm": 10.163031122556262, "learning_rate": 2e-06, "loss": 0.2289, "step": 6100 }, { "epoch": 1.415381046282334, "grad_norm": 7.099404642130162, "learning_rate": 2e-06, "loss": 0.1514, "step": 6101 }, { "epoch": 1.4156130379306344, "grad_norm": 11.052661536656478, "learning_rate": 2e-06, "loss": 0.1811, "step": 6102 }, { "epoch": 1.415845029578935, "grad_norm": 12.598462452392125, "learning_rate": 2e-06, "loss": 0.1743, "step": 6103 }, { "epoch": 1.4160770212272358, "grad_norm": 6.835223930186045, "learning_rate": 2e-06, "loss": 0.1597, "step": 6104 }, { "epoch": 1.4163090128755365, "grad_norm": 26.35206340861612, "learning_rate": 2e-06, "loss": 0.3793, "step": 6105 }, { "epoch": 1.4165410045238371, "grad_norm": 25.374028954103416, "learning_rate": 2e-06, "loss": 0.3094, "step": 6106 }, { "epoch": 1.4167729961721378, "grad_norm": 11.833484379950999, "learning_rate": 2e-06, "loss": 0.244, "step": 6107 }, { "epoch": 1.4170049878204385, "grad_norm": 20.596598339272045, "learning_rate": 2e-06, "loss": 0.3502, "step": 6108 }, { "epoch": 1.4172369794687392, "grad_norm": 10.987960713511372, "learning_rate": 2e-06, "loss": 0.2556, "step": 6109 }, { "epoch": 1.4174689711170398, "grad_norm": 7.404113399619784, "learning_rate": 2e-06, "loss": 0.1751, "step": 6110 }, { "epoch": 1.4177009627653405, "grad_norm": 8.825514460580765, "learning_rate": 2e-06, "loss": 0.1497, "step": 6111 }, { "epoch": 1.417932954413641, "grad_norm": 15.713391703876344, "learning_rate": 2e-06, "loss": 0.2371, "step": 6112 }, { "epoch": 1.4181649460619417, "grad_norm": 9.618159708726328, "learning_rate": 2e-06, "loss": 0.2016, "step": 6113 }, { "epoch": 1.4183969377102423, "grad_norm": 10.16052102551055, "learning_rate": 2e-06, "loss": 0.3071, "step": 6114 }, { "epoch": 1.418628929358543, "grad_norm": 6.310866150220136, "learning_rate": 2e-06, "loss": 0.1548, "step": 6115 }, { "epoch": 1.4188609210068437, "grad_norm": 19.80731411393623, "learning_rate": 2e-06, "loss": 0.3295, "step": 6116 }, { "epoch": 1.4190929126551444, "grad_norm": 13.3606379253285, "learning_rate": 2e-06, "loss": 0.1826, "step": 6117 }, { "epoch": 1.419324904303445, "grad_norm": 8.81503462660466, "learning_rate": 2e-06, "loss": 0.2251, "step": 6118 }, { "epoch": 1.4195568959517457, "grad_norm": 15.069781449360283, "learning_rate": 2e-06, "loss": 0.2557, "step": 6119 }, { "epoch": 1.4197888876000464, "grad_norm": 18.822472041359575, "learning_rate": 2e-06, "loss": 0.3187, "step": 6120 }, { "epoch": 1.420020879248347, "grad_norm": 20.897490149361044, "learning_rate": 2e-06, "loss": 0.2403, "step": 6121 }, { "epoch": 1.4202528708966478, "grad_norm": 6.836425013476632, "learning_rate": 2e-06, "loss": 0.1446, "step": 6122 }, { "epoch": 1.4204848625449484, "grad_norm": 11.993374434727933, "learning_rate": 2e-06, "loss": 0.217, "step": 6123 }, { "epoch": 1.4207168541932491, "grad_norm": 11.146419401766341, "learning_rate": 2e-06, "loss": 0.158, "step": 6124 }, { "epoch": 1.4209488458415498, "grad_norm": 12.804609156327725, "learning_rate": 2e-06, "loss": 0.2305, "step": 6125 }, { "epoch": 1.4211808374898505, "grad_norm": 20.536425822305404, "learning_rate": 2e-06, "loss": 0.2628, "step": 6126 }, { "epoch": 1.4214128291381511, "grad_norm": 19.6182767891473, "learning_rate": 2e-06, "loss": 0.2312, "step": 6127 }, { "epoch": 1.4216448207864518, "grad_norm": 8.67891356781281, "learning_rate": 2e-06, "loss": 0.1341, "step": 6128 }, { "epoch": 1.4218768124347523, "grad_norm": 9.495972989677902, "learning_rate": 2e-06, "loss": 0.194, "step": 6129 }, { "epoch": 1.422108804083053, "grad_norm": 16.465910741318215, "learning_rate": 2e-06, "loss": 0.2441, "step": 6130 }, { "epoch": 1.4223407957313536, "grad_norm": 18.64237556218032, "learning_rate": 2e-06, "loss": 0.2429, "step": 6131 }, { "epoch": 1.4225727873796543, "grad_norm": 15.131650364743992, "learning_rate": 2e-06, "loss": 0.2176, "step": 6132 }, { "epoch": 1.422804779027955, "grad_norm": 16.521831690467256, "learning_rate": 2e-06, "loss": 0.1971, "step": 6133 }, { "epoch": 1.4230367706762557, "grad_norm": 15.255936878428859, "learning_rate": 2e-06, "loss": 0.3409, "step": 6134 }, { "epoch": 1.4232687623245563, "grad_norm": 19.319660936948484, "learning_rate": 2e-06, "loss": 0.1897, "step": 6135 }, { "epoch": 1.423500753972857, "grad_norm": 19.973456561284625, "learning_rate": 2e-06, "loss": 0.3944, "step": 6136 }, { "epoch": 1.4237327456211577, "grad_norm": 21.849948560087277, "learning_rate": 2e-06, "loss": 0.3276, "step": 6137 }, { "epoch": 1.4239647372694584, "grad_norm": 16.2802313700536, "learning_rate": 2e-06, "loss": 0.2491, "step": 6138 }, { "epoch": 1.4241967289177588, "grad_norm": 19.909440328810053, "learning_rate": 2e-06, "loss": 0.1586, "step": 6139 }, { "epoch": 1.4244287205660595, "grad_norm": 14.748574132006137, "learning_rate": 2e-06, "loss": 0.2809, "step": 6140 }, { "epoch": 1.4246607122143602, "grad_norm": 9.825467024454078, "learning_rate": 2e-06, "loss": 0.2456, "step": 6141 }, { "epoch": 1.4248927038626609, "grad_norm": 15.769056569854035, "learning_rate": 2e-06, "loss": 0.2548, "step": 6142 }, { "epoch": 1.4251246955109615, "grad_norm": 9.883953478158336, "learning_rate": 2e-06, "loss": 0.2044, "step": 6143 }, { "epoch": 1.4253566871592622, "grad_norm": 17.016653772181083, "learning_rate": 2e-06, "loss": 0.406, "step": 6144 }, { "epoch": 1.425588678807563, "grad_norm": 20.34748285243919, "learning_rate": 2e-06, "loss": 0.3685, "step": 6145 }, { "epoch": 1.4258206704558636, "grad_norm": 11.292193060995796, "learning_rate": 2e-06, "loss": 0.2376, "step": 6146 }, { "epoch": 1.4260526621041643, "grad_norm": 9.088756757460795, "learning_rate": 2e-06, "loss": 0.1936, "step": 6147 }, { "epoch": 1.426284653752465, "grad_norm": 18.792375574742056, "learning_rate": 2e-06, "loss": 0.2258, "step": 6148 }, { "epoch": 1.4265166454007656, "grad_norm": 25.108023742053295, "learning_rate": 2e-06, "loss": 0.2133, "step": 6149 }, { "epoch": 1.4267486370490663, "grad_norm": 19.186927719163982, "learning_rate": 2e-06, "loss": 0.2004, "step": 6150 }, { "epoch": 1.426980628697367, "grad_norm": 17.71675497443564, "learning_rate": 2e-06, "loss": 0.2085, "step": 6151 }, { "epoch": 1.4272126203456676, "grad_norm": 22.360834089520896, "learning_rate": 2e-06, "loss": 0.3317, "step": 6152 }, { "epoch": 1.4274446119939683, "grad_norm": 19.971085580506056, "learning_rate": 2e-06, "loss": 0.2874, "step": 6153 }, { "epoch": 1.427676603642269, "grad_norm": 23.875068987608802, "learning_rate": 2e-06, "loss": 0.3523, "step": 6154 }, { "epoch": 1.4279085952905695, "grad_norm": 14.028916962582743, "learning_rate": 2e-06, "loss": 0.2884, "step": 6155 }, { "epoch": 1.4281405869388701, "grad_norm": 18.488646503647313, "learning_rate": 2e-06, "loss": 0.2513, "step": 6156 }, { "epoch": 1.4283725785871708, "grad_norm": 18.390315378298006, "learning_rate": 2e-06, "loss": 0.3414, "step": 6157 }, { "epoch": 1.4286045702354715, "grad_norm": 25.069614253868096, "learning_rate": 2e-06, "loss": 0.3823, "step": 6158 }, { "epoch": 1.4288365618837722, "grad_norm": 16.644875413619268, "learning_rate": 2e-06, "loss": 0.2242, "step": 6159 }, { "epoch": 1.4290685535320728, "grad_norm": 16.553733055395217, "learning_rate": 2e-06, "loss": 0.314, "step": 6160 }, { "epoch": 1.4293005451803735, "grad_norm": 11.93423897501811, "learning_rate": 2e-06, "loss": 0.2209, "step": 6161 }, { "epoch": 1.4295325368286742, "grad_norm": 10.172506414261784, "learning_rate": 2e-06, "loss": 0.2577, "step": 6162 }, { "epoch": 1.4297645284769749, "grad_norm": 13.74352601131, "learning_rate": 2e-06, "loss": 0.1991, "step": 6163 }, { "epoch": 1.4299965201252756, "grad_norm": 20.315569158586207, "learning_rate": 2e-06, "loss": 0.2665, "step": 6164 }, { "epoch": 1.430228511773576, "grad_norm": 9.354263225246887, "learning_rate": 2e-06, "loss": 0.2413, "step": 6165 }, { "epoch": 1.4304605034218767, "grad_norm": 12.08739139621951, "learning_rate": 2e-06, "loss": 0.2746, "step": 6166 }, { "epoch": 1.4306924950701774, "grad_norm": 9.65658511342512, "learning_rate": 2e-06, "loss": 0.1819, "step": 6167 }, { "epoch": 1.430924486718478, "grad_norm": 7.610932909883998, "learning_rate": 2e-06, "loss": 0.172, "step": 6168 }, { "epoch": 1.4311564783667787, "grad_norm": 17.58117935316524, "learning_rate": 2e-06, "loss": 0.278, "step": 6169 }, { "epoch": 1.4313884700150794, "grad_norm": 16.990502623789205, "learning_rate": 2e-06, "loss": 0.255, "step": 6170 }, { "epoch": 1.43162046166338, "grad_norm": 13.475962120240382, "learning_rate": 2e-06, "loss": 0.2718, "step": 6171 }, { "epoch": 1.4318524533116808, "grad_norm": 24.175582308504648, "learning_rate": 2e-06, "loss": 0.2386, "step": 6172 }, { "epoch": 1.4320844449599814, "grad_norm": 5.392419668157998, "learning_rate": 2e-06, "loss": 0.0997, "step": 6173 }, { "epoch": 1.4323164366082821, "grad_norm": 18.51628849044846, "learning_rate": 2e-06, "loss": 0.2131, "step": 6174 }, { "epoch": 1.4325484282565828, "grad_norm": 14.240146276064726, "learning_rate": 2e-06, "loss": 0.1993, "step": 6175 }, { "epoch": 1.4327804199048835, "grad_norm": 11.586250671216098, "learning_rate": 2e-06, "loss": 0.2414, "step": 6176 }, { "epoch": 1.4330124115531842, "grad_norm": 11.519426842901828, "learning_rate": 2e-06, "loss": 0.2363, "step": 6177 }, { "epoch": 1.4332444032014848, "grad_norm": 14.449292115185402, "learning_rate": 2e-06, "loss": 0.2333, "step": 6178 }, { "epoch": 1.4334763948497855, "grad_norm": 14.108943556459652, "learning_rate": 2e-06, "loss": 0.2261, "step": 6179 }, { "epoch": 1.4337083864980862, "grad_norm": 16.064294267055452, "learning_rate": 2e-06, "loss": 0.2962, "step": 6180 }, { "epoch": 1.4339403781463869, "grad_norm": 10.455267909022457, "learning_rate": 2e-06, "loss": 0.1511, "step": 6181 }, { "epoch": 1.4341723697946873, "grad_norm": 11.967123031710038, "learning_rate": 2e-06, "loss": 0.2125, "step": 6182 }, { "epoch": 1.434404361442988, "grad_norm": 15.112844010484544, "learning_rate": 2e-06, "loss": 0.1897, "step": 6183 }, { "epoch": 1.4346363530912887, "grad_norm": 13.501040212475816, "learning_rate": 2e-06, "loss": 0.2221, "step": 6184 }, { "epoch": 1.4348683447395894, "grad_norm": 11.403029548215033, "learning_rate": 2e-06, "loss": 0.2974, "step": 6185 }, { "epoch": 1.43510033638789, "grad_norm": 10.528756579434749, "learning_rate": 2e-06, "loss": 0.2717, "step": 6186 }, { "epoch": 1.4353323280361907, "grad_norm": 17.4785306549272, "learning_rate": 2e-06, "loss": 0.3093, "step": 6187 }, { "epoch": 1.4355643196844914, "grad_norm": 16.520461776544106, "learning_rate": 2e-06, "loss": 0.2704, "step": 6188 }, { "epoch": 1.435796311332792, "grad_norm": 18.529090232095495, "learning_rate": 2e-06, "loss": 0.2756, "step": 6189 }, { "epoch": 1.4360283029810927, "grad_norm": 19.45665700411441, "learning_rate": 2e-06, "loss": 0.1903, "step": 6190 }, { "epoch": 1.4362602946293934, "grad_norm": 23.243262921443165, "learning_rate": 2e-06, "loss": 0.3617, "step": 6191 }, { "epoch": 1.4364922862776939, "grad_norm": 14.006968855607878, "learning_rate": 2e-06, "loss": 0.2091, "step": 6192 }, { "epoch": 1.4367242779259946, "grad_norm": 21.504718595315833, "learning_rate": 2e-06, "loss": 0.2635, "step": 6193 }, { "epoch": 1.4369562695742952, "grad_norm": 13.817998624581417, "learning_rate": 2e-06, "loss": 0.2324, "step": 6194 }, { "epoch": 1.437188261222596, "grad_norm": 7.810015772765436, "learning_rate": 2e-06, "loss": 0.1785, "step": 6195 }, { "epoch": 1.4374202528708966, "grad_norm": 9.365374008202055, "learning_rate": 2e-06, "loss": 0.1485, "step": 6196 }, { "epoch": 1.4376522445191973, "grad_norm": 10.895895587927544, "learning_rate": 2e-06, "loss": 0.3335, "step": 6197 }, { "epoch": 1.437884236167498, "grad_norm": 14.286288695476802, "learning_rate": 2e-06, "loss": 0.2615, "step": 6198 }, { "epoch": 1.4381162278157986, "grad_norm": 10.872284344699505, "learning_rate": 2e-06, "loss": 0.2664, "step": 6199 }, { "epoch": 1.4383482194640993, "grad_norm": 10.360378431259461, "learning_rate": 2e-06, "loss": 0.223, "step": 6200 }, { "epoch": 1.4385802111124, "grad_norm": 13.574591955794803, "learning_rate": 2e-06, "loss": 0.2481, "step": 6201 }, { "epoch": 1.4388122027607007, "grad_norm": 19.38468404639361, "learning_rate": 2e-06, "loss": 0.2538, "step": 6202 }, { "epoch": 1.4390441944090013, "grad_norm": 17.673282877067486, "learning_rate": 2e-06, "loss": 0.1993, "step": 6203 }, { "epoch": 1.439276186057302, "grad_norm": 19.05185072264639, "learning_rate": 2e-06, "loss": 0.1937, "step": 6204 }, { "epoch": 1.4395081777056027, "grad_norm": 14.081400982151527, "learning_rate": 2e-06, "loss": 0.2435, "step": 6205 }, { "epoch": 1.4397401693539034, "grad_norm": 15.081745367531159, "learning_rate": 2e-06, "loss": 0.187, "step": 6206 }, { "epoch": 1.439972161002204, "grad_norm": 7.762727291413856, "learning_rate": 2e-06, "loss": 0.1486, "step": 6207 }, { "epoch": 1.4402041526505045, "grad_norm": 18.9592087976754, "learning_rate": 2e-06, "loss": 0.2228, "step": 6208 }, { "epoch": 1.4404361442988052, "grad_norm": 11.651180155359892, "learning_rate": 2e-06, "loss": 0.2049, "step": 6209 }, { "epoch": 1.4406681359471059, "grad_norm": 21.85022053628526, "learning_rate": 2e-06, "loss": 0.3898, "step": 6210 }, { "epoch": 1.4409001275954065, "grad_norm": 15.022376335743237, "learning_rate": 2e-06, "loss": 0.2378, "step": 6211 }, { "epoch": 1.4411321192437072, "grad_norm": 14.013016604280779, "learning_rate": 2e-06, "loss": 0.3044, "step": 6212 }, { "epoch": 1.4413641108920079, "grad_norm": 15.010439248112638, "learning_rate": 2e-06, "loss": 0.25, "step": 6213 }, { "epoch": 1.4415961025403086, "grad_norm": 14.021784314225817, "learning_rate": 2e-06, "loss": 0.1858, "step": 6214 }, { "epoch": 1.4418280941886092, "grad_norm": 7.823186542884315, "learning_rate": 2e-06, "loss": 0.1652, "step": 6215 }, { "epoch": 1.44206008583691, "grad_norm": 15.567837669828194, "learning_rate": 2e-06, "loss": 0.2944, "step": 6216 }, { "epoch": 1.4422920774852106, "grad_norm": 14.942780282170178, "learning_rate": 2e-06, "loss": 0.3001, "step": 6217 }, { "epoch": 1.442524069133511, "grad_norm": 19.08348088709312, "learning_rate": 2e-06, "loss": 0.3644, "step": 6218 }, { "epoch": 1.4427560607818117, "grad_norm": 11.287593844636518, "learning_rate": 2e-06, "loss": 0.2539, "step": 6219 }, { "epoch": 1.4429880524301124, "grad_norm": 7.621639203002738, "learning_rate": 2e-06, "loss": 0.2016, "step": 6220 }, { "epoch": 1.443220044078413, "grad_norm": 18.406599332910428, "learning_rate": 2e-06, "loss": 0.2905, "step": 6221 }, { "epoch": 1.4434520357267138, "grad_norm": 23.286825030451947, "learning_rate": 2e-06, "loss": 0.2663, "step": 6222 }, { "epoch": 1.4436840273750144, "grad_norm": 8.222848890709107, "learning_rate": 2e-06, "loss": 0.2088, "step": 6223 }, { "epoch": 1.4439160190233151, "grad_norm": 15.558196318996108, "learning_rate": 2e-06, "loss": 0.2139, "step": 6224 }, { "epoch": 1.4441480106716158, "grad_norm": 12.775547720889165, "learning_rate": 2e-06, "loss": 0.1868, "step": 6225 }, { "epoch": 1.4443800023199165, "grad_norm": 7.512811432134344, "learning_rate": 2e-06, "loss": 0.1356, "step": 6226 }, { "epoch": 1.4446119939682172, "grad_norm": 13.737685430600592, "learning_rate": 2e-06, "loss": 0.2513, "step": 6227 }, { "epoch": 1.4448439856165178, "grad_norm": 14.156243226031668, "learning_rate": 2e-06, "loss": 0.2744, "step": 6228 }, { "epoch": 1.4450759772648185, "grad_norm": 17.606808588301096, "learning_rate": 2e-06, "loss": 0.2185, "step": 6229 }, { "epoch": 1.4453079689131192, "grad_norm": 11.62668769274412, "learning_rate": 2e-06, "loss": 0.2055, "step": 6230 }, { "epoch": 1.4455399605614199, "grad_norm": 14.338056904667104, "learning_rate": 2e-06, "loss": 0.2705, "step": 6231 }, { "epoch": 1.4457719522097205, "grad_norm": 8.648296098897648, "learning_rate": 2e-06, "loss": 0.1655, "step": 6232 }, { "epoch": 1.4460039438580212, "grad_norm": 13.206173762170703, "learning_rate": 2e-06, "loss": 0.2529, "step": 6233 }, { "epoch": 1.446235935506322, "grad_norm": 9.84978112659568, "learning_rate": 2e-06, "loss": 0.1463, "step": 6234 }, { "epoch": 1.4464679271546224, "grad_norm": 5.811228390154432, "learning_rate": 2e-06, "loss": 0.1444, "step": 6235 }, { "epoch": 1.446699918802923, "grad_norm": 8.78844000550045, "learning_rate": 2e-06, "loss": 0.1566, "step": 6236 }, { "epoch": 1.4469319104512237, "grad_norm": 14.425942706111064, "learning_rate": 2e-06, "loss": 0.2102, "step": 6237 }, { "epoch": 1.4471639020995244, "grad_norm": 30.388185582567207, "learning_rate": 2e-06, "loss": 0.2826, "step": 6238 }, { "epoch": 1.447395893747825, "grad_norm": 10.26658546069454, "learning_rate": 2e-06, "loss": 0.2397, "step": 6239 }, { "epoch": 1.4476278853961257, "grad_norm": 12.24282778864517, "learning_rate": 2e-06, "loss": 0.2435, "step": 6240 }, { "epoch": 1.4478598770444264, "grad_norm": 32.10949393944506, "learning_rate": 2e-06, "loss": 0.4579, "step": 6241 }, { "epoch": 1.448091868692727, "grad_norm": 12.488039092828613, "learning_rate": 2e-06, "loss": 0.1936, "step": 6242 }, { "epoch": 1.4483238603410278, "grad_norm": 19.789272082035247, "learning_rate": 2e-06, "loss": 0.3089, "step": 6243 }, { "epoch": 1.4485558519893285, "grad_norm": 10.038105505453792, "learning_rate": 2e-06, "loss": 0.2293, "step": 6244 }, { "epoch": 1.448787843637629, "grad_norm": 14.92566839643602, "learning_rate": 2e-06, "loss": 0.2395, "step": 6245 }, { "epoch": 1.4490198352859296, "grad_norm": 13.084583951620527, "learning_rate": 2e-06, "loss": 0.2754, "step": 6246 }, { "epoch": 1.4492518269342303, "grad_norm": 16.02842816644472, "learning_rate": 2e-06, "loss": 0.2676, "step": 6247 }, { "epoch": 1.449483818582531, "grad_norm": 19.128811378051246, "learning_rate": 2e-06, "loss": 0.2822, "step": 6248 }, { "epoch": 1.4497158102308316, "grad_norm": 22.26464674913668, "learning_rate": 2e-06, "loss": 0.2977, "step": 6249 }, { "epoch": 1.4499478018791323, "grad_norm": 18.209839699130395, "learning_rate": 2e-06, "loss": 0.2602, "step": 6250 }, { "epoch": 1.450179793527433, "grad_norm": 15.616951580530294, "learning_rate": 2e-06, "loss": 0.253, "step": 6251 }, { "epoch": 1.4504117851757337, "grad_norm": 11.246926978514672, "learning_rate": 2e-06, "loss": 0.1721, "step": 6252 }, { "epoch": 1.4506437768240343, "grad_norm": 16.39083591876464, "learning_rate": 2e-06, "loss": 0.2644, "step": 6253 }, { "epoch": 1.450875768472335, "grad_norm": 20.47143320303876, "learning_rate": 2e-06, "loss": 0.2169, "step": 6254 }, { "epoch": 1.4511077601206357, "grad_norm": 19.1369428998323, "learning_rate": 2e-06, "loss": 0.2878, "step": 6255 }, { "epoch": 1.4513397517689364, "grad_norm": 12.488871471556534, "learning_rate": 2e-06, "loss": 0.2151, "step": 6256 }, { "epoch": 1.451571743417237, "grad_norm": 11.303663814651218, "learning_rate": 2e-06, "loss": 0.1897, "step": 6257 }, { "epoch": 1.4518037350655377, "grad_norm": 14.441768775753307, "learning_rate": 2e-06, "loss": 0.3127, "step": 6258 }, { "epoch": 1.4520357267138384, "grad_norm": 16.363244917076656, "learning_rate": 2e-06, "loss": 0.2342, "step": 6259 }, { "epoch": 1.452267718362139, "grad_norm": 23.380606574314694, "learning_rate": 2e-06, "loss": 0.25, "step": 6260 }, { "epoch": 1.4524997100104398, "grad_norm": 7.810441383500039, "learning_rate": 2e-06, "loss": 0.1686, "step": 6261 }, { "epoch": 1.4527317016587402, "grad_norm": 17.524082584791838, "learning_rate": 2e-06, "loss": 0.2508, "step": 6262 }, { "epoch": 1.452963693307041, "grad_norm": 15.673484460883444, "learning_rate": 2e-06, "loss": 0.3772, "step": 6263 }, { "epoch": 1.4531956849553416, "grad_norm": 8.920955052046363, "learning_rate": 2e-06, "loss": 0.1725, "step": 6264 }, { "epoch": 1.4534276766036422, "grad_norm": 17.182446994054157, "learning_rate": 2e-06, "loss": 0.2424, "step": 6265 }, { "epoch": 1.453659668251943, "grad_norm": 17.68772899689125, "learning_rate": 2e-06, "loss": 0.2875, "step": 6266 }, { "epoch": 1.4538916599002436, "grad_norm": 19.9172602061717, "learning_rate": 2e-06, "loss": 0.3221, "step": 6267 }, { "epoch": 1.4541236515485443, "grad_norm": 10.481349481722452, "learning_rate": 2e-06, "loss": 0.1947, "step": 6268 }, { "epoch": 1.454355643196845, "grad_norm": 10.944112076996793, "learning_rate": 2e-06, "loss": 0.2254, "step": 6269 }, { "epoch": 1.4545876348451456, "grad_norm": 15.685246629354507, "learning_rate": 2e-06, "loss": 0.3094, "step": 6270 }, { "epoch": 1.4548196264934463, "grad_norm": 8.918649947722725, "learning_rate": 2e-06, "loss": 0.2104, "step": 6271 }, { "epoch": 1.4550516181417468, "grad_norm": 11.16163245508632, "learning_rate": 2e-06, "loss": 0.1538, "step": 6272 }, { "epoch": 1.4552836097900474, "grad_norm": 19.679477959880053, "learning_rate": 2e-06, "loss": 0.2507, "step": 6273 }, { "epoch": 1.4555156014383481, "grad_norm": 12.987750627077375, "learning_rate": 2e-06, "loss": 0.2041, "step": 6274 }, { "epoch": 1.4557475930866488, "grad_norm": 17.114922892888984, "learning_rate": 2e-06, "loss": 0.3117, "step": 6275 }, { "epoch": 1.4559795847349495, "grad_norm": 10.067639029819325, "learning_rate": 2e-06, "loss": 0.1696, "step": 6276 }, { "epoch": 1.4562115763832502, "grad_norm": 17.685941678809247, "learning_rate": 2e-06, "loss": 0.2471, "step": 6277 }, { "epoch": 1.4564435680315508, "grad_norm": 20.106708525227468, "learning_rate": 2e-06, "loss": 0.2805, "step": 6278 }, { "epoch": 1.4566755596798515, "grad_norm": 29.450049625966425, "learning_rate": 2e-06, "loss": 0.3727, "step": 6279 }, { "epoch": 1.4569075513281522, "grad_norm": 12.063322046943563, "learning_rate": 2e-06, "loss": 0.1852, "step": 6280 }, { "epoch": 1.4571395429764529, "grad_norm": 23.251493560396554, "learning_rate": 2e-06, "loss": 0.2404, "step": 6281 }, { "epoch": 1.4573715346247536, "grad_norm": 16.424722310407798, "learning_rate": 2e-06, "loss": 0.2657, "step": 6282 }, { "epoch": 1.4576035262730542, "grad_norm": 48.52568098535008, "learning_rate": 2e-06, "loss": 0.3805, "step": 6283 }, { "epoch": 1.457835517921355, "grad_norm": 11.824165675935632, "learning_rate": 2e-06, "loss": 0.0905, "step": 6284 }, { "epoch": 1.4580675095696556, "grad_norm": 12.360254817926524, "learning_rate": 2e-06, "loss": 0.3002, "step": 6285 }, { "epoch": 1.4582995012179563, "grad_norm": 11.468732781327246, "learning_rate": 2e-06, "loss": 0.2397, "step": 6286 }, { "epoch": 1.458531492866257, "grad_norm": 17.519255642282676, "learning_rate": 2e-06, "loss": 0.2276, "step": 6287 }, { "epoch": 1.4587634845145574, "grad_norm": 16.463553573068534, "learning_rate": 2e-06, "loss": 0.2647, "step": 6288 }, { "epoch": 1.458995476162858, "grad_norm": 10.964272322350954, "learning_rate": 2e-06, "loss": 0.1639, "step": 6289 }, { "epoch": 1.4592274678111588, "grad_norm": 22.362764075921604, "learning_rate": 2e-06, "loss": 0.2763, "step": 6290 }, { "epoch": 1.4594594594594594, "grad_norm": 12.523391116774407, "learning_rate": 2e-06, "loss": 0.2957, "step": 6291 }, { "epoch": 1.45969145110776, "grad_norm": 11.906953940165494, "learning_rate": 2e-06, "loss": 0.2841, "step": 6292 }, { "epoch": 1.4599234427560608, "grad_norm": 10.510627828123306, "learning_rate": 2e-06, "loss": 0.1849, "step": 6293 }, { "epoch": 1.4601554344043615, "grad_norm": 11.924520737964247, "learning_rate": 2e-06, "loss": 0.2385, "step": 6294 }, { "epoch": 1.4603874260526621, "grad_norm": 17.689641377689316, "learning_rate": 2e-06, "loss": 0.4174, "step": 6295 }, { "epoch": 1.4606194177009628, "grad_norm": 11.519765885822281, "learning_rate": 2e-06, "loss": 0.2632, "step": 6296 }, { "epoch": 1.4608514093492635, "grad_norm": 12.189681236199014, "learning_rate": 2e-06, "loss": 0.3487, "step": 6297 }, { "epoch": 1.461083400997564, "grad_norm": 20.49920570696744, "learning_rate": 2e-06, "loss": 0.2388, "step": 6298 }, { "epoch": 1.4613153926458646, "grad_norm": 12.667179012508328, "learning_rate": 2e-06, "loss": 0.282, "step": 6299 }, { "epoch": 1.4615473842941653, "grad_norm": 12.849410460911564, "learning_rate": 2e-06, "loss": 0.227, "step": 6300 }, { "epoch": 1.461779375942466, "grad_norm": 12.650327888469596, "learning_rate": 2e-06, "loss": 0.2, "step": 6301 }, { "epoch": 1.4620113675907667, "grad_norm": 10.508182667607313, "learning_rate": 2e-06, "loss": 0.227, "step": 6302 }, { "epoch": 1.4622433592390673, "grad_norm": 11.683754689306165, "learning_rate": 2e-06, "loss": 0.1806, "step": 6303 }, { "epoch": 1.462475350887368, "grad_norm": 14.253812881316286, "learning_rate": 2e-06, "loss": 0.2678, "step": 6304 }, { "epoch": 1.4627073425356687, "grad_norm": 12.658627134992473, "learning_rate": 2e-06, "loss": 0.2552, "step": 6305 }, { "epoch": 1.4629393341839694, "grad_norm": 31.209899902519314, "learning_rate": 2e-06, "loss": 0.3512, "step": 6306 }, { "epoch": 1.46317132583227, "grad_norm": 6.515647786907523, "learning_rate": 2e-06, "loss": 0.1727, "step": 6307 }, { "epoch": 1.4634033174805707, "grad_norm": 14.165600444230142, "learning_rate": 2e-06, "loss": 0.2658, "step": 6308 }, { "epoch": 1.4636353091288714, "grad_norm": 20.574582915051842, "learning_rate": 2e-06, "loss": 0.3432, "step": 6309 }, { "epoch": 1.463867300777172, "grad_norm": 9.196302550572256, "learning_rate": 2e-06, "loss": 0.2192, "step": 6310 }, { "epoch": 1.4640992924254728, "grad_norm": 12.910670650009056, "learning_rate": 2e-06, "loss": 0.2553, "step": 6311 }, { "epoch": 1.4643312840737734, "grad_norm": 16.611732586870954, "learning_rate": 2e-06, "loss": 0.2743, "step": 6312 }, { "epoch": 1.4645632757220741, "grad_norm": 11.80635259402948, "learning_rate": 2e-06, "loss": 0.2631, "step": 6313 }, { "epoch": 1.4647952673703748, "grad_norm": 11.714774501709245, "learning_rate": 2e-06, "loss": 0.1788, "step": 6314 }, { "epoch": 1.4650272590186753, "grad_norm": 14.071179173593196, "learning_rate": 2e-06, "loss": 0.2477, "step": 6315 }, { "epoch": 1.465259250666976, "grad_norm": 14.841448731705155, "learning_rate": 2e-06, "loss": 0.2876, "step": 6316 }, { "epoch": 1.4654912423152766, "grad_norm": 16.19049964702802, "learning_rate": 2e-06, "loss": 0.3139, "step": 6317 }, { "epoch": 1.4657232339635773, "grad_norm": 11.0637061092165, "learning_rate": 2e-06, "loss": 0.2283, "step": 6318 }, { "epoch": 1.465955225611878, "grad_norm": 11.873817866467068, "learning_rate": 2e-06, "loss": 0.1563, "step": 6319 }, { "epoch": 1.4661872172601786, "grad_norm": 9.005565476151371, "learning_rate": 2e-06, "loss": 0.1996, "step": 6320 }, { "epoch": 1.4664192089084793, "grad_norm": 13.384400838128574, "learning_rate": 2e-06, "loss": 0.3289, "step": 6321 }, { "epoch": 1.46665120055678, "grad_norm": 20.766319695050672, "learning_rate": 2e-06, "loss": 0.2601, "step": 6322 }, { "epoch": 1.4668831922050807, "grad_norm": 6.532545552029801, "learning_rate": 2e-06, "loss": 0.1765, "step": 6323 }, { "epoch": 1.4671151838533814, "grad_norm": 7.998792218137757, "learning_rate": 2e-06, "loss": 0.1722, "step": 6324 }, { "epoch": 1.4673471755016818, "grad_norm": 12.834191100976502, "learning_rate": 2e-06, "loss": 0.2775, "step": 6325 }, { "epoch": 1.4675791671499825, "grad_norm": 9.317612115484362, "learning_rate": 2e-06, "loss": 0.2043, "step": 6326 }, { "epoch": 1.4678111587982832, "grad_norm": 5.091989747213939, "learning_rate": 2e-06, "loss": 0.201, "step": 6327 }, { "epoch": 1.4680431504465838, "grad_norm": 11.99678074994932, "learning_rate": 2e-06, "loss": 0.1815, "step": 6328 }, { "epoch": 1.4682751420948845, "grad_norm": 21.94462944847759, "learning_rate": 2e-06, "loss": 0.3566, "step": 6329 }, { "epoch": 1.4685071337431852, "grad_norm": 12.180969031739162, "learning_rate": 2e-06, "loss": 0.245, "step": 6330 }, { "epoch": 1.4687391253914859, "grad_norm": 9.778283626208468, "learning_rate": 2e-06, "loss": 0.2044, "step": 6331 }, { "epoch": 1.4689711170397866, "grad_norm": 16.4203032986916, "learning_rate": 2e-06, "loss": 0.2448, "step": 6332 }, { "epoch": 1.4692031086880872, "grad_norm": 16.900025226867008, "learning_rate": 2e-06, "loss": 0.1976, "step": 6333 }, { "epoch": 1.469435100336388, "grad_norm": 10.021115257040865, "learning_rate": 2e-06, "loss": 0.1496, "step": 6334 }, { "epoch": 1.4696670919846886, "grad_norm": 13.389193715874105, "learning_rate": 2e-06, "loss": 0.2155, "step": 6335 }, { "epoch": 1.4698990836329893, "grad_norm": 8.596070239371144, "learning_rate": 2e-06, "loss": 0.1977, "step": 6336 }, { "epoch": 1.47013107528129, "grad_norm": 12.127040615020997, "learning_rate": 2e-06, "loss": 0.2525, "step": 6337 }, { "epoch": 1.4703630669295906, "grad_norm": 16.134833289034738, "learning_rate": 2e-06, "loss": 0.215, "step": 6338 }, { "epoch": 1.4705950585778913, "grad_norm": 10.771203339462213, "learning_rate": 2e-06, "loss": 0.2874, "step": 6339 }, { "epoch": 1.470827050226192, "grad_norm": 16.109559074612886, "learning_rate": 2e-06, "loss": 0.1802, "step": 6340 }, { "epoch": 1.4710590418744927, "grad_norm": 24.667718004035784, "learning_rate": 2e-06, "loss": 0.2801, "step": 6341 }, { "epoch": 1.4712910335227931, "grad_norm": 11.749541340994458, "learning_rate": 2e-06, "loss": 0.1959, "step": 6342 }, { "epoch": 1.4715230251710938, "grad_norm": 14.16110476245822, "learning_rate": 2e-06, "loss": 0.1827, "step": 6343 }, { "epoch": 1.4717550168193945, "grad_norm": 10.151478075496506, "learning_rate": 2e-06, "loss": 0.127, "step": 6344 }, { "epoch": 1.4719870084676951, "grad_norm": 16.90380252331379, "learning_rate": 2e-06, "loss": 0.1837, "step": 6345 }, { "epoch": 1.4722190001159958, "grad_norm": 8.733030126409302, "learning_rate": 2e-06, "loss": 0.138, "step": 6346 }, { "epoch": 1.4724509917642965, "grad_norm": 6.654863266480442, "learning_rate": 2e-06, "loss": 0.1539, "step": 6347 }, { "epoch": 1.4726829834125972, "grad_norm": 23.177115613442897, "learning_rate": 2e-06, "loss": 0.1599, "step": 6348 }, { "epoch": 1.4729149750608979, "grad_norm": 14.13503325100472, "learning_rate": 2e-06, "loss": 0.3002, "step": 6349 }, { "epoch": 1.4731469667091985, "grad_norm": 19.238936368187463, "learning_rate": 2e-06, "loss": 0.3622, "step": 6350 }, { "epoch": 1.4733789583574992, "grad_norm": 20.46860314621833, "learning_rate": 2e-06, "loss": 0.2823, "step": 6351 }, { "epoch": 1.4736109500057997, "grad_norm": 10.96071359838017, "learning_rate": 2e-06, "loss": 0.178, "step": 6352 }, { "epoch": 1.4738429416541003, "grad_norm": 17.243761699595712, "learning_rate": 2e-06, "loss": 0.3201, "step": 6353 }, { "epoch": 1.474074933302401, "grad_norm": 11.761870335399129, "learning_rate": 2e-06, "loss": 0.1588, "step": 6354 }, { "epoch": 1.4743069249507017, "grad_norm": 15.063788634244156, "learning_rate": 2e-06, "loss": 0.1985, "step": 6355 }, { "epoch": 1.4745389165990024, "grad_norm": 10.205439964919952, "learning_rate": 2e-06, "loss": 0.1951, "step": 6356 }, { "epoch": 1.474770908247303, "grad_norm": 19.755369133580384, "learning_rate": 2e-06, "loss": 0.314, "step": 6357 }, { "epoch": 1.4750028998956037, "grad_norm": 18.02092442331726, "learning_rate": 2e-06, "loss": 0.2532, "step": 6358 }, { "epoch": 1.4752348915439044, "grad_norm": 20.776127614591175, "learning_rate": 2e-06, "loss": 0.3867, "step": 6359 }, { "epoch": 1.475466883192205, "grad_norm": 18.8490296827277, "learning_rate": 2e-06, "loss": 0.2271, "step": 6360 }, { "epoch": 1.4756988748405058, "grad_norm": 16.243421950387734, "learning_rate": 2e-06, "loss": 0.3152, "step": 6361 }, { "epoch": 1.4759308664888064, "grad_norm": 12.912182042718188, "learning_rate": 2e-06, "loss": 0.2522, "step": 6362 }, { "epoch": 1.4761628581371071, "grad_norm": 13.434169048365156, "learning_rate": 2e-06, "loss": 0.1501, "step": 6363 }, { "epoch": 1.4763948497854078, "grad_norm": 23.64559945466988, "learning_rate": 2e-06, "loss": 0.322, "step": 6364 }, { "epoch": 1.4766268414337085, "grad_norm": 15.840938473928546, "learning_rate": 2e-06, "loss": 0.398, "step": 6365 }, { "epoch": 1.4768588330820092, "grad_norm": 13.593669173006043, "learning_rate": 2e-06, "loss": 0.2298, "step": 6366 }, { "epoch": 1.4770908247303098, "grad_norm": 15.484086423496137, "learning_rate": 2e-06, "loss": 0.2102, "step": 6367 }, { "epoch": 1.4773228163786103, "grad_norm": 20.566877565115348, "learning_rate": 2e-06, "loss": 0.2452, "step": 6368 }, { "epoch": 1.477554808026911, "grad_norm": 26.017129239071807, "learning_rate": 2e-06, "loss": 0.3534, "step": 6369 }, { "epoch": 1.4777867996752116, "grad_norm": 22.452637281185886, "learning_rate": 2e-06, "loss": 0.3603, "step": 6370 }, { "epoch": 1.4780187913235123, "grad_norm": 16.962058059153215, "learning_rate": 2e-06, "loss": 0.2599, "step": 6371 }, { "epoch": 1.478250782971813, "grad_norm": 31.44988018042093, "learning_rate": 2e-06, "loss": 0.3149, "step": 6372 }, { "epoch": 1.4784827746201137, "grad_norm": 17.97818075763738, "learning_rate": 2e-06, "loss": 0.2712, "step": 6373 }, { "epoch": 1.4787147662684144, "grad_norm": 16.298172657415773, "learning_rate": 2e-06, "loss": 0.2514, "step": 6374 }, { "epoch": 1.478946757916715, "grad_norm": 18.243457940810273, "learning_rate": 2e-06, "loss": 0.2485, "step": 6375 }, { "epoch": 1.4791787495650157, "grad_norm": 11.290826885683321, "learning_rate": 2e-06, "loss": 0.2569, "step": 6376 }, { "epoch": 1.4794107412133164, "grad_norm": 8.751435320787085, "learning_rate": 2e-06, "loss": 0.1369, "step": 6377 }, { "epoch": 1.4796427328616168, "grad_norm": 16.818687395472264, "learning_rate": 2e-06, "loss": 0.209, "step": 6378 }, { "epoch": 1.4798747245099175, "grad_norm": 15.913409104761076, "learning_rate": 2e-06, "loss": 0.2155, "step": 6379 }, { "epoch": 1.4801067161582182, "grad_norm": 17.282055857289045, "learning_rate": 2e-06, "loss": 0.3269, "step": 6380 }, { "epoch": 1.4803387078065189, "grad_norm": 17.084959173190963, "learning_rate": 2e-06, "loss": 0.3279, "step": 6381 }, { "epoch": 1.4805706994548196, "grad_norm": 10.158009644371294, "learning_rate": 2e-06, "loss": 0.3108, "step": 6382 }, { "epoch": 1.4808026911031202, "grad_norm": 13.161002799442894, "learning_rate": 2e-06, "loss": 0.2121, "step": 6383 }, { "epoch": 1.481034682751421, "grad_norm": 10.16665338585957, "learning_rate": 2e-06, "loss": 0.1704, "step": 6384 }, { "epoch": 1.4812666743997216, "grad_norm": 11.521240503832262, "learning_rate": 2e-06, "loss": 0.2038, "step": 6385 }, { "epoch": 1.4814986660480223, "grad_norm": 15.73741550757018, "learning_rate": 2e-06, "loss": 0.3129, "step": 6386 }, { "epoch": 1.481730657696323, "grad_norm": 12.864819746200281, "learning_rate": 2e-06, "loss": 0.2185, "step": 6387 }, { "epoch": 1.4819626493446236, "grad_norm": 13.343572206351373, "learning_rate": 2e-06, "loss": 0.2572, "step": 6388 }, { "epoch": 1.4821946409929243, "grad_norm": 11.726772864515828, "learning_rate": 2e-06, "loss": 0.214, "step": 6389 }, { "epoch": 1.482426632641225, "grad_norm": 12.530670113616015, "learning_rate": 2e-06, "loss": 0.2401, "step": 6390 }, { "epoch": 1.4826586242895257, "grad_norm": 19.523033448539547, "learning_rate": 2e-06, "loss": 0.3102, "step": 6391 }, { "epoch": 1.4828906159378263, "grad_norm": 14.813811087640675, "learning_rate": 2e-06, "loss": 0.2472, "step": 6392 }, { "epoch": 1.483122607586127, "grad_norm": 11.124681524188954, "learning_rate": 2e-06, "loss": 0.1997, "step": 6393 }, { "epoch": 1.4833545992344277, "grad_norm": 13.281192962940535, "learning_rate": 2e-06, "loss": 0.1957, "step": 6394 }, { "epoch": 1.4835865908827282, "grad_norm": 9.593242911666561, "learning_rate": 2e-06, "loss": 0.1657, "step": 6395 }, { "epoch": 1.4838185825310288, "grad_norm": 14.775396233103777, "learning_rate": 2e-06, "loss": 0.2757, "step": 6396 }, { "epoch": 1.4840505741793295, "grad_norm": 10.907066557516638, "learning_rate": 2e-06, "loss": 0.1952, "step": 6397 }, { "epoch": 1.4842825658276302, "grad_norm": 12.904636573491848, "learning_rate": 2e-06, "loss": 0.2891, "step": 6398 }, { "epoch": 1.4845145574759309, "grad_norm": 16.960841103912564, "learning_rate": 2e-06, "loss": 0.3691, "step": 6399 }, { "epoch": 1.4847465491242315, "grad_norm": 10.291992699295115, "learning_rate": 2e-06, "loss": 0.2032, "step": 6400 }, { "epoch": 1.4849785407725322, "grad_norm": 10.330431592292854, "learning_rate": 2e-06, "loss": 0.1908, "step": 6401 }, { "epoch": 1.485210532420833, "grad_norm": 12.167172292182945, "learning_rate": 2e-06, "loss": 0.2551, "step": 6402 }, { "epoch": 1.4854425240691336, "grad_norm": 9.095098976169192, "learning_rate": 2e-06, "loss": 0.203, "step": 6403 }, { "epoch": 1.4856745157174343, "grad_norm": 21.738840895505344, "learning_rate": 2e-06, "loss": 0.3902, "step": 6404 }, { "epoch": 1.4859065073657347, "grad_norm": 20.273545028134055, "learning_rate": 2e-06, "loss": 0.2845, "step": 6405 }, { "epoch": 1.4861384990140354, "grad_norm": 15.994132167667901, "learning_rate": 2e-06, "loss": 0.2778, "step": 6406 }, { "epoch": 1.486370490662336, "grad_norm": 13.177377098709755, "learning_rate": 2e-06, "loss": 0.2385, "step": 6407 }, { "epoch": 1.4866024823106367, "grad_norm": 18.166082735053617, "learning_rate": 2e-06, "loss": 0.3841, "step": 6408 }, { "epoch": 1.4868344739589374, "grad_norm": 19.653124970701338, "learning_rate": 2e-06, "loss": 0.2373, "step": 6409 }, { "epoch": 1.487066465607238, "grad_norm": 20.508541376693575, "learning_rate": 2e-06, "loss": 0.3786, "step": 6410 }, { "epoch": 1.4872984572555388, "grad_norm": 18.380647360810677, "learning_rate": 2e-06, "loss": 0.3239, "step": 6411 }, { "epoch": 1.4875304489038395, "grad_norm": 32.4958970734748, "learning_rate": 2e-06, "loss": 0.2241, "step": 6412 }, { "epoch": 1.4877624405521401, "grad_norm": 29.17435793636348, "learning_rate": 2e-06, "loss": 0.2003, "step": 6413 }, { "epoch": 1.4879944322004408, "grad_norm": 14.065406253018462, "learning_rate": 2e-06, "loss": 0.3424, "step": 6414 }, { "epoch": 1.4882264238487415, "grad_norm": 16.456177366838705, "learning_rate": 2e-06, "loss": 0.2163, "step": 6415 }, { "epoch": 1.4884584154970422, "grad_norm": 18.49614960441739, "learning_rate": 2e-06, "loss": 0.3273, "step": 6416 }, { "epoch": 1.4886904071453428, "grad_norm": 12.96237766808122, "learning_rate": 2e-06, "loss": 0.1475, "step": 6417 }, { "epoch": 1.4889223987936435, "grad_norm": 13.498753630694996, "learning_rate": 2e-06, "loss": 0.2419, "step": 6418 }, { "epoch": 1.4891543904419442, "grad_norm": 19.590928089711877, "learning_rate": 2e-06, "loss": 0.4618, "step": 6419 }, { "epoch": 1.4893863820902449, "grad_norm": 51.909561474681176, "learning_rate": 2e-06, "loss": 0.3855, "step": 6420 }, { "epoch": 1.4896183737385453, "grad_norm": 14.388437806863546, "learning_rate": 2e-06, "loss": 0.2515, "step": 6421 }, { "epoch": 1.489850365386846, "grad_norm": 21.97510015415248, "learning_rate": 2e-06, "loss": 0.4647, "step": 6422 }, { "epoch": 1.4900823570351467, "grad_norm": 10.430234245754157, "learning_rate": 2e-06, "loss": 0.1647, "step": 6423 }, { "epoch": 1.4903143486834474, "grad_norm": 11.789116182208888, "learning_rate": 2e-06, "loss": 0.2325, "step": 6424 }, { "epoch": 1.490546340331748, "grad_norm": 9.155597005212678, "learning_rate": 2e-06, "loss": 0.2378, "step": 6425 }, { "epoch": 1.4907783319800487, "grad_norm": 11.338288541901479, "learning_rate": 2e-06, "loss": 0.3419, "step": 6426 }, { "epoch": 1.4910103236283494, "grad_norm": 13.99855524710128, "learning_rate": 2e-06, "loss": 0.3002, "step": 6427 }, { "epoch": 1.49124231527665, "grad_norm": 19.044828598098015, "learning_rate": 2e-06, "loss": 0.3295, "step": 6428 }, { "epoch": 1.4914743069249508, "grad_norm": 10.062277970290738, "learning_rate": 2e-06, "loss": 0.2077, "step": 6429 }, { "epoch": 1.4917062985732514, "grad_norm": 14.284423700285167, "learning_rate": 2e-06, "loss": 0.2801, "step": 6430 }, { "epoch": 1.4919382902215519, "grad_norm": 35.50237344890928, "learning_rate": 2e-06, "loss": 0.2168, "step": 6431 }, { "epoch": 1.4921702818698526, "grad_norm": 5.948831486266877, "learning_rate": 2e-06, "loss": 0.1457, "step": 6432 }, { "epoch": 1.4924022735181532, "grad_norm": 15.893248474505135, "learning_rate": 2e-06, "loss": 0.2549, "step": 6433 }, { "epoch": 1.492634265166454, "grad_norm": 17.785878418025135, "learning_rate": 2e-06, "loss": 0.3139, "step": 6434 }, { "epoch": 1.4928662568147546, "grad_norm": 17.70381675074382, "learning_rate": 2e-06, "loss": 0.3474, "step": 6435 }, { "epoch": 1.4930982484630553, "grad_norm": 12.609051573865418, "learning_rate": 2e-06, "loss": 0.2953, "step": 6436 }, { "epoch": 1.493330240111356, "grad_norm": 5.811534282114288, "learning_rate": 2e-06, "loss": 0.2519, "step": 6437 }, { "epoch": 1.4935622317596566, "grad_norm": 10.391498834861888, "learning_rate": 2e-06, "loss": 0.1582, "step": 6438 }, { "epoch": 1.4937942234079573, "grad_norm": 10.597649958632756, "learning_rate": 2e-06, "loss": 0.2445, "step": 6439 }, { "epoch": 1.494026215056258, "grad_norm": 9.533945431912754, "learning_rate": 2e-06, "loss": 0.1678, "step": 6440 }, { "epoch": 1.4942582067045587, "grad_norm": 19.709720547193285, "learning_rate": 2e-06, "loss": 0.3057, "step": 6441 }, { "epoch": 1.4944901983528593, "grad_norm": 17.841104378481514, "learning_rate": 2e-06, "loss": 0.2086, "step": 6442 }, { "epoch": 1.49472219000116, "grad_norm": 13.695781339844167, "learning_rate": 2e-06, "loss": 0.2651, "step": 6443 }, { "epoch": 1.4949541816494607, "grad_norm": 10.329248417507095, "learning_rate": 2e-06, "loss": 0.2072, "step": 6444 }, { "epoch": 1.4951861732977614, "grad_norm": 11.273298957941135, "learning_rate": 2e-06, "loss": 0.3243, "step": 6445 }, { "epoch": 1.495418164946062, "grad_norm": 11.123275926165398, "learning_rate": 2e-06, "loss": 0.2626, "step": 6446 }, { "epoch": 1.4956501565943627, "grad_norm": 19.591634516882863, "learning_rate": 2e-06, "loss": 0.3643, "step": 6447 }, { "epoch": 1.4958821482426632, "grad_norm": 14.93813363970523, "learning_rate": 2e-06, "loss": 0.2281, "step": 6448 }, { "epoch": 1.4961141398909639, "grad_norm": 14.059895540139681, "learning_rate": 2e-06, "loss": 0.2931, "step": 6449 }, { "epoch": 1.4963461315392645, "grad_norm": 13.686524407500817, "learning_rate": 2e-06, "loss": 0.2201, "step": 6450 }, { "epoch": 1.4965781231875652, "grad_norm": 12.69422427186578, "learning_rate": 2e-06, "loss": 0.2263, "step": 6451 }, { "epoch": 1.496810114835866, "grad_norm": 6.64847972204828, "learning_rate": 2e-06, "loss": 0.1778, "step": 6452 }, { "epoch": 1.4970421064841666, "grad_norm": 10.611507182667797, "learning_rate": 2e-06, "loss": 0.2653, "step": 6453 }, { "epoch": 1.4972740981324673, "grad_norm": 8.206805145949547, "learning_rate": 2e-06, "loss": 0.1589, "step": 6454 }, { "epoch": 1.497506089780768, "grad_norm": 12.934838866814447, "learning_rate": 2e-06, "loss": 0.2327, "step": 6455 }, { "epoch": 1.4977380814290686, "grad_norm": 8.605576047699119, "learning_rate": 2e-06, "loss": 0.1946, "step": 6456 }, { "epoch": 1.4979700730773693, "grad_norm": 13.740337789467292, "learning_rate": 2e-06, "loss": 0.1989, "step": 6457 }, { "epoch": 1.4982020647256697, "grad_norm": 12.719383705892868, "learning_rate": 2e-06, "loss": 0.3019, "step": 6458 }, { "epoch": 1.4984340563739704, "grad_norm": 9.414119565019343, "learning_rate": 2e-06, "loss": 0.1875, "step": 6459 }, { "epoch": 1.498666048022271, "grad_norm": 7.286243777258862, "learning_rate": 2e-06, "loss": 0.2029, "step": 6460 }, { "epoch": 1.4988980396705718, "grad_norm": 25.087415884774984, "learning_rate": 2e-06, "loss": 0.3493, "step": 6461 }, { "epoch": 1.4991300313188725, "grad_norm": 19.529758328612694, "learning_rate": 2e-06, "loss": 0.2511, "step": 6462 }, { "epoch": 1.4993620229671731, "grad_norm": 10.558360675132494, "learning_rate": 2e-06, "loss": 0.3066, "step": 6463 }, { "epoch": 1.4995940146154738, "grad_norm": 12.929556331585013, "learning_rate": 2e-06, "loss": 0.2349, "step": 6464 }, { "epoch": 1.4998260062637745, "grad_norm": 13.317649088780495, "learning_rate": 2e-06, "loss": 0.2356, "step": 6465 }, { "epoch": 1.5000579979120752, "grad_norm": 14.80770359106743, "learning_rate": 2e-06, "loss": 0.2201, "step": 6466 }, { "epoch": 1.5002899895603758, "grad_norm": 10.099958255530332, "learning_rate": 2e-06, "loss": 0.2774, "step": 6467 }, { "epoch": 1.5005219812086765, "grad_norm": 10.157809884548199, "learning_rate": 2e-06, "loss": 0.2198, "step": 6468 }, { "epoch": 1.5007539728569772, "grad_norm": 12.343167575809934, "learning_rate": 2e-06, "loss": 0.26, "step": 6469 }, { "epoch": 1.5009859645052779, "grad_norm": 16.69967960591968, "learning_rate": 2e-06, "loss": 0.3119, "step": 6470 }, { "epoch": 1.5012179561535786, "grad_norm": 11.728765200927777, "learning_rate": 2e-06, "loss": 0.2118, "step": 6471 }, { "epoch": 1.5014499478018792, "grad_norm": 11.541890012505004, "learning_rate": 2e-06, "loss": 0.1384, "step": 6472 }, { "epoch": 1.50168193945018, "grad_norm": 9.732218961699658, "learning_rate": 2e-06, "loss": 0.1718, "step": 6473 }, { "epoch": 1.5019139310984806, "grad_norm": 17.88222137496914, "learning_rate": 2e-06, "loss": 0.309, "step": 6474 }, { "epoch": 1.5021459227467813, "grad_norm": 16.773658318362514, "learning_rate": 2e-06, "loss": 0.2618, "step": 6475 }, { "epoch": 1.5023779143950817, "grad_norm": 12.08839410124754, "learning_rate": 2e-06, "loss": 0.2378, "step": 6476 }, { "epoch": 1.5026099060433824, "grad_norm": 14.656155009066419, "learning_rate": 2e-06, "loss": 0.2386, "step": 6477 }, { "epoch": 1.502841897691683, "grad_norm": 9.973123753284185, "learning_rate": 2e-06, "loss": 0.308, "step": 6478 }, { "epoch": 1.5030738893399838, "grad_norm": 41.89955411808061, "learning_rate": 2e-06, "loss": 0.1906, "step": 6479 }, { "epoch": 1.5033058809882844, "grad_norm": 10.945620291360028, "learning_rate": 2e-06, "loss": 0.204, "step": 6480 }, { "epoch": 1.5035378726365851, "grad_norm": 10.877027582989168, "learning_rate": 2e-06, "loss": 0.2483, "step": 6481 }, { "epoch": 1.5037698642848858, "grad_norm": 13.071631401641872, "learning_rate": 2e-06, "loss": 0.3342, "step": 6482 }, { "epoch": 1.5040018559331862, "grad_norm": 13.171544565902238, "learning_rate": 2e-06, "loss": 0.3112, "step": 6483 }, { "epoch": 1.504233847581487, "grad_norm": 12.170105872800002, "learning_rate": 2e-06, "loss": 0.2779, "step": 6484 }, { "epoch": 1.5044658392297876, "grad_norm": 13.122499592129397, "learning_rate": 2e-06, "loss": 0.1801, "step": 6485 }, { "epoch": 1.5046978308780883, "grad_norm": 15.47394430721565, "learning_rate": 2e-06, "loss": 0.2778, "step": 6486 }, { "epoch": 1.504929822526389, "grad_norm": 11.18579845578175, "learning_rate": 2e-06, "loss": 0.3338, "step": 6487 }, { "epoch": 1.5051618141746896, "grad_norm": 19.618419994055362, "learning_rate": 2e-06, "loss": 0.3429, "step": 6488 }, { "epoch": 1.5053938058229903, "grad_norm": 10.515346377508802, "learning_rate": 2e-06, "loss": 0.2214, "step": 6489 }, { "epoch": 1.505625797471291, "grad_norm": 16.76037884514449, "learning_rate": 2e-06, "loss": 0.3002, "step": 6490 }, { "epoch": 1.5058577891195917, "grad_norm": 18.413510022031442, "learning_rate": 2e-06, "loss": 0.2109, "step": 6491 }, { "epoch": 1.5060897807678923, "grad_norm": 45.908922289749256, "learning_rate": 2e-06, "loss": 0.2336, "step": 6492 }, { "epoch": 1.506321772416193, "grad_norm": 15.561067182649099, "learning_rate": 2e-06, "loss": 0.2545, "step": 6493 }, { "epoch": 1.5065537640644937, "grad_norm": 16.969709333468, "learning_rate": 2e-06, "loss": 0.3423, "step": 6494 }, { "epoch": 1.5067857557127944, "grad_norm": 10.3194205977704, "learning_rate": 2e-06, "loss": 0.1431, "step": 6495 }, { "epoch": 1.507017747361095, "grad_norm": 17.808742052277843, "learning_rate": 2e-06, "loss": 0.3041, "step": 6496 }, { "epoch": 1.5072497390093957, "grad_norm": 9.31469480252244, "learning_rate": 2e-06, "loss": 0.2087, "step": 6497 }, { "epoch": 1.5074817306576964, "grad_norm": 17.60457924421259, "learning_rate": 2e-06, "loss": 0.2772, "step": 6498 }, { "epoch": 1.507713722305997, "grad_norm": 11.324853479445679, "learning_rate": 2e-06, "loss": 0.2332, "step": 6499 }, { "epoch": 1.5079457139542978, "grad_norm": 5.4358127032744035, "learning_rate": 2e-06, "loss": 0.1556, "step": 6500 }, { "epoch": 1.5081777056025985, "grad_norm": 10.692291572338746, "learning_rate": 2e-06, "loss": 0.264, "step": 6501 }, { "epoch": 1.5084096972508991, "grad_norm": 23.722388037868864, "learning_rate": 2e-06, "loss": 0.3714, "step": 6502 }, { "epoch": 1.5086416888991996, "grad_norm": 14.672151917575201, "learning_rate": 2e-06, "loss": 0.2105, "step": 6503 }, { "epoch": 1.5088736805475003, "grad_norm": 12.031371405646743, "learning_rate": 2e-06, "loss": 0.2954, "step": 6504 }, { "epoch": 1.509105672195801, "grad_norm": 3.1784302456047224, "learning_rate": 2e-06, "loss": 0.1228, "step": 6505 }, { "epoch": 1.5093376638441016, "grad_norm": 14.213306200760728, "learning_rate": 2e-06, "loss": 0.2708, "step": 6506 }, { "epoch": 1.5095696554924023, "grad_norm": 15.232030296744599, "learning_rate": 2e-06, "loss": 0.2593, "step": 6507 }, { "epoch": 1.509801647140703, "grad_norm": 14.984208384309996, "learning_rate": 2e-06, "loss": 0.2175, "step": 6508 }, { "epoch": 1.5100336387890034, "grad_norm": 15.054506568863584, "learning_rate": 2e-06, "loss": 0.291, "step": 6509 }, { "epoch": 1.510265630437304, "grad_norm": 18.958083066770698, "learning_rate": 2e-06, "loss": 0.3165, "step": 6510 }, { "epoch": 1.5104976220856048, "grad_norm": 15.20084589197303, "learning_rate": 2e-06, "loss": 0.2437, "step": 6511 }, { "epoch": 1.5107296137339055, "grad_norm": 10.595426838724423, "learning_rate": 2e-06, "loss": 0.1996, "step": 6512 }, { "epoch": 1.5109616053822061, "grad_norm": 13.323499389916211, "learning_rate": 2e-06, "loss": 0.2852, "step": 6513 }, { "epoch": 1.5111935970305068, "grad_norm": 12.349992816548118, "learning_rate": 2e-06, "loss": 0.159, "step": 6514 }, { "epoch": 1.5114255886788075, "grad_norm": 8.347530066491494, "learning_rate": 2e-06, "loss": 0.1881, "step": 6515 }, { "epoch": 1.5116575803271082, "grad_norm": 14.539653588005612, "learning_rate": 2e-06, "loss": 0.233, "step": 6516 }, { "epoch": 1.5118895719754089, "grad_norm": 14.335559702883744, "learning_rate": 2e-06, "loss": 0.2032, "step": 6517 }, { "epoch": 1.5121215636237095, "grad_norm": 10.128049266081188, "learning_rate": 2e-06, "loss": 0.1667, "step": 6518 }, { "epoch": 1.5123535552720102, "grad_norm": 8.385680504688892, "learning_rate": 2e-06, "loss": 0.2341, "step": 6519 }, { "epoch": 1.5125855469203109, "grad_norm": 17.06497822611483, "learning_rate": 2e-06, "loss": 0.1913, "step": 6520 }, { "epoch": 1.5128175385686116, "grad_norm": 16.332199808682926, "learning_rate": 2e-06, "loss": 0.1997, "step": 6521 }, { "epoch": 1.5130495302169122, "grad_norm": 19.854169165602478, "learning_rate": 2e-06, "loss": 0.3257, "step": 6522 }, { "epoch": 1.513281521865213, "grad_norm": 18.77711344263193, "learning_rate": 2e-06, "loss": 0.2624, "step": 6523 }, { "epoch": 1.5135135135135136, "grad_norm": 13.115808526811055, "learning_rate": 2e-06, "loss": 0.2487, "step": 6524 }, { "epoch": 1.5137455051618143, "grad_norm": 6.266253493130478, "learning_rate": 2e-06, "loss": 0.1387, "step": 6525 }, { "epoch": 1.513977496810115, "grad_norm": 30.669033494279304, "learning_rate": 2e-06, "loss": 0.3013, "step": 6526 }, { "epoch": 1.5142094884584156, "grad_norm": 17.383425126374238, "learning_rate": 2e-06, "loss": 0.3154, "step": 6527 }, { "epoch": 1.5144414801067163, "grad_norm": 9.12719618391748, "learning_rate": 2e-06, "loss": 0.2042, "step": 6528 }, { "epoch": 1.5146734717550168, "grad_norm": 16.01351599490197, "learning_rate": 2e-06, "loss": 0.2471, "step": 6529 }, { "epoch": 1.5149054634033174, "grad_norm": 7.5545651946593155, "learning_rate": 2e-06, "loss": 0.1569, "step": 6530 }, { "epoch": 1.5151374550516181, "grad_norm": 10.252329088153038, "learning_rate": 2e-06, "loss": 0.1647, "step": 6531 }, { "epoch": 1.5153694466999188, "grad_norm": 12.471980786928466, "learning_rate": 2e-06, "loss": 0.2009, "step": 6532 }, { "epoch": 1.5156014383482195, "grad_norm": 11.677249814799206, "learning_rate": 2e-06, "loss": 0.273, "step": 6533 }, { "epoch": 1.5158334299965202, "grad_norm": 16.203498552868982, "learning_rate": 2e-06, "loss": 0.2866, "step": 6534 }, { "epoch": 1.5160654216448208, "grad_norm": 16.749259296710072, "learning_rate": 2e-06, "loss": 0.2742, "step": 6535 }, { "epoch": 1.5162974132931213, "grad_norm": 11.47405388582526, "learning_rate": 2e-06, "loss": 0.2707, "step": 6536 }, { "epoch": 1.516529404941422, "grad_norm": 25.593684432781764, "learning_rate": 2e-06, "loss": 0.2175, "step": 6537 }, { "epoch": 1.5167613965897226, "grad_norm": 20.441951801303862, "learning_rate": 2e-06, "loss": 0.2418, "step": 6538 }, { "epoch": 1.5169933882380233, "grad_norm": 14.458608496737757, "learning_rate": 2e-06, "loss": 0.2365, "step": 6539 }, { "epoch": 1.517225379886324, "grad_norm": 33.16233465034753, "learning_rate": 2e-06, "loss": 0.3357, "step": 6540 }, { "epoch": 1.5174573715346247, "grad_norm": 14.1321734243137, "learning_rate": 2e-06, "loss": 0.2245, "step": 6541 }, { "epoch": 1.5176893631829254, "grad_norm": 14.33484209040663, "learning_rate": 2e-06, "loss": 0.2339, "step": 6542 }, { "epoch": 1.517921354831226, "grad_norm": 15.328236886901246, "learning_rate": 2e-06, "loss": 0.2048, "step": 6543 }, { "epoch": 1.5181533464795267, "grad_norm": 10.470249220106636, "learning_rate": 2e-06, "loss": 0.2095, "step": 6544 }, { "epoch": 1.5183853381278274, "grad_norm": 19.99383171597015, "learning_rate": 2e-06, "loss": 0.2835, "step": 6545 }, { "epoch": 1.518617329776128, "grad_norm": 19.34033305766493, "learning_rate": 2e-06, "loss": 0.2156, "step": 6546 }, { "epoch": 1.5188493214244287, "grad_norm": 20.841866636345877, "learning_rate": 2e-06, "loss": 0.3316, "step": 6547 }, { "epoch": 1.5190813130727294, "grad_norm": 10.383445951544882, "learning_rate": 2e-06, "loss": 0.131, "step": 6548 }, { "epoch": 1.51931330472103, "grad_norm": 14.532499443762863, "learning_rate": 2e-06, "loss": 0.3055, "step": 6549 }, { "epoch": 1.5195452963693308, "grad_norm": 15.630033556591604, "learning_rate": 2e-06, "loss": 0.4152, "step": 6550 }, { "epoch": 1.5197772880176315, "grad_norm": 12.030975197512266, "learning_rate": 2e-06, "loss": 0.2482, "step": 6551 }, { "epoch": 1.5200092796659321, "grad_norm": 13.340481336968507, "learning_rate": 2e-06, "loss": 0.2646, "step": 6552 }, { "epoch": 1.5202412713142328, "grad_norm": 14.488802750841987, "learning_rate": 2e-06, "loss": 0.3086, "step": 6553 }, { "epoch": 1.5204732629625335, "grad_norm": 8.115303601704829, "learning_rate": 2e-06, "loss": 0.1744, "step": 6554 }, { "epoch": 1.5207052546108342, "grad_norm": 26.135474002000866, "learning_rate": 2e-06, "loss": 0.4021, "step": 6555 }, { "epoch": 1.5209372462591346, "grad_norm": 25.226920890660292, "learning_rate": 2e-06, "loss": 0.3901, "step": 6556 }, { "epoch": 1.5211692379074353, "grad_norm": 16.670183683407572, "learning_rate": 2e-06, "loss": 0.2408, "step": 6557 }, { "epoch": 1.521401229555736, "grad_norm": 15.625375444010585, "learning_rate": 2e-06, "loss": 0.2326, "step": 6558 }, { "epoch": 1.5216332212040367, "grad_norm": 14.917535152680767, "learning_rate": 2e-06, "loss": 0.2647, "step": 6559 }, { "epoch": 1.5218652128523373, "grad_norm": 13.332426191345766, "learning_rate": 2e-06, "loss": 0.2578, "step": 6560 }, { "epoch": 1.522097204500638, "grad_norm": 18.671462189485453, "learning_rate": 2e-06, "loss": 0.3071, "step": 6561 }, { "epoch": 1.5223291961489387, "grad_norm": 11.246730512085772, "learning_rate": 2e-06, "loss": 0.1709, "step": 6562 }, { "epoch": 1.5225611877972391, "grad_norm": 19.622214769945217, "learning_rate": 2e-06, "loss": 0.2527, "step": 6563 }, { "epoch": 1.5227931794455398, "grad_norm": 12.284109364278002, "learning_rate": 2e-06, "loss": 0.2795, "step": 6564 }, { "epoch": 1.5230251710938405, "grad_norm": 6.678563214595849, "learning_rate": 2e-06, "loss": 0.1916, "step": 6565 }, { "epoch": 1.5232571627421412, "grad_norm": 10.69967395464174, "learning_rate": 2e-06, "loss": 0.183, "step": 6566 }, { "epoch": 1.5234891543904419, "grad_norm": 18.99299029345869, "learning_rate": 2e-06, "loss": 0.2846, "step": 6567 }, { "epoch": 1.5237211460387425, "grad_norm": 9.86938009551779, "learning_rate": 2e-06, "loss": 0.2259, "step": 6568 }, { "epoch": 1.5239531376870432, "grad_norm": 12.17829700571588, "learning_rate": 2e-06, "loss": 0.221, "step": 6569 }, { "epoch": 1.524185129335344, "grad_norm": 11.699644060447417, "learning_rate": 2e-06, "loss": 0.2575, "step": 6570 }, { "epoch": 1.5244171209836446, "grad_norm": 9.703258680204174, "learning_rate": 2e-06, "loss": 0.2, "step": 6571 }, { "epoch": 1.5246491126319452, "grad_norm": 14.95404804361216, "learning_rate": 2e-06, "loss": 0.2595, "step": 6572 }, { "epoch": 1.524881104280246, "grad_norm": 13.152934576076827, "learning_rate": 2e-06, "loss": 0.1685, "step": 6573 }, { "epoch": 1.5251130959285466, "grad_norm": 15.716773941763252, "learning_rate": 2e-06, "loss": 0.2364, "step": 6574 }, { "epoch": 1.5253450875768473, "grad_norm": 10.98282431021787, "learning_rate": 2e-06, "loss": 0.2123, "step": 6575 }, { "epoch": 1.525577079225148, "grad_norm": 10.298974653319465, "learning_rate": 2e-06, "loss": 0.3139, "step": 6576 }, { "epoch": 1.5258090708734486, "grad_norm": 15.407719180498699, "learning_rate": 2e-06, "loss": 0.2313, "step": 6577 }, { "epoch": 1.5260410625217493, "grad_norm": 13.536063143083295, "learning_rate": 2e-06, "loss": 0.2669, "step": 6578 }, { "epoch": 1.52627305417005, "grad_norm": 17.303253418761933, "learning_rate": 2e-06, "loss": 0.2706, "step": 6579 }, { "epoch": 1.5265050458183507, "grad_norm": 8.999912205572087, "learning_rate": 2e-06, "loss": 0.1724, "step": 6580 }, { "epoch": 1.5267370374666513, "grad_norm": 13.214016604293978, "learning_rate": 2e-06, "loss": 0.2278, "step": 6581 }, { "epoch": 1.526969029114952, "grad_norm": 19.689311179023186, "learning_rate": 2e-06, "loss": 0.4611, "step": 6582 }, { "epoch": 1.5272010207632525, "grad_norm": 17.523858510746575, "learning_rate": 2e-06, "loss": 0.3864, "step": 6583 }, { "epoch": 1.5274330124115532, "grad_norm": 8.710260312691528, "learning_rate": 2e-06, "loss": 0.1756, "step": 6584 }, { "epoch": 1.5276650040598538, "grad_norm": 10.848455252659218, "learning_rate": 2e-06, "loss": 0.224, "step": 6585 }, { "epoch": 1.5278969957081545, "grad_norm": 16.842313323134718, "learning_rate": 2e-06, "loss": 0.2283, "step": 6586 }, { "epoch": 1.5281289873564552, "grad_norm": 14.082538162407813, "learning_rate": 2e-06, "loss": 0.2786, "step": 6587 }, { "epoch": 1.5283609790047559, "grad_norm": 13.690119420871433, "learning_rate": 2e-06, "loss": 0.213, "step": 6588 }, { "epoch": 1.5285929706530563, "grad_norm": 8.366710908032633, "learning_rate": 2e-06, "loss": 0.2031, "step": 6589 }, { "epoch": 1.528824962301357, "grad_norm": 11.625826226867636, "learning_rate": 2e-06, "loss": 0.2905, "step": 6590 }, { "epoch": 1.5290569539496577, "grad_norm": 9.64366760293919, "learning_rate": 2e-06, "loss": 0.1721, "step": 6591 }, { "epoch": 1.5292889455979584, "grad_norm": 11.064945785341019, "learning_rate": 2e-06, "loss": 0.2017, "step": 6592 }, { "epoch": 1.529520937246259, "grad_norm": 12.392594488420682, "learning_rate": 2e-06, "loss": 0.3388, "step": 6593 }, { "epoch": 1.5297529288945597, "grad_norm": 10.038529036525198, "learning_rate": 2e-06, "loss": 0.1767, "step": 6594 }, { "epoch": 1.5299849205428604, "grad_norm": 18.155270257757028, "learning_rate": 2e-06, "loss": 0.2795, "step": 6595 }, { "epoch": 1.530216912191161, "grad_norm": 18.631747785591013, "learning_rate": 2e-06, "loss": 0.4423, "step": 6596 }, { "epoch": 1.5304489038394617, "grad_norm": 11.650253566150127, "learning_rate": 2e-06, "loss": 0.1601, "step": 6597 }, { "epoch": 1.5306808954877624, "grad_norm": 11.892720717803435, "learning_rate": 2e-06, "loss": 0.2567, "step": 6598 }, { "epoch": 1.530912887136063, "grad_norm": 12.76447937851006, "learning_rate": 2e-06, "loss": 0.2272, "step": 6599 }, { "epoch": 1.5311448787843638, "grad_norm": 19.670009456521562, "learning_rate": 2e-06, "loss": 0.191, "step": 6600 }, { "epoch": 1.5313768704326645, "grad_norm": 17.911080788040636, "learning_rate": 2e-06, "loss": 0.2934, "step": 6601 }, { "epoch": 1.5316088620809651, "grad_norm": 12.02701092188653, "learning_rate": 2e-06, "loss": 0.2006, "step": 6602 }, { "epoch": 1.5318408537292658, "grad_norm": 14.254489915142843, "learning_rate": 2e-06, "loss": 0.1326, "step": 6603 }, { "epoch": 1.5320728453775665, "grad_norm": 16.942217221710525, "learning_rate": 2e-06, "loss": 0.2467, "step": 6604 }, { "epoch": 1.5323048370258672, "grad_norm": 12.251395907968385, "learning_rate": 2e-06, "loss": 0.1818, "step": 6605 }, { "epoch": 1.5325368286741679, "grad_norm": 18.873433824205463, "learning_rate": 2e-06, "loss": 0.304, "step": 6606 }, { "epoch": 1.5327688203224685, "grad_norm": 19.97499742532436, "learning_rate": 2e-06, "loss": 0.3184, "step": 6607 }, { "epoch": 1.5330008119707692, "grad_norm": 8.638731047589733, "learning_rate": 2e-06, "loss": 0.2164, "step": 6608 }, { "epoch": 1.5332328036190697, "grad_norm": 16.66699551716635, "learning_rate": 2e-06, "loss": 0.2551, "step": 6609 }, { "epoch": 1.5334647952673703, "grad_norm": 5.295961005754983, "learning_rate": 2e-06, "loss": 0.1288, "step": 6610 }, { "epoch": 1.533696786915671, "grad_norm": 21.756919902176616, "learning_rate": 2e-06, "loss": 0.2657, "step": 6611 }, { "epoch": 1.5339287785639717, "grad_norm": 16.63173357913042, "learning_rate": 2e-06, "loss": 0.3024, "step": 6612 }, { "epoch": 1.5341607702122724, "grad_norm": 15.377279500427504, "learning_rate": 2e-06, "loss": 0.1583, "step": 6613 }, { "epoch": 1.534392761860573, "grad_norm": 13.542730249995518, "learning_rate": 2e-06, "loss": 0.2867, "step": 6614 }, { "epoch": 1.5346247535088737, "grad_norm": 13.45081937791314, "learning_rate": 2e-06, "loss": 0.3402, "step": 6615 }, { "epoch": 1.5348567451571742, "grad_norm": 18.73627537803717, "learning_rate": 2e-06, "loss": 0.3025, "step": 6616 }, { "epoch": 1.5350887368054749, "grad_norm": 17.96909572583018, "learning_rate": 2e-06, "loss": 0.25, "step": 6617 }, { "epoch": 1.5353207284537755, "grad_norm": 7.296570244559349, "learning_rate": 2e-06, "loss": 0.2265, "step": 6618 }, { "epoch": 1.5355527201020762, "grad_norm": 11.84219243712135, "learning_rate": 2e-06, "loss": 0.2714, "step": 6619 }, { "epoch": 1.535784711750377, "grad_norm": 11.616406825047289, "learning_rate": 2e-06, "loss": 0.2136, "step": 6620 }, { "epoch": 1.5360167033986776, "grad_norm": 8.454661127361028, "learning_rate": 2e-06, "loss": 0.2052, "step": 6621 }, { "epoch": 1.5362486950469783, "grad_norm": 17.398773472415783, "learning_rate": 2e-06, "loss": 0.3175, "step": 6622 }, { "epoch": 1.536480686695279, "grad_norm": 14.004436701132663, "learning_rate": 2e-06, "loss": 0.1801, "step": 6623 }, { "epoch": 1.5367126783435796, "grad_norm": 15.782936325674424, "learning_rate": 2e-06, "loss": 0.1985, "step": 6624 }, { "epoch": 1.5369446699918803, "grad_norm": 8.437744584794697, "learning_rate": 2e-06, "loss": 0.1646, "step": 6625 }, { "epoch": 1.537176661640181, "grad_norm": 10.262631524625183, "learning_rate": 2e-06, "loss": 0.178, "step": 6626 }, { "epoch": 1.5374086532884816, "grad_norm": 6.3501856308833755, "learning_rate": 2e-06, "loss": 0.228, "step": 6627 }, { "epoch": 1.5376406449367823, "grad_norm": 10.01570986797597, "learning_rate": 2e-06, "loss": 0.3387, "step": 6628 }, { "epoch": 1.537872636585083, "grad_norm": 30.458849058891083, "learning_rate": 2e-06, "loss": 0.4754, "step": 6629 }, { "epoch": 1.5381046282333837, "grad_norm": 11.716062302207616, "learning_rate": 2e-06, "loss": 0.1712, "step": 6630 }, { "epoch": 1.5383366198816844, "grad_norm": 18.175307041258502, "learning_rate": 2e-06, "loss": 0.2043, "step": 6631 }, { "epoch": 1.538568611529985, "grad_norm": 12.24045972591269, "learning_rate": 2e-06, "loss": 0.179, "step": 6632 }, { "epoch": 1.5388006031782857, "grad_norm": 11.07667745607954, "learning_rate": 2e-06, "loss": 0.273, "step": 6633 }, { "epoch": 1.5390325948265864, "grad_norm": 12.272831750483638, "learning_rate": 2e-06, "loss": 0.179, "step": 6634 }, { "epoch": 1.539264586474887, "grad_norm": 12.611848727210983, "learning_rate": 2e-06, "loss": 0.411, "step": 6635 }, { "epoch": 1.5394965781231875, "grad_norm": 18.854779245821923, "learning_rate": 2e-06, "loss": 0.3932, "step": 6636 }, { "epoch": 1.5397285697714882, "grad_norm": 7.172384964319514, "learning_rate": 2e-06, "loss": 0.1187, "step": 6637 }, { "epoch": 1.5399605614197889, "grad_norm": 6.576975458207418, "learning_rate": 2e-06, "loss": 0.1599, "step": 6638 }, { "epoch": 1.5401925530680896, "grad_norm": 12.24297995933426, "learning_rate": 2e-06, "loss": 0.2164, "step": 6639 }, { "epoch": 1.5404245447163902, "grad_norm": 10.647747894323764, "learning_rate": 2e-06, "loss": 0.2583, "step": 6640 }, { "epoch": 1.540656536364691, "grad_norm": 19.60146067514771, "learning_rate": 2e-06, "loss": 0.1466, "step": 6641 }, { "epoch": 1.5408885280129916, "grad_norm": 14.684098261137065, "learning_rate": 2e-06, "loss": 0.2603, "step": 6642 }, { "epoch": 1.541120519661292, "grad_norm": 16.59216010203391, "learning_rate": 2e-06, "loss": 0.2821, "step": 6643 }, { "epoch": 1.5413525113095927, "grad_norm": 14.913905233436939, "learning_rate": 2e-06, "loss": 0.3483, "step": 6644 }, { "epoch": 1.5415845029578934, "grad_norm": 7.166202900774562, "learning_rate": 2e-06, "loss": 0.1455, "step": 6645 }, { "epoch": 1.541816494606194, "grad_norm": 21.821133413069827, "learning_rate": 2e-06, "loss": 0.3514, "step": 6646 }, { "epoch": 1.5420484862544948, "grad_norm": 12.236283152701525, "learning_rate": 2e-06, "loss": 0.2124, "step": 6647 }, { "epoch": 1.5422804779027954, "grad_norm": 17.098002615388406, "learning_rate": 2e-06, "loss": 0.2716, "step": 6648 }, { "epoch": 1.542512469551096, "grad_norm": 14.267657162351332, "learning_rate": 2e-06, "loss": 0.2191, "step": 6649 }, { "epoch": 1.5427444611993968, "grad_norm": 15.523793251848586, "learning_rate": 2e-06, "loss": 0.1445, "step": 6650 }, { "epoch": 1.5429764528476975, "grad_norm": 14.559855721210122, "learning_rate": 2e-06, "loss": 0.2379, "step": 6651 }, { "epoch": 1.5432084444959981, "grad_norm": 23.65012451053669, "learning_rate": 2e-06, "loss": 0.4326, "step": 6652 }, { "epoch": 1.5434404361442988, "grad_norm": 8.303100619952016, "learning_rate": 2e-06, "loss": 0.198, "step": 6653 }, { "epoch": 1.5436724277925995, "grad_norm": 15.935548264212265, "learning_rate": 2e-06, "loss": 0.1968, "step": 6654 }, { "epoch": 1.5439044194409002, "grad_norm": 8.883382058562992, "learning_rate": 2e-06, "loss": 0.181, "step": 6655 }, { "epoch": 1.5441364110892009, "grad_norm": 13.870904459442297, "learning_rate": 2e-06, "loss": 0.2708, "step": 6656 }, { "epoch": 1.5443684027375015, "grad_norm": 18.77365934368007, "learning_rate": 2e-06, "loss": 0.3606, "step": 6657 }, { "epoch": 1.5446003943858022, "grad_norm": 11.231032727883631, "learning_rate": 2e-06, "loss": 0.1979, "step": 6658 }, { "epoch": 1.544832386034103, "grad_norm": 13.809682217206333, "learning_rate": 2e-06, "loss": 0.212, "step": 6659 }, { "epoch": 1.5450643776824036, "grad_norm": 9.238173167632462, "learning_rate": 2e-06, "loss": 0.2164, "step": 6660 }, { "epoch": 1.5452963693307042, "grad_norm": 13.771198076915777, "learning_rate": 2e-06, "loss": 0.196, "step": 6661 }, { "epoch": 1.5455283609790047, "grad_norm": 15.557560680677668, "learning_rate": 2e-06, "loss": 0.1748, "step": 6662 }, { "epoch": 1.5457603526273054, "grad_norm": 19.399216680140757, "learning_rate": 2e-06, "loss": 0.3062, "step": 6663 }, { "epoch": 1.545992344275606, "grad_norm": 12.735134100589814, "learning_rate": 2e-06, "loss": 0.1553, "step": 6664 }, { "epoch": 1.5462243359239067, "grad_norm": 11.721525953735028, "learning_rate": 2e-06, "loss": 0.1754, "step": 6665 }, { "epoch": 1.5464563275722074, "grad_norm": 25.85267662972695, "learning_rate": 2e-06, "loss": 0.3786, "step": 6666 }, { "epoch": 1.546688319220508, "grad_norm": 9.82288963464247, "learning_rate": 2e-06, "loss": 0.22, "step": 6667 }, { "epoch": 1.5469203108688088, "grad_norm": 14.562619633467646, "learning_rate": 2e-06, "loss": 0.1804, "step": 6668 }, { "epoch": 1.5471523025171092, "grad_norm": 12.559383643951522, "learning_rate": 2e-06, "loss": 0.2596, "step": 6669 }, { "epoch": 1.54738429416541, "grad_norm": 23.46206957851582, "learning_rate": 2e-06, "loss": 0.2093, "step": 6670 }, { "epoch": 1.5476162858137106, "grad_norm": 15.949501067315126, "learning_rate": 2e-06, "loss": 0.2572, "step": 6671 }, { "epoch": 1.5478482774620113, "grad_norm": 24.868620049788493, "learning_rate": 2e-06, "loss": 0.4118, "step": 6672 }, { "epoch": 1.548080269110312, "grad_norm": 15.287923367273436, "learning_rate": 2e-06, "loss": 0.2223, "step": 6673 }, { "epoch": 1.5483122607586126, "grad_norm": 6.52400379250765, "learning_rate": 2e-06, "loss": 0.1518, "step": 6674 }, { "epoch": 1.5485442524069133, "grad_norm": 14.473581077854373, "learning_rate": 2e-06, "loss": 0.2041, "step": 6675 }, { "epoch": 1.548776244055214, "grad_norm": 13.152523903148891, "learning_rate": 2e-06, "loss": 0.2297, "step": 6676 }, { "epoch": 1.5490082357035146, "grad_norm": 15.555084560097228, "learning_rate": 2e-06, "loss": 0.2117, "step": 6677 }, { "epoch": 1.5492402273518153, "grad_norm": 18.95724262071258, "learning_rate": 2e-06, "loss": 0.2694, "step": 6678 }, { "epoch": 1.549472219000116, "grad_norm": 23.805287017623378, "learning_rate": 2e-06, "loss": 0.2882, "step": 6679 }, { "epoch": 1.5497042106484167, "grad_norm": 9.063455098713165, "learning_rate": 2e-06, "loss": 0.1562, "step": 6680 }, { "epoch": 1.5499362022967174, "grad_norm": 16.767249600071942, "learning_rate": 2e-06, "loss": 0.2859, "step": 6681 }, { "epoch": 1.550168193945018, "grad_norm": 11.262662463905274, "learning_rate": 2e-06, "loss": 0.2708, "step": 6682 }, { "epoch": 1.5504001855933187, "grad_norm": 6.92155707769241, "learning_rate": 2e-06, "loss": 0.1165, "step": 6683 }, { "epoch": 1.5506321772416194, "grad_norm": 13.00332862318548, "learning_rate": 2e-06, "loss": 0.225, "step": 6684 }, { "epoch": 1.55086416888992, "grad_norm": 16.8149521444566, "learning_rate": 2e-06, "loss": 0.2741, "step": 6685 }, { "epoch": 1.5510961605382207, "grad_norm": 19.949779449084364, "learning_rate": 2e-06, "loss": 0.4181, "step": 6686 }, { "epoch": 1.5513281521865214, "grad_norm": 15.470405223572625, "learning_rate": 2e-06, "loss": 0.2668, "step": 6687 }, { "epoch": 1.551560143834822, "grad_norm": 9.524958830303015, "learning_rate": 2e-06, "loss": 0.1999, "step": 6688 }, { "epoch": 1.5517921354831226, "grad_norm": 10.401920612753036, "learning_rate": 2e-06, "loss": 0.1673, "step": 6689 }, { "epoch": 1.5520241271314232, "grad_norm": 14.408570492378052, "learning_rate": 2e-06, "loss": 0.2244, "step": 6690 }, { "epoch": 1.552256118779724, "grad_norm": 21.054179752792802, "learning_rate": 2e-06, "loss": 0.3153, "step": 6691 }, { "epoch": 1.5524881104280246, "grad_norm": 12.234752323220984, "learning_rate": 2e-06, "loss": 0.233, "step": 6692 }, { "epoch": 1.5527201020763253, "grad_norm": 16.21777025907242, "learning_rate": 2e-06, "loss": 0.2629, "step": 6693 }, { "epoch": 1.552952093724626, "grad_norm": 12.365330556133785, "learning_rate": 2e-06, "loss": 0.225, "step": 6694 }, { "epoch": 1.5531840853729266, "grad_norm": 13.604646105301338, "learning_rate": 2e-06, "loss": 0.1856, "step": 6695 }, { "epoch": 1.553416077021227, "grad_norm": 11.565050637758615, "learning_rate": 2e-06, "loss": 0.1909, "step": 6696 }, { "epoch": 1.5536480686695278, "grad_norm": 10.095837283548882, "learning_rate": 2e-06, "loss": 0.144, "step": 6697 }, { "epoch": 1.5538800603178284, "grad_norm": 26.34064370188464, "learning_rate": 2e-06, "loss": 0.2701, "step": 6698 }, { "epoch": 1.5541120519661291, "grad_norm": 15.714451308894228, "learning_rate": 2e-06, "loss": 0.2907, "step": 6699 }, { "epoch": 1.5543440436144298, "grad_norm": 23.67048393742744, "learning_rate": 2e-06, "loss": 0.28, "step": 6700 }, { "epoch": 1.5545760352627305, "grad_norm": 19.769761573650925, "learning_rate": 2e-06, "loss": 0.4424, "step": 6701 }, { "epoch": 1.5548080269110311, "grad_norm": 13.912627564672938, "learning_rate": 2e-06, "loss": 0.2099, "step": 6702 }, { "epoch": 1.5550400185593318, "grad_norm": 17.84318080318395, "learning_rate": 2e-06, "loss": 0.2191, "step": 6703 }, { "epoch": 1.5552720102076325, "grad_norm": 17.882379196787138, "learning_rate": 2e-06, "loss": 0.2854, "step": 6704 }, { "epoch": 1.5555040018559332, "grad_norm": 19.754686577344806, "learning_rate": 2e-06, "loss": 0.2216, "step": 6705 }, { "epoch": 1.5557359935042339, "grad_norm": 12.30229564320053, "learning_rate": 2e-06, "loss": 0.2358, "step": 6706 }, { "epoch": 1.5559679851525345, "grad_norm": 11.072988829052486, "learning_rate": 2e-06, "loss": 0.3163, "step": 6707 }, { "epoch": 1.5561999768008352, "grad_norm": 11.920322064117377, "learning_rate": 2e-06, "loss": 0.184, "step": 6708 }, { "epoch": 1.556431968449136, "grad_norm": 23.620264848489672, "learning_rate": 2e-06, "loss": 0.5399, "step": 6709 }, { "epoch": 1.5566639600974366, "grad_norm": 18.340827062399427, "learning_rate": 2e-06, "loss": 0.2605, "step": 6710 }, { "epoch": 1.5568959517457372, "grad_norm": 15.804678291711879, "learning_rate": 2e-06, "loss": 0.2082, "step": 6711 }, { "epoch": 1.557127943394038, "grad_norm": 9.287038285372942, "learning_rate": 2e-06, "loss": 0.1843, "step": 6712 }, { "epoch": 1.5573599350423386, "grad_norm": 13.862382590699733, "learning_rate": 2e-06, "loss": 0.2673, "step": 6713 }, { "epoch": 1.5575919266906393, "grad_norm": 22.047374843399634, "learning_rate": 2e-06, "loss": 0.2695, "step": 6714 }, { "epoch": 1.55782391833894, "grad_norm": 8.856948070107391, "learning_rate": 2e-06, "loss": 0.1535, "step": 6715 }, { "epoch": 1.5580559099872404, "grad_norm": 12.628650705847866, "learning_rate": 2e-06, "loss": 0.223, "step": 6716 }, { "epoch": 1.558287901635541, "grad_norm": 23.188665968133286, "learning_rate": 2e-06, "loss": 0.2675, "step": 6717 }, { "epoch": 1.5585198932838418, "grad_norm": 10.686962550001526, "learning_rate": 2e-06, "loss": 0.3214, "step": 6718 }, { "epoch": 1.5587518849321425, "grad_norm": 17.718995290129744, "learning_rate": 2e-06, "loss": 0.2649, "step": 6719 }, { "epoch": 1.5589838765804431, "grad_norm": 13.700206729941808, "learning_rate": 2e-06, "loss": 0.2116, "step": 6720 }, { "epoch": 1.5592158682287438, "grad_norm": 13.688686894063217, "learning_rate": 2e-06, "loss": 0.2587, "step": 6721 }, { "epoch": 1.5594478598770443, "grad_norm": 12.49552285498753, "learning_rate": 2e-06, "loss": 0.2303, "step": 6722 }, { "epoch": 1.559679851525345, "grad_norm": 13.62322311967421, "learning_rate": 2e-06, "loss": 0.1915, "step": 6723 }, { "epoch": 1.5599118431736456, "grad_norm": 7.434462107481903, "learning_rate": 2e-06, "loss": 0.1737, "step": 6724 }, { "epoch": 1.5601438348219463, "grad_norm": 10.92478845203364, "learning_rate": 2e-06, "loss": 0.2139, "step": 6725 }, { "epoch": 1.560375826470247, "grad_norm": 12.50872192630918, "learning_rate": 2e-06, "loss": 0.2493, "step": 6726 }, { "epoch": 1.5606078181185477, "grad_norm": 14.669812825175278, "learning_rate": 2e-06, "loss": 0.2171, "step": 6727 }, { "epoch": 1.5608398097668483, "grad_norm": 16.867602158489653, "learning_rate": 2e-06, "loss": 0.1857, "step": 6728 }, { "epoch": 1.561071801415149, "grad_norm": 24.13353406032791, "learning_rate": 2e-06, "loss": 0.2582, "step": 6729 }, { "epoch": 1.5613037930634497, "grad_norm": 11.22810564138021, "learning_rate": 2e-06, "loss": 0.1928, "step": 6730 }, { "epoch": 1.5615357847117504, "grad_norm": 5.337300728209669, "learning_rate": 2e-06, "loss": 0.1356, "step": 6731 }, { "epoch": 1.561767776360051, "grad_norm": 14.689968164162265, "learning_rate": 2e-06, "loss": 0.3578, "step": 6732 }, { "epoch": 1.5619997680083517, "grad_norm": 11.167047902657862, "learning_rate": 2e-06, "loss": 0.1839, "step": 6733 }, { "epoch": 1.5622317596566524, "grad_norm": 8.165761931642459, "learning_rate": 2e-06, "loss": 0.1836, "step": 6734 }, { "epoch": 1.562463751304953, "grad_norm": 10.54991480371774, "learning_rate": 2e-06, "loss": 0.1719, "step": 6735 }, { "epoch": 1.5626957429532538, "grad_norm": 11.442688510228646, "learning_rate": 2e-06, "loss": 0.2653, "step": 6736 }, { "epoch": 1.5629277346015544, "grad_norm": 17.070029585225544, "learning_rate": 2e-06, "loss": 0.2521, "step": 6737 }, { "epoch": 1.563159726249855, "grad_norm": 19.686280230299616, "learning_rate": 2e-06, "loss": 0.244, "step": 6738 }, { "epoch": 1.5633917178981558, "grad_norm": 9.948167478079785, "learning_rate": 2e-06, "loss": 0.2483, "step": 6739 }, { "epoch": 1.5636237095464565, "grad_norm": 24.922215627756376, "learning_rate": 2e-06, "loss": 0.2743, "step": 6740 }, { "epoch": 1.5638557011947571, "grad_norm": 16.741152692172665, "learning_rate": 2e-06, "loss": 0.2465, "step": 6741 }, { "epoch": 1.5640876928430576, "grad_norm": 10.47916098313398, "learning_rate": 2e-06, "loss": 0.2147, "step": 6742 }, { "epoch": 1.5643196844913583, "grad_norm": 9.617663992594, "learning_rate": 2e-06, "loss": 0.1909, "step": 6743 }, { "epoch": 1.564551676139659, "grad_norm": 11.671052205411605, "learning_rate": 2e-06, "loss": 0.2597, "step": 6744 }, { "epoch": 1.5647836677879596, "grad_norm": 12.058931258856934, "learning_rate": 2e-06, "loss": 0.2216, "step": 6745 }, { "epoch": 1.5650156594362603, "grad_norm": 21.409795579059104, "learning_rate": 2e-06, "loss": 0.3596, "step": 6746 }, { "epoch": 1.565247651084561, "grad_norm": 11.764103802660122, "learning_rate": 2e-06, "loss": 0.1823, "step": 6747 }, { "epoch": 1.5654796427328617, "grad_norm": 9.706993728012272, "learning_rate": 2e-06, "loss": 0.169, "step": 6748 }, { "epoch": 1.5657116343811621, "grad_norm": 15.599357074918032, "learning_rate": 2e-06, "loss": 0.342, "step": 6749 }, { "epoch": 1.5659436260294628, "grad_norm": 20.41002666037572, "learning_rate": 2e-06, "loss": 0.2836, "step": 6750 }, { "epoch": 1.5661756176777635, "grad_norm": 15.460943544867607, "learning_rate": 2e-06, "loss": 0.3194, "step": 6751 }, { "epoch": 1.5664076093260642, "grad_norm": 18.23070983470633, "learning_rate": 2e-06, "loss": 0.2515, "step": 6752 }, { "epoch": 1.5666396009743648, "grad_norm": 24.89491573709899, "learning_rate": 2e-06, "loss": 0.2149, "step": 6753 }, { "epoch": 1.5668715926226655, "grad_norm": 10.42731457408122, "learning_rate": 2e-06, "loss": 0.1346, "step": 6754 }, { "epoch": 1.5671035842709662, "grad_norm": 26.145121796832107, "learning_rate": 2e-06, "loss": 0.3824, "step": 6755 }, { "epoch": 1.5673355759192669, "grad_norm": 19.222209564955683, "learning_rate": 2e-06, "loss": 0.2588, "step": 6756 }, { "epoch": 1.5675675675675675, "grad_norm": 10.470769858587463, "learning_rate": 2e-06, "loss": 0.2401, "step": 6757 }, { "epoch": 1.5677995592158682, "grad_norm": 11.749844689977595, "learning_rate": 2e-06, "loss": 0.1865, "step": 6758 }, { "epoch": 1.568031550864169, "grad_norm": 24.061754616113973, "learning_rate": 2e-06, "loss": 0.3668, "step": 6759 }, { "epoch": 1.5682635425124696, "grad_norm": 14.476040215882161, "learning_rate": 2e-06, "loss": 0.2639, "step": 6760 }, { "epoch": 1.5684955341607703, "grad_norm": 19.156412552986147, "learning_rate": 2e-06, "loss": 0.2453, "step": 6761 }, { "epoch": 1.568727525809071, "grad_norm": 9.392538890429808, "learning_rate": 2e-06, "loss": 0.207, "step": 6762 }, { "epoch": 1.5689595174573716, "grad_norm": 6.850823669102928, "learning_rate": 2e-06, "loss": 0.145, "step": 6763 }, { "epoch": 1.5691915091056723, "grad_norm": 13.466344294898505, "learning_rate": 2e-06, "loss": 0.2537, "step": 6764 }, { "epoch": 1.569423500753973, "grad_norm": 15.669892855674025, "learning_rate": 2e-06, "loss": 0.2211, "step": 6765 }, { "epoch": 1.5696554924022736, "grad_norm": 16.398892981894086, "learning_rate": 2e-06, "loss": 0.2775, "step": 6766 }, { "epoch": 1.5698874840505743, "grad_norm": 15.11369594798255, "learning_rate": 2e-06, "loss": 0.2086, "step": 6767 }, { "epoch": 1.570119475698875, "grad_norm": 16.09216950836462, "learning_rate": 2e-06, "loss": 0.2734, "step": 6768 }, { "epoch": 1.5703514673471755, "grad_norm": 11.456504129507978, "learning_rate": 2e-06, "loss": 0.2696, "step": 6769 }, { "epoch": 1.5705834589954761, "grad_norm": 31.295646221623393, "learning_rate": 2e-06, "loss": 0.2382, "step": 6770 }, { "epoch": 1.5708154506437768, "grad_norm": 14.91478972462344, "learning_rate": 2e-06, "loss": 0.2604, "step": 6771 }, { "epoch": 1.5710474422920775, "grad_norm": 14.084946879586926, "learning_rate": 2e-06, "loss": 0.2746, "step": 6772 }, { "epoch": 1.5712794339403782, "grad_norm": 7.254192390967452, "learning_rate": 2e-06, "loss": 0.1817, "step": 6773 }, { "epoch": 1.5715114255886788, "grad_norm": 21.57021793936699, "learning_rate": 2e-06, "loss": 0.287, "step": 6774 }, { "epoch": 1.5717434172369795, "grad_norm": 21.625281378712074, "learning_rate": 2e-06, "loss": 0.2452, "step": 6775 }, { "epoch": 1.57197540888528, "grad_norm": 14.467275186899649, "learning_rate": 2e-06, "loss": 0.2274, "step": 6776 }, { "epoch": 1.5722074005335807, "grad_norm": 8.852228895085334, "learning_rate": 2e-06, "loss": 0.1996, "step": 6777 }, { "epoch": 1.5724393921818813, "grad_norm": 8.286774206475272, "learning_rate": 2e-06, "loss": 0.187, "step": 6778 }, { "epoch": 1.572671383830182, "grad_norm": 14.316350630272812, "learning_rate": 2e-06, "loss": 0.2814, "step": 6779 }, { "epoch": 1.5729033754784827, "grad_norm": 12.459751760967073, "learning_rate": 2e-06, "loss": 0.2404, "step": 6780 }, { "epoch": 1.5731353671267834, "grad_norm": 7.701203511096362, "learning_rate": 2e-06, "loss": 0.1813, "step": 6781 }, { "epoch": 1.573367358775084, "grad_norm": 15.65163803258445, "learning_rate": 2e-06, "loss": 0.3066, "step": 6782 }, { "epoch": 1.5735993504233847, "grad_norm": 11.415272637022873, "learning_rate": 2e-06, "loss": 0.1425, "step": 6783 }, { "epoch": 1.5738313420716854, "grad_norm": 19.358539152020647, "learning_rate": 2e-06, "loss": 0.3298, "step": 6784 }, { "epoch": 1.574063333719986, "grad_norm": 12.680294495610841, "learning_rate": 2e-06, "loss": 0.2396, "step": 6785 }, { "epoch": 1.5742953253682868, "grad_norm": 19.26119166215534, "learning_rate": 2e-06, "loss": 0.2845, "step": 6786 }, { "epoch": 1.5745273170165874, "grad_norm": 43.35671069330195, "learning_rate": 2e-06, "loss": 0.3221, "step": 6787 }, { "epoch": 1.5747593086648881, "grad_norm": 25.079014100953824, "learning_rate": 2e-06, "loss": 0.3231, "step": 6788 }, { "epoch": 1.5749913003131888, "grad_norm": 25.518585287352856, "learning_rate": 2e-06, "loss": 0.335, "step": 6789 }, { "epoch": 1.5752232919614895, "grad_norm": 8.677770134465073, "learning_rate": 2e-06, "loss": 0.2009, "step": 6790 }, { "epoch": 1.5754552836097901, "grad_norm": 16.41414521651827, "learning_rate": 2e-06, "loss": 0.341, "step": 6791 }, { "epoch": 1.5756872752580908, "grad_norm": 14.16484596317382, "learning_rate": 2e-06, "loss": 0.1712, "step": 6792 }, { "epoch": 1.5759192669063915, "grad_norm": 31.22819956446191, "learning_rate": 2e-06, "loss": 0.3924, "step": 6793 }, { "epoch": 1.5761512585546922, "grad_norm": 10.63781472409317, "learning_rate": 2e-06, "loss": 0.1474, "step": 6794 }, { "epoch": 1.5763832502029929, "grad_norm": 15.8712959128847, "learning_rate": 2e-06, "loss": 0.2747, "step": 6795 }, { "epoch": 1.5766152418512933, "grad_norm": 16.81316543278334, "learning_rate": 2e-06, "loss": 0.2064, "step": 6796 }, { "epoch": 1.576847233499594, "grad_norm": 10.956852812678623, "learning_rate": 2e-06, "loss": 0.2405, "step": 6797 }, { "epoch": 1.5770792251478947, "grad_norm": 10.47422890616429, "learning_rate": 2e-06, "loss": 0.2584, "step": 6798 }, { "epoch": 1.5773112167961953, "grad_norm": 11.247698042368608, "learning_rate": 2e-06, "loss": 0.2368, "step": 6799 }, { "epoch": 1.577543208444496, "grad_norm": 11.458253221758767, "learning_rate": 2e-06, "loss": 0.2327, "step": 6800 }, { "epoch": 1.5777752000927967, "grad_norm": 8.372281579949735, "learning_rate": 2e-06, "loss": 0.2589, "step": 6801 }, { "epoch": 1.5780071917410972, "grad_norm": 23.002950702220993, "learning_rate": 2e-06, "loss": 0.2453, "step": 6802 }, { "epoch": 1.5782391833893978, "grad_norm": 8.10206962011327, "learning_rate": 2e-06, "loss": 0.1706, "step": 6803 }, { "epoch": 1.5784711750376985, "grad_norm": 5.549188344327498, "learning_rate": 2e-06, "loss": 0.1771, "step": 6804 }, { "epoch": 1.5787031666859992, "grad_norm": 14.578356313178427, "learning_rate": 2e-06, "loss": 0.2663, "step": 6805 }, { "epoch": 1.5789351583342999, "grad_norm": 10.42207019577819, "learning_rate": 2e-06, "loss": 0.2595, "step": 6806 }, { "epoch": 1.5791671499826005, "grad_norm": 6.864112094691692, "learning_rate": 2e-06, "loss": 0.1364, "step": 6807 }, { "epoch": 1.5793991416309012, "grad_norm": 4.728977241944988, "learning_rate": 2e-06, "loss": 0.1352, "step": 6808 }, { "epoch": 1.579631133279202, "grad_norm": 13.672761713026164, "learning_rate": 2e-06, "loss": 0.2204, "step": 6809 }, { "epoch": 1.5798631249275026, "grad_norm": 17.40182794118163, "learning_rate": 2e-06, "loss": 0.3978, "step": 6810 }, { "epoch": 1.5800951165758033, "grad_norm": 15.13729096710601, "learning_rate": 2e-06, "loss": 0.1737, "step": 6811 }, { "epoch": 1.580327108224104, "grad_norm": 21.127192170719233, "learning_rate": 2e-06, "loss": 0.5297, "step": 6812 }, { "epoch": 1.5805590998724046, "grad_norm": 16.860482121408943, "learning_rate": 2e-06, "loss": 0.1868, "step": 6813 }, { "epoch": 1.5807910915207053, "grad_norm": 10.21009934750211, "learning_rate": 2e-06, "loss": 0.2236, "step": 6814 }, { "epoch": 1.581023083169006, "grad_norm": 15.657316076566811, "learning_rate": 2e-06, "loss": 0.2979, "step": 6815 }, { "epoch": 1.5812550748173066, "grad_norm": 15.242342987174924, "learning_rate": 2e-06, "loss": 0.3382, "step": 6816 }, { "epoch": 1.5814870664656073, "grad_norm": 8.972983513729012, "learning_rate": 2e-06, "loss": 0.1847, "step": 6817 }, { "epoch": 1.581719058113908, "grad_norm": 8.861874703597897, "learning_rate": 2e-06, "loss": 0.2629, "step": 6818 }, { "epoch": 1.5819510497622087, "grad_norm": 11.282644462145537, "learning_rate": 2e-06, "loss": 0.2448, "step": 6819 }, { "epoch": 1.5821830414105094, "grad_norm": 16.643703301291083, "learning_rate": 2e-06, "loss": 0.218, "step": 6820 }, { "epoch": 1.58241503305881, "grad_norm": 21.338207571831255, "learning_rate": 2e-06, "loss": 0.3162, "step": 6821 }, { "epoch": 1.5826470247071105, "grad_norm": 10.456117076657767, "learning_rate": 2e-06, "loss": 0.1931, "step": 6822 }, { "epoch": 1.5828790163554112, "grad_norm": 20.253880071565263, "learning_rate": 2e-06, "loss": 0.3495, "step": 6823 }, { "epoch": 1.5831110080037119, "grad_norm": 17.609409195262224, "learning_rate": 2e-06, "loss": 0.3101, "step": 6824 }, { "epoch": 1.5833429996520125, "grad_norm": 14.320650363686498, "learning_rate": 2e-06, "loss": 0.2719, "step": 6825 }, { "epoch": 1.5835749913003132, "grad_norm": 18.748152428299427, "learning_rate": 2e-06, "loss": 0.2974, "step": 6826 }, { "epoch": 1.5838069829486139, "grad_norm": 9.52741203825633, "learning_rate": 2e-06, "loss": 0.1331, "step": 6827 }, { "epoch": 1.5840389745969146, "grad_norm": 18.27852816483889, "learning_rate": 2e-06, "loss": 0.3419, "step": 6828 }, { "epoch": 1.584270966245215, "grad_norm": 9.530308694977986, "learning_rate": 2e-06, "loss": 0.1755, "step": 6829 }, { "epoch": 1.5845029578935157, "grad_norm": 16.07479575909688, "learning_rate": 2e-06, "loss": 0.2276, "step": 6830 }, { "epoch": 1.5847349495418164, "grad_norm": 5.195872495498322, "learning_rate": 2e-06, "loss": 0.1703, "step": 6831 }, { "epoch": 1.584966941190117, "grad_norm": 9.3011970800669, "learning_rate": 2e-06, "loss": 0.2328, "step": 6832 }, { "epoch": 1.5851989328384177, "grad_norm": 11.992368937834383, "learning_rate": 2e-06, "loss": 0.2196, "step": 6833 }, { "epoch": 1.5854309244867184, "grad_norm": 8.603011392573826, "learning_rate": 2e-06, "loss": 0.2124, "step": 6834 }, { "epoch": 1.585662916135019, "grad_norm": 12.27925572845166, "learning_rate": 2e-06, "loss": 0.243, "step": 6835 }, { "epoch": 1.5858949077833198, "grad_norm": 17.79861330852824, "learning_rate": 2e-06, "loss": 0.2526, "step": 6836 }, { "epoch": 1.5861268994316204, "grad_norm": 8.642526433758599, "learning_rate": 2e-06, "loss": 0.1874, "step": 6837 }, { "epoch": 1.5863588910799211, "grad_norm": 7.819458952164534, "learning_rate": 2e-06, "loss": 0.172, "step": 6838 }, { "epoch": 1.5865908827282218, "grad_norm": 14.04549995988459, "learning_rate": 2e-06, "loss": 0.2023, "step": 6839 }, { "epoch": 1.5868228743765225, "grad_norm": 9.58691437742038, "learning_rate": 2e-06, "loss": 0.1658, "step": 6840 }, { "epoch": 1.5870548660248232, "grad_norm": 7.415634246482783, "learning_rate": 2e-06, "loss": 0.1984, "step": 6841 }, { "epoch": 1.5872868576731238, "grad_norm": 7.101726327086242, "learning_rate": 2e-06, "loss": 0.1825, "step": 6842 }, { "epoch": 1.5875188493214245, "grad_norm": 7.409833020702544, "learning_rate": 2e-06, "loss": 0.173, "step": 6843 }, { "epoch": 1.5877508409697252, "grad_norm": 5.643831335022831, "learning_rate": 2e-06, "loss": 0.1592, "step": 6844 }, { "epoch": 1.5879828326180259, "grad_norm": 12.266503536628118, "learning_rate": 2e-06, "loss": 0.266, "step": 6845 }, { "epoch": 1.5882148242663265, "grad_norm": 17.78772424839602, "learning_rate": 2e-06, "loss": 0.3874, "step": 6846 }, { "epoch": 1.5884468159146272, "grad_norm": 15.623174768233348, "learning_rate": 2e-06, "loss": 0.2541, "step": 6847 }, { "epoch": 1.588678807562928, "grad_norm": 14.267785946840403, "learning_rate": 2e-06, "loss": 0.3315, "step": 6848 }, { "epoch": 1.5889107992112284, "grad_norm": 25.090609601792437, "learning_rate": 2e-06, "loss": 0.1686, "step": 6849 }, { "epoch": 1.589142790859529, "grad_norm": 10.182624698844203, "learning_rate": 2e-06, "loss": 0.1973, "step": 6850 }, { "epoch": 1.5893747825078297, "grad_norm": 16.37330122299087, "learning_rate": 2e-06, "loss": 0.2241, "step": 6851 }, { "epoch": 1.5896067741561304, "grad_norm": 13.459030559524912, "learning_rate": 2e-06, "loss": 0.2656, "step": 6852 }, { "epoch": 1.589838765804431, "grad_norm": 18.64639791565411, "learning_rate": 2e-06, "loss": 0.2362, "step": 6853 }, { "epoch": 1.5900707574527317, "grad_norm": 8.167161131211438, "learning_rate": 2e-06, "loss": 0.2205, "step": 6854 }, { "epoch": 1.5903027491010322, "grad_norm": 12.041969699634368, "learning_rate": 2e-06, "loss": 0.2306, "step": 6855 }, { "epoch": 1.5905347407493329, "grad_norm": 14.858497088147926, "learning_rate": 2e-06, "loss": 0.2603, "step": 6856 }, { "epoch": 1.5907667323976336, "grad_norm": 12.226650589555664, "learning_rate": 2e-06, "loss": 0.2172, "step": 6857 }, { "epoch": 1.5909987240459342, "grad_norm": 9.08808966600926, "learning_rate": 2e-06, "loss": 0.1842, "step": 6858 }, { "epoch": 1.591230715694235, "grad_norm": 13.717394027158436, "learning_rate": 2e-06, "loss": 0.2377, "step": 6859 }, { "epoch": 1.5914627073425356, "grad_norm": 13.078249717134886, "learning_rate": 2e-06, "loss": 0.2417, "step": 6860 }, { "epoch": 1.5916946989908363, "grad_norm": 28.056304082891295, "learning_rate": 2e-06, "loss": 0.2097, "step": 6861 }, { "epoch": 1.591926690639137, "grad_norm": 14.645870930917718, "learning_rate": 2e-06, "loss": 0.1665, "step": 6862 }, { "epoch": 1.5921586822874376, "grad_norm": 15.310789998713801, "learning_rate": 2e-06, "loss": 0.3082, "step": 6863 }, { "epoch": 1.5923906739357383, "grad_norm": 20.451507985504687, "learning_rate": 2e-06, "loss": 0.2574, "step": 6864 }, { "epoch": 1.592622665584039, "grad_norm": 21.807670305901297, "learning_rate": 2e-06, "loss": 0.3035, "step": 6865 }, { "epoch": 1.5928546572323397, "grad_norm": 19.895841208070944, "learning_rate": 2e-06, "loss": 0.2902, "step": 6866 }, { "epoch": 1.5930866488806403, "grad_norm": 8.457046599193541, "learning_rate": 2e-06, "loss": 0.2041, "step": 6867 }, { "epoch": 1.593318640528941, "grad_norm": 22.47335248001497, "learning_rate": 2e-06, "loss": 0.4859, "step": 6868 }, { "epoch": 1.5935506321772417, "grad_norm": 6.685828572757369, "learning_rate": 2e-06, "loss": 0.1665, "step": 6869 }, { "epoch": 1.5937826238255424, "grad_norm": 12.58042735255098, "learning_rate": 2e-06, "loss": 0.1601, "step": 6870 }, { "epoch": 1.594014615473843, "grad_norm": 22.317484002899555, "learning_rate": 2e-06, "loss": 0.242, "step": 6871 }, { "epoch": 1.5942466071221437, "grad_norm": 18.39607935176384, "learning_rate": 2e-06, "loss": 0.3981, "step": 6872 }, { "epoch": 1.5944785987704444, "grad_norm": 10.360346744792217, "learning_rate": 2e-06, "loss": 0.1472, "step": 6873 }, { "epoch": 1.594710590418745, "grad_norm": 11.671580147838984, "learning_rate": 2e-06, "loss": 0.2312, "step": 6874 }, { "epoch": 1.5949425820670455, "grad_norm": 8.029919131678305, "learning_rate": 2e-06, "loss": 0.234, "step": 6875 }, { "epoch": 1.5951745737153462, "grad_norm": 14.454042603280314, "learning_rate": 2e-06, "loss": 0.2537, "step": 6876 }, { "epoch": 1.595406565363647, "grad_norm": 14.486299136318895, "learning_rate": 2e-06, "loss": 0.2426, "step": 6877 }, { "epoch": 1.5956385570119476, "grad_norm": 14.517852619599177, "learning_rate": 2e-06, "loss": 0.2604, "step": 6878 }, { "epoch": 1.5958705486602482, "grad_norm": 13.24440098634251, "learning_rate": 2e-06, "loss": 0.2915, "step": 6879 }, { "epoch": 1.596102540308549, "grad_norm": 12.914902414937012, "learning_rate": 2e-06, "loss": 0.3206, "step": 6880 }, { "epoch": 1.5963345319568496, "grad_norm": 12.885798355302601, "learning_rate": 2e-06, "loss": 0.1833, "step": 6881 }, { "epoch": 1.59656652360515, "grad_norm": 15.829499481850467, "learning_rate": 2e-06, "loss": 0.2901, "step": 6882 }, { "epoch": 1.5967985152534507, "grad_norm": 11.329045094579913, "learning_rate": 2e-06, "loss": 0.241, "step": 6883 }, { "epoch": 1.5970305069017514, "grad_norm": 14.853956669259542, "learning_rate": 2e-06, "loss": 0.2227, "step": 6884 }, { "epoch": 1.597262498550052, "grad_norm": 9.383651932545247, "learning_rate": 2e-06, "loss": 0.1166, "step": 6885 }, { "epoch": 1.5974944901983528, "grad_norm": 29.26111858454847, "learning_rate": 2e-06, "loss": 0.3973, "step": 6886 }, { "epoch": 1.5977264818466534, "grad_norm": 6.7484712455436116, "learning_rate": 2e-06, "loss": 0.1273, "step": 6887 }, { "epoch": 1.5979584734949541, "grad_norm": 10.15763033087749, "learning_rate": 2e-06, "loss": 0.271, "step": 6888 }, { "epoch": 1.5981904651432548, "grad_norm": 17.35175331054969, "learning_rate": 2e-06, "loss": 0.4555, "step": 6889 }, { "epoch": 1.5984224567915555, "grad_norm": 12.735975895446408, "learning_rate": 2e-06, "loss": 0.258, "step": 6890 }, { "epoch": 1.5986544484398562, "grad_norm": 12.899543658492155, "learning_rate": 2e-06, "loss": 0.1864, "step": 6891 }, { "epoch": 1.5988864400881568, "grad_norm": 12.698385568969032, "learning_rate": 2e-06, "loss": 0.1718, "step": 6892 }, { "epoch": 1.5991184317364575, "grad_norm": 4.28535250927259, "learning_rate": 2e-06, "loss": 0.1266, "step": 6893 }, { "epoch": 1.5993504233847582, "grad_norm": 15.392604216922926, "learning_rate": 2e-06, "loss": 0.2754, "step": 6894 }, { "epoch": 1.5995824150330589, "grad_norm": 15.137874358989361, "learning_rate": 2e-06, "loss": 0.2634, "step": 6895 }, { "epoch": 1.5998144066813595, "grad_norm": 15.254360087513062, "learning_rate": 2e-06, "loss": 0.3004, "step": 6896 }, { "epoch": 1.6000463983296602, "grad_norm": 18.28465820969029, "learning_rate": 2e-06, "loss": 0.4048, "step": 6897 }, { "epoch": 1.600278389977961, "grad_norm": 7.541889046152906, "learning_rate": 2e-06, "loss": 0.1286, "step": 6898 }, { "epoch": 1.6005103816262616, "grad_norm": 14.525111372393546, "learning_rate": 2e-06, "loss": 0.1813, "step": 6899 }, { "epoch": 1.6007423732745623, "grad_norm": 10.764190597901676, "learning_rate": 2e-06, "loss": 0.2426, "step": 6900 }, { "epoch": 1.600974364922863, "grad_norm": 16.376203207837985, "learning_rate": 2e-06, "loss": 0.3211, "step": 6901 }, { "epoch": 1.6012063565711634, "grad_norm": 8.72620445463533, "learning_rate": 2e-06, "loss": 0.1751, "step": 6902 }, { "epoch": 1.601438348219464, "grad_norm": 15.942772043804196, "learning_rate": 2e-06, "loss": 0.2968, "step": 6903 }, { "epoch": 1.6016703398677647, "grad_norm": 18.402605174063517, "learning_rate": 2e-06, "loss": 0.2, "step": 6904 }, { "epoch": 1.6019023315160654, "grad_norm": 12.426769789870608, "learning_rate": 2e-06, "loss": 0.2583, "step": 6905 }, { "epoch": 1.602134323164366, "grad_norm": 10.17124275196209, "learning_rate": 2e-06, "loss": 0.1767, "step": 6906 }, { "epoch": 1.6023663148126668, "grad_norm": 20.138982762387695, "learning_rate": 2e-06, "loss": 0.301, "step": 6907 }, { "epoch": 1.6025983064609675, "grad_norm": 14.742653023011412, "learning_rate": 2e-06, "loss": 0.2199, "step": 6908 }, { "epoch": 1.602830298109268, "grad_norm": 25.703435637577172, "learning_rate": 2e-06, "loss": 0.3056, "step": 6909 }, { "epoch": 1.6030622897575686, "grad_norm": 12.570448997393886, "learning_rate": 2e-06, "loss": 0.2229, "step": 6910 }, { "epoch": 1.6032942814058693, "grad_norm": 7.55682126082453, "learning_rate": 2e-06, "loss": 0.1689, "step": 6911 }, { "epoch": 1.60352627305417, "grad_norm": 13.806272086112495, "learning_rate": 2e-06, "loss": 0.2732, "step": 6912 }, { "epoch": 1.6037582647024706, "grad_norm": 22.446697414205737, "learning_rate": 2e-06, "loss": 0.4215, "step": 6913 }, { "epoch": 1.6039902563507713, "grad_norm": 17.32438262165776, "learning_rate": 2e-06, "loss": 0.2283, "step": 6914 }, { "epoch": 1.604222247999072, "grad_norm": 23.79328231241564, "learning_rate": 2e-06, "loss": 0.3247, "step": 6915 }, { "epoch": 1.6044542396473727, "grad_norm": 11.871426031777244, "learning_rate": 2e-06, "loss": 0.2369, "step": 6916 }, { "epoch": 1.6046862312956733, "grad_norm": 12.60168201994963, "learning_rate": 2e-06, "loss": 0.2332, "step": 6917 }, { "epoch": 1.604918222943974, "grad_norm": 14.391829705544197, "learning_rate": 2e-06, "loss": 0.2458, "step": 6918 }, { "epoch": 1.6051502145922747, "grad_norm": 22.365046280164332, "learning_rate": 2e-06, "loss": 0.3598, "step": 6919 }, { "epoch": 1.6053822062405754, "grad_norm": 9.21861265143294, "learning_rate": 2e-06, "loss": 0.1547, "step": 6920 }, { "epoch": 1.605614197888876, "grad_norm": 17.501152537868485, "learning_rate": 2e-06, "loss": 0.241, "step": 6921 }, { "epoch": 1.6058461895371767, "grad_norm": 13.363051800078775, "learning_rate": 2e-06, "loss": 0.2323, "step": 6922 }, { "epoch": 1.6060781811854774, "grad_norm": 10.522078319379862, "learning_rate": 2e-06, "loss": 0.2682, "step": 6923 }, { "epoch": 1.606310172833778, "grad_norm": 10.73490836853733, "learning_rate": 2e-06, "loss": 0.1414, "step": 6924 }, { "epoch": 1.6065421644820788, "grad_norm": 8.652664717579718, "learning_rate": 2e-06, "loss": 0.2046, "step": 6925 }, { "epoch": 1.6067741561303794, "grad_norm": 10.409928706645774, "learning_rate": 2e-06, "loss": 0.1589, "step": 6926 }, { "epoch": 1.6070061477786801, "grad_norm": 27.671244480261826, "learning_rate": 2e-06, "loss": 0.3348, "step": 6927 }, { "epoch": 1.6072381394269808, "grad_norm": 17.329101528036233, "learning_rate": 2e-06, "loss": 0.3062, "step": 6928 }, { "epoch": 1.6074701310752812, "grad_norm": 14.260022102662832, "learning_rate": 2e-06, "loss": 0.2847, "step": 6929 }, { "epoch": 1.607702122723582, "grad_norm": 34.96318462390881, "learning_rate": 2e-06, "loss": 0.4542, "step": 6930 }, { "epoch": 1.6079341143718826, "grad_norm": 10.267362931793937, "learning_rate": 2e-06, "loss": 0.2643, "step": 6931 }, { "epoch": 1.6081661060201833, "grad_norm": 17.83854286219526, "learning_rate": 2e-06, "loss": 0.2844, "step": 6932 }, { "epoch": 1.608398097668484, "grad_norm": 15.649285578386333, "learning_rate": 2e-06, "loss": 0.2164, "step": 6933 }, { "epoch": 1.6086300893167846, "grad_norm": 11.59450320536855, "learning_rate": 2e-06, "loss": 0.2121, "step": 6934 }, { "epoch": 1.608862080965085, "grad_norm": 14.804743000417668, "learning_rate": 2e-06, "loss": 0.2726, "step": 6935 }, { "epoch": 1.6090940726133858, "grad_norm": 27.013454990211674, "learning_rate": 2e-06, "loss": 0.2249, "step": 6936 }, { "epoch": 1.6093260642616865, "grad_norm": 13.469542011658062, "learning_rate": 2e-06, "loss": 0.2222, "step": 6937 }, { "epoch": 1.6095580559099871, "grad_norm": 6.0198227896653975, "learning_rate": 2e-06, "loss": 0.1241, "step": 6938 }, { "epoch": 1.6097900475582878, "grad_norm": 15.411189902368585, "learning_rate": 2e-06, "loss": 0.2981, "step": 6939 }, { "epoch": 1.6100220392065885, "grad_norm": 11.78771720357112, "learning_rate": 2e-06, "loss": 0.2158, "step": 6940 }, { "epoch": 1.6102540308548892, "grad_norm": 22.74730256036679, "learning_rate": 2e-06, "loss": 0.3225, "step": 6941 }, { "epoch": 1.6104860225031898, "grad_norm": 10.730879983272139, "learning_rate": 2e-06, "loss": 0.248, "step": 6942 }, { "epoch": 1.6107180141514905, "grad_norm": 11.91366434944726, "learning_rate": 2e-06, "loss": 0.1315, "step": 6943 }, { "epoch": 1.6109500057997912, "grad_norm": 13.894807052318972, "learning_rate": 2e-06, "loss": 0.2077, "step": 6944 }, { "epoch": 1.6111819974480919, "grad_norm": 25.650238814998406, "learning_rate": 2e-06, "loss": 0.3684, "step": 6945 }, { "epoch": 1.6114139890963926, "grad_norm": 20.577245785155775, "learning_rate": 2e-06, "loss": 0.3512, "step": 6946 }, { "epoch": 1.6116459807446932, "grad_norm": 15.803228741453447, "learning_rate": 2e-06, "loss": 0.2321, "step": 6947 }, { "epoch": 1.611877972392994, "grad_norm": 9.749549037933088, "learning_rate": 2e-06, "loss": 0.2645, "step": 6948 }, { "epoch": 1.6121099640412946, "grad_norm": 19.299638527391338, "learning_rate": 2e-06, "loss": 0.2504, "step": 6949 }, { "epoch": 1.6123419556895953, "grad_norm": 14.484103820826514, "learning_rate": 2e-06, "loss": 0.2341, "step": 6950 }, { "epoch": 1.612573947337896, "grad_norm": 18.78291403294643, "learning_rate": 2e-06, "loss": 0.2847, "step": 6951 }, { "epoch": 1.6128059389861966, "grad_norm": 11.665599286408527, "learning_rate": 2e-06, "loss": 0.1852, "step": 6952 }, { "epoch": 1.6130379306344973, "grad_norm": 9.265971566495484, "learning_rate": 2e-06, "loss": 0.2277, "step": 6953 }, { "epoch": 1.613269922282798, "grad_norm": 8.44029894218763, "learning_rate": 2e-06, "loss": 0.1743, "step": 6954 }, { "epoch": 1.6135019139310984, "grad_norm": 13.715280174043034, "learning_rate": 2e-06, "loss": 0.2562, "step": 6955 }, { "epoch": 1.613733905579399, "grad_norm": 9.560914543568476, "learning_rate": 2e-06, "loss": 0.1785, "step": 6956 }, { "epoch": 1.6139658972276998, "grad_norm": 4.536975004892732, "learning_rate": 2e-06, "loss": 0.1289, "step": 6957 }, { "epoch": 1.6141978888760005, "grad_norm": 9.108618213148045, "learning_rate": 2e-06, "loss": 0.2254, "step": 6958 }, { "epoch": 1.6144298805243011, "grad_norm": 15.068677329144581, "learning_rate": 2e-06, "loss": 0.344, "step": 6959 }, { "epoch": 1.6146618721726018, "grad_norm": 10.657346641654241, "learning_rate": 2e-06, "loss": 0.2706, "step": 6960 }, { "epoch": 1.6148938638209025, "grad_norm": 10.836273054781074, "learning_rate": 2e-06, "loss": 0.2135, "step": 6961 }, { "epoch": 1.615125855469203, "grad_norm": 15.3784691856773, "learning_rate": 2e-06, "loss": 0.2182, "step": 6962 }, { "epoch": 1.6153578471175036, "grad_norm": 12.18357080868277, "learning_rate": 2e-06, "loss": 0.1148, "step": 6963 }, { "epoch": 1.6155898387658043, "grad_norm": 16.523784611503427, "learning_rate": 2e-06, "loss": 0.2526, "step": 6964 }, { "epoch": 1.615821830414105, "grad_norm": 13.457293895959346, "learning_rate": 2e-06, "loss": 0.2961, "step": 6965 }, { "epoch": 1.6160538220624057, "grad_norm": 9.172409383148903, "learning_rate": 2e-06, "loss": 0.2673, "step": 6966 }, { "epoch": 1.6162858137107063, "grad_norm": 10.592448387883387, "learning_rate": 2e-06, "loss": 0.2896, "step": 6967 }, { "epoch": 1.616517805359007, "grad_norm": 8.043362460788911, "learning_rate": 2e-06, "loss": 0.2057, "step": 6968 }, { "epoch": 1.6167497970073077, "grad_norm": 13.452763397776307, "learning_rate": 2e-06, "loss": 0.2028, "step": 6969 }, { "epoch": 1.6169817886556084, "grad_norm": 11.889829935690594, "learning_rate": 2e-06, "loss": 0.3731, "step": 6970 }, { "epoch": 1.617213780303909, "grad_norm": 15.55403517134958, "learning_rate": 2e-06, "loss": 0.2903, "step": 6971 }, { "epoch": 1.6174457719522097, "grad_norm": 10.142859304319238, "learning_rate": 2e-06, "loss": 0.3078, "step": 6972 }, { "epoch": 1.6176777636005104, "grad_norm": 22.015360469237347, "learning_rate": 2e-06, "loss": 0.3501, "step": 6973 }, { "epoch": 1.617909755248811, "grad_norm": 16.099404708706622, "learning_rate": 2e-06, "loss": 0.2518, "step": 6974 }, { "epoch": 1.6181417468971118, "grad_norm": 16.155557913431416, "learning_rate": 2e-06, "loss": 0.3623, "step": 6975 }, { "epoch": 1.6183737385454124, "grad_norm": 12.094711432250437, "learning_rate": 2e-06, "loss": 0.228, "step": 6976 }, { "epoch": 1.6186057301937131, "grad_norm": 9.624041117699276, "learning_rate": 2e-06, "loss": 0.2488, "step": 6977 }, { "epoch": 1.6188377218420138, "grad_norm": 11.654084378361608, "learning_rate": 2e-06, "loss": 0.1419, "step": 6978 }, { "epoch": 1.6190697134903145, "grad_norm": 10.785825490097595, "learning_rate": 2e-06, "loss": 0.2813, "step": 6979 }, { "epoch": 1.6193017051386152, "grad_norm": 15.670027219809535, "learning_rate": 2e-06, "loss": 0.2212, "step": 6980 }, { "epoch": 1.6195336967869158, "grad_norm": 10.37649965458519, "learning_rate": 2e-06, "loss": 0.3193, "step": 6981 }, { "epoch": 1.6197656884352163, "grad_norm": 12.447543343424678, "learning_rate": 2e-06, "loss": 0.351, "step": 6982 }, { "epoch": 1.619997680083517, "grad_norm": 20.356869900294978, "learning_rate": 2e-06, "loss": 0.3829, "step": 6983 }, { "epoch": 1.6202296717318176, "grad_norm": 6.864750951398252, "learning_rate": 2e-06, "loss": 0.1783, "step": 6984 }, { "epoch": 1.6204616633801183, "grad_norm": 12.418089662331026, "learning_rate": 2e-06, "loss": 0.3723, "step": 6985 }, { "epoch": 1.620693655028419, "grad_norm": 12.540617596943656, "learning_rate": 2e-06, "loss": 0.3026, "step": 6986 }, { "epoch": 1.6209256466767197, "grad_norm": 19.082797466653933, "learning_rate": 2e-06, "loss": 0.4161, "step": 6987 }, { "epoch": 1.6211576383250201, "grad_norm": 7.163325569917312, "learning_rate": 2e-06, "loss": 0.1496, "step": 6988 }, { "epoch": 1.6213896299733208, "grad_norm": 7.111859392755355, "learning_rate": 2e-06, "loss": 0.1597, "step": 6989 }, { "epoch": 1.6216216216216215, "grad_norm": 19.57450811713846, "learning_rate": 2e-06, "loss": 0.2889, "step": 6990 }, { "epoch": 1.6218536132699222, "grad_norm": 15.036470054451497, "learning_rate": 2e-06, "loss": 0.2912, "step": 6991 }, { "epoch": 1.6220856049182228, "grad_norm": 15.753907717110106, "learning_rate": 2e-06, "loss": 0.1838, "step": 6992 }, { "epoch": 1.6223175965665235, "grad_norm": 13.527695512482605, "learning_rate": 2e-06, "loss": 0.2222, "step": 6993 }, { "epoch": 1.6225495882148242, "grad_norm": 14.951485530049725, "learning_rate": 2e-06, "loss": 0.4262, "step": 6994 }, { "epoch": 1.6227815798631249, "grad_norm": 10.88914474498736, "learning_rate": 2e-06, "loss": 0.2445, "step": 6995 }, { "epoch": 1.6230135715114256, "grad_norm": 7.7800678868379265, "learning_rate": 2e-06, "loss": 0.1926, "step": 6996 }, { "epoch": 1.6232455631597262, "grad_norm": 8.47913440262279, "learning_rate": 2e-06, "loss": 0.1662, "step": 6997 }, { "epoch": 1.623477554808027, "grad_norm": 17.86686445889657, "learning_rate": 2e-06, "loss": 0.3313, "step": 6998 }, { "epoch": 1.6237095464563276, "grad_norm": 7.766073380740134, "learning_rate": 2e-06, "loss": 0.1742, "step": 6999 }, { "epoch": 1.6239415381046283, "grad_norm": 15.802998754377214, "learning_rate": 2e-06, "loss": 0.3292, "step": 7000 }, { "epoch": 1.624173529752929, "grad_norm": 12.543459151492442, "learning_rate": 2e-06, "loss": 0.254, "step": 7001 }, { "epoch": 1.6244055214012296, "grad_norm": 13.798966596523735, "learning_rate": 2e-06, "loss": 0.2806, "step": 7002 }, { "epoch": 1.6246375130495303, "grad_norm": 14.669498102811584, "learning_rate": 2e-06, "loss": 0.1382, "step": 7003 }, { "epoch": 1.624869504697831, "grad_norm": 18.572808451157982, "learning_rate": 2e-06, "loss": 0.2693, "step": 7004 }, { "epoch": 1.6251014963461317, "grad_norm": 16.05801422771798, "learning_rate": 2e-06, "loss": 0.2689, "step": 7005 }, { "epoch": 1.6253334879944323, "grad_norm": 6.454360588304225, "learning_rate": 2e-06, "loss": 0.1544, "step": 7006 }, { "epoch": 1.625565479642733, "grad_norm": 9.589180560376818, "learning_rate": 2e-06, "loss": 0.2748, "step": 7007 }, { "epoch": 1.6257974712910335, "grad_norm": 14.671821741588948, "learning_rate": 2e-06, "loss": 0.2478, "step": 7008 }, { "epoch": 1.6260294629393341, "grad_norm": 22.87668683869851, "learning_rate": 2e-06, "loss": 0.2541, "step": 7009 }, { "epoch": 1.6262614545876348, "grad_norm": 13.872086745553338, "learning_rate": 2e-06, "loss": 0.3123, "step": 7010 }, { "epoch": 1.6264934462359355, "grad_norm": 22.842902471747887, "learning_rate": 2e-06, "loss": 0.2549, "step": 7011 }, { "epoch": 1.6267254378842362, "grad_norm": 15.858015016072699, "learning_rate": 2e-06, "loss": 0.3301, "step": 7012 }, { "epoch": 1.6269574295325369, "grad_norm": 17.20531113729456, "learning_rate": 2e-06, "loss": 0.3237, "step": 7013 }, { "epoch": 1.6271894211808375, "grad_norm": 16.205524427264955, "learning_rate": 2e-06, "loss": 0.2433, "step": 7014 }, { "epoch": 1.627421412829138, "grad_norm": 15.422387385622043, "learning_rate": 2e-06, "loss": 0.4239, "step": 7015 }, { "epoch": 1.6276534044774387, "grad_norm": 16.60742013346518, "learning_rate": 2e-06, "loss": 0.3804, "step": 7016 }, { "epoch": 1.6278853961257393, "grad_norm": 13.290142696386331, "learning_rate": 2e-06, "loss": 0.2607, "step": 7017 }, { "epoch": 1.62811738777404, "grad_norm": 14.580299839319386, "learning_rate": 2e-06, "loss": 0.3681, "step": 7018 }, { "epoch": 1.6283493794223407, "grad_norm": 11.730455808829811, "learning_rate": 2e-06, "loss": 0.2022, "step": 7019 }, { "epoch": 1.6285813710706414, "grad_norm": 12.826274722124499, "learning_rate": 2e-06, "loss": 0.2255, "step": 7020 }, { "epoch": 1.628813362718942, "grad_norm": 12.43214970101332, "learning_rate": 2e-06, "loss": 0.2796, "step": 7021 }, { "epoch": 1.6290453543672427, "grad_norm": 14.440187541395591, "learning_rate": 2e-06, "loss": 0.4325, "step": 7022 }, { "epoch": 1.6292773460155434, "grad_norm": 9.727986359501164, "learning_rate": 2e-06, "loss": 0.2083, "step": 7023 }, { "epoch": 1.629509337663844, "grad_norm": 15.501284501084388, "learning_rate": 2e-06, "loss": 0.2339, "step": 7024 }, { "epoch": 1.6297413293121448, "grad_norm": 13.762739971342741, "learning_rate": 2e-06, "loss": 0.2757, "step": 7025 }, { "epoch": 1.6299733209604454, "grad_norm": 12.265472728748575, "learning_rate": 2e-06, "loss": 0.3016, "step": 7026 }, { "epoch": 1.6302053126087461, "grad_norm": 15.96070196913894, "learning_rate": 2e-06, "loss": 0.2472, "step": 7027 }, { "epoch": 1.6304373042570468, "grad_norm": 14.095882691910807, "learning_rate": 2e-06, "loss": 0.2394, "step": 7028 }, { "epoch": 1.6306692959053475, "grad_norm": 16.54000688770342, "learning_rate": 2e-06, "loss": 0.2738, "step": 7029 }, { "epoch": 1.6309012875536482, "grad_norm": 11.581800441529591, "learning_rate": 2e-06, "loss": 0.2248, "step": 7030 }, { "epoch": 1.6311332792019488, "grad_norm": 10.634740006896134, "learning_rate": 2e-06, "loss": 0.1975, "step": 7031 }, { "epoch": 1.6313652708502495, "grad_norm": 5.71974278126696, "learning_rate": 2e-06, "loss": 0.1228, "step": 7032 }, { "epoch": 1.6315972624985502, "grad_norm": 11.52002459240929, "learning_rate": 2e-06, "loss": 0.1986, "step": 7033 }, { "epoch": 1.6318292541468509, "grad_norm": 12.300589751455139, "learning_rate": 2e-06, "loss": 0.2178, "step": 7034 }, { "epoch": 1.6320612457951513, "grad_norm": 11.600142147918511, "learning_rate": 2e-06, "loss": 0.2138, "step": 7035 }, { "epoch": 1.632293237443452, "grad_norm": 10.474970068593255, "learning_rate": 2e-06, "loss": 0.2222, "step": 7036 }, { "epoch": 1.6325252290917527, "grad_norm": 19.9060475508325, "learning_rate": 2e-06, "loss": 0.2173, "step": 7037 }, { "epoch": 1.6327572207400534, "grad_norm": 11.184774219572693, "learning_rate": 2e-06, "loss": 0.1366, "step": 7038 }, { "epoch": 1.632989212388354, "grad_norm": 11.245052817363465, "learning_rate": 2e-06, "loss": 0.1932, "step": 7039 }, { "epoch": 1.6332212040366547, "grad_norm": 13.971578345151503, "learning_rate": 2e-06, "loss": 0.3935, "step": 7040 }, { "epoch": 1.6334531956849554, "grad_norm": 10.115989514407806, "learning_rate": 2e-06, "loss": 0.2755, "step": 7041 }, { "epoch": 1.6336851873332559, "grad_norm": 9.736157752300581, "learning_rate": 2e-06, "loss": 0.1958, "step": 7042 }, { "epoch": 1.6339171789815565, "grad_norm": 12.293691083260265, "learning_rate": 2e-06, "loss": 0.2329, "step": 7043 }, { "epoch": 1.6341491706298572, "grad_norm": 10.391037532897096, "learning_rate": 2e-06, "loss": 0.1948, "step": 7044 }, { "epoch": 1.6343811622781579, "grad_norm": 17.20398074920175, "learning_rate": 2e-06, "loss": 0.3274, "step": 7045 }, { "epoch": 1.6346131539264586, "grad_norm": 26.074947278089915, "learning_rate": 2e-06, "loss": 0.3111, "step": 7046 }, { "epoch": 1.6348451455747592, "grad_norm": 12.340585075621851, "learning_rate": 2e-06, "loss": 0.337, "step": 7047 }, { "epoch": 1.63507713722306, "grad_norm": 11.670255160442114, "learning_rate": 2e-06, "loss": 0.2644, "step": 7048 }, { "epoch": 1.6353091288713606, "grad_norm": 10.342650552953074, "learning_rate": 2e-06, "loss": 0.2224, "step": 7049 }, { "epoch": 1.6355411205196613, "grad_norm": 9.819092085974871, "learning_rate": 2e-06, "loss": 0.2415, "step": 7050 }, { "epoch": 1.635773112167962, "grad_norm": 11.559798052526743, "learning_rate": 2e-06, "loss": 0.2196, "step": 7051 }, { "epoch": 1.6360051038162626, "grad_norm": 16.251447881217928, "learning_rate": 2e-06, "loss": 0.3706, "step": 7052 }, { "epoch": 1.6362370954645633, "grad_norm": 18.046939192717193, "learning_rate": 2e-06, "loss": 0.3217, "step": 7053 }, { "epoch": 1.636469087112864, "grad_norm": 5.2898743743472645, "learning_rate": 2e-06, "loss": 0.1283, "step": 7054 }, { "epoch": 1.6367010787611647, "grad_norm": 14.568451000162517, "learning_rate": 2e-06, "loss": 0.3354, "step": 7055 }, { "epoch": 1.6369330704094653, "grad_norm": 8.994228712771722, "learning_rate": 2e-06, "loss": 0.2345, "step": 7056 }, { "epoch": 1.637165062057766, "grad_norm": 22.65822871755929, "learning_rate": 2e-06, "loss": 0.3173, "step": 7057 }, { "epoch": 1.6373970537060667, "grad_norm": 18.544582583755645, "learning_rate": 2e-06, "loss": 0.2662, "step": 7058 }, { "epoch": 1.6376290453543674, "grad_norm": 12.768144951770473, "learning_rate": 2e-06, "loss": 0.3208, "step": 7059 }, { "epoch": 1.637861037002668, "grad_norm": 16.23576130431629, "learning_rate": 2e-06, "loss": 0.2045, "step": 7060 }, { "epoch": 1.6380930286509687, "grad_norm": 10.590738727915014, "learning_rate": 2e-06, "loss": 0.278, "step": 7061 }, { "epoch": 1.6383250202992692, "grad_norm": 25.543926792585626, "learning_rate": 2e-06, "loss": 0.3695, "step": 7062 }, { "epoch": 1.6385570119475699, "grad_norm": 13.621056889878547, "learning_rate": 2e-06, "loss": 0.3308, "step": 7063 }, { "epoch": 1.6387890035958705, "grad_norm": 11.16766633431158, "learning_rate": 2e-06, "loss": 0.3386, "step": 7064 }, { "epoch": 1.6390209952441712, "grad_norm": 10.051390960951437, "learning_rate": 2e-06, "loss": 0.1408, "step": 7065 }, { "epoch": 1.639252986892472, "grad_norm": 16.319182344726816, "learning_rate": 2e-06, "loss": 0.2849, "step": 7066 }, { "epoch": 1.6394849785407726, "grad_norm": 8.323363441312226, "learning_rate": 2e-06, "loss": 0.17, "step": 7067 }, { "epoch": 1.639716970189073, "grad_norm": 18.082884377601907, "learning_rate": 2e-06, "loss": 0.2853, "step": 7068 }, { "epoch": 1.6399489618373737, "grad_norm": 17.96858123227917, "learning_rate": 2e-06, "loss": 0.3246, "step": 7069 }, { "epoch": 1.6401809534856744, "grad_norm": 8.348504342658279, "learning_rate": 2e-06, "loss": 0.2122, "step": 7070 }, { "epoch": 1.640412945133975, "grad_norm": 14.195859872328015, "learning_rate": 2e-06, "loss": 0.2579, "step": 7071 }, { "epoch": 1.6406449367822757, "grad_norm": 8.747800547063882, "learning_rate": 2e-06, "loss": 0.1936, "step": 7072 }, { "epoch": 1.6408769284305764, "grad_norm": 13.625462706249374, "learning_rate": 2e-06, "loss": 0.1596, "step": 7073 }, { "epoch": 1.641108920078877, "grad_norm": 9.66076283450918, "learning_rate": 2e-06, "loss": 0.1452, "step": 7074 }, { "epoch": 1.6413409117271778, "grad_norm": 15.258436926230441, "learning_rate": 2e-06, "loss": 0.1653, "step": 7075 }, { "epoch": 1.6415729033754785, "grad_norm": 16.421172139281012, "learning_rate": 2e-06, "loss": 0.2951, "step": 7076 }, { "epoch": 1.6418048950237791, "grad_norm": 13.484416764834592, "learning_rate": 2e-06, "loss": 0.2514, "step": 7077 }, { "epoch": 1.6420368866720798, "grad_norm": 18.197461234111085, "learning_rate": 2e-06, "loss": 0.2852, "step": 7078 }, { "epoch": 1.6422688783203805, "grad_norm": 21.10162923051286, "learning_rate": 2e-06, "loss": 0.2829, "step": 7079 }, { "epoch": 1.6425008699686812, "grad_norm": 12.427903176075388, "learning_rate": 2e-06, "loss": 0.2173, "step": 7080 }, { "epoch": 1.6427328616169818, "grad_norm": 18.285960244290226, "learning_rate": 2e-06, "loss": 0.3021, "step": 7081 }, { "epoch": 1.6429648532652825, "grad_norm": 8.15178393543725, "learning_rate": 2e-06, "loss": 0.1949, "step": 7082 }, { "epoch": 1.6431968449135832, "grad_norm": 10.810702543448354, "learning_rate": 2e-06, "loss": 0.2177, "step": 7083 }, { "epoch": 1.6434288365618839, "grad_norm": 10.53137318843868, "learning_rate": 2e-06, "loss": 0.1946, "step": 7084 }, { "epoch": 1.6436608282101846, "grad_norm": 8.979276521118402, "learning_rate": 2e-06, "loss": 0.2341, "step": 7085 }, { "epoch": 1.6438928198584852, "grad_norm": 7.373128298424443, "learning_rate": 2e-06, "loss": 0.1703, "step": 7086 }, { "epoch": 1.644124811506786, "grad_norm": 9.086247902153618, "learning_rate": 2e-06, "loss": 0.1985, "step": 7087 }, { "epoch": 1.6443568031550864, "grad_norm": 18.829921655392535, "learning_rate": 2e-06, "loss": 0.2776, "step": 7088 }, { "epoch": 1.644588794803387, "grad_norm": 16.727235065578363, "learning_rate": 2e-06, "loss": 0.2458, "step": 7089 }, { "epoch": 1.6448207864516877, "grad_norm": 11.826408708970803, "learning_rate": 2e-06, "loss": 0.2563, "step": 7090 }, { "epoch": 1.6450527780999884, "grad_norm": 12.812812098521128, "learning_rate": 2e-06, "loss": 0.2057, "step": 7091 }, { "epoch": 1.645284769748289, "grad_norm": 18.747553139697516, "learning_rate": 2e-06, "loss": 0.2797, "step": 7092 }, { "epoch": 1.6455167613965898, "grad_norm": 21.37070998648583, "learning_rate": 2e-06, "loss": 0.2842, "step": 7093 }, { "epoch": 1.6457487530448904, "grad_norm": 16.031144141161292, "learning_rate": 2e-06, "loss": 0.2853, "step": 7094 }, { "epoch": 1.645980744693191, "grad_norm": 11.355397426862138, "learning_rate": 2e-06, "loss": 0.2688, "step": 7095 }, { "epoch": 1.6462127363414916, "grad_norm": 10.83029224429112, "learning_rate": 2e-06, "loss": 0.211, "step": 7096 }, { "epoch": 1.6464447279897922, "grad_norm": 9.158647967175433, "learning_rate": 2e-06, "loss": 0.2457, "step": 7097 }, { "epoch": 1.646676719638093, "grad_norm": 12.659950260456156, "learning_rate": 2e-06, "loss": 0.2327, "step": 7098 }, { "epoch": 1.6469087112863936, "grad_norm": 11.799338439161634, "learning_rate": 2e-06, "loss": 0.2592, "step": 7099 }, { "epoch": 1.6471407029346943, "grad_norm": 13.881708515680108, "learning_rate": 2e-06, "loss": 0.2502, "step": 7100 }, { "epoch": 1.647372694582995, "grad_norm": 15.506415761028464, "learning_rate": 2e-06, "loss": 0.2147, "step": 7101 }, { "epoch": 1.6476046862312956, "grad_norm": 14.239460943501205, "learning_rate": 2e-06, "loss": 0.2004, "step": 7102 }, { "epoch": 1.6478366778795963, "grad_norm": 12.17574573411829, "learning_rate": 2e-06, "loss": 0.1786, "step": 7103 }, { "epoch": 1.648068669527897, "grad_norm": 17.152879482952468, "learning_rate": 2e-06, "loss": 0.1815, "step": 7104 }, { "epoch": 1.6483006611761977, "grad_norm": 7.326448041591375, "learning_rate": 2e-06, "loss": 0.1246, "step": 7105 }, { "epoch": 1.6485326528244983, "grad_norm": 16.79864805443018, "learning_rate": 2e-06, "loss": 0.3143, "step": 7106 }, { "epoch": 1.648764644472799, "grad_norm": 13.370767304403438, "learning_rate": 2e-06, "loss": 0.2865, "step": 7107 }, { "epoch": 1.6489966361210997, "grad_norm": 21.51092895572337, "learning_rate": 2e-06, "loss": 0.295, "step": 7108 }, { "epoch": 1.6492286277694004, "grad_norm": 18.007298325256, "learning_rate": 2e-06, "loss": 0.3231, "step": 7109 }, { "epoch": 1.649460619417701, "grad_norm": 27.794812087978574, "learning_rate": 2e-06, "loss": 0.3555, "step": 7110 }, { "epoch": 1.6496926110660017, "grad_norm": 9.828940646642232, "learning_rate": 2e-06, "loss": 0.1938, "step": 7111 }, { "epoch": 1.6499246027143024, "grad_norm": 19.735335010849287, "learning_rate": 2e-06, "loss": 0.2739, "step": 7112 }, { "epoch": 1.650156594362603, "grad_norm": 11.14559502802546, "learning_rate": 2e-06, "loss": 0.2884, "step": 7113 }, { "epoch": 1.6503885860109038, "grad_norm": 12.179689951370753, "learning_rate": 2e-06, "loss": 0.194, "step": 7114 }, { "epoch": 1.6506205776592042, "grad_norm": 16.684675765332578, "learning_rate": 2e-06, "loss": 0.2507, "step": 7115 }, { "epoch": 1.650852569307505, "grad_norm": 6.811563807023977, "learning_rate": 2e-06, "loss": 0.1291, "step": 7116 }, { "epoch": 1.6510845609558056, "grad_norm": 10.669302710321867, "learning_rate": 2e-06, "loss": 0.2399, "step": 7117 }, { "epoch": 1.6513165526041063, "grad_norm": 11.669049742452437, "learning_rate": 2e-06, "loss": 0.2609, "step": 7118 }, { "epoch": 1.651548544252407, "grad_norm": 18.476138275697288, "learning_rate": 2e-06, "loss": 0.3279, "step": 7119 }, { "epoch": 1.6517805359007076, "grad_norm": 16.27903554685498, "learning_rate": 2e-06, "loss": 0.2781, "step": 7120 }, { "epoch": 1.652012527549008, "grad_norm": 17.359246051089706, "learning_rate": 2e-06, "loss": 0.3157, "step": 7121 }, { "epoch": 1.6522445191973087, "grad_norm": 12.366129458336568, "learning_rate": 2e-06, "loss": 0.2502, "step": 7122 }, { "epoch": 1.6524765108456094, "grad_norm": 11.376221833141713, "learning_rate": 2e-06, "loss": 0.2164, "step": 7123 }, { "epoch": 1.65270850249391, "grad_norm": 23.552326593798647, "learning_rate": 2e-06, "loss": 0.2096, "step": 7124 }, { "epoch": 1.6529404941422108, "grad_norm": 21.398110960726864, "learning_rate": 2e-06, "loss": 0.3372, "step": 7125 }, { "epoch": 1.6531724857905115, "grad_norm": 10.320835052153663, "learning_rate": 2e-06, "loss": 0.1588, "step": 7126 }, { "epoch": 1.6534044774388121, "grad_norm": 43.51247348322184, "learning_rate": 2e-06, "loss": 0.3616, "step": 7127 }, { "epoch": 1.6536364690871128, "grad_norm": 16.571909321923275, "learning_rate": 2e-06, "loss": 0.3267, "step": 7128 }, { "epoch": 1.6538684607354135, "grad_norm": 12.319199115679023, "learning_rate": 2e-06, "loss": 0.3166, "step": 7129 }, { "epoch": 1.6541004523837142, "grad_norm": 17.12975006935626, "learning_rate": 2e-06, "loss": 0.2008, "step": 7130 }, { "epoch": 1.6543324440320148, "grad_norm": 14.052613143049053, "learning_rate": 2e-06, "loss": 0.2825, "step": 7131 }, { "epoch": 1.6545644356803155, "grad_norm": 17.40443541616568, "learning_rate": 2e-06, "loss": 0.3052, "step": 7132 }, { "epoch": 1.6547964273286162, "grad_norm": 27.391161447742995, "learning_rate": 2e-06, "loss": 0.3455, "step": 7133 }, { "epoch": 1.6550284189769169, "grad_norm": 16.172270537691748, "learning_rate": 2e-06, "loss": 0.1898, "step": 7134 }, { "epoch": 1.6552604106252176, "grad_norm": 7.745639522105556, "learning_rate": 2e-06, "loss": 0.1708, "step": 7135 }, { "epoch": 1.6554924022735182, "grad_norm": 15.698002630882499, "learning_rate": 2e-06, "loss": 0.2375, "step": 7136 }, { "epoch": 1.655724393921819, "grad_norm": 18.27497738405015, "learning_rate": 2e-06, "loss": 0.2097, "step": 7137 }, { "epoch": 1.6559563855701196, "grad_norm": 19.399303027089903, "learning_rate": 2e-06, "loss": 0.2969, "step": 7138 }, { "epoch": 1.6561883772184203, "grad_norm": 15.486733537769139, "learning_rate": 2e-06, "loss": 0.3188, "step": 7139 }, { "epoch": 1.656420368866721, "grad_norm": 9.362904869460838, "learning_rate": 2e-06, "loss": 0.2204, "step": 7140 }, { "epoch": 1.6566523605150214, "grad_norm": 12.366872072316477, "learning_rate": 2e-06, "loss": 0.2335, "step": 7141 }, { "epoch": 1.656884352163322, "grad_norm": 12.496802501137957, "learning_rate": 2e-06, "loss": 0.236, "step": 7142 }, { "epoch": 1.6571163438116228, "grad_norm": 12.254734708977596, "learning_rate": 2e-06, "loss": 0.1864, "step": 7143 }, { "epoch": 1.6573483354599234, "grad_norm": 9.660063763494001, "learning_rate": 2e-06, "loss": 0.2146, "step": 7144 }, { "epoch": 1.6575803271082241, "grad_norm": 10.427701070630379, "learning_rate": 2e-06, "loss": 0.1668, "step": 7145 }, { "epoch": 1.6578123187565248, "grad_norm": 12.60420478831905, "learning_rate": 2e-06, "loss": 0.2078, "step": 7146 }, { "epoch": 1.6580443104048255, "grad_norm": 7.42629483367214, "learning_rate": 2e-06, "loss": 0.1457, "step": 7147 }, { "epoch": 1.658276302053126, "grad_norm": 18.688867676781133, "learning_rate": 2e-06, "loss": 0.3135, "step": 7148 }, { "epoch": 1.6585082937014266, "grad_norm": 18.540251604423794, "learning_rate": 2e-06, "loss": 0.3148, "step": 7149 }, { "epoch": 1.6587402853497273, "grad_norm": 15.21926828603947, "learning_rate": 2e-06, "loss": 0.3285, "step": 7150 }, { "epoch": 1.658972276998028, "grad_norm": 17.34134795151519, "learning_rate": 2e-06, "loss": 0.3138, "step": 7151 }, { "epoch": 1.6592042686463286, "grad_norm": 15.21227762992704, "learning_rate": 2e-06, "loss": 0.242, "step": 7152 }, { "epoch": 1.6594362602946293, "grad_norm": 12.275563953766433, "learning_rate": 2e-06, "loss": 0.3142, "step": 7153 }, { "epoch": 1.65966825194293, "grad_norm": 8.203457602806631, "learning_rate": 2e-06, "loss": 0.1712, "step": 7154 }, { "epoch": 1.6599002435912307, "grad_norm": 12.874196812733794, "learning_rate": 2e-06, "loss": 0.2242, "step": 7155 }, { "epoch": 1.6601322352395314, "grad_norm": 9.838459330430709, "learning_rate": 2e-06, "loss": 0.2479, "step": 7156 }, { "epoch": 1.660364226887832, "grad_norm": 12.464409721834336, "learning_rate": 2e-06, "loss": 0.2776, "step": 7157 }, { "epoch": 1.6605962185361327, "grad_norm": 11.060179146903785, "learning_rate": 2e-06, "loss": 0.2777, "step": 7158 }, { "epoch": 1.6608282101844334, "grad_norm": 19.965716682293383, "learning_rate": 2e-06, "loss": 0.2935, "step": 7159 }, { "epoch": 1.661060201832734, "grad_norm": 14.087061956800868, "learning_rate": 2e-06, "loss": 0.246, "step": 7160 }, { "epoch": 1.6612921934810347, "grad_norm": 17.92066191994181, "learning_rate": 2e-06, "loss": 0.2955, "step": 7161 }, { "epoch": 1.6615241851293354, "grad_norm": 24.209134033683643, "learning_rate": 2e-06, "loss": 0.2748, "step": 7162 }, { "epoch": 1.661756176777636, "grad_norm": 11.876774521762682, "learning_rate": 2e-06, "loss": 0.249, "step": 7163 }, { "epoch": 1.6619881684259368, "grad_norm": 11.534212929724452, "learning_rate": 2e-06, "loss": 0.1807, "step": 7164 }, { "epoch": 1.6622201600742375, "grad_norm": 15.413596450223995, "learning_rate": 2e-06, "loss": 0.2057, "step": 7165 }, { "epoch": 1.6624521517225381, "grad_norm": 14.949284064707648, "learning_rate": 2e-06, "loss": 0.1989, "step": 7166 }, { "epoch": 1.6626841433708388, "grad_norm": 7.451658989662105, "learning_rate": 2e-06, "loss": 0.2982, "step": 7167 }, { "epoch": 1.6629161350191393, "grad_norm": 17.876488522008227, "learning_rate": 2e-06, "loss": 0.3764, "step": 7168 }, { "epoch": 1.66314812666744, "grad_norm": 9.601497369310469, "learning_rate": 2e-06, "loss": 0.1856, "step": 7169 }, { "epoch": 1.6633801183157406, "grad_norm": 26.29643437167827, "learning_rate": 2e-06, "loss": 0.2852, "step": 7170 }, { "epoch": 1.6636121099640413, "grad_norm": 26.008823115734014, "learning_rate": 2e-06, "loss": 0.1835, "step": 7171 }, { "epoch": 1.663844101612342, "grad_norm": 17.775698521683385, "learning_rate": 2e-06, "loss": 0.3432, "step": 7172 }, { "epoch": 1.6640760932606427, "grad_norm": 16.226258624963403, "learning_rate": 2e-06, "loss": 0.2569, "step": 7173 }, { "epoch": 1.6643080849089433, "grad_norm": 11.083269886897604, "learning_rate": 2e-06, "loss": 0.2663, "step": 7174 }, { "epoch": 1.6645400765572438, "grad_norm": 16.496353744352007, "learning_rate": 2e-06, "loss": 0.2829, "step": 7175 }, { "epoch": 1.6647720682055445, "grad_norm": 20.16568187382624, "learning_rate": 2e-06, "loss": 0.2756, "step": 7176 }, { "epoch": 1.6650040598538451, "grad_norm": 5.926606012468249, "learning_rate": 2e-06, "loss": 0.1369, "step": 7177 }, { "epoch": 1.6652360515021458, "grad_norm": 12.910944857308234, "learning_rate": 2e-06, "loss": 0.2397, "step": 7178 }, { "epoch": 1.6654680431504465, "grad_norm": 12.738385571388708, "learning_rate": 2e-06, "loss": 0.2917, "step": 7179 }, { "epoch": 1.6657000347987472, "grad_norm": 17.232627774810663, "learning_rate": 2e-06, "loss": 0.2987, "step": 7180 }, { "epoch": 1.6659320264470479, "grad_norm": 10.560890514090739, "learning_rate": 2e-06, "loss": 0.2315, "step": 7181 }, { "epoch": 1.6661640180953485, "grad_norm": 5.4585419224197445, "learning_rate": 2e-06, "loss": 0.145, "step": 7182 }, { "epoch": 1.6663960097436492, "grad_norm": 20.192116461853846, "learning_rate": 2e-06, "loss": 0.2229, "step": 7183 }, { "epoch": 1.6666280013919499, "grad_norm": 18.001526552299513, "learning_rate": 2e-06, "loss": 0.2593, "step": 7184 }, { "epoch": 1.6668599930402506, "grad_norm": 17.958796196132493, "learning_rate": 2e-06, "loss": 0.2876, "step": 7185 }, { "epoch": 1.6670919846885512, "grad_norm": 13.977270787710811, "learning_rate": 2e-06, "loss": 0.2066, "step": 7186 }, { "epoch": 1.667323976336852, "grad_norm": 6.077093067527741, "learning_rate": 2e-06, "loss": 0.1958, "step": 7187 }, { "epoch": 1.6675559679851526, "grad_norm": 14.162961183279469, "learning_rate": 2e-06, "loss": 0.1998, "step": 7188 }, { "epoch": 1.6677879596334533, "grad_norm": 15.410769413943282, "learning_rate": 2e-06, "loss": 0.2707, "step": 7189 }, { "epoch": 1.668019951281754, "grad_norm": 14.002372039832355, "learning_rate": 2e-06, "loss": 0.249, "step": 7190 }, { "epoch": 1.6682519429300546, "grad_norm": 11.425963241642725, "learning_rate": 2e-06, "loss": 0.249, "step": 7191 }, { "epoch": 1.6684839345783553, "grad_norm": 10.424330561363904, "learning_rate": 2e-06, "loss": 0.1938, "step": 7192 }, { "epoch": 1.668715926226656, "grad_norm": 7.77959555923727, "learning_rate": 2e-06, "loss": 0.2323, "step": 7193 }, { "epoch": 1.6689479178749567, "grad_norm": 12.563718470613516, "learning_rate": 2e-06, "loss": 0.2741, "step": 7194 }, { "epoch": 1.6691799095232571, "grad_norm": 12.248035271839441, "learning_rate": 2e-06, "loss": 0.2678, "step": 7195 }, { "epoch": 1.6694119011715578, "grad_norm": 9.725214437497984, "learning_rate": 2e-06, "loss": 0.1983, "step": 7196 }, { "epoch": 1.6696438928198585, "grad_norm": 83.29799388121215, "learning_rate": 2e-06, "loss": 0.3167, "step": 7197 }, { "epoch": 1.6698758844681592, "grad_norm": 17.328766071210865, "learning_rate": 2e-06, "loss": 0.2559, "step": 7198 }, { "epoch": 1.6701078761164598, "grad_norm": 11.736583987785513, "learning_rate": 2e-06, "loss": 0.1709, "step": 7199 }, { "epoch": 1.6703398677647605, "grad_norm": 5.073238294383731, "learning_rate": 2e-06, "loss": 0.1439, "step": 7200 }, { "epoch": 1.670571859413061, "grad_norm": 23.945785671567652, "learning_rate": 2e-06, "loss": 0.4377, "step": 7201 }, { "epoch": 1.6708038510613616, "grad_norm": 17.812827348025948, "learning_rate": 2e-06, "loss": 0.2269, "step": 7202 }, { "epoch": 1.6710358427096623, "grad_norm": 13.814090695774514, "learning_rate": 2e-06, "loss": 0.2267, "step": 7203 }, { "epoch": 1.671267834357963, "grad_norm": 11.800236103771432, "learning_rate": 2e-06, "loss": 0.1501, "step": 7204 }, { "epoch": 1.6714998260062637, "grad_norm": 14.133330969888693, "learning_rate": 2e-06, "loss": 0.2594, "step": 7205 }, { "epoch": 1.6717318176545644, "grad_norm": 12.403132619329371, "learning_rate": 2e-06, "loss": 0.2171, "step": 7206 }, { "epoch": 1.671963809302865, "grad_norm": 11.387512003348485, "learning_rate": 2e-06, "loss": 0.243, "step": 7207 }, { "epoch": 1.6721958009511657, "grad_norm": 29.850020206835577, "learning_rate": 2e-06, "loss": 0.3552, "step": 7208 }, { "epoch": 1.6724277925994664, "grad_norm": 14.538118662992103, "learning_rate": 2e-06, "loss": 0.2018, "step": 7209 }, { "epoch": 1.672659784247767, "grad_norm": 17.847018805166996, "learning_rate": 2e-06, "loss": 0.3098, "step": 7210 }, { "epoch": 1.6728917758960677, "grad_norm": 11.821794128153151, "learning_rate": 2e-06, "loss": 0.3643, "step": 7211 }, { "epoch": 1.6731237675443684, "grad_norm": 20.07149739617927, "learning_rate": 2e-06, "loss": 0.3223, "step": 7212 }, { "epoch": 1.673355759192669, "grad_norm": 9.991688195420922, "learning_rate": 2e-06, "loss": 0.2121, "step": 7213 }, { "epoch": 1.6735877508409698, "grad_norm": 13.829416802908472, "learning_rate": 2e-06, "loss": 0.2223, "step": 7214 }, { "epoch": 1.6738197424892705, "grad_norm": 5.792930798689378, "learning_rate": 2e-06, "loss": 0.1783, "step": 7215 }, { "epoch": 1.6740517341375711, "grad_norm": 21.858001437576718, "learning_rate": 2e-06, "loss": 0.3587, "step": 7216 }, { "epoch": 1.6742837257858718, "grad_norm": 14.953758049208334, "learning_rate": 2e-06, "loss": 0.261, "step": 7217 }, { "epoch": 1.6745157174341725, "grad_norm": 15.32929982573524, "learning_rate": 2e-06, "loss": 0.2438, "step": 7218 }, { "epoch": 1.6747477090824732, "grad_norm": 12.461868779926432, "learning_rate": 2e-06, "loss": 0.2068, "step": 7219 }, { "epoch": 1.6749797007307738, "grad_norm": 15.767175377082367, "learning_rate": 2e-06, "loss": 0.2911, "step": 7220 }, { "epoch": 1.6752116923790743, "grad_norm": 13.8931709287092, "learning_rate": 2e-06, "loss": 0.2982, "step": 7221 }, { "epoch": 1.675443684027375, "grad_norm": 14.649828134211061, "learning_rate": 2e-06, "loss": 0.2224, "step": 7222 }, { "epoch": 1.6756756756756757, "grad_norm": 21.45201926425029, "learning_rate": 2e-06, "loss": 0.2641, "step": 7223 }, { "epoch": 1.6759076673239763, "grad_norm": 13.8367213243625, "learning_rate": 2e-06, "loss": 0.2521, "step": 7224 }, { "epoch": 1.676139658972277, "grad_norm": 10.023145113017152, "learning_rate": 2e-06, "loss": 0.22, "step": 7225 }, { "epoch": 1.6763716506205777, "grad_norm": 29.17788046000198, "learning_rate": 2e-06, "loss": 0.2037, "step": 7226 }, { "epoch": 1.6766036422688784, "grad_norm": 9.137259677515337, "learning_rate": 2e-06, "loss": 0.276, "step": 7227 }, { "epoch": 1.6768356339171788, "grad_norm": 13.453485559735219, "learning_rate": 2e-06, "loss": 0.2402, "step": 7228 }, { "epoch": 1.6770676255654795, "grad_norm": 14.936132272085098, "learning_rate": 2e-06, "loss": 0.2982, "step": 7229 }, { "epoch": 1.6772996172137802, "grad_norm": 11.989505567263949, "learning_rate": 2e-06, "loss": 0.3163, "step": 7230 }, { "epoch": 1.6775316088620809, "grad_norm": 16.932666813685692, "learning_rate": 2e-06, "loss": 0.3932, "step": 7231 }, { "epoch": 1.6777636005103815, "grad_norm": 10.189344381942949, "learning_rate": 2e-06, "loss": 0.2055, "step": 7232 }, { "epoch": 1.6779955921586822, "grad_norm": 13.877659783625909, "learning_rate": 2e-06, "loss": 0.2089, "step": 7233 }, { "epoch": 1.678227583806983, "grad_norm": 11.88046757505946, "learning_rate": 2e-06, "loss": 0.2989, "step": 7234 }, { "epoch": 1.6784595754552836, "grad_norm": 12.790805012245048, "learning_rate": 2e-06, "loss": 0.2614, "step": 7235 }, { "epoch": 1.6786915671035842, "grad_norm": 13.851701743001856, "learning_rate": 2e-06, "loss": 0.1686, "step": 7236 }, { "epoch": 1.678923558751885, "grad_norm": 17.59761637474589, "learning_rate": 2e-06, "loss": 0.2935, "step": 7237 }, { "epoch": 1.6791555504001856, "grad_norm": 13.48792494767899, "learning_rate": 2e-06, "loss": 0.3288, "step": 7238 }, { "epoch": 1.6793875420484863, "grad_norm": 20.19153728570158, "learning_rate": 2e-06, "loss": 0.2486, "step": 7239 }, { "epoch": 1.679619533696787, "grad_norm": 8.553678141416055, "learning_rate": 2e-06, "loss": 0.2466, "step": 7240 }, { "epoch": 1.6798515253450876, "grad_norm": 9.832203832754326, "learning_rate": 2e-06, "loss": 0.2081, "step": 7241 }, { "epoch": 1.6800835169933883, "grad_norm": 16.72323345948946, "learning_rate": 2e-06, "loss": 0.2816, "step": 7242 }, { "epoch": 1.680315508641689, "grad_norm": 9.139400513114731, "learning_rate": 2e-06, "loss": 0.1794, "step": 7243 }, { "epoch": 1.6805475002899897, "grad_norm": 11.173560663571159, "learning_rate": 2e-06, "loss": 0.21, "step": 7244 }, { "epoch": 1.6807794919382903, "grad_norm": 13.41759061043099, "learning_rate": 2e-06, "loss": 0.2704, "step": 7245 }, { "epoch": 1.681011483586591, "grad_norm": 11.530722433665698, "learning_rate": 2e-06, "loss": 0.2328, "step": 7246 }, { "epoch": 1.6812434752348917, "grad_norm": 48.37803041976953, "learning_rate": 2e-06, "loss": 0.1837, "step": 7247 }, { "epoch": 1.6814754668831922, "grad_norm": 9.805662828423877, "learning_rate": 2e-06, "loss": 0.3065, "step": 7248 }, { "epoch": 1.6817074585314928, "grad_norm": 14.922040681149298, "learning_rate": 2e-06, "loss": 0.2789, "step": 7249 }, { "epoch": 1.6819394501797935, "grad_norm": 16.518766137978755, "learning_rate": 2e-06, "loss": 0.197, "step": 7250 }, { "epoch": 1.6821714418280942, "grad_norm": 9.19056226837705, "learning_rate": 2e-06, "loss": 0.2237, "step": 7251 }, { "epoch": 1.6824034334763949, "grad_norm": 13.196367330577875, "learning_rate": 2e-06, "loss": 0.2455, "step": 7252 }, { "epoch": 1.6826354251246955, "grad_norm": 14.036007189990867, "learning_rate": 2e-06, "loss": 0.2197, "step": 7253 }, { "epoch": 1.6828674167729962, "grad_norm": 14.91431701000174, "learning_rate": 2e-06, "loss": 0.2718, "step": 7254 }, { "epoch": 1.6830994084212967, "grad_norm": 16.16148852325024, "learning_rate": 2e-06, "loss": 0.2549, "step": 7255 }, { "epoch": 1.6833314000695974, "grad_norm": 15.585917882713956, "learning_rate": 2e-06, "loss": 0.1808, "step": 7256 }, { "epoch": 1.683563391717898, "grad_norm": 20.987773075527787, "learning_rate": 2e-06, "loss": 0.2464, "step": 7257 }, { "epoch": 1.6837953833661987, "grad_norm": 9.67357503129696, "learning_rate": 2e-06, "loss": 0.2684, "step": 7258 }, { "epoch": 1.6840273750144994, "grad_norm": 12.767117257443124, "learning_rate": 2e-06, "loss": 0.2111, "step": 7259 }, { "epoch": 1.6842593666628, "grad_norm": 11.685125969676664, "learning_rate": 2e-06, "loss": 0.2216, "step": 7260 }, { "epoch": 1.6844913583111008, "grad_norm": 15.651428326415914, "learning_rate": 2e-06, "loss": 0.2254, "step": 7261 }, { "epoch": 1.6847233499594014, "grad_norm": 10.54086061454288, "learning_rate": 2e-06, "loss": 0.2496, "step": 7262 }, { "epoch": 1.684955341607702, "grad_norm": 15.971847719178118, "learning_rate": 2e-06, "loss": 0.189, "step": 7263 }, { "epoch": 1.6851873332560028, "grad_norm": 8.066546502110779, "learning_rate": 2e-06, "loss": 0.2025, "step": 7264 }, { "epoch": 1.6854193249043035, "grad_norm": 8.468046938996284, "learning_rate": 2e-06, "loss": 0.2017, "step": 7265 }, { "epoch": 1.6856513165526041, "grad_norm": 9.632765936355588, "learning_rate": 2e-06, "loss": 0.2203, "step": 7266 }, { "epoch": 1.6858833082009048, "grad_norm": 14.954336739425484, "learning_rate": 2e-06, "loss": 0.3368, "step": 7267 }, { "epoch": 1.6861152998492055, "grad_norm": 11.151590329091023, "learning_rate": 2e-06, "loss": 0.1318, "step": 7268 }, { "epoch": 1.6863472914975062, "grad_norm": 18.76079227324024, "learning_rate": 2e-06, "loss": 0.2761, "step": 7269 }, { "epoch": 1.6865792831458069, "grad_norm": 17.605320132885076, "learning_rate": 2e-06, "loss": 0.2647, "step": 7270 }, { "epoch": 1.6868112747941075, "grad_norm": 12.944711743448268, "learning_rate": 2e-06, "loss": 0.2539, "step": 7271 }, { "epoch": 1.6870432664424082, "grad_norm": 123.24890746611254, "learning_rate": 2e-06, "loss": 0.2955, "step": 7272 }, { "epoch": 1.6872752580907089, "grad_norm": 8.159596136022413, "learning_rate": 2e-06, "loss": 0.1544, "step": 7273 }, { "epoch": 1.6875072497390093, "grad_norm": 6.68692543458059, "learning_rate": 2e-06, "loss": 0.2265, "step": 7274 }, { "epoch": 1.68773924138731, "grad_norm": 13.505784951239923, "learning_rate": 2e-06, "loss": 0.2127, "step": 7275 }, { "epoch": 1.6879712330356107, "grad_norm": 12.820996468185626, "learning_rate": 2e-06, "loss": 0.322, "step": 7276 }, { "epoch": 1.6882032246839114, "grad_norm": 16.991615458156502, "learning_rate": 2e-06, "loss": 0.2802, "step": 7277 }, { "epoch": 1.688435216332212, "grad_norm": 10.463780024145759, "learning_rate": 2e-06, "loss": 0.1726, "step": 7278 }, { "epoch": 1.6886672079805127, "grad_norm": 16.589872313730154, "learning_rate": 2e-06, "loss": 0.288, "step": 7279 }, { "epoch": 1.6888991996288134, "grad_norm": 9.827582196248413, "learning_rate": 2e-06, "loss": 0.1659, "step": 7280 }, { "epoch": 1.6891311912771139, "grad_norm": 15.061570411083014, "learning_rate": 2e-06, "loss": 0.2588, "step": 7281 }, { "epoch": 1.6893631829254145, "grad_norm": 14.836146002644297, "learning_rate": 2e-06, "loss": 0.1934, "step": 7282 }, { "epoch": 1.6895951745737152, "grad_norm": 10.968410038414373, "learning_rate": 2e-06, "loss": 0.1331, "step": 7283 }, { "epoch": 1.689827166222016, "grad_norm": 9.697378039295229, "learning_rate": 2e-06, "loss": 0.1898, "step": 7284 }, { "epoch": 1.6900591578703166, "grad_norm": 9.26353684174463, "learning_rate": 2e-06, "loss": 0.2501, "step": 7285 }, { "epoch": 1.6902911495186173, "grad_norm": 16.511560222732655, "learning_rate": 2e-06, "loss": 0.2632, "step": 7286 }, { "epoch": 1.690523141166918, "grad_norm": 15.538008963116829, "learning_rate": 2e-06, "loss": 0.2322, "step": 7287 }, { "epoch": 1.6907551328152186, "grad_norm": 6.702030928521217, "learning_rate": 2e-06, "loss": 0.1563, "step": 7288 }, { "epoch": 1.6909871244635193, "grad_norm": 18.95424278469588, "learning_rate": 2e-06, "loss": 0.412, "step": 7289 }, { "epoch": 1.69121911611182, "grad_norm": 19.402775836233555, "learning_rate": 2e-06, "loss": 0.3471, "step": 7290 }, { "epoch": 1.6914511077601206, "grad_norm": 18.586344150422477, "learning_rate": 2e-06, "loss": 0.1725, "step": 7291 }, { "epoch": 1.6916830994084213, "grad_norm": 13.803017788235675, "learning_rate": 2e-06, "loss": 0.2308, "step": 7292 }, { "epoch": 1.691915091056722, "grad_norm": 12.289919019565339, "learning_rate": 2e-06, "loss": 0.2321, "step": 7293 }, { "epoch": 1.6921470827050227, "grad_norm": 14.677095371271689, "learning_rate": 2e-06, "loss": 0.2651, "step": 7294 }, { "epoch": 1.6923790743533234, "grad_norm": 15.267058990164466, "learning_rate": 2e-06, "loss": 0.2599, "step": 7295 }, { "epoch": 1.692611066001624, "grad_norm": 10.819449552891342, "learning_rate": 2e-06, "loss": 0.2296, "step": 7296 }, { "epoch": 1.6928430576499247, "grad_norm": 7.79293963930981, "learning_rate": 2e-06, "loss": 0.1925, "step": 7297 }, { "epoch": 1.6930750492982254, "grad_norm": 12.106463229541392, "learning_rate": 2e-06, "loss": 0.2504, "step": 7298 }, { "epoch": 1.693307040946526, "grad_norm": 20.23700952736958, "learning_rate": 2e-06, "loss": 0.3133, "step": 7299 }, { "epoch": 1.6935390325948267, "grad_norm": 13.082483003263226, "learning_rate": 2e-06, "loss": 0.3206, "step": 7300 }, { "epoch": 1.6937710242431272, "grad_norm": 5.561796986154681, "learning_rate": 2e-06, "loss": 0.1812, "step": 7301 }, { "epoch": 1.6940030158914279, "grad_norm": 18.485603555962598, "learning_rate": 2e-06, "loss": 0.2279, "step": 7302 }, { "epoch": 1.6942350075397286, "grad_norm": 19.852227953612164, "learning_rate": 2e-06, "loss": 0.4063, "step": 7303 }, { "epoch": 1.6944669991880292, "grad_norm": 19.68949775248541, "learning_rate": 2e-06, "loss": 0.3253, "step": 7304 }, { "epoch": 1.69469899083633, "grad_norm": 33.77052968630431, "learning_rate": 2e-06, "loss": 0.3797, "step": 7305 }, { "epoch": 1.6949309824846306, "grad_norm": 11.94857037264836, "learning_rate": 2e-06, "loss": 0.209, "step": 7306 }, { "epoch": 1.6951629741329313, "grad_norm": 13.804592380035896, "learning_rate": 2e-06, "loss": 0.2286, "step": 7307 }, { "epoch": 1.6953949657812317, "grad_norm": 8.742072666215575, "learning_rate": 2e-06, "loss": 0.1553, "step": 7308 }, { "epoch": 1.6956269574295324, "grad_norm": 11.872405377077234, "learning_rate": 2e-06, "loss": 0.2513, "step": 7309 }, { "epoch": 1.695858949077833, "grad_norm": 8.341582411825478, "learning_rate": 2e-06, "loss": 0.1319, "step": 7310 }, { "epoch": 1.6960909407261338, "grad_norm": 10.44053334267485, "learning_rate": 2e-06, "loss": 0.2116, "step": 7311 }, { "epoch": 1.6963229323744344, "grad_norm": 17.881643539271504, "learning_rate": 2e-06, "loss": 0.2348, "step": 7312 }, { "epoch": 1.6965549240227351, "grad_norm": 19.973000179858243, "learning_rate": 2e-06, "loss": 0.3227, "step": 7313 }, { "epoch": 1.6967869156710358, "grad_norm": 25.58664111962267, "learning_rate": 2e-06, "loss": 0.3412, "step": 7314 }, { "epoch": 1.6970189073193365, "grad_norm": 16.170402376477018, "learning_rate": 2e-06, "loss": 0.2891, "step": 7315 }, { "epoch": 1.6972508989676371, "grad_norm": 25.391042103179647, "learning_rate": 2e-06, "loss": 0.4034, "step": 7316 }, { "epoch": 1.6974828906159378, "grad_norm": 15.631652628735425, "learning_rate": 2e-06, "loss": 0.2334, "step": 7317 }, { "epoch": 1.6977148822642385, "grad_norm": 15.931573085004318, "learning_rate": 2e-06, "loss": 0.3106, "step": 7318 }, { "epoch": 1.6979468739125392, "grad_norm": 19.12347367874556, "learning_rate": 2e-06, "loss": 0.1906, "step": 7319 }, { "epoch": 1.6981788655608399, "grad_norm": 15.569423556959888, "learning_rate": 2e-06, "loss": 0.264, "step": 7320 }, { "epoch": 1.6984108572091405, "grad_norm": 14.068857339247876, "learning_rate": 2e-06, "loss": 0.237, "step": 7321 }, { "epoch": 1.6986428488574412, "grad_norm": 7.4346690593116, "learning_rate": 2e-06, "loss": 0.2656, "step": 7322 }, { "epoch": 1.698874840505742, "grad_norm": 13.320835738404085, "learning_rate": 2e-06, "loss": 0.2022, "step": 7323 }, { "epoch": 1.6991068321540426, "grad_norm": 16.618587408159758, "learning_rate": 2e-06, "loss": 0.3168, "step": 7324 }, { "epoch": 1.6993388238023432, "grad_norm": 14.77136574060571, "learning_rate": 2e-06, "loss": 0.1647, "step": 7325 }, { "epoch": 1.699570815450644, "grad_norm": 14.808907885630699, "learning_rate": 2e-06, "loss": 0.2321, "step": 7326 }, { "epoch": 1.6998028070989446, "grad_norm": 11.36970811881377, "learning_rate": 2e-06, "loss": 0.2535, "step": 7327 }, { "epoch": 1.700034798747245, "grad_norm": 11.569639321117595, "learning_rate": 2e-06, "loss": 0.2157, "step": 7328 }, { "epoch": 1.7002667903955457, "grad_norm": 13.531970644424788, "learning_rate": 2e-06, "loss": 0.154, "step": 7329 }, { "epoch": 1.7004987820438464, "grad_norm": 8.423003530239141, "learning_rate": 2e-06, "loss": 0.2078, "step": 7330 }, { "epoch": 1.700730773692147, "grad_norm": 16.232057245838032, "learning_rate": 2e-06, "loss": 0.1807, "step": 7331 }, { "epoch": 1.7009627653404478, "grad_norm": 12.855703913990952, "learning_rate": 2e-06, "loss": 0.2202, "step": 7332 }, { "epoch": 1.7011947569887484, "grad_norm": 12.417491377191617, "learning_rate": 2e-06, "loss": 0.2327, "step": 7333 }, { "epoch": 1.701426748637049, "grad_norm": 14.984586298988479, "learning_rate": 2e-06, "loss": 0.2119, "step": 7334 }, { "epoch": 1.7016587402853496, "grad_norm": 18.66081159209174, "learning_rate": 2e-06, "loss": 0.3525, "step": 7335 }, { "epoch": 1.7018907319336503, "grad_norm": 9.688077039211272, "learning_rate": 2e-06, "loss": 0.1497, "step": 7336 }, { "epoch": 1.702122723581951, "grad_norm": 7.287356424854079, "learning_rate": 2e-06, "loss": 0.1793, "step": 7337 }, { "epoch": 1.7023547152302516, "grad_norm": 8.409087290247287, "learning_rate": 2e-06, "loss": 0.2413, "step": 7338 }, { "epoch": 1.7025867068785523, "grad_norm": 13.801898488863271, "learning_rate": 2e-06, "loss": 0.2279, "step": 7339 }, { "epoch": 1.702818698526853, "grad_norm": 10.625966492141034, "learning_rate": 2e-06, "loss": 0.1772, "step": 7340 }, { "epoch": 1.7030506901751536, "grad_norm": 8.734020582081474, "learning_rate": 2e-06, "loss": 0.2377, "step": 7341 }, { "epoch": 1.7032826818234543, "grad_norm": 15.367119881801106, "learning_rate": 2e-06, "loss": 0.2827, "step": 7342 }, { "epoch": 1.703514673471755, "grad_norm": 9.132522203727172, "learning_rate": 2e-06, "loss": 0.1569, "step": 7343 }, { "epoch": 1.7037466651200557, "grad_norm": 26.578806454786072, "learning_rate": 2e-06, "loss": 0.3061, "step": 7344 }, { "epoch": 1.7039786567683564, "grad_norm": 13.623761355111489, "learning_rate": 2e-06, "loss": 0.2544, "step": 7345 }, { "epoch": 1.704210648416657, "grad_norm": 10.402184063489008, "learning_rate": 2e-06, "loss": 0.1433, "step": 7346 }, { "epoch": 1.7044426400649577, "grad_norm": 14.848757688205776, "learning_rate": 2e-06, "loss": 0.2865, "step": 7347 }, { "epoch": 1.7046746317132584, "grad_norm": 10.388087104158064, "learning_rate": 2e-06, "loss": 0.2016, "step": 7348 }, { "epoch": 1.704906623361559, "grad_norm": 10.690069785947735, "learning_rate": 2e-06, "loss": 0.251, "step": 7349 }, { "epoch": 1.7051386150098597, "grad_norm": 12.286173589369547, "learning_rate": 2e-06, "loss": 0.193, "step": 7350 }, { "epoch": 1.7053706066581604, "grad_norm": 18.118306711239853, "learning_rate": 2e-06, "loss": 0.2965, "step": 7351 }, { "epoch": 1.705602598306461, "grad_norm": 17.74489505862405, "learning_rate": 2e-06, "loss": 0.4117, "step": 7352 }, { "epoch": 1.7058345899547618, "grad_norm": 14.440055676789756, "learning_rate": 2e-06, "loss": 0.1778, "step": 7353 }, { "epoch": 1.7060665816030622, "grad_norm": 21.136004075754848, "learning_rate": 2e-06, "loss": 0.2744, "step": 7354 }, { "epoch": 1.706298573251363, "grad_norm": 15.881275091214922, "learning_rate": 2e-06, "loss": 0.2271, "step": 7355 }, { "epoch": 1.7065305648996636, "grad_norm": 7.7350361481897725, "learning_rate": 2e-06, "loss": 0.1523, "step": 7356 }, { "epoch": 1.7067625565479643, "grad_norm": 10.3562729510965, "learning_rate": 2e-06, "loss": 0.3039, "step": 7357 }, { "epoch": 1.706994548196265, "grad_norm": 15.919554632350538, "learning_rate": 2e-06, "loss": 0.2601, "step": 7358 }, { "epoch": 1.7072265398445656, "grad_norm": 6.907861083753691, "learning_rate": 2e-06, "loss": 0.1562, "step": 7359 }, { "epoch": 1.7074585314928663, "grad_norm": 13.332375865894466, "learning_rate": 2e-06, "loss": 0.1999, "step": 7360 }, { "epoch": 1.7076905231411668, "grad_norm": 9.13285577347624, "learning_rate": 2e-06, "loss": 0.2235, "step": 7361 }, { "epoch": 1.7079225147894674, "grad_norm": 14.410625044515026, "learning_rate": 2e-06, "loss": 0.3811, "step": 7362 }, { "epoch": 1.7081545064377681, "grad_norm": 15.474633868476383, "learning_rate": 2e-06, "loss": 0.399, "step": 7363 }, { "epoch": 1.7083864980860688, "grad_norm": 8.161414052092125, "learning_rate": 2e-06, "loss": 0.1485, "step": 7364 }, { "epoch": 1.7086184897343695, "grad_norm": 7.173334004364632, "learning_rate": 2e-06, "loss": 0.2258, "step": 7365 }, { "epoch": 1.7088504813826702, "grad_norm": 15.427045346489873, "learning_rate": 2e-06, "loss": 0.2198, "step": 7366 }, { "epoch": 1.7090824730309708, "grad_norm": 12.765297230763718, "learning_rate": 2e-06, "loss": 0.2092, "step": 7367 }, { "epoch": 1.7093144646792715, "grad_norm": 10.360717945103982, "learning_rate": 2e-06, "loss": 0.2371, "step": 7368 }, { "epoch": 1.7095464563275722, "grad_norm": 11.163692771886856, "learning_rate": 2e-06, "loss": 0.2144, "step": 7369 }, { "epoch": 1.7097784479758729, "grad_norm": 9.414971675169285, "learning_rate": 2e-06, "loss": 0.2496, "step": 7370 }, { "epoch": 1.7100104396241735, "grad_norm": 12.427623155406073, "learning_rate": 2e-06, "loss": 0.2593, "step": 7371 }, { "epoch": 1.7102424312724742, "grad_norm": 9.993736030250428, "learning_rate": 2e-06, "loss": 0.2196, "step": 7372 }, { "epoch": 1.710474422920775, "grad_norm": 12.357927783611151, "learning_rate": 2e-06, "loss": 0.2762, "step": 7373 }, { "epoch": 1.7107064145690756, "grad_norm": 17.258664349758774, "learning_rate": 2e-06, "loss": 0.3045, "step": 7374 }, { "epoch": 1.7109384062173763, "grad_norm": 7.514361257025172, "learning_rate": 2e-06, "loss": 0.1494, "step": 7375 }, { "epoch": 1.711170397865677, "grad_norm": 21.462401133891056, "learning_rate": 2e-06, "loss": 0.3879, "step": 7376 }, { "epoch": 1.7114023895139776, "grad_norm": 14.017574262579139, "learning_rate": 2e-06, "loss": 0.2501, "step": 7377 }, { "epoch": 1.7116343811622783, "grad_norm": 59.43227788764268, "learning_rate": 2e-06, "loss": 0.1805, "step": 7378 }, { "epoch": 1.711866372810579, "grad_norm": 13.789051712709842, "learning_rate": 2e-06, "loss": 0.2946, "step": 7379 }, { "epoch": 1.7120983644588796, "grad_norm": 11.672587587978251, "learning_rate": 2e-06, "loss": 0.2156, "step": 7380 }, { "epoch": 1.71233035610718, "grad_norm": 21.34434420615057, "learning_rate": 2e-06, "loss": 0.325, "step": 7381 }, { "epoch": 1.7125623477554808, "grad_norm": 16.615661247788037, "learning_rate": 2e-06, "loss": 0.2408, "step": 7382 }, { "epoch": 1.7127943394037815, "grad_norm": 20.75326511437421, "learning_rate": 2e-06, "loss": 0.3114, "step": 7383 }, { "epoch": 1.7130263310520821, "grad_norm": 12.286557692400283, "learning_rate": 2e-06, "loss": 0.2356, "step": 7384 }, { "epoch": 1.7132583227003828, "grad_norm": 12.174885841243048, "learning_rate": 2e-06, "loss": 0.223, "step": 7385 }, { "epoch": 1.7134903143486835, "grad_norm": 14.877687974756407, "learning_rate": 2e-06, "loss": 0.3443, "step": 7386 }, { "epoch": 1.7137223059969842, "grad_norm": 17.07574828854014, "learning_rate": 2e-06, "loss": 0.3792, "step": 7387 }, { "epoch": 1.7139542976452846, "grad_norm": 13.713405100948842, "learning_rate": 2e-06, "loss": 0.2943, "step": 7388 }, { "epoch": 1.7141862892935853, "grad_norm": 10.06477622586005, "learning_rate": 2e-06, "loss": 0.2264, "step": 7389 }, { "epoch": 1.714418280941886, "grad_norm": 11.263272476317919, "learning_rate": 2e-06, "loss": 0.2381, "step": 7390 }, { "epoch": 1.7146502725901867, "grad_norm": 19.508196318478443, "learning_rate": 2e-06, "loss": 0.3064, "step": 7391 }, { "epoch": 1.7148822642384873, "grad_norm": 11.261598830547625, "learning_rate": 2e-06, "loss": 0.2087, "step": 7392 }, { "epoch": 1.715114255886788, "grad_norm": 12.405569223219322, "learning_rate": 2e-06, "loss": 0.3194, "step": 7393 }, { "epoch": 1.7153462475350887, "grad_norm": 16.233821077133854, "learning_rate": 2e-06, "loss": 0.2638, "step": 7394 }, { "epoch": 1.7155782391833894, "grad_norm": 18.979216214316168, "learning_rate": 2e-06, "loss": 0.2379, "step": 7395 }, { "epoch": 1.71581023083169, "grad_norm": 12.002055700439051, "learning_rate": 2e-06, "loss": 0.2695, "step": 7396 }, { "epoch": 1.7160422224799907, "grad_norm": 7.190049386433045, "learning_rate": 2e-06, "loss": 0.1919, "step": 7397 }, { "epoch": 1.7162742141282914, "grad_norm": 9.293922544406554, "learning_rate": 2e-06, "loss": 0.1739, "step": 7398 }, { "epoch": 1.716506205776592, "grad_norm": 19.113098476282847, "learning_rate": 2e-06, "loss": 0.2361, "step": 7399 }, { "epoch": 1.7167381974248928, "grad_norm": 21.822492041683304, "learning_rate": 2e-06, "loss": 0.3029, "step": 7400 }, { "epoch": 1.7169701890731934, "grad_norm": 13.930659725168429, "learning_rate": 2e-06, "loss": 0.263, "step": 7401 }, { "epoch": 1.717202180721494, "grad_norm": 14.905559993741406, "learning_rate": 2e-06, "loss": 0.2522, "step": 7402 }, { "epoch": 1.7174341723697948, "grad_norm": 11.164819303552916, "learning_rate": 2e-06, "loss": 0.2697, "step": 7403 }, { "epoch": 1.7176661640180955, "grad_norm": 8.91142886247981, "learning_rate": 2e-06, "loss": 0.115, "step": 7404 }, { "epoch": 1.7178981556663961, "grad_norm": 7.949290065036741, "learning_rate": 2e-06, "loss": 0.2455, "step": 7405 }, { "epoch": 1.7181301473146968, "grad_norm": 10.785661327154305, "learning_rate": 2e-06, "loss": 0.1592, "step": 7406 }, { "epoch": 1.7183621389629975, "grad_norm": 16.361965541625413, "learning_rate": 2e-06, "loss": 0.2323, "step": 7407 }, { "epoch": 1.718594130611298, "grad_norm": 19.92901371079172, "learning_rate": 2e-06, "loss": 0.3203, "step": 7408 }, { "epoch": 1.7188261222595986, "grad_norm": 9.312777707144642, "learning_rate": 2e-06, "loss": 0.1555, "step": 7409 }, { "epoch": 1.7190581139078993, "grad_norm": 6.883164003465984, "learning_rate": 2e-06, "loss": 0.1613, "step": 7410 }, { "epoch": 1.7192901055562, "grad_norm": 7.625503436868352, "learning_rate": 2e-06, "loss": 0.233, "step": 7411 }, { "epoch": 1.7195220972045007, "grad_norm": 8.824659120759403, "learning_rate": 2e-06, "loss": 0.2014, "step": 7412 }, { "epoch": 1.7197540888528013, "grad_norm": 6.026256965073181, "learning_rate": 2e-06, "loss": 0.187, "step": 7413 }, { "epoch": 1.7199860805011018, "grad_norm": 13.585052566414436, "learning_rate": 2e-06, "loss": 0.2389, "step": 7414 }, { "epoch": 1.7202180721494025, "grad_norm": 20.721352563891614, "learning_rate": 2e-06, "loss": 0.3369, "step": 7415 }, { "epoch": 1.7204500637977032, "grad_norm": 17.802194293608064, "learning_rate": 2e-06, "loss": 0.291, "step": 7416 }, { "epoch": 1.7206820554460038, "grad_norm": 12.616286867089714, "learning_rate": 2e-06, "loss": 0.1882, "step": 7417 }, { "epoch": 1.7209140470943045, "grad_norm": 9.911712897105605, "learning_rate": 2e-06, "loss": 0.2234, "step": 7418 }, { "epoch": 1.7211460387426052, "grad_norm": 16.06779518312088, "learning_rate": 2e-06, "loss": 0.2163, "step": 7419 }, { "epoch": 1.7213780303909059, "grad_norm": 14.026915838395407, "learning_rate": 2e-06, "loss": 0.2335, "step": 7420 }, { "epoch": 1.7216100220392065, "grad_norm": 16.03652589750512, "learning_rate": 2e-06, "loss": 0.2006, "step": 7421 }, { "epoch": 1.7218420136875072, "grad_norm": 14.123340674487144, "learning_rate": 2e-06, "loss": 0.2975, "step": 7422 }, { "epoch": 1.722074005335808, "grad_norm": 9.61097211651846, "learning_rate": 2e-06, "loss": 0.1643, "step": 7423 }, { "epoch": 1.7223059969841086, "grad_norm": 6.260522196319805, "learning_rate": 2e-06, "loss": 0.1685, "step": 7424 }, { "epoch": 1.7225379886324093, "grad_norm": 12.096547938872812, "learning_rate": 2e-06, "loss": 0.2311, "step": 7425 }, { "epoch": 1.72276998028071, "grad_norm": 10.742362818573552, "learning_rate": 2e-06, "loss": 0.1912, "step": 7426 }, { "epoch": 1.7230019719290106, "grad_norm": 9.979446739855348, "learning_rate": 2e-06, "loss": 0.2561, "step": 7427 }, { "epoch": 1.7232339635773113, "grad_norm": 12.982352110270831, "learning_rate": 2e-06, "loss": 0.246, "step": 7428 }, { "epoch": 1.723465955225612, "grad_norm": 10.51719727420884, "learning_rate": 2e-06, "loss": 0.2248, "step": 7429 }, { "epoch": 1.7236979468739126, "grad_norm": 19.54035939653835, "learning_rate": 2e-06, "loss": 0.2248, "step": 7430 }, { "epoch": 1.7239299385222133, "grad_norm": 10.282834840940072, "learning_rate": 2e-06, "loss": 0.1643, "step": 7431 }, { "epoch": 1.724161930170514, "grad_norm": 15.72589351752963, "learning_rate": 2e-06, "loss": 0.3086, "step": 7432 }, { "epoch": 1.7243939218188147, "grad_norm": 11.020603461535057, "learning_rate": 2e-06, "loss": 0.1795, "step": 7433 }, { "epoch": 1.7246259134671151, "grad_norm": 9.55280925000355, "learning_rate": 2e-06, "loss": 0.2475, "step": 7434 }, { "epoch": 1.7248579051154158, "grad_norm": 12.311849057297254, "learning_rate": 2e-06, "loss": 0.1943, "step": 7435 }, { "epoch": 1.7250898967637165, "grad_norm": 8.677933835714159, "learning_rate": 2e-06, "loss": 0.1278, "step": 7436 }, { "epoch": 1.7253218884120172, "grad_norm": 9.135809044634728, "learning_rate": 2e-06, "loss": 0.1905, "step": 7437 }, { "epoch": 1.7255538800603178, "grad_norm": 12.128952282925505, "learning_rate": 2e-06, "loss": 0.2191, "step": 7438 }, { "epoch": 1.7257858717086185, "grad_norm": 11.620391971320371, "learning_rate": 2e-06, "loss": 0.3023, "step": 7439 }, { "epoch": 1.7260178633569192, "grad_norm": 7.742562300737355, "learning_rate": 2e-06, "loss": 0.2002, "step": 7440 }, { "epoch": 1.7262498550052197, "grad_norm": 11.483373680485649, "learning_rate": 2e-06, "loss": 0.3349, "step": 7441 }, { "epoch": 1.7264818466535203, "grad_norm": 14.756476462309388, "learning_rate": 2e-06, "loss": 0.2754, "step": 7442 }, { "epoch": 1.726713838301821, "grad_norm": 24.504072386189904, "learning_rate": 2e-06, "loss": 0.3742, "step": 7443 }, { "epoch": 1.7269458299501217, "grad_norm": 12.707665192584258, "learning_rate": 2e-06, "loss": 0.225, "step": 7444 }, { "epoch": 1.7271778215984224, "grad_norm": 25.36059367282093, "learning_rate": 2e-06, "loss": 0.2895, "step": 7445 }, { "epoch": 1.727409813246723, "grad_norm": 18.671781461183837, "learning_rate": 2e-06, "loss": 0.3407, "step": 7446 }, { "epoch": 1.7276418048950237, "grad_norm": 15.746357771325753, "learning_rate": 2e-06, "loss": 0.2162, "step": 7447 }, { "epoch": 1.7278737965433244, "grad_norm": 8.535603643460004, "learning_rate": 2e-06, "loss": 0.2024, "step": 7448 }, { "epoch": 1.728105788191625, "grad_norm": 16.43628019510447, "learning_rate": 2e-06, "loss": 0.351, "step": 7449 }, { "epoch": 1.7283377798399258, "grad_norm": 10.001938065434366, "learning_rate": 2e-06, "loss": 0.2177, "step": 7450 }, { "epoch": 1.7285697714882264, "grad_norm": 12.779681705176559, "learning_rate": 2e-06, "loss": 0.1771, "step": 7451 }, { "epoch": 1.7288017631365271, "grad_norm": 13.328753843002087, "learning_rate": 2e-06, "loss": 0.1639, "step": 7452 }, { "epoch": 1.7290337547848278, "grad_norm": 13.858957106187253, "learning_rate": 2e-06, "loss": 0.2973, "step": 7453 }, { "epoch": 1.7292657464331285, "grad_norm": 14.723632245449247, "learning_rate": 2e-06, "loss": 0.2721, "step": 7454 }, { "epoch": 1.7294977380814291, "grad_norm": 11.906681071889423, "learning_rate": 2e-06, "loss": 0.2087, "step": 7455 }, { "epoch": 1.7297297297297298, "grad_norm": 7.628253623661381, "learning_rate": 2e-06, "loss": 0.1461, "step": 7456 }, { "epoch": 1.7299617213780305, "grad_norm": 13.74681104057977, "learning_rate": 2e-06, "loss": 0.2663, "step": 7457 }, { "epoch": 1.7301937130263312, "grad_norm": 8.654851282645897, "learning_rate": 2e-06, "loss": 0.2446, "step": 7458 }, { "epoch": 1.7304257046746319, "grad_norm": 11.135571891568727, "learning_rate": 2e-06, "loss": 0.2514, "step": 7459 }, { "epoch": 1.7306576963229325, "grad_norm": 23.09441910446493, "learning_rate": 2e-06, "loss": 0.3025, "step": 7460 }, { "epoch": 1.730889687971233, "grad_norm": 7.408623355561432, "learning_rate": 2e-06, "loss": 0.2236, "step": 7461 }, { "epoch": 1.7311216796195337, "grad_norm": 14.021068585561615, "learning_rate": 2e-06, "loss": 0.2338, "step": 7462 }, { "epoch": 1.7313536712678343, "grad_norm": 13.492300579390124, "learning_rate": 2e-06, "loss": 0.2432, "step": 7463 }, { "epoch": 1.731585662916135, "grad_norm": 9.888204097854688, "learning_rate": 2e-06, "loss": 0.264, "step": 7464 }, { "epoch": 1.7318176545644357, "grad_norm": 15.78820383154411, "learning_rate": 2e-06, "loss": 0.2384, "step": 7465 }, { "epoch": 1.7320496462127364, "grad_norm": 9.480461865651494, "learning_rate": 2e-06, "loss": 0.2361, "step": 7466 }, { "epoch": 1.7322816378610368, "grad_norm": 10.41053618967658, "learning_rate": 2e-06, "loss": 0.2749, "step": 7467 }, { "epoch": 1.7325136295093375, "grad_norm": 12.479664664280824, "learning_rate": 2e-06, "loss": 0.2908, "step": 7468 }, { "epoch": 1.7327456211576382, "grad_norm": 15.79738817329508, "learning_rate": 2e-06, "loss": 0.2914, "step": 7469 }, { "epoch": 1.7329776128059389, "grad_norm": 12.33172355967031, "learning_rate": 2e-06, "loss": 0.2325, "step": 7470 }, { "epoch": 1.7332096044542395, "grad_norm": 24.294313092336242, "learning_rate": 2e-06, "loss": 0.319, "step": 7471 }, { "epoch": 1.7334415961025402, "grad_norm": 25.220141346783684, "learning_rate": 2e-06, "loss": 0.3333, "step": 7472 }, { "epoch": 1.733673587750841, "grad_norm": 18.63395457662261, "learning_rate": 2e-06, "loss": 0.2178, "step": 7473 }, { "epoch": 1.7339055793991416, "grad_norm": 15.239283398729846, "learning_rate": 2e-06, "loss": 0.2145, "step": 7474 }, { "epoch": 1.7341375710474423, "grad_norm": 11.652366727377803, "learning_rate": 2e-06, "loss": 0.2277, "step": 7475 }, { "epoch": 1.734369562695743, "grad_norm": 11.646906456604956, "learning_rate": 2e-06, "loss": 0.2013, "step": 7476 }, { "epoch": 1.7346015543440436, "grad_norm": 8.637249927622804, "learning_rate": 2e-06, "loss": 0.1697, "step": 7477 }, { "epoch": 1.7348335459923443, "grad_norm": 11.990032300591745, "learning_rate": 2e-06, "loss": 0.1589, "step": 7478 }, { "epoch": 1.735065537640645, "grad_norm": 10.945905877991185, "learning_rate": 2e-06, "loss": 0.2319, "step": 7479 }, { "epoch": 1.7352975292889457, "grad_norm": 7.920506695800604, "learning_rate": 2e-06, "loss": 0.2008, "step": 7480 }, { "epoch": 1.7355295209372463, "grad_norm": 8.339878093891347, "learning_rate": 2e-06, "loss": 0.1503, "step": 7481 }, { "epoch": 1.735761512585547, "grad_norm": 15.023425770672432, "learning_rate": 2e-06, "loss": 0.3408, "step": 7482 }, { "epoch": 1.7359935042338477, "grad_norm": 9.377761857893933, "learning_rate": 2e-06, "loss": 0.2254, "step": 7483 }, { "epoch": 1.7362254958821484, "grad_norm": 10.184995366885822, "learning_rate": 2e-06, "loss": 0.1921, "step": 7484 }, { "epoch": 1.736457487530449, "grad_norm": 13.578407661309907, "learning_rate": 2e-06, "loss": 0.2303, "step": 7485 }, { "epoch": 1.7366894791787497, "grad_norm": 7.356389683470666, "learning_rate": 2e-06, "loss": 0.2637, "step": 7486 }, { "epoch": 1.7369214708270502, "grad_norm": 24.566412216006192, "learning_rate": 2e-06, "loss": 0.2539, "step": 7487 }, { "epoch": 1.7371534624753509, "grad_norm": 11.865767307057718, "learning_rate": 2e-06, "loss": 0.2375, "step": 7488 }, { "epoch": 1.7373854541236515, "grad_norm": 9.212238196866018, "learning_rate": 2e-06, "loss": 0.1615, "step": 7489 }, { "epoch": 1.7376174457719522, "grad_norm": 11.28651869495539, "learning_rate": 2e-06, "loss": 0.2427, "step": 7490 }, { "epoch": 1.7378494374202529, "grad_norm": 16.932290711659782, "learning_rate": 2e-06, "loss": 0.2682, "step": 7491 }, { "epoch": 1.7380814290685536, "grad_norm": 7.522671677144873, "learning_rate": 2e-06, "loss": 0.1405, "step": 7492 }, { "epoch": 1.7383134207168542, "grad_norm": 10.36334726849435, "learning_rate": 2e-06, "loss": 0.2022, "step": 7493 }, { "epoch": 1.7385454123651547, "grad_norm": 10.670762139411545, "learning_rate": 2e-06, "loss": 0.1941, "step": 7494 }, { "epoch": 1.7387774040134554, "grad_norm": 13.315131202322673, "learning_rate": 2e-06, "loss": 0.1852, "step": 7495 }, { "epoch": 1.739009395661756, "grad_norm": 13.907336275113536, "learning_rate": 2e-06, "loss": 0.216, "step": 7496 }, { "epoch": 1.7392413873100567, "grad_norm": 13.021695949675474, "learning_rate": 2e-06, "loss": 0.3078, "step": 7497 }, { "epoch": 1.7394733789583574, "grad_norm": 13.732802454687654, "learning_rate": 2e-06, "loss": 0.1771, "step": 7498 }, { "epoch": 1.739705370606658, "grad_norm": 11.879483859690765, "learning_rate": 2e-06, "loss": 0.4304, "step": 7499 }, { "epoch": 1.7399373622549588, "grad_norm": 16.22319785403237, "learning_rate": 2e-06, "loss": 0.2655, "step": 7500 }, { "epoch": 1.7401693539032594, "grad_norm": 15.214716242928388, "learning_rate": 2e-06, "loss": 0.2577, "step": 7501 }, { "epoch": 1.7404013455515601, "grad_norm": 14.592929299836015, "learning_rate": 2e-06, "loss": 0.2414, "step": 7502 }, { "epoch": 1.7406333371998608, "grad_norm": 20.926869969586114, "learning_rate": 2e-06, "loss": 0.3618, "step": 7503 }, { "epoch": 1.7408653288481615, "grad_norm": 11.524230240811791, "learning_rate": 2e-06, "loss": 0.3186, "step": 7504 }, { "epoch": 1.7410973204964622, "grad_norm": 8.307629840052147, "learning_rate": 2e-06, "loss": 0.256, "step": 7505 }, { "epoch": 1.7413293121447628, "grad_norm": 14.612464879032128, "learning_rate": 2e-06, "loss": 0.3739, "step": 7506 }, { "epoch": 1.7415613037930635, "grad_norm": 9.623353081994557, "learning_rate": 2e-06, "loss": 0.1218, "step": 7507 }, { "epoch": 1.7417932954413642, "grad_norm": 18.58891226199371, "learning_rate": 2e-06, "loss": 0.208, "step": 7508 }, { "epoch": 1.7420252870896649, "grad_norm": 10.85916321252991, "learning_rate": 2e-06, "loss": 0.2402, "step": 7509 }, { "epoch": 1.7422572787379655, "grad_norm": 9.232168351677451, "learning_rate": 2e-06, "loss": 0.2054, "step": 7510 }, { "epoch": 1.7424892703862662, "grad_norm": 11.909383974387465, "learning_rate": 2e-06, "loss": 0.1525, "step": 7511 }, { "epoch": 1.742721262034567, "grad_norm": 10.25152784065819, "learning_rate": 2e-06, "loss": 0.2434, "step": 7512 }, { "epoch": 1.7429532536828676, "grad_norm": 9.52767133429135, "learning_rate": 2e-06, "loss": 0.1615, "step": 7513 }, { "epoch": 1.743185245331168, "grad_norm": 7.889990749074475, "learning_rate": 2e-06, "loss": 0.1444, "step": 7514 }, { "epoch": 1.7434172369794687, "grad_norm": 11.299170921030075, "learning_rate": 2e-06, "loss": 0.1949, "step": 7515 }, { "epoch": 1.7436492286277694, "grad_norm": 16.034637698952388, "learning_rate": 2e-06, "loss": 0.2648, "step": 7516 }, { "epoch": 1.74388122027607, "grad_norm": 16.068494140149642, "learning_rate": 2e-06, "loss": 0.2912, "step": 7517 }, { "epoch": 1.7441132119243707, "grad_norm": 11.633519815812562, "learning_rate": 2e-06, "loss": 0.2694, "step": 7518 }, { "epoch": 1.7443452035726714, "grad_norm": 14.184939207792523, "learning_rate": 2e-06, "loss": 0.2376, "step": 7519 }, { "epoch": 1.744577195220972, "grad_norm": 15.154376906265702, "learning_rate": 2e-06, "loss": 0.2754, "step": 7520 }, { "epoch": 1.7448091868692726, "grad_norm": 16.188615959115307, "learning_rate": 2e-06, "loss": 0.1729, "step": 7521 }, { "epoch": 1.7450411785175732, "grad_norm": 12.385480875263635, "learning_rate": 2e-06, "loss": 0.2123, "step": 7522 }, { "epoch": 1.745273170165874, "grad_norm": 20.89350954693197, "learning_rate": 2e-06, "loss": 0.3894, "step": 7523 }, { "epoch": 1.7455051618141746, "grad_norm": 12.754250138450933, "learning_rate": 2e-06, "loss": 0.3069, "step": 7524 }, { "epoch": 1.7457371534624753, "grad_norm": 16.636950981836346, "learning_rate": 2e-06, "loss": 0.3309, "step": 7525 }, { "epoch": 1.745969145110776, "grad_norm": 9.15472274877276, "learning_rate": 2e-06, "loss": 0.2223, "step": 7526 }, { "epoch": 1.7462011367590766, "grad_norm": 12.287297688521102, "learning_rate": 2e-06, "loss": 0.1625, "step": 7527 }, { "epoch": 1.7464331284073773, "grad_norm": 16.256618957006086, "learning_rate": 2e-06, "loss": 0.1969, "step": 7528 }, { "epoch": 1.746665120055678, "grad_norm": 11.264582784625329, "learning_rate": 2e-06, "loss": 0.2765, "step": 7529 }, { "epoch": 1.7468971117039787, "grad_norm": 7.349783736438385, "learning_rate": 2e-06, "loss": 0.1936, "step": 7530 }, { "epoch": 1.7471291033522793, "grad_norm": 13.180748443177167, "learning_rate": 2e-06, "loss": 0.3109, "step": 7531 }, { "epoch": 1.74736109500058, "grad_norm": 12.550800164137836, "learning_rate": 2e-06, "loss": 0.2776, "step": 7532 }, { "epoch": 1.7475930866488807, "grad_norm": 18.065154984445197, "learning_rate": 2e-06, "loss": 0.3497, "step": 7533 }, { "epoch": 1.7478250782971814, "grad_norm": 12.56891058573405, "learning_rate": 2e-06, "loss": 0.2562, "step": 7534 }, { "epoch": 1.748057069945482, "grad_norm": 13.878529337942904, "learning_rate": 2e-06, "loss": 0.286, "step": 7535 }, { "epoch": 1.7482890615937827, "grad_norm": 19.637820912174707, "learning_rate": 2e-06, "loss": 0.2508, "step": 7536 }, { "epoch": 1.7485210532420834, "grad_norm": 11.758718917948533, "learning_rate": 2e-06, "loss": 0.2464, "step": 7537 }, { "epoch": 1.748753044890384, "grad_norm": 7.703897971156512, "learning_rate": 2e-06, "loss": 0.1873, "step": 7538 }, { "epoch": 1.7489850365386848, "grad_norm": 22.799078363783156, "learning_rate": 2e-06, "loss": 0.3683, "step": 7539 }, { "epoch": 1.7492170281869854, "grad_norm": 8.022522116780017, "learning_rate": 2e-06, "loss": 0.2211, "step": 7540 }, { "epoch": 1.749449019835286, "grad_norm": 9.720216247242394, "learning_rate": 2e-06, "loss": 0.1509, "step": 7541 }, { "epoch": 1.7496810114835866, "grad_norm": 12.831999318303131, "learning_rate": 2e-06, "loss": 0.3326, "step": 7542 }, { "epoch": 1.7499130031318872, "grad_norm": 13.350805612457814, "learning_rate": 2e-06, "loss": 0.2451, "step": 7543 }, { "epoch": 1.750144994780188, "grad_norm": 29.70812030671747, "learning_rate": 2e-06, "loss": 0.3109, "step": 7544 }, { "epoch": 1.7503769864284886, "grad_norm": 14.927095134968875, "learning_rate": 2e-06, "loss": 0.2826, "step": 7545 }, { "epoch": 1.7506089780767893, "grad_norm": 15.271269633188103, "learning_rate": 2e-06, "loss": 0.2656, "step": 7546 }, { "epoch": 1.7508409697250897, "grad_norm": 18.91591537542004, "learning_rate": 2e-06, "loss": 0.2331, "step": 7547 }, { "epoch": 1.7510729613733904, "grad_norm": 15.809697419634523, "learning_rate": 2e-06, "loss": 0.2874, "step": 7548 }, { "epoch": 1.751304953021691, "grad_norm": 10.111798022135611, "learning_rate": 2e-06, "loss": 0.2326, "step": 7549 }, { "epoch": 1.7515369446699918, "grad_norm": 12.66921707938741, "learning_rate": 2e-06, "loss": 0.2217, "step": 7550 }, { "epoch": 1.7517689363182924, "grad_norm": 7.11185655840169, "learning_rate": 2e-06, "loss": 0.1963, "step": 7551 }, { "epoch": 1.7520009279665931, "grad_norm": 12.80342476480035, "learning_rate": 2e-06, "loss": 0.229, "step": 7552 }, { "epoch": 1.7522329196148938, "grad_norm": 10.654623307789162, "learning_rate": 2e-06, "loss": 0.2072, "step": 7553 }, { "epoch": 1.7524649112631945, "grad_norm": 31.130164040219547, "learning_rate": 2e-06, "loss": 0.2552, "step": 7554 }, { "epoch": 1.7526969029114952, "grad_norm": 9.521927343087164, "learning_rate": 2e-06, "loss": 0.2096, "step": 7555 }, { "epoch": 1.7529288945597958, "grad_norm": 17.004948780199133, "learning_rate": 2e-06, "loss": 0.2874, "step": 7556 }, { "epoch": 1.7531608862080965, "grad_norm": 24.340497532566054, "learning_rate": 2e-06, "loss": 0.3665, "step": 7557 }, { "epoch": 1.7533928778563972, "grad_norm": 9.246138535580242, "learning_rate": 2e-06, "loss": 0.1686, "step": 7558 }, { "epoch": 1.7536248695046979, "grad_norm": 10.679323908025648, "learning_rate": 2e-06, "loss": 0.1937, "step": 7559 }, { "epoch": 1.7538568611529985, "grad_norm": 7.459226203385063, "learning_rate": 2e-06, "loss": 0.2052, "step": 7560 }, { "epoch": 1.7540888528012992, "grad_norm": 14.642865169587246, "learning_rate": 2e-06, "loss": 0.2596, "step": 7561 }, { "epoch": 1.7543208444496, "grad_norm": 10.384026009480467, "learning_rate": 2e-06, "loss": 0.1944, "step": 7562 }, { "epoch": 1.7545528360979006, "grad_norm": 17.544916763886047, "learning_rate": 2e-06, "loss": 0.3075, "step": 7563 }, { "epoch": 1.7547848277462013, "grad_norm": 20.255564597301053, "learning_rate": 2e-06, "loss": 0.2725, "step": 7564 }, { "epoch": 1.755016819394502, "grad_norm": 15.313597934383315, "learning_rate": 2e-06, "loss": 0.2702, "step": 7565 }, { "epoch": 1.7552488110428026, "grad_norm": 11.868105774967077, "learning_rate": 2e-06, "loss": 0.1997, "step": 7566 }, { "epoch": 1.755480802691103, "grad_norm": 8.132450737749771, "learning_rate": 2e-06, "loss": 0.1891, "step": 7567 }, { "epoch": 1.7557127943394037, "grad_norm": 9.64320808739205, "learning_rate": 2e-06, "loss": 0.237, "step": 7568 }, { "epoch": 1.7559447859877044, "grad_norm": 14.945149857562205, "learning_rate": 2e-06, "loss": 0.32, "step": 7569 }, { "epoch": 1.756176777636005, "grad_norm": 5.6092356114404796, "learning_rate": 2e-06, "loss": 0.1472, "step": 7570 }, { "epoch": 1.7564087692843058, "grad_norm": 15.171945822518653, "learning_rate": 2e-06, "loss": 0.2661, "step": 7571 }, { "epoch": 1.7566407609326065, "grad_norm": 12.379076468659813, "learning_rate": 2e-06, "loss": 0.2324, "step": 7572 }, { "epoch": 1.7568727525809071, "grad_norm": 10.976070574759373, "learning_rate": 2e-06, "loss": 0.2262, "step": 7573 }, { "epoch": 1.7571047442292076, "grad_norm": 17.711656018486853, "learning_rate": 2e-06, "loss": 0.2753, "step": 7574 }, { "epoch": 1.7573367358775083, "grad_norm": 9.414455829200904, "learning_rate": 2e-06, "loss": 0.2846, "step": 7575 }, { "epoch": 1.757568727525809, "grad_norm": 10.143710465167523, "learning_rate": 2e-06, "loss": 0.2424, "step": 7576 }, { "epoch": 1.7578007191741096, "grad_norm": 14.533736277795931, "learning_rate": 2e-06, "loss": 0.2556, "step": 7577 }, { "epoch": 1.7580327108224103, "grad_norm": 15.913258956484471, "learning_rate": 2e-06, "loss": 0.2865, "step": 7578 }, { "epoch": 1.758264702470711, "grad_norm": 12.767660833167866, "learning_rate": 2e-06, "loss": 0.271, "step": 7579 }, { "epoch": 1.7584966941190117, "grad_norm": 11.294717862094545, "learning_rate": 2e-06, "loss": 0.2234, "step": 7580 }, { "epoch": 1.7587286857673123, "grad_norm": 11.306623689139046, "learning_rate": 2e-06, "loss": 0.2039, "step": 7581 }, { "epoch": 1.758960677415613, "grad_norm": 8.535703463216377, "learning_rate": 2e-06, "loss": 0.1754, "step": 7582 }, { "epoch": 1.7591926690639137, "grad_norm": 15.506198698340077, "learning_rate": 2e-06, "loss": 0.1989, "step": 7583 }, { "epoch": 1.7594246607122144, "grad_norm": 13.271148442659763, "learning_rate": 2e-06, "loss": 0.2016, "step": 7584 }, { "epoch": 1.759656652360515, "grad_norm": 12.936342351595748, "learning_rate": 2e-06, "loss": 0.2456, "step": 7585 }, { "epoch": 1.7598886440088157, "grad_norm": 4.865820394943794, "learning_rate": 2e-06, "loss": 0.1095, "step": 7586 }, { "epoch": 1.7601206356571164, "grad_norm": 24.73588202755812, "learning_rate": 2e-06, "loss": 0.4227, "step": 7587 }, { "epoch": 1.760352627305417, "grad_norm": 20.67162051161943, "learning_rate": 2e-06, "loss": 0.2526, "step": 7588 }, { "epoch": 1.7605846189537178, "grad_norm": 19.6711134518649, "learning_rate": 2e-06, "loss": 0.3269, "step": 7589 }, { "epoch": 1.7608166106020184, "grad_norm": 14.748934619827882, "learning_rate": 2e-06, "loss": 0.2186, "step": 7590 }, { "epoch": 1.7610486022503191, "grad_norm": 11.75288363328804, "learning_rate": 2e-06, "loss": 0.1792, "step": 7591 }, { "epoch": 1.7612805938986198, "grad_norm": 13.161600894829599, "learning_rate": 2e-06, "loss": 0.3328, "step": 7592 }, { "epoch": 1.7615125855469205, "grad_norm": 24.39174578419128, "learning_rate": 2e-06, "loss": 0.276, "step": 7593 }, { "epoch": 1.761744577195221, "grad_norm": 12.371452232880623, "learning_rate": 2e-06, "loss": 0.2194, "step": 7594 }, { "epoch": 1.7619765688435216, "grad_norm": 14.749222617313963, "learning_rate": 2e-06, "loss": 0.2265, "step": 7595 }, { "epoch": 1.7622085604918223, "grad_norm": 14.394177818624705, "learning_rate": 2e-06, "loss": 0.2763, "step": 7596 }, { "epoch": 1.762440552140123, "grad_norm": 11.736377858916772, "learning_rate": 2e-06, "loss": 0.2413, "step": 7597 }, { "epoch": 1.7626725437884236, "grad_norm": 9.37857496441924, "learning_rate": 2e-06, "loss": 0.2431, "step": 7598 }, { "epoch": 1.7629045354367243, "grad_norm": 7.755353360563734, "learning_rate": 2e-06, "loss": 0.1467, "step": 7599 }, { "epoch": 1.7631365270850248, "grad_norm": 17.392051182713978, "learning_rate": 2e-06, "loss": 0.3205, "step": 7600 }, { "epoch": 1.7633685187333255, "grad_norm": 6.479599459078176, "learning_rate": 2e-06, "loss": 0.1768, "step": 7601 }, { "epoch": 1.7636005103816261, "grad_norm": 12.443300925816132, "learning_rate": 2e-06, "loss": 0.2188, "step": 7602 }, { "epoch": 1.7638325020299268, "grad_norm": 14.02188400560854, "learning_rate": 2e-06, "loss": 0.1731, "step": 7603 }, { "epoch": 1.7640644936782275, "grad_norm": 13.579452802841217, "learning_rate": 2e-06, "loss": 0.188, "step": 7604 }, { "epoch": 1.7642964853265282, "grad_norm": 11.894099852846338, "learning_rate": 2e-06, "loss": 0.2135, "step": 7605 }, { "epoch": 1.7645284769748288, "grad_norm": 15.44436809606793, "learning_rate": 2e-06, "loss": 0.2343, "step": 7606 }, { "epoch": 1.7647604686231295, "grad_norm": 25.01790094541462, "learning_rate": 2e-06, "loss": 0.2829, "step": 7607 }, { "epoch": 1.7649924602714302, "grad_norm": 17.30869963511119, "learning_rate": 2e-06, "loss": 0.3645, "step": 7608 }, { "epoch": 1.7652244519197309, "grad_norm": 7.544423889631266, "learning_rate": 2e-06, "loss": 0.1884, "step": 7609 }, { "epoch": 1.7654564435680316, "grad_norm": 16.77983044601465, "learning_rate": 2e-06, "loss": 0.3064, "step": 7610 }, { "epoch": 1.7656884352163322, "grad_norm": 13.915765880853346, "learning_rate": 2e-06, "loss": 0.3585, "step": 7611 }, { "epoch": 1.765920426864633, "grad_norm": 14.45783459966466, "learning_rate": 2e-06, "loss": 0.234, "step": 7612 }, { "epoch": 1.7661524185129336, "grad_norm": 13.257478868195667, "learning_rate": 2e-06, "loss": 0.1836, "step": 7613 }, { "epoch": 1.7663844101612343, "grad_norm": 24.416811462153376, "learning_rate": 2e-06, "loss": 0.3739, "step": 7614 }, { "epoch": 1.766616401809535, "grad_norm": 16.41812661132975, "learning_rate": 2e-06, "loss": 0.3323, "step": 7615 }, { "epoch": 1.7668483934578356, "grad_norm": 13.841333663391419, "learning_rate": 2e-06, "loss": 0.2876, "step": 7616 }, { "epoch": 1.7670803851061363, "grad_norm": 18.261926281948277, "learning_rate": 2e-06, "loss": 0.2349, "step": 7617 }, { "epoch": 1.767312376754437, "grad_norm": 12.052038914795572, "learning_rate": 2e-06, "loss": 0.267, "step": 7618 }, { "epoch": 1.7675443684027377, "grad_norm": 13.682131575458424, "learning_rate": 2e-06, "loss": 0.2445, "step": 7619 }, { "epoch": 1.767776360051038, "grad_norm": 17.787255720543858, "learning_rate": 2e-06, "loss": 0.2742, "step": 7620 }, { "epoch": 1.7680083516993388, "grad_norm": 15.57759468719208, "learning_rate": 2e-06, "loss": 0.2104, "step": 7621 }, { "epoch": 1.7682403433476395, "grad_norm": 10.939546101600278, "learning_rate": 2e-06, "loss": 0.2552, "step": 7622 }, { "epoch": 1.7684723349959401, "grad_norm": 17.21174522628649, "learning_rate": 2e-06, "loss": 0.3564, "step": 7623 }, { "epoch": 1.7687043266442408, "grad_norm": 10.751055553207847, "learning_rate": 2e-06, "loss": 0.1995, "step": 7624 }, { "epoch": 1.7689363182925415, "grad_norm": 39.846053247413224, "learning_rate": 2e-06, "loss": 0.3384, "step": 7625 }, { "epoch": 1.7691683099408422, "grad_norm": 12.08905876035218, "learning_rate": 2e-06, "loss": 0.1989, "step": 7626 }, { "epoch": 1.7694003015891426, "grad_norm": 12.793073296425936, "learning_rate": 2e-06, "loss": 0.2565, "step": 7627 }, { "epoch": 1.7696322932374433, "grad_norm": 15.993395786481747, "learning_rate": 2e-06, "loss": 0.3398, "step": 7628 }, { "epoch": 1.769864284885744, "grad_norm": 13.706742234967228, "learning_rate": 2e-06, "loss": 0.1252, "step": 7629 }, { "epoch": 1.7700962765340447, "grad_norm": 11.953171049385569, "learning_rate": 2e-06, "loss": 0.2428, "step": 7630 }, { "epoch": 1.7703282681823453, "grad_norm": 14.74479617381812, "learning_rate": 2e-06, "loss": 0.2547, "step": 7631 }, { "epoch": 1.770560259830646, "grad_norm": 14.219514617403146, "learning_rate": 2e-06, "loss": 0.2018, "step": 7632 }, { "epoch": 1.7707922514789467, "grad_norm": 13.951297658416964, "learning_rate": 2e-06, "loss": 0.2766, "step": 7633 }, { "epoch": 1.7710242431272474, "grad_norm": 9.491752262170884, "learning_rate": 2e-06, "loss": 0.1672, "step": 7634 }, { "epoch": 1.771256234775548, "grad_norm": 17.46797158449257, "learning_rate": 2e-06, "loss": 0.3137, "step": 7635 }, { "epoch": 1.7714882264238487, "grad_norm": 19.06944157828703, "learning_rate": 2e-06, "loss": 0.2695, "step": 7636 }, { "epoch": 1.7717202180721494, "grad_norm": 15.879264186736425, "learning_rate": 2e-06, "loss": 0.2672, "step": 7637 }, { "epoch": 1.77195220972045, "grad_norm": 9.32608523384946, "learning_rate": 2e-06, "loss": 0.2372, "step": 7638 }, { "epoch": 1.7721842013687508, "grad_norm": 18.03702817443358, "learning_rate": 2e-06, "loss": 0.2554, "step": 7639 }, { "epoch": 1.7724161930170514, "grad_norm": 13.736295330033094, "learning_rate": 2e-06, "loss": 0.2727, "step": 7640 }, { "epoch": 1.7726481846653521, "grad_norm": 8.515303235720744, "learning_rate": 2e-06, "loss": 0.1856, "step": 7641 }, { "epoch": 1.7728801763136528, "grad_norm": 23.630669813146312, "learning_rate": 2e-06, "loss": 0.2172, "step": 7642 }, { "epoch": 1.7731121679619535, "grad_norm": 12.887268646237283, "learning_rate": 2e-06, "loss": 0.3188, "step": 7643 }, { "epoch": 1.7733441596102542, "grad_norm": 12.540798137644023, "learning_rate": 2e-06, "loss": 0.2652, "step": 7644 }, { "epoch": 1.7735761512585548, "grad_norm": 14.105494099925721, "learning_rate": 2e-06, "loss": 0.2969, "step": 7645 }, { "epoch": 1.7738081429068555, "grad_norm": 13.725792982960401, "learning_rate": 2e-06, "loss": 0.227, "step": 7646 }, { "epoch": 1.774040134555156, "grad_norm": 21.37437022895052, "learning_rate": 2e-06, "loss": 0.3201, "step": 7647 }, { "epoch": 1.7742721262034566, "grad_norm": 10.309915600828834, "learning_rate": 2e-06, "loss": 0.2181, "step": 7648 }, { "epoch": 1.7745041178517573, "grad_norm": 12.343204213291852, "learning_rate": 2e-06, "loss": 0.1647, "step": 7649 }, { "epoch": 1.774736109500058, "grad_norm": 12.04423941594413, "learning_rate": 2e-06, "loss": 0.2637, "step": 7650 }, { "epoch": 1.7749681011483587, "grad_norm": 18.005224230213113, "learning_rate": 2e-06, "loss": 0.3285, "step": 7651 }, { "epoch": 1.7752000927966594, "grad_norm": 7.775026327525456, "learning_rate": 2e-06, "loss": 0.1205, "step": 7652 }, { "epoch": 1.77543208444496, "grad_norm": 15.384233415521097, "learning_rate": 2e-06, "loss": 0.3409, "step": 7653 }, { "epoch": 1.7756640760932605, "grad_norm": 4.987462215279807, "learning_rate": 2e-06, "loss": 0.1379, "step": 7654 }, { "epoch": 1.7758960677415612, "grad_norm": 22.727798216734325, "learning_rate": 2e-06, "loss": 0.4157, "step": 7655 }, { "epoch": 1.7761280593898618, "grad_norm": 8.363343229513351, "learning_rate": 2e-06, "loss": 0.167, "step": 7656 }, { "epoch": 1.7763600510381625, "grad_norm": 7.1776307695687045, "learning_rate": 2e-06, "loss": 0.1454, "step": 7657 }, { "epoch": 1.7765920426864632, "grad_norm": 10.413632590231295, "learning_rate": 2e-06, "loss": 0.1798, "step": 7658 }, { "epoch": 1.7768240343347639, "grad_norm": 17.375429455673192, "learning_rate": 2e-06, "loss": 0.3528, "step": 7659 }, { "epoch": 1.7770560259830646, "grad_norm": 9.339877282892838, "learning_rate": 2e-06, "loss": 0.3373, "step": 7660 }, { "epoch": 1.7772880176313652, "grad_norm": 15.76690432141599, "learning_rate": 2e-06, "loss": 0.2209, "step": 7661 }, { "epoch": 1.777520009279666, "grad_norm": 16.580997351774784, "learning_rate": 2e-06, "loss": 0.248, "step": 7662 }, { "epoch": 1.7777520009279666, "grad_norm": 8.380956961573576, "learning_rate": 2e-06, "loss": 0.1943, "step": 7663 }, { "epoch": 1.7779839925762673, "grad_norm": 18.827557808833518, "learning_rate": 2e-06, "loss": 0.2473, "step": 7664 }, { "epoch": 1.778215984224568, "grad_norm": 13.388074493082893, "learning_rate": 2e-06, "loss": 0.2367, "step": 7665 }, { "epoch": 1.7784479758728686, "grad_norm": 26.059732419409222, "learning_rate": 2e-06, "loss": 0.2773, "step": 7666 }, { "epoch": 1.7786799675211693, "grad_norm": 11.879194017751967, "learning_rate": 2e-06, "loss": 0.2716, "step": 7667 }, { "epoch": 1.77891195916947, "grad_norm": 20.71252153591305, "learning_rate": 2e-06, "loss": 0.2782, "step": 7668 }, { "epoch": 1.7791439508177707, "grad_norm": 8.655225424649053, "learning_rate": 2e-06, "loss": 0.149, "step": 7669 }, { "epoch": 1.7793759424660713, "grad_norm": 24.443766643758085, "learning_rate": 2e-06, "loss": 0.378, "step": 7670 }, { "epoch": 1.779607934114372, "grad_norm": 13.744789244886295, "learning_rate": 2e-06, "loss": 0.2104, "step": 7671 }, { "epoch": 1.7798399257626727, "grad_norm": 11.431132212646698, "learning_rate": 2e-06, "loss": 0.2329, "step": 7672 }, { "epoch": 1.7800719174109734, "grad_norm": 12.346610648641189, "learning_rate": 2e-06, "loss": 0.3629, "step": 7673 }, { "epoch": 1.7803039090592738, "grad_norm": 13.919607998095852, "learning_rate": 2e-06, "loss": 0.178, "step": 7674 }, { "epoch": 1.7805359007075745, "grad_norm": 17.970739199315016, "learning_rate": 2e-06, "loss": 0.3034, "step": 7675 }, { "epoch": 1.7807678923558752, "grad_norm": 7.195667958235038, "learning_rate": 2e-06, "loss": 0.224, "step": 7676 }, { "epoch": 1.7809998840041759, "grad_norm": 8.009797752370751, "learning_rate": 2e-06, "loss": 0.1594, "step": 7677 }, { "epoch": 1.7812318756524765, "grad_norm": 4.141046293959366, "learning_rate": 2e-06, "loss": 0.1238, "step": 7678 }, { "epoch": 1.7814638673007772, "grad_norm": 12.84582198199472, "learning_rate": 2e-06, "loss": 0.2748, "step": 7679 }, { "epoch": 1.7816958589490777, "grad_norm": 15.120203097994304, "learning_rate": 2e-06, "loss": 0.2866, "step": 7680 }, { "epoch": 1.7819278505973783, "grad_norm": 14.5632055267852, "learning_rate": 2e-06, "loss": 0.2852, "step": 7681 }, { "epoch": 1.782159842245679, "grad_norm": 16.375652600630268, "learning_rate": 2e-06, "loss": 0.3306, "step": 7682 }, { "epoch": 1.7823918338939797, "grad_norm": 15.930067366581838, "learning_rate": 2e-06, "loss": 0.2648, "step": 7683 }, { "epoch": 1.7826238255422804, "grad_norm": 13.939992885928524, "learning_rate": 2e-06, "loss": 0.2087, "step": 7684 }, { "epoch": 1.782855817190581, "grad_norm": 8.928404635074617, "learning_rate": 2e-06, "loss": 0.2156, "step": 7685 }, { "epoch": 1.7830878088388817, "grad_norm": 6.86939899315531, "learning_rate": 2e-06, "loss": 0.2018, "step": 7686 }, { "epoch": 1.7833198004871824, "grad_norm": 14.856892617680042, "learning_rate": 2e-06, "loss": 0.2491, "step": 7687 }, { "epoch": 1.783551792135483, "grad_norm": 9.050658316312433, "learning_rate": 2e-06, "loss": 0.2238, "step": 7688 }, { "epoch": 1.7837837837837838, "grad_norm": 10.238929511967964, "learning_rate": 2e-06, "loss": 0.261, "step": 7689 }, { "epoch": 1.7840157754320844, "grad_norm": 12.373983145047632, "learning_rate": 2e-06, "loss": 0.2076, "step": 7690 }, { "epoch": 1.7842477670803851, "grad_norm": 18.484115788006253, "learning_rate": 2e-06, "loss": 0.3735, "step": 7691 }, { "epoch": 1.7844797587286858, "grad_norm": 13.47239908811981, "learning_rate": 2e-06, "loss": 0.3014, "step": 7692 }, { "epoch": 1.7847117503769865, "grad_norm": 13.809744947458755, "learning_rate": 2e-06, "loss": 0.2936, "step": 7693 }, { "epoch": 1.7849437420252872, "grad_norm": 12.90404228506961, "learning_rate": 2e-06, "loss": 0.2841, "step": 7694 }, { "epoch": 1.7851757336735878, "grad_norm": 9.70647010268845, "learning_rate": 2e-06, "loss": 0.194, "step": 7695 }, { "epoch": 1.7854077253218885, "grad_norm": 10.11452466154601, "learning_rate": 2e-06, "loss": 0.2296, "step": 7696 }, { "epoch": 1.7856397169701892, "grad_norm": 18.241161735120254, "learning_rate": 2e-06, "loss": 0.2553, "step": 7697 }, { "epoch": 1.7858717086184899, "grad_norm": 4.363594729824572, "learning_rate": 2e-06, "loss": 0.1453, "step": 7698 }, { "epoch": 1.7861037002667906, "grad_norm": 19.847002056503342, "learning_rate": 2e-06, "loss": 0.2892, "step": 7699 }, { "epoch": 1.786335691915091, "grad_norm": 12.037313097036915, "learning_rate": 2e-06, "loss": 0.1817, "step": 7700 }, { "epoch": 1.7865676835633917, "grad_norm": 9.665541170604797, "learning_rate": 2e-06, "loss": 0.2488, "step": 7701 }, { "epoch": 1.7867996752116924, "grad_norm": 9.578282900368189, "learning_rate": 2e-06, "loss": 0.1973, "step": 7702 }, { "epoch": 1.787031666859993, "grad_norm": 12.011774653107265, "learning_rate": 2e-06, "loss": 0.1994, "step": 7703 }, { "epoch": 1.7872636585082937, "grad_norm": 14.059264981423192, "learning_rate": 2e-06, "loss": 0.3322, "step": 7704 }, { "epoch": 1.7874956501565944, "grad_norm": 11.308037303258596, "learning_rate": 2e-06, "loss": 0.1671, "step": 7705 }, { "epoch": 1.787727641804895, "grad_norm": 14.830070083622193, "learning_rate": 2e-06, "loss": 0.2383, "step": 7706 }, { "epoch": 1.7879596334531955, "grad_norm": 8.970729653365368, "learning_rate": 2e-06, "loss": 0.1878, "step": 7707 }, { "epoch": 1.7881916251014962, "grad_norm": 15.265954978012068, "learning_rate": 2e-06, "loss": 0.1944, "step": 7708 }, { "epoch": 1.7884236167497969, "grad_norm": 9.267580147523244, "learning_rate": 2e-06, "loss": 0.1827, "step": 7709 }, { "epoch": 1.7886556083980976, "grad_norm": 11.816356243554782, "learning_rate": 2e-06, "loss": 0.2213, "step": 7710 }, { "epoch": 1.7888876000463982, "grad_norm": 14.13892712739281, "learning_rate": 2e-06, "loss": 0.3183, "step": 7711 }, { "epoch": 1.789119591694699, "grad_norm": 24.37772003449913, "learning_rate": 2e-06, "loss": 0.3273, "step": 7712 }, { "epoch": 1.7893515833429996, "grad_norm": 18.881919815002593, "learning_rate": 2e-06, "loss": 0.2567, "step": 7713 }, { "epoch": 1.7895835749913003, "grad_norm": 9.161757211887082, "learning_rate": 2e-06, "loss": 0.1735, "step": 7714 }, { "epoch": 1.789815566639601, "grad_norm": 17.849954977305433, "learning_rate": 2e-06, "loss": 0.3116, "step": 7715 }, { "epoch": 1.7900475582879016, "grad_norm": 8.87979005272126, "learning_rate": 2e-06, "loss": 0.137, "step": 7716 }, { "epoch": 1.7902795499362023, "grad_norm": 17.163684039103828, "learning_rate": 2e-06, "loss": 0.3231, "step": 7717 }, { "epoch": 1.790511541584503, "grad_norm": 10.957487983245123, "learning_rate": 2e-06, "loss": 0.2304, "step": 7718 }, { "epoch": 1.7907435332328037, "grad_norm": 7.331413316833315, "learning_rate": 2e-06, "loss": 0.2005, "step": 7719 }, { "epoch": 1.7909755248811043, "grad_norm": 10.347401603722119, "learning_rate": 2e-06, "loss": 0.2613, "step": 7720 }, { "epoch": 1.791207516529405, "grad_norm": 14.864802701315973, "learning_rate": 2e-06, "loss": 0.2408, "step": 7721 }, { "epoch": 1.7914395081777057, "grad_norm": 12.12204936362209, "learning_rate": 2e-06, "loss": 0.2537, "step": 7722 }, { "epoch": 1.7916714998260064, "grad_norm": 11.61910317439687, "learning_rate": 2e-06, "loss": 0.2107, "step": 7723 }, { "epoch": 1.791903491474307, "grad_norm": 11.021376022092555, "learning_rate": 2e-06, "loss": 0.2689, "step": 7724 }, { "epoch": 1.7921354831226077, "grad_norm": 14.823213878454235, "learning_rate": 2e-06, "loss": 0.2617, "step": 7725 }, { "epoch": 1.7923674747709084, "grad_norm": 13.759950442388043, "learning_rate": 2e-06, "loss": 0.284, "step": 7726 }, { "epoch": 1.7925994664192089, "grad_norm": 13.73359765140498, "learning_rate": 2e-06, "loss": 0.1964, "step": 7727 }, { "epoch": 1.7928314580675095, "grad_norm": 21.592029416160536, "learning_rate": 2e-06, "loss": 0.2878, "step": 7728 }, { "epoch": 1.7930634497158102, "grad_norm": 13.066431196494792, "learning_rate": 2e-06, "loss": 0.2148, "step": 7729 }, { "epoch": 1.793295441364111, "grad_norm": 23.518466840599984, "learning_rate": 2e-06, "loss": 0.3538, "step": 7730 }, { "epoch": 1.7935274330124116, "grad_norm": 22.280556048239266, "learning_rate": 2e-06, "loss": 0.3328, "step": 7731 }, { "epoch": 1.7937594246607123, "grad_norm": 18.70270509152741, "learning_rate": 2e-06, "loss": 0.2993, "step": 7732 }, { "epoch": 1.7939914163090127, "grad_norm": 14.745009418359984, "learning_rate": 2e-06, "loss": 0.2493, "step": 7733 }, { "epoch": 1.7942234079573134, "grad_norm": 7.020182056098816, "learning_rate": 2e-06, "loss": 0.1825, "step": 7734 }, { "epoch": 1.794455399605614, "grad_norm": 8.9041045197384, "learning_rate": 2e-06, "loss": 0.1951, "step": 7735 }, { "epoch": 1.7946873912539147, "grad_norm": 16.96340376259227, "learning_rate": 2e-06, "loss": 0.2135, "step": 7736 }, { "epoch": 1.7949193829022154, "grad_norm": 13.063819447973446, "learning_rate": 2e-06, "loss": 0.3117, "step": 7737 }, { "epoch": 1.795151374550516, "grad_norm": 17.813456129041118, "learning_rate": 2e-06, "loss": 0.319, "step": 7738 }, { "epoch": 1.7953833661988168, "grad_norm": 14.21559790292514, "learning_rate": 2e-06, "loss": 0.3333, "step": 7739 }, { "epoch": 1.7956153578471175, "grad_norm": 13.970632816975339, "learning_rate": 2e-06, "loss": 0.3119, "step": 7740 }, { "epoch": 1.7958473494954181, "grad_norm": 20.13254991016089, "learning_rate": 2e-06, "loss": 0.3724, "step": 7741 }, { "epoch": 1.7960793411437188, "grad_norm": 12.702450368500116, "learning_rate": 2e-06, "loss": 0.1933, "step": 7742 }, { "epoch": 1.7963113327920195, "grad_norm": 5.033487482828364, "learning_rate": 2e-06, "loss": 0.1609, "step": 7743 }, { "epoch": 1.7965433244403202, "grad_norm": 13.226202684968266, "learning_rate": 2e-06, "loss": 0.2027, "step": 7744 }, { "epoch": 1.7967753160886208, "grad_norm": 11.445493042999509, "learning_rate": 2e-06, "loss": 0.2252, "step": 7745 }, { "epoch": 1.7970073077369215, "grad_norm": 9.386391237175017, "learning_rate": 2e-06, "loss": 0.2463, "step": 7746 }, { "epoch": 1.7972392993852222, "grad_norm": 8.137216847310562, "learning_rate": 2e-06, "loss": 0.2218, "step": 7747 }, { "epoch": 1.7974712910335229, "grad_norm": 14.29784916326407, "learning_rate": 2e-06, "loss": 0.2736, "step": 7748 }, { "epoch": 1.7977032826818236, "grad_norm": 32.617210845413794, "learning_rate": 2e-06, "loss": 0.5112, "step": 7749 }, { "epoch": 1.7979352743301242, "grad_norm": 11.9630554774893, "learning_rate": 2e-06, "loss": 0.2474, "step": 7750 }, { "epoch": 1.798167265978425, "grad_norm": 9.624929990079778, "learning_rate": 2e-06, "loss": 0.2495, "step": 7751 }, { "epoch": 1.7983992576267256, "grad_norm": 14.670156870012233, "learning_rate": 2e-06, "loss": 0.2639, "step": 7752 }, { "epoch": 1.798631249275026, "grad_norm": 10.86678132759406, "learning_rate": 2e-06, "loss": 0.2205, "step": 7753 }, { "epoch": 1.7988632409233267, "grad_norm": 16.638640631879152, "learning_rate": 2e-06, "loss": 0.3019, "step": 7754 }, { "epoch": 1.7990952325716274, "grad_norm": 10.957011788431164, "learning_rate": 2e-06, "loss": 0.2409, "step": 7755 }, { "epoch": 1.799327224219928, "grad_norm": 11.232915987644578, "learning_rate": 2e-06, "loss": 0.2198, "step": 7756 }, { "epoch": 1.7995592158682288, "grad_norm": 7.602008455160166, "learning_rate": 2e-06, "loss": 0.1659, "step": 7757 }, { "epoch": 1.7997912075165294, "grad_norm": 10.923209403337351, "learning_rate": 2e-06, "loss": 0.2615, "step": 7758 }, { "epoch": 1.8000231991648301, "grad_norm": 7.540291491452469, "learning_rate": 2e-06, "loss": 0.1623, "step": 7759 }, { "epoch": 1.8002551908131306, "grad_norm": 5.588197524067868, "learning_rate": 2e-06, "loss": 0.1303, "step": 7760 }, { "epoch": 1.8004871824614312, "grad_norm": 7.92180637279653, "learning_rate": 2e-06, "loss": 0.1813, "step": 7761 }, { "epoch": 1.800719174109732, "grad_norm": 10.141586008302626, "learning_rate": 2e-06, "loss": 0.2743, "step": 7762 }, { "epoch": 1.8009511657580326, "grad_norm": 21.33250743923904, "learning_rate": 2e-06, "loss": 0.334, "step": 7763 }, { "epoch": 1.8011831574063333, "grad_norm": 10.204261149433293, "learning_rate": 2e-06, "loss": 0.1503, "step": 7764 }, { "epoch": 1.801415149054634, "grad_norm": 14.724303989684397, "learning_rate": 2e-06, "loss": 0.2317, "step": 7765 }, { "epoch": 1.8016471407029346, "grad_norm": 11.189179616754403, "learning_rate": 2e-06, "loss": 0.2597, "step": 7766 }, { "epoch": 1.8018791323512353, "grad_norm": 8.667717048927448, "learning_rate": 2e-06, "loss": 0.1805, "step": 7767 }, { "epoch": 1.802111123999536, "grad_norm": 10.099312307553099, "learning_rate": 2e-06, "loss": 0.2101, "step": 7768 }, { "epoch": 1.8023431156478367, "grad_norm": 10.156468261813107, "learning_rate": 2e-06, "loss": 0.2434, "step": 7769 }, { "epoch": 1.8025751072961373, "grad_norm": 6.768385693132343, "learning_rate": 2e-06, "loss": 0.23, "step": 7770 }, { "epoch": 1.802807098944438, "grad_norm": 9.443324597411717, "learning_rate": 2e-06, "loss": 0.2353, "step": 7771 }, { "epoch": 1.8030390905927387, "grad_norm": 10.59913449235798, "learning_rate": 2e-06, "loss": 0.2387, "step": 7772 }, { "epoch": 1.8032710822410394, "grad_norm": 16.13258187524545, "learning_rate": 2e-06, "loss": 0.2608, "step": 7773 }, { "epoch": 1.80350307388934, "grad_norm": 15.139015354804348, "learning_rate": 2e-06, "loss": 0.376, "step": 7774 }, { "epoch": 1.8037350655376407, "grad_norm": 10.451231399468004, "learning_rate": 2e-06, "loss": 0.2176, "step": 7775 }, { "epoch": 1.8039670571859414, "grad_norm": 17.00309216109801, "learning_rate": 2e-06, "loss": 0.2994, "step": 7776 }, { "epoch": 1.804199048834242, "grad_norm": 12.648387109888144, "learning_rate": 2e-06, "loss": 0.2219, "step": 7777 }, { "epoch": 1.8044310404825428, "grad_norm": 17.7688065718772, "learning_rate": 2e-06, "loss": 0.2452, "step": 7778 }, { "epoch": 1.8046630321308434, "grad_norm": 14.367544047444886, "learning_rate": 2e-06, "loss": 0.2233, "step": 7779 }, { "epoch": 1.804895023779144, "grad_norm": 9.520236345447163, "learning_rate": 2e-06, "loss": 0.1703, "step": 7780 }, { "epoch": 1.8051270154274446, "grad_norm": 14.606371152511754, "learning_rate": 2e-06, "loss": 0.2248, "step": 7781 }, { "epoch": 1.8053590070757453, "grad_norm": 5.579155887762543, "learning_rate": 2e-06, "loss": 0.1576, "step": 7782 }, { "epoch": 1.805590998724046, "grad_norm": 12.948532567369465, "learning_rate": 2e-06, "loss": 0.2833, "step": 7783 }, { "epoch": 1.8058229903723466, "grad_norm": 16.693346240562605, "learning_rate": 2e-06, "loss": 0.3763, "step": 7784 }, { "epoch": 1.8060549820206473, "grad_norm": 15.82863877420163, "learning_rate": 2e-06, "loss": 0.2494, "step": 7785 }, { "epoch": 1.806286973668948, "grad_norm": 8.62009366577174, "learning_rate": 2e-06, "loss": 0.2333, "step": 7786 }, { "epoch": 1.8065189653172484, "grad_norm": 15.841950814591993, "learning_rate": 2e-06, "loss": 0.2895, "step": 7787 }, { "epoch": 1.806750956965549, "grad_norm": 19.67162703794575, "learning_rate": 2e-06, "loss": 0.2885, "step": 7788 }, { "epoch": 1.8069829486138498, "grad_norm": 9.324527872345765, "learning_rate": 2e-06, "loss": 0.2164, "step": 7789 }, { "epoch": 1.8072149402621505, "grad_norm": 8.803393444656317, "learning_rate": 2e-06, "loss": 0.2439, "step": 7790 }, { "epoch": 1.8074469319104511, "grad_norm": 10.688359265610206, "learning_rate": 2e-06, "loss": 0.2431, "step": 7791 }, { "epoch": 1.8076789235587518, "grad_norm": 20.370846970552368, "learning_rate": 2e-06, "loss": 0.2405, "step": 7792 }, { "epoch": 1.8079109152070525, "grad_norm": 12.314345405676008, "learning_rate": 2e-06, "loss": 0.3558, "step": 7793 }, { "epoch": 1.8081429068553532, "grad_norm": 9.02896450167309, "learning_rate": 2e-06, "loss": 0.2096, "step": 7794 }, { "epoch": 1.8083748985036538, "grad_norm": 11.910756638938176, "learning_rate": 2e-06, "loss": 0.1928, "step": 7795 }, { "epoch": 1.8086068901519545, "grad_norm": 10.854730659767704, "learning_rate": 2e-06, "loss": 0.2081, "step": 7796 }, { "epoch": 1.8088388818002552, "grad_norm": 14.871704613286662, "learning_rate": 2e-06, "loss": 0.2384, "step": 7797 }, { "epoch": 1.8090708734485559, "grad_norm": 10.544675807311968, "learning_rate": 2e-06, "loss": 0.1492, "step": 7798 }, { "epoch": 1.8093028650968566, "grad_norm": 13.806037913485634, "learning_rate": 2e-06, "loss": 0.2252, "step": 7799 }, { "epoch": 1.8095348567451572, "grad_norm": 11.072548191035832, "learning_rate": 2e-06, "loss": 0.2812, "step": 7800 }, { "epoch": 1.809766848393458, "grad_norm": 11.267678733025672, "learning_rate": 2e-06, "loss": 0.1959, "step": 7801 }, { "epoch": 1.8099988400417586, "grad_norm": 18.377656686975637, "learning_rate": 2e-06, "loss": 0.241, "step": 7802 }, { "epoch": 1.8102308316900593, "grad_norm": 25.95688759321459, "learning_rate": 2e-06, "loss": 0.3239, "step": 7803 }, { "epoch": 1.81046282333836, "grad_norm": 13.683329106749722, "learning_rate": 2e-06, "loss": 0.1345, "step": 7804 }, { "epoch": 1.8106948149866606, "grad_norm": 9.097969627130546, "learning_rate": 2e-06, "loss": 0.1568, "step": 7805 }, { "epoch": 1.8109268066349613, "grad_norm": 20.28212796053784, "learning_rate": 2e-06, "loss": 0.2317, "step": 7806 }, { "epoch": 1.8111587982832618, "grad_norm": 13.13116391176701, "learning_rate": 2e-06, "loss": 0.2239, "step": 7807 }, { "epoch": 1.8113907899315624, "grad_norm": 35.367434165456125, "learning_rate": 2e-06, "loss": 0.2811, "step": 7808 }, { "epoch": 1.8116227815798631, "grad_norm": 13.284596823413086, "learning_rate": 2e-06, "loss": 0.218, "step": 7809 }, { "epoch": 1.8118547732281638, "grad_norm": 25.05754023794442, "learning_rate": 2e-06, "loss": 0.199, "step": 7810 }, { "epoch": 1.8120867648764645, "grad_norm": 11.14006570651281, "learning_rate": 2e-06, "loss": 0.1748, "step": 7811 }, { "epoch": 1.8123187565247652, "grad_norm": 9.07722743677708, "learning_rate": 2e-06, "loss": 0.1929, "step": 7812 }, { "epoch": 1.8125507481730656, "grad_norm": 22.349518160097013, "learning_rate": 2e-06, "loss": 0.2513, "step": 7813 }, { "epoch": 1.8127827398213663, "grad_norm": 9.248339748264183, "learning_rate": 2e-06, "loss": 0.1483, "step": 7814 }, { "epoch": 1.813014731469667, "grad_norm": 11.764457418025861, "learning_rate": 2e-06, "loss": 0.2851, "step": 7815 }, { "epoch": 1.8132467231179676, "grad_norm": 12.019207291970828, "learning_rate": 2e-06, "loss": 0.2914, "step": 7816 }, { "epoch": 1.8134787147662683, "grad_norm": 11.780327657611625, "learning_rate": 2e-06, "loss": 0.1862, "step": 7817 }, { "epoch": 1.813710706414569, "grad_norm": 7.56300305679845, "learning_rate": 2e-06, "loss": 0.2186, "step": 7818 }, { "epoch": 1.8139426980628697, "grad_norm": 18.84673033730254, "learning_rate": 2e-06, "loss": 0.2622, "step": 7819 }, { "epoch": 1.8141746897111704, "grad_norm": 4.401220419426188, "learning_rate": 2e-06, "loss": 0.1358, "step": 7820 }, { "epoch": 1.814406681359471, "grad_norm": 22.284767666525287, "learning_rate": 2e-06, "loss": 0.2526, "step": 7821 }, { "epoch": 1.8146386730077717, "grad_norm": 18.64348610947482, "learning_rate": 2e-06, "loss": 0.2767, "step": 7822 }, { "epoch": 1.8148706646560724, "grad_norm": 10.983716767173828, "learning_rate": 2e-06, "loss": 0.1831, "step": 7823 }, { "epoch": 1.815102656304373, "grad_norm": 12.724066637808379, "learning_rate": 2e-06, "loss": 0.1808, "step": 7824 }, { "epoch": 1.8153346479526737, "grad_norm": 14.024835862214248, "learning_rate": 2e-06, "loss": 0.2691, "step": 7825 }, { "epoch": 1.8155666396009744, "grad_norm": 13.519710827991254, "learning_rate": 2e-06, "loss": 0.1943, "step": 7826 }, { "epoch": 1.815798631249275, "grad_norm": 14.843148579657452, "learning_rate": 2e-06, "loss": 0.2213, "step": 7827 }, { "epoch": 1.8160306228975758, "grad_norm": 9.460839761780512, "learning_rate": 2e-06, "loss": 0.1931, "step": 7828 }, { "epoch": 1.8162626145458765, "grad_norm": 38.541938313911785, "learning_rate": 2e-06, "loss": 0.3944, "step": 7829 }, { "epoch": 1.8164946061941771, "grad_norm": 13.17265386137172, "learning_rate": 2e-06, "loss": 0.2638, "step": 7830 }, { "epoch": 1.8167265978424778, "grad_norm": 12.84058855474656, "learning_rate": 2e-06, "loss": 0.2178, "step": 7831 }, { "epoch": 1.8169585894907785, "grad_norm": 10.788615152278911, "learning_rate": 2e-06, "loss": 0.221, "step": 7832 }, { "epoch": 1.817190581139079, "grad_norm": 19.497970514954385, "learning_rate": 2e-06, "loss": 0.1933, "step": 7833 }, { "epoch": 1.8174225727873796, "grad_norm": 11.516461157637663, "learning_rate": 2e-06, "loss": 0.1594, "step": 7834 }, { "epoch": 1.8176545644356803, "grad_norm": 13.070370027025735, "learning_rate": 2e-06, "loss": 0.1997, "step": 7835 }, { "epoch": 1.817886556083981, "grad_norm": 11.093947259075945, "learning_rate": 2e-06, "loss": 0.2507, "step": 7836 }, { "epoch": 1.8181185477322817, "grad_norm": 10.952601265349044, "learning_rate": 2e-06, "loss": 0.2291, "step": 7837 }, { "epoch": 1.8183505393805823, "grad_norm": 12.583982455505959, "learning_rate": 2e-06, "loss": 0.2542, "step": 7838 }, { "epoch": 1.818582531028883, "grad_norm": 10.412530305980933, "learning_rate": 2e-06, "loss": 0.218, "step": 7839 }, { "epoch": 1.8188145226771835, "grad_norm": 20.58663323966539, "learning_rate": 2e-06, "loss": 0.3298, "step": 7840 }, { "epoch": 1.8190465143254841, "grad_norm": 16.069589171499164, "learning_rate": 2e-06, "loss": 0.2718, "step": 7841 }, { "epoch": 1.8192785059737848, "grad_norm": 15.718424709122079, "learning_rate": 2e-06, "loss": 0.2185, "step": 7842 }, { "epoch": 1.8195104976220855, "grad_norm": 13.114479648383073, "learning_rate": 2e-06, "loss": 0.2185, "step": 7843 }, { "epoch": 1.8197424892703862, "grad_norm": 11.817145160415038, "learning_rate": 2e-06, "loss": 0.2357, "step": 7844 }, { "epoch": 1.8199744809186869, "grad_norm": 8.646679220264772, "learning_rate": 2e-06, "loss": 0.1634, "step": 7845 }, { "epoch": 1.8202064725669875, "grad_norm": 12.234848247512957, "learning_rate": 2e-06, "loss": 0.1666, "step": 7846 }, { "epoch": 1.8204384642152882, "grad_norm": 22.072929455242598, "learning_rate": 2e-06, "loss": 0.4095, "step": 7847 }, { "epoch": 1.8206704558635889, "grad_norm": 10.690070905760146, "learning_rate": 2e-06, "loss": 0.1838, "step": 7848 }, { "epoch": 1.8209024475118896, "grad_norm": 11.534125053246434, "learning_rate": 2e-06, "loss": 0.2829, "step": 7849 }, { "epoch": 1.8211344391601902, "grad_norm": 7.3850766019875955, "learning_rate": 2e-06, "loss": 0.1526, "step": 7850 }, { "epoch": 1.821366430808491, "grad_norm": 10.193700861814932, "learning_rate": 2e-06, "loss": 0.1689, "step": 7851 }, { "epoch": 1.8215984224567916, "grad_norm": 7.246342613215927, "learning_rate": 2e-06, "loss": 0.1663, "step": 7852 }, { "epoch": 1.8218304141050923, "grad_norm": 10.911410337288949, "learning_rate": 2e-06, "loss": 0.2319, "step": 7853 }, { "epoch": 1.822062405753393, "grad_norm": 28.141488972018617, "learning_rate": 2e-06, "loss": 0.281, "step": 7854 }, { "epoch": 1.8222943974016936, "grad_norm": 15.119773914996467, "learning_rate": 2e-06, "loss": 0.2547, "step": 7855 }, { "epoch": 1.8225263890499943, "grad_norm": 8.544803647608756, "learning_rate": 2e-06, "loss": 0.1434, "step": 7856 }, { "epoch": 1.822758380698295, "grad_norm": 13.881430004783747, "learning_rate": 2e-06, "loss": 0.3103, "step": 7857 }, { "epoch": 1.8229903723465957, "grad_norm": 20.911031864699904, "learning_rate": 2e-06, "loss": 0.3389, "step": 7858 }, { "epoch": 1.8232223639948963, "grad_norm": 18.0973394831566, "learning_rate": 2e-06, "loss": 0.2147, "step": 7859 }, { "epoch": 1.8234543556431968, "grad_norm": 8.356805904776557, "learning_rate": 2e-06, "loss": 0.1853, "step": 7860 }, { "epoch": 1.8236863472914975, "grad_norm": 9.609173826516205, "learning_rate": 2e-06, "loss": 0.1511, "step": 7861 }, { "epoch": 1.8239183389397982, "grad_norm": 14.17171157061349, "learning_rate": 2e-06, "loss": 0.3058, "step": 7862 }, { "epoch": 1.8241503305880988, "grad_norm": 8.80752026693199, "learning_rate": 2e-06, "loss": 0.2101, "step": 7863 }, { "epoch": 1.8243823222363995, "grad_norm": 7.813485615307996, "learning_rate": 2e-06, "loss": 0.2748, "step": 7864 }, { "epoch": 1.8246143138847002, "grad_norm": 9.32680535990223, "learning_rate": 2e-06, "loss": 0.2522, "step": 7865 }, { "epoch": 1.8248463055330006, "grad_norm": 9.896185768039878, "learning_rate": 2e-06, "loss": 0.1993, "step": 7866 }, { "epoch": 1.8250782971813013, "grad_norm": 16.489932650037563, "learning_rate": 2e-06, "loss": 0.4062, "step": 7867 }, { "epoch": 1.825310288829602, "grad_norm": 12.21555760793375, "learning_rate": 2e-06, "loss": 0.2114, "step": 7868 }, { "epoch": 1.8255422804779027, "grad_norm": 10.668708934887045, "learning_rate": 2e-06, "loss": 0.2042, "step": 7869 }, { "epoch": 1.8257742721262034, "grad_norm": 19.506231923470228, "learning_rate": 2e-06, "loss": 0.2417, "step": 7870 }, { "epoch": 1.826006263774504, "grad_norm": 15.243980399678323, "learning_rate": 2e-06, "loss": 0.3222, "step": 7871 }, { "epoch": 1.8262382554228047, "grad_norm": 8.266212958892414, "learning_rate": 2e-06, "loss": 0.1827, "step": 7872 }, { "epoch": 1.8264702470711054, "grad_norm": 9.228484859678517, "learning_rate": 2e-06, "loss": 0.1577, "step": 7873 }, { "epoch": 1.826702238719406, "grad_norm": 15.119166628188635, "learning_rate": 2e-06, "loss": 0.1612, "step": 7874 }, { "epoch": 1.8269342303677067, "grad_norm": 7.614144747643755, "learning_rate": 2e-06, "loss": 0.1429, "step": 7875 }, { "epoch": 1.8271662220160074, "grad_norm": 13.10715413771453, "learning_rate": 2e-06, "loss": 0.2377, "step": 7876 }, { "epoch": 1.827398213664308, "grad_norm": 17.644967445732938, "learning_rate": 2e-06, "loss": 0.2515, "step": 7877 }, { "epoch": 1.8276302053126088, "grad_norm": 19.092950755612446, "learning_rate": 2e-06, "loss": 0.3464, "step": 7878 }, { "epoch": 1.8278621969609095, "grad_norm": 12.387344155425442, "learning_rate": 2e-06, "loss": 0.3138, "step": 7879 }, { "epoch": 1.8280941886092101, "grad_norm": 14.011979558604576, "learning_rate": 2e-06, "loss": 0.2179, "step": 7880 }, { "epoch": 1.8283261802575108, "grad_norm": 15.390569673859822, "learning_rate": 2e-06, "loss": 0.2641, "step": 7881 }, { "epoch": 1.8285581719058115, "grad_norm": 14.684780900009232, "learning_rate": 2e-06, "loss": 0.1935, "step": 7882 }, { "epoch": 1.8287901635541122, "grad_norm": 11.430777807918494, "learning_rate": 2e-06, "loss": 0.18, "step": 7883 }, { "epoch": 1.8290221552024128, "grad_norm": 7.791418141941399, "learning_rate": 2e-06, "loss": 0.1834, "step": 7884 }, { "epoch": 1.8292541468507135, "grad_norm": 8.64636567131907, "learning_rate": 2e-06, "loss": 0.1954, "step": 7885 }, { "epoch": 1.829486138499014, "grad_norm": 15.346495917999354, "learning_rate": 2e-06, "loss": 0.2054, "step": 7886 }, { "epoch": 1.8297181301473147, "grad_norm": 12.296776262277993, "learning_rate": 2e-06, "loss": 0.1336, "step": 7887 }, { "epoch": 1.8299501217956153, "grad_norm": 19.494222119023746, "learning_rate": 2e-06, "loss": 0.2837, "step": 7888 }, { "epoch": 1.830182113443916, "grad_norm": 13.25582425439502, "learning_rate": 2e-06, "loss": 0.3047, "step": 7889 }, { "epoch": 1.8304141050922167, "grad_norm": 9.671077020296584, "learning_rate": 2e-06, "loss": 0.1777, "step": 7890 }, { "epoch": 1.8306460967405174, "grad_norm": 10.511830121222385, "learning_rate": 2e-06, "loss": 0.1911, "step": 7891 }, { "epoch": 1.830878088388818, "grad_norm": 12.49834553629901, "learning_rate": 2e-06, "loss": 0.2534, "step": 7892 }, { "epoch": 1.8311100800371185, "grad_norm": 17.583887619416416, "learning_rate": 2e-06, "loss": 0.2884, "step": 7893 }, { "epoch": 1.8313420716854192, "grad_norm": 13.841502274963958, "learning_rate": 2e-06, "loss": 0.1891, "step": 7894 }, { "epoch": 1.8315740633337199, "grad_norm": 12.67870442340447, "learning_rate": 2e-06, "loss": 0.2628, "step": 7895 }, { "epoch": 1.8318060549820205, "grad_norm": 16.384562089918386, "learning_rate": 2e-06, "loss": 0.2482, "step": 7896 }, { "epoch": 1.8320380466303212, "grad_norm": 16.046864886905126, "learning_rate": 2e-06, "loss": 0.2183, "step": 7897 }, { "epoch": 1.832270038278622, "grad_norm": 11.993529864813677, "learning_rate": 2e-06, "loss": 0.2744, "step": 7898 }, { "epoch": 1.8325020299269226, "grad_norm": 16.457508380663167, "learning_rate": 2e-06, "loss": 0.3141, "step": 7899 }, { "epoch": 1.8327340215752232, "grad_norm": 11.896321643614053, "learning_rate": 2e-06, "loss": 0.1972, "step": 7900 }, { "epoch": 1.832966013223524, "grad_norm": 10.634033931621662, "learning_rate": 2e-06, "loss": 0.2399, "step": 7901 }, { "epoch": 1.8331980048718246, "grad_norm": 17.425657647560143, "learning_rate": 2e-06, "loss": 0.305, "step": 7902 }, { "epoch": 1.8334299965201253, "grad_norm": 7.202441697727795, "learning_rate": 2e-06, "loss": 0.1521, "step": 7903 }, { "epoch": 1.833661988168426, "grad_norm": 17.19205030465811, "learning_rate": 2e-06, "loss": 0.2845, "step": 7904 }, { "epoch": 1.8338939798167266, "grad_norm": 19.90010493299395, "learning_rate": 2e-06, "loss": 0.2641, "step": 7905 }, { "epoch": 1.8341259714650273, "grad_norm": 19.52854387682745, "learning_rate": 2e-06, "loss": 0.3048, "step": 7906 }, { "epoch": 1.834357963113328, "grad_norm": 10.902417230263483, "learning_rate": 2e-06, "loss": 0.2184, "step": 7907 }, { "epoch": 1.8345899547616287, "grad_norm": 17.362599545004098, "learning_rate": 2e-06, "loss": 0.2387, "step": 7908 }, { "epoch": 1.8348219464099293, "grad_norm": 16.18254568800934, "learning_rate": 2e-06, "loss": 0.2198, "step": 7909 }, { "epoch": 1.83505393805823, "grad_norm": 27.008496402497844, "learning_rate": 2e-06, "loss": 0.5212, "step": 7910 }, { "epoch": 1.8352859297065307, "grad_norm": 23.159668358951915, "learning_rate": 2e-06, "loss": 0.3255, "step": 7911 }, { "epoch": 1.8355179213548314, "grad_norm": 21.345873761286768, "learning_rate": 2e-06, "loss": 0.2788, "step": 7912 }, { "epoch": 1.8357499130031318, "grad_norm": 16.364693029156975, "learning_rate": 2e-06, "loss": 0.2494, "step": 7913 }, { "epoch": 1.8359819046514325, "grad_norm": 21.265562874143434, "learning_rate": 2e-06, "loss": 0.2334, "step": 7914 }, { "epoch": 1.8362138962997332, "grad_norm": 8.42226495039441, "learning_rate": 2e-06, "loss": 0.2178, "step": 7915 }, { "epoch": 1.8364458879480339, "grad_norm": 14.120536069491699, "learning_rate": 2e-06, "loss": 0.2106, "step": 7916 }, { "epoch": 1.8366778795963346, "grad_norm": 16.753426930658062, "learning_rate": 2e-06, "loss": 0.2631, "step": 7917 }, { "epoch": 1.8369098712446352, "grad_norm": 9.596391419187736, "learning_rate": 2e-06, "loss": 0.1716, "step": 7918 }, { "epoch": 1.837141862892936, "grad_norm": 14.833436969950283, "learning_rate": 2e-06, "loss": 0.2554, "step": 7919 }, { "epoch": 1.8373738545412364, "grad_norm": 6.294776122050837, "learning_rate": 2e-06, "loss": 0.1314, "step": 7920 }, { "epoch": 1.837605846189537, "grad_norm": 8.58136538816046, "learning_rate": 2e-06, "loss": 0.1775, "step": 7921 }, { "epoch": 1.8378378378378377, "grad_norm": 6.839034671602942, "learning_rate": 2e-06, "loss": 0.147, "step": 7922 }, { "epoch": 1.8380698294861384, "grad_norm": 11.75226477118561, "learning_rate": 2e-06, "loss": 0.2727, "step": 7923 }, { "epoch": 1.838301821134439, "grad_norm": 16.951382202621378, "learning_rate": 2e-06, "loss": 0.387, "step": 7924 }, { "epoch": 1.8385338127827398, "grad_norm": 9.669318688045863, "learning_rate": 2e-06, "loss": 0.2122, "step": 7925 }, { "epoch": 1.8387658044310404, "grad_norm": 10.658857823286702, "learning_rate": 2e-06, "loss": 0.1693, "step": 7926 }, { "epoch": 1.838997796079341, "grad_norm": 11.164269201204279, "learning_rate": 2e-06, "loss": 0.2675, "step": 7927 }, { "epoch": 1.8392297877276418, "grad_norm": 11.292803059157091, "learning_rate": 2e-06, "loss": 0.3351, "step": 7928 }, { "epoch": 1.8394617793759425, "grad_norm": 13.662105944550406, "learning_rate": 2e-06, "loss": 0.2295, "step": 7929 }, { "epoch": 1.8396937710242431, "grad_norm": 9.984871014211448, "learning_rate": 2e-06, "loss": 0.1737, "step": 7930 }, { "epoch": 1.8399257626725438, "grad_norm": 15.528006175833218, "learning_rate": 2e-06, "loss": 0.2634, "step": 7931 }, { "epoch": 1.8401577543208445, "grad_norm": 11.802930981454681, "learning_rate": 2e-06, "loss": 0.3148, "step": 7932 }, { "epoch": 1.8403897459691452, "grad_norm": 25.823960323458206, "learning_rate": 2e-06, "loss": 0.3875, "step": 7933 }, { "epoch": 1.8406217376174459, "grad_norm": 5.4731564900955645, "learning_rate": 2e-06, "loss": 0.1652, "step": 7934 }, { "epoch": 1.8408537292657465, "grad_norm": 12.465598680342927, "learning_rate": 2e-06, "loss": 0.2969, "step": 7935 }, { "epoch": 1.8410857209140472, "grad_norm": 10.890761811575437, "learning_rate": 2e-06, "loss": 0.1851, "step": 7936 }, { "epoch": 1.8413177125623479, "grad_norm": 11.075531771245522, "learning_rate": 2e-06, "loss": 0.19, "step": 7937 }, { "epoch": 1.8415497042106486, "grad_norm": 10.76191718337455, "learning_rate": 2e-06, "loss": 0.2794, "step": 7938 }, { "epoch": 1.8417816958589492, "grad_norm": 13.447884654447204, "learning_rate": 2e-06, "loss": 0.2633, "step": 7939 }, { "epoch": 1.8420136875072497, "grad_norm": 8.35862362187622, "learning_rate": 2e-06, "loss": 0.2139, "step": 7940 }, { "epoch": 1.8422456791555504, "grad_norm": 11.615926552765899, "learning_rate": 2e-06, "loss": 0.1836, "step": 7941 }, { "epoch": 1.842477670803851, "grad_norm": 16.797906163493213, "learning_rate": 2e-06, "loss": 0.2207, "step": 7942 }, { "epoch": 1.8427096624521517, "grad_norm": 11.526737998281494, "learning_rate": 2e-06, "loss": 0.2684, "step": 7943 }, { "epoch": 1.8429416541004524, "grad_norm": 14.813675655160232, "learning_rate": 2e-06, "loss": 0.2797, "step": 7944 }, { "epoch": 1.843173645748753, "grad_norm": 16.873756448801746, "learning_rate": 2e-06, "loss": 0.1525, "step": 7945 }, { "epoch": 1.8434056373970535, "grad_norm": 8.490637239098254, "learning_rate": 2e-06, "loss": 0.1979, "step": 7946 }, { "epoch": 1.8436376290453542, "grad_norm": 8.188933475422282, "learning_rate": 2e-06, "loss": 0.1951, "step": 7947 }, { "epoch": 1.843869620693655, "grad_norm": 12.989538195546992, "learning_rate": 2e-06, "loss": 0.2963, "step": 7948 }, { "epoch": 1.8441016123419556, "grad_norm": 25.921803203614267, "learning_rate": 2e-06, "loss": 0.2511, "step": 7949 }, { "epoch": 1.8443336039902563, "grad_norm": 13.38270787112594, "learning_rate": 2e-06, "loss": 0.278, "step": 7950 }, { "epoch": 1.844565595638557, "grad_norm": 15.901728624066433, "learning_rate": 2e-06, "loss": 0.2949, "step": 7951 }, { "epoch": 1.8447975872868576, "grad_norm": 30.953565323139976, "learning_rate": 2e-06, "loss": 0.3273, "step": 7952 }, { "epoch": 1.8450295789351583, "grad_norm": 12.810216707183498, "learning_rate": 2e-06, "loss": 0.1859, "step": 7953 }, { "epoch": 1.845261570583459, "grad_norm": 11.592796853621131, "learning_rate": 2e-06, "loss": 0.1906, "step": 7954 }, { "epoch": 1.8454935622317596, "grad_norm": 8.935925628117879, "learning_rate": 2e-06, "loss": 0.2226, "step": 7955 }, { "epoch": 1.8457255538800603, "grad_norm": 9.500849218443497, "learning_rate": 2e-06, "loss": 0.215, "step": 7956 }, { "epoch": 1.845957545528361, "grad_norm": 10.944130618833324, "learning_rate": 2e-06, "loss": 0.2248, "step": 7957 }, { "epoch": 1.8461895371766617, "grad_norm": 9.18069076395506, "learning_rate": 2e-06, "loss": 0.1856, "step": 7958 }, { "epoch": 1.8464215288249624, "grad_norm": 9.685371708277902, "learning_rate": 2e-06, "loss": 0.1926, "step": 7959 }, { "epoch": 1.846653520473263, "grad_norm": 26.997531508708946, "learning_rate": 2e-06, "loss": 0.3127, "step": 7960 }, { "epoch": 1.8468855121215637, "grad_norm": 12.585809388667009, "learning_rate": 2e-06, "loss": 0.1861, "step": 7961 }, { "epoch": 1.8471175037698644, "grad_norm": 11.3527907629372, "learning_rate": 2e-06, "loss": 0.2552, "step": 7962 }, { "epoch": 1.847349495418165, "grad_norm": 23.095925633732367, "learning_rate": 2e-06, "loss": 0.3313, "step": 7963 }, { "epoch": 1.8475814870664657, "grad_norm": 13.084226012680856, "learning_rate": 2e-06, "loss": 0.2949, "step": 7964 }, { "epoch": 1.8478134787147664, "grad_norm": 18.085654292620408, "learning_rate": 2e-06, "loss": 0.3053, "step": 7965 }, { "epoch": 1.8480454703630669, "grad_norm": 14.712791411772923, "learning_rate": 2e-06, "loss": 0.3434, "step": 7966 }, { "epoch": 1.8482774620113676, "grad_norm": 8.334839560631009, "learning_rate": 2e-06, "loss": 0.1873, "step": 7967 }, { "epoch": 1.8485094536596682, "grad_norm": 10.518803209871267, "learning_rate": 2e-06, "loss": 0.3561, "step": 7968 }, { "epoch": 1.848741445307969, "grad_norm": 22.56750349666928, "learning_rate": 2e-06, "loss": 0.286, "step": 7969 }, { "epoch": 1.8489734369562696, "grad_norm": 18.491907036686882, "learning_rate": 2e-06, "loss": 0.321, "step": 7970 }, { "epoch": 1.8492054286045703, "grad_norm": 10.100982824992236, "learning_rate": 2e-06, "loss": 0.1683, "step": 7971 }, { "epoch": 1.849437420252871, "grad_norm": 15.074645252715337, "learning_rate": 2e-06, "loss": 0.2232, "step": 7972 }, { "epoch": 1.8496694119011714, "grad_norm": 14.046821339662545, "learning_rate": 2e-06, "loss": 0.2664, "step": 7973 }, { "epoch": 1.849901403549472, "grad_norm": 13.59824054448896, "learning_rate": 2e-06, "loss": 0.3291, "step": 7974 }, { "epoch": 1.8501333951977728, "grad_norm": 13.790300671692236, "learning_rate": 2e-06, "loss": 0.2642, "step": 7975 }, { "epoch": 1.8503653868460734, "grad_norm": 10.412428158867494, "learning_rate": 2e-06, "loss": 0.1941, "step": 7976 }, { "epoch": 1.8505973784943741, "grad_norm": 9.241196274154442, "learning_rate": 2e-06, "loss": 0.2431, "step": 7977 }, { "epoch": 1.8508293701426748, "grad_norm": 18.006977742518487, "learning_rate": 2e-06, "loss": 0.3314, "step": 7978 }, { "epoch": 1.8510613617909755, "grad_norm": 8.735578702425071, "learning_rate": 2e-06, "loss": 0.1915, "step": 7979 }, { "epoch": 1.8512933534392761, "grad_norm": 11.066825745768144, "learning_rate": 2e-06, "loss": 0.3201, "step": 7980 }, { "epoch": 1.8515253450875768, "grad_norm": 12.934799799849172, "learning_rate": 2e-06, "loss": 0.2668, "step": 7981 }, { "epoch": 1.8517573367358775, "grad_norm": 7.652593730052824, "learning_rate": 2e-06, "loss": 0.2211, "step": 7982 }, { "epoch": 1.8519893283841782, "grad_norm": 17.360645095568245, "learning_rate": 2e-06, "loss": 0.2471, "step": 7983 }, { "epoch": 1.8522213200324789, "grad_norm": 9.342320938908026, "learning_rate": 2e-06, "loss": 0.2092, "step": 7984 }, { "epoch": 1.8524533116807795, "grad_norm": 28.26538822948439, "learning_rate": 2e-06, "loss": 0.3829, "step": 7985 }, { "epoch": 1.8526853033290802, "grad_norm": 12.575102199523327, "learning_rate": 2e-06, "loss": 0.2662, "step": 7986 }, { "epoch": 1.852917294977381, "grad_norm": 12.649445490250741, "learning_rate": 2e-06, "loss": 0.2228, "step": 7987 }, { "epoch": 1.8531492866256816, "grad_norm": 10.65686234012664, "learning_rate": 2e-06, "loss": 0.2862, "step": 7988 }, { "epoch": 1.8533812782739822, "grad_norm": 13.377746222563427, "learning_rate": 2e-06, "loss": 0.2825, "step": 7989 }, { "epoch": 1.853613269922283, "grad_norm": 8.499970384087424, "learning_rate": 2e-06, "loss": 0.2493, "step": 7990 }, { "epoch": 1.8538452615705836, "grad_norm": 8.641111636956055, "learning_rate": 2e-06, "loss": 0.1472, "step": 7991 }, { "epoch": 1.8540772532188843, "grad_norm": 18.614890935703087, "learning_rate": 2e-06, "loss": 0.2897, "step": 7992 }, { "epoch": 1.8543092448671847, "grad_norm": 9.830085780138903, "learning_rate": 2e-06, "loss": 0.2636, "step": 7993 }, { "epoch": 1.8545412365154854, "grad_norm": 7.283981619222619, "learning_rate": 2e-06, "loss": 0.2193, "step": 7994 }, { "epoch": 1.854773228163786, "grad_norm": 16.965974276924957, "learning_rate": 2e-06, "loss": 0.2347, "step": 7995 }, { "epoch": 1.8550052198120868, "grad_norm": 11.141632499056673, "learning_rate": 2e-06, "loss": 0.2715, "step": 7996 }, { "epoch": 1.8552372114603874, "grad_norm": 13.656532823530375, "learning_rate": 2e-06, "loss": 0.2832, "step": 7997 }, { "epoch": 1.8554692031086881, "grad_norm": 8.84745272325778, "learning_rate": 2e-06, "loss": 0.2077, "step": 7998 }, { "epoch": 1.8557011947569888, "grad_norm": 9.806319010310668, "learning_rate": 2e-06, "loss": 0.2299, "step": 7999 }, { "epoch": 1.8559331864052893, "grad_norm": 8.63698914343859, "learning_rate": 2e-06, "loss": 0.2444, "step": 8000 }, { "epoch": 1.85616517805359, "grad_norm": 19.746684480722426, "learning_rate": 2e-06, "loss": 0.3618, "step": 8001 }, { "epoch": 1.8563971697018906, "grad_norm": 10.050691436850114, "learning_rate": 2e-06, "loss": 0.2331, "step": 8002 }, { "epoch": 1.8566291613501913, "grad_norm": 23.133285568263815, "learning_rate": 2e-06, "loss": 0.2847, "step": 8003 }, { "epoch": 1.856861152998492, "grad_norm": 9.853651040793197, "learning_rate": 2e-06, "loss": 0.1745, "step": 8004 }, { "epoch": 1.8570931446467926, "grad_norm": 6.07637328157186, "learning_rate": 2e-06, "loss": 0.1538, "step": 8005 }, { "epoch": 1.8573251362950933, "grad_norm": 10.665413106483905, "learning_rate": 2e-06, "loss": 0.303, "step": 8006 }, { "epoch": 1.857557127943394, "grad_norm": 5.973799154259255, "learning_rate": 2e-06, "loss": 0.1648, "step": 8007 }, { "epoch": 1.8577891195916947, "grad_norm": 11.260741284363519, "learning_rate": 2e-06, "loss": 0.2539, "step": 8008 }, { "epoch": 1.8580211112399954, "grad_norm": 12.612140361854662, "learning_rate": 2e-06, "loss": 0.1837, "step": 8009 }, { "epoch": 1.858253102888296, "grad_norm": 6.64501874593489, "learning_rate": 2e-06, "loss": 0.127, "step": 8010 }, { "epoch": 1.8584850945365967, "grad_norm": 12.641913011016891, "learning_rate": 2e-06, "loss": 0.2472, "step": 8011 }, { "epoch": 1.8587170861848974, "grad_norm": 16.01004189377708, "learning_rate": 2e-06, "loss": 0.2818, "step": 8012 }, { "epoch": 1.858949077833198, "grad_norm": 17.650523284827663, "learning_rate": 2e-06, "loss": 0.3115, "step": 8013 }, { "epoch": 1.8591810694814987, "grad_norm": 67.9849712980046, "learning_rate": 2e-06, "loss": 0.2104, "step": 8014 }, { "epoch": 1.8594130611297994, "grad_norm": 18.553952981423404, "learning_rate": 2e-06, "loss": 0.3201, "step": 8015 }, { "epoch": 1.8596450527781, "grad_norm": 19.8359578410062, "learning_rate": 2e-06, "loss": 0.2287, "step": 8016 }, { "epoch": 1.8598770444264008, "grad_norm": 13.506166327338736, "learning_rate": 2e-06, "loss": 0.2029, "step": 8017 }, { "epoch": 1.8601090360747015, "grad_norm": 13.173351169007882, "learning_rate": 2e-06, "loss": 0.1775, "step": 8018 }, { "epoch": 1.860341027723002, "grad_norm": 16.9773971290991, "learning_rate": 2e-06, "loss": 0.2437, "step": 8019 }, { "epoch": 1.8605730193713026, "grad_norm": 11.166665045313033, "learning_rate": 2e-06, "loss": 0.2486, "step": 8020 }, { "epoch": 1.8608050110196033, "grad_norm": 17.771633832862648, "learning_rate": 2e-06, "loss": 0.3317, "step": 8021 }, { "epoch": 1.861037002667904, "grad_norm": 13.650262107790912, "learning_rate": 2e-06, "loss": 0.2381, "step": 8022 }, { "epoch": 1.8612689943162046, "grad_norm": 14.93861953518762, "learning_rate": 2e-06, "loss": 0.2917, "step": 8023 }, { "epoch": 1.8615009859645053, "grad_norm": 50.26662400292754, "learning_rate": 2e-06, "loss": 0.1945, "step": 8024 }, { "epoch": 1.861732977612806, "grad_norm": 11.603664768722052, "learning_rate": 2e-06, "loss": 0.3147, "step": 8025 }, { "epoch": 1.8619649692611064, "grad_norm": 12.386479170427224, "learning_rate": 2e-06, "loss": 0.201, "step": 8026 }, { "epoch": 1.8621969609094071, "grad_norm": 20.00899319309306, "learning_rate": 2e-06, "loss": 0.2685, "step": 8027 }, { "epoch": 1.8624289525577078, "grad_norm": 14.307786611545218, "learning_rate": 2e-06, "loss": 0.3113, "step": 8028 }, { "epoch": 1.8626609442060085, "grad_norm": 10.419544932315354, "learning_rate": 2e-06, "loss": 0.2153, "step": 8029 }, { "epoch": 1.8628929358543092, "grad_norm": 14.944128238574825, "learning_rate": 2e-06, "loss": 0.2304, "step": 8030 }, { "epoch": 1.8631249275026098, "grad_norm": 14.828493347874334, "learning_rate": 2e-06, "loss": 0.2339, "step": 8031 }, { "epoch": 1.8633569191509105, "grad_norm": 8.43164667293264, "learning_rate": 2e-06, "loss": 0.1578, "step": 8032 }, { "epoch": 1.8635889107992112, "grad_norm": 8.225180499158036, "learning_rate": 2e-06, "loss": 0.2174, "step": 8033 }, { "epoch": 1.8638209024475119, "grad_norm": 8.714004111926837, "learning_rate": 2e-06, "loss": 0.1848, "step": 8034 }, { "epoch": 1.8640528940958125, "grad_norm": 7.209489143883522, "learning_rate": 2e-06, "loss": 0.1175, "step": 8035 }, { "epoch": 1.8642848857441132, "grad_norm": 19.47233075136568, "learning_rate": 2e-06, "loss": 0.3315, "step": 8036 }, { "epoch": 1.864516877392414, "grad_norm": 11.761565148017262, "learning_rate": 2e-06, "loss": 0.2358, "step": 8037 }, { "epoch": 1.8647488690407146, "grad_norm": 22.946883432516643, "learning_rate": 2e-06, "loss": 0.2992, "step": 8038 }, { "epoch": 1.8649808606890153, "grad_norm": 10.099079023837012, "learning_rate": 2e-06, "loss": 0.2294, "step": 8039 }, { "epoch": 1.865212852337316, "grad_norm": 11.982340199964142, "learning_rate": 2e-06, "loss": 0.225, "step": 8040 }, { "epoch": 1.8654448439856166, "grad_norm": 7.414469724974377, "learning_rate": 2e-06, "loss": 0.1843, "step": 8041 }, { "epoch": 1.8656768356339173, "grad_norm": 14.110442842535285, "learning_rate": 2e-06, "loss": 0.3108, "step": 8042 }, { "epoch": 1.865908827282218, "grad_norm": 8.306951984448888, "learning_rate": 2e-06, "loss": 0.222, "step": 8043 }, { "epoch": 1.8661408189305186, "grad_norm": 8.020722441848871, "learning_rate": 2e-06, "loss": 0.2296, "step": 8044 }, { "epoch": 1.8663728105788193, "grad_norm": 14.329130788383804, "learning_rate": 2e-06, "loss": 0.3023, "step": 8045 }, { "epoch": 1.8666048022271198, "grad_norm": 12.243991467568708, "learning_rate": 2e-06, "loss": 0.2126, "step": 8046 }, { "epoch": 1.8668367938754205, "grad_norm": 11.49317180930521, "learning_rate": 2e-06, "loss": 0.1405, "step": 8047 }, { "epoch": 1.8670687855237211, "grad_norm": 12.517361716203531, "learning_rate": 2e-06, "loss": 0.2399, "step": 8048 }, { "epoch": 1.8673007771720218, "grad_norm": 14.413158198703666, "learning_rate": 2e-06, "loss": 0.2407, "step": 8049 }, { "epoch": 1.8675327688203225, "grad_norm": 20.62454734785914, "learning_rate": 2e-06, "loss": 0.224, "step": 8050 }, { "epoch": 1.8677647604686232, "grad_norm": 11.221516745189199, "learning_rate": 2e-06, "loss": 0.2663, "step": 8051 }, { "epoch": 1.8679967521169238, "grad_norm": 14.150459004169447, "learning_rate": 2e-06, "loss": 0.2292, "step": 8052 }, { "epoch": 1.8682287437652243, "grad_norm": 16.522724664226192, "learning_rate": 2e-06, "loss": 0.2884, "step": 8053 }, { "epoch": 1.868460735413525, "grad_norm": 11.491441717510634, "learning_rate": 2e-06, "loss": 0.2876, "step": 8054 }, { "epoch": 1.8686927270618257, "grad_norm": 19.27225227412006, "learning_rate": 2e-06, "loss": 0.2874, "step": 8055 }, { "epoch": 1.8689247187101263, "grad_norm": 7.696650394731279, "learning_rate": 2e-06, "loss": 0.1884, "step": 8056 }, { "epoch": 1.869156710358427, "grad_norm": 13.935186097930975, "learning_rate": 2e-06, "loss": 0.2719, "step": 8057 }, { "epoch": 1.8693887020067277, "grad_norm": 7.901974875172806, "learning_rate": 2e-06, "loss": 0.2239, "step": 8058 }, { "epoch": 1.8696206936550284, "grad_norm": 14.279966452089885, "learning_rate": 2e-06, "loss": 0.309, "step": 8059 }, { "epoch": 1.869852685303329, "grad_norm": 21.199725575895616, "learning_rate": 2e-06, "loss": 0.3386, "step": 8060 }, { "epoch": 1.8700846769516297, "grad_norm": 9.775247696139246, "learning_rate": 2e-06, "loss": 0.2333, "step": 8061 }, { "epoch": 1.8703166685999304, "grad_norm": 17.735314243096184, "learning_rate": 2e-06, "loss": 0.274, "step": 8062 }, { "epoch": 1.870548660248231, "grad_norm": 9.118166555172795, "learning_rate": 2e-06, "loss": 0.2709, "step": 8063 }, { "epoch": 1.8707806518965318, "grad_norm": 10.560664539208704, "learning_rate": 2e-06, "loss": 0.1477, "step": 8064 }, { "epoch": 1.8710126435448324, "grad_norm": 21.11132835177889, "learning_rate": 2e-06, "loss": 0.2022, "step": 8065 }, { "epoch": 1.871244635193133, "grad_norm": 12.965268707465519, "learning_rate": 2e-06, "loss": 0.2166, "step": 8066 }, { "epoch": 1.8714766268414338, "grad_norm": 16.22190776075008, "learning_rate": 2e-06, "loss": 0.3488, "step": 8067 }, { "epoch": 1.8717086184897345, "grad_norm": 18.535484259025015, "learning_rate": 2e-06, "loss": 0.2677, "step": 8068 }, { "epoch": 1.8719406101380351, "grad_norm": 12.973138242724986, "learning_rate": 2e-06, "loss": 0.2302, "step": 8069 }, { "epoch": 1.8721726017863358, "grad_norm": 17.321787407067543, "learning_rate": 2e-06, "loss": 0.424, "step": 8070 }, { "epoch": 1.8724045934346365, "grad_norm": 8.641817252315901, "learning_rate": 2e-06, "loss": 0.2172, "step": 8071 }, { "epoch": 1.8726365850829372, "grad_norm": 13.667276124603173, "learning_rate": 2e-06, "loss": 0.1638, "step": 8072 }, { "epoch": 1.8728685767312376, "grad_norm": 12.34392074525113, "learning_rate": 2e-06, "loss": 0.1892, "step": 8073 }, { "epoch": 1.8731005683795383, "grad_norm": 13.175216729247062, "learning_rate": 2e-06, "loss": 0.2071, "step": 8074 }, { "epoch": 1.873332560027839, "grad_norm": 10.656296949914095, "learning_rate": 2e-06, "loss": 0.2188, "step": 8075 }, { "epoch": 1.8735645516761397, "grad_norm": 21.55205836915179, "learning_rate": 2e-06, "loss": 0.2832, "step": 8076 }, { "epoch": 1.8737965433244403, "grad_norm": 14.915656126608042, "learning_rate": 2e-06, "loss": 0.2965, "step": 8077 }, { "epoch": 1.874028534972741, "grad_norm": 15.797128126647014, "learning_rate": 2e-06, "loss": 0.1939, "step": 8078 }, { "epoch": 1.8742605266210415, "grad_norm": 15.63902295953352, "learning_rate": 2e-06, "loss": 0.2841, "step": 8079 }, { "epoch": 1.8744925182693422, "grad_norm": 7.098117949851355, "learning_rate": 2e-06, "loss": 0.1646, "step": 8080 }, { "epoch": 1.8747245099176428, "grad_norm": 16.878481122030315, "learning_rate": 2e-06, "loss": 0.212, "step": 8081 }, { "epoch": 1.8749565015659435, "grad_norm": 14.102839925698367, "learning_rate": 2e-06, "loss": 0.259, "step": 8082 }, { "epoch": 1.8751884932142442, "grad_norm": 8.015270991643696, "learning_rate": 2e-06, "loss": 0.2123, "step": 8083 }, { "epoch": 1.8754204848625449, "grad_norm": 10.75137113578418, "learning_rate": 2e-06, "loss": 0.1887, "step": 8084 }, { "epoch": 1.8756524765108455, "grad_norm": 12.192149049115477, "learning_rate": 2e-06, "loss": 0.2144, "step": 8085 }, { "epoch": 1.8758844681591462, "grad_norm": 16.200978015518178, "learning_rate": 2e-06, "loss": 0.1866, "step": 8086 }, { "epoch": 1.876116459807447, "grad_norm": 13.604564928941926, "learning_rate": 2e-06, "loss": 0.2224, "step": 8087 }, { "epoch": 1.8763484514557476, "grad_norm": 18.27121505748652, "learning_rate": 2e-06, "loss": 0.3974, "step": 8088 }, { "epoch": 1.8765804431040483, "grad_norm": 20.230308054727587, "learning_rate": 2e-06, "loss": 0.2641, "step": 8089 }, { "epoch": 1.876812434752349, "grad_norm": 19.98312531455832, "learning_rate": 2e-06, "loss": 0.2823, "step": 8090 }, { "epoch": 1.8770444264006496, "grad_norm": 13.997583676506107, "learning_rate": 2e-06, "loss": 0.3769, "step": 8091 }, { "epoch": 1.8772764180489503, "grad_norm": 16.245997317386614, "learning_rate": 2e-06, "loss": 0.265, "step": 8092 }, { "epoch": 1.877508409697251, "grad_norm": 6.987822813233787, "learning_rate": 2e-06, "loss": 0.1468, "step": 8093 }, { "epoch": 1.8777404013455516, "grad_norm": 13.866209639245193, "learning_rate": 2e-06, "loss": 0.2698, "step": 8094 }, { "epoch": 1.8779723929938523, "grad_norm": 9.262533352772289, "learning_rate": 2e-06, "loss": 0.2204, "step": 8095 }, { "epoch": 1.878204384642153, "grad_norm": 17.625891059497523, "learning_rate": 2e-06, "loss": 0.3638, "step": 8096 }, { "epoch": 1.8784363762904537, "grad_norm": 15.109355853479892, "learning_rate": 2e-06, "loss": 0.1749, "step": 8097 }, { "epoch": 1.8786683679387544, "grad_norm": 23.316790726338905, "learning_rate": 2e-06, "loss": 0.3017, "step": 8098 }, { "epoch": 1.8789003595870548, "grad_norm": 11.417710489306446, "learning_rate": 2e-06, "loss": 0.2762, "step": 8099 }, { "epoch": 1.8791323512353555, "grad_norm": 7.568858560857904, "learning_rate": 2e-06, "loss": 0.1969, "step": 8100 }, { "epoch": 1.8793643428836562, "grad_norm": 24.255230697484876, "learning_rate": 2e-06, "loss": 0.3469, "step": 8101 }, { "epoch": 1.8795963345319568, "grad_norm": 10.709730914448418, "learning_rate": 2e-06, "loss": 0.1396, "step": 8102 }, { "epoch": 1.8798283261802575, "grad_norm": 15.12566667585955, "learning_rate": 2e-06, "loss": 0.2438, "step": 8103 }, { "epoch": 1.8800603178285582, "grad_norm": 10.69027851351895, "learning_rate": 2e-06, "loss": 0.1836, "step": 8104 }, { "epoch": 1.8802923094768589, "grad_norm": 8.275867129578193, "learning_rate": 2e-06, "loss": 0.1433, "step": 8105 }, { "epoch": 1.8805243011251593, "grad_norm": 18.796644003060116, "learning_rate": 2e-06, "loss": 0.3486, "step": 8106 }, { "epoch": 1.88075629277346, "grad_norm": 11.441232359659232, "learning_rate": 2e-06, "loss": 0.267, "step": 8107 }, { "epoch": 1.8809882844217607, "grad_norm": 13.62519582991966, "learning_rate": 2e-06, "loss": 0.1969, "step": 8108 }, { "epoch": 1.8812202760700614, "grad_norm": 12.018684733601567, "learning_rate": 2e-06, "loss": 0.2544, "step": 8109 }, { "epoch": 1.881452267718362, "grad_norm": 18.41427434587355, "learning_rate": 2e-06, "loss": 0.2961, "step": 8110 }, { "epoch": 1.8816842593666627, "grad_norm": 11.72345637527048, "learning_rate": 2e-06, "loss": 0.2071, "step": 8111 }, { "epoch": 1.8819162510149634, "grad_norm": 9.216262154161301, "learning_rate": 2e-06, "loss": 0.2301, "step": 8112 }, { "epoch": 1.882148242663264, "grad_norm": 17.853558650966367, "learning_rate": 2e-06, "loss": 0.2497, "step": 8113 }, { "epoch": 1.8823802343115648, "grad_norm": 15.682849567438378, "learning_rate": 2e-06, "loss": 0.2568, "step": 8114 }, { "epoch": 1.8826122259598654, "grad_norm": 23.39963649706194, "learning_rate": 2e-06, "loss": 0.3241, "step": 8115 }, { "epoch": 1.8828442176081661, "grad_norm": 11.397005268434754, "learning_rate": 2e-06, "loss": 0.2757, "step": 8116 }, { "epoch": 1.8830762092564668, "grad_norm": 11.623011034774825, "learning_rate": 2e-06, "loss": 0.2046, "step": 8117 }, { "epoch": 1.8833082009047675, "grad_norm": 16.244386942551117, "learning_rate": 2e-06, "loss": 0.2464, "step": 8118 }, { "epoch": 1.8835401925530681, "grad_norm": 6.50215631748504, "learning_rate": 2e-06, "loss": 0.1487, "step": 8119 }, { "epoch": 1.8837721842013688, "grad_norm": 13.968814913063218, "learning_rate": 2e-06, "loss": 0.24, "step": 8120 }, { "epoch": 1.8840041758496695, "grad_norm": 14.418952475658717, "learning_rate": 2e-06, "loss": 0.2565, "step": 8121 }, { "epoch": 1.8842361674979702, "grad_norm": 12.90293443244331, "learning_rate": 2e-06, "loss": 0.2305, "step": 8122 }, { "epoch": 1.8844681591462709, "grad_norm": 11.279220611971848, "learning_rate": 2e-06, "loss": 0.2261, "step": 8123 }, { "epoch": 1.8847001507945715, "grad_norm": 12.477288015553475, "learning_rate": 2e-06, "loss": 0.2421, "step": 8124 }, { "epoch": 1.8849321424428722, "grad_norm": 12.22566777943852, "learning_rate": 2e-06, "loss": 0.2004, "step": 8125 }, { "epoch": 1.8851641340911727, "grad_norm": 10.719972656821513, "learning_rate": 2e-06, "loss": 0.2487, "step": 8126 }, { "epoch": 1.8853961257394733, "grad_norm": 12.691193607855086, "learning_rate": 2e-06, "loss": 0.3267, "step": 8127 }, { "epoch": 1.885628117387774, "grad_norm": 11.938062303337183, "learning_rate": 2e-06, "loss": 0.2, "step": 8128 }, { "epoch": 1.8858601090360747, "grad_norm": 16.38343012082749, "learning_rate": 2e-06, "loss": 0.3129, "step": 8129 }, { "epoch": 1.8860921006843754, "grad_norm": 13.360707433601444, "learning_rate": 2e-06, "loss": 0.297, "step": 8130 }, { "epoch": 1.886324092332676, "grad_norm": 8.182444282018277, "learning_rate": 2e-06, "loss": 0.2032, "step": 8131 }, { "epoch": 1.8865560839809767, "grad_norm": 10.785681550556108, "learning_rate": 2e-06, "loss": 0.198, "step": 8132 }, { "epoch": 1.8867880756292772, "grad_norm": 32.651483206036424, "learning_rate": 2e-06, "loss": 0.2223, "step": 8133 }, { "epoch": 1.8870200672775779, "grad_norm": 12.78225895745723, "learning_rate": 2e-06, "loss": 0.3003, "step": 8134 }, { "epoch": 1.8872520589258786, "grad_norm": 16.118136488781317, "learning_rate": 2e-06, "loss": 0.3566, "step": 8135 }, { "epoch": 1.8874840505741792, "grad_norm": 16.010772026878108, "learning_rate": 2e-06, "loss": 0.2491, "step": 8136 }, { "epoch": 1.88771604222248, "grad_norm": 19.512267007684628, "learning_rate": 2e-06, "loss": 0.298, "step": 8137 }, { "epoch": 1.8879480338707806, "grad_norm": 12.91385202920017, "learning_rate": 2e-06, "loss": 0.2058, "step": 8138 }, { "epoch": 1.8881800255190813, "grad_norm": 13.799231253139101, "learning_rate": 2e-06, "loss": 0.1785, "step": 8139 }, { "epoch": 1.888412017167382, "grad_norm": 22.71783170409705, "learning_rate": 2e-06, "loss": 0.3614, "step": 8140 }, { "epoch": 1.8886440088156826, "grad_norm": 17.47576531872985, "learning_rate": 2e-06, "loss": 0.2828, "step": 8141 }, { "epoch": 1.8888760004639833, "grad_norm": 17.212646341870357, "learning_rate": 2e-06, "loss": 0.2752, "step": 8142 }, { "epoch": 1.889107992112284, "grad_norm": 11.878432237227706, "learning_rate": 2e-06, "loss": 0.216, "step": 8143 }, { "epoch": 1.8893399837605847, "grad_norm": 14.344434202109612, "learning_rate": 2e-06, "loss": 0.2197, "step": 8144 }, { "epoch": 1.8895719754088853, "grad_norm": 24.818920744905732, "learning_rate": 2e-06, "loss": 0.2646, "step": 8145 }, { "epoch": 1.889803967057186, "grad_norm": 10.651778755421654, "learning_rate": 2e-06, "loss": 0.1695, "step": 8146 }, { "epoch": 1.8900359587054867, "grad_norm": 14.289848463260581, "learning_rate": 2e-06, "loss": 0.2756, "step": 8147 }, { "epoch": 1.8902679503537874, "grad_norm": 7.128992669850312, "learning_rate": 2e-06, "loss": 0.1521, "step": 8148 }, { "epoch": 1.890499942002088, "grad_norm": 17.376190428609913, "learning_rate": 2e-06, "loss": 0.2186, "step": 8149 }, { "epoch": 1.8907319336503887, "grad_norm": 14.718226807755526, "learning_rate": 2e-06, "loss": 0.2132, "step": 8150 }, { "epoch": 1.8909639252986894, "grad_norm": 35.42613465223065, "learning_rate": 2e-06, "loss": 0.3643, "step": 8151 }, { "epoch": 1.89119591694699, "grad_norm": 29.24116725879479, "learning_rate": 2e-06, "loss": 0.3953, "step": 8152 }, { "epoch": 1.8914279085952905, "grad_norm": 11.504743827237121, "learning_rate": 2e-06, "loss": 0.1929, "step": 8153 }, { "epoch": 1.8916599002435912, "grad_norm": 15.527277538736046, "learning_rate": 2e-06, "loss": 0.2618, "step": 8154 }, { "epoch": 1.8918918918918919, "grad_norm": 34.41089193580936, "learning_rate": 2e-06, "loss": 0.3138, "step": 8155 }, { "epoch": 1.8921238835401926, "grad_norm": 17.68286000532179, "learning_rate": 2e-06, "loss": 0.2378, "step": 8156 }, { "epoch": 1.8923558751884932, "grad_norm": 13.15350512468965, "learning_rate": 2e-06, "loss": 0.134, "step": 8157 }, { "epoch": 1.892587866836794, "grad_norm": 10.329227674219053, "learning_rate": 2e-06, "loss": 0.1932, "step": 8158 }, { "epoch": 1.8928198584850944, "grad_norm": 13.006155127117866, "learning_rate": 2e-06, "loss": 0.2922, "step": 8159 }, { "epoch": 1.893051850133395, "grad_norm": 12.329676024489999, "learning_rate": 2e-06, "loss": 0.2295, "step": 8160 }, { "epoch": 1.8932838417816957, "grad_norm": 7.363387316454607, "learning_rate": 2e-06, "loss": 0.1269, "step": 8161 }, { "epoch": 1.8935158334299964, "grad_norm": 17.318169665052793, "learning_rate": 2e-06, "loss": 0.2694, "step": 8162 }, { "epoch": 1.893747825078297, "grad_norm": 19.869125915460828, "learning_rate": 2e-06, "loss": 0.2753, "step": 8163 }, { "epoch": 1.8939798167265978, "grad_norm": 7.136062236164506, "learning_rate": 2e-06, "loss": 0.2068, "step": 8164 }, { "epoch": 1.8942118083748984, "grad_norm": 12.456129038837584, "learning_rate": 2e-06, "loss": 0.2313, "step": 8165 }, { "epoch": 1.8944438000231991, "grad_norm": 17.244983081129227, "learning_rate": 2e-06, "loss": 0.3142, "step": 8166 }, { "epoch": 1.8946757916714998, "grad_norm": 16.69022335332699, "learning_rate": 2e-06, "loss": 0.2655, "step": 8167 }, { "epoch": 1.8949077833198005, "grad_norm": 15.769969245776224, "learning_rate": 2e-06, "loss": 0.2282, "step": 8168 }, { "epoch": 1.8951397749681012, "grad_norm": 8.289435804973813, "learning_rate": 2e-06, "loss": 0.1708, "step": 8169 }, { "epoch": 1.8953717666164018, "grad_norm": 10.823837362743815, "learning_rate": 2e-06, "loss": 0.234, "step": 8170 }, { "epoch": 1.8956037582647025, "grad_norm": 11.207424384436552, "learning_rate": 2e-06, "loss": 0.1962, "step": 8171 }, { "epoch": 1.8958357499130032, "grad_norm": 9.150322406339379, "learning_rate": 2e-06, "loss": 0.2094, "step": 8172 }, { "epoch": 1.8960677415613039, "grad_norm": 17.94134879246771, "learning_rate": 2e-06, "loss": 0.2231, "step": 8173 }, { "epoch": 1.8962997332096045, "grad_norm": 22.93012671108854, "learning_rate": 2e-06, "loss": 0.2061, "step": 8174 }, { "epoch": 1.8965317248579052, "grad_norm": 16.504892149263032, "learning_rate": 2e-06, "loss": 0.2255, "step": 8175 }, { "epoch": 1.896763716506206, "grad_norm": 17.36398239932726, "learning_rate": 2e-06, "loss": 0.2867, "step": 8176 }, { "epoch": 1.8969957081545066, "grad_norm": 22.820471052406855, "learning_rate": 2e-06, "loss": 0.1776, "step": 8177 }, { "epoch": 1.8972276998028073, "grad_norm": 12.508140962756473, "learning_rate": 2e-06, "loss": 0.2118, "step": 8178 }, { "epoch": 1.8974596914511077, "grad_norm": 15.703706473722347, "learning_rate": 2e-06, "loss": 0.2247, "step": 8179 }, { "epoch": 1.8976916830994084, "grad_norm": 34.07236853610495, "learning_rate": 2e-06, "loss": 0.4813, "step": 8180 }, { "epoch": 1.897923674747709, "grad_norm": 15.111982169520063, "learning_rate": 2e-06, "loss": 0.3347, "step": 8181 }, { "epoch": 1.8981556663960097, "grad_norm": 18.108042990665616, "learning_rate": 2e-06, "loss": 0.3716, "step": 8182 }, { "epoch": 1.8983876580443104, "grad_norm": 10.371113738441565, "learning_rate": 2e-06, "loss": 0.1723, "step": 8183 }, { "epoch": 1.898619649692611, "grad_norm": 11.176901885735523, "learning_rate": 2e-06, "loss": 0.2535, "step": 8184 }, { "epoch": 1.8988516413409118, "grad_norm": 11.546728228280674, "learning_rate": 2e-06, "loss": 0.1787, "step": 8185 }, { "epoch": 1.8990836329892122, "grad_norm": 18.031724353604417, "learning_rate": 2e-06, "loss": 0.263, "step": 8186 }, { "epoch": 1.899315624637513, "grad_norm": 7.97087998375236, "learning_rate": 2e-06, "loss": 0.128, "step": 8187 }, { "epoch": 1.8995476162858136, "grad_norm": 10.91288450703131, "learning_rate": 2e-06, "loss": 0.1763, "step": 8188 }, { "epoch": 1.8997796079341143, "grad_norm": 15.287091653761278, "learning_rate": 2e-06, "loss": 0.2395, "step": 8189 }, { "epoch": 1.900011599582415, "grad_norm": 9.40977049520668, "learning_rate": 2e-06, "loss": 0.1625, "step": 8190 }, { "epoch": 1.9002435912307156, "grad_norm": 16.861117607116114, "learning_rate": 2e-06, "loss": 0.2271, "step": 8191 }, { "epoch": 1.9004755828790163, "grad_norm": 11.037214769342043, "learning_rate": 2e-06, "loss": 0.1528, "step": 8192 }, { "epoch": 1.900707574527317, "grad_norm": 10.195558248624783, "learning_rate": 2e-06, "loss": 0.1352, "step": 8193 }, { "epoch": 1.9009395661756177, "grad_norm": 19.357736986172732, "learning_rate": 2e-06, "loss": 0.2074, "step": 8194 }, { "epoch": 1.9011715578239183, "grad_norm": 22.36942699777849, "learning_rate": 2e-06, "loss": 0.3187, "step": 8195 }, { "epoch": 1.901403549472219, "grad_norm": 8.715362015576734, "learning_rate": 2e-06, "loss": 0.2511, "step": 8196 }, { "epoch": 1.9016355411205197, "grad_norm": 10.25230107442266, "learning_rate": 2e-06, "loss": 0.1778, "step": 8197 }, { "epoch": 1.9018675327688204, "grad_norm": 23.561928495390756, "learning_rate": 2e-06, "loss": 0.4416, "step": 8198 }, { "epoch": 1.902099524417121, "grad_norm": 18.159330177747112, "learning_rate": 2e-06, "loss": 0.265, "step": 8199 }, { "epoch": 1.9023315160654217, "grad_norm": 18.474659179229178, "learning_rate": 2e-06, "loss": 0.2264, "step": 8200 }, { "epoch": 1.9025635077137224, "grad_norm": 11.213883376723222, "learning_rate": 2e-06, "loss": 0.2187, "step": 8201 }, { "epoch": 1.902795499362023, "grad_norm": 11.325475746908815, "learning_rate": 2e-06, "loss": 0.1437, "step": 8202 }, { "epoch": 1.9030274910103238, "grad_norm": 12.667406137830117, "learning_rate": 2e-06, "loss": 0.2505, "step": 8203 }, { "epoch": 1.9032594826586244, "grad_norm": 14.219083128107396, "learning_rate": 2e-06, "loss": 0.1709, "step": 8204 }, { "epoch": 1.9034914743069251, "grad_norm": 21.08465805561743, "learning_rate": 2e-06, "loss": 0.4011, "step": 8205 }, { "epoch": 1.9037234659552256, "grad_norm": 16.782938445686284, "learning_rate": 2e-06, "loss": 0.2381, "step": 8206 }, { "epoch": 1.9039554576035262, "grad_norm": 12.519800931564225, "learning_rate": 2e-06, "loss": 0.1999, "step": 8207 }, { "epoch": 1.904187449251827, "grad_norm": 14.461461522859151, "learning_rate": 2e-06, "loss": 0.3346, "step": 8208 }, { "epoch": 1.9044194409001276, "grad_norm": 14.345881539762717, "learning_rate": 2e-06, "loss": 0.2979, "step": 8209 }, { "epoch": 1.9046514325484283, "grad_norm": 21.421155368467556, "learning_rate": 2e-06, "loss": 0.3399, "step": 8210 }, { "epoch": 1.904883424196729, "grad_norm": 14.75430383047075, "learning_rate": 2e-06, "loss": 0.1874, "step": 8211 }, { "epoch": 1.9051154158450294, "grad_norm": 16.452560204189812, "learning_rate": 2e-06, "loss": 0.2955, "step": 8212 }, { "epoch": 1.90534740749333, "grad_norm": 12.58674547104881, "learning_rate": 2e-06, "loss": 0.2175, "step": 8213 }, { "epoch": 1.9055793991416308, "grad_norm": 32.0433974432042, "learning_rate": 2e-06, "loss": 0.3187, "step": 8214 }, { "epoch": 1.9058113907899314, "grad_norm": 27.844541614260155, "learning_rate": 2e-06, "loss": 0.3755, "step": 8215 }, { "epoch": 1.9060433824382321, "grad_norm": 10.550625708302274, "learning_rate": 2e-06, "loss": 0.1614, "step": 8216 }, { "epoch": 1.9062753740865328, "grad_norm": 20.29109359857516, "learning_rate": 2e-06, "loss": 0.3744, "step": 8217 }, { "epoch": 1.9065073657348335, "grad_norm": 18.18674145651999, "learning_rate": 2e-06, "loss": 0.3038, "step": 8218 }, { "epoch": 1.9067393573831342, "grad_norm": 26.554136681725186, "learning_rate": 2e-06, "loss": 0.3285, "step": 8219 }, { "epoch": 1.9069713490314348, "grad_norm": 17.913901890935545, "learning_rate": 2e-06, "loss": 0.2838, "step": 8220 }, { "epoch": 1.9072033406797355, "grad_norm": 19.446754092117164, "learning_rate": 2e-06, "loss": 0.2213, "step": 8221 }, { "epoch": 1.9074353323280362, "grad_norm": 19.892146142585084, "learning_rate": 2e-06, "loss": 0.3371, "step": 8222 }, { "epoch": 1.9076673239763369, "grad_norm": 13.182404615018127, "learning_rate": 2e-06, "loss": 0.2448, "step": 8223 }, { "epoch": 1.9078993156246375, "grad_norm": 20.5755363946165, "learning_rate": 2e-06, "loss": 0.2963, "step": 8224 }, { "epoch": 1.9081313072729382, "grad_norm": 12.559442015884914, "learning_rate": 2e-06, "loss": 0.2099, "step": 8225 }, { "epoch": 1.908363298921239, "grad_norm": 9.005075480299361, "learning_rate": 2e-06, "loss": 0.1641, "step": 8226 }, { "epoch": 1.9085952905695396, "grad_norm": 12.662288781435917, "learning_rate": 2e-06, "loss": 0.3134, "step": 8227 }, { "epoch": 1.9088272822178403, "grad_norm": 12.007012421122173, "learning_rate": 2e-06, "loss": 0.2553, "step": 8228 }, { "epoch": 1.909059273866141, "grad_norm": 11.816634760711612, "learning_rate": 2e-06, "loss": 0.2257, "step": 8229 }, { "epoch": 1.9092912655144416, "grad_norm": 12.862882132997411, "learning_rate": 2e-06, "loss": 0.1843, "step": 8230 }, { "epoch": 1.9095232571627423, "grad_norm": 26.931106829079567, "learning_rate": 2e-06, "loss": 0.236, "step": 8231 }, { "epoch": 1.9097552488110427, "grad_norm": 9.259853349503151, "learning_rate": 2e-06, "loss": 0.2687, "step": 8232 }, { "epoch": 1.9099872404593434, "grad_norm": 12.53754130652307, "learning_rate": 2e-06, "loss": 0.2398, "step": 8233 }, { "epoch": 1.910219232107644, "grad_norm": 18.023157803986198, "learning_rate": 2e-06, "loss": 0.3218, "step": 8234 }, { "epoch": 1.9104512237559448, "grad_norm": 8.078029810459396, "learning_rate": 2e-06, "loss": 0.1691, "step": 8235 }, { "epoch": 1.9106832154042455, "grad_norm": 14.604188755436693, "learning_rate": 2e-06, "loss": 0.2846, "step": 8236 }, { "epoch": 1.9109152070525461, "grad_norm": 9.915303648016163, "learning_rate": 2e-06, "loss": 0.1822, "step": 8237 }, { "epoch": 1.9111471987008468, "grad_norm": 8.937008541033672, "learning_rate": 2e-06, "loss": 0.1915, "step": 8238 }, { "epoch": 1.9113791903491473, "grad_norm": 15.495136841108435, "learning_rate": 2e-06, "loss": 0.1943, "step": 8239 }, { "epoch": 1.911611181997448, "grad_norm": 18.594498427474978, "learning_rate": 2e-06, "loss": 0.2526, "step": 8240 }, { "epoch": 1.9118431736457486, "grad_norm": 11.037879862277668, "learning_rate": 2e-06, "loss": 0.2421, "step": 8241 }, { "epoch": 1.9120751652940493, "grad_norm": 24.98641628833053, "learning_rate": 2e-06, "loss": 0.2371, "step": 8242 }, { "epoch": 1.91230715694235, "grad_norm": 16.29291873274218, "learning_rate": 2e-06, "loss": 0.3609, "step": 8243 }, { "epoch": 1.9125391485906507, "grad_norm": 21.25534135384721, "learning_rate": 2e-06, "loss": 0.3265, "step": 8244 }, { "epoch": 1.9127711402389513, "grad_norm": 10.772522409197187, "learning_rate": 2e-06, "loss": 0.2059, "step": 8245 }, { "epoch": 1.913003131887252, "grad_norm": 16.822319519518206, "learning_rate": 2e-06, "loss": 0.2291, "step": 8246 }, { "epoch": 1.9132351235355527, "grad_norm": 14.701476350588532, "learning_rate": 2e-06, "loss": 0.2077, "step": 8247 }, { "epoch": 1.9134671151838534, "grad_norm": 10.151290714699144, "learning_rate": 2e-06, "loss": 0.1983, "step": 8248 }, { "epoch": 1.913699106832154, "grad_norm": 11.486781285690082, "learning_rate": 2e-06, "loss": 0.1778, "step": 8249 }, { "epoch": 1.9139310984804547, "grad_norm": 16.169915834829315, "learning_rate": 2e-06, "loss": 0.2844, "step": 8250 }, { "epoch": 1.9141630901287554, "grad_norm": 18.397321285879713, "learning_rate": 2e-06, "loss": 0.2849, "step": 8251 }, { "epoch": 1.914395081777056, "grad_norm": 11.317068990602722, "learning_rate": 2e-06, "loss": 0.2229, "step": 8252 }, { "epoch": 1.9146270734253568, "grad_norm": 19.361350050762294, "learning_rate": 2e-06, "loss": 0.2358, "step": 8253 }, { "epoch": 1.9148590650736574, "grad_norm": 14.06912297548446, "learning_rate": 2e-06, "loss": 0.3069, "step": 8254 }, { "epoch": 1.9150910567219581, "grad_norm": 17.87175382915879, "learning_rate": 2e-06, "loss": 0.2287, "step": 8255 }, { "epoch": 1.9153230483702588, "grad_norm": 15.518948701012697, "learning_rate": 2e-06, "loss": 0.2169, "step": 8256 }, { "epoch": 1.9155550400185595, "grad_norm": 9.438148701509457, "learning_rate": 2e-06, "loss": 0.1725, "step": 8257 }, { "epoch": 1.9157870316668602, "grad_norm": 17.408432016873743, "learning_rate": 2e-06, "loss": 0.2678, "step": 8258 }, { "epoch": 1.9160190233151606, "grad_norm": 8.518150459279795, "learning_rate": 2e-06, "loss": 0.1942, "step": 8259 }, { "epoch": 1.9162510149634613, "grad_norm": 11.916440509055764, "learning_rate": 2e-06, "loss": 0.2316, "step": 8260 }, { "epoch": 1.916483006611762, "grad_norm": 19.22244658410548, "learning_rate": 2e-06, "loss": 0.2165, "step": 8261 }, { "epoch": 1.9167149982600626, "grad_norm": 22.489005047076123, "learning_rate": 2e-06, "loss": 0.3285, "step": 8262 }, { "epoch": 1.9169469899083633, "grad_norm": 13.18035388952861, "learning_rate": 2e-06, "loss": 0.2058, "step": 8263 }, { "epoch": 1.917178981556664, "grad_norm": 16.614831732754023, "learning_rate": 2e-06, "loss": 0.2375, "step": 8264 }, { "epoch": 1.9174109732049647, "grad_norm": 11.545535466753725, "learning_rate": 2e-06, "loss": 0.1232, "step": 8265 }, { "epoch": 1.9176429648532651, "grad_norm": 12.093689927206457, "learning_rate": 2e-06, "loss": 0.2792, "step": 8266 }, { "epoch": 1.9178749565015658, "grad_norm": 9.959454490978523, "learning_rate": 2e-06, "loss": 0.2145, "step": 8267 }, { "epoch": 1.9181069481498665, "grad_norm": 12.2647047989879, "learning_rate": 2e-06, "loss": 0.2634, "step": 8268 }, { "epoch": 1.9183389397981672, "grad_norm": 16.72304117556915, "learning_rate": 2e-06, "loss": 0.3142, "step": 8269 }, { "epoch": 1.9185709314464678, "grad_norm": 19.31870618615468, "learning_rate": 2e-06, "loss": 0.382, "step": 8270 }, { "epoch": 1.9188029230947685, "grad_norm": 6.752555646575119, "learning_rate": 2e-06, "loss": 0.1397, "step": 8271 }, { "epoch": 1.9190349147430692, "grad_norm": 7.376211731271454, "learning_rate": 2e-06, "loss": 0.1096, "step": 8272 }, { "epoch": 1.9192669063913699, "grad_norm": 14.1403769816287, "learning_rate": 2e-06, "loss": 0.2297, "step": 8273 }, { "epoch": 1.9194988980396706, "grad_norm": 18.630262161070675, "learning_rate": 2e-06, "loss": 0.2822, "step": 8274 }, { "epoch": 1.9197308896879712, "grad_norm": 10.637772181676997, "learning_rate": 2e-06, "loss": 0.2134, "step": 8275 }, { "epoch": 1.919962881336272, "grad_norm": 10.803230879586474, "learning_rate": 2e-06, "loss": 0.2273, "step": 8276 }, { "epoch": 1.9201948729845726, "grad_norm": 14.041361424353319, "learning_rate": 2e-06, "loss": 0.2323, "step": 8277 }, { "epoch": 1.9204268646328733, "grad_norm": 25.059508495132192, "learning_rate": 2e-06, "loss": 0.3291, "step": 8278 }, { "epoch": 1.920658856281174, "grad_norm": 17.070058774407894, "learning_rate": 2e-06, "loss": 0.2736, "step": 8279 }, { "epoch": 1.9208908479294746, "grad_norm": 11.351872788066839, "learning_rate": 2e-06, "loss": 0.2799, "step": 8280 }, { "epoch": 1.9211228395777753, "grad_norm": 18.216056033597024, "learning_rate": 2e-06, "loss": 0.2414, "step": 8281 }, { "epoch": 1.921354831226076, "grad_norm": 13.080737756337564, "learning_rate": 2e-06, "loss": 0.3448, "step": 8282 }, { "epoch": 1.9215868228743767, "grad_norm": 20.208896374040844, "learning_rate": 2e-06, "loss": 0.259, "step": 8283 }, { "epoch": 1.9218188145226773, "grad_norm": 2.866532475964635, "learning_rate": 2e-06, "loss": 0.1151, "step": 8284 }, { "epoch": 1.922050806170978, "grad_norm": 7.573395237380319, "learning_rate": 2e-06, "loss": 0.1573, "step": 8285 }, { "epoch": 1.9222827978192785, "grad_norm": 18.308860336279682, "learning_rate": 2e-06, "loss": 0.2621, "step": 8286 }, { "epoch": 1.9225147894675791, "grad_norm": 11.665796555064631, "learning_rate": 2e-06, "loss": 0.1945, "step": 8287 }, { "epoch": 1.9227467811158798, "grad_norm": 15.296308819790674, "learning_rate": 2e-06, "loss": 0.2331, "step": 8288 }, { "epoch": 1.9229787727641805, "grad_norm": 9.646629419972417, "learning_rate": 2e-06, "loss": 0.1939, "step": 8289 }, { "epoch": 1.9232107644124812, "grad_norm": 103.93129525379511, "learning_rate": 2e-06, "loss": 0.2315, "step": 8290 }, { "epoch": 1.9234427560607819, "grad_norm": 13.058749888640612, "learning_rate": 2e-06, "loss": 0.2627, "step": 8291 }, { "epoch": 1.9236747477090823, "grad_norm": 17.24259009477687, "learning_rate": 2e-06, "loss": 0.2115, "step": 8292 }, { "epoch": 1.923906739357383, "grad_norm": 10.338322982938008, "learning_rate": 2e-06, "loss": 0.2494, "step": 8293 }, { "epoch": 1.9241387310056837, "grad_norm": 13.010569352737638, "learning_rate": 2e-06, "loss": 0.2818, "step": 8294 }, { "epoch": 1.9243707226539843, "grad_norm": 10.243548363837144, "learning_rate": 2e-06, "loss": 0.2308, "step": 8295 }, { "epoch": 1.924602714302285, "grad_norm": 26.38176885433789, "learning_rate": 2e-06, "loss": 0.2453, "step": 8296 }, { "epoch": 1.9248347059505857, "grad_norm": 18.326231746899325, "learning_rate": 2e-06, "loss": 0.2372, "step": 8297 }, { "epoch": 1.9250666975988864, "grad_norm": 15.340498799445832, "learning_rate": 2e-06, "loss": 0.2787, "step": 8298 }, { "epoch": 1.925298689247187, "grad_norm": 11.107298049649948, "learning_rate": 2e-06, "loss": 0.2725, "step": 8299 }, { "epoch": 1.9255306808954877, "grad_norm": 15.409151955869918, "learning_rate": 2e-06, "loss": 0.3218, "step": 8300 }, { "epoch": 1.9257626725437884, "grad_norm": 13.18529269749906, "learning_rate": 2e-06, "loss": 0.3214, "step": 8301 }, { "epoch": 1.925994664192089, "grad_norm": 12.997759326811828, "learning_rate": 2e-06, "loss": 0.1669, "step": 8302 }, { "epoch": 1.9262266558403898, "grad_norm": 11.75141841418822, "learning_rate": 2e-06, "loss": 0.2567, "step": 8303 }, { "epoch": 1.9264586474886904, "grad_norm": 13.803993613795765, "learning_rate": 2e-06, "loss": 0.3057, "step": 8304 }, { "epoch": 1.9266906391369911, "grad_norm": 11.200192090314818, "learning_rate": 2e-06, "loss": 0.2454, "step": 8305 }, { "epoch": 1.9269226307852918, "grad_norm": 10.956488078705902, "learning_rate": 2e-06, "loss": 0.2506, "step": 8306 }, { "epoch": 1.9271546224335925, "grad_norm": 38.90968870785902, "learning_rate": 2e-06, "loss": 0.26, "step": 8307 }, { "epoch": 1.9273866140818932, "grad_norm": 12.874901051121027, "learning_rate": 2e-06, "loss": 0.3622, "step": 8308 }, { "epoch": 1.9276186057301938, "grad_norm": 18.50058674378736, "learning_rate": 2e-06, "loss": 0.2987, "step": 8309 }, { "epoch": 1.9278505973784945, "grad_norm": 14.402281423883219, "learning_rate": 2e-06, "loss": 0.2354, "step": 8310 }, { "epoch": 1.9280825890267952, "grad_norm": 14.094001216596647, "learning_rate": 2e-06, "loss": 0.3899, "step": 8311 }, { "epoch": 1.9283145806750956, "grad_norm": 18.630838256596093, "learning_rate": 2e-06, "loss": 0.2709, "step": 8312 }, { "epoch": 1.9285465723233963, "grad_norm": 16.355351861756276, "learning_rate": 2e-06, "loss": 0.3076, "step": 8313 }, { "epoch": 1.928778563971697, "grad_norm": 13.490106187278478, "learning_rate": 2e-06, "loss": 0.2527, "step": 8314 }, { "epoch": 1.9290105556199977, "grad_norm": 17.257607194236968, "learning_rate": 2e-06, "loss": 0.2722, "step": 8315 }, { "epoch": 1.9292425472682984, "grad_norm": 10.354500315551464, "learning_rate": 2e-06, "loss": 0.2323, "step": 8316 }, { "epoch": 1.929474538916599, "grad_norm": 16.34349615296237, "learning_rate": 2e-06, "loss": 0.2311, "step": 8317 }, { "epoch": 1.9297065305648997, "grad_norm": 8.077491084678064, "learning_rate": 2e-06, "loss": 0.177, "step": 8318 }, { "epoch": 1.9299385222132002, "grad_norm": 19.54466387276364, "learning_rate": 2e-06, "loss": 0.3311, "step": 8319 }, { "epoch": 1.9301705138615008, "grad_norm": 11.75052050959431, "learning_rate": 2e-06, "loss": 0.2188, "step": 8320 }, { "epoch": 1.9304025055098015, "grad_norm": 15.061184790407024, "learning_rate": 2e-06, "loss": 0.2134, "step": 8321 }, { "epoch": 1.9306344971581022, "grad_norm": 14.553576441539253, "learning_rate": 2e-06, "loss": 0.2815, "step": 8322 }, { "epoch": 1.9308664888064029, "grad_norm": 14.718620141267312, "learning_rate": 2e-06, "loss": 0.2289, "step": 8323 }, { "epoch": 1.9310984804547036, "grad_norm": 20.38486878391961, "learning_rate": 2e-06, "loss": 0.2531, "step": 8324 }, { "epoch": 1.9313304721030042, "grad_norm": 14.426701058009007, "learning_rate": 2e-06, "loss": 0.2395, "step": 8325 }, { "epoch": 1.931562463751305, "grad_norm": 7.443348328961412, "learning_rate": 2e-06, "loss": 0.1931, "step": 8326 }, { "epoch": 1.9317944553996056, "grad_norm": 13.279828797272623, "learning_rate": 2e-06, "loss": 0.2471, "step": 8327 }, { "epoch": 1.9320264470479063, "grad_norm": 14.691548590843379, "learning_rate": 2e-06, "loss": 0.2645, "step": 8328 }, { "epoch": 1.932258438696207, "grad_norm": 18.380291854873782, "learning_rate": 2e-06, "loss": 0.2756, "step": 8329 }, { "epoch": 1.9324904303445076, "grad_norm": 16.434077330676157, "learning_rate": 2e-06, "loss": 0.2942, "step": 8330 }, { "epoch": 1.9327224219928083, "grad_norm": 23.01022355060371, "learning_rate": 2e-06, "loss": 0.337, "step": 8331 }, { "epoch": 1.932954413641109, "grad_norm": 15.352569187881347, "learning_rate": 2e-06, "loss": 0.2557, "step": 8332 }, { "epoch": 1.9331864052894097, "grad_norm": 31.26671018444753, "learning_rate": 2e-06, "loss": 0.2989, "step": 8333 }, { "epoch": 1.9334183969377103, "grad_norm": 4.16190214180648, "learning_rate": 2e-06, "loss": 0.1166, "step": 8334 }, { "epoch": 1.933650388586011, "grad_norm": 19.25567276550992, "learning_rate": 2e-06, "loss": 0.3374, "step": 8335 }, { "epoch": 1.9338823802343117, "grad_norm": 16.127640357750774, "learning_rate": 2e-06, "loss": 0.1962, "step": 8336 }, { "epoch": 1.9341143718826124, "grad_norm": 9.010495407693012, "learning_rate": 2e-06, "loss": 0.1595, "step": 8337 }, { "epoch": 1.934346363530913, "grad_norm": 13.093907921328455, "learning_rate": 2e-06, "loss": 0.2079, "step": 8338 }, { "epoch": 1.9345783551792135, "grad_norm": 11.93257976455428, "learning_rate": 2e-06, "loss": 0.1789, "step": 8339 }, { "epoch": 1.9348103468275142, "grad_norm": 12.455310510281812, "learning_rate": 2e-06, "loss": 0.1926, "step": 8340 }, { "epoch": 1.9350423384758149, "grad_norm": 9.872377564603019, "learning_rate": 2e-06, "loss": 0.1529, "step": 8341 }, { "epoch": 1.9352743301241155, "grad_norm": 16.326098772159725, "learning_rate": 2e-06, "loss": 0.1564, "step": 8342 }, { "epoch": 1.9355063217724162, "grad_norm": 8.191776245582423, "learning_rate": 2e-06, "loss": 0.1847, "step": 8343 }, { "epoch": 1.935738313420717, "grad_norm": 15.177847539987345, "learning_rate": 2e-06, "loss": 0.2256, "step": 8344 }, { "epoch": 1.9359703050690173, "grad_norm": 15.284297069013228, "learning_rate": 2e-06, "loss": 0.2495, "step": 8345 }, { "epoch": 1.936202296717318, "grad_norm": 10.819774117608612, "learning_rate": 2e-06, "loss": 0.1997, "step": 8346 }, { "epoch": 1.9364342883656187, "grad_norm": 24.980519277732025, "learning_rate": 2e-06, "loss": 0.2839, "step": 8347 }, { "epoch": 1.9366662800139194, "grad_norm": 16.686504111394285, "learning_rate": 2e-06, "loss": 0.256, "step": 8348 }, { "epoch": 1.93689827166222, "grad_norm": 15.473655593524072, "learning_rate": 2e-06, "loss": 0.2852, "step": 8349 }, { "epoch": 1.9371302633105207, "grad_norm": 13.78303381473132, "learning_rate": 2e-06, "loss": 0.3147, "step": 8350 }, { "epoch": 1.9373622549588214, "grad_norm": 15.809047183386966, "learning_rate": 2e-06, "loss": 0.2181, "step": 8351 }, { "epoch": 1.937594246607122, "grad_norm": 14.087075162865043, "learning_rate": 2e-06, "loss": 0.2784, "step": 8352 }, { "epoch": 1.9378262382554228, "grad_norm": 6.435673804344862, "learning_rate": 2e-06, "loss": 0.144, "step": 8353 }, { "epoch": 1.9380582299037235, "grad_norm": 12.885130455224658, "learning_rate": 2e-06, "loss": 0.3624, "step": 8354 }, { "epoch": 1.9382902215520241, "grad_norm": 13.006022998323044, "learning_rate": 2e-06, "loss": 0.1781, "step": 8355 }, { "epoch": 1.9385222132003248, "grad_norm": 12.62404139588954, "learning_rate": 2e-06, "loss": 0.2423, "step": 8356 }, { "epoch": 1.9387542048486255, "grad_norm": 9.91038736403853, "learning_rate": 2e-06, "loss": 0.2856, "step": 8357 }, { "epoch": 1.9389861964969262, "grad_norm": 24.05402448981315, "learning_rate": 2e-06, "loss": 0.3787, "step": 8358 }, { "epoch": 1.9392181881452268, "grad_norm": 15.102452709625638, "learning_rate": 2e-06, "loss": 0.2343, "step": 8359 }, { "epoch": 1.9394501797935275, "grad_norm": 13.076728471248122, "learning_rate": 2e-06, "loss": 0.1743, "step": 8360 }, { "epoch": 1.9396821714418282, "grad_norm": 9.24397385494658, "learning_rate": 2e-06, "loss": 0.2097, "step": 8361 }, { "epoch": 1.9399141630901289, "grad_norm": 16.266485171871672, "learning_rate": 2e-06, "loss": 0.2397, "step": 8362 }, { "epoch": 1.9401461547384296, "grad_norm": 12.217629891584592, "learning_rate": 2e-06, "loss": 0.16, "step": 8363 }, { "epoch": 1.9403781463867302, "grad_norm": 11.591707102973945, "learning_rate": 2e-06, "loss": 0.2571, "step": 8364 }, { "epoch": 1.9406101380350307, "grad_norm": 8.643280962767939, "learning_rate": 2e-06, "loss": 0.1058, "step": 8365 }, { "epoch": 1.9408421296833314, "grad_norm": 40.216672592940476, "learning_rate": 2e-06, "loss": 0.378, "step": 8366 }, { "epoch": 1.941074121331632, "grad_norm": 6.157949903531771, "learning_rate": 2e-06, "loss": 0.1596, "step": 8367 }, { "epoch": 1.9413061129799327, "grad_norm": 11.693409202021451, "learning_rate": 2e-06, "loss": 0.2576, "step": 8368 }, { "epoch": 1.9415381046282334, "grad_norm": 10.782026224472514, "learning_rate": 2e-06, "loss": 0.1393, "step": 8369 }, { "epoch": 1.941770096276534, "grad_norm": 23.328197673210656, "learning_rate": 2e-06, "loss": 0.3011, "step": 8370 }, { "epoch": 1.9420020879248348, "grad_norm": 20.896594843390798, "learning_rate": 2e-06, "loss": 0.2877, "step": 8371 }, { "epoch": 1.9422340795731352, "grad_norm": 15.411905183941613, "learning_rate": 2e-06, "loss": 0.2703, "step": 8372 }, { "epoch": 1.9424660712214359, "grad_norm": 13.43832227814687, "learning_rate": 2e-06, "loss": 0.209, "step": 8373 }, { "epoch": 1.9426980628697366, "grad_norm": 11.963382454873283, "learning_rate": 2e-06, "loss": 0.3074, "step": 8374 }, { "epoch": 1.9429300545180372, "grad_norm": 11.95775311491443, "learning_rate": 2e-06, "loss": 0.223, "step": 8375 }, { "epoch": 1.943162046166338, "grad_norm": 17.428277570749742, "learning_rate": 2e-06, "loss": 0.3112, "step": 8376 }, { "epoch": 1.9433940378146386, "grad_norm": 15.230805680496852, "learning_rate": 2e-06, "loss": 0.2687, "step": 8377 }, { "epoch": 1.9436260294629393, "grad_norm": 11.831562166444332, "learning_rate": 2e-06, "loss": 0.2156, "step": 8378 }, { "epoch": 1.94385802111124, "grad_norm": 11.559861634564646, "learning_rate": 2e-06, "loss": 0.205, "step": 8379 }, { "epoch": 1.9440900127595406, "grad_norm": 20.769356547787247, "learning_rate": 2e-06, "loss": 0.2047, "step": 8380 }, { "epoch": 1.9443220044078413, "grad_norm": 14.79081337535743, "learning_rate": 2e-06, "loss": 0.3526, "step": 8381 }, { "epoch": 1.944553996056142, "grad_norm": 26.84448752847015, "learning_rate": 2e-06, "loss": 0.2859, "step": 8382 }, { "epoch": 1.9447859877044427, "grad_norm": 24.08944134225375, "learning_rate": 2e-06, "loss": 0.2332, "step": 8383 }, { "epoch": 1.9450179793527433, "grad_norm": 15.492793474505326, "learning_rate": 2e-06, "loss": 0.2011, "step": 8384 }, { "epoch": 1.945249971001044, "grad_norm": 13.641540417777914, "learning_rate": 2e-06, "loss": 0.2306, "step": 8385 }, { "epoch": 1.9454819626493447, "grad_norm": 10.80299252573023, "learning_rate": 2e-06, "loss": 0.1511, "step": 8386 }, { "epoch": 1.9457139542976454, "grad_norm": 13.673854162605489, "learning_rate": 2e-06, "loss": 0.2412, "step": 8387 }, { "epoch": 1.945945945945946, "grad_norm": 10.380498854463934, "learning_rate": 2e-06, "loss": 0.1457, "step": 8388 }, { "epoch": 1.9461779375942467, "grad_norm": 22.70279296127693, "learning_rate": 2e-06, "loss": 0.2985, "step": 8389 }, { "epoch": 1.9464099292425474, "grad_norm": 10.8828809073517, "learning_rate": 2e-06, "loss": 0.2901, "step": 8390 }, { "epoch": 1.946641920890848, "grad_norm": 12.740824574439682, "learning_rate": 2e-06, "loss": 0.2331, "step": 8391 }, { "epoch": 1.9468739125391485, "grad_norm": 10.986567109093164, "learning_rate": 2e-06, "loss": 0.2182, "step": 8392 }, { "epoch": 1.9471059041874492, "grad_norm": 12.355360192244024, "learning_rate": 2e-06, "loss": 0.3446, "step": 8393 }, { "epoch": 1.94733789583575, "grad_norm": 21.81122795551603, "learning_rate": 2e-06, "loss": 0.3911, "step": 8394 }, { "epoch": 1.9475698874840506, "grad_norm": 10.852009445706015, "learning_rate": 2e-06, "loss": 0.1806, "step": 8395 }, { "epoch": 1.9478018791323513, "grad_norm": 15.858822478149348, "learning_rate": 2e-06, "loss": 0.307, "step": 8396 }, { "epoch": 1.948033870780652, "grad_norm": 7.640288640596409, "learning_rate": 2e-06, "loss": 0.2044, "step": 8397 }, { "epoch": 1.9482658624289526, "grad_norm": 12.081191805075523, "learning_rate": 2e-06, "loss": 0.2311, "step": 8398 }, { "epoch": 1.948497854077253, "grad_norm": 14.423207635176416, "learning_rate": 2e-06, "loss": 0.2262, "step": 8399 }, { "epoch": 1.9487298457255537, "grad_norm": 11.753273624897368, "learning_rate": 2e-06, "loss": 0.1815, "step": 8400 }, { "epoch": 1.9489618373738544, "grad_norm": 14.569786005421452, "learning_rate": 2e-06, "loss": 0.1847, "step": 8401 }, { "epoch": 1.949193829022155, "grad_norm": 12.004180040477387, "learning_rate": 2e-06, "loss": 0.2279, "step": 8402 }, { "epoch": 1.9494258206704558, "grad_norm": 11.534062119713278, "learning_rate": 2e-06, "loss": 0.2343, "step": 8403 }, { "epoch": 1.9496578123187565, "grad_norm": 16.491537593130033, "learning_rate": 2e-06, "loss": 0.4219, "step": 8404 }, { "epoch": 1.9498898039670571, "grad_norm": 20.454712496719754, "learning_rate": 2e-06, "loss": 0.3419, "step": 8405 }, { "epoch": 1.9501217956153578, "grad_norm": 14.254428634177303, "learning_rate": 2e-06, "loss": 0.229, "step": 8406 }, { "epoch": 1.9503537872636585, "grad_norm": 17.131492337479326, "learning_rate": 2e-06, "loss": 0.2399, "step": 8407 }, { "epoch": 1.9505857789119592, "grad_norm": 10.012126423335909, "learning_rate": 2e-06, "loss": 0.176, "step": 8408 }, { "epoch": 1.9508177705602598, "grad_norm": 15.887064355909393, "learning_rate": 2e-06, "loss": 0.2605, "step": 8409 }, { "epoch": 1.9510497622085605, "grad_norm": 18.013675269362956, "learning_rate": 2e-06, "loss": 0.2112, "step": 8410 }, { "epoch": 1.9512817538568612, "grad_norm": 16.777442020004855, "learning_rate": 2e-06, "loss": 0.2361, "step": 8411 }, { "epoch": 1.9515137455051619, "grad_norm": 14.27637380350311, "learning_rate": 2e-06, "loss": 0.1877, "step": 8412 }, { "epoch": 1.9517457371534626, "grad_norm": 5.247172375194371, "learning_rate": 2e-06, "loss": 0.1402, "step": 8413 }, { "epoch": 1.9519777288017632, "grad_norm": 12.012920149131409, "learning_rate": 2e-06, "loss": 0.2541, "step": 8414 }, { "epoch": 1.952209720450064, "grad_norm": 8.75045047925571, "learning_rate": 2e-06, "loss": 0.2358, "step": 8415 }, { "epoch": 1.9524417120983646, "grad_norm": 11.665687192552866, "learning_rate": 2e-06, "loss": 0.2806, "step": 8416 }, { "epoch": 1.9526737037466653, "grad_norm": 11.848604742375645, "learning_rate": 2e-06, "loss": 0.2743, "step": 8417 }, { "epoch": 1.952905695394966, "grad_norm": 7.23663640660394, "learning_rate": 2e-06, "loss": 0.1936, "step": 8418 }, { "epoch": 1.9531376870432664, "grad_norm": 12.568935820789614, "learning_rate": 2e-06, "loss": 0.3088, "step": 8419 }, { "epoch": 1.953369678691567, "grad_norm": 8.327932198506467, "learning_rate": 2e-06, "loss": 0.2023, "step": 8420 }, { "epoch": 1.9536016703398678, "grad_norm": 7.20468914494589, "learning_rate": 2e-06, "loss": 0.164, "step": 8421 }, { "epoch": 1.9538336619881684, "grad_norm": 11.390215281587516, "learning_rate": 2e-06, "loss": 0.1818, "step": 8422 }, { "epoch": 1.9540656536364691, "grad_norm": 7.102489376796252, "learning_rate": 2e-06, "loss": 0.1231, "step": 8423 }, { "epoch": 1.9542976452847698, "grad_norm": 15.509530220768998, "learning_rate": 2e-06, "loss": 0.2076, "step": 8424 }, { "epoch": 1.9545296369330702, "grad_norm": 16.151138514300136, "learning_rate": 2e-06, "loss": 0.3305, "step": 8425 }, { "epoch": 1.954761628581371, "grad_norm": 8.82614805592662, "learning_rate": 2e-06, "loss": 0.1806, "step": 8426 }, { "epoch": 1.9549936202296716, "grad_norm": 20.327694912149823, "learning_rate": 2e-06, "loss": 0.3804, "step": 8427 }, { "epoch": 1.9552256118779723, "grad_norm": 13.419014003838033, "learning_rate": 2e-06, "loss": 0.25, "step": 8428 }, { "epoch": 1.955457603526273, "grad_norm": 24.40155706745644, "learning_rate": 2e-06, "loss": 0.4561, "step": 8429 }, { "epoch": 1.9556895951745736, "grad_norm": 14.019688656480323, "learning_rate": 2e-06, "loss": 0.1941, "step": 8430 }, { "epoch": 1.9559215868228743, "grad_norm": 12.53301727015699, "learning_rate": 2e-06, "loss": 0.2806, "step": 8431 }, { "epoch": 1.956153578471175, "grad_norm": 14.361679193213567, "learning_rate": 2e-06, "loss": 0.3081, "step": 8432 }, { "epoch": 1.9563855701194757, "grad_norm": 26.510147634656878, "learning_rate": 2e-06, "loss": 0.4196, "step": 8433 }, { "epoch": 1.9566175617677763, "grad_norm": 13.513129730273098, "learning_rate": 2e-06, "loss": 0.2441, "step": 8434 }, { "epoch": 1.956849553416077, "grad_norm": 14.561957210594727, "learning_rate": 2e-06, "loss": 0.279, "step": 8435 }, { "epoch": 1.9570815450643777, "grad_norm": 26.445220538157606, "learning_rate": 2e-06, "loss": 0.2931, "step": 8436 }, { "epoch": 1.9573135367126784, "grad_norm": 13.783969377146583, "learning_rate": 2e-06, "loss": 0.2249, "step": 8437 }, { "epoch": 1.957545528360979, "grad_norm": 27.928379555788805, "learning_rate": 2e-06, "loss": 0.4553, "step": 8438 }, { "epoch": 1.9577775200092797, "grad_norm": 17.986716161532808, "learning_rate": 2e-06, "loss": 0.2743, "step": 8439 }, { "epoch": 1.9580095116575804, "grad_norm": 14.087517558232287, "learning_rate": 2e-06, "loss": 0.3162, "step": 8440 }, { "epoch": 1.958241503305881, "grad_norm": 12.892139932310869, "learning_rate": 2e-06, "loss": 0.2236, "step": 8441 }, { "epoch": 1.9584734949541818, "grad_norm": 11.508804773043272, "learning_rate": 2e-06, "loss": 0.2051, "step": 8442 }, { "epoch": 1.9587054866024824, "grad_norm": 22.660170295163322, "learning_rate": 2e-06, "loss": 0.3131, "step": 8443 }, { "epoch": 1.9589374782507831, "grad_norm": 19.71433988724558, "learning_rate": 2e-06, "loss": 0.3338, "step": 8444 }, { "epoch": 1.9591694698990836, "grad_norm": 13.96157075577866, "learning_rate": 2e-06, "loss": 0.2529, "step": 8445 }, { "epoch": 1.9594014615473843, "grad_norm": 19.057960436934465, "learning_rate": 2e-06, "loss": 0.2632, "step": 8446 }, { "epoch": 1.959633453195685, "grad_norm": 11.549432793969682, "learning_rate": 2e-06, "loss": 0.275, "step": 8447 }, { "epoch": 1.9598654448439856, "grad_norm": 21.336870167414542, "learning_rate": 2e-06, "loss": 0.2588, "step": 8448 }, { "epoch": 1.9600974364922863, "grad_norm": 11.302086911448749, "learning_rate": 2e-06, "loss": 0.2778, "step": 8449 }, { "epoch": 1.960329428140587, "grad_norm": 15.244429894793514, "learning_rate": 2e-06, "loss": 0.2052, "step": 8450 }, { "epoch": 1.9605614197888876, "grad_norm": 8.502244674605915, "learning_rate": 2e-06, "loss": 0.2113, "step": 8451 }, { "epoch": 1.960793411437188, "grad_norm": 26.1900071729061, "learning_rate": 2e-06, "loss": 0.3883, "step": 8452 }, { "epoch": 1.9610254030854888, "grad_norm": 14.440024116328479, "learning_rate": 2e-06, "loss": 0.2319, "step": 8453 }, { "epoch": 1.9612573947337895, "grad_norm": 10.80774147458881, "learning_rate": 2e-06, "loss": 0.1606, "step": 8454 }, { "epoch": 1.9614893863820901, "grad_norm": 15.094895749753553, "learning_rate": 2e-06, "loss": 0.2338, "step": 8455 }, { "epoch": 1.9617213780303908, "grad_norm": 15.965997277124025, "learning_rate": 2e-06, "loss": 0.1719, "step": 8456 }, { "epoch": 1.9619533696786915, "grad_norm": 19.69390806864314, "learning_rate": 2e-06, "loss": 0.2806, "step": 8457 }, { "epoch": 1.9621853613269922, "grad_norm": 9.577296538084363, "learning_rate": 2e-06, "loss": 0.2495, "step": 8458 }, { "epoch": 1.9624173529752929, "grad_norm": 26.75682900422842, "learning_rate": 2e-06, "loss": 0.2845, "step": 8459 }, { "epoch": 1.9626493446235935, "grad_norm": 9.733980937932044, "learning_rate": 2e-06, "loss": 0.2037, "step": 8460 }, { "epoch": 1.9628813362718942, "grad_norm": 22.218555882854805, "learning_rate": 2e-06, "loss": 0.3003, "step": 8461 }, { "epoch": 1.9631133279201949, "grad_norm": 7.914416513340461, "learning_rate": 2e-06, "loss": 0.137, "step": 8462 }, { "epoch": 1.9633453195684956, "grad_norm": 10.2317656645032, "learning_rate": 2e-06, "loss": 0.1882, "step": 8463 }, { "epoch": 1.9635773112167962, "grad_norm": 10.376146409636112, "learning_rate": 2e-06, "loss": 0.1614, "step": 8464 }, { "epoch": 1.963809302865097, "grad_norm": 17.186088573393064, "learning_rate": 2e-06, "loss": 0.2096, "step": 8465 }, { "epoch": 1.9640412945133976, "grad_norm": 7.104689584111022, "learning_rate": 2e-06, "loss": 0.1568, "step": 8466 }, { "epoch": 1.9642732861616983, "grad_norm": 21.66118189834683, "learning_rate": 2e-06, "loss": 0.3, "step": 8467 }, { "epoch": 1.964505277809999, "grad_norm": 19.41182700985375, "learning_rate": 2e-06, "loss": 0.3131, "step": 8468 }, { "epoch": 1.9647372694582996, "grad_norm": 9.70519193662155, "learning_rate": 2e-06, "loss": 0.3182, "step": 8469 }, { "epoch": 1.9649692611066003, "grad_norm": 15.098456257992314, "learning_rate": 2e-06, "loss": 0.2525, "step": 8470 }, { "epoch": 1.965201252754901, "grad_norm": 8.604709981076459, "learning_rate": 2e-06, "loss": 0.285, "step": 8471 }, { "epoch": 1.9654332444032014, "grad_norm": 16.637331812879484, "learning_rate": 2e-06, "loss": 0.3224, "step": 8472 }, { "epoch": 1.9656652360515021, "grad_norm": 21.586022385819454, "learning_rate": 2e-06, "loss": 0.4029, "step": 8473 }, { "epoch": 1.9658972276998028, "grad_norm": 7.344537697532603, "learning_rate": 2e-06, "loss": 0.1618, "step": 8474 }, { "epoch": 1.9661292193481035, "grad_norm": 11.941410369839828, "learning_rate": 2e-06, "loss": 0.2365, "step": 8475 }, { "epoch": 1.9663612109964042, "grad_norm": 10.13680904838394, "learning_rate": 2e-06, "loss": 0.1756, "step": 8476 }, { "epoch": 1.9665932026447048, "grad_norm": 9.082398998611101, "learning_rate": 2e-06, "loss": 0.2709, "step": 8477 }, { "epoch": 1.9668251942930053, "grad_norm": 18.60080880891386, "learning_rate": 2e-06, "loss": 0.3045, "step": 8478 }, { "epoch": 1.967057185941306, "grad_norm": 16.36407242270446, "learning_rate": 2e-06, "loss": 0.2604, "step": 8479 }, { "epoch": 1.9672891775896066, "grad_norm": 14.299477048910845, "learning_rate": 2e-06, "loss": 0.2473, "step": 8480 }, { "epoch": 1.9675211692379073, "grad_norm": 16.17852866015562, "learning_rate": 2e-06, "loss": 0.3183, "step": 8481 }, { "epoch": 1.967753160886208, "grad_norm": 19.005085835163797, "learning_rate": 2e-06, "loss": 0.2687, "step": 8482 }, { "epoch": 1.9679851525345087, "grad_norm": 12.339758285163436, "learning_rate": 2e-06, "loss": 0.2092, "step": 8483 }, { "epoch": 1.9682171441828094, "grad_norm": 17.454199640819382, "learning_rate": 2e-06, "loss": 0.2479, "step": 8484 }, { "epoch": 1.96844913583111, "grad_norm": 20.698836453320574, "learning_rate": 2e-06, "loss": 0.405, "step": 8485 }, { "epoch": 1.9686811274794107, "grad_norm": 6.671319954473673, "learning_rate": 2e-06, "loss": 0.2192, "step": 8486 }, { "epoch": 1.9689131191277114, "grad_norm": 13.887452692343361, "learning_rate": 2e-06, "loss": 0.2378, "step": 8487 }, { "epoch": 1.969145110776012, "grad_norm": 15.72369330544798, "learning_rate": 2e-06, "loss": 0.2271, "step": 8488 }, { "epoch": 1.9693771024243127, "grad_norm": 22.1359744407644, "learning_rate": 2e-06, "loss": 0.3123, "step": 8489 }, { "epoch": 1.9696090940726134, "grad_norm": 9.595794165045705, "learning_rate": 2e-06, "loss": 0.2045, "step": 8490 }, { "epoch": 1.969841085720914, "grad_norm": 8.637735599591288, "learning_rate": 2e-06, "loss": 0.1908, "step": 8491 }, { "epoch": 1.9700730773692148, "grad_norm": 18.339224502699604, "learning_rate": 2e-06, "loss": 0.2914, "step": 8492 }, { "epoch": 1.9703050690175155, "grad_norm": 13.889624100787907, "learning_rate": 2e-06, "loss": 0.2646, "step": 8493 }, { "epoch": 1.9705370606658161, "grad_norm": 14.503935064188033, "learning_rate": 2e-06, "loss": 0.2686, "step": 8494 }, { "epoch": 1.9707690523141168, "grad_norm": 7.739563925770556, "learning_rate": 2e-06, "loss": 0.1703, "step": 8495 }, { "epoch": 1.9710010439624175, "grad_norm": 10.551688065569241, "learning_rate": 2e-06, "loss": 0.3515, "step": 8496 }, { "epoch": 1.9712330356107182, "grad_norm": 9.902050304949746, "learning_rate": 2e-06, "loss": 0.2068, "step": 8497 }, { "epoch": 1.9714650272590186, "grad_norm": 6.983275072558769, "learning_rate": 2e-06, "loss": 0.1782, "step": 8498 }, { "epoch": 1.9716970189073193, "grad_norm": 13.785828158122156, "learning_rate": 2e-06, "loss": 0.3464, "step": 8499 }, { "epoch": 1.97192901055562, "grad_norm": 14.076067112016535, "learning_rate": 2e-06, "loss": 0.3078, "step": 8500 }, { "epoch": 1.9721610022039207, "grad_norm": 12.483396942471327, "learning_rate": 2e-06, "loss": 0.2082, "step": 8501 }, { "epoch": 1.9723929938522213, "grad_norm": 12.186631988394545, "learning_rate": 2e-06, "loss": 0.2424, "step": 8502 }, { "epoch": 1.972624985500522, "grad_norm": 9.102218431247547, "learning_rate": 2e-06, "loss": 0.3024, "step": 8503 }, { "epoch": 1.9728569771488227, "grad_norm": 10.886415063131288, "learning_rate": 2e-06, "loss": 0.2534, "step": 8504 }, { "epoch": 1.9730889687971231, "grad_norm": 12.836982023025246, "learning_rate": 2e-06, "loss": 0.2293, "step": 8505 }, { "epoch": 1.9733209604454238, "grad_norm": 7.1802546184489255, "learning_rate": 2e-06, "loss": 0.1422, "step": 8506 }, { "epoch": 1.9735529520937245, "grad_norm": 10.10370150232109, "learning_rate": 2e-06, "loss": 0.2613, "step": 8507 }, { "epoch": 1.9737849437420252, "grad_norm": 12.471189951292699, "learning_rate": 2e-06, "loss": 0.2318, "step": 8508 }, { "epoch": 1.9740169353903259, "grad_norm": 21.52531851333419, "learning_rate": 2e-06, "loss": 0.3975, "step": 8509 }, { "epoch": 1.9742489270386265, "grad_norm": 9.963227113698434, "learning_rate": 2e-06, "loss": 0.2103, "step": 8510 }, { "epoch": 1.9744809186869272, "grad_norm": 11.232264002401344, "learning_rate": 2e-06, "loss": 0.2087, "step": 8511 }, { "epoch": 1.9747129103352279, "grad_norm": 16.55607130186788, "learning_rate": 2e-06, "loss": 0.3419, "step": 8512 }, { "epoch": 1.9749449019835286, "grad_norm": 8.849922497856467, "learning_rate": 2e-06, "loss": 0.224, "step": 8513 }, { "epoch": 1.9751768936318292, "grad_norm": 21.600280522778597, "learning_rate": 2e-06, "loss": 0.2065, "step": 8514 }, { "epoch": 1.97540888528013, "grad_norm": 19.806163133323412, "learning_rate": 2e-06, "loss": 0.3007, "step": 8515 }, { "epoch": 1.9756408769284306, "grad_norm": 20.337850588002727, "learning_rate": 2e-06, "loss": 0.237, "step": 8516 }, { "epoch": 1.9758728685767313, "grad_norm": 8.776756918468648, "learning_rate": 2e-06, "loss": 0.1599, "step": 8517 }, { "epoch": 1.976104860225032, "grad_norm": 11.533070577796943, "learning_rate": 2e-06, "loss": 0.1877, "step": 8518 }, { "epoch": 1.9763368518733326, "grad_norm": 12.84657385589497, "learning_rate": 2e-06, "loss": 0.2746, "step": 8519 }, { "epoch": 1.9765688435216333, "grad_norm": 16.518796669554234, "learning_rate": 2e-06, "loss": 0.295, "step": 8520 }, { "epoch": 1.976800835169934, "grad_norm": 16.963351981037178, "learning_rate": 2e-06, "loss": 0.2811, "step": 8521 }, { "epoch": 1.9770328268182347, "grad_norm": 4.346222300206503, "learning_rate": 2e-06, "loss": 0.1426, "step": 8522 }, { "epoch": 1.9772648184665353, "grad_norm": 11.419250342857609, "learning_rate": 2e-06, "loss": 0.2009, "step": 8523 }, { "epoch": 1.977496810114836, "grad_norm": 18.097650604177662, "learning_rate": 2e-06, "loss": 0.3862, "step": 8524 }, { "epoch": 1.9777288017631365, "grad_norm": 20.249542109447514, "learning_rate": 2e-06, "loss": 0.3922, "step": 8525 }, { "epoch": 1.9779607934114372, "grad_norm": 13.871707038549962, "learning_rate": 2e-06, "loss": 0.1997, "step": 8526 }, { "epoch": 1.9781927850597378, "grad_norm": 13.029721724629182, "learning_rate": 2e-06, "loss": 0.1359, "step": 8527 }, { "epoch": 1.9784247767080385, "grad_norm": 8.589532756585148, "learning_rate": 2e-06, "loss": 0.203, "step": 8528 }, { "epoch": 1.9786567683563392, "grad_norm": 19.65133151674858, "learning_rate": 2e-06, "loss": 0.3163, "step": 8529 }, { "epoch": 1.9788887600046399, "grad_norm": 10.528372449802873, "learning_rate": 2e-06, "loss": 0.2023, "step": 8530 }, { "epoch": 1.9791207516529405, "grad_norm": 13.753512180806643, "learning_rate": 2e-06, "loss": 0.2618, "step": 8531 }, { "epoch": 1.979352743301241, "grad_norm": 10.418585440923946, "learning_rate": 2e-06, "loss": 0.2562, "step": 8532 }, { "epoch": 1.9795847349495417, "grad_norm": 9.050020397117102, "learning_rate": 2e-06, "loss": 0.2337, "step": 8533 }, { "epoch": 1.9798167265978424, "grad_norm": 10.006801325208919, "learning_rate": 2e-06, "loss": 0.2385, "step": 8534 }, { "epoch": 1.980048718246143, "grad_norm": 11.336402703687698, "learning_rate": 2e-06, "loss": 0.205, "step": 8535 }, { "epoch": 1.9802807098944437, "grad_norm": 12.825693375037904, "learning_rate": 2e-06, "loss": 0.2171, "step": 8536 }, { "epoch": 1.9805127015427444, "grad_norm": 5.524550556954387, "learning_rate": 2e-06, "loss": 0.1233, "step": 8537 }, { "epoch": 1.980744693191045, "grad_norm": 8.083841633609582, "learning_rate": 2e-06, "loss": 0.1239, "step": 8538 }, { "epoch": 1.9809766848393457, "grad_norm": 11.573832770938761, "learning_rate": 2e-06, "loss": 0.2471, "step": 8539 }, { "epoch": 1.9812086764876464, "grad_norm": 7.392452023260391, "learning_rate": 2e-06, "loss": 0.1823, "step": 8540 }, { "epoch": 1.981440668135947, "grad_norm": 21.98841567744905, "learning_rate": 2e-06, "loss": 0.3394, "step": 8541 }, { "epoch": 1.9816726597842478, "grad_norm": 6.8136512886727445, "learning_rate": 2e-06, "loss": 0.1468, "step": 8542 }, { "epoch": 1.9819046514325485, "grad_norm": 19.584918378180546, "learning_rate": 2e-06, "loss": 0.247, "step": 8543 }, { "epoch": 1.9821366430808491, "grad_norm": 13.017497347406255, "learning_rate": 2e-06, "loss": 0.2514, "step": 8544 }, { "epoch": 1.9823686347291498, "grad_norm": 16.294108863284833, "learning_rate": 2e-06, "loss": 0.2779, "step": 8545 }, { "epoch": 1.9826006263774505, "grad_norm": 8.147068821027043, "learning_rate": 2e-06, "loss": 0.1924, "step": 8546 }, { "epoch": 1.9828326180257512, "grad_norm": 9.830621743145237, "learning_rate": 2e-06, "loss": 0.24, "step": 8547 }, { "epoch": 1.9830646096740518, "grad_norm": 22.846163306008492, "learning_rate": 2e-06, "loss": 0.3014, "step": 8548 }, { "epoch": 1.9832966013223525, "grad_norm": 18.613538314175056, "learning_rate": 2e-06, "loss": 0.4346, "step": 8549 }, { "epoch": 1.9835285929706532, "grad_norm": 13.590506060031638, "learning_rate": 2e-06, "loss": 0.2421, "step": 8550 }, { "epoch": 1.9837605846189539, "grad_norm": 13.04190187877181, "learning_rate": 2e-06, "loss": 0.2047, "step": 8551 }, { "epoch": 1.9839925762672543, "grad_norm": 10.602516907217499, "learning_rate": 2e-06, "loss": 0.2152, "step": 8552 }, { "epoch": 1.984224567915555, "grad_norm": 12.780620813300855, "learning_rate": 2e-06, "loss": 0.2415, "step": 8553 }, { "epoch": 1.9844565595638557, "grad_norm": 12.627158631424301, "learning_rate": 2e-06, "loss": 0.1876, "step": 8554 }, { "epoch": 1.9846885512121564, "grad_norm": 19.562289971581983, "learning_rate": 2e-06, "loss": 0.2719, "step": 8555 }, { "epoch": 1.984920542860457, "grad_norm": 19.225652733982006, "learning_rate": 2e-06, "loss": 0.3401, "step": 8556 }, { "epoch": 1.9851525345087577, "grad_norm": 10.428271595257316, "learning_rate": 2e-06, "loss": 0.2353, "step": 8557 }, { "epoch": 1.9853845261570582, "grad_norm": 9.14187799266162, "learning_rate": 2e-06, "loss": 0.2572, "step": 8558 }, { "epoch": 1.9856165178053589, "grad_norm": 12.648061748666258, "learning_rate": 2e-06, "loss": 0.2418, "step": 8559 }, { "epoch": 1.9858485094536595, "grad_norm": 10.629099645791218, "learning_rate": 2e-06, "loss": 0.235, "step": 8560 }, { "epoch": 1.9860805011019602, "grad_norm": 4.342336370217446, "learning_rate": 2e-06, "loss": 0.1194, "step": 8561 }, { "epoch": 1.986312492750261, "grad_norm": 16.143025239136698, "learning_rate": 2e-06, "loss": 0.2595, "step": 8562 }, { "epoch": 1.9865444843985616, "grad_norm": 9.754089385774627, "learning_rate": 2e-06, "loss": 0.2004, "step": 8563 }, { "epoch": 1.9867764760468622, "grad_norm": 15.841521291649542, "learning_rate": 2e-06, "loss": 0.2853, "step": 8564 }, { "epoch": 1.987008467695163, "grad_norm": 14.760371139310362, "learning_rate": 2e-06, "loss": 0.2199, "step": 8565 }, { "epoch": 1.9872404593434636, "grad_norm": 10.32037228954311, "learning_rate": 2e-06, "loss": 0.2619, "step": 8566 }, { "epoch": 1.9874724509917643, "grad_norm": 11.920341608033274, "learning_rate": 2e-06, "loss": 0.1909, "step": 8567 }, { "epoch": 1.987704442640065, "grad_norm": 11.357525599468852, "learning_rate": 2e-06, "loss": 0.172, "step": 8568 }, { "epoch": 1.9879364342883656, "grad_norm": 12.124003669396531, "learning_rate": 2e-06, "loss": 0.229, "step": 8569 }, { "epoch": 1.9881684259366663, "grad_norm": 19.62315796808522, "learning_rate": 2e-06, "loss": 0.2791, "step": 8570 }, { "epoch": 1.988400417584967, "grad_norm": 11.750587502060243, "learning_rate": 2e-06, "loss": 0.2483, "step": 8571 }, { "epoch": 1.9886324092332677, "grad_norm": 15.404513427589219, "learning_rate": 2e-06, "loss": 0.2913, "step": 8572 }, { "epoch": 1.9888644008815684, "grad_norm": 5.384890915201693, "learning_rate": 2e-06, "loss": 0.149, "step": 8573 }, { "epoch": 1.989096392529869, "grad_norm": 11.17698439178024, "learning_rate": 2e-06, "loss": 0.236, "step": 8574 }, { "epoch": 1.9893283841781697, "grad_norm": 25.955564600183294, "learning_rate": 2e-06, "loss": 0.3429, "step": 8575 }, { "epoch": 1.9895603758264704, "grad_norm": 9.573922131436106, "learning_rate": 2e-06, "loss": 0.2218, "step": 8576 }, { "epoch": 1.989792367474771, "grad_norm": 10.128885271798515, "learning_rate": 2e-06, "loss": 0.2154, "step": 8577 }, { "epoch": 1.9900243591230715, "grad_norm": 12.827613625373983, "learning_rate": 2e-06, "loss": 0.2551, "step": 8578 }, { "epoch": 1.9902563507713722, "grad_norm": 12.181597847453952, "learning_rate": 2e-06, "loss": 0.2036, "step": 8579 }, { "epoch": 1.9904883424196729, "grad_norm": 11.204479881120816, "learning_rate": 2e-06, "loss": 0.2682, "step": 8580 }, { "epoch": 1.9907203340679736, "grad_norm": 14.7149776713658, "learning_rate": 2e-06, "loss": 0.1934, "step": 8581 }, { "epoch": 1.9909523257162742, "grad_norm": 14.559952872618316, "learning_rate": 2e-06, "loss": 0.3198, "step": 8582 }, { "epoch": 1.991184317364575, "grad_norm": 17.65883394269422, "learning_rate": 2e-06, "loss": 0.2938, "step": 8583 }, { "epoch": 1.9914163090128756, "grad_norm": 11.148759808439678, "learning_rate": 2e-06, "loss": 0.2647, "step": 8584 }, { "epoch": 1.991648300661176, "grad_norm": 11.688280097465956, "learning_rate": 2e-06, "loss": 0.2514, "step": 8585 }, { "epoch": 1.9918802923094767, "grad_norm": 12.486888404537797, "learning_rate": 2e-06, "loss": 0.2763, "step": 8586 }, { "epoch": 1.9921122839577774, "grad_norm": 11.592115694074757, "learning_rate": 2e-06, "loss": 0.2743, "step": 8587 }, { "epoch": 1.992344275606078, "grad_norm": 11.680842392664022, "learning_rate": 2e-06, "loss": 0.2208, "step": 8588 }, { "epoch": 1.9925762672543788, "grad_norm": 18.036521977119452, "learning_rate": 2e-06, "loss": 0.3916, "step": 8589 }, { "epoch": 1.9928082589026794, "grad_norm": 8.003475013553198, "learning_rate": 2e-06, "loss": 0.1752, "step": 8590 }, { "epoch": 1.99304025055098, "grad_norm": 13.081533947310318, "learning_rate": 2e-06, "loss": 0.2328, "step": 8591 }, { "epoch": 1.9932722421992808, "grad_norm": 12.915462699759964, "learning_rate": 2e-06, "loss": 0.2749, "step": 8592 }, { "epoch": 1.9935042338475815, "grad_norm": 10.856135360597765, "learning_rate": 2e-06, "loss": 0.376, "step": 8593 }, { "epoch": 1.9937362254958821, "grad_norm": 7.591018863066772, "learning_rate": 2e-06, "loss": 0.1334, "step": 8594 }, { "epoch": 1.9939682171441828, "grad_norm": 16.73779734886259, "learning_rate": 2e-06, "loss": 0.3323, "step": 8595 }, { "epoch": 1.9942002087924835, "grad_norm": 17.095465102448927, "learning_rate": 2e-06, "loss": 0.2698, "step": 8596 }, { "epoch": 1.9944322004407842, "grad_norm": 16.201500392677026, "learning_rate": 2e-06, "loss": 0.3091, "step": 8597 }, { "epoch": 1.9946641920890849, "grad_norm": 10.38865379973004, "learning_rate": 2e-06, "loss": 0.2829, "step": 8598 }, { "epoch": 1.9948961837373855, "grad_norm": 9.53782022055871, "learning_rate": 2e-06, "loss": 0.2776, "step": 8599 }, { "epoch": 1.9951281753856862, "grad_norm": 8.811020125502003, "learning_rate": 2e-06, "loss": 0.1423, "step": 8600 }, { "epoch": 1.9953601670339869, "grad_norm": 15.270821714163787, "learning_rate": 2e-06, "loss": 0.2136, "step": 8601 }, { "epoch": 1.9955921586822876, "grad_norm": 18.901918550352107, "learning_rate": 2e-06, "loss": 0.3078, "step": 8602 }, { "epoch": 1.9958241503305882, "grad_norm": 9.205250195423652, "learning_rate": 2e-06, "loss": 0.1244, "step": 8603 }, { "epoch": 1.996056141978889, "grad_norm": 12.846303081127727, "learning_rate": 2e-06, "loss": 0.2795, "step": 8604 }, { "epoch": 1.9962881336271894, "grad_norm": 15.88210580620789, "learning_rate": 2e-06, "loss": 0.2166, "step": 8605 }, { "epoch": 1.99652012527549, "grad_norm": 10.421273958167372, "learning_rate": 2e-06, "loss": 0.2085, "step": 8606 }, { "epoch": 1.9967521169237907, "grad_norm": 11.151099582393117, "learning_rate": 2e-06, "loss": 0.3363, "step": 8607 }, { "epoch": 1.9969841085720914, "grad_norm": 14.37049931427536, "learning_rate": 2e-06, "loss": 0.2388, "step": 8608 }, { "epoch": 1.997216100220392, "grad_norm": 14.278416156594247, "learning_rate": 2e-06, "loss": 0.1511, "step": 8609 }, { "epoch": 1.9974480918686928, "grad_norm": 12.665305808679461, "learning_rate": 2e-06, "loss": 0.1878, "step": 8610 }, { "epoch": 1.9976800835169934, "grad_norm": 15.377701007457812, "learning_rate": 2e-06, "loss": 0.3327, "step": 8611 }, { "epoch": 1.997912075165294, "grad_norm": 8.160117309260238, "learning_rate": 2e-06, "loss": 0.1728, "step": 8612 }, { "epoch": 1.9981440668135946, "grad_norm": 10.501757269939331, "learning_rate": 2e-06, "loss": 0.218, "step": 8613 }, { "epoch": 1.9983760584618953, "grad_norm": 12.866025684053145, "learning_rate": 2e-06, "loss": 0.3282, "step": 8614 }, { "epoch": 1.998608050110196, "grad_norm": 13.778437356571668, "learning_rate": 2e-06, "loss": 0.2975, "step": 8615 }, { "epoch": 1.9988400417584966, "grad_norm": 13.683219081874212, "learning_rate": 2e-06, "loss": 0.2543, "step": 8616 }, { "epoch": 1.9990720334067973, "grad_norm": 7.564723850571399, "learning_rate": 2e-06, "loss": 0.1385, "step": 8617 }, { "epoch": 1.999304025055098, "grad_norm": 11.625230524676542, "learning_rate": 2e-06, "loss": 0.3084, "step": 8618 }, { "epoch": 1.9995360167033986, "grad_norm": 17.15768884295696, "learning_rate": 2e-06, "loss": 0.3042, "step": 8619 }, { "epoch": 1.9997680083516993, "grad_norm": 9.186683071124046, "learning_rate": 2e-06, "loss": 0.1746, "step": 8620 }, { "epoch": 1.9997680083516993, "step": 8620, "total_flos": 2997308663627776.0, "train_loss": 0.283832880054231, "train_runtime": 20710.279, "train_samples_per_second": 6.66, "train_steps_per_second": 0.416 } ], "logging_steps": 1.0, "max_steps": 8620, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2997308663627776.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }