{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4339, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023046784973496196, "grad_norm": 49.091495513916016, "learning_rate": 7.633587786259542e-07, "loss": 2.2176, "step": 1 }, { "epoch": 0.0004609356994699239, "grad_norm": 46.45875930786133, "learning_rate": 1.5267175572519084e-06, "loss": 2.1992, "step": 2 }, { "epoch": 0.0006914035492048859, "grad_norm": 47.82147979736328, "learning_rate": 2.2900763358778625e-06, "loss": 2.2094, "step": 3 }, { "epoch": 0.0009218713989398478, "grad_norm": 44.642494201660156, "learning_rate": 3.053435114503817e-06, "loss": 2.1912, "step": 4 }, { "epoch": 0.00115233924867481, "grad_norm": 35.80204772949219, "learning_rate": 3.816793893129772e-06, "loss": 2.0999, "step": 5 }, { "epoch": 0.0013828070984097719, "grad_norm": 30.77144432067871, "learning_rate": 4.580152671755725e-06, "loss": 2.0554, "step": 6 }, { "epoch": 0.0016132749481447338, "grad_norm": 16.0904483795166, "learning_rate": 5.343511450381679e-06, "loss": 1.9752, "step": 7 }, { "epoch": 0.0018437427978796957, "grad_norm": 21.500225067138672, "learning_rate": 6.106870229007634e-06, "loss": 1.8799, "step": 8 }, { "epoch": 0.0020742106476146576, "grad_norm": 25.848834991455078, "learning_rate": 6.870229007633589e-06, "loss": 2.0303, "step": 9 }, { "epoch": 0.00230467849734962, "grad_norm": 21.812026977539062, "learning_rate": 7.633587786259543e-06, "loss": 1.9578, "step": 10 }, { "epoch": 0.002535146347084582, "grad_norm": 29.04991912841797, "learning_rate": 8.396946564885497e-06, "loss": 1.7629, "step": 11 }, { "epoch": 0.0027656141968195437, "grad_norm": 6.3423566818237305, "learning_rate": 9.16030534351145e-06, "loss": 1.6577, "step": 12 }, { "epoch": 0.0029960820465545056, "grad_norm": 37.8600959777832, "learning_rate": 9.923664122137405e-06, "loss": 1.7216, "step": 13 }, { "epoch": 0.0032265498962894676, "grad_norm": 13.13255500793457, "learning_rate": 1.0687022900763359e-05, "loss": 1.6066, "step": 14 }, { "epoch": 0.0034570177460244295, "grad_norm": 5.612797260284424, "learning_rate": 1.1450381679389314e-05, "loss": 1.5293, "step": 15 }, { "epoch": 0.0036874855957593914, "grad_norm": 4.661881923675537, "learning_rate": 1.2213740458015267e-05, "loss": 1.5133, "step": 16 }, { "epoch": 0.003917953445494353, "grad_norm": 2.878075361251831, "learning_rate": 1.2977099236641221e-05, "loss": 1.4992, "step": 17 }, { "epoch": 0.004148421295229315, "grad_norm": 2.4321236610412598, "learning_rate": 1.3740458015267178e-05, "loss": 1.4797, "step": 18 }, { "epoch": 0.004378889144964277, "grad_norm": 2.1403255462646484, "learning_rate": 1.450381679389313e-05, "loss": 1.4561, "step": 19 }, { "epoch": 0.00460935699469924, "grad_norm": 1.831888198852539, "learning_rate": 1.5267175572519086e-05, "loss": 1.4504, "step": 20 }, { "epoch": 0.004839824844434202, "grad_norm": 2.384835958480835, "learning_rate": 1.6030534351145038e-05, "loss": 1.4364, "step": 21 }, { "epoch": 0.005070292694169164, "grad_norm": 1.8255668878555298, "learning_rate": 1.6793893129770993e-05, "loss": 1.4241, "step": 22 }, { "epoch": 0.005300760543904126, "grad_norm": 20.282546997070312, "learning_rate": 1.7557251908396945e-05, "loss": 1.4204, "step": 23 }, { "epoch": 0.0055312283936390875, "grad_norm": 1.2769057750701904, "learning_rate": 1.83206106870229e-05, "loss": 1.4212, "step": 24 }, { "epoch": 0.005761696243374049, "grad_norm": 1.2663878202438354, "learning_rate": 1.9083969465648855e-05, "loss": 1.3945, "step": 25 }, { "epoch": 0.005992164093109011, "grad_norm": 1.3147037029266357, "learning_rate": 1.984732824427481e-05, "loss": 1.3976, "step": 26 }, { "epoch": 0.006222631942843973, "grad_norm": 1.3301242589950562, "learning_rate": 2.0610687022900766e-05, "loss": 1.3872, "step": 27 }, { "epoch": 0.006453099792578935, "grad_norm": 1.579620599746704, "learning_rate": 2.1374045801526718e-05, "loss": 1.3758, "step": 28 }, { "epoch": 0.006683567642313897, "grad_norm": 1.2853105068206787, "learning_rate": 2.2137404580152673e-05, "loss": 1.3719, "step": 29 }, { "epoch": 0.006914035492048859, "grad_norm": 1.264007329940796, "learning_rate": 2.2900763358778628e-05, "loss": 1.3545, "step": 30 }, { "epoch": 0.007144503341783821, "grad_norm": 1.1497324705123901, "learning_rate": 2.3664122137404583e-05, "loss": 1.3492, "step": 31 }, { "epoch": 0.007374971191518783, "grad_norm": 0.9906431436538696, "learning_rate": 2.4427480916030535e-05, "loss": 1.3537, "step": 32 }, { "epoch": 0.0076054390412537455, "grad_norm": 0.7753263115882874, "learning_rate": 2.5190839694656487e-05, "loss": 1.3452, "step": 33 }, { "epoch": 0.007835906890988707, "grad_norm": 0.7904003858566284, "learning_rate": 2.5954198473282442e-05, "loss": 1.3312, "step": 34 }, { "epoch": 0.00806637474072367, "grad_norm": 0.826572597026825, "learning_rate": 2.6717557251908397e-05, "loss": 1.3327, "step": 35 }, { "epoch": 0.00829684259045863, "grad_norm": 0.7653324007987976, "learning_rate": 2.7480916030534355e-05, "loss": 1.3333, "step": 36 }, { "epoch": 0.008527310440193593, "grad_norm": 0.6942451000213623, "learning_rate": 2.824427480916031e-05, "loss": 1.3149, "step": 37 }, { "epoch": 0.008757778289928554, "grad_norm": 0.6542969942092896, "learning_rate": 2.900763358778626e-05, "loss": 1.3153, "step": 38 }, { "epoch": 0.008988246139663517, "grad_norm": 0.619317889213562, "learning_rate": 2.9770992366412214e-05, "loss": 1.3035, "step": 39 }, { "epoch": 0.00921871398939848, "grad_norm": 0.5745211839675903, "learning_rate": 3.053435114503817e-05, "loss": 1.303, "step": 40 }, { "epoch": 0.00944918183913344, "grad_norm": 0.5202656388282776, "learning_rate": 3.129770992366413e-05, "loss": 1.2884, "step": 41 }, { "epoch": 0.009679649688868404, "grad_norm": 0.5099622011184692, "learning_rate": 3.2061068702290076e-05, "loss": 1.2864, "step": 42 }, { "epoch": 0.009910117538603365, "grad_norm": 0.5038639307022095, "learning_rate": 3.282442748091603e-05, "loss": 1.2713, "step": 43 }, { "epoch": 0.010140585388338327, "grad_norm": 0.5165843963623047, "learning_rate": 3.358778625954199e-05, "loss": 1.2843, "step": 44 }, { "epoch": 0.010371053238073288, "grad_norm": 0.4482625424861908, "learning_rate": 3.435114503816794e-05, "loss": 1.2722, "step": 45 }, { "epoch": 0.010601521087808251, "grad_norm": 0.4263209402561188, "learning_rate": 3.511450381679389e-05, "loss": 1.2686, "step": 46 }, { "epoch": 0.010831988937543212, "grad_norm": 0.4388543367385864, "learning_rate": 3.5877862595419845e-05, "loss": 1.2519, "step": 47 }, { "epoch": 0.011062456787278175, "grad_norm": 0.40465718507766724, "learning_rate": 3.66412213740458e-05, "loss": 1.2509, "step": 48 }, { "epoch": 0.011292924637013136, "grad_norm": 0.4082145094871521, "learning_rate": 3.7404580152671756e-05, "loss": 1.2527, "step": 49 }, { "epoch": 0.011523392486748099, "grad_norm": 0.40573209524154663, "learning_rate": 3.816793893129771e-05, "loss": 1.2557, "step": 50 }, { "epoch": 0.01175386033648306, "grad_norm": 0.39484360814094543, "learning_rate": 3.8931297709923666e-05, "loss": 1.2488, "step": 51 }, { "epoch": 0.011984328186218023, "grad_norm": 0.32370519638061523, "learning_rate": 3.969465648854962e-05, "loss": 1.2396, "step": 52 }, { "epoch": 0.012214796035952985, "grad_norm": 0.36196169257164, "learning_rate": 4.0458015267175576e-05, "loss": 1.2454, "step": 53 }, { "epoch": 0.012445263885687946, "grad_norm": 0.36752983927726746, "learning_rate": 4.122137404580153e-05, "loss": 1.2354, "step": 54 }, { "epoch": 0.01267573173542291, "grad_norm": 0.312123566865921, "learning_rate": 4.198473282442748e-05, "loss": 1.2354, "step": 55 }, { "epoch": 0.01290619958515787, "grad_norm": 0.30579739809036255, "learning_rate": 4.2748091603053435e-05, "loss": 1.2225, "step": 56 }, { "epoch": 0.013136667434892833, "grad_norm": 0.3238474428653717, "learning_rate": 4.351145038167939e-05, "loss": 1.233, "step": 57 }, { "epoch": 0.013367135284627794, "grad_norm": 0.32208338379859924, "learning_rate": 4.4274809160305345e-05, "loss": 1.2306, "step": 58 }, { "epoch": 0.013597603134362757, "grad_norm": 0.3156214654445648, "learning_rate": 4.5038167938931294e-05, "loss": 1.22, "step": 59 }, { "epoch": 0.013828070984097718, "grad_norm": 0.27707698941230774, "learning_rate": 4.5801526717557256e-05, "loss": 1.2219, "step": 60 }, { "epoch": 0.01405853883383268, "grad_norm": 0.29033875465393066, "learning_rate": 4.656488549618321e-05, "loss": 1.214, "step": 61 }, { "epoch": 0.014289006683567642, "grad_norm": 0.316847562789917, "learning_rate": 4.7328244274809166e-05, "loss": 1.2227, "step": 62 }, { "epoch": 0.014519474533302604, "grad_norm": 0.2887651324272156, "learning_rate": 4.809160305343512e-05, "loss": 1.2157, "step": 63 }, { "epoch": 0.014749942383037565, "grad_norm": 0.36876314878463745, "learning_rate": 4.885496183206107e-05, "loss": 1.1988, "step": 64 }, { "epoch": 0.014980410232772528, "grad_norm": 0.2880174517631531, "learning_rate": 4.9618320610687025e-05, "loss": 1.2203, "step": 65 }, { "epoch": 0.015210878082507491, "grad_norm": 0.2820625603199005, "learning_rate": 5.038167938931297e-05, "loss": 1.2109, "step": 66 }, { "epoch": 0.015441345932242452, "grad_norm": 0.27386295795440674, "learning_rate": 5.114503816793893e-05, "loss": 1.2097, "step": 67 }, { "epoch": 0.015671813781977413, "grad_norm": 0.26974838972091675, "learning_rate": 5.1908396946564884e-05, "loss": 1.205, "step": 68 }, { "epoch": 0.015902281631712378, "grad_norm": 0.2931689918041229, "learning_rate": 5.267175572519084e-05, "loss": 1.2097, "step": 69 }, { "epoch": 0.01613274948144734, "grad_norm": 0.2843622863292694, "learning_rate": 5.3435114503816794e-05, "loss": 1.1955, "step": 70 }, { "epoch": 0.0163632173311823, "grad_norm": 0.28659501671791077, "learning_rate": 5.419847328244275e-05, "loss": 1.1836, "step": 71 }, { "epoch": 0.01659368518091726, "grad_norm": 0.2758232355117798, "learning_rate": 5.496183206106871e-05, "loss": 1.2005, "step": 72 }, { "epoch": 0.016824153030652225, "grad_norm": 0.37796103954315186, "learning_rate": 5.5725190839694666e-05, "loss": 1.1996, "step": 73 }, { "epoch": 0.017054620880387186, "grad_norm": 0.6489807963371277, "learning_rate": 5.648854961832062e-05, "loss": 1.1854, "step": 74 }, { "epoch": 0.017285088730122147, "grad_norm": 0.9581599831581116, "learning_rate": 5.725190839694656e-05, "loss": 1.1948, "step": 75 }, { "epoch": 0.01751555657985711, "grad_norm": 0.6904142498970032, "learning_rate": 5.801526717557252e-05, "loss": 1.1835, "step": 76 }, { "epoch": 0.017746024429592073, "grad_norm": 0.546022355556488, "learning_rate": 5.877862595419847e-05, "loss": 1.1789, "step": 77 }, { "epoch": 0.017976492279327034, "grad_norm": 0.7557573914527893, "learning_rate": 5.954198473282443e-05, "loss": 1.1842, "step": 78 }, { "epoch": 0.018206960129061995, "grad_norm": 0.40329957008361816, "learning_rate": 6.0305343511450384e-05, "loss": 1.179, "step": 79 }, { "epoch": 0.01843742797879696, "grad_norm": 0.5815610885620117, "learning_rate": 6.106870229007635e-05, "loss": 1.173, "step": 80 }, { "epoch": 0.01866789582853192, "grad_norm": 0.5922554135322571, "learning_rate": 6.18320610687023e-05, "loss": 1.1773, "step": 81 }, { "epoch": 0.01889836367826688, "grad_norm": 0.347842276096344, "learning_rate": 6.259541984732826e-05, "loss": 1.1756, "step": 82 }, { "epoch": 0.019128831528001843, "grad_norm": 0.8533177971839905, "learning_rate": 6.33587786259542e-05, "loss": 1.1734, "step": 83 }, { "epoch": 0.019359299377736807, "grad_norm": 0.9222707152366638, "learning_rate": 6.412213740458015e-05, "loss": 1.175, "step": 84 }, { "epoch": 0.019589767227471768, "grad_norm": 0.4554316997528076, "learning_rate": 6.488549618320611e-05, "loss": 1.1612, "step": 85 }, { "epoch": 0.01982023507720673, "grad_norm": 0.6344988346099854, "learning_rate": 6.564885496183206e-05, "loss": 1.1587, "step": 86 }, { "epoch": 0.02005070292694169, "grad_norm": 0.8068343997001648, "learning_rate": 6.641221374045802e-05, "loss": 1.1671, "step": 87 }, { "epoch": 0.020281170776676655, "grad_norm": 0.4366961121559143, "learning_rate": 6.717557251908397e-05, "loss": 1.169, "step": 88 }, { "epoch": 0.020511638626411616, "grad_norm": 0.666015088558197, "learning_rate": 6.793893129770993e-05, "loss": 1.1451, "step": 89 }, { "epoch": 0.020742106476146577, "grad_norm": 0.6575168371200562, "learning_rate": 6.870229007633588e-05, "loss": 1.1572, "step": 90 }, { "epoch": 0.020972574325881538, "grad_norm": 0.4764537513256073, "learning_rate": 6.946564885496184e-05, "loss": 1.1506, "step": 91 }, { "epoch": 0.021203042175616502, "grad_norm": 0.6614679098129272, "learning_rate": 7.022900763358778e-05, "loss": 1.1677, "step": 92 }, { "epoch": 0.021433510025351463, "grad_norm": 0.6387923955917358, "learning_rate": 7.099236641221374e-05, "loss": 1.1541, "step": 93 }, { "epoch": 0.021663977875086424, "grad_norm": 0.5480379462242126, "learning_rate": 7.175572519083969e-05, "loss": 1.1419, "step": 94 }, { "epoch": 0.02189444572482139, "grad_norm": 0.7831230759620667, "learning_rate": 7.251908396946565e-05, "loss": 1.1501, "step": 95 }, { "epoch": 0.02212491357455635, "grad_norm": 0.6071659326553345, "learning_rate": 7.32824427480916e-05, "loss": 1.1424, "step": 96 }, { "epoch": 0.02235538142429131, "grad_norm": 0.4921586513519287, "learning_rate": 7.404580152671756e-05, "loss": 1.1442, "step": 97 }, { "epoch": 0.022585849274026272, "grad_norm": 0.9020814299583435, "learning_rate": 7.480916030534351e-05, "loss": 1.1541, "step": 98 }, { "epoch": 0.022816317123761237, "grad_norm": 0.9073303937911987, "learning_rate": 7.557251908396947e-05, "loss": 1.154, "step": 99 }, { "epoch": 0.023046784973496198, "grad_norm": 0.5485337376594543, "learning_rate": 7.633587786259542e-05, "loss": 1.1372, "step": 100 }, { "epoch": 0.02327725282323116, "grad_norm": 1.202399492263794, "learning_rate": 7.709923664122138e-05, "loss": 1.1339, "step": 101 }, { "epoch": 0.02350772067296612, "grad_norm": 1.6502957344055176, "learning_rate": 7.786259541984733e-05, "loss": 1.1466, "step": 102 }, { "epoch": 0.023738188522701084, "grad_norm": 0.9507301449775696, "learning_rate": 7.862595419847329e-05, "loss": 1.1418, "step": 103 }, { "epoch": 0.023968656372436045, "grad_norm": 1.0010653734207153, "learning_rate": 7.938931297709924e-05, "loss": 1.1503, "step": 104 }, { "epoch": 0.024199124222171006, "grad_norm": 0.6749522089958191, "learning_rate": 8.01526717557252e-05, "loss": 1.1335, "step": 105 }, { "epoch": 0.02442959207190597, "grad_norm": 0.7454925775527954, "learning_rate": 8.091603053435115e-05, "loss": 1.1353, "step": 106 }, { "epoch": 0.024660059921640932, "grad_norm": 0.6050741672515869, "learning_rate": 8.167938931297711e-05, "loss": 1.1396, "step": 107 }, { "epoch": 0.024890527771375893, "grad_norm": 0.8601208925247192, "learning_rate": 8.244274809160306e-05, "loss": 1.135, "step": 108 }, { "epoch": 0.025120995621110854, "grad_norm": 0.6721903085708618, "learning_rate": 8.320610687022902e-05, "loss": 1.1259, "step": 109 }, { "epoch": 0.02535146347084582, "grad_norm": 0.5092166662216187, "learning_rate": 8.396946564885496e-05, "loss": 1.1276, "step": 110 }, { "epoch": 0.02558193132058078, "grad_norm": 0.5667235851287842, "learning_rate": 8.473282442748092e-05, "loss": 1.1262, "step": 111 }, { "epoch": 0.02581239917031574, "grad_norm": 0.6341401934623718, "learning_rate": 8.549618320610687e-05, "loss": 1.1269, "step": 112 }, { "epoch": 0.0260428670200507, "grad_norm": 0.5853081941604614, "learning_rate": 8.625954198473283e-05, "loss": 1.1321, "step": 113 }, { "epoch": 0.026273334869785666, "grad_norm": 0.514251708984375, "learning_rate": 8.702290076335878e-05, "loss": 1.1322, "step": 114 }, { "epoch": 0.026503802719520627, "grad_norm": 0.8147274255752563, "learning_rate": 8.778625954198474e-05, "loss": 1.1153, "step": 115 }, { "epoch": 0.026734270569255588, "grad_norm": 1.1456818580627441, "learning_rate": 8.854961832061069e-05, "loss": 1.1361, "step": 116 }, { "epoch": 0.02696473841899055, "grad_norm": 1.8368473052978516, "learning_rate": 8.931297709923665e-05, "loss": 1.1387, "step": 117 }, { "epoch": 0.027195206268725514, "grad_norm": 2.4498660564422607, "learning_rate": 9.007633587786259e-05, "loss": 1.1505, "step": 118 }, { "epoch": 0.027425674118460475, "grad_norm": 2.034926652908325, "learning_rate": 9.083969465648856e-05, "loss": 1.1443, "step": 119 }, { "epoch": 0.027656141968195436, "grad_norm": 1.2848972082138062, "learning_rate": 9.160305343511451e-05, "loss": 1.1466, "step": 120 }, { "epoch": 0.0278866098179304, "grad_norm": 1.265265703201294, "learning_rate": 9.236641221374047e-05, "loss": 1.1345, "step": 121 }, { "epoch": 0.02811707766766536, "grad_norm": 0.8716845512390137, "learning_rate": 9.312977099236642e-05, "loss": 1.1412, "step": 122 }, { "epoch": 0.028347545517400322, "grad_norm": 0.9397414922714233, "learning_rate": 9.389312977099238e-05, "loss": 1.1358, "step": 123 }, { "epoch": 0.028578013367135283, "grad_norm": 0.9730684757232666, "learning_rate": 9.465648854961833e-05, "loss": 1.1232, "step": 124 }, { "epoch": 0.028808481216870248, "grad_norm": 0.8047937154769897, "learning_rate": 9.541984732824429e-05, "loss": 1.1329, "step": 125 }, { "epoch": 0.02903894906660521, "grad_norm": 0.7371867895126343, "learning_rate": 9.618320610687024e-05, "loss": 1.1292, "step": 126 }, { "epoch": 0.02926941691634017, "grad_norm": 0.9497559666633606, "learning_rate": 9.694656488549618e-05, "loss": 1.1213, "step": 127 }, { "epoch": 0.02949988476607513, "grad_norm": 0.6228593587875366, "learning_rate": 9.770992366412214e-05, "loss": 1.1112, "step": 128 }, { "epoch": 0.029730352615810095, "grad_norm": 0.8247762322425842, "learning_rate": 9.84732824427481e-05, "loss": 1.1187, "step": 129 }, { "epoch": 0.029960820465545056, "grad_norm": 0.6637692451477051, "learning_rate": 9.923664122137405e-05, "loss": 1.1046, "step": 130 }, { "epoch": 0.030191288315280018, "grad_norm": 0.8043597936630249, "learning_rate": 0.0001, "loss": 1.1031, "step": 131 }, { "epoch": 0.030421756165014982, "grad_norm": 0.6135521531105042, "learning_rate": 9.999998606560007e-05, "loss": 1.1086, "step": 132 }, { "epoch": 0.030652224014749943, "grad_norm": 0.5676527619361877, "learning_rate": 9.999994426240797e-05, "loss": 1.0931, "step": 133 }, { "epoch": 0.030882691864484904, "grad_norm": 0.6945146918296814, "learning_rate": 9.999987459044706e-05, "loss": 1.1038, "step": 134 }, { "epoch": 0.031113159714219865, "grad_norm": 0.6486102938652039, "learning_rate": 9.999977704975617e-05, "loss": 1.1031, "step": 135 }, { "epoch": 0.031343627563954826, "grad_norm": 0.7887089848518372, "learning_rate": 9.999965164038963e-05, "loss": 1.1089, "step": 136 }, { "epoch": 0.03157409541368979, "grad_norm": 1.3936797380447388, "learning_rate": 9.999949836241736e-05, "loss": 1.1004, "step": 137 }, { "epoch": 0.031804563263424755, "grad_norm": 2.2348039150238037, "learning_rate": 9.999931721592481e-05, "loss": 1.1181, "step": 138 }, { "epoch": 0.032035031113159716, "grad_norm": 1.54178786277771, "learning_rate": 9.999910820101293e-05, "loss": 1.1246, "step": 139 }, { "epoch": 0.03226549896289468, "grad_norm": 0.7922399640083313, "learning_rate": 9.99988713177982e-05, "loss": 1.1064, "step": 140 }, { "epoch": 0.03249596681262964, "grad_norm": 0.8318815231323242, "learning_rate": 9.999860656641268e-05, "loss": 1.1061, "step": 141 }, { "epoch": 0.0327264346623646, "grad_norm": 0.800605297088623, "learning_rate": 9.999831394700395e-05, "loss": 1.1053, "step": 142 }, { "epoch": 0.03295690251209956, "grad_norm": 0.6274577379226685, "learning_rate": 9.999799345973506e-05, "loss": 1.0969, "step": 143 }, { "epoch": 0.03318737036183452, "grad_norm": 0.7087227702140808, "learning_rate": 9.99976451047847e-05, "loss": 1.0991, "step": 144 }, { "epoch": 0.03341783821156949, "grad_norm": 0.68040931224823, "learning_rate": 9.999726888234698e-05, "loss": 1.0971, "step": 145 }, { "epoch": 0.03364830606130445, "grad_norm": 0.6233592629432678, "learning_rate": 9.999686479263164e-05, "loss": 1.0923, "step": 146 }, { "epoch": 0.03387877391103941, "grad_norm": 0.8178934454917908, "learning_rate": 9.999643283586388e-05, "loss": 1.0971, "step": 147 }, { "epoch": 0.03410924176077437, "grad_norm": 0.6832984089851379, "learning_rate": 9.999597301228448e-05, "loss": 1.0957, "step": 148 }, { "epoch": 0.034339709610509334, "grad_norm": 0.49561241269111633, "learning_rate": 9.999548532214973e-05, "loss": 1.0898, "step": 149 }, { "epoch": 0.034570177460244295, "grad_norm": 0.6173511743545532, "learning_rate": 9.999496976573145e-05, "loss": 1.0912, "step": 150 }, { "epoch": 0.034800645309979256, "grad_norm": 0.5704337358474731, "learning_rate": 9.999442634331703e-05, "loss": 1.0805, "step": 151 }, { "epoch": 0.03503111315971422, "grad_norm": 0.4808043837547302, "learning_rate": 9.999385505520931e-05, "loss": 1.0877, "step": 152 }, { "epoch": 0.035261581009449185, "grad_norm": 0.5703374743461609, "learning_rate": 9.999325590172675e-05, "loss": 1.0754, "step": 153 }, { "epoch": 0.035492048859184146, "grad_norm": 0.7379648089408875, "learning_rate": 9.999262888320329e-05, "loss": 1.0987, "step": 154 }, { "epoch": 0.03572251670891911, "grad_norm": 0.7264831066131592, "learning_rate": 9.999197399998841e-05, "loss": 1.0787, "step": 155 }, { "epoch": 0.03595298455865407, "grad_norm": 0.5585530400276184, "learning_rate": 9.999129125244714e-05, "loss": 1.0797, "step": 156 }, { "epoch": 0.03618345240838903, "grad_norm": 0.8825198411941528, "learning_rate": 9.999058064096002e-05, "loss": 1.0704, "step": 157 }, { "epoch": 0.03641392025812399, "grad_norm": 1.1649545431137085, "learning_rate": 9.998984216592313e-05, "loss": 1.0844, "step": 158 }, { "epoch": 0.03664438810785895, "grad_norm": 1.1949596405029297, "learning_rate": 9.998907582774807e-05, "loss": 1.071, "step": 159 }, { "epoch": 0.03687485595759392, "grad_norm": 1.635244607925415, "learning_rate": 9.998828162686197e-05, "loss": 1.0777, "step": 160 }, { "epoch": 0.03710532380732888, "grad_norm": 1.7061165571212769, "learning_rate": 9.998745956370754e-05, "loss": 1.0744, "step": 161 }, { "epoch": 0.03733579165706384, "grad_norm": 1.4206252098083496, "learning_rate": 9.998660963874294e-05, "loss": 1.0777, "step": 162 }, { "epoch": 0.0375662595067988, "grad_norm": 1.5643310546875, "learning_rate": 9.998573185244192e-05, "loss": 1.0783, "step": 163 }, { "epoch": 0.03779672735653376, "grad_norm": 1.2039772272109985, "learning_rate": 9.998482620529371e-05, "loss": 1.0766, "step": 164 }, { "epoch": 0.038027195206268724, "grad_norm": 0.8104932904243469, "learning_rate": 9.998389269780312e-05, "loss": 1.0693, "step": 165 }, { "epoch": 0.038257663056003685, "grad_norm": 0.8776142597198486, "learning_rate": 9.998293133049046e-05, "loss": 1.0668, "step": 166 }, { "epoch": 0.038488130905738646, "grad_norm": 0.837092936038971, "learning_rate": 9.998194210389157e-05, "loss": 1.061, "step": 167 }, { "epoch": 0.038718598755473614, "grad_norm": 0.8989808559417725, "learning_rate": 9.998092501855782e-05, "loss": 1.0699, "step": 168 }, { "epoch": 0.038949066605208575, "grad_norm": 0.97697913646698, "learning_rate": 9.99798800750561e-05, "loss": 1.0568, "step": 169 }, { "epoch": 0.039179534454943536, "grad_norm": 0.8058460354804993, "learning_rate": 9.997880727396886e-05, "loss": 1.0501, "step": 170 }, { "epoch": 0.0394100023046785, "grad_norm": 0.8557106852531433, "learning_rate": 9.997770661589403e-05, "loss": 1.0567, "step": 171 }, { "epoch": 0.03964047015441346, "grad_norm": 0.794967532157898, "learning_rate": 9.997657810144511e-05, "loss": 1.0555, "step": 172 }, { "epoch": 0.03987093800414842, "grad_norm": 0.7823812961578369, "learning_rate": 9.99754217312511e-05, "loss": 1.0373, "step": 173 }, { "epoch": 0.04010140585388338, "grad_norm": 1.165642261505127, "learning_rate": 9.997423750595651e-05, "loss": 1.0486, "step": 174 }, { "epoch": 0.04033187370361835, "grad_norm": 1.893602967262268, "learning_rate": 9.997302542622144e-05, "loss": 1.0738, "step": 175 }, { "epoch": 0.04056234155335331, "grad_norm": 1.9267841577529907, "learning_rate": 9.997178549272145e-05, "loss": 1.0595, "step": 176 }, { "epoch": 0.04079280940308827, "grad_norm": 1.4017184972763062, "learning_rate": 9.997051770614765e-05, "loss": 1.0589, "step": 177 }, { "epoch": 0.04102327725282323, "grad_norm": 0.9901034235954285, "learning_rate": 9.996922206720667e-05, "loss": 1.0584, "step": 178 }, { "epoch": 0.04125374510255819, "grad_norm": 0.9640342593193054, "learning_rate": 9.996789857662068e-05, "loss": 1.0513, "step": 179 }, { "epoch": 0.041484212952293154, "grad_norm": 0.9942569136619568, "learning_rate": 9.996654723512736e-05, "loss": 1.0478, "step": 180 }, { "epoch": 0.041714680802028115, "grad_norm": 0.9683095812797546, "learning_rate": 9.996516804347991e-05, "loss": 1.0514, "step": 181 }, { "epoch": 0.041945148651763076, "grad_norm": 1.039068341255188, "learning_rate": 9.996376100244704e-05, "loss": 1.0368, "step": 182 }, { "epoch": 0.042175616501498044, "grad_norm": 0.8390737175941467, "learning_rate": 9.996232611281304e-05, "loss": 1.033, "step": 183 }, { "epoch": 0.042406084351233005, "grad_norm": 0.7644326090812683, "learning_rate": 9.996086337537767e-05, "loss": 1.0359, "step": 184 }, { "epoch": 0.042636552200967966, "grad_norm": 0.7521729469299316, "learning_rate": 9.995937279095621e-05, "loss": 1.0259, "step": 185 }, { "epoch": 0.04286702005070293, "grad_norm": 0.7316980361938477, "learning_rate": 9.995785436037947e-05, "loss": 1.0388, "step": 186 }, { "epoch": 0.04309748790043789, "grad_norm": 0.7523071765899658, "learning_rate": 9.995630808449383e-05, "loss": 1.0286, "step": 187 }, { "epoch": 0.04332795575017285, "grad_norm": 0.8001862168312073, "learning_rate": 9.995473396416111e-05, "loss": 1.0319, "step": 188 }, { "epoch": 0.04355842359990781, "grad_norm": 0.859374463558197, "learning_rate": 9.995313200025869e-05, "loss": 1.0244, "step": 189 }, { "epoch": 0.04378889144964278, "grad_norm": 0.9133301973342896, "learning_rate": 9.995150219367946e-05, "loss": 1.0106, "step": 190 }, { "epoch": 0.04401935929937774, "grad_norm": 0.8615682125091553, "learning_rate": 9.994984454533185e-05, "loss": 1.0147, "step": 191 }, { "epoch": 0.0442498271491127, "grad_norm": 0.7992554903030396, "learning_rate": 9.994815905613981e-05, "loss": 1.0163, "step": 192 }, { "epoch": 0.04448029499884766, "grad_norm": 0.8807731866836548, "learning_rate": 9.994644572704275e-05, "loss": 1.0208, "step": 193 }, { "epoch": 0.04471076284858262, "grad_norm": 1.1343860626220703, "learning_rate": 9.994470455899568e-05, "loss": 1.0152, "step": 194 }, { "epoch": 0.04494123069831758, "grad_norm": 1.4376226663589478, "learning_rate": 9.994293555296904e-05, "loss": 1.0261, "step": 195 }, { "epoch": 0.045171698548052544, "grad_norm": 1.5723721981048584, "learning_rate": 9.994113870994888e-05, "loss": 1.0137, "step": 196 }, { "epoch": 0.04540216639778751, "grad_norm": 1.2862370014190674, "learning_rate": 9.993931403093668e-05, "loss": 1.0159, "step": 197 }, { "epoch": 0.04563263424752247, "grad_norm": 1.0813417434692383, "learning_rate": 9.99374615169495e-05, "loss": 0.9976, "step": 198 }, { "epoch": 0.045863102097257434, "grad_norm": 0.7794204354286194, "learning_rate": 9.993558116901985e-05, "loss": 1.0036, "step": 199 }, { "epoch": 0.046093569946992395, "grad_norm": 1.225102186203003, "learning_rate": 9.993367298819583e-05, "loss": 0.9909, "step": 200 }, { "epoch": 0.046324037796727356, "grad_norm": 1.1289259195327759, "learning_rate": 9.9931736975541e-05, "loss": 1.0123, "step": 201 }, { "epoch": 0.04655450564646232, "grad_norm": 0.755088746547699, "learning_rate": 9.992977313213443e-05, "loss": 0.9916, "step": 202 }, { "epoch": 0.04678497349619728, "grad_norm": 0.8576580286026001, "learning_rate": 9.992778145907073e-05, "loss": 0.9914, "step": 203 }, { "epoch": 0.04701544134593224, "grad_norm": 0.8619646430015564, "learning_rate": 9.992576195746003e-05, "loss": 0.9963, "step": 204 }, { "epoch": 0.04724590919566721, "grad_norm": 0.8409770727157593, "learning_rate": 9.992371462842794e-05, "loss": 0.9923, "step": 205 }, { "epoch": 0.04747637704540217, "grad_norm": 0.7922968864440918, "learning_rate": 9.992163947311557e-05, "loss": 0.9859, "step": 206 }, { "epoch": 0.04770684489513713, "grad_norm": 0.7922948002815247, "learning_rate": 9.99195364926796e-05, "loss": 0.9798, "step": 207 }, { "epoch": 0.04793731274487209, "grad_norm": 0.8570684790611267, "learning_rate": 9.991740568829215e-05, "loss": 0.9644, "step": 208 }, { "epoch": 0.04816778059460705, "grad_norm": 0.9860967993736267, "learning_rate": 9.99152470611409e-05, "loss": 0.976, "step": 209 }, { "epoch": 0.04839824844434201, "grad_norm": 1.1492034196853638, "learning_rate": 9.991306061242899e-05, "loss": 0.9746, "step": 210 }, { "epoch": 0.048628716294076973, "grad_norm": 1.3347015380859375, "learning_rate": 9.991084634337511e-05, "loss": 0.973, "step": 211 }, { "epoch": 0.04885918414381194, "grad_norm": 1.097365140914917, "learning_rate": 9.990860425521347e-05, "loss": 0.9814, "step": 212 }, { "epoch": 0.0490896519935469, "grad_norm": 1.006355881690979, "learning_rate": 9.990633434919369e-05, "loss": 0.9726, "step": 213 }, { "epoch": 0.049320119843281864, "grad_norm": 0.8860179781913757, "learning_rate": 9.990403662658104e-05, "loss": 0.973, "step": 214 }, { "epoch": 0.049550587693016825, "grad_norm": 0.8287469148635864, "learning_rate": 9.990171108865614e-05, "loss": 0.9673, "step": 215 }, { "epoch": 0.049781055542751786, "grad_norm": 0.8757283687591553, "learning_rate": 9.989935773671525e-05, "loss": 0.9571, "step": 216 }, { "epoch": 0.05001152339248675, "grad_norm": 0.8432666063308716, "learning_rate": 9.989697657207002e-05, "loss": 0.9543, "step": 217 }, { "epoch": 0.05024199124222171, "grad_norm": 0.8379467725753784, "learning_rate": 9.98945675960477e-05, "loss": 0.9643, "step": 218 }, { "epoch": 0.05047245909195667, "grad_norm": 0.7548062801361084, "learning_rate": 9.989213080999097e-05, "loss": 0.9637, "step": 219 }, { "epoch": 0.05070292694169164, "grad_norm": 0.6869426965713501, "learning_rate": 9.988966621525804e-05, "loss": 0.9411, "step": 220 }, { "epoch": 0.0509333947914266, "grad_norm": 0.8082894086837769, "learning_rate": 9.988717381322262e-05, "loss": 0.9441, "step": 221 }, { "epoch": 0.05116386264116156, "grad_norm": 0.8997180461883545, "learning_rate": 9.988465360527389e-05, "loss": 0.9436, "step": 222 }, { "epoch": 0.05139433049089652, "grad_norm": 0.9218918681144714, "learning_rate": 9.988210559281658e-05, "loss": 0.9616, "step": 223 }, { "epoch": 0.05162479834063148, "grad_norm": 0.8721861839294434, "learning_rate": 9.98795297772709e-05, "loss": 0.9312, "step": 224 }, { "epoch": 0.05185526619036644, "grad_norm": 0.8172184824943542, "learning_rate": 9.987692616007253e-05, "loss": 0.945, "step": 225 }, { "epoch": 0.0520857340401014, "grad_norm": 0.8974547386169434, "learning_rate": 9.987429474267268e-05, "loss": 0.9419, "step": 226 }, { "epoch": 0.05231620188983637, "grad_norm": 1.0265862941741943, "learning_rate": 9.987163552653802e-05, "loss": 0.9443, "step": 227 }, { "epoch": 0.05254666973957133, "grad_norm": 1.184733510017395, "learning_rate": 9.986894851315074e-05, "loss": 0.9374, "step": 228 }, { "epoch": 0.05277713758930629, "grad_norm": 1.3877384662628174, "learning_rate": 9.98662337040085e-05, "loss": 0.9444, "step": 229 }, { "epoch": 0.053007605439041254, "grad_norm": 1.0500129461288452, "learning_rate": 9.98634911006245e-05, "loss": 0.9305, "step": 230 }, { "epoch": 0.053238073288776215, "grad_norm": 0.6418678164482117, "learning_rate": 9.986072070452738e-05, "loss": 0.9261, "step": 231 }, { "epoch": 0.053468541138511176, "grad_norm": 0.9703414440155029, "learning_rate": 9.985792251726131e-05, "loss": 0.9323, "step": 232 }, { "epoch": 0.05369900898824614, "grad_norm": 0.8372150659561157, "learning_rate": 9.985509654038591e-05, "loss": 0.9335, "step": 233 }, { "epoch": 0.0539294768379811, "grad_norm": 0.7052909135818481, "learning_rate": 9.985224277547634e-05, "loss": 0.9088, "step": 234 }, { "epoch": 0.054159944687716066, "grad_norm": 0.7531741261482239, "learning_rate": 9.984936122412319e-05, "loss": 0.9182, "step": 235 }, { "epoch": 0.05439041253745103, "grad_norm": 0.633127748966217, "learning_rate": 9.98464518879326e-05, "loss": 0.923, "step": 236 }, { "epoch": 0.05462088038718599, "grad_norm": 0.5811619758605957, "learning_rate": 9.984351476852613e-05, "loss": 0.9184, "step": 237 }, { "epoch": 0.05485134823692095, "grad_norm": 0.6498633027076721, "learning_rate": 9.984054986754088e-05, "loss": 0.919, "step": 238 }, { "epoch": 0.05508181608665591, "grad_norm": 0.5947354435920715, "learning_rate": 9.98375571866294e-05, "loss": 0.9233, "step": 239 }, { "epoch": 0.05531228393639087, "grad_norm": 0.5955505967140198, "learning_rate": 9.983453672745975e-05, "loss": 0.9212, "step": 240 }, { "epoch": 0.05554275178612583, "grad_norm": 0.571624755859375, "learning_rate": 9.983148849171546e-05, "loss": 0.9166, "step": 241 }, { "epoch": 0.0557732196358608, "grad_norm": 0.6101223230361938, "learning_rate": 9.982841248109555e-05, "loss": 0.912, "step": 242 }, { "epoch": 0.05600368748559576, "grad_norm": 0.6820085644721985, "learning_rate": 9.982530869731451e-05, "loss": 0.9112, "step": 243 }, { "epoch": 0.05623415533533072, "grad_norm": 0.9035933613777161, "learning_rate": 9.982217714210232e-05, "loss": 0.9129, "step": 244 }, { "epoch": 0.056464623185065684, "grad_norm": 1.0833593606948853, "learning_rate": 9.98190178172044e-05, "loss": 0.9062, "step": 245 }, { "epoch": 0.056695091034800645, "grad_norm": 1.1479918956756592, "learning_rate": 9.981583072438173e-05, "loss": 0.9166, "step": 246 }, { "epoch": 0.056925558884535606, "grad_norm": 0.9499214291572571, "learning_rate": 9.981261586541068e-05, "loss": 0.9116, "step": 247 }, { "epoch": 0.05715602673427057, "grad_norm": 0.8736324906349182, "learning_rate": 9.980937324208317e-05, "loss": 0.9076, "step": 248 }, { "epoch": 0.057386494584005535, "grad_norm": 0.8064135313034058, "learning_rate": 9.980610285620654e-05, "loss": 0.9052, "step": 249 }, { "epoch": 0.057616962433740496, "grad_norm": 0.7814821600914001, "learning_rate": 9.980280470960363e-05, "loss": 0.8956, "step": 250 }, { "epoch": 0.05784743028347546, "grad_norm": 0.7428434491157532, "learning_rate": 9.979947880411273e-05, "loss": 0.8941, "step": 251 }, { "epoch": 0.05807789813321042, "grad_norm": 0.6821320652961731, "learning_rate": 9.979612514158765e-05, "loss": 0.8985, "step": 252 }, { "epoch": 0.05830836598294538, "grad_norm": 0.6651344895362854, "learning_rate": 9.979274372389762e-05, "loss": 0.8986, "step": 253 }, { "epoch": 0.05853883383268034, "grad_norm": 0.7630136013031006, "learning_rate": 9.978933455292736e-05, "loss": 0.8916, "step": 254 }, { "epoch": 0.0587693016824153, "grad_norm": 0.801031768321991, "learning_rate": 9.978589763057708e-05, "loss": 0.8947, "step": 255 }, { "epoch": 0.05899976953215026, "grad_norm": 0.7151498794555664, "learning_rate": 9.978243295876242e-05, "loss": 0.8945, "step": 256 }, { "epoch": 0.05923023738188523, "grad_norm": 0.7015817761421204, "learning_rate": 9.97789405394145e-05, "loss": 0.8903, "step": 257 }, { "epoch": 0.05946070523162019, "grad_norm": 0.7685847282409668, "learning_rate": 9.977542037447994e-05, "loss": 0.8959, "step": 258 }, { "epoch": 0.05969117308135515, "grad_norm": 0.8099703192710876, "learning_rate": 9.977187246592076e-05, "loss": 0.8984, "step": 259 }, { "epoch": 0.05992164093109011, "grad_norm": 0.7858747243881226, "learning_rate": 9.97682968157145e-05, "loss": 0.8845, "step": 260 }, { "epoch": 0.060152108780825074, "grad_norm": 0.8646757006645203, "learning_rate": 9.976469342585413e-05, "loss": 0.8776, "step": 261 }, { "epoch": 0.060382576630560035, "grad_norm": 0.8440235257148743, "learning_rate": 9.976106229834812e-05, "loss": 0.8711, "step": 262 }, { "epoch": 0.060613044480294996, "grad_norm": 0.8207287192344666, "learning_rate": 9.975740343522033e-05, "loss": 0.8806, "step": 263 }, { "epoch": 0.060843512330029964, "grad_norm": 0.8601641058921814, "learning_rate": 9.975371683851016e-05, "loss": 0.8794, "step": 264 }, { "epoch": 0.061073980179764925, "grad_norm": 0.8005819916725159, "learning_rate": 9.975000251027242e-05, "loss": 0.889, "step": 265 }, { "epoch": 0.061304448029499886, "grad_norm": 0.7524363994598389, "learning_rate": 9.974626045257738e-05, "loss": 0.8734, "step": 266 }, { "epoch": 0.06153491587923485, "grad_norm": 0.7972283959388733, "learning_rate": 9.974249066751077e-05, "loss": 0.8856, "step": 267 }, { "epoch": 0.06176538372896981, "grad_norm": 0.925719678401947, "learning_rate": 9.973869315717379e-05, "loss": 0.8745, "step": 268 }, { "epoch": 0.06199585157870477, "grad_norm": 0.8908208608627319, "learning_rate": 9.973486792368307e-05, "loss": 0.8676, "step": 269 }, { "epoch": 0.06222631942843973, "grad_norm": 0.832204282283783, "learning_rate": 9.973101496917072e-05, "loss": 0.874, "step": 270 }, { "epoch": 0.06245678727817469, "grad_norm": 0.8755982518196106, "learning_rate": 9.972713429578427e-05, "loss": 0.8564, "step": 271 }, { "epoch": 0.06268725512790965, "grad_norm": 0.8395307660102844, "learning_rate": 9.97232259056867e-05, "loss": 0.8624, "step": 272 }, { "epoch": 0.06291772297764461, "grad_norm": 0.7844563722610474, "learning_rate": 9.97192898010565e-05, "loss": 0.8637, "step": 273 }, { "epoch": 0.06314819082737957, "grad_norm": 0.6658696532249451, "learning_rate": 9.97153259840875e-05, "loss": 0.8695, "step": 274 }, { "epoch": 0.06337865867711455, "grad_norm": 0.7139994502067566, "learning_rate": 9.971133445698908e-05, "loss": 0.8486, "step": 275 }, { "epoch": 0.06360912652684951, "grad_norm": 0.6378993391990662, "learning_rate": 9.970731522198602e-05, "loss": 0.8639, "step": 276 }, { "epoch": 0.06383959437658447, "grad_norm": 0.6299728155136108, "learning_rate": 9.970326828131852e-05, "loss": 0.8541, "step": 277 }, { "epoch": 0.06407006222631943, "grad_norm": 0.619757890701294, "learning_rate": 9.969919363724226e-05, "loss": 0.8506, "step": 278 }, { "epoch": 0.0643005300760544, "grad_norm": 0.575935423374176, "learning_rate": 9.969509129202837e-05, "loss": 0.8599, "step": 279 }, { "epoch": 0.06453099792578935, "grad_norm": 0.5778565406799316, "learning_rate": 9.969096124796335e-05, "loss": 0.8495, "step": 280 }, { "epoch": 0.06476146577552432, "grad_norm": 0.6680006980895996, "learning_rate": 9.968680350734922e-05, "loss": 0.8468, "step": 281 }, { "epoch": 0.06499193362525928, "grad_norm": 0.5861308574676514, "learning_rate": 9.968261807250341e-05, "loss": 0.8444, "step": 282 }, { "epoch": 0.06522240147499424, "grad_norm": 0.5511894226074219, "learning_rate": 9.967840494575879e-05, "loss": 0.852, "step": 283 }, { "epoch": 0.0654528693247292, "grad_norm": 0.6007016897201538, "learning_rate": 9.967416412946362e-05, "loss": 0.8503, "step": 284 }, { "epoch": 0.06568333717446416, "grad_norm": 0.5989518761634827, "learning_rate": 9.966989562598163e-05, "loss": 0.8463, "step": 285 }, { "epoch": 0.06591380502419912, "grad_norm": 0.623166561126709, "learning_rate": 9.966559943769203e-05, "loss": 0.8389, "step": 286 }, { "epoch": 0.06614427287393408, "grad_norm": 0.5255123376846313, "learning_rate": 9.966127556698936e-05, "loss": 0.8486, "step": 287 }, { "epoch": 0.06637474072366904, "grad_norm": 0.6101188063621521, "learning_rate": 9.965692401628368e-05, "loss": 0.8508, "step": 288 }, { "epoch": 0.066605208573404, "grad_norm": 0.6269940733909607, "learning_rate": 9.96525447880004e-05, "loss": 0.8407, "step": 289 }, { "epoch": 0.06683567642313898, "grad_norm": 0.7046296000480652, "learning_rate": 9.964813788458043e-05, "loss": 0.8329, "step": 290 }, { "epoch": 0.06706614427287394, "grad_norm": 0.8176979422569275, "learning_rate": 9.964370330848005e-05, "loss": 0.8347, "step": 291 }, { "epoch": 0.0672966121226089, "grad_norm": 0.8342549800872803, "learning_rate": 9.963924106217102e-05, "loss": 0.836, "step": 292 }, { "epoch": 0.06752707997234386, "grad_norm": 0.8378840684890747, "learning_rate": 9.963475114814045e-05, "loss": 0.8339, "step": 293 }, { "epoch": 0.06775754782207882, "grad_norm": 0.7933089733123779, "learning_rate": 9.963023356889093e-05, "loss": 0.8321, "step": 294 }, { "epoch": 0.06798801567181378, "grad_norm": 0.7538984417915344, "learning_rate": 9.962568832694044e-05, "loss": 0.8395, "step": 295 }, { "epoch": 0.06821848352154875, "grad_norm": 0.6032546162605286, "learning_rate": 9.962111542482241e-05, "loss": 0.8314, "step": 296 }, { "epoch": 0.0684489513712837, "grad_norm": 0.694686770439148, "learning_rate": 9.961651486508564e-05, "loss": 0.826, "step": 297 }, { "epoch": 0.06867941922101867, "grad_norm": 0.7310389876365662, "learning_rate": 9.96118866502944e-05, "loss": 0.8221, "step": 298 }, { "epoch": 0.06890988707075363, "grad_norm": 0.6587698459625244, "learning_rate": 9.960723078302832e-05, "loss": 0.8275, "step": 299 }, { "epoch": 0.06914035492048859, "grad_norm": 0.6249524354934692, "learning_rate": 9.960254726588246e-05, "loss": 0.8329, "step": 300 }, { "epoch": 0.06937082277022355, "grad_norm": 0.6110243797302246, "learning_rate": 9.959783610146733e-05, "loss": 0.8296, "step": 301 }, { "epoch": 0.06960129061995851, "grad_norm": 0.528419017791748, "learning_rate": 9.959309729240882e-05, "loss": 0.825, "step": 302 }, { "epoch": 0.06983175846969347, "grad_norm": 0.47809135913848877, "learning_rate": 9.95883308413482e-05, "loss": 0.8163, "step": 303 }, { "epoch": 0.07006222631942843, "grad_norm": 0.4991075098514557, "learning_rate": 9.95835367509422e-05, "loss": 0.8368, "step": 304 }, { "epoch": 0.07029269416916341, "grad_norm": 0.5650983452796936, "learning_rate": 9.957871502386291e-05, "loss": 0.8194, "step": 305 }, { "epoch": 0.07052316201889837, "grad_norm": 0.4641514718532562, "learning_rate": 9.957386566279788e-05, "loss": 0.8259, "step": 306 }, { "epoch": 0.07075362986863333, "grad_norm": 0.4920820891857147, "learning_rate": 9.956898867044999e-05, "loss": 0.8188, "step": 307 }, { "epoch": 0.07098409771836829, "grad_norm": 0.525382936000824, "learning_rate": 9.956408404953756e-05, "loss": 0.8156, "step": 308 }, { "epoch": 0.07121456556810325, "grad_norm": 0.5120217800140381, "learning_rate": 9.955915180279433e-05, "loss": 0.8213, "step": 309 }, { "epoch": 0.07144503341783821, "grad_norm": 0.448485791683197, "learning_rate": 9.955419193296943e-05, "loss": 0.8191, "step": 310 }, { "epoch": 0.07167550126757317, "grad_norm": 0.5565616488456726, "learning_rate": 9.954920444282732e-05, "loss": 0.8244, "step": 311 }, { "epoch": 0.07190596911730814, "grad_norm": 0.5660645365715027, "learning_rate": 9.954418933514795e-05, "loss": 0.8114, "step": 312 }, { "epoch": 0.0721364369670431, "grad_norm": 0.5123659372329712, "learning_rate": 9.953914661272661e-05, "loss": 0.8135, "step": 313 }, { "epoch": 0.07236690481677806, "grad_norm": 0.47135382890701294, "learning_rate": 9.953407627837398e-05, "loss": 0.813, "step": 314 }, { "epoch": 0.07259737266651302, "grad_norm": 0.5098188519477844, "learning_rate": 9.952897833491617e-05, "loss": 0.8116, "step": 315 }, { "epoch": 0.07282784051624798, "grad_norm": 0.5356022715568542, "learning_rate": 9.952385278519462e-05, "loss": 0.8152, "step": 316 }, { "epoch": 0.07305830836598294, "grad_norm": 0.5658923983573914, "learning_rate": 9.951869963206622e-05, "loss": 0.8085, "step": 317 }, { "epoch": 0.0732887762157179, "grad_norm": 0.6336301565170288, "learning_rate": 9.951351887840317e-05, "loss": 0.8137, "step": 318 }, { "epoch": 0.07351924406545286, "grad_norm": 0.7044976949691772, "learning_rate": 9.950831052709314e-05, "loss": 0.809, "step": 319 }, { "epoch": 0.07374971191518784, "grad_norm": 0.6830228567123413, "learning_rate": 9.950307458103911e-05, "loss": 0.8091, "step": 320 }, { "epoch": 0.0739801797649228, "grad_norm": 0.6315388083457947, "learning_rate": 9.949781104315951e-05, "loss": 0.811, "step": 321 }, { "epoch": 0.07421064761465776, "grad_norm": 0.44383642077445984, "learning_rate": 9.949251991638806e-05, "loss": 0.8062, "step": 322 }, { "epoch": 0.07444111546439272, "grad_norm": 0.48055797815322876, "learning_rate": 9.948720120367394e-05, "loss": 0.8063, "step": 323 }, { "epoch": 0.07467158331412768, "grad_norm": 0.452512264251709, "learning_rate": 9.948185490798168e-05, "loss": 0.8024, "step": 324 }, { "epoch": 0.07490205116386264, "grad_norm": 0.46535223722457886, "learning_rate": 9.947648103229113e-05, "loss": 0.8065, "step": 325 }, { "epoch": 0.0751325190135976, "grad_norm": 0.6422431468963623, "learning_rate": 9.94710795795976e-05, "loss": 0.8096, "step": 326 }, { "epoch": 0.07536298686333257, "grad_norm": 0.6489291191101074, "learning_rate": 9.946565055291174e-05, "loss": 0.8136, "step": 327 }, { "epoch": 0.07559345471306753, "grad_norm": 0.7198114991188049, "learning_rate": 9.946019395525951e-05, "loss": 0.8052, "step": 328 }, { "epoch": 0.07582392256280249, "grad_norm": 0.6340928077697754, "learning_rate": 9.945470978968234e-05, "loss": 0.8, "step": 329 }, { "epoch": 0.07605439041253745, "grad_norm": 0.6357820630073547, "learning_rate": 9.944919805923694e-05, "loss": 0.8031, "step": 330 }, { "epoch": 0.07628485826227241, "grad_norm": 0.5786786675453186, "learning_rate": 9.944365876699544e-05, "loss": 0.8049, "step": 331 }, { "epoch": 0.07651532611200737, "grad_norm": 0.4964509904384613, "learning_rate": 9.943809191604527e-05, "loss": 0.8092, "step": 332 }, { "epoch": 0.07674579396174233, "grad_norm": 0.431325763463974, "learning_rate": 9.943249750948929e-05, "loss": 0.798, "step": 333 }, { "epoch": 0.07697626181147729, "grad_norm": 0.41866233944892883, "learning_rate": 9.942687555044568e-05, "loss": 0.8017, "step": 334 }, { "epoch": 0.07720672966121227, "grad_norm": 0.4726838767528534, "learning_rate": 9.9421226042048e-05, "loss": 0.8015, "step": 335 }, { "epoch": 0.07743719751094723, "grad_norm": 0.473954439163208, "learning_rate": 9.941554898744511e-05, "loss": 0.7963, "step": 336 }, { "epoch": 0.07766766536068219, "grad_norm": 0.49290353059768677, "learning_rate": 9.940984438980131e-05, "loss": 0.807, "step": 337 }, { "epoch": 0.07789813321041715, "grad_norm": 0.4410178065299988, "learning_rate": 9.940411225229618e-05, "loss": 0.7958, "step": 338 }, { "epoch": 0.07812860106015211, "grad_norm": 0.41889092326164246, "learning_rate": 9.939835257812468e-05, "loss": 0.7984, "step": 339 }, { "epoch": 0.07835906890988707, "grad_norm": 0.4519578218460083, "learning_rate": 9.939256537049711e-05, "loss": 0.8046, "step": 340 }, { "epoch": 0.07858953675962203, "grad_norm": 0.5191729664802551, "learning_rate": 9.938675063263914e-05, "loss": 0.8045, "step": 341 }, { "epoch": 0.078820004609357, "grad_norm": 0.5066516399383545, "learning_rate": 9.938090836779174e-05, "loss": 0.7918, "step": 342 }, { "epoch": 0.07905047245909196, "grad_norm": 0.4769982695579529, "learning_rate": 9.937503857921125e-05, "loss": 0.8122, "step": 343 }, { "epoch": 0.07928094030882692, "grad_norm": 0.4449038505554199, "learning_rate": 9.936914127016938e-05, "loss": 0.8004, "step": 344 }, { "epoch": 0.07951140815856188, "grad_norm": 0.38136690855026245, "learning_rate": 9.936321644395312e-05, "loss": 0.8063, "step": 345 }, { "epoch": 0.07974187600829684, "grad_norm": 0.5003980994224548, "learning_rate": 9.935726410386484e-05, "loss": 0.7955, "step": 346 }, { "epoch": 0.0799723438580318, "grad_norm": 0.47362205386161804, "learning_rate": 9.93512842532222e-05, "loss": 0.7994, "step": 347 }, { "epoch": 0.08020281170776676, "grad_norm": 0.4894074499607086, "learning_rate": 9.934527689535826e-05, "loss": 0.8096, "step": 348 }, { "epoch": 0.08043327955750172, "grad_norm": 0.4951101839542389, "learning_rate": 9.933924203362138e-05, "loss": 0.7788, "step": 349 }, { "epoch": 0.0806637474072367, "grad_norm": 0.46733275055885315, "learning_rate": 9.933317967137524e-05, "loss": 0.7885, "step": 350 }, { "epoch": 0.08089421525697166, "grad_norm": 0.48279932141304016, "learning_rate": 9.932708981199883e-05, "loss": 0.7924, "step": 351 }, { "epoch": 0.08112468310670662, "grad_norm": 0.48102623224258423, "learning_rate": 9.932097245888652e-05, "loss": 0.7921, "step": 352 }, { "epoch": 0.08135515095644158, "grad_norm": 0.4802628457546234, "learning_rate": 9.931482761544797e-05, "loss": 0.8044, "step": 353 }, { "epoch": 0.08158561880617654, "grad_norm": 0.4573599398136139, "learning_rate": 9.930865528510815e-05, "loss": 0.7928, "step": 354 }, { "epoch": 0.0818160866559115, "grad_norm": 0.5361927151679993, "learning_rate": 9.93024554713074e-05, "loss": 0.7967, "step": 355 }, { "epoch": 0.08204655450564646, "grad_norm": 0.56016606092453, "learning_rate": 9.929622817750133e-05, "loss": 0.8019, "step": 356 }, { "epoch": 0.08227702235538142, "grad_norm": 0.5165932774543762, "learning_rate": 9.928997340716087e-05, "loss": 0.7981, "step": 357 }, { "epoch": 0.08250749020511638, "grad_norm": 0.4367765188217163, "learning_rate": 9.928369116377231e-05, "loss": 0.7892, "step": 358 }, { "epoch": 0.08273795805485135, "grad_norm": 0.6103436946868896, "learning_rate": 9.92773814508372e-05, "loss": 0.7887, "step": 359 }, { "epoch": 0.08296842590458631, "grad_norm": 0.6656332015991211, "learning_rate": 9.927104427187243e-05, "loss": 0.7924, "step": 360 }, { "epoch": 0.08319889375432127, "grad_norm": 0.6310869455337524, "learning_rate": 9.926467963041018e-05, "loss": 0.7973, "step": 361 }, { "epoch": 0.08342936160405623, "grad_norm": 0.6289921998977661, "learning_rate": 9.925828752999797e-05, "loss": 0.7897, "step": 362 }, { "epoch": 0.08365982945379119, "grad_norm": 0.501392126083374, "learning_rate": 9.925186797419858e-05, "loss": 0.7915, "step": 363 }, { "epoch": 0.08389029730352615, "grad_norm": 0.4528777301311493, "learning_rate": 9.924542096659015e-05, "loss": 0.7925, "step": 364 }, { "epoch": 0.08412076515326113, "grad_norm": 0.5350551605224609, "learning_rate": 9.923894651076605e-05, "loss": 0.7941, "step": 365 }, { "epoch": 0.08435123300299609, "grad_norm": 0.4819077253341675, "learning_rate": 9.9232444610335e-05, "loss": 0.7899, "step": 366 }, { "epoch": 0.08458170085273105, "grad_norm": 0.47890499234199524, "learning_rate": 9.9225915268921e-05, "loss": 0.7899, "step": 367 }, { "epoch": 0.08481216870246601, "grad_norm": 0.6021353602409363, "learning_rate": 9.921935849016338e-05, "loss": 0.7949, "step": 368 }, { "epoch": 0.08504263655220097, "grad_norm": 0.624280571937561, "learning_rate": 9.921277427771667e-05, "loss": 0.7977, "step": 369 }, { "epoch": 0.08527310440193593, "grad_norm": 0.48100805282592773, "learning_rate": 9.92061626352508e-05, "loss": 0.7847, "step": 370 }, { "epoch": 0.08550357225167089, "grad_norm": 0.4303872287273407, "learning_rate": 9.919952356645092e-05, "loss": 0.7925, "step": 371 }, { "epoch": 0.08573404010140585, "grad_norm": 0.4995540678501129, "learning_rate": 9.91928570750175e-05, "loss": 0.7931, "step": 372 }, { "epoch": 0.08596450795114081, "grad_norm": 0.49925413727760315, "learning_rate": 9.918616316466628e-05, "loss": 0.7875, "step": 373 }, { "epoch": 0.08619497580087578, "grad_norm": 0.4929007887840271, "learning_rate": 9.917944183912828e-05, "loss": 0.7821, "step": 374 }, { "epoch": 0.08642544365061074, "grad_norm": 0.4669882357120514, "learning_rate": 9.91726931021498e-05, "loss": 0.786, "step": 375 }, { "epoch": 0.0866559115003457, "grad_norm": 0.4014081358909607, "learning_rate": 9.916591695749244e-05, "loss": 0.7833, "step": 376 }, { "epoch": 0.08688637935008066, "grad_norm": 0.38111427426338196, "learning_rate": 9.915911340893305e-05, "loss": 0.7894, "step": 377 }, { "epoch": 0.08711684719981562, "grad_norm": 0.457692950963974, "learning_rate": 9.915228246026376e-05, "loss": 0.7912, "step": 378 }, { "epoch": 0.0873473150495506, "grad_norm": 0.467342734336853, "learning_rate": 9.9145424115292e-05, "loss": 0.7843, "step": 379 }, { "epoch": 0.08757778289928556, "grad_norm": 0.44467970728874207, "learning_rate": 9.913853837784042e-05, "loss": 0.7939, "step": 380 }, { "epoch": 0.08780825074902052, "grad_norm": 0.4361025094985962, "learning_rate": 9.913162525174697e-05, "loss": 0.7803, "step": 381 }, { "epoch": 0.08803871859875548, "grad_norm": 0.44507619738578796, "learning_rate": 9.912468474086486e-05, "loss": 0.7765, "step": 382 }, { "epoch": 0.08826918644849044, "grad_norm": 0.5151132941246033, "learning_rate": 9.911771684906257e-05, "loss": 0.7824, "step": 383 }, { "epoch": 0.0884996542982254, "grad_norm": 0.45923879742622375, "learning_rate": 9.911072158022385e-05, "loss": 0.7881, "step": 384 }, { "epoch": 0.08873012214796036, "grad_norm": 0.41044628620147705, "learning_rate": 9.910369893824767e-05, "loss": 0.7841, "step": 385 }, { "epoch": 0.08896058999769532, "grad_norm": 0.41591617465019226, "learning_rate": 9.90966489270483e-05, "loss": 0.7754, "step": 386 }, { "epoch": 0.08919105784743028, "grad_norm": 0.5302747488021851, "learning_rate": 9.908957155055523e-05, "loss": 0.7784, "step": 387 }, { "epoch": 0.08942152569716524, "grad_norm": 0.41914960741996765, "learning_rate": 9.908246681271322e-05, "loss": 0.7817, "step": 388 }, { "epoch": 0.0896519935469002, "grad_norm": 0.38282233476638794, "learning_rate": 9.907533471748231e-05, "loss": 0.779, "step": 389 }, { "epoch": 0.08988246139663517, "grad_norm": 0.4711924195289612, "learning_rate": 9.906817526883774e-05, "loss": 0.7853, "step": 390 }, { "epoch": 0.09011292924637013, "grad_norm": 0.45906805992126465, "learning_rate": 9.906098847076999e-05, "loss": 0.7711, "step": 391 }, { "epoch": 0.09034339709610509, "grad_norm": 0.44320812821388245, "learning_rate": 9.905377432728484e-05, "loss": 0.7792, "step": 392 }, { "epoch": 0.09057386494584005, "grad_norm": 0.41927963495254517, "learning_rate": 9.904653284240328e-05, "loss": 0.784, "step": 393 }, { "epoch": 0.09080433279557502, "grad_norm": 0.5191970467567444, "learning_rate": 9.903926402016153e-05, "loss": 0.7834, "step": 394 }, { "epoch": 0.09103480064530999, "grad_norm": 0.6078465580940247, "learning_rate": 9.903196786461106e-05, "loss": 0.7784, "step": 395 }, { "epoch": 0.09126526849504495, "grad_norm": 0.6223181486129761, "learning_rate": 9.902464437981855e-05, "loss": 0.7913, "step": 396 }, { "epoch": 0.09149573634477991, "grad_norm": 0.6932902336120605, "learning_rate": 9.901729356986597e-05, "loss": 0.7817, "step": 397 }, { "epoch": 0.09172620419451487, "grad_norm": 0.6165162920951843, "learning_rate": 9.900991543885048e-05, "loss": 0.775, "step": 398 }, { "epoch": 0.09195667204424983, "grad_norm": 0.5054426193237305, "learning_rate": 9.900250999088447e-05, "loss": 0.7824, "step": 399 }, { "epoch": 0.09218713989398479, "grad_norm": 0.5113014578819275, "learning_rate": 9.899507723009554e-05, "loss": 0.7827, "step": 400 }, { "epoch": 0.09241760774371975, "grad_norm": 0.5064506530761719, "learning_rate": 9.898761716062654e-05, "loss": 0.7848, "step": 401 }, { "epoch": 0.09264807559345471, "grad_norm": 0.4201597571372986, "learning_rate": 9.898012978663553e-05, "loss": 0.7804, "step": 402 }, { "epoch": 0.09287854344318967, "grad_norm": 0.47890499234199524, "learning_rate": 9.897261511229583e-05, "loss": 0.7775, "step": 403 }, { "epoch": 0.09310901129292463, "grad_norm": 0.5132191777229309, "learning_rate": 9.896507314179588e-05, "loss": 0.7739, "step": 404 }, { "epoch": 0.0933394791426596, "grad_norm": 0.5053228735923767, "learning_rate": 9.895750387933944e-05, "loss": 0.7725, "step": 405 }, { "epoch": 0.09356994699239456, "grad_norm": 0.5740991830825806, "learning_rate": 9.894990732914541e-05, "loss": 0.7784, "step": 406 }, { "epoch": 0.09380041484212952, "grad_norm": 0.5145570635795593, "learning_rate": 9.894228349544796e-05, "loss": 0.7783, "step": 407 }, { "epoch": 0.09403088269186448, "grad_norm": 0.5113042593002319, "learning_rate": 9.893463238249638e-05, "loss": 0.7835, "step": 408 }, { "epoch": 0.09426135054159945, "grad_norm": 0.5256659984588623, "learning_rate": 9.892695399455525e-05, "loss": 0.7746, "step": 409 }, { "epoch": 0.09449181839133441, "grad_norm": 0.5014989972114563, "learning_rate": 9.891924833590431e-05, "loss": 0.7715, "step": 410 }, { "epoch": 0.09472228624106938, "grad_norm": 0.45504307746887207, "learning_rate": 9.891151541083852e-05, "loss": 0.7749, "step": 411 }, { "epoch": 0.09495275409080434, "grad_norm": 0.4456666111946106, "learning_rate": 9.8903755223668e-05, "loss": 0.7685, "step": 412 }, { "epoch": 0.0951832219405393, "grad_norm": 0.45063498616218567, "learning_rate": 9.889596777871812e-05, "loss": 0.7675, "step": 413 }, { "epoch": 0.09541368979027426, "grad_norm": 0.48387956619262695, "learning_rate": 9.88881530803294e-05, "loss": 0.7759, "step": 414 }, { "epoch": 0.09564415764000922, "grad_norm": 0.4509301483631134, "learning_rate": 9.888031113285757e-05, "loss": 0.7717, "step": 415 }, { "epoch": 0.09587462548974418, "grad_norm": 0.453188419342041, "learning_rate": 9.887244194067355e-05, "loss": 0.7707, "step": 416 }, { "epoch": 0.09610509333947914, "grad_norm": 0.4492356777191162, "learning_rate": 9.886454550816342e-05, "loss": 0.7736, "step": 417 }, { "epoch": 0.0963355611892141, "grad_norm": 0.3863151967525482, "learning_rate": 9.885662183972848e-05, "loss": 0.7812, "step": 418 }, { "epoch": 0.09656602903894906, "grad_norm": 0.4794161021709442, "learning_rate": 9.884867093978519e-05, "loss": 0.7704, "step": 419 }, { "epoch": 0.09679649688868402, "grad_norm": 0.44275277853012085, "learning_rate": 9.884069281276517e-05, "loss": 0.7693, "step": 420 }, { "epoch": 0.09702696473841899, "grad_norm": 0.4233629107475281, "learning_rate": 9.883268746311528e-05, "loss": 0.7742, "step": 421 }, { "epoch": 0.09725743258815395, "grad_norm": 0.34396764636039734, "learning_rate": 9.882465489529747e-05, "loss": 0.772, "step": 422 }, { "epoch": 0.09748790043788891, "grad_norm": 0.4056984484195709, "learning_rate": 9.881659511378892e-05, "loss": 0.7751, "step": 423 }, { "epoch": 0.09771836828762388, "grad_norm": 0.4664100408554077, "learning_rate": 9.880850812308196e-05, "loss": 0.7706, "step": 424 }, { "epoch": 0.09794883613735884, "grad_norm": 0.5346910357475281, "learning_rate": 9.880039392768405e-05, "loss": 0.7661, "step": 425 }, { "epoch": 0.0981793039870938, "grad_norm": 0.4764558970928192, "learning_rate": 9.87922525321179e-05, "loss": 0.7794, "step": 426 }, { "epoch": 0.09840977183682877, "grad_norm": 0.39610040187835693, "learning_rate": 9.878408394092129e-05, "loss": 0.7711, "step": 427 }, { "epoch": 0.09864023968656373, "grad_norm": 0.42657968401908875, "learning_rate": 9.877588815864722e-05, "loss": 0.7739, "step": 428 }, { "epoch": 0.09887070753629869, "grad_norm": 0.46280384063720703, "learning_rate": 9.87676651898638e-05, "loss": 0.7811, "step": 429 }, { "epoch": 0.09910117538603365, "grad_norm": 0.44775912165641785, "learning_rate": 9.875941503915434e-05, "loss": 0.7733, "step": 430 }, { "epoch": 0.09933164323576861, "grad_norm": 0.4808795154094696, "learning_rate": 9.875113771111725e-05, "loss": 0.7734, "step": 431 }, { "epoch": 0.09956211108550357, "grad_norm": 0.42084455490112305, "learning_rate": 9.874283321036615e-05, "loss": 0.7679, "step": 432 }, { "epoch": 0.09979257893523853, "grad_norm": 0.4245053231716156, "learning_rate": 9.873450154152972e-05, "loss": 0.7755, "step": 433 }, { "epoch": 0.1000230467849735, "grad_norm": 0.4106716811656952, "learning_rate": 9.872614270925188e-05, "loss": 0.7757, "step": 434 }, { "epoch": 0.10025351463470845, "grad_norm": 0.38926827907562256, "learning_rate": 9.871775671819162e-05, "loss": 0.7761, "step": 435 }, { "epoch": 0.10048398248444342, "grad_norm": 0.3636447489261627, "learning_rate": 9.870934357302308e-05, "loss": 0.7722, "step": 436 }, { "epoch": 0.10071445033417838, "grad_norm": 0.3969525992870331, "learning_rate": 9.870090327843557e-05, "loss": 0.7722, "step": 437 }, { "epoch": 0.10094491818391334, "grad_norm": 0.36944982409477234, "learning_rate": 9.869243583913348e-05, "loss": 0.7716, "step": 438 }, { "epoch": 0.10117538603364831, "grad_norm": 0.44781213998794556, "learning_rate": 9.86839412598364e-05, "loss": 0.7757, "step": 439 }, { "epoch": 0.10140585388338327, "grad_norm": 0.5165262818336487, "learning_rate": 9.867541954527894e-05, "loss": 0.7678, "step": 440 }, { "epoch": 0.10163632173311823, "grad_norm": 0.5470614433288574, "learning_rate": 9.866687070021097e-05, "loss": 0.7684, "step": 441 }, { "epoch": 0.1018667895828532, "grad_norm": 0.4986436665058136, "learning_rate": 9.865829472939736e-05, "loss": 0.7682, "step": 442 }, { "epoch": 0.10209725743258816, "grad_norm": 0.4134550988674164, "learning_rate": 9.864969163761817e-05, "loss": 0.7734, "step": 443 }, { "epoch": 0.10232772528232312, "grad_norm": 0.5219729542732239, "learning_rate": 9.864106142966856e-05, "loss": 0.7744, "step": 444 }, { "epoch": 0.10255819313205808, "grad_norm": 0.539275586605072, "learning_rate": 9.863240411035878e-05, "loss": 0.7721, "step": 445 }, { "epoch": 0.10278866098179304, "grad_norm": 0.4756945073604584, "learning_rate": 9.862371968451423e-05, "loss": 0.7645, "step": 446 }, { "epoch": 0.103019128831528, "grad_norm": 0.43748557567596436, "learning_rate": 9.861500815697541e-05, "loss": 0.7693, "step": 447 }, { "epoch": 0.10324959668126296, "grad_norm": 0.356668084859848, "learning_rate": 9.860626953259791e-05, "loss": 0.7645, "step": 448 }, { "epoch": 0.10348006453099792, "grad_norm": 0.31597620248794556, "learning_rate": 9.859750381625241e-05, "loss": 0.7626, "step": 449 }, { "epoch": 0.10371053238073288, "grad_norm": 0.3589065968990326, "learning_rate": 9.858871101282472e-05, "loss": 0.782, "step": 450 }, { "epoch": 0.10394100023046784, "grad_norm": 0.3751053810119629, "learning_rate": 9.857989112721574e-05, "loss": 0.7701, "step": 451 }, { "epoch": 0.1041714680802028, "grad_norm": 0.38888126611709595, "learning_rate": 9.857104416434147e-05, "loss": 0.769, "step": 452 }, { "epoch": 0.10440193592993777, "grad_norm": 0.3789352774620056, "learning_rate": 9.856217012913299e-05, "loss": 0.7725, "step": 453 }, { "epoch": 0.10463240377967274, "grad_norm": 0.36712217330932617, "learning_rate": 9.855326902653647e-05, "loss": 0.7763, "step": 454 }, { "epoch": 0.1048628716294077, "grad_norm": 0.38460057973861694, "learning_rate": 9.854434086151318e-05, "loss": 0.7603, "step": 455 }, { "epoch": 0.10509333947914266, "grad_norm": 0.41386526823043823, "learning_rate": 9.853538563903945e-05, "loss": 0.7683, "step": 456 }, { "epoch": 0.10532380732887762, "grad_norm": 0.42844244837760925, "learning_rate": 9.852640336410671e-05, "loss": 0.7587, "step": 457 }, { "epoch": 0.10555427517861259, "grad_norm": 0.41544997692108154, "learning_rate": 9.851739404172147e-05, "loss": 0.7622, "step": 458 }, { "epoch": 0.10578474302834755, "grad_norm": 0.42414671182632446, "learning_rate": 9.850835767690532e-05, "loss": 0.7644, "step": 459 }, { "epoch": 0.10601521087808251, "grad_norm": 0.4521081745624542, "learning_rate": 9.849929427469488e-05, "loss": 0.7621, "step": 460 }, { "epoch": 0.10624567872781747, "grad_norm": 0.4264926314353943, "learning_rate": 9.849020384014192e-05, "loss": 0.7661, "step": 461 }, { "epoch": 0.10647614657755243, "grad_norm": 0.4072660207748413, "learning_rate": 9.848108637831319e-05, "loss": 0.7686, "step": 462 }, { "epoch": 0.10670661442728739, "grad_norm": 0.38507211208343506, "learning_rate": 9.847194189429058e-05, "loss": 0.7705, "step": 463 }, { "epoch": 0.10693708227702235, "grad_norm": 0.4261547923088074, "learning_rate": 9.846277039317095e-05, "loss": 0.7827, "step": 464 }, { "epoch": 0.10716755012675731, "grad_norm": 0.5000432133674622, "learning_rate": 9.845357188006635e-05, "loss": 0.7627, "step": 465 }, { "epoch": 0.10739801797649227, "grad_norm": 0.5209843516349792, "learning_rate": 9.844434636010373e-05, "loss": 0.7754, "step": 466 }, { "epoch": 0.10762848582622724, "grad_norm": 0.4857335090637207, "learning_rate": 9.843509383842525e-05, "loss": 0.7687, "step": 467 }, { "epoch": 0.1078589536759622, "grad_norm": 0.5346872806549072, "learning_rate": 9.842581432018798e-05, "loss": 0.772, "step": 468 }, { "epoch": 0.10808942152569717, "grad_norm": 0.6131623387336731, "learning_rate": 9.841650781056413e-05, "loss": 0.7808, "step": 469 }, { "epoch": 0.10831988937543213, "grad_norm": 0.5165646076202393, "learning_rate": 9.840717431474094e-05, "loss": 0.772, "step": 470 }, { "epoch": 0.1085503572251671, "grad_norm": 0.39740481972694397, "learning_rate": 9.839781383792064e-05, "loss": 0.7671, "step": 471 }, { "epoch": 0.10878082507490205, "grad_norm": 0.42045560479164124, "learning_rate": 9.838842638532056e-05, "loss": 0.7667, "step": 472 }, { "epoch": 0.10901129292463702, "grad_norm": 0.4750736355781555, "learning_rate": 9.837901196217303e-05, "loss": 0.7565, "step": 473 }, { "epoch": 0.10924176077437198, "grad_norm": 0.3689127266407013, "learning_rate": 9.836957057372544e-05, "loss": 0.7652, "step": 474 }, { "epoch": 0.10947222862410694, "grad_norm": 0.3930782675743103, "learning_rate": 9.836010222524018e-05, "loss": 0.7538, "step": 475 }, { "epoch": 0.1097026964738419, "grad_norm": 0.45599618554115295, "learning_rate": 9.835060692199468e-05, "loss": 0.7724, "step": 476 }, { "epoch": 0.10993316432357686, "grad_norm": 0.5148651003837585, "learning_rate": 9.83410846692814e-05, "loss": 0.7612, "step": 477 }, { "epoch": 0.11016363217331182, "grad_norm": 0.4592342674732208, "learning_rate": 9.83315354724078e-05, "loss": 0.7667, "step": 478 }, { "epoch": 0.11039410002304678, "grad_norm": 0.40691468119621277, "learning_rate": 9.83219593366964e-05, "loss": 0.7676, "step": 479 }, { "epoch": 0.11062456787278174, "grad_norm": 0.4532166123390198, "learning_rate": 9.831235626748467e-05, "loss": 0.7683, "step": 480 }, { "epoch": 0.1108550357225167, "grad_norm": 0.42121079564094543, "learning_rate": 9.830272627012518e-05, "loss": 0.7691, "step": 481 }, { "epoch": 0.11108550357225166, "grad_norm": 0.4073924422264099, "learning_rate": 9.82930693499854e-05, "loss": 0.7705, "step": 482 }, { "epoch": 0.11131597142198664, "grad_norm": 0.39546170830726624, "learning_rate": 9.828338551244794e-05, "loss": 0.7684, "step": 483 }, { "epoch": 0.1115464392717216, "grad_norm": 0.38143351674079895, "learning_rate": 9.827367476291027e-05, "loss": 0.7554, "step": 484 }, { "epoch": 0.11177690712145656, "grad_norm": 0.3818657100200653, "learning_rate": 9.826393710678497e-05, "loss": 0.7632, "step": 485 }, { "epoch": 0.11200737497119152, "grad_norm": 0.40947970747947693, "learning_rate": 9.825417254949953e-05, "loss": 0.7628, "step": 486 }, { "epoch": 0.11223784282092648, "grad_norm": 0.40739309787750244, "learning_rate": 9.824438109649654e-05, "loss": 0.7659, "step": 487 }, { "epoch": 0.11246831067066144, "grad_norm": 0.4805825650691986, "learning_rate": 9.823456275323348e-05, "loss": 0.7682, "step": 488 }, { "epoch": 0.1126987785203964, "grad_norm": 0.48237094283103943, "learning_rate": 9.822471752518288e-05, "loss": 0.7658, "step": 489 }, { "epoch": 0.11292924637013137, "grad_norm": 0.44534748792648315, "learning_rate": 9.821484541783221e-05, "loss": 0.769, "step": 490 }, { "epoch": 0.11315971421986633, "grad_norm": 0.4775294363498688, "learning_rate": 9.820494643668396e-05, "loss": 0.7654, "step": 491 }, { "epoch": 0.11339018206960129, "grad_norm": 0.4316689968109131, "learning_rate": 9.81950205872556e-05, "loss": 0.7689, "step": 492 }, { "epoch": 0.11362064991933625, "grad_norm": 0.4406385123729706, "learning_rate": 9.818506787507952e-05, "loss": 0.7615, "step": 493 }, { "epoch": 0.11385111776907121, "grad_norm": 0.37954282760620117, "learning_rate": 9.817508830570318e-05, "loss": 0.7581, "step": 494 }, { "epoch": 0.11408158561880617, "grad_norm": 0.37608101963996887, "learning_rate": 9.81650818846889e-05, "loss": 0.7619, "step": 495 }, { "epoch": 0.11431205346854113, "grad_norm": 0.35068264603614807, "learning_rate": 9.815504861761404e-05, "loss": 0.763, "step": 496 }, { "epoch": 0.1145425213182761, "grad_norm": 0.3810550570487976, "learning_rate": 9.81449885100709e-05, "loss": 0.7551, "step": 497 }, { "epoch": 0.11477298916801107, "grad_norm": 0.3873169422149658, "learning_rate": 9.813490156766676e-05, "loss": 0.7606, "step": 498 }, { "epoch": 0.11500345701774603, "grad_norm": 0.39904019236564636, "learning_rate": 9.812478779602381e-05, "loss": 0.7562, "step": 499 }, { "epoch": 0.11523392486748099, "grad_norm": 0.379290908575058, "learning_rate": 9.811464720077923e-05, "loss": 0.7544, "step": 500 }, { "epoch": 0.11546439271721595, "grad_norm": 0.3641830384731293, "learning_rate": 9.810447978758517e-05, "loss": 0.7587, "step": 501 }, { "epoch": 0.11569486056695091, "grad_norm": 0.3822626769542694, "learning_rate": 9.809428556210867e-05, "loss": 0.7577, "step": 502 }, { "epoch": 0.11592532841668587, "grad_norm": 0.42176035046577454, "learning_rate": 9.808406453003175e-05, "loss": 0.7602, "step": 503 }, { "epoch": 0.11615579626642084, "grad_norm": 0.3986082971096039, "learning_rate": 9.80738166970514e-05, "loss": 0.7504, "step": 504 }, { "epoch": 0.1163862641161558, "grad_norm": 0.3723549246788025, "learning_rate": 9.806354206887949e-05, "loss": 0.764, "step": 505 }, { "epoch": 0.11661673196589076, "grad_norm": 0.3810165524482727, "learning_rate": 9.805324065124283e-05, "loss": 0.7632, "step": 506 }, { "epoch": 0.11684719981562572, "grad_norm": 0.3603302538394928, "learning_rate": 9.804291244988324e-05, "loss": 0.7572, "step": 507 }, { "epoch": 0.11707766766536068, "grad_norm": 0.3849985897541046, "learning_rate": 9.803255747055737e-05, "loss": 0.7562, "step": 508 }, { "epoch": 0.11730813551509564, "grad_norm": 0.3381344974040985, "learning_rate": 9.802217571903685e-05, "loss": 0.7643, "step": 509 }, { "epoch": 0.1175386033648306, "grad_norm": 0.35623160004615784, "learning_rate": 9.80117672011082e-05, "loss": 0.752, "step": 510 }, { "epoch": 0.11776907121456556, "grad_norm": 0.36817556619644165, "learning_rate": 9.800133192257291e-05, "loss": 0.7709, "step": 511 }, { "epoch": 0.11799953906430052, "grad_norm": 0.5149376392364502, "learning_rate": 9.799086988924733e-05, "loss": 0.7566, "step": 512 }, { "epoch": 0.1182300069140355, "grad_norm": 0.4210844039916992, "learning_rate": 9.798038110696275e-05, "loss": 0.7523, "step": 513 }, { "epoch": 0.11846047476377046, "grad_norm": 0.4472396671772003, "learning_rate": 9.796986558156538e-05, "loss": 0.7586, "step": 514 }, { "epoch": 0.11869094261350542, "grad_norm": 0.45051324367523193, "learning_rate": 9.79593233189163e-05, "loss": 0.7561, "step": 515 }, { "epoch": 0.11892141046324038, "grad_norm": 0.4668648838996887, "learning_rate": 9.794875432489152e-05, "loss": 0.7548, "step": 516 }, { "epoch": 0.11915187831297534, "grad_norm": 0.3892306685447693, "learning_rate": 9.793815860538197e-05, "loss": 0.7536, "step": 517 }, { "epoch": 0.1193823461627103, "grad_norm": 0.36023855209350586, "learning_rate": 9.792753616629342e-05, "loss": 0.7595, "step": 518 }, { "epoch": 0.11961281401244526, "grad_norm": 0.40682411193847656, "learning_rate": 9.791688701354656e-05, "loss": 0.7583, "step": 519 }, { "epoch": 0.11984328186218023, "grad_norm": 0.464751660823822, "learning_rate": 9.790621115307699e-05, "loss": 0.7654, "step": 520 }, { "epoch": 0.12007374971191519, "grad_norm": 0.4933690130710602, "learning_rate": 9.789550859083517e-05, "loss": 0.7567, "step": 521 }, { "epoch": 0.12030421756165015, "grad_norm": 0.459985613822937, "learning_rate": 9.788477933278646e-05, "loss": 0.7464, "step": 522 }, { "epoch": 0.12053468541138511, "grad_norm": 0.43470343947410583, "learning_rate": 9.787402338491108e-05, "loss": 0.7539, "step": 523 }, { "epoch": 0.12076515326112007, "grad_norm": 0.3725113868713379, "learning_rate": 9.786324075320414e-05, "loss": 0.7536, "step": 524 }, { "epoch": 0.12099562111085503, "grad_norm": 0.3641432225704193, "learning_rate": 9.785243144367562e-05, "loss": 0.7546, "step": 525 }, { "epoch": 0.12122608896058999, "grad_norm": 0.3699088990688324, "learning_rate": 9.784159546235037e-05, "loss": 0.7544, "step": 526 }, { "epoch": 0.12145655681032495, "grad_norm": 0.3829246461391449, "learning_rate": 9.78307328152681e-05, "loss": 0.7613, "step": 527 }, { "epoch": 0.12168702466005993, "grad_norm": 0.42919662594795227, "learning_rate": 9.78198435084834e-05, "loss": 0.7546, "step": 528 }, { "epoch": 0.12191749250979489, "grad_norm": 0.446468323469162, "learning_rate": 9.780892754806571e-05, "loss": 0.7496, "step": 529 }, { "epoch": 0.12214796035952985, "grad_norm": 0.459417462348938, "learning_rate": 9.779798494009931e-05, "loss": 0.7559, "step": 530 }, { "epoch": 0.12237842820926481, "grad_norm": 0.4683552086353302, "learning_rate": 9.778701569068336e-05, "loss": 0.7681, "step": 531 }, { "epoch": 0.12260889605899977, "grad_norm": 0.38343241810798645, "learning_rate": 9.777601980593184e-05, "loss": 0.7649, "step": 532 }, { "epoch": 0.12283936390873473, "grad_norm": 0.4223579466342926, "learning_rate": 9.776499729197362e-05, "loss": 0.75, "step": 533 }, { "epoch": 0.1230698317584697, "grad_norm": 0.4097927510738373, "learning_rate": 9.775394815495236e-05, "loss": 0.7476, "step": 534 }, { "epoch": 0.12330029960820466, "grad_norm": 0.4324532151222229, "learning_rate": 9.77428724010266e-05, "loss": 0.7568, "step": 535 }, { "epoch": 0.12353076745793962, "grad_norm": 0.4441714584827423, "learning_rate": 9.773177003636969e-05, "loss": 0.7574, "step": 536 }, { "epoch": 0.12376123530767458, "grad_norm": 0.49834537506103516, "learning_rate": 9.77206410671698e-05, "loss": 0.7559, "step": 537 }, { "epoch": 0.12399170315740954, "grad_norm": 0.5015109181404114, "learning_rate": 9.770948549963e-05, "loss": 0.7566, "step": 538 }, { "epoch": 0.1242221710071445, "grad_norm": 0.3993091881275177, "learning_rate": 9.76983033399681e-05, "loss": 0.7574, "step": 539 }, { "epoch": 0.12445263885687946, "grad_norm": 0.37793558835983276, "learning_rate": 9.768709459441676e-05, "loss": 0.7433, "step": 540 }, { "epoch": 0.12468310670661442, "grad_norm": 0.48581400513648987, "learning_rate": 9.76758592692235e-05, "loss": 0.7547, "step": 541 }, { "epoch": 0.12491357455634938, "grad_norm": 0.5075335502624512, "learning_rate": 9.76645973706506e-05, "loss": 0.7625, "step": 542 }, { "epoch": 0.12514404240608434, "grad_norm": 0.5207210779190063, "learning_rate": 9.765330890497518e-05, "loss": 0.7551, "step": 543 }, { "epoch": 0.1253745102558193, "grad_norm": 0.42284563183784485, "learning_rate": 9.764199387848915e-05, "loss": 0.7599, "step": 544 }, { "epoch": 0.12560497810555427, "grad_norm": 0.3669079542160034, "learning_rate": 9.763065229749923e-05, "loss": 0.7433, "step": 545 }, { "epoch": 0.12583544595528923, "grad_norm": 0.3668546676635742, "learning_rate": 9.761928416832695e-05, "loss": 0.7589, "step": 546 }, { "epoch": 0.1260659138050242, "grad_norm": 0.520101010799408, "learning_rate": 9.760788949730866e-05, "loss": 0.7503, "step": 547 }, { "epoch": 0.12629638165475915, "grad_norm": 0.5019710659980774, "learning_rate": 9.759646829079543e-05, "loss": 0.7548, "step": 548 }, { "epoch": 0.1265268495044941, "grad_norm": 0.4565122425556183, "learning_rate": 9.75850205551532e-05, "loss": 0.7479, "step": 549 }, { "epoch": 0.1267573173542291, "grad_norm": 0.4054650664329529, "learning_rate": 9.757354629676265e-05, "loss": 0.7455, "step": 550 }, { "epoch": 0.12698778520396406, "grad_norm": 0.5145794153213501, "learning_rate": 9.756204552201926e-05, "loss": 0.7514, "step": 551 }, { "epoch": 0.12721825305369902, "grad_norm": 0.4342021644115448, "learning_rate": 9.755051823733328e-05, "loss": 0.7457, "step": 552 }, { "epoch": 0.12744872090343398, "grad_norm": 0.3916120231151581, "learning_rate": 9.753896444912973e-05, "loss": 0.7557, "step": 553 }, { "epoch": 0.12767918875316894, "grad_norm": 0.3801400363445282, "learning_rate": 9.752738416384844e-05, "loss": 0.7486, "step": 554 }, { "epoch": 0.1279096566029039, "grad_norm": 0.4345585107803345, "learning_rate": 9.751577738794398e-05, "loss": 0.7561, "step": 555 }, { "epoch": 0.12814012445263886, "grad_norm": 0.3579169809818268, "learning_rate": 9.750414412788567e-05, "loss": 0.7552, "step": 556 }, { "epoch": 0.12837059230237383, "grad_norm": 0.3728508949279785, "learning_rate": 9.749248439015763e-05, "loss": 0.7605, "step": 557 }, { "epoch": 0.1286010601521088, "grad_norm": 0.4141934812068939, "learning_rate": 9.74807981812587e-05, "loss": 0.7514, "step": 558 }, { "epoch": 0.12883152800184375, "grad_norm": 0.34202003479003906, "learning_rate": 9.746908550770252e-05, "loss": 0.742, "step": 559 }, { "epoch": 0.1290619958515787, "grad_norm": 0.39449524879455566, "learning_rate": 9.745734637601743e-05, "loss": 0.745, "step": 560 }, { "epoch": 0.12929246370131367, "grad_norm": 0.3760475218296051, "learning_rate": 9.744558079274652e-05, "loss": 0.7586, "step": 561 }, { "epoch": 0.12952293155104863, "grad_norm": 0.3431721329689026, "learning_rate": 9.743378876444769e-05, "loss": 0.7523, "step": 562 }, { "epoch": 0.1297533994007836, "grad_norm": 0.3754710555076599, "learning_rate": 9.74219702976935e-05, "loss": 0.7374, "step": 563 }, { "epoch": 0.12998386725051855, "grad_norm": 0.45317164063453674, "learning_rate": 9.741012539907131e-05, "loss": 0.7599, "step": 564 }, { "epoch": 0.13021433510025351, "grad_norm": 0.4640779495239258, "learning_rate": 9.739825407518314e-05, "loss": 0.7408, "step": 565 }, { "epoch": 0.13044480294998848, "grad_norm": 0.49257683753967285, "learning_rate": 9.738635633264581e-05, "loss": 0.752, "step": 566 }, { "epoch": 0.13067527079972344, "grad_norm": 0.4536359906196594, "learning_rate": 9.737443217809083e-05, "loss": 0.7566, "step": 567 }, { "epoch": 0.1309057386494584, "grad_norm": 0.4063389003276825, "learning_rate": 9.736248161816446e-05, "loss": 0.7556, "step": 568 }, { "epoch": 0.13113620649919336, "grad_norm": 0.33868643641471863, "learning_rate": 9.735050465952761e-05, "loss": 0.751, "step": 569 }, { "epoch": 0.13136667434892832, "grad_norm": 0.3786289095878601, "learning_rate": 9.733850130885598e-05, "loss": 0.7568, "step": 570 }, { "epoch": 0.13159714219866328, "grad_norm": 0.43096908926963806, "learning_rate": 9.732647157283994e-05, "loss": 0.7508, "step": 571 }, { "epoch": 0.13182761004839824, "grad_norm": 0.39179953932762146, "learning_rate": 9.731441545818459e-05, "loss": 0.749, "step": 572 }, { "epoch": 0.1320580778981332, "grad_norm": 0.3335493803024292, "learning_rate": 9.730233297160969e-05, "loss": 0.7491, "step": 573 }, { "epoch": 0.13228854574786816, "grad_norm": 0.4363969564437866, "learning_rate": 9.729022411984975e-05, "loss": 0.7496, "step": 574 }, { "epoch": 0.13251901359760312, "grad_norm": 0.3922342360019684, "learning_rate": 9.727808890965396e-05, "loss": 0.7481, "step": 575 }, { "epoch": 0.13274948144733809, "grad_norm": 0.3694200813770294, "learning_rate": 9.726592734778617e-05, "loss": 0.7561, "step": 576 }, { "epoch": 0.13297994929707305, "grad_norm": 0.40044036507606506, "learning_rate": 9.725373944102496e-05, "loss": 0.7463, "step": 577 }, { "epoch": 0.133210417146808, "grad_norm": 0.39088964462280273, "learning_rate": 9.724152519616358e-05, "loss": 0.7528, "step": 578 }, { "epoch": 0.13344088499654297, "grad_norm": 0.35432836413383484, "learning_rate": 9.722928462000995e-05, "loss": 0.7393, "step": 579 }, { "epoch": 0.13367135284627796, "grad_norm": 0.36177772283554077, "learning_rate": 9.721701771938666e-05, "loss": 0.7511, "step": 580 }, { "epoch": 0.13390182069601292, "grad_norm": 0.3491220474243164, "learning_rate": 9.7204724501131e-05, "loss": 0.7516, "step": 581 }, { "epoch": 0.13413228854574788, "grad_norm": 0.3720824420452118, "learning_rate": 9.719240497209493e-05, "loss": 0.7416, "step": 582 }, { "epoch": 0.13436275639548284, "grad_norm": 0.3904234766960144, "learning_rate": 9.718005913914503e-05, "loss": 0.7429, "step": 583 }, { "epoch": 0.1345932242452178, "grad_norm": 0.3584286868572235, "learning_rate": 9.716768700916258e-05, "loss": 0.7443, "step": 584 }, { "epoch": 0.13482369209495276, "grad_norm": 0.36260986328125, "learning_rate": 9.715528858904353e-05, "loss": 0.7491, "step": 585 }, { "epoch": 0.13505415994468772, "grad_norm": 0.41411370038986206, "learning_rate": 9.714286388569845e-05, "loss": 0.7493, "step": 586 }, { "epoch": 0.13528462779442268, "grad_norm": 0.4128418266773224, "learning_rate": 9.713041290605254e-05, "loss": 0.7531, "step": 587 }, { "epoch": 0.13551509564415765, "grad_norm": 0.37664923071861267, "learning_rate": 9.711793565704572e-05, "loss": 0.7491, "step": 588 }, { "epoch": 0.1357455634938926, "grad_norm": 0.3493359386920929, "learning_rate": 9.71054321456325e-05, "loss": 0.743, "step": 589 }, { "epoch": 0.13597603134362757, "grad_norm": 0.4216603934764862, "learning_rate": 9.709290237878202e-05, "loss": 0.7446, "step": 590 }, { "epoch": 0.13620649919336253, "grad_norm": 0.4375728964805603, "learning_rate": 9.708034636347807e-05, "loss": 0.7533, "step": 591 }, { "epoch": 0.1364369670430975, "grad_norm": 0.3984435796737671, "learning_rate": 9.70677641067191e-05, "loss": 0.7379, "step": 592 }, { "epoch": 0.13666743489283245, "grad_norm": 0.3685891330242157, "learning_rate": 9.705515561551814e-05, "loss": 0.7462, "step": 593 }, { "epoch": 0.1368979027425674, "grad_norm": 0.35260993242263794, "learning_rate": 9.704252089690284e-05, "loss": 0.7491, "step": 594 }, { "epoch": 0.13712837059230237, "grad_norm": 0.33761847019195557, "learning_rate": 9.702985995791554e-05, "loss": 0.7403, "step": 595 }, { "epoch": 0.13735883844203733, "grad_norm": 0.4064388573169708, "learning_rate": 9.701717280561309e-05, "loss": 0.7511, "step": 596 }, { "epoch": 0.1375893062917723, "grad_norm": 0.4629051089286804, "learning_rate": 9.700445944706704e-05, "loss": 0.746, "step": 597 }, { "epoch": 0.13781977414150726, "grad_norm": 0.45266193151474, "learning_rate": 9.699171988936349e-05, "loss": 0.7394, "step": 598 }, { "epoch": 0.13805024199124222, "grad_norm": 0.3218851387500763, "learning_rate": 9.697895413960319e-05, "loss": 0.7477, "step": 599 }, { "epoch": 0.13828070984097718, "grad_norm": 0.4335274398326874, "learning_rate": 9.696616220490143e-05, "loss": 0.7356, "step": 600 }, { "epoch": 0.13851117769071214, "grad_norm": 0.45627596974372864, "learning_rate": 9.695334409238813e-05, "loss": 0.746, "step": 601 }, { "epoch": 0.1387416455404471, "grad_norm": 0.35524532198905945, "learning_rate": 9.694049980920783e-05, "loss": 0.7494, "step": 602 }, { "epoch": 0.13897211339018206, "grad_norm": 0.40399858355522156, "learning_rate": 9.69276293625196e-05, "loss": 0.7468, "step": 603 }, { "epoch": 0.13920258123991702, "grad_norm": 0.4082375168800354, "learning_rate": 9.691473275949712e-05, "loss": 0.7518, "step": 604 }, { "epoch": 0.13943304908965198, "grad_norm": 0.3440902829170227, "learning_rate": 9.690181000732864e-05, "loss": 0.7421, "step": 605 }, { "epoch": 0.13966351693938694, "grad_norm": 0.481870174407959, "learning_rate": 9.688886111321703e-05, "loss": 0.7558, "step": 606 }, { "epoch": 0.1398939847891219, "grad_norm": 0.3942829370498657, "learning_rate": 9.687588608437963e-05, "loss": 0.7387, "step": 607 }, { "epoch": 0.14012445263885687, "grad_norm": 0.3642619550228119, "learning_rate": 9.686288492804846e-05, "loss": 0.7369, "step": 608 }, { "epoch": 0.14035492048859183, "grad_norm": 0.4085538685321808, "learning_rate": 9.684985765147006e-05, "loss": 0.7455, "step": 609 }, { "epoch": 0.14058538833832682, "grad_norm": 0.36684495210647583, "learning_rate": 9.683680426190547e-05, "loss": 0.7427, "step": 610 }, { "epoch": 0.14081585618806178, "grad_norm": 0.3928658068180084, "learning_rate": 9.682372476663037e-05, "loss": 0.7503, "step": 611 }, { "epoch": 0.14104632403779674, "grad_norm": 0.40327298641204834, "learning_rate": 9.681061917293497e-05, "loss": 0.7497, "step": 612 }, { "epoch": 0.1412767918875317, "grad_norm": 0.45683008432388306, "learning_rate": 9.679748748812397e-05, "loss": 0.7443, "step": 613 }, { "epoch": 0.14150725973726666, "grad_norm": 0.3893783986568451, "learning_rate": 9.678432971951669e-05, "loss": 0.7487, "step": 614 }, { "epoch": 0.14173772758700162, "grad_norm": 0.36293652653694153, "learning_rate": 9.677114587444696e-05, "loss": 0.7477, "step": 615 }, { "epoch": 0.14196819543673658, "grad_norm": 0.3704957365989685, "learning_rate": 9.67579359602631e-05, "loss": 0.7442, "step": 616 }, { "epoch": 0.14219866328647154, "grad_norm": 0.3900201618671417, "learning_rate": 9.674469998432802e-05, "loss": 0.7408, "step": 617 }, { "epoch": 0.1424291311362065, "grad_norm": 0.35476595163345337, "learning_rate": 9.673143795401915e-05, "loss": 0.7369, "step": 618 }, { "epoch": 0.14265959898594147, "grad_norm": 0.39594173431396484, "learning_rate": 9.671814987672842e-05, "loss": 0.7495, "step": 619 }, { "epoch": 0.14289006683567643, "grad_norm": 0.3954264521598816, "learning_rate": 9.670483575986229e-05, "loss": 0.7536, "step": 620 }, { "epoch": 0.1431205346854114, "grad_norm": 0.329731285572052, "learning_rate": 9.66914956108417e-05, "loss": 0.7432, "step": 621 }, { "epoch": 0.14335100253514635, "grad_norm": 0.33711501955986023, "learning_rate": 9.667812943710215e-05, "loss": 0.733, "step": 622 }, { "epoch": 0.1435814703848813, "grad_norm": 0.3300781548023224, "learning_rate": 9.666473724609364e-05, "loss": 0.7366, "step": 623 }, { "epoch": 0.14381193823461627, "grad_norm": 0.32129067182540894, "learning_rate": 9.665131904528063e-05, "loss": 0.7457, "step": 624 }, { "epoch": 0.14404240608435123, "grad_norm": 0.3335452973842621, "learning_rate": 9.66378748421421e-05, "loss": 0.7453, "step": 625 }, { "epoch": 0.1442728739340862, "grad_norm": 0.31821250915527344, "learning_rate": 9.662440464417155e-05, "loss": 0.7381, "step": 626 }, { "epoch": 0.14450334178382115, "grad_norm": 0.6273105144500732, "learning_rate": 9.661090845887693e-05, "loss": 0.7383, "step": 627 }, { "epoch": 0.14473380963355612, "grad_norm": 0.32164305448532104, "learning_rate": 9.65973862937807e-05, "loss": 0.7398, "step": 628 }, { "epoch": 0.14496427748329108, "grad_norm": 0.3327626585960388, "learning_rate": 9.658383815641978e-05, "loss": 0.7357, "step": 629 }, { "epoch": 0.14519474533302604, "grad_norm": 0.30448755621910095, "learning_rate": 9.657026405434557e-05, "loss": 0.7309, "step": 630 }, { "epoch": 0.145425213182761, "grad_norm": 0.30915525555610657, "learning_rate": 9.655666399512397e-05, "loss": 0.7407, "step": 631 }, { "epoch": 0.14565568103249596, "grad_norm": 0.3181747794151306, "learning_rate": 9.654303798633532e-05, "loss": 0.7443, "step": 632 }, { "epoch": 0.14588614888223092, "grad_norm": 0.3349412977695465, "learning_rate": 9.652938603557442e-05, "loss": 0.7459, "step": 633 }, { "epoch": 0.14611661673196588, "grad_norm": 0.32056349515914917, "learning_rate": 9.651570815045054e-05, "loss": 0.7415, "step": 634 }, { "epoch": 0.14634708458170084, "grad_norm": 0.34287700057029724, "learning_rate": 9.650200433858741e-05, "loss": 0.7412, "step": 635 }, { "epoch": 0.1465775524314358, "grad_norm": 0.3507481813430786, "learning_rate": 9.648827460762322e-05, "loss": 0.7487, "step": 636 }, { "epoch": 0.14680802028117076, "grad_norm": 0.3356325328350067, "learning_rate": 9.647451896521055e-05, "loss": 0.7423, "step": 637 }, { "epoch": 0.14703848813090573, "grad_norm": 0.37254631519317627, "learning_rate": 9.646073741901652e-05, "loss": 0.743, "step": 638 }, { "epoch": 0.14726895598064071, "grad_norm": 0.3373221755027771, "learning_rate": 9.64469299767226e-05, "loss": 0.7406, "step": 639 }, { "epoch": 0.14749942383037568, "grad_norm": 0.3344241976737976, "learning_rate": 9.643309664602474e-05, "loss": 0.7377, "step": 640 }, { "epoch": 0.14772989168011064, "grad_norm": 0.34580978751182556, "learning_rate": 9.641923743463327e-05, "loss": 0.7454, "step": 641 }, { "epoch": 0.1479603595298456, "grad_norm": 0.31924277544021606, "learning_rate": 9.640535235027303e-05, "loss": 0.7369, "step": 642 }, { "epoch": 0.14819082737958056, "grad_norm": 0.33480551838874817, "learning_rate": 9.639144140068324e-05, "loss": 0.7582, "step": 643 }, { "epoch": 0.14842129522931552, "grad_norm": 0.3792799413204193, "learning_rate": 9.637750459361748e-05, "loss": 0.7495, "step": 644 }, { "epoch": 0.14865176307905048, "grad_norm": 0.42829740047454834, "learning_rate": 9.63635419368438e-05, "loss": 0.7406, "step": 645 }, { "epoch": 0.14888223092878544, "grad_norm": 0.48589882254600525, "learning_rate": 9.634955343814469e-05, "loss": 0.7468, "step": 646 }, { "epoch": 0.1491126987785204, "grad_norm": 0.43215352296829224, "learning_rate": 9.633553910531697e-05, "loss": 0.7444, "step": 647 }, { "epoch": 0.14934316662825536, "grad_norm": 0.38566887378692627, "learning_rate": 9.632149894617191e-05, "loss": 0.7447, "step": 648 }, { "epoch": 0.14957363447799032, "grad_norm": 0.31605324149131775, "learning_rate": 9.630743296853513e-05, "loss": 0.7462, "step": 649 }, { "epoch": 0.14980410232772529, "grad_norm": 0.3389308452606201, "learning_rate": 9.629334118024669e-05, "loss": 0.7345, "step": 650 }, { "epoch": 0.15003457017746025, "grad_norm": 0.3665103614330292, "learning_rate": 9.627922358916102e-05, "loss": 0.7393, "step": 651 }, { "epoch": 0.1502650380271952, "grad_norm": 0.33083248138427734, "learning_rate": 9.626508020314693e-05, "loss": 0.7439, "step": 652 }, { "epoch": 0.15049550587693017, "grad_norm": 0.3108620345592499, "learning_rate": 9.625091103008757e-05, "loss": 0.7347, "step": 653 }, { "epoch": 0.15072597372666513, "grad_norm": 0.32993149757385254, "learning_rate": 9.623671607788054e-05, "loss": 0.7429, "step": 654 }, { "epoch": 0.1509564415764001, "grad_norm": 0.35567355155944824, "learning_rate": 9.622249535443773e-05, "loss": 0.7365, "step": 655 }, { "epoch": 0.15118690942613505, "grad_norm": 0.3489120602607727, "learning_rate": 9.620824886768545e-05, "loss": 0.7314, "step": 656 }, { "epoch": 0.15141737727587, "grad_norm": 0.34390199184417725, "learning_rate": 9.619397662556435e-05, "loss": 0.7468, "step": 657 }, { "epoch": 0.15164784512560497, "grad_norm": 0.3184950351715088, "learning_rate": 9.617967863602941e-05, "loss": 0.7474, "step": 658 }, { "epoch": 0.15187831297533994, "grad_norm": 0.3136204779148102, "learning_rate": 9.616535490705004e-05, "loss": 0.7423, "step": 659 }, { "epoch": 0.1521087808250749, "grad_norm": 0.29085636138916016, "learning_rate": 9.615100544660987e-05, "loss": 0.7374, "step": 660 }, { "epoch": 0.15233924867480986, "grad_norm": 0.35054919123649597, "learning_rate": 9.6136630262707e-05, "loss": 0.7436, "step": 661 }, { "epoch": 0.15256971652454482, "grad_norm": 0.32511407136917114, "learning_rate": 9.61222293633538e-05, "loss": 0.7401, "step": 662 }, { "epoch": 0.15280018437427978, "grad_norm": 0.3348987400531769, "learning_rate": 9.610780275657698e-05, "loss": 0.7387, "step": 663 }, { "epoch": 0.15303065222401474, "grad_norm": 0.3278294801712036, "learning_rate": 9.609335045041759e-05, "loss": 0.7402, "step": 664 }, { "epoch": 0.1532611200737497, "grad_norm": 0.3211444020271301, "learning_rate": 9.6078872452931e-05, "loss": 0.7448, "step": 665 }, { "epoch": 0.15349158792348466, "grad_norm": 0.341467946767807, "learning_rate": 9.606436877218688e-05, "loss": 0.7395, "step": 666 }, { "epoch": 0.15372205577321962, "grad_norm": 0.3915879726409912, "learning_rate": 9.604983941626924e-05, "loss": 0.7405, "step": 667 }, { "epoch": 0.15395252362295458, "grad_norm": 0.41400331258773804, "learning_rate": 9.603528439327642e-05, "loss": 0.7456, "step": 668 }, { "epoch": 0.15418299147268957, "grad_norm": 0.35899117588996887, "learning_rate": 9.602070371132102e-05, "loss": 0.7282, "step": 669 }, { "epoch": 0.15441345932242453, "grad_norm": 0.273082971572876, "learning_rate": 9.600609737852995e-05, "loss": 0.7323, "step": 670 }, { "epoch": 0.1546439271721595, "grad_norm": 0.34714290499687195, "learning_rate": 9.599146540304444e-05, "loss": 0.7426, "step": 671 }, { "epoch": 0.15487439502189446, "grad_norm": 0.3830714523792267, "learning_rate": 9.597680779302e-05, "loss": 0.7362, "step": 672 }, { "epoch": 0.15510486287162942, "grad_norm": 0.30310484766960144, "learning_rate": 9.596212455662645e-05, "loss": 0.7368, "step": 673 }, { "epoch": 0.15533533072136438, "grad_norm": 0.3948887586593628, "learning_rate": 9.594741570204787e-05, "loss": 0.7377, "step": 674 }, { "epoch": 0.15556579857109934, "grad_norm": 0.42148318886756897, "learning_rate": 9.593268123748259e-05, "loss": 0.7415, "step": 675 }, { "epoch": 0.1557962664208343, "grad_norm": 0.5412166118621826, "learning_rate": 9.591792117114328e-05, "loss": 0.7277, "step": 676 }, { "epoch": 0.15602673427056926, "grad_norm": 0.6122573614120483, "learning_rate": 9.590313551125683e-05, "loss": 0.7376, "step": 677 }, { "epoch": 0.15625720212030422, "grad_norm": 0.4985668361186981, "learning_rate": 9.58883242660644e-05, "loss": 0.742, "step": 678 }, { "epoch": 0.15648766997003918, "grad_norm": 0.356157124042511, "learning_rate": 9.587348744382145e-05, "loss": 0.73, "step": 679 }, { "epoch": 0.15671813781977414, "grad_norm": 0.3795771300792694, "learning_rate": 9.585862505279766e-05, "loss": 0.735, "step": 680 }, { "epoch": 0.1569486056695091, "grad_norm": 0.4135006070137024, "learning_rate": 9.584373710127697e-05, "loss": 0.7361, "step": 681 }, { "epoch": 0.15717907351924407, "grad_norm": 0.3758498728275299, "learning_rate": 9.582882359755756e-05, "loss": 0.7335, "step": 682 }, { "epoch": 0.15740954136897903, "grad_norm": 0.35648760199546814, "learning_rate": 9.581388454995187e-05, "loss": 0.7387, "step": 683 }, { "epoch": 0.157640009218714, "grad_norm": 0.3326440453529358, "learning_rate": 9.579891996678655e-05, "loss": 0.7326, "step": 684 }, { "epoch": 0.15787047706844895, "grad_norm": 0.3023831248283386, "learning_rate": 9.578392985640252e-05, "loss": 0.7302, "step": 685 }, { "epoch": 0.1581009449181839, "grad_norm": 0.3539700508117676, "learning_rate": 9.576891422715489e-05, "loss": 0.729, "step": 686 }, { "epoch": 0.15833141276791887, "grad_norm": 0.38474544882774353, "learning_rate": 9.575387308741301e-05, "loss": 0.7299, "step": 687 }, { "epoch": 0.15856188061765383, "grad_norm": 0.3182377517223358, "learning_rate": 9.573880644556047e-05, "loss": 0.7424, "step": 688 }, { "epoch": 0.1587923484673888, "grad_norm": 0.3241504430770874, "learning_rate": 9.572371430999506e-05, "loss": 0.7363, "step": 689 }, { "epoch": 0.15902281631712376, "grad_norm": 0.3680818974971771, "learning_rate": 9.570859668912873e-05, "loss": 0.7336, "step": 690 }, { "epoch": 0.15925328416685872, "grad_norm": 0.3773689270019531, "learning_rate": 9.569345359138771e-05, "loss": 0.7435, "step": 691 }, { "epoch": 0.15948375201659368, "grad_norm": 0.34049540758132935, "learning_rate": 9.567828502521239e-05, "loss": 0.7327, "step": 692 }, { "epoch": 0.15971421986632864, "grad_norm": 0.34470391273498535, "learning_rate": 9.566309099905739e-05, "loss": 0.7308, "step": 693 }, { "epoch": 0.1599446877160636, "grad_norm": 0.3475648760795593, "learning_rate": 9.564787152139146e-05, "loss": 0.7393, "step": 694 }, { "epoch": 0.16017515556579856, "grad_norm": 0.304973840713501, "learning_rate": 9.563262660069759e-05, "loss": 0.7398, "step": 695 }, { "epoch": 0.16040562341553352, "grad_norm": 0.33955731987953186, "learning_rate": 9.561735624547294e-05, "loss": 0.7388, "step": 696 }, { "epoch": 0.16063609126526848, "grad_norm": 0.31284675002098083, "learning_rate": 9.560206046422881e-05, "loss": 0.7468, "step": 697 }, { "epoch": 0.16086655911500344, "grad_norm": 0.3111793100833893, "learning_rate": 9.558673926549075e-05, "loss": 0.7445, "step": 698 }, { "epoch": 0.16109702696473843, "grad_norm": 0.32062941789627075, "learning_rate": 9.557139265779838e-05, "loss": 0.7373, "step": 699 }, { "epoch": 0.1613274948144734, "grad_norm": 0.32092171907424927, "learning_rate": 9.555602064970554e-05, "loss": 0.7315, "step": 700 }, { "epoch": 0.16155796266420835, "grad_norm": 0.29449769854545593, "learning_rate": 9.554062324978025e-05, "loss": 0.7327, "step": 701 }, { "epoch": 0.16178843051394332, "grad_norm": 0.339097261428833, "learning_rate": 9.552520046660462e-05, "loss": 0.7433, "step": 702 }, { "epoch": 0.16201889836367828, "grad_norm": 0.3251134157180786, "learning_rate": 9.550975230877495e-05, "loss": 0.742, "step": 703 }, { "epoch": 0.16224936621341324, "grad_norm": 0.2941744923591614, "learning_rate": 9.549427878490168e-05, "loss": 0.7321, "step": 704 }, { "epoch": 0.1624798340631482, "grad_norm": 0.31333979964256287, "learning_rate": 9.547877990360935e-05, "loss": 0.7374, "step": 705 }, { "epoch": 0.16271030191288316, "grad_norm": 0.3424711525440216, "learning_rate": 9.546325567353671e-05, "loss": 0.7381, "step": 706 }, { "epoch": 0.16294076976261812, "grad_norm": 0.37201154232025146, "learning_rate": 9.544770610333655e-05, "loss": 0.7333, "step": 707 }, { "epoch": 0.16317123761235308, "grad_norm": 0.29893067479133606, "learning_rate": 9.543213120167586e-05, "loss": 0.7397, "step": 708 }, { "epoch": 0.16340170546208804, "grad_norm": 0.2980719804763794, "learning_rate": 9.54165309772357e-05, "loss": 0.7293, "step": 709 }, { "epoch": 0.163632173311823, "grad_norm": 0.3534228503704071, "learning_rate": 9.540090543871126e-05, "loss": 0.7227, "step": 710 }, { "epoch": 0.16386264116155796, "grad_norm": 0.42632773518562317, "learning_rate": 9.538525459481185e-05, "loss": 0.7304, "step": 711 }, { "epoch": 0.16409310901129293, "grad_norm": 0.3898342549800873, "learning_rate": 9.536957845426086e-05, "loss": 0.7268, "step": 712 }, { "epoch": 0.1643235768610279, "grad_norm": 0.290547251701355, "learning_rate": 9.535387702579581e-05, "loss": 0.7349, "step": 713 }, { "epoch": 0.16455404471076285, "grad_norm": 0.2991439402103424, "learning_rate": 9.53381503181683e-05, "loss": 0.7406, "step": 714 }, { "epoch": 0.1647845125604978, "grad_norm": 0.30085012316703796, "learning_rate": 9.5322398340144e-05, "loss": 0.7353, "step": 715 }, { "epoch": 0.16501498041023277, "grad_norm": 0.39195868372917175, "learning_rate": 9.53066211005027e-05, "loss": 0.7314, "step": 716 }, { "epoch": 0.16524544825996773, "grad_norm": 0.3485901951789856, "learning_rate": 9.529081860803825e-05, "loss": 0.7291, "step": 717 }, { "epoch": 0.1654759161097027, "grad_norm": 0.27913734316825867, "learning_rate": 9.527499087155857e-05, "loss": 0.7291, "step": 718 }, { "epoch": 0.16570638395943765, "grad_norm": 0.3681480288505554, "learning_rate": 9.52591378998857e-05, "loss": 0.7409, "step": 719 }, { "epoch": 0.16593685180917261, "grad_norm": 0.37066853046417236, "learning_rate": 9.524325970185565e-05, "loss": 0.7291, "step": 720 }, { "epoch": 0.16616731965890758, "grad_norm": 0.2994784712791443, "learning_rate": 9.52273562863186e-05, "loss": 0.7274, "step": 721 }, { "epoch": 0.16639778750864254, "grad_norm": 0.3262505531311035, "learning_rate": 9.521142766213869e-05, "loss": 0.7387, "step": 722 }, { "epoch": 0.1666282553583775, "grad_norm": 0.37113088369369507, "learning_rate": 9.519547383819416e-05, "loss": 0.7303, "step": 723 }, { "epoch": 0.16685872320811246, "grad_norm": 0.35534918308258057, "learning_rate": 9.517949482337732e-05, "loss": 0.7383, "step": 724 }, { "epoch": 0.16708919105784742, "grad_norm": 0.3642905056476593, "learning_rate": 9.516349062659444e-05, "loss": 0.7299, "step": 725 }, { "epoch": 0.16731965890758238, "grad_norm": 0.3798482418060303, "learning_rate": 9.514746125676593e-05, "loss": 0.7279, "step": 726 }, { "epoch": 0.16755012675731734, "grad_norm": 0.38027462363243103, "learning_rate": 9.513140672282612e-05, "loss": 0.7324, "step": 727 }, { "epoch": 0.1677805946070523, "grad_norm": 0.34657275676727295, "learning_rate": 9.511532703372348e-05, "loss": 0.7394, "step": 728 }, { "epoch": 0.1680110624567873, "grad_norm": 0.35259348154067993, "learning_rate": 9.50992221984204e-05, "loss": 0.7282, "step": 729 }, { "epoch": 0.16824153030652225, "grad_norm": 0.3222431242465973, "learning_rate": 9.508309222589333e-05, "loss": 0.7242, "step": 730 }, { "epoch": 0.1684719981562572, "grad_norm": 0.32721391320228577, "learning_rate": 9.506693712513274e-05, "loss": 0.7302, "step": 731 }, { "epoch": 0.16870246600599217, "grad_norm": 0.3368002474308014, "learning_rate": 9.505075690514312e-05, "loss": 0.7265, "step": 732 }, { "epoch": 0.16893293385572714, "grad_norm": 0.28599828481674194, "learning_rate": 9.503455157494289e-05, "loss": 0.7267, "step": 733 }, { "epoch": 0.1691634017054621, "grad_norm": 0.30026113986968994, "learning_rate": 9.501832114356453e-05, "loss": 0.7375, "step": 734 }, { "epoch": 0.16939386955519706, "grad_norm": 0.3616209030151367, "learning_rate": 9.500206562005451e-05, "loss": 0.736, "step": 735 }, { "epoch": 0.16962433740493202, "grad_norm": 0.34600475430488586, "learning_rate": 9.498578501347327e-05, "loss": 0.7412, "step": 736 }, { "epoch": 0.16985480525466698, "grad_norm": 0.33249667286872864, "learning_rate": 9.49694793328952e-05, "loss": 0.7355, "step": 737 }, { "epoch": 0.17008527310440194, "grad_norm": 0.31944364309310913, "learning_rate": 9.495314858740869e-05, "loss": 0.7263, "step": 738 }, { "epoch": 0.1703157409541369, "grad_norm": 0.29929810762405396, "learning_rate": 9.493679278611616e-05, "loss": 0.7244, "step": 739 }, { "epoch": 0.17054620880387186, "grad_norm": 0.3145100474357605, "learning_rate": 9.492041193813388e-05, "loss": 0.7316, "step": 740 }, { "epoch": 0.17077667665360682, "grad_norm": 0.34142231941223145, "learning_rate": 9.490400605259218e-05, "loss": 0.7281, "step": 741 }, { "epoch": 0.17100714450334178, "grad_norm": 0.39629876613616943, "learning_rate": 9.488757513863531e-05, "loss": 0.7353, "step": 742 }, { "epoch": 0.17123761235307675, "grad_norm": 0.43608033657073975, "learning_rate": 9.487111920542143e-05, "loss": 0.7323, "step": 743 }, { "epoch": 0.1714680802028117, "grad_norm": 0.4607144296169281, "learning_rate": 9.48546382621227e-05, "loss": 0.7307, "step": 744 }, { "epoch": 0.17169854805254667, "grad_norm": 0.4801574945449829, "learning_rate": 9.483813231792523e-05, "loss": 0.7299, "step": 745 }, { "epoch": 0.17192901590228163, "grad_norm": 0.4828302264213562, "learning_rate": 9.4821601382029e-05, "loss": 0.734, "step": 746 }, { "epoch": 0.1721594837520166, "grad_norm": 0.35751068592071533, "learning_rate": 9.4805045463648e-05, "loss": 0.7269, "step": 747 }, { "epoch": 0.17238995160175155, "grad_norm": 0.35991883277893066, "learning_rate": 9.478846457201003e-05, "loss": 0.7308, "step": 748 }, { "epoch": 0.1726204194514865, "grad_norm": 0.38963592052459717, "learning_rate": 9.477185871635694e-05, "loss": 0.7394, "step": 749 }, { "epoch": 0.17285088730122147, "grad_norm": 0.3973761796951294, "learning_rate": 9.475522790594443e-05, "loss": 0.7279, "step": 750 }, { "epoch": 0.17308135515095643, "grad_norm": 0.35446715354919434, "learning_rate": 9.473857215004208e-05, "loss": 0.7328, "step": 751 }, { "epoch": 0.1733118230006914, "grad_norm": 0.32000207901000977, "learning_rate": 9.472189145793345e-05, "loss": 0.7304, "step": 752 }, { "epoch": 0.17354229085042636, "grad_norm": 0.3462921679019928, "learning_rate": 9.470518583891592e-05, "loss": 0.7266, "step": 753 }, { "epoch": 0.17377275870016132, "grad_norm": 0.37639400362968445, "learning_rate": 9.468845530230084e-05, "loss": 0.7261, "step": 754 }, { "epoch": 0.17400322654989628, "grad_norm": 0.3057329058647156, "learning_rate": 9.467169985741337e-05, "loss": 0.7332, "step": 755 }, { "epoch": 0.17423369439963124, "grad_norm": 0.33508437871932983, "learning_rate": 9.465491951359265e-05, "loss": 0.7242, "step": 756 }, { "epoch": 0.1744641622493662, "grad_norm": 0.41482967138290405, "learning_rate": 9.463811428019156e-05, "loss": 0.7284, "step": 757 }, { "epoch": 0.1746946300991012, "grad_norm": 0.3846109211444855, "learning_rate": 9.4621284166577e-05, "loss": 0.7358, "step": 758 }, { "epoch": 0.17492509794883615, "grad_norm": 0.35918307304382324, "learning_rate": 9.460442918212965e-05, "loss": 0.7249, "step": 759 }, { "epoch": 0.1751555657985711, "grad_norm": 0.38481009006500244, "learning_rate": 9.458754933624406e-05, "loss": 0.736, "step": 760 }, { "epoch": 0.17538603364830607, "grad_norm": 0.34131038188934326, "learning_rate": 9.457064463832868e-05, "loss": 0.7291, "step": 761 }, { "epoch": 0.17561650149804103, "grad_norm": 0.3763889670372009, "learning_rate": 9.455371509780575e-05, "loss": 0.7322, "step": 762 }, { "epoch": 0.175846969347776, "grad_norm": 0.36019256711006165, "learning_rate": 9.453676072411142e-05, "loss": 0.7206, "step": 763 }, { "epoch": 0.17607743719751096, "grad_norm": 0.324862003326416, "learning_rate": 9.451978152669563e-05, "loss": 0.7277, "step": 764 }, { "epoch": 0.17630790504724592, "grad_norm": 0.3834820091724396, "learning_rate": 9.450277751502218e-05, "loss": 0.7416, "step": 765 }, { "epoch": 0.17653837289698088, "grad_norm": 0.37926480174064636, "learning_rate": 9.44857486985687e-05, "loss": 0.7293, "step": 766 }, { "epoch": 0.17676884074671584, "grad_norm": 0.3776107430458069, "learning_rate": 9.446869508682666e-05, "loss": 0.7384, "step": 767 }, { "epoch": 0.1769993085964508, "grad_norm": 0.2972352206707001, "learning_rate": 9.445161668930129e-05, "loss": 0.7351, "step": 768 }, { "epoch": 0.17722977644618576, "grad_norm": 0.2998862862586975, "learning_rate": 9.443451351551174e-05, "loss": 0.7326, "step": 769 }, { "epoch": 0.17746024429592072, "grad_norm": 0.31847459077835083, "learning_rate": 9.441738557499087e-05, "loss": 0.7267, "step": 770 }, { "epoch": 0.17769071214565568, "grad_norm": 0.3504515588283539, "learning_rate": 9.440023287728537e-05, "loss": 0.732, "step": 771 }, { "epoch": 0.17792117999539064, "grad_norm": 0.2948881983757019, "learning_rate": 9.438305543195579e-05, "loss": 0.735, "step": 772 }, { "epoch": 0.1781516478451256, "grad_norm": 0.3130739629268646, "learning_rate": 9.436585324857636e-05, "loss": 0.7325, "step": 773 }, { "epoch": 0.17838211569486057, "grad_norm": 0.30023884773254395, "learning_rate": 9.434862633673523e-05, "loss": 0.7274, "step": 774 }, { "epoch": 0.17861258354459553, "grad_norm": 0.36194613575935364, "learning_rate": 9.433137470603424e-05, "loss": 0.7273, "step": 775 }, { "epoch": 0.1788430513943305, "grad_norm": 0.330105721950531, "learning_rate": 9.431409836608902e-05, "loss": 0.7236, "step": 776 }, { "epoch": 0.17907351924406545, "grad_norm": 0.3341142535209656, "learning_rate": 9.429679732652901e-05, "loss": 0.7262, "step": 777 }, { "epoch": 0.1793039870938004, "grad_norm": 0.32330140471458435, "learning_rate": 9.427947159699738e-05, "loss": 0.7299, "step": 778 }, { "epoch": 0.17953445494353537, "grad_norm": 0.3477638363838196, "learning_rate": 9.426212118715108e-05, "loss": 0.7379, "step": 779 }, { "epoch": 0.17976492279327033, "grad_norm": 0.36434227228164673, "learning_rate": 9.424474610666082e-05, "loss": 0.7247, "step": 780 }, { "epoch": 0.1799953906430053, "grad_norm": 0.3425809144973755, "learning_rate": 9.422734636521104e-05, "loss": 0.7276, "step": 781 }, { "epoch": 0.18022585849274025, "grad_norm": 0.33053991198539734, "learning_rate": 9.420992197249994e-05, "loss": 0.7255, "step": 782 }, { "epoch": 0.18045632634247522, "grad_norm": 0.3649672567844391, "learning_rate": 9.419247293823947e-05, "loss": 0.7249, "step": 783 }, { "epoch": 0.18068679419221018, "grad_norm": 0.4266059696674347, "learning_rate": 9.417499927215528e-05, "loss": 0.7247, "step": 784 }, { "epoch": 0.18091726204194514, "grad_norm": 0.360500693321228, "learning_rate": 9.41575009839868e-05, "loss": 0.7343, "step": 785 }, { "epoch": 0.1811477298916801, "grad_norm": 0.37427565455436707, "learning_rate": 9.413997808348714e-05, "loss": 0.7195, "step": 786 }, { "epoch": 0.18137819774141506, "grad_norm": 0.42820265889167786, "learning_rate": 9.412243058042315e-05, "loss": 0.7426, "step": 787 }, { "epoch": 0.18160866559115005, "grad_norm": 0.36763229966163635, "learning_rate": 9.410485848457537e-05, "loss": 0.7335, "step": 788 }, { "epoch": 0.181839133440885, "grad_norm": 0.3920682668685913, "learning_rate": 9.408726180573811e-05, "loss": 0.7293, "step": 789 }, { "epoch": 0.18206960129061997, "grad_norm": 0.41110143065452576, "learning_rate": 9.406964055371928e-05, "loss": 0.7226, "step": 790 }, { "epoch": 0.18230006914035493, "grad_norm": 0.31723544001579285, "learning_rate": 9.405199473834057e-05, "loss": 0.7195, "step": 791 }, { "epoch": 0.1825305369900899, "grad_norm": 0.3424417972564697, "learning_rate": 9.403432436943733e-05, "loss": 0.727, "step": 792 }, { "epoch": 0.18276100483982485, "grad_norm": 0.4001381993293762, "learning_rate": 9.40166294568586e-05, "loss": 0.7267, "step": 793 }, { "epoch": 0.18299147268955981, "grad_norm": 0.4231438636779785, "learning_rate": 9.399891001046712e-05, "loss": 0.7256, "step": 794 }, { "epoch": 0.18322194053929478, "grad_norm": 0.3733879029750824, "learning_rate": 9.398116604013925e-05, "loss": 0.7341, "step": 795 }, { "epoch": 0.18345240838902974, "grad_norm": 0.28780287504196167, "learning_rate": 9.396339755576506e-05, "loss": 0.7255, "step": 796 }, { "epoch": 0.1836828762387647, "grad_norm": 0.3469153046607971, "learning_rate": 9.39456045672483e-05, "loss": 0.716, "step": 797 }, { "epoch": 0.18391334408849966, "grad_norm": 0.30638718605041504, "learning_rate": 9.392778708450633e-05, "loss": 0.7111, "step": 798 }, { "epoch": 0.18414381193823462, "grad_norm": 0.3063083291053772, "learning_rate": 9.39099451174702e-05, "loss": 0.7271, "step": 799 }, { "epoch": 0.18437427978796958, "grad_norm": 0.32252591848373413, "learning_rate": 9.389207867608461e-05, "loss": 0.7266, "step": 800 }, { "epoch": 0.18460474763770454, "grad_norm": 0.3048442304134369, "learning_rate": 9.387418777030784e-05, "loss": 0.7214, "step": 801 }, { "epoch": 0.1848352154874395, "grad_norm": 0.3063707649707794, "learning_rate": 9.385627241011187e-05, "loss": 0.7223, "step": 802 }, { "epoch": 0.18506568333717446, "grad_norm": 0.32035815715789795, "learning_rate": 9.383833260548233e-05, "loss": 0.724, "step": 803 }, { "epoch": 0.18529615118690942, "grad_norm": 0.3516310453414917, "learning_rate": 9.382036836641839e-05, "loss": 0.7246, "step": 804 }, { "epoch": 0.18552661903664439, "grad_norm": 0.34494486451148987, "learning_rate": 9.380237970293291e-05, "loss": 0.7116, "step": 805 }, { "epoch": 0.18575708688637935, "grad_norm": 0.34283608198165894, "learning_rate": 9.378436662505232e-05, "loss": 0.7211, "step": 806 }, { "epoch": 0.1859875547361143, "grad_norm": 0.30393049120903015, "learning_rate": 9.376632914281669e-05, "loss": 0.724, "step": 807 }, { "epoch": 0.18621802258584927, "grad_norm": 0.27396926283836365, "learning_rate": 9.374826726627969e-05, "loss": 0.7202, "step": 808 }, { "epoch": 0.18644849043558423, "grad_norm": 0.3302895724773407, "learning_rate": 9.373018100550855e-05, "loss": 0.7303, "step": 809 }, { "epoch": 0.1866789582853192, "grad_norm": 0.34311676025390625, "learning_rate": 9.371207037058414e-05, "loss": 0.7205, "step": 810 }, { "epoch": 0.18690942613505415, "grad_norm": 0.3147958219051361, "learning_rate": 9.369393537160089e-05, "loss": 0.7234, "step": 811 }, { "epoch": 0.1871398939847891, "grad_norm": 0.29841968417167664, "learning_rate": 9.36757760186668e-05, "loss": 0.7242, "step": 812 }, { "epoch": 0.18737036183452407, "grad_norm": 0.3176330029964447, "learning_rate": 9.365759232190348e-05, "loss": 0.7222, "step": 813 }, { "epoch": 0.18760082968425904, "grad_norm": 0.3466809093952179, "learning_rate": 9.363938429144605e-05, "loss": 0.7221, "step": 814 }, { "epoch": 0.187831297533994, "grad_norm": 0.35498663783073425, "learning_rate": 9.362115193744328e-05, "loss": 0.7221, "step": 815 }, { "epoch": 0.18806176538372896, "grad_norm": 0.4183621108531952, "learning_rate": 9.360289527005739e-05, "loss": 0.7296, "step": 816 }, { "epoch": 0.18829223323346392, "grad_norm": 0.41364750266075134, "learning_rate": 9.358461429946425e-05, "loss": 0.7226, "step": 817 }, { "epoch": 0.1885227010831989, "grad_norm": 0.3849059045314789, "learning_rate": 9.356630903585321e-05, "loss": 0.7242, "step": 818 }, { "epoch": 0.18875316893293387, "grad_norm": 0.3003298342227936, "learning_rate": 9.354797948942719e-05, "loss": 0.7267, "step": 819 }, { "epoch": 0.18898363678266883, "grad_norm": 0.294464647769928, "learning_rate": 9.352962567040266e-05, "loss": 0.7208, "step": 820 }, { "epoch": 0.1892141046324038, "grad_norm": 0.3354666531085968, "learning_rate": 9.35112475890096e-05, "loss": 0.7165, "step": 821 }, { "epoch": 0.18944457248213875, "grad_norm": 0.3083663582801819, "learning_rate": 9.349284525549147e-05, "loss": 0.7148, "step": 822 }, { "epoch": 0.1896750403318737, "grad_norm": 0.28768306970596313, "learning_rate": 9.347441868010531e-05, "loss": 0.7136, "step": 823 }, { "epoch": 0.18990550818160867, "grad_norm": 0.2938915193080902, "learning_rate": 9.345596787312165e-05, "loss": 0.7293, "step": 824 }, { "epoch": 0.19013597603134363, "grad_norm": 0.3318259119987488, "learning_rate": 9.343749284482454e-05, "loss": 0.7088, "step": 825 }, { "epoch": 0.1903664438810786, "grad_norm": 0.3234262764453888, "learning_rate": 9.341899360551151e-05, "loss": 0.7196, "step": 826 }, { "epoch": 0.19059691173081356, "grad_norm": 0.32499444484710693, "learning_rate": 9.340047016549358e-05, "loss": 0.7169, "step": 827 }, { "epoch": 0.19082737958054852, "grad_norm": 0.28975191712379456, "learning_rate": 9.33819225350953e-05, "loss": 0.7216, "step": 828 }, { "epoch": 0.19105784743028348, "grad_norm": 0.28860896825790405, "learning_rate": 9.336335072465465e-05, "loss": 0.7174, "step": 829 }, { "epoch": 0.19128831528001844, "grad_norm": 0.3400057554244995, "learning_rate": 9.334475474452308e-05, "loss": 0.7273, "step": 830 }, { "epoch": 0.1915187831297534, "grad_norm": 0.3378770649433136, "learning_rate": 9.332613460506563e-05, "loss": 0.7221, "step": 831 }, { "epoch": 0.19174925097948836, "grad_norm": 0.2862728536128998, "learning_rate": 9.330749031666064e-05, "loss": 0.7144, "step": 832 }, { "epoch": 0.19197971882922332, "grad_norm": 0.3653225004673004, "learning_rate": 9.328882188970003e-05, "loss": 0.7235, "step": 833 }, { "epoch": 0.19221018667895828, "grad_norm": 0.40798598527908325, "learning_rate": 9.327012933458909e-05, "loss": 0.7232, "step": 834 }, { "epoch": 0.19244065452869324, "grad_norm": 0.3767978250980377, "learning_rate": 9.325141266174666e-05, "loss": 0.7354, "step": 835 }, { "epoch": 0.1926711223784282, "grad_norm": 0.3316027522087097, "learning_rate": 9.323267188160494e-05, "loss": 0.7245, "step": 836 }, { "epoch": 0.19290159022816317, "grad_norm": 0.2936481535434723, "learning_rate": 9.321390700460956e-05, "loss": 0.7238, "step": 837 }, { "epoch": 0.19313205807789813, "grad_norm": 0.29495328664779663, "learning_rate": 9.319511804121967e-05, "loss": 0.7136, "step": 838 }, { "epoch": 0.1933625259276331, "grad_norm": 0.3090824484825134, "learning_rate": 9.317630500190774e-05, "loss": 0.7112, "step": 839 }, { "epoch": 0.19359299377736805, "grad_norm": 0.288487046957016, "learning_rate": 9.315746789715973e-05, "loss": 0.722, "step": 840 }, { "epoch": 0.193823461627103, "grad_norm": 0.31703829765319824, "learning_rate": 9.313860673747496e-05, "loss": 0.7235, "step": 841 }, { "epoch": 0.19405392947683797, "grad_norm": 0.36949485540390015, "learning_rate": 9.311972153336623e-05, "loss": 0.7211, "step": 842 }, { "epoch": 0.19428439732657293, "grad_norm": 0.3008967936038971, "learning_rate": 9.310081229535968e-05, "loss": 0.722, "step": 843 }, { "epoch": 0.1945148651763079, "grad_norm": 0.28252121806144714, "learning_rate": 9.308187903399486e-05, "loss": 0.7231, "step": 844 }, { "epoch": 0.19474533302604286, "grad_norm": 0.32899102568626404, "learning_rate": 9.306292175982472e-05, "loss": 0.7103, "step": 845 }, { "epoch": 0.19497580087577782, "grad_norm": 0.331802099943161, "learning_rate": 9.304394048341559e-05, "loss": 0.7263, "step": 846 }, { "epoch": 0.1952062687255128, "grad_norm": 0.2927376925945282, "learning_rate": 9.302493521534719e-05, "loss": 0.7177, "step": 847 }, { "epoch": 0.19543673657524777, "grad_norm": 0.28728216886520386, "learning_rate": 9.300590596621257e-05, "loss": 0.7118, "step": 848 }, { "epoch": 0.19566720442498273, "grad_norm": 0.2990799844264984, "learning_rate": 9.29868527466182e-05, "loss": 0.7141, "step": 849 }, { "epoch": 0.1958976722747177, "grad_norm": 0.2656897306442261, "learning_rate": 9.296777556718387e-05, "loss": 0.728, "step": 850 }, { "epoch": 0.19612814012445265, "grad_norm": 0.30828338861465454, "learning_rate": 9.294867443854278e-05, "loss": 0.7262, "step": 851 }, { "epoch": 0.1963586079741876, "grad_norm": 0.29851728677749634, "learning_rate": 9.292954937134142e-05, "loss": 0.7277, "step": 852 }, { "epoch": 0.19658907582392257, "grad_norm": 0.3060734272003174, "learning_rate": 9.291040037623961e-05, "loss": 0.7239, "step": 853 }, { "epoch": 0.19681954367365753, "grad_norm": 0.3331679105758667, "learning_rate": 9.28912274639106e-05, "loss": 0.7227, "step": 854 }, { "epoch": 0.1970500115233925, "grad_norm": 0.28434881567955017, "learning_rate": 9.287203064504084e-05, "loss": 0.7143, "step": 855 }, { "epoch": 0.19728047937312745, "grad_norm": 0.2975858449935913, "learning_rate": 9.285280993033024e-05, "loss": 0.7194, "step": 856 }, { "epoch": 0.19751094722286242, "grad_norm": 0.26783379912376404, "learning_rate": 9.283356533049194e-05, "loss": 0.7158, "step": 857 }, { "epoch": 0.19774141507259738, "grad_norm": 0.31870049238204956, "learning_rate": 9.281429685625243e-05, "loss": 0.7282, "step": 858 }, { "epoch": 0.19797188292233234, "grad_norm": 0.3032819330692291, "learning_rate": 9.279500451835145e-05, "loss": 0.7217, "step": 859 }, { "epoch": 0.1982023507720673, "grad_norm": 0.2742626667022705, "learning_rate": 9.277568832754216e-05, "loss": 0.7199, "step": 860 }, { "epoch": 0.19843281862180226, "grad_norm": 0.2852528989315033, "learning_rate": 9.275634829459087e-05, "loss": 0.7242, "step": 861 }, { "epoch": 0.19866328647153722, "grad_norm": 0.26641765236854553, "learning_rate": 9.27369844302773e-05, "loss": 0.7202, "step": 862 }, { "epoch": 0.19889375432127218, "grad_norm": 0.27515074610710144, "learning_rate": 9.271759674539438e-05, "loss": 0.7143, "step": 863 }, { "epoch": 0.19912422217100714, "grad_norm": 0.3010948598384857, "learning_rate": 9.269818525074833e-05, "loss": 0.7182, "step": 864 }, { "epoch": 0.1993546900207421, "grad_norm": 0.28209689259529114, "learning_rate": 9.267874995715868e-05, "loss": 0.724, "step": 865 }, { "epoch": 0.19958515787047706, "grad_norm": 0.29856470227241516, "learning_rate": 9.26592908754582e-05, "loss": 0.7151, "step": 866 }, { "epoch": 0.19981562572021203, "grad_norm": 0.3309277594089508, "learning_rate": 9.263980801649286e-05, "loss": 0.7171, "step": 867 }, { "epoch": 0.200046093569947, "grad_norm": 0.264207661151886, "learning_rate": 9.262030139112198e-05, "loss": 0.723, "step": 868 }, { "epoch": 0.20027656141968195, "grad_norm": 0.31711748242378235, "learning_rate": 9.260077101021811e-05, "loss": 0.7258, "step": 869 }, { "epoch": 0.2005070292694169, "grad_norm": 0.3416388928890228, "learning_rate": 9.258121688466696e-05, "loss": 0.7271, "step": 870 }, { "epoch": 0.20073749711915187, "grad_norm": 0.37464427947998047, "learning_rate": 9.256163902536756e-05, "loss": 0.7173, "step": 871 }, { "epoch": 0.20096796496888683, "grad_norm": 0.36626744270324707, "learning_rate": 9.254203744323216e-05, "loss": 0.7294, "step": 872 }, { "epoch": 0.2011984328186218, "grad_norm": 0.27999523282051086, "learning_rate": 9.252241214918615e-05, "loss": 0.7204, "step": 873 }, { "epoch": 0.20142890066835675, "grad_norm": 0.4048886001110077, "learning_rate": 9.250276315416825e-05, "loss": 0.7151, "step": 874 }, { "epoch": 0.20165936851809171, "grad_norm": 0.4276858866214752, "learning_rate": 9.248309046913032e-05, "loss": 0.7311, "step": 875 }, { "epoch": 0.20188983636782667, "grad_norm": 0.3747239410877228, "learning_rate": 9.246339410503745e-05, "loss": 0.7223, "step": 876 }, { "epoch": 0.20212030421756166, "grad_norm": 0.2862272560596466, "learning_rate": 9.24436740728679e-05, "loss": 0.7135, "step": 877 }, { "epoch": 0.20235077206729662, "grad_norm": 0.3424084484577179, "learning_rate": 9.242393038361316e-05, "loss": 0.7224, "step": 878 }, { "epoch": 0.20258123991703159, "grad_norm": 0.3472643792629242, "learning_rate": 9.24041630482779e-05, "loss": 0.7144, "step": 879 }, { "epoch": 0.20281170776676655, "grad_norm": 0.31430667638778687, "learning_rate": 9.238437207787994e-05, "loss": 0.7234, "step": 880 }, { "epoch": 0.2030421756165015, "grad_norm": 0.2940575182437897, "learning_rate": 9.23645574834503e-05, "loss": 0.7105, "step": 881 }, { "epoch": 0.20327264346623647, "grad_norm": 0.3699895739555359, "learning_rate": 9.234471927603314e-05, "loss": 0.7107, "step": 882 }, { "epoch": 0.20350311131597143, "grad_norm": 0.3853522837162018, "learning_rate": 9.232485746668584e-05, "loss": 0.721, "step": 883 }, { "epoch": 0.2037335791657064, "grad_norm": 0.37054017186164856, "learning_rate": 9.230497206647885e-05, "loss": 0.7209, "step": 884 }, { "epoch": 0.20396404701544135, "grad_norm": 0.2768975496292114, "learning_rate": 9.228506308649585e-05, "loss": 0.7226, "step": 885 }, { "epoch": 0.2041945148651763, "grad_norm": 0.3139522671699524, "learning_rate": 9.22651305378336e-05, "loss": 0.7189, "step": 886 }, { "epoch": 0.20442498271491127, "grad_norm": 0.33646050095558167, "learning_rate": 9.224517443160205e-05, "loss": 0.7163, "step": 887 }, { "epoch": 0.20465545056464624, "grad_norm": 0.28335535526275635, "learning_rate": 9.222519477892425e-05, "loss": 0.7233, "step": 888 }, { "epoch": 0.2048859184143812, "grad_norm": 0.31536251306533813, "learning_rate": 9.220519159093637e-05, "loss": 0.7152, "step": 889 }, { "epoch": 0.20511638626411616, "grad_norm": 0.2616060972213745, "learning_rate": 9.21851648787877e-05, "loss": 0.7144, "step": 890 }, { "epoch": 0.20534685411385112, "grad_norm": 0.30261725187301636, "learning_rate": 9.216511465364066e-05, "loss": 0.717, "step": 891 }, { "epoch": 0.20557732196358608, "grad_norm": 0.3098640739917755, "learning_rate": 9.214504092667075e-05, "loss": 0.7182, "step": 892 }, { "epoch": 0.20580778981332104, "grad_norm": 0.32920923829078674, "learning_rate": 9.212494370906661e-05, "loss": 0.7217, "step": 893 }, { "epoch": 0.206038257663056, "grad_norm": 0.31871381402015686, "learning_rate": 9.210482301202994e-05, "loss": 0.7135, "step": 894 }, { "epoch": 0.20626872551279096, "grad_norm": 0.2803013324737549, "learning_rate": 9.208467884677551e-05, "loss": 0.7218, "step": 895 }, { "epoch": 0.20649919336252592, "grad_norm": 0.31041911244392395, "learning_rate": 9.206451122453122e-05, "loss": 0.7207, "step": 896 }, { "epoch": 0.20672966121226088, "grad_norm": 0.3201538324356079, "learning_rate": 9.204432015653801e-05, "loss": 0.7163, "step": 897 }, { "epoch": 0.20696012906199585, "grad_norm": 0.26762691140174866, "learning_rate": 9.202410565404988e-05, "loss": 0.7103, "step": 898 }, { "epoch": 0.2071905969117308, "grad_norm": 0.3141172528266907, "learning_rate": 9.200386772833394e-05, "loss": 0.7251, "step": 899 }, { "epoch": 0.20742106476146577, "grad_norm": 0.3220638632774353, "learning_rate": 9.19836063906703e-05, "loss": 0.716, "step": 900 }, { "epoch": 0.20765153261120073, "grad_norm": 0.3352587819099426, "learning_rate": 9.196332165235215e-05, "loss": 0.7235, "step": 901 }, { "epoch": 0.2078820004609357, "grad_norm": 0.3438172936439514, "learning_rate": 9.194301352468572e-05, "loss": 0.713, "step": 902 }, { "epoch": 0.20811246831067065, "grad_norm": 0.3374473452568054, "learning_rate": 9.192268201899028e-05, "loss": 0.7203, "step": 903 }, { "epoch": 0.2083429361604056, "grad_norm": 0.32671868801116943, "learning_rate": 9.19023271465981e-05, "loss": 0.7264, "step": 904 }, { "epoch": 0.20857340401014057, "grad_norm": 0.27988868951797485, "learning_rate": 9.188194891885453e-05, "loss": 0.7129, "step": 905 }, { "epoch": 0.20880387185987553, "grad_norm": 0.2564987540245056, "learning_rate": 9.186154734711786e-05, "loss": 0.7251, "step": 906 }, { "epoch": 0.20903433970961052, "grad_norm": 0.30010056495666504, "learning_rate": 9.184112244275948e-05, "loss": 0.7137, "step": 907 }, { "epoch": 0.20926480755934548, "grad_norm": 0.2974473536014557, "learning_rate": 9.182067421716372e-05, "loss": 0.7259, "step": 908 }, { "epoch": 0.20949527540908044, "grad_norm": 0.31991463899612427, "learning_rate": 9.180020268172794e-05, "loss": 0.7124, "step": 909 }, { "epoch": 0.2097257432588154, "grad_norm": 0.32082507014274597, "learning_rate": 9.177970784786245e-05, "loss": 0.7184, "step": 910 }, { "epoch": 0.20995621110855037, "grad_norm": 0.32305994629859924, "learning_rate": 9.175918972699063e-05, "loss": 0.7147, "step": 911 }, { "epoch": 0.21018667895828533, "grad_norm": 0.3100352883338928, "learning_rate": 9.173864833054875e-05, "loss": 0.7199, "step": 912 }, { "epoch": 0.2104171468080203, "grad_norm": 0.30127498507499695, "learning_rate": 9.17180836699861e-05, "loss": 0.7188, "step": 913 }, { "epoch": 0.21064761465775525, "grad_norm": 0.27540695667266846, "learning_rate": 9.169749575676496e-05, "loss": 0.7133, "step": 914 }, { "epoch": 0.2108780825074902, "grad_norm": 0.3038792610168457, "learning_rate": 9.167688460236049e-05, "loss": 0.716, "step": 915 }, { "epoch": 0.21110855035722517, "grad_norm": 0.3074706792831421, "learning_rate": 9.165625021826087e-05, "loss": 0.7136, "step": 916 }, { "epoch": 0.21133901820696013, "grad_norm": 0.31850558519363403, "learning_rate": 9.163559261596723e-05, "loss": 0.7147, "step": 917 }, { "epoch": 0.2115694860566951, "grad_norm": 0.326848566532135, "learning_rate": 9.161491180699359e-05, "loss": 0.7126, "step": 918 }, { "epoch": 0.21179995390643006, "grad_norm": 0.278940349817276, "learning_rate": 9.159420780286699e-05, "loss": 0.7173, "step": 919 }, { "epoch": 0.21203042175616502, "grad_norm": 0.3120673894882202, "learning_rate": 9.157348061512727e-05, "loss": 0.7157, "step": 920 }, { "epoch": 0.21226088960589998, "grad_norm": 0.2851211428642273, "learning_rate": 9.155273025532731e-05, "loss": 0.7153, "step": 921 }, { "epoch": 0.21249135745563494, "grad_norm": 0.2742621600627899, "learning_rate": 9.153195673503286e-05, "loss": 0.7158, "step": 922 }, { "epoch": 0.2127218253053699, "grad_norm": 0.29812031984329224, "learning_rate": 9.151116006582259e-05, "loss": 0.7083, "step": 923 }, { "epoch": 0.21295229315510486, "grad_norm": 0.33117279410362244, "learning_rate": 9.149034025928804e-05, "loss": 0.7196, "step": 924 }, { "epoch": 0.21318276100483982, "grad_norm": 0.2820033133029938, "learning_rate": 9.146949732703368e-05, "loss": 0.7102, "step": 925 }, { "epoch": 0.21341322885457478, "grad_norm": 0.2883508801460266, "learning_rate": 9.144863128067687e-05, "loss": 0.7246, "step": 926 }, { "epoch": 0.21364369670430974, "grad_norm": 0.34615033864974976, "learning_rate": 9.142774213184784e-05, "loss": 0.7164, "step": 927 }, { "epoch": 0.2138741645540447, "grad_norm": 0.33778396248817444, "learning_rate": 9.140682989218969e-05, "loss": 0.7152, "step": 928 }, { "epoch": 0.21410463240377967, "grad_norm": 0.37993401288986206, "learning_rate": 9.138589457335842e-05, "loss": 0.715, "step": 929 }, { "epoch": 0.21433510025351463, "grad_norm": 0.32394763827323914, "learning_rate": 9.136493618702284e-05, "loss": 0.7263, "step": 930 }, { "epoch": 0.2145655681032496, "grad_norm": 0.2940785884857178, "learning_rate": 9.13439547448647e-05, "loss": 0.7267, "step": 931 }, { "epoch": 0.21479603595298455, "grad_norm": 0.32346901297569275, "learning_rate": 9.132295025857851e-05, "loss": 0.719, "step": 932 }, { "epoch": 0.2150265038027195, "grad_norm": 0.40225303173065186, "learning_rate": 9.130192273987168e-05, "loss": 0.7186, "step": 933 }, { "epoch": 0.21525697165245447, "grad_norm": 0.28617575764656067, "learning_rate": 9.128087220046445e-05, "loss": 0.7148, "step": 934 }, { "epoch": 0.21548743950218943, "grad_norm": 0.3431677222251892, "learning_rate": 9.125979865208988e-05, "loss": 0.7135, "step": 935 }, { "epoch": 0.2157179073519244, "grad_norm": 0.4484764039516449, "learning_rate": 9.123870210649387e-05, "loss": 0.7096, "step": 936 }, { "epoch": 0.21594837520165938, "grad_norm": 0.39285802841186523, "learning_rate": 9.121758257543511e-05, "loss": 0.7138, "step": 937 }, { "epoch": 0.21617884305139434, "grad_norm": 0.2899276912212372, "learning_rate": 9.119644007068513e-05, "loss": 0.7072, "step": 938 }, { "epoch": 0.2164093109011293, "grad_norm": 0.39116156101226807, "learning_rate": 9.117527460402826e-05, "loss": 0.7193, "step": 939 }, { "epoch": 0.21663977875086426, "grad_norm": 0.3671886622905731, "learning_rate": 9.115408618726162e-05, "loss": 0.7171, "step": 940 }, { "epoch": 0.21687024660059923, "grad_norm": 0.35713937878608704, "learning_rate": 9.113287483219511e-05, "loss": 0.7105, "step": 941 }, { "epoch": 0.2171007144503342, "grad_norm": 0.3286116123199463, "learning_rate": 9.111164055065145e-05, "loss": 0.7185, "step": 942 }, { "epoch": 0.21733118230006915, "grad_norm": 0.31977954506874084, "learning_rate": 9.109038335446612e-05, "loss": 0.7177, "step": 943 }, { "epoch": 0.2175616501498041, "grad_norm": 0.3611249625682831, "learning_rate": 9.106910325548734e-05, "loss": 0.7169, "step": 944 }, { "epoch": 0.21779211799953907, "grad_norm": 0.352547824382782, "learning_rate": 9.104780026557618e-05, "loss": 0.7129, "step": 945 }, { "epoch": 0.21802258584927403, "grad_norm": 0.33489990234375, "learning_rate": 9.102647439660637e-05, "loss": 0.7123, "step": 946 }, { "epoch": 0.218253053699009, "grad_norm": 0.37026363611221313, "learning_rate": 9.100512566046444e-05, "loss": 0.7225, "step": 947 }, { "epoch": 0.21848352154874395, "grad_norm": 0.3738853335380554, "learning_rate": 9.098375406904968e-05, "loss": 0.7086, "step": 948 }, { "epoch": 0.21871398939847891, "grad_norm": 0.2526826560497284, "learning_rate": 9.09623596342741e-05, "loss": 0.7142, "step": 949 }, { "epoch": 0.21894445724821388, "grad_norm": 0.3173041045665741, "learning_rate": 9.094094236806244e-05, "loss": 0.7133, "step": 950 }, { "epoch": 0.21917492509794884, "grad_norm": 0.35794541239738464, "learning_rate": 9.091950228235217e-05, "loss": 0.7211, "step": 951 }, { "epoch": 0.2194053929476838, "grad_norm": 0.36241787672042847, "learning_rate": 9.089803938909349e-05, "loss": 0.7164, "step": 952 }, { "epoch": 0.21963586079741876, "grad_norm": 0.28174248337745667, "learning_rate": 9.087655370024928e-05, "loss": 0.7085, "step": 953 }, { "epoch": 0.21986632864715372, "grad_norm": 0.3472788631916046, "learning_rate": 9.085504522779517e-05, "loss": 0.7166, "step": 954 }, { "epoch": 0.22009679649688868, "grad_norm": 0.39354413747787476, "learning_rate": 9.083351398371944e-05, "loss": 0.7086, "step": 955 }, { "epoch": 0.22032726434662364, "grad_norm": 0.4095928966999054, "learning_rate": 9.081195998002312e-05, "loss": 0.7136, "step": 956 }, { "epoch": 0.2205577321963586, "grad_norm": 0.3270823061466217, "learning_rate": 9.079038322871987e-05, "loss": 0.7218, "step": 957 }, { "epoch": 0.22078820004609356, "grad_norm": 0.30970245599746704, "learning_rate": 9.076878374183606e-05, "loss": 0.7144, "step": 958 }, { "epoch": 0.22101866789582852, "grad_norm": 0.33222824335098267, "learning_rate": 9.074716153141074e-05, "loss": 0.7069, "step": 959 }, { "epoch": 0.22124913574556349, "grad_norm": 0.29064905643463135, "learning_rate": 9.072551660949558e-05, "loss": 0.7171, "step": 960 }, { "epoch": 0.22147960359529845, "grad_norm": 0.2900223135948181, "learning_rate": 9.070384898815497e-05, "loss": 0.7182, "step": 961 }, { "epoch": 0.2217100714450334, "grad_norm": 0.33210277557373047, "learning_rate": 9.068215867946591e-05, "loss": 0.7184, "step": 962 }, { "epoch": 0.22194053929476837, "grad_norm": 0.35366907715797424, "learning_rate": 9.066044569551807e-05, "loss": 0.7209, "step": 963 }, { "epoch": 0.22217100714450333, "grad_norm": 0.29984578490257263, "learning_rate": 9.063871004841371e-05, "loss": 0.7083, "step": 964 }, { "epoch": 0.2224014749942383, "grad_norm": 0.2960159182548523, "learning_rate": 9.061695175026779e-05, "loss": 0.7146, "step": 965 }, { "epoch": 0.22263194284397328, "grad_norm": 0.3301765024662018, "learning_rate": 9.059517081320787e-05, "loss": 0.7001, "step": 966 }, { "epoch": 0.22286241069370824, "grad_norm": 0.3346138894557953, "learning_rate": 9.057336724937409e-05, "loss": 0.7132, "step": 967 }, { "epoch": 0.2230928785434432, "grad_norm": 0.32978126406669617, "learning_rate": 9.055154107091925e-05, "loss": 0.7125, "step": 968 }, { "epoch": 0.22332334639317816, "grad_norm": 0.3108513653278351, "learning_rate": 9.052969229000874e-05, "loss": 0.7251, "step": 969 }, { "epoch": 0.22355381424291312, "grad_norm": 0.36438095569610596, "learning_rate": 9.050782091882056e-05, "loss": 0.7118, "step": 970 }, { "epoch": 0.22378428209264808, "grad_norm": 0.3605962097644806, "learning_rate": 9.048592696954524e-05, "loss": 0.7099, "step": 971 }, { "epoch": 0.22401474994238305, "grad_norm": 0.2907812297344208, "learning_rate": 9.0464010454386e-05, "loss": 0.7062, "step": 972 }, { "epoch": 0.224245217792118, "grad_norm": 0.30365508794784546, "learning_rate": 9.044207138555854e-05, "loss": 0.7057, "step": 973 }, { "epoch": 0.22447568564185297, "grad_norm": 0.3420591950416565, "learning_rate": 9.042010977529118e-05, "loss": 0.7136, "step": 974 }, { "epoch": 0.22470615349158793, "grad_norm": 0.27449890971183777, "learning_rate": 9.039812563582482e-05, "loss": 0.7185, "step": 975 }, { "epoch": 0.2249366213413229, "grad_norm": 0.3264601230621338, "learning_rate": 9.037611897941283e-05, "loss": 0.7205, "step": 976 }, { "epoch": 0.22516708919105785, "grad_norm": 0.3168565332889557, "learning_rate": 9.035408981832126e-05, "loss": 0.723, "step": 977 }, { "epoch": 0.2253975570407928, "grad_norm": 0.34214240312576294, "learning_rate": 9.03320381648286e-05, "loss": 0.7068, "step": 978 }, { "epoch": 0.22562802489052777, "grad_norm": 0.3359646797180176, "learning_rate": 9.030996403122592e-05, "loss": 0.7159, "step": 979 }, { "epoch": 0.22585849274026273, "grad_norm": 0.32919085025787354, "learning_rate": 9.02878674298168e-05, "loss": 0.7126, "step": 980 }, { "epoch": 0.2260889605899977, "grad_norm": 0.2955667972564697, "learning_rate": 9.026574837291739e-05, "loss": 0.7104, "step": 981 }, { "epoch": 0.22631942843973266, "grad_norm": 0.307659775018692, "learning_rate": 9.024360687285629e-05, "loss": 0.7065, "step": 982 }, { "epoch": 0.22654989628946762, "grad_norm": 0.31468522548675537, "learning_rate": 9.022144294197466e-05, "loss": 0.7091, "step": 983 }, { "epoch": 0.22678036413920258, "grad_norm": 0.2780282497406006, "learning_rate": 9.019925659262612e-05, "loss": 0.7115, "step": 984 }, { "epoch": 0.22701083198893754, "grad_norm": 0.32122835516929626, "learning_rate": 9.01770478371768e-05, "loss": 0.7219, "step": 985 }, { "epoch": 0.2272412998386725, "grad_norm": 0.33038368821144104, "learning_rate": 9.015481668800537e-05, "loss": 0.7051, "step": 986 }, { "epoch": 0.22747176768840746, "grad_norm": 0.3402576744556427, "learning_rate": 9.013256315750291e-05, "loss": 0.7195, "step": 987 }, { "epoch": 0.22770223553814242, "grad_norm": 0.37470388412475586, "learning_rate": 9.011028725807302e-05, "loss": 0.7078, "step": 988 }, { "epoch": 0.22793270338787738, "grad_norm": 0.3758593797683716, "learning_rate": 9.008798900213173e-05, "loss": 0.7092, "step": 989 }, { "epoch": 0.22816317123761234, "grad_norm": 0.31909018754959106, "learning_rate": 9.006566840210757e-05, "loss": 0.7129, "step": 990 }, { "epoch": 0.2283936390873473, "grad_norm": 0.34421950578689575, "learning_rate": 9.00433254704415e-05, "loss": 0.7181, "step": 991 }, { "epoch": 0.22862410693708227, "grad_norm": 0.3339763879776001, "learning_rate": 9.002096021958693e-05, "loss": 0.7227, "step": 992 }, { "epoch": 0.22885457478681723, "grad_norm": 0.29700416326522827, "learning_rate": 8.999857266200973e-05, "loss": 0.712, "step": 993 }, { "epoch": 0.2290850426365522, "grad_norm": 0.3107481896877289, "learning_rate": 8.997616281018816e-05, "loss": 0.7106, "step": 994 }, { "epoch": 0.22931551048628715, "grad_norm": 0.3676799535751343, "learning_rate": 8.995373067661296e-05, "loss": 0.7113, "step": 995 }, { "epoch": 0.22954597833602214, "grad_norm": 0.2843577563762665, "learning_rate": 8.993127627378727e-05, "loss": 0.7127, "step": 996 }, { "epoch": 0.2297764461857571, "grad_norm": 0.30162549018859863, "learning_rate": 8.99087996142266e-05, "loss": 0.7159, "step": 997 }, { "epoch": 0.23000691403549206, "grad_norm": 0.4627409279346466, "learning_rate": 8.988630071045892e-05, "loss": 0.714, "step": 998 }, { "epoch": 0.23023738188522702, "grad_norm": 0.4146185517311096, "learning_rate": 8.986377957502459e-05, "loss": 0.7239, "step": 999 }, { "epoch": 0.23046784973496198, "grad_norm": 0.29513731598854065, "learning_rate": 8.984123622047632e-05, "loss": 0.7084, "step": 1000 }, { "epoch": 0.23069831758469694, "grad_norm": 0.29365894198417664, "learning_rate": 8.981867065937925e-05, "loss": 0.7183, "step": 1001 }, { "epoch": 0.2309287854344319, "grad_norm": 0.2846957743167877, "learning_rate": 8.97960829043109e-05, "loss": 0.7087, "step": 1002 }, { "epoch": 0.23115925328416687, "grad_norm": 0.3023827373981476, "learning_rate": 8.977347296786113e-05, "loss": 0.7142, "step": 1003 }, { "epoch": 0.23138972113390183, "grad_norm": 0.2838916480541229, "learning_rate": 8.975084086263217e-05, "loss": 0.7173, "step": 1004 }, { "epoch": 0.2316201889836368, "grad_norm": 0.3648954927921295, "learning_rate": 8.97281866012386e-05, "loss": 0.7182, "step": 1005 }, { "epoch": 0.23185065683337175, "grad_norm": 0.29563209414482117, "learning_rate": 8.97055101963074e-05, "loss": 0.7105, "step": 1006 }, { "epoch": 0.2320811246831067, "grad_norm": 0.3027130365371704, "learning_rate": 8.968281166047781e-05, "loss": 0.7092, "step": 1007 }, { "epoch": 0.23231159253284167, "grad_norm": 0.3136472702026367, "learning_rate": 8.966009100640148e-05, "loss": 0.7097, "step": 1008 }, { "epoch": 0.23254206038257663, "grad_norm": 0.2663586735725403, "learning_rate": 8.963734824674235e-05, "loss": 0.716, "step": 1009 }, { "epoch": 0.2327725282323116, "grad_norm": 0.27872374653816223, "learning_rate": 8.961458339417669e-05, "loss": 0.7119, "step": 1010 }, { "epoch": 0.23300299608204655, "grad_norm": 0.32824763655662537, "learning_rate": 8.959179646139306e-05, "loss": 0.7013, "step": 1011 }, { "epoch": 0.23323346393178152, "grad_norm": 0.2956524193286896, "learning_rate": 8.95689874610924e-05, "loss": 0.7092, "step": 1012 }, { "epoch": 0.23346393178151648, "grad_norm": 0.2657110095024109, "learning_rate": 8.954615640598782e-05, "loss": 0.7061, "step": 1013 }, { "epoch": 0.23369439963125144, "grad_norm": 0.31145045161247253, "learning_rate": 8.952330330880487e-05, "loss": 0.7105, "step": 1014 }, { "epoch": 0.2339248674809864, "grad_norm": 0.31407973170280457, "learning_rate": 8.95004281822813e-05, "loss": 0.7045, "step": 1015 }, { "epoch": 0.23415533533072136, "grad_norm": 0.24322229623794556, "learning_rate": 8.947753103916716e-05, "loss": 0.7169, "step": 1016 }, { "epoch": 0.23438580318045632, "grad_norm": 0.34636759757995605, "learning_rate": 8.945461189222474e-05, "loss": 0.7111, "step": 1017 }, { "epoch": 0.23461627103019128, "grad_norm": 0.3195311725139618, "learning_rate": 8.943167075422867e-05, "loss": 0.7136, "step": 1018 }, { "epoch": 0.23484673887992624, "grad_norm": 0.30927303433418274, "learning_rate": 8.940870763796574e-05, "loss": 0.7086, "step": 1019 }, { "epoch": 0.2350772067296612, "grad_norm": 0.29347729682922363, "learning_rate": 8.938572255623509e-05, "loss": 0.7046, "step": 1020 }, { "epoch": 0.23530767457939616, "grad_norm": 0.28396493196487427, "learning_rate": 8.9362715521848e-05, "loss": 0.7064, "step": 1021 }, { "epoch": 0.23553814242913113, "grad_norm": 0.26140013337135315, "learning_rate": 8.933968654762808e-05, "loss": 0.7112, "step": 1022 }, { "epoch": 0.2357686102788661, "grad_norm": 0.2797393500804901, "learning_rate": 8.93166356464111e-05, "loss": 0.7127, "step": 1023 }, { "epoch": 0.23599907812860105, "grad_norm": 0.289408802986145, "learning_rate": 8.92935628310451e-05, "loss": 0.7079, "step": 1024 }, { "epoch": 0.236229545978336, "grad_norm": 0.31490978598594666, "learning_rate": 8.92704681143903e-05, "loss": 0.7066, "step": 1025 }, { "epoch": 0.236460013828071, "grad_norm": 0.30412665009498596, "learning_rate": 8.924735150931915e-05, "loss": 0.7068, "step": 1026 }, { "epoch": 0.23669048167780596, "grad_norm": 0.3499104380607605, "learning_rate": 8.922421302871629e-05, "loss": 0.7169, "step": 1027 }, { "epoch": 0.23692094952754092, "grad_norm": 0.3591751158237457, "learning_rate": 8.920105268547854e-05, "loss": 0.7108, "step": 1028 }, { "epoch": 0.23715141737727588, "grad_norm": 0.35666418075561523, "learning_rate": 8.917787049251493e-05, "loss": 0.7055, "step": 1029 }, { "epoch": 0.23738188522701084, "grad_norm": 0.35615837574005127, "learning_rate": 8.915466646274666e-05, "loss": 0.7131, "step": 1030 }, { "epoch": 0.2376123530767458, "grad_norm": 0.37170687317848206, "learning_rate": 8.91314406091071e-05, "loss": 0.7194, "step": 1031 }, { "epoch": 0.23784282092648076, "grad_norm": 0.35245972871780396, "learning_rate": 8.910819294454177e-05, "loss": 0.7031, "step": 1032 }, { "epoch": 0.23807328877621572, "grad_norm": 0.2725777328014374, "learning_rate": 8.908492348200838e-05, "loss": 0.7062, "step": 1033 }, { "epoch": 0.23830375662595069, "grad_norm": 0.3370811343193054, "learning_rate": 8.906163223447676e-05, "loss": 0.7078, "step": 1034 }, { "epoch": 0.23853422447568565, "grad_norm": 0.35790640115737915, "learning_rate": 8.903831921492889e-05, "loss": 0.7107, "step": 1035 }, { "epoch": 0.2387646923254206, "grad_norm": 0.2598463296890259, "learning_rate": 8.90149844363589e-05, "loss": 0.7191, "step": 1036 }, { "epoch": 0.23899516017515557, "grad_norm": 0.2851402461528778, "learning_rate": 8.899162791177301e-05, "loss": 0.7103, "step": 1037 }, { "epoch": 0.23922562802489053, "grad_norm": 0.34928953647613525, "learning_rate": 8.896824965418961e-05, "loss": 0.7202, "step": 1038 }, { "epoch": 0.2394560958746255, "grad_norm": 0.3223908245563507, "learning_rate": 8.894484967663917e-05, "loss": 0.7086, "step": 1039 }, { "epoch": 0.23968656372436045, "grad_norm": 0.31179115176200867, "learning_rate": 8.892142799216428e-05, "loss": 0.7122, "step": 1040 }, { "epoch": 0.2399170315740954, "grad_norm": 0.3074735403060913, "learning_rate": 8.889798461381961e-05, "loss": 0.7091, "step": 1041 }, { "epoch": 0.24014749942383037, "grad_norm": 0.2974226474761963, "learning_rate": 8.887451955467195e-05, "loss": 0.7096, "step": 1042 }, { "epoch": 0.24037796727356533, "grad_norm": 0.3330132067203522, "learning_rate": 8.885103282780016e-05, "loss": 0.7104, "step": 1043 }, { "epoch": 0.2406084351233003, "grad_norm": 0.28745895624160767, "learning_rate": 8.882752444629518e-05, "loss": 0.7174, "step": 1044 }, { "epoch": 0.24083890297303526, "grad_norm": 0.2735026180744171, "learning_rate": 8.880399442326001e-05, "loss": 0.7211, "step": 1045 }, { "epoch": 0.24106937082277022, "grad_norm": 0.2821330726146698, "learning_rate": 8.878044277180975e-05, "loss": 0.7153, "step": 1046 }, { "epoch": 0.24129983867250518, "grad_norm": 0.30076584219932556, "learning_rate": 8.875686950507147e-05, "loss": 0.7149, "step": 1047 }, { "epoch": 0.24153030652224014, "grad_norm": 0.24282529950141907, "learning_rate": 8.873327463618438e-05, "loss": 0.7039, "step": 1048 }, { "epoch": 0.2417607743719751, "grad_norm": 0.26797229051589966, "learning_rate": 8.87096581782997e-05, "loss": 0.7115, "step": 1049 }, { "epoch": 0.24199124222171006, "grad_norm": 0.27635279297828674, "learning_rate": 8.868602014458065e-05, "loss": 0.7142, "step": 1050 }, { "epoch": 0.24222171007144502, "grad_norm": 0.6189936399459839, "learning_rate": 8.866236054820251e-05, "loss": 0.7268, "step": 1051 }, { "epoch": 0.24245217792117998, "grad_norm": 0.25462353229522705, "learning_rate": 8.86386794023526e-05, "loss": 0.7054, "step": 1052 }, { "epoch": 0.24268264577091495, "grad_norm": 0.2922193706035614, "learning_rate": 8.861497672023018e-05, "loss": 0.712, "step": 1053 }, { "epoch": 0.2429131136206499, "grad_norm": 0.28528186678886414, "learning_rate": 8.859125251504657e-05, "loss": 0.7158, "step": 1054 }, { "epoch": 0.24314358147038487, "grad_norm": 0.26813432574272156, "learning_rate": 8.85675068000251e-05, "loss": 0.7048, "step": 1055 }, { "epoch": 0.24337404932011986, "grad_norm": 0.27089470624923706, "learning_rate": 8.854373958840102e-05, "loss": 0.6999, "step": 1056 }, { "epoch": 0.24360451716985482, "grad_norm": 0.27272212505340576, "learning_rate": 8.851995089342163e-05, "loss": 0.7162, "step": 1057 }, { "epoch": 0.24383498501958978, "grad_norm": 0.30942168831825256, "learning_rate": 8.849614072834617e-05, "loss": 0.7128, "step": 1058 }, { "epoch": 0.24406545286932474, "grad_norm": 0.27839475870132446, "learning_rate": 8.847230910644586e-05, "loss": 0.7007, "step": 1059 }, { "epoch": 0.2442959207190597, "grad_norm": 0.28866031765937805, "learning_rate": 8.844845604100387e-05, "loss": 0.707, "step": 1060 }, { "epoch": 0.24452638856879466, "grad_norm": 0.30991750955581665, "learning_rate": 8.842458154531533e-05, "loss": 0.7081, "step": 1061 }, { "epoch": 0.24475685641852962, "grad_norm": 0.2961871325969696, "learning_rate": 8.840068563268728e-05, "loss": 0.7093, "step": 1062 }, { "epoch": 0.24498732426826458, "grad_norm": 0.26729321479797363, "learning_rate": 8.837676831643877e-05, "loss": 0.7093, "step": 1063 }, { "epoch": 0.24521779211799954, "grad_norm": 0.32758355140686035, "learning_rate": 8.835282960990073e-05, "loss": 0.7103, "step": 1064 }, { "epoch": 0.2454482599677345, "grad_norm": 0.2944065034389496, "learning_rate": 8.8328869526416e-05, "loss": 0.7166, "step": 1065 }, { "epoch": 0.24567872781746947, "grad_norm": 0.2752934992313385, "learning_rate": 8.830488807933937e-05, "loss": 0.7063, "step": 1066 }, { "epoch": 0.24590919566720443, "grad_norm": 0.29886919260025024, "learning_rate": 8.828088528203753e-05, "loss": 0.7187, "step": 1067 }, { "epoch": 0.2461396635169394, "grad_norm": 0.32834017276763916, "learning_rate": 8.825686114788904e-05, "loss": 0.7117, "step": 1068 }, { "epoch": 0.24637013136667435, "grad_norm": 0.3167296051979065, "learning_rate": 8.823281569028439e-05, "loss": 0.7019, "step": 1069 }, { "epoch": 0.2466005992164093, "grad_norm": 0.29267629981040955, "learning_rate": 8.820874892262596e-05, "loss": 0.7158, "step": 1070 }, { "epoch": 0.24683106706614427, "grad_norm": 0.29060524702072144, "learning_rate": 8.818466085832796e-05, "loss": 0.7046, "step": 1071 }, { "epoch": 0.24706153491587923, "grad_norm": 0.29323479533195496, "learning_rate": 8.81605515108165e-05, "loss": 0.7078, "step": 1072 }, { "epoch": 0.2472920027656142, "grad_norm": 0.27811145782470703, "learning_rate": 8.813642089352957e-05, "loss": 0.7155, "step": 1073 }, { "epoch": 0.24752247061534915, "grad_norm": 0.31902894377708435, "learning_rate": 8.811226901991698e-05, "loss": 0.7104, "step": 1074 }, { "epoch": 0.24775293846508412, "grad_norm": 0.2658187747001648, "learning_rate": 8.808809590344042e-05, "loss": 0.6963, "step": 1075 }, { "epoch": 0.24798340631481908, "grad_norm": 0.2968449592590332, "learning_rate": 8.806390155757339e-05, "loss": 0.703, "step": 1076 }, { "epoch": 0.24821387416455404, "grad_norm": 0.29377731680870056, "learning_rate": 8.803968599580125e-05, "loss": 0.7063, "step": 1077 }, { "epoch": 0.248444342014289, "grad_norm": 0.2845487594604492, "learning_rate": 8.801544923162116e-05, "loss": 0.7152, "step": 1078 }, { "epoch": 0.24867480986402396, "grad_norm": 0.2773647904396057, "learning_rate": 8.799119127854212e-05, "loss": 0.7128, "step": 1079 }, { "epoch": 0.24890527771375892, "grad_norm": 0.26741766929626465, "learning_rate": 8.796691215008492e-05, "loss": 0.7124, "step": 1080 }, { "epoch": 0.24913574556349388, "grad_norm": 0.28756871819496155, "learning_rate": 8.794261185978219e-05, "loss": 0.7144, "step": 1081 }, { "epoch": 0.24936621341322884, "grad_norm": 0.29482561349868774, "learning_rate": 8.79182904211783e-05, "loss": 0.712, "step": 1082 }, { "epoch": 0.2495966812629638, "grad_norm": 0.2730870842933655, "learning_rate": 8.789394784782945e-05, "loss": 0.7076, "step": 1083 }, { "epoch": 0.24982714911269877, "grad_norm": 0.2692764103412628, "learning_rate": 8.786958415330359e-05, "loss": 0.7024, "step": 1084 }, { "epoch": 0.2500576169624337, "grad_norm": 0.2866170108318329, "learning_rate": 8.78451993511805e-05, "loss": 0.7082, "step": 1085 }, { "epoch": 0.2502880848121687, "grad_norm": 0.300509512424469, "learning_rate": 8.782079345505163e-05, "loss": 0.7082, "step": 1086 }, { "epoch": 0.25051855266190365, "grad_norm": 0.2729628086090088, "learning_rate": 8.779636647852027e-05, "loss": 0.7143, "step": 1087 }, { "epoch": 0.2507490205116386, "grad_norm": 0.26800036430358887, "learning_rate": 8.777191843520142e-05, "loss": 0.7149, "step": 1088 }, { "epoch": 0.25097948836137357, "grad_norm": 0.2820512056350708, "learning_rate": 8.774744933872186e-05, "loss": 0.7154, "step": 1089 }, { "epoch": 0.25120995621110853, "grad_norm": 0.2764631509780884, "learning_rate": 8.772295920272003e-05, "loss": 0.7179, "step": 1090 }, { "epoch": 0.2514404240608435, "grad_norm": 0.23512467741966248, "learning_rate": 8.769844804084619e-05, "loss": 0.7126, "step": 1091 }, { "epoch": 0.25167089191057845, "grad_norm": 0.25535455346107483, "learning_rate": 8.767391586676223e-05, "loss": 0.7041, "step": 1092 }, { "epoch": 0.2519013597603134, "grad_norm": 0.250704288482666, "learning_rate": 8.764936269414184e-05, "loss": 0.7144, "step": 1093 }, { "epoch": 0.2521318276100484, "grad_norm": 0.353876531124115, "learning_rate": 8.762478853667033e-05, "loss": 0.7028, "step": 1094 }, { "epoch": 0.25236229545978334, "grad_norm": 0.2544647455215454, "learning_rate": 8.760019340804478e-05, "loss": 0.7035, "step": 1095 }, { "epoch": 0.2525927633095183, "grad_norm": 0.2700643241405487, "learning_rate": 8.75755773219739e-05, "loss": 0.7063, "step": 1096 }, { "epoch": 0.25282323115925326, "grad_norm": 0.3172542154788971, "learning_rate": 8.755094029217809e-05, "loss": 0.7113, "step": 1097 }, { "epoch": 0.2530536990089882, "grad_norm": 0.2928423583507538, "learning_rate": 8.75262823323895e-05, "loss": 0.7074, "step": 1098 }, { "epoch": 0.2532841668587232, "grad_norm": 0.27802032232284546, "learning_rate": 8.750160345635183e-05, "loss": 0.7005, "step": 1099 }, { "epoch": 0.2535146347084582, "grad_norm": 0.313966304063797, "learning_rate": 8.74769036778205e-05, "loss": 0.7019, "step": 1100 }, { "epoch": 0.25374510255819316, "grad_norm": 0.3200061619281769, "learning_rate": 8.74521830105626e-05, "loss": 0.7082, "step": 1101 }, { "epoch": 0.2539755704079281, "grad_norm": 0.30982211232185364, "learning_rate": 8.74274414683568e-05, "loss": 0.7161, "step": 1102 }, { "epoch": 0.2542060382576631, "grad_norm": 0.26937606930732727, "learning_rate": 8.740267906499346e-05, "loss": 0.7037, "step": 1103 }, { "epoch": 0.25443650610739804, "grad_norm": 0.33216673135757446, "learning_rate": 8.737789581427455e-05, "loss": 0.7087, "step": 1104 }, { "epoch": 0.254666973957133, "grad_norm": 0.31464841961860657, "learning_rate": 8.73530917300137e-05, "loss": 0.7027, "step": 1105 }, { "epoch": 0.25489744180686796, "grad_norm": 0.2963610887527466, "learning_rate": 8.732826682603603e-05, "loss": 0.7244, "step": 1106 }, { "epoch": 0.2551279096566029, "grad_norm": 0.28852593898773193, "learning_rate": 8.73034211161784e-05, "loss": 0.703, "step": 1107 }, { "epoch": 0.2553583775063379, "grad_norm": 0.2891587018966675, "learning_rate": 8.72785546142892e-05, "loss": 0.6958, "step": 1108 }, { "epoch": 0.25558884535607285, "grad_norm": 0.3704643249511719, "learning_rate": 8.725366733422842e-05, "loss": 0.7071, "step": 1109 }, { "epoch": 0.2558193132058078, "grad_norm": 0.3873230516910553, "learning_rate": 8.722875928986762e-05, "loss": 0.708, "step": 1110 }, { "epoch": 0.25604978105554277, "grad_norm": 0.29551389813423157, "learning_rate": 8.720383049508997e-05, "loss": 0.6972, "step": 1111 }, { "epoch": 0.25628024890527773, "grad_norm": 0.2649083137512207, "learning_rate": 8.717888096379018e-05, "loss": 0.7027, "step": 1112 }, { "epoch": 0.2565107167550127, "grad_norm": 0.33623412251472473, "learning_rate": 8.71539107098745e-05, "loss": 0.7038, "step": 1113 }, { "epoch": 0.25674118460474765, "grad_norm": 0.450360506772995, "learning_rate": 8.712891974726076e-05, "loss": 0.7067, "step": 1114 }, { "epoch": 0.2569716524544826, "grad_norm": 1.8468785285949707, "learning_rate": 8.710390808987833e-05, "loss": 0.7232, "step": 1115 }, { "epoch": 0.2572021203042176, "grad_norm": 0.29168421030044556, "learning_rate": 8.70788757516681e-05, "loss": 0.6999, "step": 1116 }, { "epoch": 0.25743258815395254, "grad_norm": 0.3151211738586426, "learning_rate": 8.705382274658249e-05, "loss": 0.7001, "step": 1117 }, { "epoch": 0.2576630560036875, "grad_norm": 0.3864755630493164, "learning_rate": 8.702874908858545e-05, "loss": 0.7102, "step": 1118 }, { "epoch": 0.25789352385342246, "grad_norm": 0.3055296242237091, "learning_rate": 8.700365479165244e-05, "loss": 0.6979, "step": 1119 }, { "epoch": 0.2581239917031574, "grad_norm": 0.3239305317401886, "learning_rate": 8.697853986977041e-05, "loss": 0.7155, "step": 1120 }, { "epoch": 0.2583544595528924, "grad_norm": 0.29787853360176086, "learning_rate": 8.695340433693781e-05, "loss": 0.7129, "step": 1121 }, { "epoch": 0.25858492740262734, "grad_norm": 0.2536768615245819, "learning_rate": 8.692824820716461e-05, "loss": 0.7032, "step": 1122 }, { "epoch": 0.2588153952523623, "grad_norm": 0.31833314895629883, "learning_rate": 8.690307149447221e-05, "loss": 0.709, "step": 1123 }, { "epoch": 0.25904586310209726, "grad_norm": 0.30274853110313416, "learning_rate": 8.68778742128935e-05, "loss": 0.7057, "step": 1124 }, { "epoch": 0.2592763309518322, "grad_norm": 0.2601548135280609, "learning_rate": 8.685265637647284e-05, "loss": 0.7002, "step": 1125 }, { "epoch": 0.2595067988015672, "grad_norm": 0.26416724920272827, "learning_rate": 8.682741799926609e-05, "loss": 0.7109, "step": 1126 }, { "epoch": 0.25973726665130215, "grad_norm": 0.3520905375480652, "learning_rate": 8.680215909534044e-05, "loss": 0.7183, "step": 1127 }, { "epoch": 0.2599677345010371, "grad_norm": 0.2665805220603943, "learning_rate": 8.677687967877466e-05, "loss": 0.7108, "step": 1128 }, { "epoch": 0.26019820235077207, "grad_norm": 0.3360787034034729, "learning_rate": 8.675157976365886e-05, "loss": 0.7033, "step": 1129 }, { "epoch": 0.26042867020050703, "grad_norm": 0.28345534205436707, "learning_rate": 8.672625936409462e-05, "loss": 0.7068, "step": 1130 }, { "epoch": 0.260659138050242, "grad_norm": 0.2835051119327545, "learning_rate": 8.67009184941949e-05, "loss": 0.7021, "step": 1131 }, { "epoch": 0.26088960589997695, "grad_norm": 0.2675539553165436, "learning_rate": 8.667555716808414e-05, "loss": 0.7028, "step": 1132 }, { "epoch": 0.2611200737497119, "grad_norm": 0.29232192039489746, "learning_rate": 8.665017539989808e-05, "loss": 0.7108, "step": 1133 }, { "epoch": 0.2613505415994469, "grad_norm": 0.27097558975219727, "learning_rate": 8.662477320378395e-05, "loss": 0.7142, "step": 1134 }, { "epoch": 0.26158100944918183, "grad_norm": 0.2470446676015854, "learning_rate": 8.659935059390028e-05, "loss": 0.7061, "step": 1135 }, { "epoch": 0.2618114772989168, "grad_norm": 0.2592596113681793, "learning_rate": 8.657390758441708e-05, "loss": 0.705, "step": 1136 }, { "epoch": 0.26204194514865176, "grad_norm": 0.27617162466049194, "learning_rate": 8.654844418951563e-05, "loss": 0.7039, "step": 1137 }, { "epoch": 0.2622724129983867, "grad_norm": 0.2819935977458954, "learning_rate": 8.652296042338861e-05, "loss": 0.7089, "step": 1138 }, { "epoch": 0.2625028808481217, "grad_norm": 0.24662001430988312, "learning_rate": 8.64974563002401e-05, "loss": 0.7052, "step": 1139 }, { "epoch": 0.26273334869785664, "grad_norm": 0.28374093770980835, "learning_rate": 8.647193183428545e-05, "loss": 0.7071, "step": 1140 }, { "epoch": 0.2629638165475916, "grad_norm": 0.2856753468513489, "learning_rate": 8.64463870397514e-05, "loss": 0.7074, "step": 1141 }, { "epoch": 0.26319428439732656, "grad_norm": 0.2997211813926697, "learning_rate": 8.6420821930876e-05, "loss": 0.7084, "step": 1142 }, { "epoch": 0.2634247522470615, "grad_norm": 0.26690179109573364, "learning_rate": 8.639523652190864e-05, "loss": 0.7025, "step": 1143 }, { "epoch": 0.2636552200967965, "grad_norm": 0.27999091148376465, "learning_rate": 8.636963082710999e-05, "loss": 0.7008, "step": 1144 }, { "epoch": 0.26388568794653144, "grad_norm": 0.3172776997089386, "learning_rate": 8.634400486075207e-05, "loss": 0.7137, "step": 1145 }, { "epoch": 0.2641161557962664, "grad_norm": 0.30888575315475464, "learning_rate": 8.631835863711817e-05, "loss": 0.7105, "step": 1146 }, { "epoch": 0.26434662364600137, "grad_norm": 0.28774750232696533, "learning_rate": 8.629269217050289e-05, "loss": 0.7057, "step": 1147 }, { "epoch": 0.2645770914957363, "grad_norm": 0.24954736232757568, "learning_rate": 8.626700547521209e-05, "loss": 0.7165, "step": 1148 }, { "epoch": 0.2648075593454713, "grad_norm": 0.2624035179615021, "learning_rate": 8.624129856556291e-05, "loss": 0.7006, "step": 1149 }, { "epoch": 0.26503802719520625, "grad_norm": 0.2522975504398346, "learning_rate": 8.62155714558838e-05, "loss": 0.7096, "step": 1150 }, { "epoch": 0.2652684950449412, "grad_norm": 0.2908228039741516, "learning_rate": 8.618982416051438e-05, "loss": 0.7077, "step": 1151 }, { "epoch": 0.26549896289467617, "grad_norm": 0.23220433294773102, "learning_rate": 8.616405669380561e-05, "loss": 0.7094, "step": 1152 }, { "epoch": 0.26572943074441113, "grad_norm": 0.31848591566085815, "learning_rate": 8.613826907011965e-05, "loss": 0.7091, "step": 1153 }, { "epoch": 0.2659598985941461, "grad_norm": 0.2557988464832306, "learning_rate": 8.611246130382992e-05, "loss": 0.7028, "step": 1154 }, { "epoch": 0.26619036644388105, "grad_norm": 0.24541644752025604, "learning_rate": 8.608663340932104e-05, "loss": 0.6998, "step": 1155 }, { "epoch": 0.266420834293616, "grad_norm": 0.25283804535865784, "learning_rate": 8.606078540098883e-05, "loss": 0.7044, "step": 1156 }, { "epoch": 0.266651302143351, "grad_norm": 0.24628406763076782, "learning_rate": 8.603491729324039e-05, "loss": 0.7136, "step": 1157 }, { "epoch": 0.26688176999308594, "grad_norm": 0.24645006656646729, "learning_rate": 8.600902910049393e-05, "loss": 0.7039, "step": 1158 }, { "epoch": 0.2671122378428209, "grad_norm": 0.23496174812316895, "learning_rate": 8.598312083717896e-05, "loss": 0.7146, "step": 1159 }, { "epoch": 0.2673427056925559, "grad_norm": 0.26855039596557617, "learning_rate": 8.59571925177361e-05, "loss": 0.7036, "step": 1160 }, { "epoch": 0.2675731735422909, "grad_norm": 0.21970762312412262, "learning_rate": 8.59312441566172e-05, "loss": 0.7109, "step": 1161 }, { "epoch": 0.26780364139202584, "grad_norm": 0.2488812357187271, "learning_rate": 8.59052757682852e-05, "loss": 0.703, "step": 1162 }, { "epoch": 0.2680341092417608, "grad_norm": 0.2618003487586975, "learning_rate": 8.587928736721432e-05, "loss": 0.695, "step": 1163 }, { "epoch": 0.26826457709149576, "grad_norm": 0.25669237971305847, "learning_rate": 8.585327896788981e-05, "loss": 0.6989, "step": 1164 }, { "epoch": 0.2684950449412307, "grad_norm": 0.26360106468200684, "learning_rate": 8.582725058480816e-05, "loss": 0.7075, "step": 1165 }, { "epoch": 0.2687255127909657, "grad_norm": 0.24905282258987427, "learning_rate": 8.580120223247697e-05, "loss": 0.7065, "step": 1166 }, { "epoch": 0.26895598064070064, "grad_norm": 0.343464195728302, "learning_rate": 8.577513392541496e-05, "loss": 0.6978, "step": 1167 }, { "epoch": 0.2691864484904356, "grad_norm": 0.2586437463760376, "learning_rate": 8.574904567815196e-05, "loss": 0.7056, "step": 1168 }, { "epoch": 0.26941691634017056, "grad_norm": 0.23977968096733093, "learning_rate": 8.572293750522897e-05, "loss": 0.7056, "step": 1169 }, { "epoch": 0.2696473841899055, "grad_norm": 0.2433982640504837, "learning_rate": 8.569680942119804e-05, "loss": 0.7021, "step": 1170 }, { "epoch": 0.2698778520396405, "grad_norm": 0.26769572496414185, "learning_rate": 8.567066144062232e-05, "loss": 0.6999, "step": 1171 }, { "epoch": 0.27010831988937545, "grad_norm": 0.24118681252002716, "learning_rate": 8.564449357807608e-05, "loss": 0.707, "step": 1172 }, { "epoch": 0.2703387877391104, "grad_norm": 0.2884605824947357, "learning_rate": 8.561830584814468e-05, "loss": 0.7099, "step": 1173 }, { "epoch": 0.27056925558884537, "grad_norm": 0.28720924258232117, "learning_rate": 8.559209826542451e-05, "loss": 0.6992, "step": 1174 }, { "epoch": 0.27079972343858033, "grad_norm": 0.24960888922214508, "learning_rate": 8.556587084452305e-05, "loss": 0.7054, "step": 1175 }, { "epoch": 0.2710301912883153, "grad_norm": 0.2377004772424698, "learning_rate": 8.553962360005882e-05, "loss": 0.7014, "step": 1176 }, { "epoch": 0.27126065913805025, "grad_norm": 0.2665834426879883, "learning_rate": 8.551335654666144e-05, "loss": 0.7019, "step": 1177 }, { "epoch": 0.2714911269877852, "grad_norm": 0.25975316762924194, "learning_rate": 8.54870696989715e-05, "loss": 0.6971, "step": 1178 }, { "epoch": 0.2717215948375202, "grad_norm": 0.2380094677209854, "learning_rate": 8.546076307164068e-05, "loss": 0.6961, "step": 1179 }, { "epoch": 0.27195206268725514, "grad_norm": 0.24472801387310028, "learning_rate": 8.543443667933166e-05, "loss": 0.7049, "step": 1180 }, { "epoch": 0.2721825305369901, "grad_norm": 0.2646014094352722, "learning_rate": 8.540809053671812e-05, "loss": 0.7148, "step": 1181 }, { "epoch": 0.27241299838672506, "grad_norm": 0.23404592275619507, "learning_rate": 8.538172465848479e-05, "loss": 0.7094, "step": 1182 }, { "epoch": 0.27264346623646, "grad_norm": 0.2830381691455841, "learning_rate": 8.535533905932738e-05, "loss": 0.7061, "step": 1183 }, { "epoch": 0.272873934086195, "grad_norm": 0.2919880747795105, "learning_rate": 8.532893375395257e-05, "loss": 0.7036, "step": 1184 }, { "epoch": 0.27310440193592994, "grad_norm": 0.24687440693378448, "learning_rate": 8.530250875707806e-05, "loss": 0.7004, "step": 1185 }, { "epoch": 0.2733348697856649, "grad_norm": 0.2443513572216034, "learning_rate": 8.527606408343249e-05, "loss": 0.6899, "step": 1186 }, { "epoch": 0.27356533763539986, "grad_norm": 0.2695756256580353, "learning_rate": 8.524959974775551e-05, "loss": 0.7065, "step": 1187 }, { "epoch": 0.2737958054851348, "grad_norm": 0.24225404858589172, "learning_rate": 8.522311576479768e-05, "loss": 0.7019, "step": 1188 }, { "epoch": 0.2740262733348698, "grad_norm": 0.2715757191181183, "learning_rate": 8.519661214932055e-05, "loss": 0.7099, "step": 1189 }, { "epoch": 0.27425674118460475, "grad_norm": 0.31206077337265015, "learning_rate": 8.517008891609661e-05, "loss": 0.7088, "step": 1190 }, { "epoch": 0.2744872090343397, "grad_norm": 0.2775088846683502, "learning_rate": 8.514354607990926e-05, "loss": 0.7031, "step": 1191 }, { "epoch": 0.27471767688407467, "grad_norm": 0.2421555519104004, "learning_rate": 8.511698365555285e-05, "loss": 0.7112, "step": 1192 }, { "epoch": 0.27494814473380963, "grad_norm": 0.31836438179016113, "learning_rate": 8.509040165783263e-05, "loss": 0.7034, "step": 1193 }, { "epoch": 0.2751786125835446, "grad_norm": 0.3080047369003296, "learning_rate": 8.506380010156476e-05, "loss": 0.7053, "step": 1194 }, { "epoch": 0.27540908043327955, "grad_norm": 0.2516467869281769, "learning_rate": 8.503717900157632e-05, "loss": 0.7077, "step": 1195 }, { "epoch": 0.2756395482830145, "grad_norm": 0.26254069805145264, "learning_rate": 8.501053837270528e-05, "loss": 0.7066, "step": 1196 }, { "epoch": 0.2758700161327495, "grad_norm": 0.3127571940422058, "learning_rate": 8.498387822980046e-05, "loss": 0.6957, "step": 1197 }, { "epoch": 0.27610048398248443, "grad_norm": 0.2957381308078766, "learning_rate": 8.49571985877216e-05, "loss": 0.6919, "step": 1198 }, { "epoch": 0.2763309518322194, "grad_norm": 0.29202744364738464, "learning_rate": 8.49304994613393e-05, "loss": 0.7101, "step": 1199 }, { "epoch": 0.27656141968195436, "grad_norm": 0.272582083940506, "learning_rate": 8.490378086553499e-05, "loss": 0.6988, "step": 1200 }, { "epoch": 0.2767918875316893, "grad_norm": 0.24930858612060547, "learning_rate": 8.4877042815201e-05, "loss": 0.699, "step": 1201 }, { "epoch": 0.2770223553814243, "grad_norm": 0.2611529231071472, "learning_rate": 8.485028532524046e-05, "loss": 0.7102, "step": 1202 }, { "epoch": 0.27725282323115924, "grad_norm": 0.3048509359359741, "learning_rate": 8.482350841056737e-05, "loss": 0.7006, "step": 1203 }, { "epoch": 0.2774832910808942, "grad_norm": 0.26629912853240967, "learning_rate": 8.479671208610653e-05, "loss": 0.6953, "step": 1204 }, { "epoch": 0.27771375893062916, "grad_norm": 0.27922797203063965, "learning_rate": 8.476989636679355e-05, "loss": 0.7101, "step": 1205 }, { "epoch": 0.2779442267803641, "grad_norm": 0.26804566383361816, "learning_rate": 8.47430612675749e-05, "loss": 0.7082, "step": 1206 }, { "epoch": 0.2781746946300991, "grad_norm": 0.3235054910182953, "learning_rate": 8.471620680340779e-05, "loss": 0.6958, "step": 1207 }, { "epoch": 0.27840516247983405, "grad_norm": 0.260946124792099, "learning_rate": 8.468933298926029e-05, "loss": 0.6972, "step": 1208 }, { "epoch": 0.278635630329569, "grad_norm": 0.2844315469264984, "learning_rate": 8.466243984011117e-05, "loss": 0.6987, "step": 1209 }, { "epoch": 0.27886609817930397, "grad_norm": 0.2835547626018524, "learning_rate": 8.463552737095007e-05, "loss": 0.6999, "step": 1210 }, { "epoch": 0.27909656602903893, "grad_norm": 0.2883650064468384, "learning_rate": 8.460859559677734e-05, "loss": 0.7028, "step": 1211 }, { "epoch": 0.2793270338787739, "grad_norm": 0.2849369943141937, "learning_rate": 8.458164453260408e-05, "loss": 0.7007, "step": 1212 }, { "epoch": 0.27955750172850885, "grad_norm": 0.24277077615261078, "learning_rate": 8.455467419345222e-05, "loss": 0.698, "step": 1213 }, { "epoch": 0.2797879695782438, "grad_norm": 0.2855810821056366, "learning_rate": 8.452768459435434e-05, "loss": 0.6947, "step": 1214 }, { "epoch": 0.2800184374279788, "grad_norm": 0.2892487645149231, "learning_rate": 8.450067575035378e-05, "loss": 0.7041, "step": 1215 }, { "epoch": 0.28024890527771373, "grad_norm": 0.2550940215587616, "learning_rate": 8.447364767650468e-05, "loss": 0.6982, "step": 1216 }, { "epoch": 0.2804793731274487, "grad_norm": 0.28303179144859314, "learning_rate": 8.444660038787178e-05, "loss": 0.6891, "step": 1217 }, { "epoch": 0.28070984097718366, "grad_norm": 0.29353299736976624, "learning_rate": 8.441953389953062e-05, "loss": 0.699, "step": 1218 }, { "epoch": 0.28094030882691867, "grad_norm": 0.2536478340625763, "learning_rate": 8.43924482265674e-05, "loss": 0.7046, "step": 1219 }, { "epoch": 0.28117077667665363, "grad_norm": 0.32717081904411316, "learning_rate": 8.436534338407902e-05, "loss": 0.7079, "step": 1220 }, { "epoch": 0.2814012445263886, "grad_norm": 0.2975577116012573, "learning_rate": 8.43382193871731e-05, "loss": 0.7002, "step": 1221 }, { "epoch": 0.28163171237612356, "grad_norm": 0.25119921565055847, "learning_rate": 8.431107625096787e-05, "loss": 0.702, "step": 1222 }, { "epoch": 0.2818621802258585, "grad_norm": 0.30405279994010925, "learning_rate": 8.428391399059228e-05, "loss": 0.6982, "step": 1223 }, { "epoch": 0.2820926480755935, "grad_norm": 0.2723596692085266, "learning_rate": 8.42567326211859e-05, "loss": 0.7001, "step": 1224 }, { "epoch": 0.28232311592532844, "grad_norm": 0.226847305893898, "learning_rate": 8.4229532157899e-05, "loss": 0.6981, "step": 1225 }, { "epoch": 0.2825535837750634, "grad_norm": 0.2864554226398468, "learning_rate": 8.420231261589246e-05, "loss": 0.7076, "step": 1226 }, { "epoch": 0.28278405162479836, "grad_norm": 0.24330449104309082, "learning_rate": 8.417507401033779e-05, "loss": 0.6981, "step": 1227 }, { "epoch": 0.2830145194745333, "grad_norm": 0.3172352612018585, "learning_rate": 8.414781635641714e-05, "loss": 0.6957, "step": 1228 }, { "epoch": 0.2832449873242683, "grad_norm": 0.2853315770626068, "learning_rate": 8.412053966932326e-05, "loss": 0.7, "step": 1229 }, { "epoch": 0.28347545517400324, "grad_norm": 0.3434127867221832, "learning_rate": 8.409324396425954e-05, "loss": 0.7085, "step": 1230 }, { "epoch": 0.2837059230237382, "grad_norm": 0.2804059088230133, "learning_rate": 8.406592925643995e-05, "loss": 0.7037, "step": 1231 }, { "epoch": 0.28393639087347317, "grad_norm": 0.26557254791259766, "learning_rate": 8.403859556108904e-05, "loss": 0.699, "step": 1232 }, { "epoch": 0.2841668587232081, "grad_norm": 0.28296953439712524, "learning_rate": 8.401124289344195e-05, "loss": 0.6907, "step": 1233 }, { "epoch": 0.2843973265729431, "grad_norm": 0.2905629873275757, "learning_rate": 8.398387126874443e-05, "loss": 0.6962, "step": 1234 }, { "epoch": 0.28462779442267805, "grad_norm": 0.2673647403717041, "learning_rate": 8.395648070225272e-05, "loss": 0.6941, "step": 1235 }, { "epoch": 0.284858262272413, "grad_norm": 0.30280497670173645, "learning_rate": 8.392907120923373e-05, "loss": 0.6993, "step": 1236 }, { "epoch": 0.28508873012214797, "grad_norm": 0.28587183356285095, "learning_rate": 8.39016428049648e-05, "loss": 0.696, "step": 1237 }, { "epoch": 0.28531919797188293, "grad_norm": 0.24387072026729584, "learning_rate": 8.387419550473387e-05, "loss": 0.7027, "step": 1238 }, { "epoch": 0.2855496658216179, "grad_norm": 0.2583426237106323, "learning_rate": 8.384672932383942e-05, "loss": 0.701, "step": 1239 }, { "epoch": 0.28578013367135285, "grad_norm": 0.2734989821910858, "learning_rate": 8.381924427759044e-05, "loss": 0.6959, "step": 1240 }, { "epoch": 0.2860106015210878, "grad_norm": 0.2484637349843979, "learning_rate": 8.379174038130643e-05, "loss": 0.6997, "step": 1241 }, { "epoch": 0.2862410693708228, "grad_norm": 0.2433558702468872, "learning_rate": 8.376421765031741e-05, "loss": 0.6998, "step": 1242 }, { "epoch": 0.28647153722055774, "grad_norm": 0.2775626480579376, "learning_rate": 8.373667609996387e-05, "loss": 0.6962, "step": 1243 }, { "epoch": 0.2867020050702927, "grad_norm": 0.26966193318367004, "learning_rate": 8.370911574559683e-05, "loss": 0.7024, "step": 1244 }, { "epoch": 0.28693247292002766, "grad_norm": 0.2636489272117615, "learning_rate": 8.368153660257776e-05, "loss": 0.7041, "step": 1245 }, { "epoch": 0.2871629407697626, "grad_norm": 0.2798607051372528, "learning_rate": 8.365393868627862e-05, "loss": 0.6912, "step": 1246 }, { "epoch": 0.2873934086194976, "grad_norm": 0.26497724652290344, "learning_rate": 8.362632201208181e-05, "loss": 0.6995, "step": 1247 }, { "epoch": 0.28762387646923254, "grad_norm": 0.26089614629745483, "learning_rate": 8.359868659538022e-05, "loss": 0.6994, "step": 1248 }, { "epoch": 0.2878543443189675, "grad_norm": 0.2735275328159332, "learning_rate": 8.357103245157715e-05, "loss": 0.6995, "step": 1249 }, { "epoch": 0.28808481216870246, "grad_norm": 0.27281323075294495, "learning_rate": 8.354335959608638e-05, "loss": 0.6947, "step": 1250 }, { "epoch": 0.2883152800184374, "grad_norm": 0.2844576835632324, "learning_rate": 8.351566804433207e-05, "loss": 0.7073, "step": 1251 }, { "epoch": 0.2885457478681724, "grad_norm": 0.2630547285079956, "learning_rate": 8.348795781174885e-05, "loss": 0.7021, "step": 1252 }, { "epoch": 0.28877621571790735, "grad_norm": 0.23190078139305115, "learning_rate": 8.346022891378172e-05, "loss": 0.7013, "step": 1253 }, { "epoch": 0.2890066835676423, "grad_norm": 0.28771939873695374, "learning_rate": 8.34324813658861e-05, "loss": 0.7027, "step": 1254 }, { "epoch": 0.28923715141737727, "grad_norm": 0.2860211133956909, "learning_rate": 8.340471518352781e-05, "loss": 0.7008, "step": 1255 }, { "epoch": 0.28946761926711223, "grad_norm": 0.26749560236930847, "learning_rate": 8.337693038218308e-05, "loss": 0.7003, "step": 1256 }, { "epoch": 0.2896980871168472, "grad_norm": 0.22241248190402985, "learning_rate": 8.334912697733845e-05, "loss": 0.7045, "step": 1257 }, { "epoch": 0.28992855496658215, "grad_norm": 0.2874812185764313, "learning_rate": 8.33213049844909e-05, "loss": 0.696, "step": 1258 }, { "epoch": 0.2901590228163171, "grad_norm": 0.28802555799484253, "learning_rate": 8.329346441914774e-05, "loss": 0.6997, "step": 1259 }, { "epoch": 0.2903894906660521, "grad_norm": 0.24786509573459625, "learning_rate": 8.326560529682661e-05, "loss": 0.6983, "step": 1260 }, { "epoch": 0.29061995851578704, "grad_norm": 0.2500215768814087, "learning_rate": 8.323772763305554e-05, "loss": 0.7038, "step": 1261 }, { "epoch": 0.290850426365522, "grad_norm": 0.22261877357959747, "learning_rate": 8.320983144337286e-05, "loss": 0.6928, "step": 1262 }, { "epoch": 0.29108089421525696, "grad_norm": 0.26974332332611084, "learning_rate": 8.318191674332724e-05, "loss": 0.7024, "step": 1263 }, { "epoch": 0.2913113620649919, "grad_norm": 0.2661108374595642, "learning_rate": 8.315398354847766e-05, "loss": 0.6996, "step": 1264 }, { "epoch": 0.2915418299147269, "grad_norm": 0.2478044480085373, "learning_rate": 8.31260318743934e-05, "loss": 0.6913, "step": 1265 }, { "epoch": 0.29177229776446184, "grad_norm": 0.28388431668281555, "learning_rate": 8.309806173665409e-05, "loss": 0.6914, "step": 1266 }, { "epoch": 0.2920027656141968, "grad_norm": 0.26973721385002136, "learning_rate": 8.307007315084958e-05, "loss": 0.706, "step": 1267 }, { "epoch": 0.29223323346393176, "grad_norm": 0.2543336749076843, "learning_rate": 8.304206613258003e-05, "loss": 0.6988, "step": 1268 }, { "epoch": 0.2924637013136667, "grad_norm": 0.2953673303127289, "learning_rate": 8.301404069745592e-05, "loss": 0.6876, "step": 1269 }, { "epoch": 0.2926941691634017, "grad_norm": 0.24503032863140106, "learning_rate": 8.298599686109792e-05, "loss": 0.6943, "step": 1270 }, { "epoch": 0.29292463701313665, "grad_norm": 0.2405981868505478, "learning_rate": 8.2957934639137e-05, "loss": 0.7012, "step": 1271 }, { "epoch": 0.2931551048628716, "grad_norm": 0.23449143767356873, "learning_rate": 8.292985404721438e-05, "loss": 0.6919, "step": 1272 }, { "epoch": 0.29338557271260657, "grad_norm": 0.2791789472103119, "learning_rate": 8.29017551009815e-05, "loss": 0.6987, "step": 1273 }, { "epoch": 0.29361604056234153, "grad_norm": 0.26588696241378784, "learning_rate": 8.287363781610003e-05, "loss": 0.7009, "step": 1274 }, { "epoch": 0.2938465084120765, "grad_norm": 0.2526492178440094, "learning_rate": 8.284550220824187e-05, "loss": 0.7001, "step": 1275 }, { "epoch": 0.29407697626181145, "grad_norm": 0.23610974848270416, "learning_rate": 8.281734829308914e-05, "loss": 0.6982, "step": 1276 }, { "epoch": 0.2943074441115464, "grad_norm": 0.24589475989341736, "learning_rate": 8.278917608633416e-05, "loss": 0.7077, "step": 1277 }, { "epoch": 0.29453791196128143, "grad_norm": 0.25068679451942444, "learning_rate": 8.276098560367944e-05, "loss": 0.6999, "step": 1278 }, { "epoch": 0.2947683798110164, "grad_norm": 0.24742679297924042, "learning_rate": 8.273277686083767e-05, "loss": 0.7018, "step": 1279 }, { "epoch": 0.29499884766075135, "grad_norm": 0.25572851300239563, "learning_rate": 8.270454987353173e-05, "loss": 0.7108, "step": 1280 }, { "epoch": 0.2952293155104863, "grad_norm": 0.2460428923368454, "learning_rate": 8.267630465749467e-05, "loss": 0.6992, "step": 1281 }, { "epoch": 0.2954597833602213, "grad_norm": 0.2824234068393707, "learning_rate": 8.264804122846969e-05, "loss": 0.6957, "step": 1282 }, { "epoch": 0.29569025120995623, "grad_norm": 0.23757845163345337, "learning_rate": 8.261975960221017e-05, "loss": 0.6955, "step": 1283 }, { "epoch": 0.2959207190596912, "grad_norm": 0.2634974718093872, "learning_rate": 8.259145979447957e-05, "loss": 0.6982, "step": 1284 }, { "epoch": 0.29615118690942616, "grad_norm": 0.26144111156463623, "learning_rate": 8.256314182105155e-05, "loss": 0.7052, "step": 1285 }, { "epoch": 0.2963816547591611, "grad_norm": 0.2734619081020355, "learning_rate": 8.253480569770986e-05, "loss": 0.705, "step": 1286 }, { "epoch": 0.2966121226088961, "grad_norm": 0.28824108839035034, "learning_rate": 8.250645144024838e-05, "loss": 0.7058, "step": 1287 }, { "epoch": 0.29684259045863104, "grad_norm": 0.2391139417886734, "learning_rate": 8.247807906447108e-05, "loss": 0.6906, "step": 1288 }, { "epoch": 0.297073058308366, "grad_norm": 0.2693355977535248, "learning_rate": 8.244968858619206e-05, "loss": 0.7039, "step": 1289 }, { "epoch": 0.29730352615810096, "grad_norm": 0.2826348841190338, "learning_rate": 8.242128002123548e-05, "loss": 0.696, "step": 1290 }, { "epoch": 0.2975339940078359, "grad_norm": 0.22388167679309845, "learning_rate": 8.239285338543558e-05, "loss": 0.7022, "step": 1291 }, { "epoch": 0.2977644618575709, "grad_norm": 0.29318729043006897, "learning_rate": 8.236440869463671e-05, "loss": 0.6948, "step": 1292 }, { "epoch": 0.29799492970730584, "grad_norm": 0.28733378648757935, "learning_rate": 8.233594596469325e-05, "loss": 0.7068, "step": 1293 }, { "epoch": 0.2982253975570408, "grad_norm": 0.2567828893661499, "learning_rate": 8.230746521146963e-05, "loss": 0.6986, "step": 1294 }, { "epoch": 0.29845586540677577, "grad_norm": 0.2597467601299286, "learning_rate": 8.227896645084036e-05, "loss": 0.685, "step": 1295 }, { "epoch": 0.2986863332565107, "grad_norm": 0.26486629247665405, "learning_rate": 8.225044969868994e-05, "loss": 0.694, "step": 1296 }, { "epoch": 0.2989168011062457, "grad_norm": 0.2641950249671936, "learning_rate": 8.222191497091294e-05, "loss": 0.7005, "step": 1297 }, { "epoch": 0.29914726895598065, "grad_norm": 0.3094748556613922, "learning_rate": 8.219336228341392e-05, "loss": 0.7035, "step": 1298 }, { "epoch": 0.2993777368057156, "grad_norm": 0.2441185563802719, "learning_rate": 8.216479165210748e-05, "loss": 0.6908, "step": 1299 }, { "epoch": 0.29960820465545057, "grad_norm": 0.23661471903324127, "learning_rate": 8.213620309291819e-05, "loss": 0.7005, "step": 1300 }, { "epoch": 0.29983867250518553, "grad_norm": 0.3035421371459961, "learning_rate": 8.210759662178064e-05, "loss": 0.6909, "step": 1301 }, { "epoch": 0.3000691403549205, "grad_norm": 0.2798277735710144, "learning_rate": 8.207897225463938e-05, "loss": 0.6936, "step": 1302 }, { "epoch": 0.30029960820465545, "grad_norm": 0.232849583029747, "learning_rate": 8.205033000744894e-05, "loss": 0.6943, "step": 1303 }, { "epoch": 0.3005300760543904, "grad_norm": 0.26353907585144043, "learning_rate": 8.202166989617383e-05, "loss": 0.6917, "step": 1304 }, { "epoch": 0.3007605439041254, "grad_norm": 0.27309128642082214, "learning_rate": 8.199299193678851e-05, "loss": 0.702, "step": 1305 }, { "epoch": 0.30099101175386034, "grad_norm": 0.2615521252155304, "learning_rate": 8.196429614527737e-05, "loss": 0.6962, "step": 1306 }, { "epoch": 0.3012214796035953, "grad_norm": 0.25091928243637085, "learning_rate": 8.193558253763478e-05, "loss": 0.6988, "step": 1307 }, { "epoch": 0.30145194745333026, "grad_norm": 0.25908082723617554, "learning_rate": 8.1906851129865e-05, "loss": 0.6996, "step": 1308 }, { "epoch": 0.3016824153030652, "grad_norm": 0.2851544916629791, "learning_rate": 8.187810193798223e-05, "loss": 0.6932, "step": 1309 }, { "epoch": 0.3019128831528002, "grad_norm": 0.2424258440732956, "learning_rate": 8.184933497801059e-05, "loss": 0.6926, "step": 1310 }, { "epoch": 0.30214335100253514, "grad_norm": 0.24669015407562256, "learning_rate": 8.182055026598408e-05, "loss": 0.701, "step": 1311 }, { "epoch": 0.3023738188522701, "grad_norm": 0.27151423692703247, "learning_rate": 8.179174781794658e-05, "loss": 0.6909, "step": 1312 }, { "epoch": 0.30260428670200507, "grad_norm": 0.2658521831035614, "learning_rate": 8.176292764995195e-05, "loss": 0.6906, "step": 1313 }, { "epoch": 0.30283475455174, "grad_norm": 0.2563072443008423, "learning_rate": 8.173408977806382e-05, "loss": 0.6948, "step": 1314 }, { "epoch": 0.303065222401475, "grad_norm": 0.27026668190956116, "learning_rate": 8.170523421835572e-05, "loss": 0.6986, "step": 1315 }, { "epoch": 0.30329569025120995, "grad_norm": 0.2493722289800644, "learning_rate": 8.167636098691105e-05, "loss": 0.695, "step": 1316 }, { "epoch": 0.3035261581009449, "grad_norm": 0.46334898471832275, "learning_rate": 8.164747009982308e-05, "loss": 0.6948, "step": 1317 }, { "epoch": 0.30375662595067987, "grad_norm": 0.3177790343761444, "learning_rate": 8.161856157319488e-05, "loss": 0.692, "step": 1318 }, { "epoch": 0.30398709380041483, "grad_norm": 0.25527554750442505, "learning_rate": 8.158963542313937e-05, "loss": 0.7024, "step": 1319 }, { "epoch": 0.3042175616501498, "grad_norm": 0.2921774983406067, "learning_rate": 8.156069166577928e-05, "loss": 0.7102, "step": 1320 }, { "epoch": 0.30444802949988475, "grad_norm": 0.3124329149723053, "learning_rate": 8.153173031724719e-05, "loss": 0.6898, "step": 1321 }, { "epoch": 0.3046784973496197, "grad_norm": 0.2984555959701538, "learning_rate": 8.150275139368544e-05, "loss": 0.6944, "step": 1322 }, { "epoch": 0.3049089651993547, "grad_norm": 0.2839076817035675, "learning_rate": 8.14737549112462e-05, "loss": 0.6978, "step": 1323 }, { "epoch": 0.30513943304908964, "grad_norm": 0.25131693482398987, "learning_rate": 8.144474088609141e-05, "loss": 0.6931, "step": 1324 }, { "epoch": 0.3053699008988246, "grad_norm": 0.2686595916748047, "learning_rate": 8.141570933439277e-05, "loss": 0.6873, "step": 1325 }, { "epoch": 0.30560036874855956, "grad_norm": 0.2873900830745697, "learning_rate": 8.138666027233181e-05, "loss": 0.6959, "step": 1326 }, { "epoch": 0.3058308365982945, "grad_norm": 0.26454657316207886, "learning_rate": 8.135759371609976e-05, "loss": 0.6977, "step": 1327 }, { "epoch": 0.3060613044480295, "grad_norm": 0.27393782138824463, "learning_rate": 8.132850968189762e-05, "loss": 0.6977, "step": 1328 }, { "epoch": 0.30629177229776444, "grad_norm": 0.29595717787742615, "learning_rate": 8.129940818593612e-05, "loss": 0.6941, "step": 1329 }, { "epoch": 0.3065222401474994, "grad_norm": 0.2389097660779953, "learning_rate": 8.127028924443573e-05, "loss": 0.7001, "step": 1330 }, { "epoch": 0.30675270799723436, "grad_norm": 0.7923142910003662, "learning_rate": 8.12411528736267e-05, "loss": 0.6915, "step": 1331 }, { "epoch": 0.3069831758469693, "grad_norm": 0.40648335218429565, "learning_rate": 8.12119990897489e-05, "loss": 0.6905, "step": 1332 }, { "epoch": 0.3072136436967043, "grad_norm": 0.2911185622215271, "learning_rate": 8.118282790905197e-05, "loss": 0.6953, "step": 1333 }, { "epoch": 0.30744411154643925, "grad_norm": 0.2990292012691498, "learning_rate": 8.11536393477952e-05, "loss": 0.6953, "step": 1334 }, { "epoch": 0.3076745793961742, "grad_norm": 0.28816699981689453, "learning_rate": 8.112443342224761e-05, "loss": 0.6954, "step": 1335 }, { "epoch": 0.30790504724590917, "grad_norm": 0.32540613412857056, "learning_rate": 8.109521014868789e-05, "loss": 0.6935, "step": 1336 }, { "epoch": 0.30813551509564413, "grad_norm": 0.29871416091918945, "learning_rate": 8.106596954340438e-05, "loss": 0.7029, "step": 1337 }, { "epoch": 0.30836598294537915, "grad_norm": 0.29098618030548096, "learning_rate": 8.10367116226951e-05, "loss": 0.7083, "step": 1338 }, { "epoch": 0.3085964507951141, "grad_norm": 0.27469146251678467, "learning_rate": 8.100743640286768e-05, "loss": 0.7025, "step": 1339 }, { "epoch": 0.30882691864484907, "grad_norm": 0.34525662660598755, "learning_rate": 8.097814390023947e-05, "loss": 0.6983, "step": 1340 }, { "epoch": 0.30905738649458403, "grad_norm": 0.3090488314628601, "learning_rate": 8.09488341311374e-05, "loss": 0.7003, "step": 1341 }, { "epoch": 0.309287854344319, "grad_norm": 0.2643933892250061, "learning_rate": 8.091950711189801e-05, "loss": 0.689, "step": 1342 }, { "epoch": 0.30951832219405395, "grad_norm": 0.3033878803253174, "learning_rate": 8.089016285886748e-05, "loss": 0.6958, "step": 1343 }, { "epoch": 0.3097487900437889, "grad_norm": 0.29167822003364563, "learning_rate": 8.086080138840162e-05, "loss": 0.6976, "step": 1344 }, { "epoch": 0.3099792578935239, "grad_norm": 0.29147741198539734, "learning_rate": 8.083142271686577e-05, "loss": 0.6985, "step": 1345 }, { "epoch": 0.31020972574325884, "grad_norm": 0.2536722719669342, "learning_rate": 8.080202686063492e-05, "loss": 0.7038, "step": 1346 }, { "epoch": 0.3104401935929938, "grad_norm": 0.2939794361591339, "learning_rate": 8.077261383609363e-05, "loss": 0.6915, "step": 1347 }, { "epoch": 0.31067066144272876, "grad_norm": 0.260345458984375, "learning_rate": 8.074318365963597e-05, "loss": 0.6942, "step": 1348 }, { "epoch": 0.3109011292924637, "grad_norm": 0.25490236282348633, "learning_rate": 8.071373634766563e-05, "loss": 0.6947, "step": 1349 }, { "epoch": 0.3111315971421987, "grad_norm": 0.2755090892314911, "learning_rate": 8.068427191659586e-05, "loss": 0.7035, "step": 1350 }, { "epoch": 0.31136206499193364, "grad_norm": 0.2627999484539032, "learning_rate": 8.065479038284942e-05, "loss": 0.6992, "step": 1351 }, { "epoch": 0.3115925328416686, "grad_norm": 0.25387728214263916, "learning_rate": 8.062529176285858e-05, "loss": 0.7024, "step": 1352 }, { "epoch": 0.31182300069140356, "grad_norm": 0.27479302883148193, "learning_rate": 8.059577607306518e-05, "loss": 0.6976, "step": 1353 }, { "epoch": 0.3120534685411385, "grad_norm": 0.23171396553516388, "learning_rate": 8.056624332992057e-05, "loss": 0.6977, "step": 1354 }, { "epoch": 0.3122839363908735, "grad_norm": 0.26171013712882996, "learning_rate": 8.05366935498856e-05, "loss": 0.6927, "step": 1355 }, { "epoch": 0.31251440424060845, "grad_norm": 0.27590593695640564, "learning_rate": 8.050712674943055e-05, "loss": 0.6868, "step": 1356 }, { "epoch": 0.3127448720903434, "grad_norm": 0.2615238428115845, "learning_rate": 8.047754294503531e-05, "loss": 0.6913, "step": 1357 }, { "epoch": 0.31297533994007837, "grad_norm": 0.2482849508523941, "learning_rate": 8.044794215318916e-05, "loss": 0.6949, "step": 1358 }, { "epoch": 0.31320580778981333, "grad_norm": 0.26599544286727905, "learning_rate": 8.041832439039084e-05, "loss": 0.6972, "step": 1359 }, { "epoch": 0.3134362756395483, "grad_norm": 0.2676306962966919, "learning_rate": 8.038868967314863e-05, "loss": 0.7023, "step": 1360 }, { "epoch": 0.31366674348928325, "grad_norm": 0.2429536134004593, "learning_rate": 8.035903801798018e-05, "loss": 0.691, "step": 1361 }, { "epoch": 0.3138972113390182, "grad_norm": 0.264345645904541, "learning_rate": 8.032936944141261e-05, "loss": 0.6948, "step": 1362 }, { "epoch": 0.3141276791887532, "grad_norm": 0.2679714262485504, "learning_rate": 8.02996839599825e-05, "loss": 0.6977, "step": 1363 }, { "epoch": 0.31435814703848813, "grad_norm": 0.28790920972824097, "learning_rate": 8.02699815902358e-05, "loss": 0.6931, "step": 1364 }, { "epoch": 0.3145886148882231, "grad_norm": 0.23752626776695251, "learning_rate": 8.02402623487279e-05, "loss": 0.692, "step": 1365 }, { "epoch": 0.31481908273795806, "grad_norm": 0.25893813371658325, "learning_rate": 8.021052625202359e-05, "loss": 0.7112, "step": 1366 }, { "epoch": 0.315049550587693, "grad_norm": 0.29539117217063904, "learning_rate": 8.018077331669706e-05, "loss": 0.6844, "step": 1367 }, { "epoch": 0.315280018437428, "grad_norm": 0.2858338952064514, "learning_rate": 8.015100355933189e-05, "loss": 0.6944, "step": 1368 }, { "epoch": 0.31551048628716294, "grad_norm": 0.24025440216064453, "learning_rate": 8.012121699652103e-05, "loss": 0.6995, "step": 1369 }, { "epoch": 0.3157409541368979, "grad_norm": 0.2733021676540375, "learning_rate": 8.009141364486679e-05, "loss": 0.6892, "step": 1370 }, { "epoch": 0.31597142198663286, "grad_norm": 0.286783903837204, "learning_rate": 8.006159352098082e-05, "loss": 0.6881, "step": 1371 }, { "epoch": 0.3162018898363678, "grad_norm": 0.2588256001472473, "learning_rate": 8.003175664148416e-05, "loss": 0.6927, "step": 1372 }, { "epoch": 0.3164323576861028, "grad_norm": 0.26540815830230713, "learning_rate": 8.000190302300721e-05, "loss": 0.6959, "step": 1373 }, { "epoch": 0.31666282553583774, "grad_norm": 0.2769984006881714, "learning_rate": 7.99720326821896e-05, "loss": 0.6986, "step": 1374 }, { "epoch": 0.3168932933855727, "grad_norm": 0.23313313722610474, "learning_rate": 7.994214563568036e-05, "loss": 0.6915, "step": 1375 }, { "epoch": 0.31712376123530767, "grad_norm": 0.24230745434761047, "learning_rate": 7.991224190013782e-05, "loss": 0.7006, "step": 1376 }, { "epoch": 0.3173542290850426, "grad_norm": 0.2684518098831177, "learning_rate": 7.988232149222959e-05, "loss": 0.6968, "step": 1377 }, { "epoch": 0.3175846969347776, "grad_norm": 0.24516130983829498, "learning_rate": 7.985238442863261e-05, "loss": 0.6957, "step": 1378 }, { "epoch": 0.31781516478451255, "grad_norm": 0.22360306978225708, "learning_rate": 7.982243072603306e-05, "loss": 0.7003, "step": 1379 }, { "epoch": 0.3180456326342475, "grad_norm": 0.2547612190246582, "learning_rate": 7.979246040112643e-05, "loss": 0.6904, "step": 1380 }, { "epoch": 0.31827610048398247, "grad_norm": 0.3107689619064331, "learning_rate": 7.976247347061745e-05, "loss": 0.6902, "step": 1381 }, { "epoch": 0.31850656833371743, "grad_norm": 0.2589125335216522, "learning_rate": 7.973246995122013e-05, "loss": 0.6908, "step": 1382 }, { "epoch": 0.3187370361834524, "grad_norm": 0.24334730207920074, "learning_rate": 7.970244985965767e-05, "loss": 0.6942, "step": 1383 }, { "epoch": 0.31896750403318735, "grad_norm": 0.249411478638649, "learning_rate": 7.96724132126626e-05, "loss": 0.6968, "step": 1384 }, { "epoch": 0.3191979718829223, "grad_norm": 0.29693132638931274, "learning_rate": 7.964236002697661e-05, "loss": 0.6869, "step": 1385 }, { "epoch": 0.3194284397326573, "grad_norm": 0.2691076993942261, "learning_rate": 7.96122903193506e-05, "loss": 0.6914, "step": 1386 }, { "epoch": 0.31965890758239224, "grad_norm": 0.2546314597129822, "learning_rate": 7.958220410654475e-05, "loss": 0.6944, "step": 1387 }, { "epoch": 0.3198893754321272, "grad_norm": 0.29681459069252014, "learning_rate": 7.955210140532833e-05, "loss": 0.6801, "step": 1388 }, { "epoch": 0.32011984328186216, "grad_norm": 0.24881964921951294, "learning_rate": 7.952198223247993e-05, "loss": 0.6966, "step": 1389 }, { "epoch": 0.3203503111315971, "grad_norm": 0.2598433196544647, "learning_rate": 7.949184660478721e-05, "loss": 0.6955, "step": 1390 }, { "epoch": 0.3205807789813321, "grad_norm": 0.2911115884780884, "learning_rate": 7.946169453904706e-05, "loss": 0.696, "step": 1391 }, { "epoch": 0.32081124683106704, "grad_norm": 0.2831457853317261, "learning_rate": 7.943152605206551e-05, "loss": 0.6951, "step": 1392 }, { "epoch": 0.321041714680802, "grad_norm": 0.25077253580093384, "learning_rate": 7.940134116065776e-05, "loss": 0.691, "step": 1393 }, { "epoch": 0.32127218253053696, "grad_norm": 0.2983539402484894, "learning_rate": 7.937113988164814e-05, "loss": 0.69, "step": 1394 }, { "epoch": 0.3215026503802719, "grad_norm": 0.28595560789108276, "learning_rate": 7.93409222318701e-05, "loss": 0.6948, "step": 1395 }, { "epoch": 0.3217331182300069, "grad_norm": 0.2612264156341553, "learning_rate": 7.931068822816627e-05, "loss": 0.6897, "step": 1396 }, { "epoch": 0.3219635860797419, "grad_norm": 0.2846459150314331, "learning_rate": 7.928043788738835e-05, "loss": 0.7021, "step": 1397 }, { "epoch": 0.32219405392947686, "grad_norm": 0.26386579871177673, "learning_rate": 7.925017122639712e-05, "loss": 0.6866, "step": 1398 }, { "epoch": 0.3224245217792118, "grad_norm": 0.24749140441417694, "learning_rate": 7.921988826206252e-05, "loss": 0.6953, "step": 1399 }, { "epoch": 0.3226549896289468, "grad_norm": 0.24181775748729706, "learning_rate": 7.918958901126354e-05, "loss": 0.6928, "step": 1400 }, { "epoch": 0.32288545747868175, "grad_norm": 0.26094570755958557, "learning_rate": 7.915927349088825e-05, "loss": 0.6997, "step": 1401 }, { "epoch": 0.3231159253284167, "grad_norm": 0.25756776332855225, "learning_rate": 7.912894171783383e-05, "loss": 0.6834, "step": 1402 }, { "epoch": 0.32334639317815167, "grad_norm": 0.2835976183414459, "learning_rate": 7.909859370900642e-05, "loss": 0.6971, "step": 1403 }, { "epoch": 0.32357686102788663, "grad_norm": 0.27739542722702026, "learning_rate": 7.906822948132131e-05, "loss": 0.6931, "step": 1404 }, { "epoch": 0.3238073288776216, "grad_norm": 0.2250133603811264, "learning_rate": 7.903784905170277e-05, "loss": 0.6969, "step": 1405 }, { "epoch": 0.32403779672735655, "grad_norm": 0.2541544735431671, "learning_rate": 7.900745243708416e-05, "loss": 0.6874, "step": 1406 }, { "epoch": 0.3242682645770915, "grad_norm": 0.24582208693027496, "learning_rate": 7.89770396544078e-05, "loss": 0.688, "step": 1407 }, { "epoch": 0.3244987324268265, "grad_norm": 0.2596662938594818, "learning_rate": 7.894661072062503e-05, "loss": 0.7033, "step": 1408 }, { "epoch": 0.32472920027656144, "grad_norm": 0.24980351328849792, "learning_rate": 7.891616565269623e-05, "loss": 0.6844, "step": 1409 }, { "epoch": 0.3249596681262964, "grad_norm": 0.2832036316394806, "learning_rate": 7.888570446759074e-05, "loss": 0.6904, "step": 1410 }, { "epoch": 0.32519013597603136, "grad_norm": 0.27952510118484497, "learning_rate": 7.88552271822869e-05, "loss": 0.6913, "step": 1411 }, { "epoch": 0.3254206038257663, "grad_norm": 0.2454599142074585, "learning_rate": 7.882473381377202e-05, "loss": 0.6917, "step": 1412 }, { "epoch": 0.3256510716755013, "grad_norm": 0.219132199883461, "learning_rate": 7.879422437904236e-05, "loss": 0.6959, "step": 1413 }, { "epoch": 0.32588153952523624, "grad_norm": 0.25890830159187317, "learning_rate": 7.876369889510316e-05, "loss": 0.6987, "step": 1414 }, { "epoch": 0.3261120073749712, "grad_norm": 0.22995416820049286, "learning_rate": 7.873315737896856e-05, "loss": 0.6895, "step": 1415 }, { "epoch": 0.32634247522470616, "grad_norm": 0.26824015378952026, "learning_rate": 7.87025998476617e-05, "loss": 0.6919, "step": 1416 }, { "epoch": 0.3265729430744411, "grad_norm": 0.26897183060646057, "learning_rate": 7.867202631821462e-05, "loss": 0.6913, "step": 1417 }, { "epoch": 0.3268034109241761, "grad_norm": 0.24150390923023224, "learning_rate": 7.864143680766826e-05, "loss": 0.6967, "step": 1418 }, { "epoch": 0.32703387877391105, "grad_norm": 0.26501011848449707, "learning_rate": 7.861083133307247e-05, "loss": 0.6967, "step": 1419 }, { "epoch": 0.327264346623646, "grad_norm": 0.23892751336097717, "learning_rate": 7.858020991148602e-05, "loss": 0.6999, "step": 1420 }, { "epoch": 0.32749481447338097, "grad_norm": 0.2643808126449585, "learning_rate": 7.854957255997653e-05, "loss": 0.6919, "step": 1421 }, { "epoch": 0.32772528232311593, "grad_norm": 0.26628732681274414, "learning_rate": 7.851891929562057e-05, "loss": 0.6963, "step": 1422 }, { "epoch": 0.3279557501728509, "grad_norm": 0.27681294083595276, "learning_rate": 7.84882501355035e-05, "loss": 0.6879, "step": 1423 }, { "epoch": 0.32818621802258585, "grad_norm": 0.25981584191322327, "learning_rate": 7.845756509671955e-05, "loss": 0.6974, "step": 1424 }, { "epoch": 0.3284166858723208, "grad_norm": 0.2389248162508011, "learning_rate": 7.842686419637189e-05, "loss": 0.6961, "step": 1425 }, { "epoch": 0.3286471537220558, "grad_norm": 0.27327385544776917, "learning_rate": 7.839614745157242e-05, "loss": 0.6834, "step": 1426 }, { "epoch": 0.32887762157179073, "grad_norm": 0.2433241307735443, "learning_rate": 7.836541487944193e-05, "loss": 0.6887, "step": 1427 }, { "epoch": 0.3291080894215257, "grad_norm": 0.23156027495861053, "learning_rate": 7.833466649711002e-05, "loss": 0.6872, "step": 1428 }, { "epoch": 0.32933855727126066, "grad_norm": 0.27620989084243774, "learning_rate": 7.83039023217151e-05, "loss": 0.7045, "step": 1429 }, { "epoch": 0.3295690251209956, "grad_norm": 0.2832176983356476, "learning_rate": 7.827312237040435e-05, "loss": 0.6952, "step": 1430 }, { "epoch": 0.3297994929707306, "grad_norm": 0.2531898319721222, "learning_rate": 7.824232666033382e-05, "loss": 0.6888, "step": 1431 }, { "epoch": 0.33002996082046554, "grad_norm": 0.21987776458263397, "learning_rate": 7.821151520866829e-05, "loss": 0.687, "step": 1432 }, { "epoch": 0.3302604286702005, "grad_norm": 0.26392921805381775, "learning_rate": 7.818068803258131e-05, "loss": 0.6815, "step": 1433 }, { "epoch": 0.33049089651993546, "grad_norm": 0.23336456716060638, "learning_rate": 7.814984514925521e-05, "loss": 0.6834, "step": 1434 }, { "epoch": 0.3307213643696704, "grad_norm": 0.25581541657447815, "learning_rate": 7.811898657588109e-05, "loss": 0.6811, "step": 1435 }, { "epoch": 0.3309518322194054, "grad_norm": 0.23013727366924286, "learning_rate": 7.808811232965876e-05, "loss": 0.676, "step": 1436 }, { "epoch": 0.33118230006914035, "grad_norm": 0.222098246216774, "learning_rate": 7.805722242779678e-05, "loss": 0.6909, "step": 1437 }, { "epoch": 0.3314127679188753, "grad_norm": 0.24615336954593658, "learning_rate": 7.802631688751247e-05, "loss": 0.6924, "step": 1438 }, { "epoch": 0.33164323576861027, "grad_norm": 0.23416768014431, "learning_rate": 7.79953957260318e-05, "loss": 0.7015, "step": 1439 }, { "epoch": 0.33187370361834523, "grad_norm": 0.2338051199913025, "learning_rate": 7.796445896058953e-05, "loss": 0.6929, "step": 1440 }, { "epoch": 0.3321041714680802, "grad_norm": 0.24458903074264526, "learning_rate": 7.793350660842902e-05, "loss": 0.6852, "step": 1441 }, { "epoch": 0.33233463931781515, "grad_norm": 0.2856997847557068, "learning_rate": 7.790253868680237e-05, "loss": 0.6926, "step": 1442 }, { "epoch": 0.3325651071675501, "grad_norm": 0.24041031301021576, "learning_rate": 7.78715552129704e-05, "loss": 0.689, "step": 1443 }, { "epoch": 0.3327955750172851, "grad_norm": 0.286312997341156, "learning_rate": 7.784055620420251e-05, "loss": 0.6954, "step": 1444 }, { "epoch": 0.33302604286702003, "grad_norm": 0.25002339482307434, "learning_rate": 7.780954167777684e-05, "loss": 0.6929, "step": 1445 }, { "epoch": 0.333256510716755, "grad_norm": 0.2585028409957886, "learning_rate": 7.777851165098012e-05, "loss": 0.6919, "step": 1446 }, { "epoch": 0.33348697856648996, "grad_norm": 0.25653818249702454, "learning_rate": 7.774746614110775e-05, "loss": 0.6909, "step": 1447 }, { "epoch": 0.3337174464162249, "grad_norm": 0.2610546052455902, "learning_rate": 7.771640516546373e-05, "loss": 0.6921, "step": 1448 }, { "epoch": 0.3339479142659599, "grad_norm": 0.23188693821430206, "learning_rate": 7.768532874136074e-05, "loss": 0.6898, "step": 1449 }, { "epoch": 0.33417838211569484, "grad_norm": 0.2562350332736969, "learning_rate": 7.765423688612001e-05, "loss": 0.6815, "step": 1450 }, { "epoch": 0.3344088499654298, "grad_norm": 0.2621154487133026, "learning_rate": 7.762312961707141e-05, "loss": 0.6978, "step": 1451 }, { "epoch": 0.33463931781516476, "grad_norm": 0.23152048885822296, "learning_rate": 7.759200695155336e-05, "loss": 0.6961, "step": 1452 }, { "epoch": 0.3348697856648997, "grad_norm": 0.2416309267282486, "learning_rate": 7.75608689069129e-05, "loss": 0.6855, "step": 1453 }, { "epoch": 0.3351002535146347, "grad_norm": 0.2472970187664032, "learning_rate": 7.752971550050563e-05, "loss": 0.6993, "step": 1454 }, { "epoch": 0.33533072136436964, "grad_norm": 0.24622397124767303, "learning_rate": 7.749854674969573e-05, "loss": 0.7027, "step": 1455 }, { "epoch": 0.3355611892141046, "grad_norm": 0.21482546627521515, "learning_rate": 7.746736267185587e-05, "loss": 0.6861, "step": 1456 }, { "epoch": 0.3357916570638396, "grad_norm": 0.2635815143585205, "learning_rate": 7.743616328436733e-05, "loss": 0.6936, "step": 1457 }, { "epoch": 0.3360221249135746, "grad_norm": 0.22743616998195648, "learning_rate": 7.740494860461991e-05, "loss": 0.6925, "step": 1458 }, { "epoch": 0.33625259276330954, "grad_norm": 0.24108994007110596, "learning_rate": 7.73737186500119e-05, "loss": 0.6888, "step": 1459 }, { "epoch": 0.3364830606130445, "grad_norm": 0.2540665566921234, "learning_rate": 7.734247343795016e-05, "loss": 0.6934, "step": 1460 }, { "epoch": 0.33671352846277947, "grad_norm": 0.22767134010791779, "learning_rate": 7.731121298585e-05, "loss": 0.6871, "step": 1461 }, { "epoch": 0.3369439963125144, "grad_norm": 0.2467404305934906, "learning_rate": 7.727993731113523e-05, "loss": 0.6895, "step": 1462 }, { "epoch": 0.3371744641622494, "grad_norm": 0.23155854642391205, "learning_rate": 7.724864643123819e-05, "loss": 0.683, "step": 1463 }, { "epoch": 0.33740493201198435, "grad_norm": 0.2407347857952118, "learning_rate": 7.721734036359964e-05, "loss": 0.6873, "step": 1464 }, { "epoch": 0.3376353998617193, "grad_norm": 0.254625141620636, "learning_rate": 7.718601912566887e-05, "loss": 0.6934, "step": 1465 }, { "epoch": 0.33786586771145427, "grad_norm": 0.2507195770740509, "learning_rate": 7.715468273490354e-05, "loss": 0.6922, "step": 1466 }, { "epoch": 0.33809633556118923, "grad_norm": 0.2584257423877716, "learning_rate": 7.712333120876983e-05, "loss": 0.6855, "step": 1467 }, { "epoch": 0.3383268034109242, "grad_norm": 0.24727049469947815, "learning_rate": 7.709196456474231e-05, "loss": 0.6748, "step": 1468 }, { "epoch": 0.33855727126065915, "grad_norm": 0.2488059401512146, "learning_rate": 7.7060582820304e-05, "loss": 0.6869, "step": 1469 }, { "epoch": 0.3387877391103941, "grad_norm": 0.24992689490318298, "learning_rate": 7.702918599294636e-05, "loss": 0.6857, "step": 1470 }, { "epoch": 0.3390182069601291, "grad_norm": 0.2532345652580261, "learning_rate": 7.69977741001692e-05, "loss": 0.6923, "step": 1471 }, { "epoch": 0.33924867480986404, "grad_norm": 0.2411700189113617, "learning_rate": 7.696634715948072e-05, "loss": 0.6909, "step": 1472 }, { "epoch": 0.339479142659599, "grad_norm": 0.2590501606464386, "learning_rate": 7.693490518839763e-05, "loss": 0.6771, "step": 1473 }, { "epoch": 0.33970961050933396, "grad_norm": 0.2995712161064148, "learning_rate": 7.690344820444486e-05, "loss": 0.6867, "step": 1474 }, { "epoch": 0.3399400783590689, "grad_norm": 0.26080286502838135, "learning_rate": 7.68719762251558e-05, "loss": 0.696, "step": 1475 }, { "epoch": 0.3401705462088039, "grad_norm": 0.2599622309207916, "learning_rate": 7.684048926807219e-05, "loss": 0.6828, "step": 1476 }, { "epoch": 0.34040101405853884, "grad_norm": 0.2792719006538391, "learning_rate": 7.680898735074407e-05, "loss": 0.6876, "step": 1477 }, { "epoch": 0.3406314819082738, "grad_norm": 0.25866684317588806, "learning_rate": 7.677747049072987e-05, "loss": 0.6922, "step": 1478 }, { "epoch": 0.34086194975800876, "grad_norm": 0.23737114667892456, "learning_rate": 7.674593870559634e-05, "loss": 0.692, "step": 1479 }, { "epoch": 0.3410924176077437, "grad_norm": 0.26033899188041687, "learning_rate": 7.671439201291853e-05, "loss": 0.69, "step": 1480 }, { "epoch": 0.3413228854574787, "grad_norm": 0.24097611010074615, "learning_rate": 7.668283043027982e-05, "loss": 0.6896, "step": 1481 }, { "epoch": 0.34155335330721365, "grad_norm": 0.26150503754615784, "learning_rate": 7.665125397527187e-05, "loss": 0.6919, "step": 1482 }, { "epoch": 0.3417838211569486, "grad_norm": 0.21474456787109375, "learning_rate": 7.661966266549463e-05, "loss": 0.7017, "step": 1483 }, { "epoch": 0.34201428900668357, "grad_norm": 0.2468489408493042, "learning_rate": 7.658805651855636e-05, "loss": 0.6858, "step": 1484 }, { "epoch": 0.34224475685641853, "grad_norm": 0.2597835659980774, "learning_rate": 7.655643555207355e-05, "loss": 0.6919, "step": 1485 }, { "epoch": 0.3424752247061535, "grad_norm": 0.24256932735443115, "learning_rate": 7.652479978367097e-05, "loss": 0.6915, "step": 1486 }, { "epoch": 0.34270569255588845, "grad_norm": 0.2565065026283264, "learning_rate": 7.649314923098164e-05, "loss": 0.6863, "step": 1487 }, { "epoch": 0.3429361604056234, "grad_norm": 0.27215030789375305, "learning_rate": 7.646148391164682e-05, "loss": 0.6871, "step": 1488 }, { "epoch": 0.3431666282553584, "grad_norm": 0.27064988017082214, "learning_rate": 7.6429803843316e-05, "loss": 0.6914, "step": 1489 }, { "epoch": 0.34339709610509334, "grad_norm": 0.2974797487258911, "learning_rate": 7.63981090436469e-05, "loss": 0.6805, "step": 1490 }, { "epoch": 0.3436275639548283, "grad_norm": 0.21655718982219696, "learning_rate": 7.636639953030541e-05, "loss": 0.688, "step": 1491 }, { "epoch": 0.34385803180456326, "grad_norm": 0.26122844219207764, "learning_rate": 7.633467532096567e-05, "loss": 0.675, "step": 1492 }, { "epoch": 0.3440884996542982, "grad_norm": 0.22643567621707916, "learning_rate": 7.630293643331001e-05, "loss": 0.6917, "step": 1493 }, { "epoch": 0.3443189675040332, "grad_norm": 0.26686912775039673, "learning_rate": 7.627118288502889e-05, "loss": 0.6975, "step": 1494 }, { "epoch": 0.34454943535376814, "grad_norm": 0.25299954414367676, "learning_rate": 7.623941469382099e-05, "loss": 0.6944, "step": 1495 }, { "epoch": 0.3447799032035031, "grad_norm": 0.2686970829963684, "learning_rate": 7.620763187739315e-05, "loss": 0.6879, "step": 1496 }, { "epoch": 0.34501037105323806, "grad_norm": 0.2587553858757019, "learning_rate": 7.617583445346033e-05, "loss": 0.6885, "step": 1497 }, { "epoch": 0.345240838902973, "grad_norm": 0.25488153100013733, "learning_rate": 7.614402243974568e-05, "loss": 0.7005, "step": 1498 }, { "epoch": 0.345471306752708, "grad_norm": 0.2613796889781952, "learning_rate": 7.61121958539804e-05, "loss": 0.6872, "step": 1499 }, { "epoch": 0.34570177460244295, "grad_norm": 0.21812820434570312, "learning_rate": 7.608035471390394e-05, "loss": 0.6909, "step": 1500 }, { "epoch": 0.3459322424521779, "grad_norm": 0.24839772284030914, "learning_rate": 7.604849903726372e-05, "loss": 0.6839, "step": 1501 }, { "epoch": 0.34616271030191287, "grad_norm": 0.25548359751701355, "learning_rate": 7.601662884181534e-05, "loss": 0.6927, "step": 1502 }, { "epoch": 0.34639317815164783, "grad_norm": 0.25990694761276245, "learning_rate": 7.598474414532252e-05, "loss": 0.6875, "step": 1503 }, { "epoch": 0.3466236460013828, "grad_norm": 0.23840296268463135, "learning_rate": 7.595284496555698e-05, "loss": 0.6963, "step": 1504 }, { "epoch": 0.34685411385111775, "grad_norm": 0.24960501492023468, "learning_rate": 7.592093132029861e-05, "loss": 0.6853, "step": 1505 }, { "epoch": 0.3470845817008527, "grad_norm": 0.22266826033592224, "learning_rate": 7.588900322733526e-05, "loss": 0.6832, "step": 1506 }, { "epoch": 0.3473150495505877, "grad_norm": 0.23632311820983887, "learning_rate": 7.585706070446288e-05, "loss": 0.6813, "step": 1507 }, { "epoch": 0.34754551740032263, "grad_norm": 0.22808539867401123, "learning_rate": 7.582510376948552e-05, "loss": 0.6788, "step": 1508 }, { "epoch": 0.3477759852500576, "grad_norm": 0.2298205941915512, "learning_rate": 7.579313244021515e-05, "loss": 0.6926, "step": 1509 }, { "epoch": 0.34800645309979256, "grad_norm": 0.25376659631729126, "learning_rate": 7.576114673447186e-05, "loss": 0.6953, "step": 1510 }, { "epoch": 0.3482369209495275, "grad_norm": 0.24706676602363586, "learning_rate": 7.57291466700837e-05, "loss": 0.685, "step": 1511 }, { "epoch": 0.3484673887992625, "grad_norm": 0.25202682614326477, "learning_rate": 7.569713226488674e-05, "loss": 0.6867, "step": 1512 }, { "epoch": 0.34869785664899744, "grad_norm": 0.2461564689874649, "learning_rate": 7.566510353672504e-05, "loss": 0.6946, "step": 1513 }, { "epoch": 0.3489283244987324, "grad_norm": 0.26574838161468506, "learning_rate": 7.563306050345062e-05, "loss": 0.699, "step": 1514 }, { "epoch": 0.34915879234846736, "grad_norm": 0.256676584482193, "learning_rate": 7.560100318292355e-05, "loss": 0.6896, "step": 1515 }, { "epoch": 0.3493892601982024, "grad_norm": 0.23021619021892548, "learning_rate": 7.556893159301178e-05, "loss": 0.693, "step": 1516 }, { "epoch": 0.34961972804793734, "grad_norm": 0.23799525201320648, "learning_rate": 7.553684575159124e-05, "loss": 0.6855, "step": 1517 }, { "epoch": 0.3498501958976723, "grad_norm": 0.23646856844425201, "learning_rate": 7.550474567654583e-05, "loss": 0.6894, "step": 1518 }, { "epoch": 0.35008066374740726, "grad_norm": 0.2123948484659195, "learning_rate": 7.547263138576732e-05, "loss": 0.6744, "step": 1519 }, { "epoch": 0.3503111315971422, "grad_norm": 0.22038936614990234, "learning_rate": 7.54405028971555e-05, "loss": 0.6812, "step": 1520 }, { "epoch": 0.3505415994468772, "grad_norm": 0.22953416407108307, "learning_rate": 7.540836022861797e-05, "loss": 0.679, "step": 1521 }, { "epoch": 0.35077206729661214, "grad_norm": 0.23723843693733215, "learning_rate": 7.53762033980703e-05, "loss": 0.6828, "step": 1522 }, { "epoch": 0.3510025351463471, "grad_norm": 0.2359115332365036, "learning_rate": 7.534403242343595e-05, "loss": 0.6821, "step": 1523 }, { "epoch": 0.35123300299608207, "grad_norm": 0.23382125794887543, "learning_rate": 7.531184732264624e-05, "loss": 0.6885, "step": 1524 }, { "epoch": 0.351463470845817, "grad_norm": 0.2295057624578476, "learning_rate": 7.527964811364035e-05, "loss": 0.686, "step": 1525 }, { "epoch": 0.351693938695552, "grad_norm": 0.21867454051971436, "learning_rate": 7.524743481436537e-05, "loss": 0.6864, "step": 1526 }, { "epoch": 0.35192440654528695, "grad_norm": 0.21911703050136566, "learning_rate": 7.521520744277623e-05, "loss": 0.6882, "step": 1527 }, { "epoch": 0.3521548743950219, "grad_norm": 0.227437362074852, "learning_rate": 7.518296601683567e-05, "loss": 0.6894, "step": 1528 }, { "epoch": 0.35238534224475687, "grad_norm": 0.23779958486557007, "learning_rate": 7.515071055451429e-05, "loss": 0.6749, "step": 1529 }, { "epoch": 0.35261581009449183, "grad_norm": 0.22434940934181213, "learning_rate": 7.511844107379052e-05, "loss": 0.6897, "step": 1530 }, { "epoch": 0.3528462779442268, "grad_norm": 0.22103999555110931, "learning_rate": 7.508615759265059e-05, "loss": 0.68, "step": 1531 }, { "epoch": 0.35307674579396175, "grad_norm": 0.22952067852020264, "learning_rate": 7.505386012908853e-05, "loss": 0.6892, "step": 1532 }, { "epoch": 0.3533072136436967, "grad_norm": 0.24926556646823883, "learning_rate": 7.50215487011062e-05, "loss": 0.6829, "step": 1533 }, { "epoch": 0.3535376814934317, "grad_norm": 0.2560986280441284, "learning_rate": 7.498922332671317e-05, "loss": 0.6971, "step": 1534 }, { "epoch": 0.35376814934316664, "grad_norm": 0.256628155708313, "learning_rate": 7.495688402392686e-05, "loss": 0.6908, "step": 1535 }, { "epoch": 0.3539986171929016, "grad_norm": 0.23531514406204224, "learning_rate": 7.492453081077241e-05, "loss": 0.6811, "step": 1536 }, { "epoch": 0.35422908504263656, "grad_norm": 0.24269689619541168, "learning_rate": 7.489216370528273e-05, "loss": 0.6824, "step": 1537 }, { "epoch": 0.3544595528923715, "grad_norm": 0.2383260726928711, "learning_rate": 7.485978272549847e-05, "loss": 0.6876, "step": 1538 }, { "epoch": 0.3546900207421065, "grad_norm": 0.25450727343559265, "learning_rate": 7.482738788946799e-05, "loss": 0.6852, "step": 1539 }, { "epoch": 0.35492048859184144, "grad_norm": 0.21417759358882904, "learning_rate": 7.479497921524741e-05, "loss": 0.6848, "step": 1540 }, { "epoch": 0.3551509564415764, "grad_norm": 0.23762138187885284, "learning_rate": 7.476255672090055e-05, "loss": 0.6943, "step": 1541 }, { "epoch": 0.35538142429131137, "grad_norm": 0.2219182252883911, "learning_rate": 7.473012042449894e-05, "loss": 0.687, "step": 1542 }, { "epoch": 0.3556118921410463, "grad_norm": 0.23807905614376068, "learning_rate": 7.469767034412176e-05, "loss": 0.6859, "step": 1543 }, { "epoch": 0.3558423599907813, "grad_norm": 0.24784959852695465, "learning_rate": 7.466520649785593e-05, "loss": 0.6787, "step": 1544 }, { "epoch": 0.35607282784051625, "grad_norm": 0.2389153093099594, "learning_rate": 7.463272890379602e-05, "loss": 0.6922, "step": 1545 }, { "epoch": 0.3563032956902512, "grad_norm": 0.24661684036254883, "learning_rate": 7.460023758004426e-05, "loss": 0.6782, "step": 1546 }, { "epoch": 0.35653376353998617, "grad_norm": 0.22186362743377686, "learning_rate": 7.456773254471053e-05, "loss": 0.684, "step": 1547 }, { "epoch": 0.35676423138972113, "grad_norm": 0.2341771274805069, "learning_rate": 7.453521381591233e-05, "loss": 0.688, "step": 1548 }, { "epoch": 0.3569946992394561, "grad_norm": 0.2423945963382721, "learning_rate": 7.450268141177486e-05, "loss": 0.6738, "step": 1549 }, { "epoch": 0.35722516708919105, "grad_norm": 0.2303714007139206, "learning_rate": 7.44701353504309e-05, "loss": 0.6826, "step": 1550 }, { "epoch": 0.357455634938926, "grad_norm": 0.26467540860176086, "learning_rate": 7.443757565002081e-05, "loss": 0.6836, "step": 1551 }, { "epoch": 0.357686102788661, "grad_norm": 0.25847116112709045, "learning_rate": 7.440500232869262e-05, "loss": 0.6886, "step": 1552 }, { "epoch": 0.35791657063839594, "grad_norm": 0.22903743386268616, "learning_rate": 7.43724154046019e-05, "loss": 0.6929, "step": 1553 }, { "epoch": 0.3581470384881309, "grad_norm": 0.2548694312572479, "learning_rate": 7.433981489591181e-05, "loss": 0.6879, "step": 1554 }, { "epoch": 0.35837750633786586, "grad_norm": 0.2554045617580414, "learning_rate": 7.43072008207931e-05, "loss": 0.6834, "step": 1555 }, { "epoch": 0.3586079741876008, "grad_norm": 0.23032429814338684, "learning_rate": 7.42745731974241e-05, "loss": 0.6771, "step": 1556 }, { "epoch": 0.3588384420373358, "grad_norm": 0.23225395381450653, "learning_rate": 7.424193204399061e-05, "loss": 0.6906, "step": 1557 }, { "epoch": 0.35906890988707074, "grad_norm": 0.24884352087974548, "learning_rate": 7.420927737868608e-05, "loss": 0.6937, "step": 1558 }, { "epoch": 0.3592993777368057, "grad_norm": 0.22079841792583466, "learning_rate": 7.417660921971141e-05, "loss": 0.6965, "step": 1559 }, { "epoch": 0.35952984558654066, "grad_norm": 0.2275458425283432, "learning_rate": 7.414392758527504e-05, "loss": 0.6788, "step": 1560 }, { "epoch": 0.3597603134362756, "grad_norm": 0.25046491622924805, "learning_rate": 7.411123249359294e-05, "loss": 0.6874, "step": 1561 }, { "epoch": 0.3599907812860106, "grad_norm": 0.24402521550655365, "learning_rate": 7.407852396288857e-05, "loss": 0.6775, "step": 1562 }, { "epoch": 0.36022124913574555, "grad_norm": 0.2387145459651947, "learning_rate": 7.404580201139286e-05, "loss": 0.6817, "step": 1563 }, { "epoch": 0.3604517169854805, "grad_norm": 0.2291865348815918, "learning_rate": 7.401306665734429e-05, "loss": 0.691, "step": 1564 }, { "epoch": 0.36068218483521547, "grad_norm": 0.24180877208709717, "learning_rate": 7.398031791898872e-05, "loss": 0.6863, "step": 1565 }, { "epoch": 0.36091265268495043, "grad_norm": 0.2156798541545868, "learning_rate": 7.394755581457949e-05, "loss": 0.683, "step": 1566 }, { "epoch": 0.3611431205346854, "grad_norm": 0.2224697768688202, "learning_rate": 7.391478036237747e-05, "loss": 0.6863, "step": 1567 }, { "epoch": 0.36137358838442035, "grad_norm": 0.23175424337387085, "learning_rate": 7.388199158065086e-05, "loss": 0.6896, "step": 1568 }, { "epoch": 0.3616040562341553, "grad_norm": 0.20944839715957642, "learning_rate": 7.384918948767538e-05, "loss": 0.6949, "step": 1569 }, { "epoch": 0.3618345240838903, "grad_norm": 0.24800141155719757, "learning_rate": 7.38163741017341e-05, "loss": 0.6827, "step": 1570 }, { "epoch": 0.36206499193362524, "grad_norm": 0.2055944949388504, "learning_rate": 7.378354544111755e-05, "loss": 0.6859, "step": 1571 }, { "epoch": 0.3622954597833602, "grad_norm": 0.23914718627929688, "learning_rate": 7.37507035241236e-05, "loss": 0.691, "step": 1572 }, { "epoch": 0.36252592763309516, "grad_norm": 0.24815700948238373, "learning_rate": 7.371784836905758e-05, "loss": 0.6964, "step": 1573 }, { "epoch": 0.3627563954828301, "grad_norm": 0.22123336791992188, "learning_rate": 7.368497999423216e-05, "loss": 0.6821, "step": 1574 }, { "epoch": 0.3629868633325651, "grad_norm": 0.22671015560626984, "learning_rate": 7.365209841796738e-05, "loss": 0.6824, "step": 1575 }, { "epoch": 0.3632173311823001, "grad_norm": 0.2351810485124588, "learning_rate": 7.361920365859066e-05, "loss": 0.6829, "step": 1576 }, { "epoch": 0.36344779903203506, "grad_norm": 0.27589890360832214, "learning_rate": 7.35862957344367e-05, "loss": 0.6909, "step": 1577 }, { "epoch": 0.36367826688177, "grad_norm": 0.24565422534942627, "learning_rate": 7.355337466384761e-05, "loss": 0.6882, "step": 1578 }, { "epoch": 0.363908734731505, "grad_norm": 0.24734973907470703, "learning_rate": 7.352044046517285e-05, "loss": 0.694, "step": 1579 }, { "epoch": 0.36413920258123994, "grad_norm": 0.23439617455005646, "learning_rate": 7.34874931567691e-05, "loss": 0.6883, "step": 1580 }, { "epoch": 0.3643696704309749, "grad_norm": 0.2454935908317566, "learning_rate": 7.34545327570004e-05, "loss": 0.692, "step": 1581 }, { "epoch": 0.36460013828070986, "grad_norm": 0.21465803682804108, "learning_rate": 7.342155928423812e-05, "loss": 0.68, "step": 1582 }, { "epoch": 0.3648306061304448, "grad_norm": 0.24188017845153809, "learning_rate": 7.338857275686084e-05, "loss": 0.683, "step": 1583 }, { "epoch": 0.3650610739801798, "grad_norm": 0.2033400982618332, "learning_rate": 7.335557319325449e-05, "loss": 0.6855, "step": 1584 }, { "epoch": 0.36529154182991475, "grad_norm": 0.2551295757293701, "learning_rate": 7.332256061181222e-05, "loss": 0.6839, "step": 1585 }, { "epoch": 0.3655220096796497, "grad_norm": 0.218764066696167, "learning_rate": 7.328953503093446e-05, "loss": 0.6823, "step": 1586 }, { "epoch": 0.36575247752938467, "grad_norm": 0.23315462470054626, "learning_rate": 7.325649646902887e-05, "loss": 0.6884, "step": 1587 }, { "epoch": 0.36598294537911963, "grad_norm": 0.2543986439704895, "learning_rate": 7.322344494451034e-05, "loss": 0.681, "step": 1588 }, { "epoch": 0.3662134132288546, "grad_norm": 0.3002406060695648, "learning_rate": 7.319038047580102e-05, "loss": 0.6907, "step": 1589 }, { "epoch": 0.36644388107858955, "grad_norm": 0.24670617282390594, "learning_rate": 7.315730308133023e-05, "loss": 0.6922, "step": 1590 }, { "epoch": 0.3666743489283245, "grad_norm": 0.23123899102210999, "learning_rate": 7.312421277953454e-05, "loss": 0.682, "step": 1591 }, { "epoch": 0.3669048167780595, "grad_norm": 0.3154587745666504, "learning_rate": 7.309110958885768e-05, "loss": 0.6841, "step": 1592 }, { "epoch": 0.36713528462779443, "grad_norm": 0.24491167068481445, "learning_rate": 7.305799352775055e-05, "loss": 0.6869, "step": 1593 }, { "epoch": 0.3673657524775294, "grad_norm": 0.24614278972148895, "learning_rate": 7.302486461467128e-05, "loss": 0.678, "step": 1594 }, { "epoch": 0.36759622032726436, "grad_norm": 0.2878095507621765, "learning_rate": 7.299172286808511e-05, "loss": 0.6899, "step": 1595 }, { "epoch": 0.3678266881769993, "grad_norm": 0.26783236861228943, "learning_rate": 7.295856830646446e-05, "loss": 0.693, "step": 1596 }, { "epoch": 0.3680571560267343, "grad_norm": 0.2804826498031616, "learning_rate": 7.29254009482889e-05, "loss": 0.687, "step": 1597 }, { "epoch": 0.36828762387646924, "grad_norm": 0.29956531524658203, "learning_rate": 7.28922208120451e-05, "loss": 0.6868, "step": 1598 }, { "epoch": 0.3685180917262042, "grad_norm": 0.22456161677837372, "learning_rate": 7.285902791622688e-05, "loss": 0.6829, "step": 1599 }, { "epoch": 0.36874855957593916, "grad_norm": 0.2787242829799652, "learning_rate": 7.282582227933517e-05, "loss": 0.6845, "step": 1600 }, { "epoch": 0.3689790274256741, "grad_norm": 0.28973785042762756, "learning_rate": 7.279260391987799e-05, "loss": 0.6921, "step": 1601 }, { "epoch": 0.3692094952754091, "grad_norm": 0.25046855211257935, "learning_rate": 7.275937285637044e-05, "loss": 0.6825, "step": 1602 }, { "epoch": 0.36943996312514404, "grad_norm": 0.25190284848213196, "learning_rate": 7.272612910733475e-05, "loss": 0.6882, "step": 1603 }, { "epoch": 0.369670430974879, "grad_norm": 0.2506854832172394, "learning_rate": 7.269287269130017e-05, "loss": 0.6851, "step": 1604 }, { "epoch": 0.36990089882461397, "grad_norm": 0.2547498643398285, "learning_rate": 7.2659603626803e-05, "loss": 0.6797, "step": 1605 }, { "epoch": 0.3701313666743489, "grad_norm": 0.23294281959533691, "learning_rate": 7.262632193238668e-05, "loss": 0.6831, "step": 1606 }, { "epoch": 0.3703618345240839, "grad_norm": 0.24481774866580963, "learning_rate": 7.259302762660157e-05, "loss": 0.6835, "step": 1607 }, { "epoch": 0.37059230237381885, "grad_norm": 0.24376718699932098, "learning_rate": 7.255972072800514e-05, "loss": 0.6762, "step": 1608 }, { "epoch": 0.3708227702235538, "grad_norm": 0.22260530292987823, "learning_rate": 7.252640125516189e-05, "loss": 0.6831, "step": 1609 }, { "epoch": 0.37105323807328877, "grad_norm": 0.23927612602710724, "learning_rate": 7.249306922664322e-05, "loss": 0.6926, "step": 1610 }, { "epoch": 0.37128370592302373, "grad_norm": 0.23161613941192627, "learning_rate": 7.245972466102766e-05, "loss": 0.6795, "step": 1611 }, { "epoch": 0.3715141737727587, "grad_norm": 0.2783213257789612, "learning_rate": 7.242636757690064e-05, "loss": 0.6919, "step": 1612 }, { "epoch": 0.37174464162249365, "grad_norm": 0.3050868809223175, "learning_rate": 7.239299799285462e-05, "loss": 0.6864, "step": 1613 }, { "epoch": 0.3719751094722286, "grad_norm": 0.2619262635707855, "learning_rate": 7.235961592748901e-05, "loss": 0.6822, "step": 1614 }, { "epoch": 0.3722055773219636, "grad_norm": 0.24144136905670166, "learning_rate": 7.232622139941016e-05, "loss": 0.6942, "step": 1615 }, { "epoch": 0.37243604517169854, "grad_norm": 0.2681979238986969, "learning_rate": 7.229281442723136e-05, "loss": 0.686, "step": 1616 }, { "epoch": 0.3726665130214335, "grad_norm": 0.23774147033691406, "learning_rate": 7.225939502957287e-05, "loss": 0.6849, "step": 1617 }, { "epoch": 0.37289698087116846, "grad_norm": 0.24318799376487732, "learning_rate": 7.222596322506188e-05, "loss": 0.6866, "step": 1618 }, { "epoch": 0.3731274487209034, "grad_norm": 0.2579040825366974, "learning_rate": 7.219251903233246e-05, "loss": 0.6873, "step": 1619 }, { "epoch": 0.3733579165706384, "grad_norm": 0.26902881264686584, "learning_rate": 7.215906247002557e-05, "loss": 0.684, "step": 1620 }, { "epoch": 0.37358838442037334, "grad_norm": 0.22406496107578278, "learning_rate": 7.212559355678915e-05, "loss": 0.6909, "step": 1621 }, { "epoch": 0.3738188522701083, "grad_norm": 0.2592219114303589, "learning_rate": 7.209211231127791e-05, "loss": 0.6849, "step": 1622 }, { "epoch": 0.37404932011984326, "grad_norm": 0.22760163247585297, "learning_rate": 7.205861875215357e-05, "loss": 0.6806, "step": 1623 }, { "epoch": 0.3742797879695782, "grad_norm": 0.24689623713493347, "learning_rate": 7.202511289808456e-05, "loss": 0.6842, "step": 1624 }, { "epoch": 0.3745102558193132, "grad_norm": 0.23074333369731903, "learning_rate": 7.199159476774627e-05, "loss": 0.6808, "step": 1625 }, { "epoch": 0.37474072366904815, "grad_norm": 0.23396410048007965, "learning_rate": 7.19580643798209e-05, "loss": 0.6801, "step": 1626 }, { "epoch": 0.3749711915187831, "grad_norm": 0.21473470330238342, "learning_rate": 7.192452175299748e-05, "loss": 0.6891, "step": 1627 }, { "epoch": 0.37520165936851807, "grad_norm": 0.24224236607551575, "learning_rate": 7.189096690597188e-05, "loss": 0.6844, "step": 1628 }, { "epoch": 0.37543212721825303, "grad_norm": 0.21217916905879974, "learning_rate": 7.185739985744675e-05, "loss": 0.681, "step": 1629 }, { "epoch": 0.375662595067988, "grad_norm": 0.24271690845489502, "learning_rate": 7.182382062613156e-05, "loss": 0.6834, "step": 1630 }, { "epoch": 0.37589306291772295, "grad_norm": 0.2046801894903183, "learning_rate": 7.179022923074258e-05, "loss": 0.685, "step": 1631 }, { "epoch": 0.3761235307674579, "grad_norm": 0.23926329612731934, "learning_rate": 7.175662569000282e-05, "loss": 0.6768, "step": 1632 }, { "epoch": 0.3763539986171929, "grad_norm": 0.2330237329006195, "learning_rate": 7.172301002264212e-05, "loss": 0.6862, "step": 1633 }, { "epoch": 0.37658446646692784, "grad_norm": 0.2538076937198639, "learning_rate": 7.168938224739704e-05, "loss": 0.6871, "step": 1634 }, { "epoch": 0.37681493431666285, "grad_norm": 0.2078879177570343, "learning_rate": 7.165574238301085e-05, "loss": 0.6845, "step": 1635 }, { "epoch": 0.3770454021663978, "grad_norm": 0.23577922582626343, "learning_rate": 7.162209044823367e-05, "loss": 0.6771, "step": 1636 }, { "epoch": 0.3772758700161328, "grad_norm": 0.22798825800418854, "learning_rate": 7.158842646182222e-05, "loss": 0.6861, "step": 1637 }, { "epoch": 0.37750633786586774, "grad_norm": 0.2441009134054184, "learning_rate": 7.155475044254006e-05, "loss": 0.6834, "step": 1638 }, { "epoch": 0.3777368057156027, "grad_norm": 0.2254721075296402, "learning_rate": 7.152106240915735e-05, "loss": 0.6814, "step": 1639 }, { "epoch": 0.37796727356533766, "grad_norm": 0.23852422833442688, "learning_rate": 7.148736238045098e-05, "loss": 0.6843, "step": 1640 }, { "epoch": 0.3781977414150726, "grad_norm": 0.23249536752700806, "learning_rate": 7.14536503752046e-05, "loss": 0.69, "step": 1641 }, { "epoch": 0.3784282092648076, "grad_norm": 0.2591071128845215, "learning_rate": 7.141992641220841e-05, "loss": 0.6809, "step": 1642 }, { "epoch": 0.37865867711454254, "grad_norm": 0.24217630922794342, "learning_rate": 7.138619051025935e-05, "loss": 0.6746, "step": 1643 }, { "epoch": 0.3788891449642775, "grad_norm": 0.24814555048942566, "learning_rate": 7.135244268816102e-05, "loss": 0.6856, "step": 1644 }, { "epoch": 0.37911961281401246, "grad_norm": 0.24192722141742706, "learning_rate": 7.131868296472366e-05, "loss": 0.6856, "step": 1645 }, { "epoch": 0.3793500806637474, "grad_norm": 0.27118441462516785, "learning_rate": 7.12849113587641e-05, "loss": 0.6829, "step": 1646 }, { "epoch": 0.3795805485134824, "grad_norm": 0.2808471918106079, "learning_rate": 7.125112788910581e-05, "loss": 0.6844, "step": 1647 }, { "epoch": 0.37981101636321735, "grad_norm": 0.23738627135753632, "learning_rate": 7.121733257457893e-05, "loss": 0.6808, "step": 1648 }, { "epoch": 0.3800414842129523, "grad_norm": 0.241289883852005, "learning_rate": 7.11835254340201e-05, "loss": 0.6853, "step": 1649 }, { "epoch": 0.38027195206268727, "grad_norm": 0.240788996219635, "learning_rate": 7.114970648627267e-05, "loss": 0.6805, "step": 1650 }, { "epoch": 0.38050241991242223, "grad_norm": 0.2523535192012787, "learning_rate": 7.111587575018648e-05, "loss": 0.6764, "step": 1651 }, { "epoch": 0.3807328877621572, "grad_norm": 0.2584069073200226, "learning_rate": 7.108203324461795e-05, "loss": 0.6913, "step": 1652 }, { "epoch": 0.38096335561189215, "grad_norm": 0.24159248173236847, "learning_rate": 7.104817898843013e-05, "loss": 0.6815, "step": 1653 }, { "epoch": 0.3811938234616271, "grad_norm": 0.22381487488746643, "learning_rate": 7.101431300049253e-05, "loss": 0.6824, "step": 1654 }, { "epoch": 0.3814242913113621, "grad_norm": 0.25524625182151794, "learning_rate": 7.098043529968123e-05, "loss": 0.6801, "step": 1655 }, { "epoch": 0.38165475916109703, "grad_norm": 0.24695929884910583, "learning_rate": 7.09465459048789e-05, "loss": 0.6799, "step": 1656 }, { "epoch": 0.381885227010832, "grad_norm": 0.21066734194755554, "learning_rate": 7.091264483497463e-05, "loss": 0.6713, "step": 1657 }, { "epoch": 0.38211569486056696, "grad_norm": 0.25811514258384705, "learning_rate": 7.087873210886406e-05, "loss": 0.6852, "step": 1658 }, { "epoch": 0.3823461627103019, "grad_norm": 0.21766284108161926, "learning_rate": 7.084480774544937e-05, "loss": 0.6869, "step": 1659 }, { "epoch": 0.3825766305600369, "grad_norm": 0.2117396891117096, "learning_rate": 7.081087176363916e-05, "loss": 0.6815, "step": 1660 }, { "epoch": 0.38280709840977184, "grad_norm": 0.23894590139389038, "learning_rate": 7.077692418234852e-05, "loss": 0.6919, "step": 1661 }, { "epoch": 0.3830375662595068, "grad_norm": 0.2442779392004013, "learning_rate": 7.074296502049903e-05, "loss": 0.6762, "step": 1662 }, { "epoch": 0.38326803410924176, "grad_norm": 0.20315802097320557, "learning_rate": 7.070899429701873e-05, "loss": 0.6747, "step": 1663 }, { "epoch": 0.3834985019589767, "grad_norm": 0.22201156616210938, "learning_rate": 7.067501203084203e-05, "loss": 0.6802, "step": 1664 }, { "epoch": 0.3837289698087117, "grad_norm": 0.21427859365940094, "learning_rate": 7.064101824090991e-05, "loss": 0.6758, "step": 1665 }, { "epoch": 0.38395943765844665, "grad_norm": 0.20603862404823303, "learning_rate": 7.060701294616963e-05, "loss": 0.6759, "step": 1666 }, { "epoch": 0.3841899055081816, "grad_norm": 0.2058810144662857, "learning_rate": 7.057299616557493e-05, "loss": 0.6883, "step": 1667 }, { "epoch": 0.38442037335791657, "grad_norm": 0.20582230389118195, "learning_rate": 7.053896791808598e-05, "loss": 0.6883, "step": 1668 }, { "epoch": 0.38465084120765153, "grad_norm": 0.20562750101089478, "learning_rate": 7.050492822266929e-05, "loss": 0.6827, "step": 1669 }, { "epoch": 0.3848813090573865, "grad_norm": 0.1945854127407074, "learning_rate": 7.047087709829777e-05, "loss": 0.6869, "step": 1670 }, { "epoch": 0.38511177690712145, "grad_norm": 0.22347545623779297, "learning_rate": 7.043681456395068e-05, "loss": 0.6801, "step": 1671 }, { "epoch": 0.3853422447568564, "grad_norm": 0.21079128980636597, "learning_rate": 7.04027406386137e-05, "loss": 0.6774, "step": 1672 }, { "epoch": 0.3855727126065914, "grad_norm": 0.2209564447402954, "learning_rate": 7.036865534127879e-05, "loss": 0.6855, "step": 1673 }, { "epoch": 0.38580318045632633, "grad_norm": 0.226369246840477, "learning_rate": 7.033455869094428e-05, "loss": 0.6808, "step": 1674 }, { "epoch": 0.3860336483060613, "grad_norm": 0.2145013064146042, "learning_rate": 7.030045070661484e-05, "loss": 0.6904, "step": 1675 }, { "epoch": 0.38626411615579626, "grad_norm": 0.20242154598236084, "learning_rate": 7.02663314073014e-05, "loss": 0.6812, "step": 1676 }, { "epoch": 0.3864945840055312, "grad_norm": 0.2313269078731537, "learning_rate": 7.023220081202128e-05, "loss": 0.6791, "step": 1677 }, { "epoch": 0.3867250518552662, "grad_norm": 0.2067273110151291, "learning_rate": 7.019805893979805e-05, "loss": 0.6795, "step": 1678 }, { "epoch": 0.38695551970500114, "grad_norm": 0.2214699238538742, "learning_rate": 7.016390580966157e-05, "loss": 0.6751, "step": 1679 }, { "epoch": 0.3871859875547361, "grad_norm": 0.23702481389045715, "learning_rate": 7.012974144064795e-05, "loss": 0.6868, "step": 1680 }, { "epoch": 0.38741645540447106, "grad_norm": 0.23808321356773376, "learning_rate": 7.00955658517996e-05, "loss": 0.6805, "step": 1681 }, { "epoch": 0.387646923254206, "grad_norm": 0.22761982679367065, "learning_rate": 7.00613790621652e-05, "loss": 0.6794, "step": 1682 }, { "epoch": 0.387877391103941, "grad_norm": 0.22034461796283722, "learning_rate": 7.002718109079964e-05, "loss": 0.6863, "step": 1683 }, { "epoch": 0.38810785895367594, "grad_norm": 0.24931013584136963, "learning_rate": 6.999297195676399e-05, "loss": 0.6806, "step": 1684 }, { "epoch": 0.3883383268034109, "grad_norm": 0.24123869836330414, "learning_rate": 6.995875167912566e-05, "loss": 0.6765, "step": 1685 }, { "epoch": 0.38856879465314587, "grad_norm": 0.20375779271125793, "learning_rate": 6.992452027695821e-05, "loss": 0.677, "step": 1686 }, { "epoch": 0.3887992625028808, "grad_norm": 0.2386665791273117, "learning_rate": 6.989027776934138e-05, "loss": 0.6855, "step": 1687 }, { "epoch": 0.3890297303526158, "grad_norm": 0.21312054991722107, "learning_rate": 6.985602417536112e-05, "loss": 0.6694, "step": 1688 }, { "epoch": 0.38926019820235075, "grad_norm": 0.21280424296855927, "learning_rate": 6.982175951410957e-05, "loss": 0.674, "step": 1689 }, { "epoch": 0.3894906660520857, "grad_norm": 0.24086953699588776, "learning_rate": 6.978748380468504e-05, "loss": 0.6634, "step": 1690 }, { "epoch": 0.38972113390182067, "grad_norm": 0.21306774020195007, "learning_rate": 6.975319706619197e-05, "loss": 0.6797, "step": 1691 }, { "epoch": 0.38995160175155563, "grad_norm": 0.22666017711162567, "learning_rate": 6.971889931774098e-05, "loss": 0.6839, "step": 1692 }, { "epoch": 0.3901820696012906, "grad_norm": 0.22966359555721283, "learning_rate": 6.968459057844881e-05, "loss": 0.6755, "step": 1693 }, { "epoch": 0.3904125374510256, "grad_norm": 0.2565663158893585, "learning_rate": 6.96502708674383e-05, "loss": 0.6831, "step": 1694 }, { "epoch": 0.39064300530076057, "grad_norm": 0.21080222725868225, "learning_rate": 6.961594020383848e-05, "loss": 0.6786, "step": 1695 }, { "epoch": 0.39087347315049553, "grad_norm": 0.22605536878108978, "learning_rate": 6.958159860678439e-05, "loss": 0.6843, "step": 1696 }, { "epoch": 0.3911039410002305, "grad_norm": 0.23553648591041565, "learning_rate": 6.954724609541727e-05, "loss": 0.6775, "step": 1697 }, { "epoch": 0.39133440884996545, "grad_norm": 0.2261909395456314, "learning_rate": 6.951288268888431e-05, "loss": 0.6903, "step": 1698 }, { "epoch": 0.3915648766997004, "grad_norm": 0.201494961977005, "learning_rate": 6.947850840633892e-05, "loss": 0.6812, "step": 1699 }, { "epoch": 0.3917953445494354, "grad_norm": 0.24182942509651184, "learning_rate": 6.944412326694046e-05, "loss": 0.6807, "step": 1700 }, { "epoch": 0.39202581239917034, "grad_norm": 0.2183101326227188, "learning_rate": 6.940972728985438e-05, "loss": 0.6847, "step": 1701 }, { "epoch": 0.3922562802489053, "grad_norm": 0.21912945806980133, "learning_rate": 6.93753204942522e-05, "loss": 0.6768, "step": 1702 }, { "epoch": 0.39248674809864026, "grad_norm": 0.24168923497200012, "learning_rate": 6.93409028993114e-05, "loss": 0.6825, "step": 1703 }, { "epoch": 0.3927172159483752, "grad_norm": 0.21193869411945343, "learning_rate": 6.930647452421557e-05, "loss": 0.6891, "step": 1704 }, { "epoch": 0.3929476837981102, "grad_norm": 0.23223961889743805, "learning_rate": 6.927203538815422e-05, "loss": 0.6724, "step": 1705 }, { "epoch": 0.39317815164784514, "grad_norm": 0.2386191338300705, "learning_rate": 6.923758551032291e-05, "loss": 0.6857, "step": 1706 }, { "epoch": 0.3934086194975801, "grad_norm": 0.2278871089220047, "learning_rate": 6.92031249099232e-05, "loss": 0.6871, "step": 1707 }, { "epoch": 0.39363908734731506, "grad_norm": 0.25711220502853394, "learning_rate": 6.916865360616256e-05, "loss": 0.6841, "step": 1708 }, { "epoch": 0.39386955519705, "grad_norm": 0.21602317690849304, "learning_rate": 6.91341716182545e-05, "loss": 0.6761, "step": 1709 }, { "epoch": 0.394100023046785, "grad_norm": 0.24047702550888062, "learning_rate": 6.909967896541843e-05, "loss": 0.6855, "step": 1710 }, { "epoch": 0.39433049089651995, "grad_norm": 0.2949325740337372, "learning_rate": 6.906517566687973e-05, "loss": 0.6794, "step": 1711 }, { "epoch": 0.3945609587462549, "grad_norm": 0.2666512727737427, "learning_rate": 6.903066174186974e-05, "loss": 0.685, "step": 1712 }, { "epoch": 0.39479142659598987, "grad_norm": 0.22977623343467712, "learning_rate": 6.899613720962566e-05, "loss": 0.6855, "step": 1713 }, { "epoch": 0.39502189444572483, "grad_norm": 0.2291439175605774, "learning_rate": 6.896160208939063e-05, "loss": 0.6755, "step": 1714 }, { "epoch": 0.3952523622954598, "grad_norm": 0.26552703976631165, "learning_rate": 6.892705640041373e-05, "loss": 0.6886, "step": 1715 }, { "epoch": 0.39548283014519475, "grad_norm": 0.20833250880241394, "learning_rate": 6.889250016194988e-05, "loss": 0.6832, "step": 1716 }, { "epoch": 0.3957132979949297, "grad_norm": 0.23018650710582733, "learning_rate": 6.88579333932599e-05, "loss": 0.6717, "step": 1717 }, { "epoch": 0.3959437658446647, "grad_norm": 0.23923005163669586, "learning_rate": 6.882335611361046e-05, "loss": 0.6858, "step": 1718 }, { "epoch": 0.39617423369439964, "grad_norm": 0.22628365457057953, "learning_rate": 6.878876834227413e-05, "loss": 0.6803, "step": 1719 }, { "epoch": 0.3964047015441346, "grad_norm": 0.232679545879364, "learning_rate": 6.87541700985293e-05, "loss": 0.6903, "step": 1720 }, { "epoch": 0.39663516939386956, "grad_norm": 0.24142929911613464, "learning_rate": 6.871956140166019e-05, "loss": 0.676, "step": 1721 }, { "epoch": 0.3968656372436045, "grad_norm": 0.21797588467597961, "learning_rate": 6.868494227095688e-05, "loss": 0.6824, "step": 1722 }, { "epoch": 0.3970961050933395, "grad_norm": 0.24194461107254028, "learning_rate": 6.86503127257152e-05, "loss": 0.6743, "step": 1723 }, { "epoch": 0.39732657294307444, "grad_norm": 0.21700245141983032, "learning_rate": 6.861567278523687e-05, "loss": 0.6746, "step": 1724 }, { "epoch": 0.3975570407928094, "grad_norm": 0.2519386410713196, "learning_rate": 6.858102246882934e-05, "loss": 0.6816, "step": 1725 }, { "epoch": 0.39778750864254436, "grad_norm": 0.2088238000869751, "learning_rate": 6.854636179580587e-05, "loss": 0.6775, "step": 1726 }, { "epoch": 0.3980179764922793, "grad_norm": 0.23535437881946564, "learning_rate": 6.851169078548549e-05, "loss": 0.6774, "step": 1727 }, { "epoch": 0.3982484443420143, "grad_norm": 0.22251394391059875, "learning_rate": 6.847700945719298e-05, "loss": 0.68, "step": 1728 }, { "epoch": 0.39847891219174925, "grad_norm": 0.21713124215602875, "learning_rate": 6.844231783025888e-05, "loss": 0.6772, "step": 1729 }, { "epoch": 0.3987093800414842, "grad_norm": 0.21638789772987366, "learning_rate": 6.840761592401948e-05, "loss": 0.6854, "step": 1730 }, { "epoch": 0.39893984789121917, "grad_norm": 0.22768516838550568, "learning_rate": 6.837290375781678e-05, "loss": 0.6828, "step": 1731 }, { "epoch": 0.39917031574095413, "grad_norm": 0.22035862505435944, "learning_rate": 6.833818135099852e-05, "loss": 0.678, "step": 1732 }, { "epoch": 0.3994007835906891, "grad_norm": 0.2309066206216812, "learning_rate": 6.830344872291812e-05, "loss": 0.6797, "step": 1733 }, { "epoch": 0.39963125144042405, "grad_norm": 0.2268085926771164, "learning_rate": 6.826870589293474e-05, "loss": 0.6789, "step": 1734 }, { "epoch": 0.399861719290159, "grad_norm": 0.22533169388771057, "learning_rate": 6.823395288041316e-05, "loss": 0.6791, "step": 1735 }, { "epoch": 0.400092187139894, "grad_norm": 0.22329865396022797, "learning_rate": 6.819918970472391e-05, "loss": 0.6779, "step": 1736 }, { "epoch": 0.40032265498962893, "grad_norm": 0.22880035638809204, "learning_rate": 6.816441638524315e-05, "loss": 0.6845, "step": 1737 }, { "epoch": 0.4005531228393639, "grad_norm": 0.20969046652317047, "learning_rate": 6.812963294135265e-05, "loss": 0.6739, "step": 1738 }, { "epoch": 0.40078359068909886, "grad_norm": 0.20743465423583984, "learning_rate": 6.809483939243992e-05, "loss": 0.6761, "step": 1739 }, { "epoch": 0.4010140585388338, "grad_norm": 0.22215551137924194, "learning_rate": 6.806003575789801e-05, "loss": 0.6696, "step": 1740 }, { "epoch": 0.4012445263885688, "grad_norm": 0.20283448696136475, "learning_rate": 6.802522205712564e-05, "loss": 0.6786, "step": 1741 }, { "epoch": 0.40147499423830374, "grad_norm": 0.21726839244365692, "learning_rate": 6.799039830952718e-05, "loss": 0.6883, "step": 1742 }, { "epoch": 0.4017054620880387, "grad_norm": 0.20720744132995605, "learning_rate": 6.795556453451246e-05, "loss": 0.6844, "step": 1743 }, { "epoch": 0.40193592993777366, "grad_norm": 0.21686407923698425, "learning_rate": 6.792072075149707e-05, "loss": 0.6776, "step": 1744 }, { "epoch": 0.4021663977875086, "grad_norm": 0.21741721034049988, "learning_rate": 6.788586697990206e-05, "loss": 0.6675, "step": 1745 }, { "epoch": 0.4023968656372436, "grad_norm": 0.22603777050971985, "learning_rate": 6.785100323915409e-05, "loss": 0.6708, "step": 1746 }, { "epoch": 0.40262733348697854, "grad_norm": 0.2569337487220764, "learning_rate": 6.781612954868538e-05, "loss": 0.6787, "step": 1747 }, { "epoch": 0.4028578013367135, "grad_norm": 0.21277722716331482, "learning_rate": 6.778124592793368e-05, "loss": 0.6772, "step": 1748 }, { "epoch": 0.40308826918644847, "grad_norm": 0.23380626738071442, "learning_rate": 6.774635239634229e-05, "loss": 0.6775, "step": 1749 }, { "epoch": 0.40331873703618343, "grad_norm": 0.22136418521404266, "learning_rate": 6.771144897336001e-05, "loss": 0.679, "step": 1750 }, { "epoch": 0.4035492048859184, "grad_norm": 0.22446192800998688, "learning_rate": 6.767653567844121e-05, "loss": 0.6803, "step": 1751 }, { "epoch": 0.40377967273565335, "grad_norm": 0.21622978150844574, "learning_rate": 6.764161253104567e-05, "loss": 0.6771, "step": 1752 }, { "epoch": 0.4040101405853883, "grad_norm": 0.2152906060218811, "learning_rate": 6.760667955063876e-05, "loss": 0.6848, "step": 1753 }, { "epoch": 0.4042406084351233, "grad_norm": 0.19828496873378754, "learning_rate": 6.757173675669127e-05, "loss": 0.6788, "step": 1754 }, { "epoch": 0.4044710762848583, "grad_norm": 0.20052795112133026, "learning_rate": 6.753678416867944e-05, "loss": 0.6759, "step": 1755 }, { "epoch": 0.40470154413459325, "grad_norm": 0.21270537376403809, "learning_rate": 6.750182180608505e-05, "loss": 0.6801, "step": 1756 }, { "epoch": 0.4049320119843282, "grad_norm": 0.20280341804027557, "learning_rate": 6.746684968839525e-05, "loss": 0.6724, "step": 1757 }, { "epoch": 0.40516247983406317, "grad_norm": 0.22918108105659485, "learning_rate": 6.743186783510269e-05, "loss": 0.6797, "step": 1758 }, { "epoch": 0.40539294768379813, "grad_norm": 0.23558905720710754, "learning_rate": 6.739687626570541e-05, "loss": 0.6875, "step": 1759 }, { "epoch": 0.4056234155335331, "grad_norm": 0.19638538360595703, "learning_rate": 6.736187499970684e-05, "loss": 0.6751, "step": 1760 }, { "epoch": 0.40585388338326805, "grad_norm": 0.19849823415279388, "learning_rate": 6.732686405661587e-05, "loss": 0.6771, "step": 1761 }, { "epoch": 0.406084351233003, "grad_norm": 0.2098354548215866, "learning_rate": 6.729184345594677e-05, "loss": 0.6775, "step": 1762 }, { "epoch": 0.406314819082738, "grad_norm": 0.20642666518688202, "learning_rate": 6.725681321721916e-05, "loss": 0.6746, "step": 1763 }, { "epoch": 0.40654528693247294, "grad_norm": 0.23004627227783203, "learning_rate": 6.722177335995805e-05, "loss": 0.6811, "step": 1764 }, { "epoch": 0.4067757547822079, "grad_norm": 0.21482717990875244, "learning_rate": 6.718672390369384e-05, "loss": 0.6762, "step": 1765 }, { "epoch": 0.40700622263194286, "grad_norm": 0.19076307117938995, "learning_rate": 6.715166486796225e-05, "loss": 0.6849, "step": 1766 }, { "epoch": 0.4072366904816778, "grad_norm": 0.23472625017166138, "learning_rate": 6.711659627230432e-05, "loss": 0.6638, "step": 1767 }, { "epoch": 0.4074671583314128, "grad_norm": 0.20769445598125458, "learning_rate": 6.708151813626648e-05, "loss": 0.6732, "step": 1768 }, { "epoch": 0.40769762618114774, "grad_norm": 0.2301913946866989, "learning_rate": 6.704643047940041e-05, "loss": 0.6707, "step": 1769 }, { "epoch": 0.4079280940308827, "grad_norm": 0.2207859307527542, "learning_rate": 6.701133332126313e-05, "loss": 0.6799, "step": 1770 }, { "epoch": 0.40815856188061767, "grad_norm": 0.20174138247966766, "learning_rate": 6.697622668141698e-05, "loss": 0.6815, "step": 1771 }, { "epoch": 0.4083890297303526, "grad_norm": 0.24854540824890137, "learning_rate": 6.694111057942953e-05, "loss": 0.6759, "step": 1772 }, { "epoch": 0.4086194975800876, "grad_norm": 0.2082047462463379, "learning_rate": 6.690598503487368e-05, "loss": 0.6792, "step": 1773 }, { "epoch": 0.40884996542982255, "grad_norm": 0.2406819462776184, "learning_rate": 6.687085006732755e-05, "loss": 0.6852, "step": 1774 }, { "epoch": 0.4090804332795575, "grad_norm": 0.22995240986347198, "learning_rate": 6.683570569637451e-05, "loss": 0.675, "step": 1775 }, { "epoch": 0.40931090112929247, "grad_norm": 0.21281692385673523, "learning_rate": 6.680055194160323e-05, "loss": 0.6801, "step": 1776 }, { "epoch": 0.40954136897902743, "grad_norm": 0.2113114297389984, "learning_rate": 6.676538882260752e-05, "loss": 0.6841, "step": 1777 }, { "epoch": 0.4097718368287624, "grad_norm": 0.22743813693523407, "learning_rate": 6.67302163589865e-05, "loss": 0.6817, "step": 1778 }, { "epoch": 0.41000230467849735, "grad_norm": 0.23003269731998444, "learning_rate": 6.669503457034446e-05, "loss": 0.6772, "step": 1779 }, { "epoch": 0.4102327725282323, "grad_norm": 0.22872349619865417, "learning_rate": 6.665984347629083e-05, "loss": 0.6782, "step": 1780 }, { "epoch": 0.4104632403779673, "grad_norm": 0.2347979098558426, "learning_rate": 6.662464309644034e-05, "loss": 0.678, "step": 1781 }, { "epoch": 0.41069370822770224, "grad_norm": 0.21414168179035187, "learning_rate": 6.658943345041279e-05, "loss": 0.6821, "step": 1782 }, { "epoch": 0.4109241760774372, "grad_norm": 0.21668468415737152, "learning_rate": 6.655421455783324e-05, "loss": 0.6705, "step": 1783 }, { "epoch": 0.41115464392717216, "grad_norm": 0.22316017746925354, "learning_rate": 6.651898643833182e-05, "loss": 0.6775, "step": 1784 }, { "epoch": 0.4113851117769071, "grad_norm": 0.21024148166179657, "learning_rate": 6.648374911154385e-05, "loss": 0.678, "step": 1785 }, { "epoch": 0.4116155796266421, "grad_norm": 0.2320425808429718, "learning_rate": 6.644850259710978e-05, "loss": 0.6739, "step": 1786 }, { "epoch": 0.41184604747637704, "grad_norm": 0.21307185292243958, "learning_rate": 6.641324691467514e-05, "loss": 0.6781, "step": 1787 }, { "epoch": 0.412076515326112, "grad_norm": 0.21239805221557617, "learning_rate": 6.637798208389063e-05, "loss": 0.671, "step": 1788 }, { "epoch": 0.41230698317584696, "grad_norm": 0.20224244892597198, "learning_rate": 6.634270812441201e-05, "loss": 0.6801, "step": 1789 }, { "epoch": 0.4125374510255819, "grad_norm": 0.228766068816185, "learning_rate": 6.630742505590015e-05, "loss": 0.6785, "step": 1790 }, { "epoch": 0.4127679188753169, "grad_norm": 0.24188366532325745, "learning_rate": 6.627213289802098e-05, "loss": 0.6889, "step": 1791 }, { "epoch": 0.41299838672505185, "grad_norm": 0.2053944319486618, "learning_rate": 6.623683167044548e-05, "loss": 0.6838, "step": 1792 }, { "epoch": 0.4132288545747868, "grad_norm": 0.22618556022644043, "learning_rate": 6.620152139284974e-05, "loss": 0.6778, "step": 1793 }, { "epoch": 0.41345932242452177, "grad_norm": 0.20091716945171356, "learning_rate": 6.616620208491482e-05, "loss": 0.6835, "step": 1794 }, { "epoch": 0.41368979027425673, "grad_norm": 0.2244507372379303, "learning_rate": 6.61308737663269e-05, "loss": 0.6756, "step": 1795 }, { "epoch": 0.4139202581239917, "grad_norm": 0.24260321259498596, "learning_rate": 6.609553645677714e-05, "loss": 0.6853, "step": 1796 }, { "epoch": 0.41415072597372665, "grad_norm": 0.21369081735610962, "learning_rate": 6.606019017596164e-05, "loss": 0.6953, "step": 1797 }, { "epoch": 0.4143811938234616, "grad_norm": 0.20895330607891083, "learning_rate": 6.602483494358164e-05, "loss": 0.6743, "step": 1798 }, { "epoch": 0.4146116616731966, "grad_norm": 0.231523796916008, "learning_rate": 6.598947077934326e-05, "loss": 0.6732, "step": 1799 }, { "epoch": 0.41484212952293154, "grad_norm": 0.23038837313652039, "learning_rate": 6.595409770295765e-05, "loss": 0.6803, "step": 1800 }, { "epoch": 0.4150725973726665, "grad_norm": 0.2283259779214859, "learning_rate": 6.591871573414093e-05, "loss": 0.6748, "step": 1801 }, { "epoch": 0.41530306522240146, "grad_norm": 0.23936063051223755, "learning_rate": 6.588332489261412e-05, "loss": 0.676, "step": 1802 }, { "epoch": 0.4155335330721364, "grad_norm": 0.20093607902526855, "learning_rate": 6.584792519810325e-05, "loss": 0.6767, "step": 1803 }, { "epoch": 0.4157640009218714, "grad_norm": 0.24578706920146942, "learning_rate": 6.581251667033927e-05, "loss": 0.678, "step": 1804 }, { "epoch": 0.41599446877160634, "grad_norm": 0.21614333987236023, "learning_rate": 6.577709932905802e-05, "loss": 0.6824, "step": 1805 }, { "epoch": 0.4162249366213413, "grad_norm": 0.22684995830059052, "learning_rate": 6.574167319400028e-05, "loss": 0.6815, "step": 1806 }, { "epoch": 0.41645540447107626, "grad_norm": 0.2361087203025818, "learning_rate": 6.570623828491175e-05, "loss": 0.681, "step": 1807 }, { "epoch": 0.4166858723208112, "grad_norm": 0.2213921695947647, "learning_rate": 6.567079462154298e-05, "loss": 0.6683, "step": 1808 }, { "epoch": 0.4169163401705462, "grad_norm": 0.22478877007961273, "learning_rate": 6.563534222364941e-05, "loss": 0.6801, "step": 1809 }, { "epoch": 0.41714680802028115, "grad_norm": 0.22775450348854065, "learning_rate": 6.559988111099139e-05, "loss": 0.6733, "step": 1810 }, { "epoch": 0.4173772758700161, "grad_norm": 0.21958664059638977, "learning_rate": 6.556441130333403e-05, "loss": 0.6759, "step": 1811 }, { "epoch": 0.41760774371975107, "grad_norm": 0.21274985373020172, "learning_rate": 6.55289328204474e-05, "loss": 0.6758, "step": 1812 }, { "epoch": 0.4178382115694861, "grad_norm": 0.2310141921043396, "learning_rate": 6.549344568210636e-05, "loss": 0.6769, "step": 1813 }, { "epoch": 0.41806867941922105, "grad_norm": 0.20216527581214905, "learning_rate": 6.545794990809056e-05, "loss": 0.6665, "step": 1814 }, { "epoch": 0.418299147268956, "grad_norm": 0.22055986523628235, "learning_rate": 6.54224455181845e-05, "loss": 0.6646, "step": 1815 }, { "epoch": 0.41852961511869097, "grad_norm": 0.22518932819366455, "learning_rate": 6.538693253217748e-05, "loss": 0.6865, "step": 1816 }, { "epoch": 0.41876008296842593, "grad_norm": 0.23729877173900604, "learning_rate": 6.535141096986359e-05, "loss": 0.6711, "step": 1817 }, { "epoch": 0.4189905508181609, "grad_norm": 0.2299748808145523, "learning_rate": 6.531588085104169e-05, "loss": 0.6771, "step": 1818 }, { "epoch": 0.41922101866789585, "grad_norm": 0.248263880610466, "learning_rate": 6.528034219551543e-05, "loss": 0.6709, "step": 1819 }, { "epoch": 0.4194514865176308, "grad_norm": 0.22252872586250305, "learning_rate": 6.524479502309315e-05, "loss": 0.6814, "step": 1820 }, { "epoch": 0.4196819543673658, "grad_norm": 0.24352537095546722, "learning_rate": 6.520923935358806e-05, "loss": 0.6792, "step": 1821 }, { "epoch": 0.41991242221710073, "grad_norm": 0.24360835552215576, "learning_rate": 6.517367520681801e-05, "loss": 0.6874, "step": 1822 }, { "epoch": 0.4201428900668357, "grad_norm": 0.24170473217964172, "learning_rate": 6.513810260260558e-05, "loss": 0.6815, "step": 1823 }, { "epoch": 0.42037335791657066, "grad_norm": 0.2242230921983719, "learning_rate": 6.510252156077813e-05, "loss": 0.676, "step": 1824 }, { "epoch": 0.4206038257663056, "grad_norm": 0.22861500084400177, "learning_rate": 6.506693210116764e-05, "loss": 0.671, "step": 1825 }, { "epoch": 0.4208342936160406, "grad_norm": 0.23668934404850006, "learning_rate": 6.503133424361082e-05, "loss": 0.6788, "step": 1826 }, { "epoch": 0.42106476146577554, "grad_norm": 0.2199348360300064, "learning_rate": 6.499572800794911e-05, "loss": 0.6786, "step": 1827 }, { "epoch": 0.4212952293155105, "grad_norm": 0.23262521624565125, "learning_rate": 6.496011341402852e-05, "loss": 0.6797, "step": 1828 }, { "epoch": 0.42152569716524546, "grad_norm": 0.2463759034872055, "learning_rate": 6.492449048169977e-05, "loss": 0.6763, "step": 1829 }, { "epoch": 0.4217561650149804, "grad_norm": 0.2045678198337555, "learning_rate": 6.488885923081827e-05, "loss": 0.6856, "step": 1830 }, { "epoch": 0.4219866328647154, "grad_norm": 0.23405107855796814, "learning_rate": 6.485321968124398e-05, "loss": 0.671, "step": 1831 }, { "epoch": 0.42221710071445034, "grad_norm": 0.21921029686927795, "learning_rate": 6.481757185284157e-05, "loss": 0.6799, "step": 1832 }, { "epoch": 0.4224475685641853, "grad_norm": 0.218912273645401, "learning_rate": 6.478191576548024e-05, "loss": 0.672, "step": 1833 }, { "epoch": 0.42267803641392027, "grad_norm": 0.20524494349956512, "learning_rate": 6.474625143903387e-05, "loss": 0.686, "step": 1834 }, { "epoch": 0.4229085042636552, "grad_norm": 0.2237442284822464, "learning_rate": 6.471057889338089e-05, "loss": 0.6793, "step": 1835 }, { "epoch": 0.4231389721133902, "grad_norm": 0.21341606974601746, "learning_rate": 6.46748981484043e-05, "loss": 0.678, "step": 1836 }, { "epoch": 0.42336943996312515, "grad_norm": 0.258621484041214, "learning_rate": 6.463920922399173e-05, "loss": 0.6849, "step": 1837 }, { "epoch": 0.4235999078128601, "grad_norm": 0.23613744974136353, "learning_rate": 6.46035121400353e-05, "loss": 0.6809, "step": 1838 }, { "epoch": 0.42383037566259507, "grad_norm": 0.21923868358135223, "learning_rate": 6.456780691643171e-05, "loss": 0.6752, "step": 1839 }, { "epoch": 0.42406084351233003, "grad_norm": 0.26361706852912903, "learning_rate": 6.453209357308224e-05, "loss": 0.6801, "step": 1840 }, { "epoch": 0.424291311362065, "grad_norm": 0.22276267409324646, "learning_rate": 6.449637212989256e-05, "loss": 0.6755, "step": 1841 }, { "epoch": 0.42452177921179995, "grad_norm": 0.2298184633255005, "learning_rate": 6.446064260677303e-05, "loss": 0.6751, "step": 1842 }, { "epoch": 0.4247522470615349, "grad_norm": 0.2290985882282257, "learning_rate": 6.442490502363838e-05, "loss": 0.6794, "step": 1843 }, { "epoch": 0.4249827149112699, "grad_norm": 0.2414330095052719, "learning_rate": 6.438915940040791e-05, "loss": 0.6783, "step": 1844 }, { "epoch": 0.42521318276100484, "grad_norm": 0.21323642134666443, "learning_rate": 6.435340575700536e-05, "loss": 0.6757, "step": 1845 }, { "epoch": 0.4254436506107398, "grad_norm": 0.24010320007801056, "learning_rate": 6.431764411335894e-05, "loss": 0.6741, "step": 1846 }, { "epoch": 0.42567411846047476, "grad_norm": 0.22818125784397125, "learning_rate": 6.428187448940136e-05, "loss": 0.676, "step": 1847 }, { "epoch": 0.4259045863102097, "grad_norm": 0.26238662004470825, "learning_rate": 6.424609690506972e-05, "loss": 0.6724, "step": 1848 }, { "epoch": 0.4261350541599447, "grad_norm": 0.21544672548770905, "learning_rate": 6.421031138030562e-05, "loss": 0.6768, "step": 1849 }, { "epoch": 0.42636552200967964, "grad_norm": 0.25987011194229126, "learning_rate": 6.417451793505502e-05, "loss": 0.6761, "step": 1850 }, { "epoch": 0.4265959898594146, "grad_norm": 0.23844222724437714, "learning_rate": 6.413871658926833e-05, "loss": 0.6709, "step": 1851 }, { "epoch": 0.42682645770914956, "grad_norm": 0.25137385725975037, "learning_rate": 6.41029073629004e-05, "loss": 0.6798, "step": 1852 }, { "epoch": 0.4270569255588845, "grad_norm": 0.2629540264606476, "learning_rate": 6.406709027591039e-05, "loss": 0.6829, "step": 1853 }, { "epoch": 0.4272873934086195, "grad_norm": 0.27725306153297424, "learning_rate": 6.403126534826189e-05, "loss": 0.6825, "step": 1854 }, { "epoch": 0.42751786125835445, "grad_norm": 0.2338194102048874, "learning_rate": 6.399543259992288e-05, "loss": 0.6717, "step": 1855 }, { "epoch": 0.4277483291080894, "grad_norm": 0.26456090807914734, "learning_rate": 6.395959205086564e-05, "loss": 0.6735, "step": 1856 }, { "epoch": 0.42797879695782437, "grad_norm": 0.22425277531147003, "learning_rate": 6.392374372106686e-05, "loss": 0.6744, "step": 1857 }, { "epoch": 0.42820926480755933, "grad_norm": 0.26653608679771423, "learning_rate": 6.388788763050753e-05, "loss": 0.6773, "step": 1858 }, { "epoch": 0.4284397326572943, "grad_norm": 0.29735860228538513, "learning_rate": 6.385202379917297e-05, "loss": 0.6769, "step": 1859 }, { "epoch": 0.42867020050702925, "grad_norm": 0.25533658266067505, "learning_rate": 6.381615224705283e-05, "loss": 0.6652, "step": 1860 }, { "epoch": 0.4289006683567642, "grad_norm": 0.27232304215431213, "learning_rate": 6.378027299414104e-05, "loss": 0.6763, "step": 1861 }, { "epoch": 0.4291311362064992, "grad_norm": 0.25832486152648926, "learning_rate": 6.374438606043582e-05, "loss": 0.6839, "step": 1862 }, { "epoch": 0.42936160405623414, "grad_norm": 0.22207960486412048, "learning_rate": 6.370849146593973e-05, "loss": 0.6797, "step": 1863 }, { "epoch": 0.4295920719059691, "grad_norm": 0.2290259748697281, "learning_rate": 6.367258923065951e-05, "loss": 0.6747, "step": 1864 }, { "epoch": 0.42982253975570406, "grad_norm": 0.24262063205242157, "learning_rate": 6.363667937460624e-05, "loss": 0.6784, "step": 1865 }, { "epoch": 0.430053007605439, "grad_norm": 0.21408423781394958, "learning_rate": 6.360076191779519e-05, "loss": 0.6623, "step": 1866 }, { "epoch": 0.430283475455174, "grad_norm": 0.2212885320186615, "learning_rate": 6.356483688024588e-05, "loss": 0.6676, "step": 1867 }, { "epoch": 0.43051394330490894, "grad_norm": 0.23267394304275513, "learning_rate": 6.352890428198208e-05, "loss": 0.6764, "step": 1868 }, { "epoch": 0.4307444111546439, "grad_norm": 0.21774008870124817, "learning_rate": 6.349296414303176e-05, "loss": 0.6752, "step": 1869 }, { "epoch": 0.43097487900437886, "grad_norm": 0.22179049253463745, "learning_rate": 6.345701648342709e-05, "loss": 0.6886, "step": 1870 }, { "epoch": 0.4312053468541138, "grad_norm": 0.2196541428565979, "learning_rate": 6.342106132320442e-05, "loss": 0.6793, "step": 1871 }, { "epoch": 0.4314358147038488, "grad_norm": 0.22896122932434082, "learning_rate": 6.338509868240432e-05, "loss": 0.6643, "step": 1872 }, { "epoch": 0.4316662825535838, "grad_norm": 0.22660693526268005, "learning_rate": 6.334912858107147e-05, "loss": 0.6817, "step": 1873 }, { "epoch": 0.43189675040331876, "grad_norm": 0.1979314535856247, "learning_rate": 6.331315103925475e-05, "loss": 0.6786, "step": 1874 }, { "epoch": 0.4321272182530537, "grad_norm": 0.22958844900131226, "learning_rate": 6.327716607700719e-05, "loss": 0.6719, "step": 1875 }, { "epoch": 0.4323576861027887, "grad_norm": 0.20848886668682098, "learning_rate": 6.324117371438593e-05, "loss": 0.6718, "step": 1876 }, { "epoch": 0.43258815395252365, "grad_norm": 0.21527381241321564, "learning_rate": 6.320517397145228e-05, "loss": 0.6845, "step": 1877 }, { "epoch": 0.4328186218022586, "grad_norm": 0.20677097141742706, "learning_rate": 6.316916686827159e-05, "loss": 0.6769, "step": 1878 }, { "epoch": 0.43304908965199357, "grad_norm": 0.20856349170207977, "learning_rate": 6.313315242491338e-05, "loss": 0.6758, "step": 1879 }, { "epoch": 0.43327955750172853, "grad_norm": 0.21471652388572693, "learning_rate": 6.309713066145123e-05, "loss": 0.6686, "step": 1880 }, { "epoch": 0.4335100253514635, "grad_norm": 0.22820867598056793, "learning_rate": 6.306110159796282e-05, "loss": 0.6764, "step": 1881 }, { "epoch": 0.43374049320119845, "grad_norm": 0.21438585221767426, "learning_rate": 6.302506525452986e-05, "loss": 0.675, "step": 1882 }, { "epoch": 0.4339709610509334, "grad_norm": 0.21153029799461365, "learning_rate": 6.298902165123815e-05, "loss": 0.6701, "step": 1883 }, { "epoch": 0.4342014289006684, "grad_norm": 0.21586838364601135, "learning_rate": 6.295297080817754e-05, "loss": 0.6741, "step": 1884 }, { "epoch": 0.43443189675040333, "grad_norm": 0.20780424773693085, "learning_rate": 6.29169127454419e-05, "loss": 0.6765, "step": 1885 }, { "epoch": 0.4346623646001383, "grad_norm": 0.2166220098733902, "learning_rate": 6.288084748312915e-05, "loss": 0.6734, "step": 1886 }, { "epoch": 0.43489283244987326, "grad_norm": 0.209901362657547, "learning_rate": 6.284477504134116e-05, "loss": 0.6738, "step": 1887 }, { "epoch": 0.4351233002996082, "grad_norm": 0.21214714646339417, "learning_rate": 6.280869544018385e-05, "loss": 0.6764, "step": 1888 }, { "epoch": 0.4353537681493432, "grad_norm": 0.22302688658237457, "learning_rate": 6.277260869976716e-05, "loss": 0.6727, "step": 1889 }, { "epoch": 0.43558423599907814, "grad_norm": 0.21137331426143646, "learning_rate": 6.273651484020492e-05, "loss": 0.6813, "step": 1890 }, { "epoch": 0.4358147038488131, "grad_norm": 0.23412688076496124, "learning_rate": 6.270041388161503e-05, "loss": 0.6748, "step": 1891 }, { "epoch": 0.43604517169854806, "grad_norm": 0.21604032814502716, "learning_rate": 6.26643058441193e-05, "loss": 0.676, "step": 1892 }, { "epoch": 0.436275639548283, "grad_norm": 0.2466844618320465, "learning_rate": 6.262819074784343e-05, "loss": 0.6804, "step": 1893 }, { "epoch": 0.436506107398018, "grad_norm": 0.2448147088289261, "learning_rate": 6.259206861291716e-05, "loss": 0.6694, "step": 1894 }, { "epoch": 0.43673657524775295, "grad_norm": 0.23058217763900757, "learning_rate": 6.255593945947407e-05, "loss": 0.6764, "step": 1895 }, { "epoch": 0.4369670430974879, "grad_norm": 0.2556455731391907, "learning_rate": 6.251980330765171e-05, "loss": 0.6752, "step": 1896 }, { "epoch": 0.43719751094722287, "grad_norm": 0.251406729221344, "learning_rate": 6.248366017759146e-05, "loss": 0.6751, "step": 1897 }, { "epoch": 0.43742797879695783, "grad_norm": 0.2086581438779831, "learning_rate": 6.244751008943867e-05, "loss": 0.6774, "step": 1898 }, { "epoch": 0.4376584466466928, "grad_norm": 0.2590232789516449, "learning_rate": 6.241135306334254e-05, "loss": 0.6764, "step": 1899 }, { "epoch": 0.43788891449642775, "grad_norm": 0.23755843937397003, "learning_rate": 6.237518911945608e-05, "loss": 0.675, "step": 1900 }, { "epoch": 0.4381193823461627, "grad_norm": 0.20899838209152222, "learning_rate": 6.233901827793625e-05, "loss": 0.6699, "step": 1901 }, { "epoch": 0.4383498501958977, "grad_norm": 0.2480127364397049, "learning_rate": 6.230284055894379e-05, "loss": 0.6698, "step": 1902 }, { "epoch": 0.43858031804563263, "grad_norm": 0.23556362092494965, "learning_rate": 6.22666559826433e-05, "loss": 0.6734, "step": 1903 }, { "epoch": 0.4388107858953676, "grad_norm": 0.2578951418399811, "learning_rate": 6.223046456920321e-05, "loss": 0.6689, "step": 1904 }, { "epoch": 0.43904125374510256, "grad_norm": 0.275614470243454, "learning_rate": 6.21942663387957e-05, "loss": 0.6684, "step": 1905 }, { "epoch": 0.4392717215948375, "grad_norm": 0.21438564360141754, "learning_rate": 6.215806131159683e-05, "loss": 0.6766, "step": 1906 }, { "epoch": 0.4395021894445725, "grad_norm": 0.26986971497535706, "learning_rate": 6.21218495077864e-05, "loss": 0.6716, "step": 1907 }, { "epoch": 0.43973265729430744, "grad_norm": 0.25460711121559143, "learning_rate": 6.208563094754802e-05, "loss": 0.6763, "step": 1908 }, { "epoch": 0.4399631251440424, "grad_norm": 0.2263481318950653, "learning_rate": 6.2049405651069e-05, "loss": 0.6738, "step": 1909 }, { "epoch": 0.44019359299377736, "grad_norm": 0.26499027013778687, "learning_rate": 6.20131736385405e-05, "loss": 0.6708, "step": 1910 }, { "epoch": 0.4404240608435123, "grad_norm": 0.2109207808971405, "learning_rate": 6.197693493015734e-05, "loss": 0.6737, "step": 1911 }, { "epoch": 0.4406545286932473, "grad_norm": 0.22647859156131744, "learning_rate": 6.194068954611814e-05, "loss": 0.6719, "step": 1912 }, { "epoch": 0.44088499654298224, "grad_norm": 0.24029402434825897, "learning_rate": 6.190443750662518e-05, "loss": 0.6752, "step": 1913 }, { "epoch": 0.4411154643927172, "grad_norm": 0.20866861939430237, "learning_rate": 6.186817883188449e-05, "loss": 0.6746, "step": 1914 }, { "epoch": 0.44134593224245217, "grad_norm": 0.20965643227100372, "learning_rate": 6.183191354210577e-05, "loss": 0.675, "step": 1915 }, { "epoch": 0.4415764000921871, "grad_norm": 0.23772816359996796, "learning_rate": 6.179564165750244e-05, "loss": 0.6806, "step": 1916 }, { "epoch": 0.4418068679419221, "grad_norm": 0.2268814593553543, "learning_rate": 6.175936319829157e-05, "loss": 0.6682, "step": 1917 }, { "epoch": 0.44203733579165705, "grad_norm": 0.24102425575256348, "learning_rate": 6.17230781846939e-05, "loss": 0.679, "step": 1918 }, { "epoch": 0.442267803641392, "grad_norm": 0.22678816318511963, "learning_rate": 6.168678663693382e-05, "loss": 0.6703, "step": 1919 }, { "epoch": 0.44249827149112697, "grad_norm": 0.22168827056884766, "learning_rate": 6.165048857523938e-05, "loss": 0.6867, "step": 1920 }, { "epoch": 0.44272873934086193, "grad_norm": 0.23149913549423218, "learning_rate": 6.161418401984225e-05, "loss": 0.6705, "step": 1921 }, { "epoch": 0.4429592071905969, "grad_norm": 0.22349926829338074, "learning_rate": 6.157787299097771e-05, "loss": 0.6817, "step": 1922 }, { "epoch": 0.44318967504033185, "grad_norm": 0.2404746562242508, "learning_rate": 6.154155550888466e-05, "loss": 0.6758, "step": 1923 }, { "epoch": 0.4434201428900668, "grad_norm": 0.2592242956161499, "learning_rate": 6.150523159380558e-05, "loss": 0.6769, "step": 1924 }, { "epoch": 0.4436506107398018, "grad_norm": 0.20442263782024384, "learning_rate": 6.146890126598657e-05, "loss": 0.6711, "step": 1925 }, { "epoch": 0.44388107858953674, "grad_norm": 0.22541704773902893, "learning_rate": 6.143256454567727e-05, "loss": 0.674, "step": 1926 }, { "epoch": 0.4441115464392717, "grad_norm": 0.20725850760936737, "learning_rate": 6.139622145313089e-05, "loss": 0.6663, "step": 1927 }, { "epoch": 0.44434201428900666, "grad_norm": 0.21723538637161255, "learning_rate": 6.13598720086042e-05, "loss": 0.675, "step": 1928 }, { "epoch": 0.4445724821387416, "grad_norm": 0.21730880439281464, "learning_rate": 6.132351623235753e-05, "loss": 0.6746, "step": 1929 }, { "epoch": 0.4448029499884766, "grad_norm": 0.23787382245063782, "learning_rate": 6.12871541446547e-05, "loss": 0.6767, "step": 1930 }, { "epoch": 0.44503341783821154, "grad_norm": 0.23728126287460327, "learning_rate": 6.125078576576306e-05, "loss": 0.6811, "step": 1931 }, { "epoch": 0.44526388568794656, "grad_norm": 0.2103312760591507, "learning_rate": 6.121441111595347e-05, "loss": 0.6702, "step": 1932 }, { "epoch": 0.4454943535376815, "grad_norm": 0.23246845602989197, "learning_rate": 6.117803021550028e-05, "loss": 0.6714, "step": 1933 }, { "epoch": 0.4457248213874165, "grad_norm": 0.2163802981376648, "learning_rate": 6.114164308468136e-05, "loss": 0.6749, "step": 1934 }, { "epoch": 0.44595528923715144, "grad_norm": 0.22772881388664246, "learning_rate": 6.110524974377802e-05, "loss": 0.6758, "step": 1935 }, { "epoch": 0.4461857570868864, "grad_norm": 0.21084089577198029, "learning_rate": 6.1068850213075e-05, "loss": 0.6662, "step": 1936 }, { "epoch": 0.44641622493662136, "grad_norm": 0.21491113305091858, "learning_rate": 6.1032444512860556e-05, "loss": 0.6724, "step": 1937 }, { "epoch": 0.4466466927863563, "grad_norm": 0.2205878645181656, "learning_rate": 6.099603266342635e-05, "loss": 0.6817, "step": 1938 }, { "epoch": 0.4468771606360913, "grad_norm": 0.22424796223640442, "learning_rate": 6.0959614685067444e-05, "loss": 0.6858, "step": 1939 }, { "epoch": 0.44710762848582625, "grad_norm": 0.2058129459619522, "learning_rate": 6.092319059808238e-05, "loss": 0.6659, "step": 1940 }, { "epoch": 0.4473380963355612, "grad_norm": 0.22552870213985443, "learning_rate": 6.088676042277306e-05, "loss": 0.6786, "step": 1941 }, { "epoch": 0.44756856418529617, "grad_norm": 0.24123747646808624, "learning_rate": 6.085032417944477e-05, "loss": 0.6724, "step": 1942 }, { "epoch": 0.44779903203503113, "grad_norm": 0.2038024514913559, "learning_rate": 6.081388188840623e-05, "loss": 0.6653, "step": 1943 }, { "epoch": 0.4480294998847661, "grad_norm": 0.21841315925121307, "learning_rate": 6.077743356996947e-05, "loss": 0.6684, "step": 1944 }, { "epoch": 0.44825996773450105, "grad_norm": 0.2158724069595337, "learning_rate": 6.074097924444992e-05, "loss": 0.6716, "step": 1945 }, { "epoch": 0.448490435584236, "grad_norm": 0.2177586555480957, "learning_rate": 6.0704518932166356e-05, "loss": 0.6714, "step": 1946 }, { "epoch": 0.448720903433971, "grad_norm": 0.21237412095069885, "learning_rate": 6.066805265344084e-05, "loss": 0.6753, "step": 1947 }, { "epoch": 0.44895137128370594, "grad_norm": 0.22231976687908173, "learning_rate": 6.0631580428598864e-05, "loss": 0.6767, "step": 1948 }, { "epoch": 0.4491818391334409, "grad_norm": 0.200894296169281, "learning_rate": 6.059510227796912e-05, "loss": 0.6752, "step": 1949 }, { "epoch": 0.44941230698317586, "grad_norm": 0.22096696496009827, "learning_rate": 6.0558618221883664e-05, "loss": 0.6732, "step": 1950 }, { "epoch": 0.4496427748329108, "grad_norm": 0.1898728609085083, "learning_rate": 6.052212828067787e-05, "loss": 0.6762, "step": 1951 }, { "epoch": 0.4498732426826458, "grad_norm": 0.21051624417304993, "learning_rate": 6.0485632474690304e-05, "loss": 0.675, "step": 1952 }, { "epoch": 0.45010371053238074, "grad_norm": 0.20307575166225433, "learning_rate": 6.0449130824262864e-05, "loss": 0.6638, "step": 1953 }, { "epoch": 0.4503341783821157, "grad_norm": 0.2037128061056137, "learning_rate": 6.041262334974072e-05, "loss": 0.6732, "step": 1954 }, { "epoch": 0.45056464623185066, "grad_norm": 0.20359142124652863, "learning_rate": 6.0376110071472234e-05, "loss": 0.6662, "step": 1955 }, { "epoch": 0.4507951140815856, "grad_norm": 0.19084343314170837, "learning_rate": 6.033959100980905e-05, "loss": 0.6774, "step": 1956 }, { "epoch": 0.4510255819313206, "grad_norm": 0.2061081826686859, "learning_rate": 6.0303066185106e-05, "loss": 0.674, "step": 1957 }, { "epoch": 0.45125604978105555, "grad_norm": 0.19711264967918396, "learning_rate": 6.026653561772118e-05, "loss": 0.6689, "step": 1958 }, { "epoch": 0.4514865176307905, "grad_norm": 0.19926705956459045, "learning_rate": 6.022999932801579e-05, "loss": 0.6764, "step": 1959 }, { "epoch": 0.45171698548052547, "grad_norm": 0.20394420623779297, "learning_rate": 6.019345733635433e-05, "loss": 0.6714, "step": 1960 }, { "epoch": 0.45194745333026043, "grad_norm": 0.19170762598514557, "learning_rate": 6.015690966310441e-05, "loss": 0.6676, "step": 1961 }, { "epoch": 0.4521779211799954, "grad_norm": 0.20363013446331024, "learning_rate": 6.012035632863683e-05, "loss": 0.6661, "step": 1962 }, { "epoch": 0.45240838902973035, "grad_norm": 0.1984439343214035, "learning_rate": 6.008379735332556e-05, "loss": 0.67, "step": 1963 }, { "epoch": 0.4526388568794653, "grad_norm": 0.20422999560832977, "learning_rate": 6.0047232757547654e-05, "loss": 0.6804, "step": 1964 }, { "epoch": 0.4528693247292003, "grad_norm": 0.2078217715024948, "learning_rate": 6.001066256168337e-05, "loss": 0.6704, "step": 1965 }, { "epoch": 0.45309979257893523, "grad_norm": 0.21460744738578796, "learning_rate": 5.997408678611606e-05, "loss": 0.674, "step": 1966 }, { "epoch": 0.4533302604286702, "grad_norm": 0.2205062061548233, "learning_rate": 5.9937505451232165e-05, "loss": 0.6722, "step": 1967 }, { "epoch": 0.45356072827840516, "grad_norm": 0.18799827992916107, "learning_rate": 5.990091857742126e-05, "loss": 0.6728, "step": 1968 }, { "epoch": 0.4537911961281401, "grad_norm": 0.2258245050907135, "learning_rate": 5.986432618507598e-05, "loss": 0.6753, "step": 1969 }, { "epoch": 0.4540216639778751, "grad_norm": 0.20360851287841797, "learning_rate": 5.982772829459204e-05, "loss": 0.6741, "step": 1970 }, { "epoch": 0.45425213182761004, "grad_norm": 0.21974268555641174, "learning_rate": 5.979112492636824e-05, "loss": 0.6732, "step": 1971 }, { "epoch": 0.454482599677345, "grad_norm": 0.18492695689201355, "learning_rate": 5.9754516100806423e-05, "loss": 0.667, "step": 1972 }, { "epoch": 0.45471306752707996, "grad_norm": 0.2193833887577057, "learning_rate": 5.971790183831145e-05, "loss": 0.6748, "step": 1973 }, { "epoch": 0.4549435353768149, "grad_norm": 0.19800156354904175, "learning_rate": 5.968128215929123e-05, "loss": 0.6672, "step": 1974 }, { "epoch": 0.4551740032265499, "grad_norm": 0.2352752387523651, "learning_rate": 5.964465708415673e-05, "loss": 0.6753, "step": 1975 }, { "epoch": 0.45540447107628484, "grad_norm": 0.22403676807880402, "learning_rate": 5.9608026633321846e-05, "loss": 0.6752, "step": 1976 }, { "epoch": 0.4556349389260198, "grad_norm": 0.22172044217586517, "learning_rate": 5.957139082720353e-05, "loss": 0.6721, "step": 1977 }, { "epoch": 0.45586540677575477, "grad_norm": 0.2300235778093338, "learning_rate": 5.9534749686221715e-05, "loss": 0.6779, "step": 1978 }, { "epoch": 0.45609587462548973, "grad_norm": 0.2155136615037918, "learning_rate": 5.9498103230799274e-05, "loss": 0.6717, "step": 1979 }, { "epoch": 0.4563263424752247, "grad_norm": 0.2152310460805893, "learning_rate": 5.9461451481362054e-05, "loss": 0.6599, "step": 1980 }, { "epoch": 0.45655681032495965, "grad_norm": 0.23655883967876434, "learning_rate": 5.942479445833887e-05, "loss": 0.6705, "step": 1981 }, { "epoch": 0.4567872781746946, "grad_norm": 0.20639467239379883, "learning_rate": 5.938813218216149e-05, "loss": 0.674, "step": 1982 }, { "epoch": 0.45701774602442957, "grad_norm": 0.24684162437915802, "learning_rate": 5.9351464673264556e-05, "loss": 0.6707, "step": 1983 }, { "epoch": 0.45724821387416453, "grad_norm": 0.2441384196281433, "learning_rate": 5.931479195208566e-05, "loss": 0.6608, "step": 1984 }, { "epoch": 0.4574786817238995, "grad_norm": 0.20324283838272095, "learning_rate": 5.927811403906531e-05, "loss": 0.6749, "step": 1985 }, { "epoch": 0.45770914957363446, "grad_norm": 0.24871313571929932, "learning_rate": 5.9241430954646884e-05, "loss": 0.6715, "step": 1986 }, { "epoch": 0.4579396174233694, "grad_norm": 0.20936191082000732, "learning_rate": 5.9204742719276676e-05, "loss": 0.6773, "step": 1987 }, { "epoch": 0.4581700852731044, "grad_norm": 0.2653404474258423, "learning_rate": 5.916804935340379e-05, "loss": 0.6766, "step": 1988 }, { "epoch": 0.45840055312283934, "grad_norm": 0.24194635450839996, "learning_rate": 5.913135087748025e-05, "loss": 0.6688, "step": 1989 }, { "epoch": 0.4586310209725743, "grad_norm": 0.20647594332695007, "learning_rate": 5.90946473119609e-05, "loss": 0.675, "step": 1990 }, { "epoch": 0.45886148882230926, "grad_norm": 0.26715630292892456, "learning_rate": 5.9057938677303435e-05, "loss": 0.6729, "step": 1991 }, { "epoch": 0.4590919566720443, "grad_norm": 0.21820828318595886, "learning_rate": 5.902122499396836e-05, "loss": 0.6683, "step": 1992 }, { "epoch": 0.45932242452177924, "grad_norm": 0.2330540269613266, "learning_rate": 5.898450628241899e-05, "loss": 0.6566, "step": 1993 }, { "epoch": 0.4595528923715142, "grad_norm": 0.2669338583946228, "learning_rate": 5.894778256312149e-05, "loss": 0.6655, "step": 1994 }, { "epoch": 0.45978336022124916, "grad_norm": 0.21196076273918152, "learning_rate": 5.891105385654474e-05, "loss": 0.6765, "step": 1995 }, { "epoch": 0.4600138280709841, "grad_norm": 0.23386694490909576, "learning_rate": 5.887432018316045e-05, "loss": 0.6729, "step": 1996 }, { "epoch": 0.4602442959207191, "grad_norm": 0.21577976644039154, "learning_rate": 5.88375815634431e-05, "loss": 0.6745, "step": 1997 }, { "epoch": 0.46047476377045404, "grad_norm": 0.2088993936777115, "learning_rate": 5.8800838017869886e-05, "loss": 0.671, "step": 1998 }, { "epoch": 0.460705231620189, "grad_norm": 0.2429114431142807, "learning_rate": 5.876408956692083e-05, "loss": 0.6655, "step": 1999 }, { "epoch": 0.46093569946992397, "grad_norm": 0.19586068391799927, "learning_rate": 5.87273362310786e-05, "loss": 0.6832, "step": 2000 }, { "epoch": 0.4611661673196589, "grad_norm": 0.22119756042957306, "learning_rate": 5.86905780308286e-05, "loss": 0.674, "step": 2001 }, { "epoch": 0.4613966351693939, "grad_norm": 0.2294689267873764, "learning_rate": 5.8653814986659026e-05, "loss": 0.6654, "step": 2002 }, { "epoch": 0.46162710301912885, "grad_norm": 0.20585887134075165, "learning_rate": 5.861704711906067e-05, "loss": 0.6715, "step": 2003 }, { "epoch": 0.4618575708688638, "grad_norm": 0.22693046927452087, "learning_rate": 5.8580274448527094e-05, "loss": 0.6639, "step": 2004 }, { "epoch": 0.46208803871859877, "grad_norm": 0.23352816700935364, "learning_rate": 5.854349699555448e-05, "loss": 0.674, "step": 2005 }, { "epoch": 0.46231850656833373, "grad_norm": 0.21101726591587067, "learning_rate": 5.850671478064169e-05, "loss": 0.6763, "step": 2006 }, { "epoch": 0.4625489744180687, "grad_norm": 0.21366450190544128, "learning_rate": 5.846992782429027e-05, "loss": 0.6712, "step": 2007 }, { "epoch": 0.46277944226780365, "grad_norm": 0.20312468707561493, "learning_rate": 5.843313614700438e-05, "loss": 0.6683, "step": 2008 }, { "epoch": 0.4630099101175386, "grad_norm": 0.21990333497524261, "learning_rate": 5.8396339769290795e-05, "loss": 0.6773, "step": 2009 }, { "epoch": 0.4632403779672736, "grad_norm": 0.1999209225177765, "learning_rate": 5.8359538711658976e-05, "loss": 0.6837, "step": 2010 }, { "epoch": 0.46347084581700854, "grad_norm": 0.22440659999847412, "learning_rate": 5.832273299462092e-05, "loss": 0.6775, "step": 2011 }, { "epoch": 0.4637013136667435, "grad_norm": 0.23072776198387146, "learning_rate": 5.8285922638691246e-05, "loss": 0.6676, "step": 2012 }, { "epoch": 0.46393178151647846, "grad_norm": 0.2057875543832779, "learning_rate": 5.824910766438718e-05, "loss": 0.6673, "step": 2013 }, { "epoch": 0.4641622493662134, "grad_norm": 0.23501604795455933, "learning_rate": 5.8212288092228504e-05, "loss": 0.6752, "step": 2014 }, { "epoch": 0.4643927172159484, "grad_norm": 0.22704358398914337, "learning_rate": 5.817546394273754e-05, "loss": 0.6716, "step": 2015 }, { "epoch": 0.46462318506568334, "grad_norm": 0.20482905209064484, "learning_rate": 5.8138635236439207e-05, "loss": 0.6605, "step": 2016 }, { "epoch": 0.4648536529154183, "grad_norm": 0.22710943222045898, "learning_rate": 5.810180199386096e-05, "loss": 0.6605, "step": 2017 }, { "epoch": 0.46508412076515326, "grad_norm": 0.20111124217510223, "learning_rate": 5.8064964235532705e-05, "loss": 0.668, "step": 2018 }, { "epoch": 0.4653145886148882, "grad_norm": 0.21100562810897827, "learning_rate": 5.802812198198699e-05, "loss": 0.6779, "step": 2019 }, { "epoch": 0.4655450564646232, "grad_norm": 0.22643785178661346, "learning_rate": 5.799127525375876e-05, "loss": 0.6764, "step": 2020 }, { "epoch": 0.46577552431435815, "grad_norm": 0.20172421634197235, "learning_rate": 5.7954424071385505e-05, "loss": 0.6709, "step": 2021 }, { "epoch": 0.4660059921640931, "grad_norm": 0.20389583706855774, "learning_rate": 5.791756845540721e-05, "loss": 0.6713, "step": 2022 }, { "epoch": 0.46623646001382807, "grad_norm": 0.1967121958732605, "learning_rate": 5.788070842636629e-05, "loss": 0.6654, "step": 2023 }, { "epoch": 0.46646692786356303, "grad_norm": 0.20393262803554535, "learning_rate": 5.784384400480765e-05, "loss": 0.6693, "step": 2024 }, { "epoch": 0.466697395713298, "grad_norm": 0.22986234724521637, "learning_rate": 5.780697521127862e-05, "loss": 0.6671, "step": 2025 }, { "epoch": 0.46692786356303295, "grad_norm": 0.2337898313999176, "learning_rate": 5.7770102066329e-05, "loss": 0.6716, "step": 2026 }, { "epoch": 0.4671583314127679, "grad_norm": 0.21430113911628723, "learning_rate": 5.773322459051098e-05, "loss": 0.668, "step": 2027 }, { "epoch": 0.4673887992625029, "grad_norm": 0.2504282295703888, "learning_rate": 5.769634280437919e-05, "loss": 0.6752, "step": 2028 }, { "epoch": 0.46761926711223784, "grad_norm": 0.2045115828514099, "learning_rate": 5.765945672849066e-05, "loss": 0.6687, "step": 2029 }, { "epoch": 0.4678497349619728, "grad_norm": 0.26005640625953674, "learning_rate": 5.7622566383404774e-05, "loss": 0.6812, "step": 2030 }, { "epoch": 0.46808020281170776, "grad_norm": 0.21131455898284912, "learning_rate": 5.758567178968336e-05, "loss": 0.6679, "step": 2031 }, { "epoch": 0.4683106706614427, "grad_norm": 0.24453459680080414, "learning_rate": 5.754877296789056e-05, "loss": 0.6715, "step": 2032 }, { "epoch": 0.4685411385111777, "grad_norm": 0.25293347239494324, "learning_rate": 5.751186993859287e-05, "loss": 0.6685, "step": 2033 }, { "epoch": 0.46877160636091264, "grad_norm": 0.20162692666053772, "learning_rate": 5.74749627223592e-05, "loss": 0.6718, "step": 2034 }, { "epoch": 0.4690020742106476, "grad_norm": 0.2305351048707962, "learning_rate": 5.743805133976071e-05, "loss": 0.6754, "step": 2035 }, { "epoch": 0.46923254206038256, "grad_norm": 0.22010259330272675, "learning_rate": 5.740113581137094e-05, "loss": 0.6668, "step": 2036 }, { "epoch": 0.4694630099101175, "grad_norm": 0.23104584217071533, "learning_rate": 5.736421615776573e-05, "loss": 0.6791, "step": 2037 }, { "epoch": 0.4696934777598525, "grad_norm": 0.2572833299636841, "learning_rate": 5.732729239952316e-05, "loss": 0.6708, "step": 2038 }, { "epoch": 0.46992394560958745, "grad_norm": 0.230802983045578, "learning_rate": 5.7290364557223685e-05, "loss": 0.6669, "step": 2039 }, { "epoch": 0.4701544134593224, "grad_norm": 0.20792588591575623, "learning_rate": 5.725343265144999e-05, "loss": 0.6723, "step": 2040 }, { "epoch": 0.47038488130905737, "grad_norm": 0.22476236522197723, "learning_rate": 5.721649670278704e-05, "loss": 0.6714, "step": 2041 }, { "epoch": 0.47061534915879233, "grad_norm": 0.1980828195810318, "learning_rate": 5.717955673182202e-05, "loss": 0.6707, "step": 2042 }, { "epoch": 0.4708458170085273, "grad_norm": 0.2145676165819168, "learning_rate": 5.714261275914442e-05, "loss": 0.6716, "step": 2043 }, { "epoch": 0.47107628485826225, "grad_norm": 0.19914093613624573, "learning_rate": 5.71056648053459e-05, "loss": 0.6762, "step": 2044 }, { "epoch": 0.4713067527079972, "grad_norm": 0.21874842047691345, "learning_rate": 5.706871289102036e-05, "loss": 0.667, "step": 2045 }, { "epoch": 0.4715372205577322, "grad_norm": 0.20119917392730713, "learning_rate": 5.7031757036763934e-05, "loss": 0.6658, "step": 2046 }, { "epoch": 0.47176768840746713, "grad_norm": 0.21794843673706055, "learning_rate": 5.69947972631749e-05, "loss": 0.6702, "step": 2047 }, { "epoch": 0.4719981562572021, "grad_norm": 0.20612038671970367, "learning_rate": 5.695783359085377e-05, "loss": 0.663, "step": 2048 }, { "epoch": 0.47222862410693706, "grad_norm": 0.20984764397144318, "learning_rate": 5.69208660404032e-05, "loss": 0.6743, "step": 2049 }, { "epoch": 0.472459091956672, "grad_norm": 0.22582849860191345, "learning_rate": 5.6883894632428005e-05, "loss": 0.6827, "step": 2050 }, { "epoch": 0.47268955980640703, "grad_norm": 0.1955862045288086, "learning_rate": 5.684691938753517e-05, "loss": 0.6679, "step": 2051 }, { "epoch": 0.472920027656142, "grad_norm": 0.22272345423698425, "learning_rate": 5.680994032633381e-05, "loss": 0.6657, "step": 2052 }, { "epoch": 0.47315049550587696, "grad_norm": 0.208807110786438, "learning_rate": 5.6772957469435176e-05, "loss": 0.6673, "step": 2053 }, { "epoch": 0.4733809633556119, "grad_norm": 0.21152140200138092, "learning_rate": 5.67359708374526e-05, "loss": 0.6739, "step": 2054 }, { "epoch": 0.4736114312053469, "grad_norm": 0.22693230211734772, "learning_rate": 5.669898045100156e-05, "loss": 0.6648, "step": 2055 }, { "epoch": 0.47384189905508184, "grad_norm": 0.22745169699192047, "learning_rate": 5.6661986330699615e-05, "loss": 0.6723, "step": 2056 }, { "epoch": 0.4740723669048168, "grad_norm": 0.217105895280838, "learning_rate": 5.662498849716636e-05, "loss": 0.6751, "step": 2057 }, { "epoch": 0.47430283475455176, "grad_norm": 0.2365586906671524, "learning_rate": 5.6587986971023564e-05, "loss": 0.6833, "step": 2058 }, { "epoch": 0.4745333026042867, "grad_norm": 0.22789350152015686, "learning_rate": 5.655098177289496e-05, "loss": 0.673, "step": 2059 }, { "epoch": 0.4747637704540217, "grad_norm": 0.20388175547122955, "learning_rate": 5.651397292340632e-05, "loss": 0.6667, "step": 2060 }, { "epoch": 0.47499423830375664, "grad_norm": 0.22663913667201996, "learning_rate": 5.6476960443185546e-05, "loss": 0.676, "step": 2061 }, { "epoch": 0.4752247061534916, "grad_norm": 0.19801859557628632, "learning_rate": 5.6439944352862476e-05, "loss": 0.6707, "step": 2062 }, { "epoch": 0.47545517400322657, "grad_norm": 0.21870480477809906, "learning_rate": 5.640292467306899e-05, "loss": 0.6705, "step": 2063 }, { "epoch": 0.4756856418529615, "grad_norm": 0.21825294196605682, "learning_rate": 5.6365901424438985e-05, "loss": 0.6729, "step": 2064 }, { "epoch": 0.4759161097026965, "grad_norm": 0.19710072875022888, "learning_rate": 5.632887462760831e-05, "loss": 0.6783, "step": 2065 }, { "epoch": 0.47614657755243145, "grad_norm": 0.21672062575817108, "learning_rate": 5.6291844303214826e-05, "loss": 0.6733, "step": 2066 }, { "epoch": 0.4763770454021664, "grad_norm": 0.1974102258682251, "learning_rate": 5.625481047189835e-05, "loss": 0.6797, "step": 2067 }, { "epoch": 0.47660751325190137, "grad_norm": 0.220162034034729, "learning_rate": 5.6217773154300646e-05, "loss": 0.6668, "step": 2068 }, { "epoch": 0.47683798110163633, "grad_norm": 0.20030367374420166, "learning_rate": 5.618073237106541e-05, "loss": 0.6704, "step": 2069 }, { "epoch": 0.4770684489513713, "grad_norm": 0.1981177181005478, "learning_rate": 5.614368814283831e-05, "loss": 0.6609, "step": 2070 }, { "epoch": 0.47729891680110625, "grad_norm": 0.19721218943595886, "learning_rate": 5.6106640490266904e-05, "loss": 0.665, "step": 2071 }, { "epoch": 0.4775293846508412, "grad_norm": 0.20160210132598877, "learning_rate": 5.606958943400066e-05, "loss": 0.6687, "step": 2072 }, { "epoch": 0.4777598525005762, "grad_norm": 0.20998936891555786, "learning_rate": 5.6032534994690945e-05, "loss": 0.6719, "step": 2073 }, { "epoch": 0.47799032035031114, "grad_norm": 0.19076962769031525, "learning_rate": 5.599547719299102e-05, "loss": 0.6719, "step": 2074 }, { "epoch": 0.4782207882000461, "grad_norm": 0.2014273852109909, "learning_rate": 5.595841604955601e-05, "loss": 0.6757, "step": 2075 }, { "epoch": 0.47845125604978106, "grad_norm": 0.20173531770706177, "learning_rate": 5.5921351585042915e-05, "loss": 0.6707, "step": 2076 }, { "epoch": 0.478681723899516, "grad_norm": 0.2177407145500183, "learning_rate": 5.588428382011055e-05, "loss": 0.6611, "step": 2077 }, { "epoch": 0.478912191749251, "grad_norm": 0.22235806286334991, "learning_rate": 5.584721277541964e-05, "loss": 0.6692, "step": 2078 }, { "epoch": 0.47914265959898594, "grad_norm": 0.19388116896152496, "learning_rate": 5.581013847163267e-05, "loss": 0.6679, "step": 2079 }, { "epoch": 0.4793731274487209, "grad_norm": 0.23979228734970093, "learning_rate": 5.577306092941397e-05, "loss": 0.6729, "step": 2080 }, { "epoch": 0.47960359529845586, "grad_norm": 0.22609175741672516, "learning_rate": 5.573598016942968e-05, "loss": 0.6638, "step": 2081 }, { "epoch": 0.4798340631481908, "grad_norm": 0.20589977502822876, "learning_rate": 5.569889621234771e-05, "loss": 0.667, "step": 2082 }, { "epoch": 0.4800645309979258, "grad_norm": 0.23106610774993896, "learning_rate": 5.566180907883777e-05, "loss": 0.6704, "step": 2083 }, { "epoch": 0.48029499884766075, "grad_norm": 0.19989155232906342, "learning_rate": 5.562471878957135e-05, "loss": 0.674, "step": 2084 }, { "epoch": 0.4805254666973957, "grad_norm": 0.22377397119998932, "learning_rate": 5.55876253652217e-05, "loss": 0.6624, "step": 2085 }, { "epoch": 0.48075593454713067, "grad_norm": 0.21617180109024048, "learning_rate": 5.5550528826463754e-05, "loss": 0.6764, "step": 2086 }, { "epoch": 0.48098640239686563, "grad_norm": 0.20691335201263428, "learning_rate": 5.551342919397429e-05, "loss": 0.6638, "step": 2087 }, { "epoch": 0.4812168702466006, "grad_norm": 0.20599327981472015, "learning_rate": 5.547632648843172e-05, "loss": 0.665, "step": 2088 }, { "epoch": 0.48144733809633555, "grad_norm": 0.21349526941776276, "learning_rate": 5.54392207305162e-05, "loss": 0.6734, "step": 2089 }, { "epoch": 0.4816778059460705, "grad_norm": 0.20899753272533417, "learning_rate": 5.5402111940909595e-05, "loss": 0.6702, "step": 2090 }, { "epoch": 0.4819082737958055, "grad_norm": 0.22185955941677094, "learning_rate": 5.536500014029547e-05, "loss": 0.6638, "step": 2091 }, { "epoch": 0.48213874164554044, "grad_norm": 0.19926568865776062, "learning_rate": 5.532788534935902e-05, "loss": 0.6649, "step": 2092 }, { "epoch": 0.4823692094952754, "grad_norm": 0.22930283844470978, "learning_rate": 5.529076758878718e-05, "loss": 0.6691, "step": 2093 }, { "epoch": 0.48259967734501036, "grad_norm": 0.2192930430173874, "learning_rate": 5.525364687926846e-05, "loss": 0.6681, "step": 2094 }, { "epoch": 0.4828301451947453, "grad_norm": 0.23378318548202515, "learning_rate": 5.521652324149307e-05, "loss": 0.6736, "step": 2095 }, { "epoch": 0.4830606130444803, "grad_norm": 0.20761679112911224, "learning_rate": 5.517939669615284e-05, "loss": 0.6678, "step": 2096 }, { "epoch": 0.48329108089421524, "grad_norm": 0.22278013825416565, "learning_rate": 5.5142267263941204e-05, "loss": 0.6649, "step": 2097 }, { "epoch": 0.4835215487439502, "grad_norm": 0.2117822915315628, "learning_rate": 5.510513496555322e-05, "loss": 0.6664, "step": 2098 }, { "epoch": 0.48375201659368516, "grad_norm": 0.20482023060321808, "learning_rate": 5.506799982168553e-05, "loss": 0.6709, "step": 2099 }, { "epoch": 0.4839824844434201, "grad_norm": 0.2443208247423172, "learning_rate": 5.50308618530364e-05, "loss": 0.6694, "step": 2100 }, { "epoch": 0.4842129522931551, "grad_norm": 0.1926216185092926, "learning_rate": 5.4993721080305614e-05, "loss": 0.6608, "step": 2101 }, { "epoch": 0.48444342014289005, "grad_norm": 0.22089843451976776, "learning_rate": 5.495657752419455e-05, "loss": 0.6718, "step": 2102 }, { "epoch": 0.484673887992625, "grad_norm": 0.22440844774246216, "learning_rate": 5.491943120540616e-05, "loss": 0.6648, "step": 2103 }, { "epoch": 0.48490435584235997, "grad_norm": 0.19866810739040375, "learning_rate": 5.488228214464487e-05, "loss": 0.6596, "step": 2104 }, { "epoch": 0.48513482369209493, "grad_norm": 0.22278371453285217, "learning_rate": 5.484513036261671e-05, "loss": 0.6685, "step": 2105 }, { "epoch": 0.4853652915418299, "grad_norm": 0.21866042912006378, "learning_rate": 5.480797588002918e-05, "loss": 0.6683, "step": 2106 }, { "epoch": 0.48559575939156485, "grad_norm": 0.20206943154335022, "learning_rate": 5.47708187175913e-05, "loss": 0.6705, "step": 2107 }, { "epoch": 0.4858262272412998, "grad_norm": 0.2211609184741974, "learning_rate": 5.4733658896013575e-05, "loss": 0.6669, "step": 2108 }, { "epoch": 0.4860566950910348, "grad_norm": 0.20941609144210815, "learning_rate": 5.4696496436008e-05, "loss": 0.6641, "step": 2109 }, { "epoch": 0.48628716294076973, "grad_norm": 0.21404802799224854, "learning_rate": 5.465933135828802e-05, "loss": 0.6754, "step": 2110 }, { "epoch": 0.48651763079050475, "grad_norm": 0.2135351300239563, "learning_rate": 5.4622163683568584e-05, "loss": 0.6755, "step": 2111 }, { "epoch": 0.4867480986402397, "grad_norm": 0.22787488996982574, "learning_rate": 5.4584993432566066e-05, "loss": 0.6608, "step": 2112 }, { "epoch": 0.4869785664899747, "grad_norm": 0.22770029306411743, "learning_rate": 5.4547820625998244e-05, "loss": 0.6669, "step": 2113 }, { "epoch": 0.48720903433970963, "grad_norm": 0.2112642526626587, "learning_rate": 5.4510645284584364e-05, "loss": 0.6636, "step": 2114 }, { "epoch": 0.4874395021894446, "grad_norm": 0.22324852645397186, "learning_rate": 5.447346742904508e-05, "loss": 0.679, "step": 2115 }, { "epoch": 0.48766997003917956, "grad_norm": 0.2138347178697586, "learning_rate": 5.443628708010239e-05, "loss": 0.6636, "step": 2116 }, { "epoch": 0.4879004378889145, "grad_norm": 0.22339309751987457, "learning_rate": 5.439910425847979e-05, "loss": 0.6648, "step": 2117 }, { "epoch": 0.4881309057386495, "grad_norm": 0.22265255451202393, "learning_rate": 5.436191898490207e-05, "loss": 0.6686, "step": 2118 }, { "epoch": 0.48836137358838444, "grad_norm": 0.22078566253185272, "learning_rate": 5.4324731280095374e-05, "loss": 0.6704, "step": 2119 }, { "epoch": 0.4885918414381194, "grad_norm": 0.19747048616409302, "learning_rate": 5.428754116478729e-05, "loss": 0.6723, "step": 2120 }, { "epoch": 0.48882230928785436, "grad_norm": 0.22434230148792267, "learning_rate": 5.425034865970666e-05, "loss": 0.6769, "step": 2121 }, { "epoch": 0.4890527771375893, "grad_norm": 0.2001955658197403, "learning_rate": 5.4213153785583705e-05, "loss": 0.6748, "step": 2122 }, { "epoch": 0.4892832449873243, "grad_norm": 0.2093842476606369, "learning_rate": 5.417595656314997e-05, "loss": 0.666, "step": 2123 }, { "epoch": 0.48951371283705924, "grad_norm": 0.18802720308303833, "learning_rate": 5.413875701313825e-05, "loss": 0.6667, "step": 2124 }, { "epoch": 0.4897441806867942, "grad_norm": 0.2099105566740036, "learning_rate": 5.410155515628272e-05, "loss": 0.6793, "step": 2125 }, { "epoch": 0.48997464853652917, "grad_norm": 0.1877232789993286, "learning_rate": 5.406435101331879e-05, "loss": 0.6583, "step": 2126 }, { "epoch": 0.49020511638626413, "grad_norm": 0.21019746363162994, "learning_rate": 5.402714460498318e-05, "loss": 0.6643, "step": 2127 }, { "epoch": 0.4904355842359991, "grad_norm": 0.19312387704849243, "learning_rate": 5.39899359520138e-05, "loss": 0.666, "step": 2128 }, { "epoch": 0.49066605208573405, "grad_norm": 0.19418299198150635, "learning_rate": 5.39527250751499e-05, "loss": 0.6663, "step": 2129 }, { "epoch": 0.490896519935469, "grad_norm": 0.19869892299175262, "learning_rate": 5.391551199513192e-05, "loss": 0.67, "step": 2130 }, { "epoch": 0.491126987785204, "grad_norm": 0.19379819929599762, "learning_rate": 5.3878296732701515e-05, "loss": 0.6661, "step": 2131 }, { "epoch": 0.49135745563493893, "grad_norm": 0.20687328279018402, "learning_rate": 5.384107930860162e-05, "loss": 0.6815, "step": 2132 }, { "epoch": 0.4915879234846739, "grad_norm": 0.20180322229862213, "learning_rate": 5.38038597435763e-05, "loss": 0.6631, "step": 2133 }, { "epoch": 0.49181839133440886, "grad_norm": 0.19170688092708588, "learning_rate": 5.3766638058370855e-05, "loss": 0.6794, "step": 2134 }, { "epoch": 0.4920488591841438, "grad_norm": 0.1976936012506485, "learning_rate": 5.372941427373178e-05, "loss": 0.6709, "step": 2135 }, { "epoch": 0.4922793270338788, "grad_norm": 0.1848510205745697, "learning_rate": 5.3692188410406695e-05, "loss": 0.6664, "step": 2136 }, { "epoch": 0.49250979488361374, "grad_norm": 0.19432882964611053, "learning_rate": 5.36549604891444e-05, "loss": 0.6613, "step": 2137 }, { "epoch": 0.4927402627333487, "grad_norm": 0.19802847504615784, "learning_rate": 5.361773053069487e-05, "loss": 0.663, "step": 2138 }, { "epoch": 0.49297073058308366, "grad_norm": 0.19331657886505127, "learning_rate": 5.3580498555809163e-05, "loss": 0.6743, "step": 2139 }, { "epoch": 0.4932011984328186, "grad_norm": 0.20699891448020935, "learning_rate": 5.354326458523952e-05, "loss": 0.6685, "step": 2140 }, { "epoch": 0.4934316662825536, "grad_norm": 0.17387264966964722, "learning_rate": 5.350602863973923e-05, "loss": 0.6707, "step": 2141 }, { "epoch": 0.49366213413228854, "grad_norm": 0.205754354596138, "learning_rate": 5.346879074006271e-05, "loss": 0.6634, "step": 2142 }, { "epoch": 0.4938926019820235, "grad_norm": 0.19446855783462524, "learning_rate": 5.343155090696551e-05, "loss": 0.6784, "step": 2143 }, { "epoch": 0.49412306983175847, "grad_norm": 0.20317377150058746, "learning_rate": 5.33943091612042e-05, "loss": 0.6599, "step": 2144 }, { "epoch": 0.4943535376814934, "grad_norm": 0.1977040022611618, "learning_rate": 5.335706552353643e-05, "loss": 0.6631, "step": 2145 }, { "epoch": 0.4945840055312284, "grad_norm": 0.19229522347450256, "learning_rate": 5.331982001472091e-05, "loss": 0.6602, "step": 2146 }, { "epoch": 0.49481447338096335, "grad_norm": 0.20331966876983643, "learning_rate": 5.3282572655517416e-05, "loss": 0.6738, "step": 2147 }, { "epoch": 0.4950449412306983, "grad_norm": 0.20568066835403442, "learning_rate": 5.324532346668668e-05, "loss": 0.6712, "step": 2148 }, { "epoch": 0.49527540908043327, "grad_norm": 0.20340462028980255, "learning_rate": 5.3208072468990555e-05, "loss": 0.6651, "step": 2149 }, { "epoch": 0.49550587693016823, "grad_norm": 0.2173525094985962, "learning_rate": 5.317081968319185e-05, "loss": 0.6651, "step": 2150 }, { "epoch": 0.4957363447799032, "grad_norm": 0.19157260656356812, "learning_rate": 5.313356513005433e-05, "loss": 0.6687, "step": 2151 }, { "epoch": 0.49596681262963815, "grad_norm": 0.20568935573101044, "learning_rate": 5.3096308830342844e-05, "loss": 0.6671, "step": 2152 }, { "epoch": 0.4961972804793731, "grad_norm": 0.19956238567829132, "learning_rate": 5.305905080482312e-05, "loss": 0.6649, "step": 2153 }, { "epoch": 0.4964277483291081, "grad_norm": 0.19893619418144226, "learning_rate": 5.302179107426191e-05, "loss": 0.6692, "step": 2154 }, { "epoch": 0.49665821617884304, "grad_norm": 0.20772667229175568, "learning_rate": 5.298452965942687e-05, "loss": 0.6686, "step": 2155 }, { "epoch": 0.496888684028578, "grad_norm": 0.1938534379005432, "learning_rate": 5.294726658108665e-05, "loss": 0.6659, "step": 2156 }, { "epoch": 0.49711915187831296, "grad_norm": 0.19728915393352509, "learning_rate": 5.291000186001076e-05, "loss": 0.6745, "step": 2157 }, { "epoch": 0.4973496197280479, "grad_norm": 0.19719786942005157, "learning_rate": 5.2872735516969695e-05, "loss": 0.6713, "step": 2158 }, { "epoch": 0.4975800875777829, "grad_norm": 0.18405082821846008, "learning_rate": 5.28354675727348e-05, "loss": 0.6542, "step": 2159 }, { "epoch": 0.49781055542751784, "grad_norm": 0.2271628975868225, "learning_rate": 5.279819804807834e-05, "loss": 0.6674, "step": 2160 }, { "epoch": 0.4980410232772528, "grad_norm": 0.18239864706993103, "learning_rate": 5.2760926963773436e-05, "loss": 0.6755, "step": 2161 }, { "epoch": 0.49827149112698776, "grad_norm": 0.19707393646240234, "learning_rate": 5.272365434059413e-05, "loss": 0.6676, "step": 2162 }, { "epoch": 0.4985019589767227, "grad_norm": 0.21636438369750977, "learning_rate": 5.2686380199315244e-05, "loss": 0.6743, "step": 2163 }, { "epoch": 0.4987324268264577, "grad_norm": 0.2144385576248169, "learning_rate": 5.2649104560712536e-05, "loss": 0.6648, "step": 2164 }, { "epoch": 0.49896289467619265, "grad_norm": 0.21302272379398346, "learning_rate": 5.261182744556252e-05, "loss": 0.6673, "step": 2165 }, { "epoch": 0.4991933625259276, "grad_norm": 0.22367598116397858, "learning_rate": 5.257454887464258e-05, "loss": 0.6754, "step": 2166 }, { "epoch": 0.49942383037566257, "grad_norm": 0.1969953179359436, "learning_rate": 5.253726886873089e-05, "loss": 0.6759, "step": 2167 }, { "epoch": 0.49965429822539753, "grad_norm": 0.22727370262145996, "learning_rate": 5.2499987448606436e-05, "loss": 0.6622, "step": 2168 }, { "epoch": 0.4998847660751325, "grad_norm": 0.19945083558559418, "learning_rate": 5.246270463504898e-05, "loss": 0.6689, "step": 2169 }, { "epoch": 0.5001152339248675, "grad_norm": 0.19829121232032776, "learning_rate": 5.2425420448839055e-05, "loss": 0.6707, "step": 2170 }, { "epoch": 0.5003457017746025, "grad_norm": 0.19157467782497406, "learning_rate": 5.2388134910758015e-05, "loss": 0.6674, "step": 2171 }, { "epoch": 0.5005761696243374, "grad_norm": 0.19981160759925842, "learning_rate": 5.235084804158787e-05, "loss": 0.6703, "step": 2172 }, { "epoch": 0.5008066374740724, "grad_norm": 0.2080090194940567, "learning_rate": 5.231355986211146e-05, "loss": 0.6659, "step": 2173 }, { "epoch": 0.5010371053238073, "grad_norm": 0.19367657601833344, "learning_rate": 5.2276270393112325e-05, "loss": 0.6576, "step": 2174 }, { "epoch": 0.5012675731735423, "grad_norm": 0.21728312969207764, "learning_rate": 5.223897965537469e-05, "loss": 0.6723, "step": 2175 }, { "epoch": 0.5014980410232772, "grad_norm": 0.19311760365962982, "learning_rate": 5.220168766968355e-05, "loss": 0.6622, "step": 2176 }, { "epoch": 0.5017285088730122, "grad_norm": 0.20615801215171814, "learning_rate": 5.216439445682455e-05, "loss": 0.6652, "step": 2177 }, { "epoch": 0.5019589767227471, "grad_norm": 0.1940842866897583, "learning_rate": 5.212710003758401e-05, "loss": 0.667, "step": 2178 }, { "epoch": 0.5021894445724822, "grad_norm": 0.19929523766040802, "learning_rate": 5.208980443274899e-05, "loss": 0.6649, "step": 2179 }, { "epoch": 0.5024199124222171, "grad_norm": 0.181834876537323, "learning_rate": 5.205250766310712e-05, "loss": 0.6654, "step": 2180 }, { "epoch": 0.5026503802719521, "grad_norm": 0.20930717885494232, "learning_rate": 5.201520974944675e-05, "loss": 0.6627, "step": 2181 }, { "epoch": 0.502880848121687, "grad_norm": 0.18552522361278534, "learning_rate": 5.1977910712556834e-05, "loss": 0.6738, "step": 2182 }, { "epoch": 0.503111315971422, "grad_norm": 0.21850310266017914, "learning_rate": 5.1940610573226955e-05, "loss": 0.6776, "step": 2183 }, { "epoch": 0.5033417838211569, "grad_norm": 0.18333658576011658, "learning_rate": 5.190330935224732e-05, "loss": 0.6657, "step": 2184 }, { "epoch": 0.5035722516708919, "grad_norm": 0.21493695676326752, "learning_rate": 5.186600707040874e-05, "loss": 0.6724, "step": 2185 }, { "epoch": 0.5038027195206268, "grad_norm": 0.19420945644378662, "learning_rate": 5.1828703748502614e-05, "loss": 0.6571, "step": 2186 }, { "epoch": 0.5040331873703618, "grad_norm": 0.19441378116607666, "learning_rate": 5.179139940732091e-05, "loss": 0.6548, "step": 2187 }, { "epoch": 0.5042636552200968, "grad_norm": 0.201436385512352, "learning_rate": 5.1754094067656164e-05, "loss": 0.6734, "step": 2188 }, { "epoch": 0.5044941230698318, "grad_norm": 0.222244992852211, "learning_rate": 5.17167877503015e-05, "loss": 0.67, "step": 2189 }, { "epoch": 0.5047245909195667, "grad_norm": 0.20612792670726776, "learning_rate": 5.1679480476050525e-05, "loss": 0.6591, "step": 2190 }, { "epoch": 0.5049550587693017, "grad_norm": 0.20908065140247345, "learning_rate": 5.164217226569747e-05, "loss": 0.6708, "step": 2191 }, { "epoch": 0.5051855266190366, "grad_norm": 0.1922689974308014, "learning_rate": 5.1604863140037e-05, "loss": 0.6586, "step": 2192 }, { "epoch": 0.5054159944687716, "grad_norm": 0.22337596118450165, "learning_rate": 5.156755311986433e-05, "loss": 0.6675, "step": 2193 }, { "epoch": 0.5056464623185065, "grad_norm": 0.20729419589042664, "learning_rate": 5.153024222597519e-05, "loss": 0.6646, "step": 2194 }, { "epoch": 0.5058769301682415, "grad_norm": 0.20641185343265533, "learning_rate": 5.149293047916576e-05, "loss": 0.6679, "step": 2195 }, { "epoch": 0.5061073980179764, "grad_norm": 0.20790040493011475, "learning_rate": 5.14556179002327e-05, "loss": 0.6635, "step": 2196 }, { "epoch": 0.5063378658677115, "grad_norm": 0.19893454015254974, "learning_rate": 5.141830450997316e-05, "loss": 0.6687, "step": 2197 }, { "epoch": 0.5065683337174464, "grad_norm": 0.2278234213590622, "learning_rate": 5.138099032918475e-05, "loss": 0.6625, "step": 2198 }, { "epoch": 0.5067988015671814, "grad_norm": 0.20414131879806519, "learning_rate": 5.1343675378665455e-05, "loss": 0.6677, "step": 2199 }, { "epoch": 0.5070292694169164, "grad_norm": 0.20839910209178925, "learning_rate": 5.130635967921377e-05, "loss": 0.6679, "step": 2200 }, { "epoch": 0.5072597372666513, "grad_norm": 0.1961502730846405, "learning_rate": 5.1269043251628556e-05, "loss": 0.6703, "step": 2201 }, { "epoch": 0.5074902051163863, "grad_norm": 0.19088955223560333, "learning_rate": 5.123172611670907e-05, "loss": 0.6757, "step": 2202 }, { "epoch": 0.5077206729661212, "grad_norm": 0.20697516202926636, "learning_rate": 5.119440829525504e-05, "loss": 0.6694, "step": 2203 }, { "epoch": 0.5079511408158562, "grad_norm": 0.20802126824855804, "learning_rate": 5.115708980806647e-05, "loss": 0.6742, "step": 2204 }, { "epoch": 0.5081816086655911, "grad_norm": 0.19614768028259277, "learning_rate": 5.111977067594382e-05, "loss": 0.6664, "step": 2205 }, { "epoch": 0.5084120765153262, "grad_norm": 0.1986846625804901, "learning_rate": 5.1082450919687884e-05, "loss": 0.6523, "step": 2206 }, { "epoch": 0.5086425443650611, "grad_norm": 0.20315955579280853, "learning_rate": 5.1045130560099776e-05, "loss": 0.6673, "step": 2207 }, { "epoch": 0.5088730122147961, "grad_norm": 0.20175747573375702, "learning_rate": 5.100780961798098e-05, "loss": 0.6709, "step": 2208 }, { "epoch": 0.509103480064531, "grad_norm": 0.19836615025997162, "learning_rate": 5.097048811413331e-05, "loss": 0.6712, "step": 2209 }, { "epoch": 0.509333947914266, "grad_norm": 0.2123645395040512, "learning_rate": 5.093316606935883e-05, "loss": 0.662, "step": 2210 }, { "epoch": 0.5095644157640009, "grad_norm": 0.19712963700294495, "learning_rate": 5.0895843504460005e-05, "loss": 0.6771, "step": 2211 }, { "epoch": 0.5097948836137359, "grad_norm": 0.19324639439582825, "learning_rate": 5.08585204402395e-05, "loss": 0.6606, "step": 2212 }, { "epoch": 0.5100253514634708, "grad_norm": 0.19227439165115356, "learning_rate": 5.08211968975003e-05, "loss": 0.6669, "step": 2213 }, { "epoch": 0.5102558193132058, "grad_norm": 0.21038465201854706, "learning_rate": 5.078387289704568e-05, "loss": 0.6682, "step": 2214 }, { "epoch": 0.5104862871629408, "grad_norm": 0.1920362412929535, "learning_rate": 5.074654845967911e-05, "loss": 0.6558, "step": 2215 }, { "epoch": 0.5107167550126758, "grad_norm": 0.20892934501171112, "learning_rate": 5.0709223606204345e-05, "loss": 0.6653, "step": 2216 }, { "epoch": 0.5109472228624107, "grad_norm": 0.1770077347755432, "learning_rate": 5.0671898357425366e-05, "loss": 0.6642, "step": 2217 }, { "epoch": 0.5111776907121457, "grad_norm": 0.1956147402524948, "learning_rate": 5.063457273414638e-05, "loss": 0.6676, "step": 2218 }, { "epoch": 0.5114081585618806, "grad_norm": 0.1727982610464096, "learning_rate": 5.059724675717177e-05, "loss": 0.6607, "step": 2219 }, { "epoch": 0.5116386264116156, "grad_norm": 0.18610809743404388, "learning_rate": 5.055992044730615e-05, "loss": 0.6625, "step": 2220 }, { "epoch": 0.5118690942613505, "grad_norm": 0.1783786416053772, "learning_rate": 5.0522593825354336e-05, "loss": 0.6589, "step": 2221 }, { "epoch": 0.5120995621110855, "grad_norm": 0.18354859948158264, "learning_rate": 5.048526691212123e-05, "loss": 0.6544, "step": 2222 }, { "epoch": 0.5123300299608204, "grad_norm": 0.1823883056640625, "learning_rate": 5.044793972841203e-05, "loss": 0.6713, "step": 2223 }, { "epoch": 0.5125604978105555, "grad_norm": 0.19022877514362335, "learning_rate": 5.041061229503196e-05, "loss": 0.6669, "step": 2224 }, { "epoch": 0.5127909656602904, "grad_norm": 0.19528958201408386, "learning_rate": 5.037328463278646e-05, "loss": 0.6589, "step": 2225 }, { "epoch": 0.5130214335100254, "grad_norm": 0.1989632546901703, "learning_rate": 5.033595676248106e-05, "loss": 0.6609, "step": 2226 }, { "epoch": 0.5132519013597603, "grad_norm": 0.17961591482162476, "learning_rate": 5.029862870492142e-05, "loss": 0.6652, "step": 2227 }, { "epoch": 0.5134823692094953, "grad_norm": 0.18870680034160614, "learning_rate": 5.026130048091331e-05, "loss": 0.6658, "step": 2228 }, { "epoch": 0.5137128370592302, "grad_norm": 0.2052677571773529, "learning_rate": 5.0223972111262584e-05, "loss": 0.6663, "step": 2229 }, { "epoch": 0.5139433049089652, "grad_norm": 0.23451951146125793, "learning_rate": 5.018664361677519e-05, "loss": 0.6642, "step": 2230 }, { "epoch": 0.5141737727587001, "grad_norm": 0.18537165224552155, "learning_rate": 5.01493150182571e-05, "loss": 0.6698, "step": 2231 }, { "epoch": 0.5144042406084351, "grad_norm": 0.21171410381793976, "learning_rate": 5.011198633651442e-05, "loss": 0.6687, "step": 2232 }, { "epoch": 0.51463470845817, "grad_norm": 0.22823838889598846, "learning_rate": 5.0074657592353246e-05, "loss": 0.6657, "step": 2233 }, { "epoch": 0.5148651763079051, "grad_norm": 0.1847493201494217, "learning_rate": 5.003732880657971e-05, "loss": 0.6638, "step": 2234 }, { "epoch": 0.51509564415764, "grad_norm": 0.2114555537700653, "learning_rate": 5e-05, "loss": 0.668, "step": 2235 }, { "epoch": 0.515326112007375, "grad_norm": 0.17518627643585205, "learning_rate": 4.996267119342029e-05, "loss": 0.6551, "step": 2236 }, { "epoch": 0.5155565798571099, "grad_norm": 0.23371440172195435, "learning_rate": 4.992534240764677e-05, "loss": 0.6697, "step": 2237 }, { "epoch": 0.5157870477068449, "grad_norm": 0.1804964542388916, "learning_rate": 4.9888013663485586e-05, "loss": 0.6622, "step": 2238 }, { "epoch": 0.5160175155565798, "grad_norm": 0.22412839531898499, "learning_rate": 4.98506849817429e-05, "loss": 0.6576, "step": 2239 }, { "epoch": 0.5162479834063148, "grad_norm": 0.2174907922744751, "learning_rate": 4.981335638322484e-05, "loss": 0.6597, "step": 2240 }, { "epoch": 0.5164784512560497, "grad_norm": 0.20137697458267212, "learning_rate": 4.9776027888737434e-05, "loss": 0.6747, "step": 2241 }, { "epoch": 0.5167089191057848, "grad_norm": 0.2253003716468811, "learning_rate": 4.973869951908669e-05, "loss": 0.6686, "step": 2242 }, { "epoch": 0.5169393869555197, "grad_norm": 0.2619769871234894, "learning_rate": 4.9701371295078603e-05, "loss": 0.6616, "step": 2243 }, { "epoch": 0.5171698548052547, "grad_norm": 0.26056036353111267, "learning_rate": 4.966404323751896e-05, "loss": 0.6653, "step": 2244 }, { "epoch": 0.5174003226549896, "grad_norm": 0.21332813799381256, "learning_rate": 4.962671536721355e-05, "loss": 0.6609, "step": 2245 }, { "epoch": 0.5176307905047246, "grad_norm": 0.25862419605255127, "learning_rate": 4.9589387704968054e-05, "loss": 0.6685, "step": 2246 }, { "epoch": 0.5178612583544595, "grad_norm": 0.24932904541492462, "learning_rate": 4.955206027158798e-05, "loss": 0.6642, "step": 2247 }, { "epoch": 0.5180917262041945, "grad_norm": 0.21036414802074432, "learning_rate": 4.951473308787876e-05, "loss": 0.6636, "step": 2248 }, { "epoch": 0.5183221940539294, "grad_norm": 0.27057451009750366, "learning_rate": 4.947740617464568e-05, "loss": 0.6603, "step": 2249 }, { "epoch": 0.5185526619036644, "grad_norm": 0.2021026611328125, "learning_rate": 4.9440079552693854e-05, "loss": 0.6779, "step": 2250 }, { "epoch": 0.5187831297533994, "grad_norm": 0.24280990660190582, "learning_rate": 4.940275324282824e-05, "loss": 0.6528, "step": 2251 }, { "epoch": 0.5190135976031344, "grad_norm": 0.21159270405769348, "learning_rate": 4.9365427265853644e-05, "loss": 0.6706, "step": 2252 }, { "epoch": 0.5192440654528693, "grad_norm": 0.2115175724029541, "learning_rate": 4.9328101642574646e-05, "loss": 0.6645, "step": 2253 }, { "epoch": 0.5194745333026043, "grad_norm": 0.21375073492527008, "learning_rate": 4.929077639379566e-05, "loss": 0.6633, "step": 2254 }, { "epoch": 0.5197050011523392, "grad_norm": 0.2012326568365097, "learning_rate": 4.925345154032092e-05, "loss": 0.658, "step": 2255 }, { "epoch": 0.5199354690020742, "grad_norm": 0.2035793513059616, "learning_rate": 4.921612710295433e-05, "loss": 0.6658, "step": 2256 }, { "epoch": 0.5201659368518091, "grad_norm": 0.2022104114294052, "learning_rate": 4.91788031024997e-05, "loss": 0.664, "step": 2257 }, { "epoch": 0.5203964047015441, "grad_norm": 0.2398633062839508, "learning_rate": 4.9141479559760517e-05, "loss": 0.6608, "step": 2258 }, { "epoch": 0.5206268725512792, "grad_norm": 0.1719241738319397, "learning_rate": 4.910415649554001e-05, "loss": 0.663, "step": 2259 }, { "epoch": 0.5208573404010141, "grad_norm": 0.22672203183174133, "learning_rate": 4.906683393064117e-05, "loss": 0.6632, "step": 2260 }, { "epoch": 0.5210878082507491, "grad_norm": 0.19581197202205658, "learning_rate": 4.9029511885866717e-05, "loss": 0.6602, "step": 2261 }, { "epoch": 0.521318276100484, "grad_norm": 0.22291046380996704, "learning_rate": 4.899219038201903e-05, "loss": 0.6525, "step": 2262 }, { "epoch": 0.521548743950219, "grad_norm": 0.19381098449230194, "learning_rate": 4.895486943990023e-05, "loss": 0.6722, "step": 2263 }, { "epoch": 0.5217792117999539, "grad_norm": 0.234474778175354, "learning_rate": 4.891754908031213e-05, "loss": 0.6639, "step": 2264 }, { "epoch": 0.5220096796496889, "grad_norm": 0.21784402430057526, "learning_rate": 4.8880229324056184e-05, "loss": 0.6639, "step": 2265 }, { "epoch": 0.5222401474994238, "grad_norm": 0.20804181694984436, "learning_rate": 4.8842910191933526e-05, "loss": 0.6595, "step": 2266 }, { "epoch": 0.5224706153491588, "grad_norm": 0.2230507731437683, "learning_rate": 4.880559170474499e-05, "loss": 0.6716, "step": 2267 }, { "epoch": 0.5227010831988937, "grad_norm": 0.21720674633979797, "learning_rate": 4.876827388329094e-05, "loss": 0.6584, "step": 2268 }, { "epoch": 0.5229315510486288, "grad_norm": 0.23036789894104004, "learning_rate": 4.873095674837146e-05, "loss": 0.6654, "step": 2269 }, { "epoch": 0.5231620188983637, "grad_norm": 0.21202722191810608, "learning_rate": 4.869364032078625e-05, "loss": 0.6704, "step": 2270 }, { "epoch": 0.5233924867480987, "grad_norm": 0.22500722110271454, "learning_rate": 4.8656324621334557e-05, "loss": 0.6703, "step": 2271 }, { "epoch": 0.5236229545978336, "grad_norm": 0.20429456233978271, "learning_rate": 4.8619009670815265e-05, "loss": 0.6656, "step": 2272 }, { "epoch": 0.5238534224475686, "grad_norm": 0.23129715025424957, "learning_rate": 4.8581695490026845e-05, "loss": 0.6689, "step": 2273 }, { "epoch": 0.5240838902973035, "grad_norm": 0.21389861404895782, "learning_rate": 4.854438209976731e-05, "loss": 0.6718, "step": 2274 }, { "epoch": 0.5243143581470385, "grad_norm": 0.20860935747623444, "learning_rate": 4.850706952083426e-05, "loss": 0.6725, "step": 2275 }, { "epoch": 0.5245448259967734, "grad_norm": 0.20308835804462433, "learning_rate": 4.846975777402483e-05, "loss": 0.6665, "step": 2276 }, { "epoch": 0.5247752938465085, "grad_norm": 0.2039281278848648, "learning_rate": 4.843244688013568e-05, "loss": 0.671, "step": 2277 }, { "epoch": 0.5250057616962434, "grad_norm": 0.21113687753677368, "learning_rate": 4.839513685996301e-05, "loss": 0.6605, "step": 2278 }, { "epoch": 0.5252362295459784, "grad_norm": 0.1845845878124237, "learning_rate": 4.835782773430255e-05, "loss": 0.656, "step": 2279 }, { "epoch": 0.5254666973957133, "grad_norm": 0.22030138969421387, "learning_rate": 4.832051952394948e-05, "loss": 0.672, "step": 2280 }, { "epoch": 0.5256971652454483, "grad_norm": 0.19281761348247528, "learning_rate": 4.8283212249698515e-05, "loss": 0.6572, "step": 2281 }, { "epoch": 0.5259276330951832, "grad_norm": 0.21558502316474915, "learning_rate": 4.824590593234386e-05, "loss": 0.6674, "step": 2282 }, { "epoch": 0.5261581009449182, "grad_norm": 0.1933777928352356, "learning_rate": 4.82086005926791e-05, "loss": 0.6634, "step": 2283 }, { "epoch": 0.5263885687946531, "grad_norm": 0.19798806309700012, "learning_rate": 4.81712962514974e-05, "loss": 0.6658, "step": 2284 }, { "epoch": 0.5266190366443881, "grad_norm": 0.21283666789531708, "learning_rate": 4.8133992929591265e-05, "loss": 0.6731, "step": 2285 }, { "epoch": 0.526849504494123, "grad_norm": 0.2087125927209854, "learning_rate": 4.809669064775269e-05, "loss": 0.6658, "step": 2286 }, { "epoch": 0.5270799723438581, "grad_norm": 0.21951022744178772, "learning_rate": 4.805938942677306e-05, "loss": 0.6581, "step": 2287 }, { "epoch": 0.527310440193593, "grad_norm": 0.19603340327739716, "learning_rate": 4.802208928744319e-05, "loss": 0.6689, "step": 2288 }, { "epoch": 0.527540908043328, "grad_norm": 0.20134252309799194, "learning_rate": 4.798479025055327e-05, "loss": 0.6639, "step": 2289 }, { "epoch": 0.5277713758930629, "grad_norm": 0.20186404883861542, "learning_rate": 4.7947492336892894e-05, "loss": 0.668, "step": 2290 }, { "epoch": 0.5280018437427979, "grad_norm": 0.1830950379371643, "learning_rate": 4.791019556725104e-05, "loss": 0.6719, "step": 2291 }, { "epoch": 0.5282323115925328, "grad_norm": 0.20880278944969177, "learning_rate": 4.7872899962416e-05, "loss": 0.6679, "step": 2292 }, { "epoch": 0.5284627794422678, "grad_norm": 0.19241443276405334, "learning_rate": 4.783560554317546e-05, "loss": 0.6723, "step": 2293 }, { "epoch": 0.5286932472920027, "grad_norm": 0.2183217704296112, "learning_rate": 4.779831233031647e-05, "loss": 0.6537, "step": 2294 }, { "epoch": 0.5289237151417377, "grad_norm": 0.20928408205509186, "learning_rate": 4.776102034462532e-05, "loss": 0.6647, "step": 2295 }, { "epoch": 0.5291541829914727, "grad_norm": 0.18923519551753998, "learning_rate": 4.772372960688768e-05, "loss": 0.6654, "step": 2296 }, { "epoch": 0.5293846508412077, "grad_norm": 0.20825320482254028, "learning_rate": 4.7686440137888555e-05, "loss": 0.664, "step": 2297 }, { "epoch": 0.5296151186909426, "grad_norm": 0.21106471121311188, "learning_rate": 4.764915195841214e-05, "loss": 0.6672, "step": 2298 }, { "epoch": 0.5298455865406776, "grad_norm": 0.2134147584438324, "learning_rate": 4.7611865089242004e-05, "loss": 0.6697, "step": 2299 }, { "epoch": 0.5300760543904125, "grad_norm": 0.19978366792201996, "learning_rate": 4.757457955116095e-05, "loss": 0.66, "step": 2300 }, { "epoch": 0.5303065222401475, "grad_norm": 0.21647867560386658, "learning_rate": 4.753729536495104e-05, "loss": 0.6638, "step": 2301 }, { "epoch": 0.5305369900898824, "grad_norm": 0.1859664022922516, "learning_rate": 4.750001255139358e-05, "loss": 0.658, "step": 2302 }, { "epoch": 0.5307674579396174, "grad_norm": 0.1935039460659027, "learning_rate": 4.7462731131269114e-05, "loss": 0.6568, "step": 2303 }, { "epoch": 0.5309979257893523, "grad_norm": 0.21178743243217468, "learning_rate": 4.7425451125357435e-05, "loss": 0.6627, "step": 2304 }, { "epoch": 0.5312283936390874, "grad_norm": 0.20362286269664764, "learning_rate": 4.738817255443749e-05, "loss": 0.6597, "step": 2305 }, { "epoch": 0.5314588614888223, "grad_norm": 0.20342570543289185, "learning_rate": 4.735089543928746e-05, "loss": 0.6692, "step": 2306 }, { "epoch": 0.5316893293385573, "grad_norm": 0.1900341808795929, "learning_rate": 4.731361980068476e-05, "loss": 0.6613, "step": 2307 }, { "epoch": 0.5319197971882922, "grad_norm": 0.19465115666389465, "learning_rate": 4.727634565940588e-05, "loss": 0.6727, "step": 2308 }, { "epoch": 0.5321502650380272, "grad_norm": 0.19808262586593628, "learning_rate": 4.723907303622656e-05, "loss": 0.6605, "step": 2309 }, { "epoch": 0.5323807328877621, "grad_norm": 0.1885158270597458, "learning_rate": 4.7201801951921676e-05, "loss": 0.6574, "step": 2310 }, { "epoch": 0.5326112007374971, "grad_norm": 0.2195241004228592, "learning_rate": 4.7164532427265204e-05, "loss": 0.6655, "step": 2311 }, { "epoch": 0.532841668587232, "grad_norm": 0.182617649435997, "learning_rate": 4.712726448303031e-05, "loss": 0.6608, "step": 2312 }, { "epoch": 0.533072136436967, "grad_norm": 0.19868247210979462, "learning_rate": 4.708999813998924e-05, "loss": 0.656, "step": 2313 }, { "epoch": 0.533302604286702, "grad_norm": 0.17593586444854736, "learning_rate": 4.7052733418913366e-05, "loss": 0.6577, "step": 2314 }, { "epoch": 0.533533072136437, "grad_norm": 0.20548640191555023, "learning_rate": 4.701547034057313e-05, "loss": 0.6563, "step": 2315 }, { "epoch": 0.5337635399861719, "grad_norm": 0.20077823102474213, "learning_rate": 4.697820892573811e-05, "loss": 0.6607, "step": 2316 }, { "epoch": 0.5339940078359069, "grad_norm": 0.2142491191625595, "learning_rate": 4.694094919517689e-05, "loss": 0.6625, "step": 2317 }, { "epoch": 0.5342244756856418, "grad_norm": 0.1935112625360489, "learning_rate": 4.6903691169657154e-05, "loss": 0.6687, "step": 2318 }, { "epoch": 0.5344549435353768, "grad_norm": 0.1958986222743988, "learning_rate": 4.686643486994568e-05, "loss": 0.6676, "step": 2319 }, { "epoch": 0.5346854113851118, "grad_norm": 0.1859496831893921, "learning_rate": 4.6829180316808165e-05, "loss": 0.6581, "step": 2320 }, { "epoch": 0.5349158792348467, "grad_norm": 0.21781980991363525, "learning_rate": 4.6791927531009436e-05, "loss": 0.6678, "step": 2321 }, { "epoch": 0.5351463470845818, "grad_norm": 0.19916626811027527, "learning_rate": 4.675467653331333e-05, "loss": 0.6597, "step": 2322 }, { "epoch": 0.5353768149343167, "grad_norm": 0.21010006964206696, "learning_rate": 4.67174273444826e-05, "loss": 0.6651, "step": 2323 }, { "epoch": 0.5356072827840517, "grad_norm": 0.19810812175273895, "learning_rate": 4.668017998527909e-05, "loss": 0.6601, "step": 2324 }, { "epoch": 0.5358377506337866, "grad_norm": 0.21458233892917633, "learning_rate": 4.664293447646358e-05, "loss": 0.6638, "step": 2325 }, { "epoch": 0.5360682184835216, "grad_norm": 0.1828889101743698, "learning_rate": 4.660569083879581e-05, "loss": 0.6631, "step": 2326 }, { "epoch": 0.5362986863332565, "grad_norm": 0.21997065842151642, "learning_rate": 4.656844909303449e-05, "loss": 0.6602, "step": 2327 }, { "epoch": 0.5365291541829915, "grad_norm": 0.18035876750946045, "learning_rate": 4.653120925993729e-05, "loss": 0.6686, "step": 2328 }, { "epoch": 0.5367596220327264, "grad_norm": 0.1994839310646057, "learning_rate": 4.649397136026079e-05, "loss": 0.66, "step": 2329 }, { "epoch": 0.5369900898824614, "grad_norm": 0.1981378048658371, "learning_rate": 4.645673541476049e-05, "loss": 0.6705, "step": 2330 }, { "epoch": 0.5372205577321963, "grad_norm": 0.1892865151166916, "learning_rate": 4.641950144419085e-05, "loss": 0.6569, "step": 2331 }, { "epoch": 0.5374510255819314, "grad_norm": 0.19472159445285797, "learning_rate": 4.6382269469305143e-05, "loss": 0.6662, "step": 2332 }, { "epoch": 0.5376814934316663, "grad_norm": 0.20402705669403076, "learning_rate": 4.634503951085559e-05, "loss": 0.6651, "step": 2333 }, { "epoch": 0.5379119612814013, "grad_norm": 0.19563409686088562, "learning_rate": 4.630781158959332e-05, "loss": 0.6596, "step": 2334 }, { "epoch": 0.5381424291311362, "grad_norm": 0.20649971067905426, "learning_rate": 4.627058572626823e-05, "loss": 0.6567, "step": 2335 }, { "epoch": 0.5383728969808712, "grad_norm": 0.178875133395195, "learning_rate": 4.6233361941629136e-05, "loss": 0.672, "step": 2336 }, { "epoch": 0.5386033648306061, "grad_norm": 0.20355398952960968, "learning_rate": 4.619614025642371e-05, "loss": 0.6639, "step": 2337 }, { "epoch": 0.5388338326803411, "grad_norm": 0.19655583798885345, "learning_rate": 4.615892069139839e-05, "loss": 0.6685, "step": 2338 }, { "epoch": 0.539064300530076, "grad_norm": 0.1810363531112671, "learning_rate": 4.612170326729849e-05, "loss": 0.6592, "step": 2339 }, { "epoch": 0.539294768379811, "grad_norm": 0.19939152896404266, "learning_rate": 4.6084488004868105e-05, "loss": 0.6565, "step": 2340 }, { "epoch": 0.539525236229546, "grad_norm": 0.18242943286895752, "learning_rate": 4.604727492485011e-05, "loss": 0.6669, "step": 2341 }, { "epoch": 0.539755704079281, "grad_norm": 0.20084112882614136, "learning_rate": 4.601006404798621e-05, "loss": 0.6603, "step": 2342 }, { "epoch": 0.5399861719290159, "grad_norm": 0.18553341925144196, "learning_rate": 4.597285539501684e-05, "loss": 0.655, "step": 2343 }, { "epoch": 0.5402166397787509, "grad_norm": 0.18034282326698303, "learning_rate": 4.5935648986681215e-05, "loss": 0.6612, "step": 2344 }, { "epoch": 0.5404471076284858, "grad_norm": 0.20821136236190796, "learning_rate": 4.5898444843717275e-05, "loss": 0.6679, "step": 2345 }, { "epoch": 0.5406775754782208, "grad_norm": 0.18303748965263367, "learning_rate": 4.586124298686177e-05, "loss": 0.6627, "step": 2346 }, { "epoch": 0.5409080433279557, "grad_norm": 0.2008381336927414, "learning_rate": 4.582404343685005e-05, "loss": 0.6603, "step": 2347 }, { "epoch": 0.5411385111776907, "grad_norm": 0.18936984241008759, "learning_rate": 4.578684621441629e-05, "loss": 0.6602, "step": 2348 }, { "epoch": 0.5413689790274256, "grad_norm": 0.22308258712291718, "learning_rate": 4.574965134029335e-05, "loss": 0.6597, "step": 2349 }, { "epoch": 0.5415994468771607, "grad_norm": 0.1877029538154602, "learning_rate": 4.5712458835212716e-05, "loss": 0.6599, "step": 2350 }, { "epoch": 0.5418299147268956, "grad_norm": 0.21585100889205933, "learning_rate": 4.5675268719904624e-05, "loss": 0.6659, "step": 2351 }, { "epoch": 0.5420603825766306, "grad_norm": 0.20248469710350037, "learning_rate": 4.5638081015097956e-05, "loss": 0.6575, "step": 2352 }, { "epoch": 0.5422908504263655, "grad_norm": 0.19426268339157104, "learning_rate": 4.560089574152021e-05, "loss": 0.6667, "step": 2353 }, { "epoch": 0.5425213182761005, "grad_norm": 0.22340944409370422, "learning_rate": 4.5563712919897606e-05, "loss": 0.6619, "step": 2354 }, { "epoch": 0.5427517861258354, "grad_norm": 0.1823105663061142, "learning_rate": 4.552653257095495e-05, "loss": 0.6642, "step": 2355 }, { "epoch": 0.5429822539755704, "grad_norm": 0.23891690373420715, "learning_rate": 4.548935471541565e-05, "loss": 0.6654, "step": 2356 }, { "epoch": 0.5432127218253053, "grad_norm": 0.21343962848186493, "learning_rate": 4.545217937400177e-05, "loss": 0.6641, "step": 2357 }, { "epoch": 0.5434431896750404, "grad_norm": 0.2255440205335617, "learning_rate": 4.541500656743396e-05, "loss": 0.6682, "step": 2358 }, { "epoch": 0.5436736575247753, "grad_norm": 0.24012601375579834, "learning_rate": 4.537783631643143e-05, "loss": 0.6729, "step": 2359 }, { "epoch": 0.5439041253745103, "grad_norm": 0.18320710957050323, "learning_rate": 4.534066864171198e-05, "loss": 0.6559, "step": 2360 }, { "epoch": 0.5441345932242452, "grad_norm": 0.22548629343509674, "learning_rate": 4.530350356399203e-05, "loss": 0.6601, "step": 2361 }, { "epoch": 0.5443650610739802, "grad_norm": 0.20092882215976715, "learning_rate": 4.5266341103986436e-05, "loss": 0.661, "step": 2362 }, { "epoch": 0.5445955289237151, "grad_norm": 0.18173551559448242, "learning_rate": 4.5229181282408705e-05, "loss": 0.6608, "step": 2363 }, { "epoch": 0.5448259967734501, "grad_norm": 0.19635426998138428, "learning_rate": 4.519202411997083e-05, "loss": 0.6637, "step": 2364 }, { "epoch": 0.545056464623185, "grad_norm": 0.1842816174030304, "learning_rate": 4.515486963738329e-05, "loss": 0.664, "step": 2365 }, { "epoch": 0.54528693247292, "grad_norm": 0.19681525230407715, "learning_rate": 4.511771785535513e-05, "loss": 0.6692, "step": 2366 }, { "epoch": 0.5455174003226549, "grad_norm": 0.21367399394512177, "learning_rate": 4.5080568794593865e-05, "loss": 0.6541, "step": 2367 }, { "epoch": 0.54574786817239, "grad_norm": 0.19998280704021454, "learning_rate": 4.504342247580546e-05, "loss": 0.6609, "step": 2368 }, { "epoch": 0.5459783360221249, "grad_norm": 0.20067118108272552, "learning_rate": 4.50062789196944e-05, "loss": 0.6614, "step": 2369 }, { "epoch": 0.5462088038718599, "grad_norm": 0.19367091357707977, "learning_rate": 4.4969138146963625e-05, "loss": 0.663, "step": 2370 }, { "epoch": 0.5464392717215948, "grad_norm": 0.19135761260986328, "learning_rate": 4.493200017831448e-05, "loss": 0.6648, "step": 2371 }, { "epoch": 0.5466697395713298, "grad_norm": 0.21066224575042725, "learning_rate": 4.4894865034446784e-05, "loss": 0.6591, "step": 2372 }, { "epoch": 0.5469002074210647, "grad_norm": 0.19303461909294128, "learning_rate": 4.4857732736058814e-05, "loss": 0.6621, "step": 2373 }, { "epoch": 0.5471306752707997, "grad_norm": 0.2217341661453247, "learning_rate": 4.482060330384716e-05, "loss": 0.6596, "step": 2374 }, { "epoch": 0.5473611431205346, "grad_norm": 0.20083634555339813, "learning_rate": 4.478347675850693e-05, "loss": 0.6644, "step": 2375 }, { "epoch": 0.5475916109702696, "grad_norm": 0.22229771316051483, "learning_rate": 4.474635312073155e-05, "loss": 0.6623, "step": 2376 }, { "epoch": 0.5478220788200046, "grad_norm": 0.20578941702842712, "learning_rate": 4.470923241121283e-05, "loss": 0.654, "step": 2377 }, { "epoch": 0.5480525466697396, "grad_norm": 0.21209031343460083, "learning_rate": 4.467211465064097e-05, "loss": 0.6662, "step": 2378 }, { "epoch": 0.5482830145194746, "grad_norm": 0.2017475962638855, "learning_rate": 4.4634999859704546e-05, "loss": 0.6629, "step": 2379 }, { "epoch": 0.5485134823692095, "grad_norm": 0.19928203523159027, "learning_rate": 4.459788805909041e-05, "loss": 0.6593, "step": 2380 }, { "epoch": 0.5487439502189445, "grad_norm": 0.19713225960731506, "learning_rate": 4.456077926948381e-05, "loss": 0.6611, "step": 2381 }, { "epoch": 0.5489744180686794, "grad_norm": 0.198060542345047, "learning_rate": 4.45236735115683e-05, "loss": 0.6569, "step": 2382 }, { "epoch": 0.5492048859184144, "grad_norm": 0.19292356073856354, "learning_rate": 4.448657080602573e-05, "loss": 0.6666, "step": 2383 }, { "epoch": 0.5494353537681493, "grad_norm": 0.18970555067062378, "learning_rate": 4.444947117353625e-05, "loss": 0.6612, "step": 2384 }, { "epoch": 0.5496658216178844, "grad_norm": 0.19588854908943176, "learning_rate": 4.441237463477833e-05, "loss": 0.6697, "step": 2385 }, { "epoch": 0.5498962894676193, "grad_norm": 0.20147165656089783, "learning_rate": 4.437528121042866e-05, "loss": 0.6583, "step": 2386 }, { "epoch": 0.5501267573173543, "grad_norm": 0.21111683547496796, "learning_rate": 4.433819092116223e-05, "loss": 0.6589, "step": 2387 }, { "epoch": 0.5503572251670892, "grad_norm": 0.17622540891170502, "learning_rate": 4.430110378765232e-05, "loss": 0.6652, "step": 2388 }, { "epoch": 0.5505876930168242, "grad_norm": 0.22303026914596558, "learning_rate": 4.4264019830570334e-05, "loss": 0.6743, "step": 2389 }, { "epoch": 0.5508181608665591, "grad_norm": 0.19800515472888947, "learning_rate": 4.4226939070586035e-05, "loss": 0.6707, "step": 2390 }, { "epoch": 0.5510486287162941, "grad_norm": 0.19506274163722992, "learning_rate": 4.418986152836734e-05, "loss": 0.6626, "step": 2391 }, { "epoch": 0.551279096566029, "grad_norm": 0.197477325797081, "learning_rate": 4.4152787224580364e-05, "loss": 0.6636, "step": 2392 }, { "epoch": 0.551509564415764, "grad_norm": 0.2039872705936432, "learning_rate": 4.411571617988945e-05, "loss": 0.6642, "step": 2393 }, { "epoch": 0.551740032265499, "grad_norm": 0.19217687845230103, "learning_rate": 4.407864841495711e-05, "loss": 0.6634, "step": 2394 }, { "epoch": 0.551970500115234, "grad_norm": 0.1925874948501587, "learning_rate": 4.4041583950444004e-05, "loss": 0.6565, "step": 2395 }, { "epoch": 0.5522009679649689, "grad_norm": 0.20073387026786804, "learning_rate": 4.400452280700899e-05, "loss": 0.6565, "step": 2396 }, { "epoch": 0.5524314358147039, "grad_norm": 0.19481593370437622, "learning_rate": 4.3967465005309073e-05, "loss": 0.6638, "step": 2397 }, { "epoch": 0.5526619036644388, "grad_norm": 0.18334874510765076, "learning_rate": 4.393041056599936e-05, "loss": 0.6625, "step": 2398 }, { "epoch": 0.5528923715141738, "grad_norm": 0.187678724527359, "learning_rate": 4.38933595097331e-05, "loss": 0.6575, "step": 2399 }, { "epoch": 0.5531228393639087, "grad_norm": 0.20493364334106445, "learning_rate": 4.3856311857161705e-05, "loss": 0.6672, "step": 2400 }, { "epoch": 0.5533533072136437, "grad_norm": 0.1791137009859085, "learning_rate": 4.381926762893459e-05, "loss": 0.6603, "step": 2401 }, { "epoch": 0.5535837750633786, "grad_norm": 0.19737230241298676, "learning_rate": 4.378222684569937e-05, "loss": 0.6646, "step": 2402 }, { "epoch": 0.5538142429131137, "grad_norm": 0.22899094223976135, "learning_rate": 4.374518952810167e-05, "loss": 0.66, "step": 2403 }, { "epoch": 0.5540447107628486, "grad_norm": 0.18935391306877136, "learning_rate": 4.3708155696785186e-05, "loss": 0.6459, "step": 2404 }, { "epoch": 0.5542751786125836, "grad_norm": 0.17492151260375977, "learning_rate": 4.3671125372391704e-05, "loss": 0.6542, "step": 2405 }, { "epoch": 0.5545056464623185, "grad_norm": 0.21164800226688385, "learning_rate": 4.363409857556104e-05, "loss": 0.6638, "step": 2406 }, { "epoch": 0.5547361143120535, "grad_norm": 0.2046276330947876, "learning_rate": 4.359707532693102e-05, "loss": 0.6591, "step": 2407 }, { "epoch": 0.5549665821617884, "grad_norm": 0.20502637326717377, "learning_rate": 4.3560055647137536e-05, "loss": 0.6636, "step": 2408 }, { "epoch": 0.5551970500115234, "grad_norm": 0.19672958552837372, "learning_rate": 4.352303955681447e-05, "loss": 0.6641, "step": 2409 }, { "epoch": 0.5554275178612583, "grad_norm": 0.2050708532333374, "learning_rate": 4.348602707659369e-05, "loss": 0.6539, "step": 2410 }, { "epoch": 0.5556579857109933, "grad_norm": 0.20396442711353302, "learning_rate": 4.344901822710506e-05, "loss": 0.6589, "step": 2411 }, { "epoch": 0.5558884535607282, "grad_norm": 0.20161718130111694, "learning_rate": 4.341201302897645e-05, "loss": 0.6672, "step": 2412 }, { "epoch": 0.5561189214104633, "grad_norm": 0.19954702258110046, "learning_rate": 4.337501150283364e-05, "loss": 0.6578, "step": 2413 }, { "epoch": 0.5563493892601982, "grad_norm": 0.18954674899578094, "learning_rate": 4.3338013669300396e-05, "loss": 0.6642, "step": 2414 }, { "epoch": 0.5565798571099332, "grad_norm": 0.2116153985261917, "learning_rate": 4.3301019548998465e-05, "loss": 0.6569, "step": 2415 }, { "epoch": 0.5568103249596681, "grad_norm": 0.2053348273038864, "learning_rate": 4.326402916254741e-05, "loss": 0.6547, "step": 2416 }, { "epoch": 0.5570407928094031, "grad_norm": 0.21485279500484467, "learning_rate": 4.322704253056483e-05, "loss": 0.6657, "step": 2417 }, { "epoch": 0.557271260659138, "grad_norm": 0.21614287793636322, "learning_rate": 4.31900596736662e-05, "loss": 0.664, "step": 2418 }, { "epoch": 0.557501728508873, "grad_norm": 0.18734347820281982, "learning_rate": 4.3153080612464835e-05, "loss": 0.6608, "step": 2419 }, { "epoch": 0.5577321963586079, "grad_norm": 0.2166302502155304, "learning_rate": 4.3116105367572e-05, "loss": 0.666, "step": 2420 }, { "epoch": 0.557962664208343, "grad_norm": 0.18908052146434784, "learning_rate": 4.3079133959596825e-05, "loss": 0.6561, "step": 2421 }, { "epoch": 0.5581931320580779, "grad_norm": 0.21493315696716309, "learning_rate": 4.304216640914625e-05, "loss": 0.658, "step": 2422 }, { "epoch": 0.5584235999078129, "grad_norm": 0.17654471099376678, "learning_rate": 4.300520273682511e-05, "loss": 0.6602, "step": 2423 }, { "epoch": 0.5586540677575478, "grad_norm": 0.22438247501850128, "learning_rate": 4.2968242963236084e-05, "loss": 0.6561, "step": 2424 }, { "epoch": 0.5588845356072828, "grad_norm": 0.19954872131347656, "learning_rate": 4.293128710897965e-05, "loss": 0.6625, "step": 2425 }, { "epoch": 0.5591150034570177, "grad_norm": 0.2289677858352661, "learning_rate": 4.28943351946541e-05, "loss": 0.6567, "step": 2426 }, { "epoch": 0.5593454713067527, "grad_norm": 0.19166941940784454, "learning_rate": 4.28573872408556e-05, "loss": 0.6689, "step": 2427 }, { "epoch": 0.5595759391564876, "grad_norm": 0.21529777348041534, "learning_rate": 4.282044326817798e-05, "loss": 0.6652, "step": 2428 }, { "epoch": 0.5598064070062226, "grad_norm": 0.2114769071340561, "learning_rate": 4.2783503297212975e-05, "loss": 0.6591, "step": 2429 }, { "epoch": 0.5600368748559575, "grad_norm": 0.19013917446136475, "learning_rate": 4.2746567348550024e-05, "loss": 0.6567, "step": 2430 }, { "epoch": 0.5602673427056926, "grad_norm": 0.2237829715013504, "learning_rate": 4.2709635442776333e-05, "loss": 0.6673, "step": 2431 }, { "epoch": 0.5604978105554275, "grad_norm": 0.18001073598861694, "learning_rate": 4.267270760047685e-05, "loss": 0.663, "step": 2432 }, { "epoch": 0.5607282784051625, "grad_norm": 0.21743640303611755, "learning_rate": 4.263578384223429e-05, "loss": 0.6747, "step": 2433 }, { "epoch": 0.5609587462548974, "grad_norm": 0.1813591569662094, "learning_rate": 4.2598864188629065e-05, "loss": 0.6593, "step": 2434 }, { "epoch": 0.5611892141046324, "grad_norm": 0.19372020661830902, "learning_rate": 4.256194866023929e-05, "loss": 0.6569, "step": 2435 }, { "epoch": 0.5614196819543673, "grad_norm": 0.17591120302677155, "learning_rate": 4.25250372776408e-05, "loss": 0.6594, "step": 2436 }, { "epoch": 0.5616501498041023, "grad_norm": 0.19922824203968048, "learning_rate": 4.248813006140714e-05, "loss": 0.6646, "step": 2437 }, { "epoch": 0.5618806176538373, "grad_norm": 0.1966255158185959, "learning_rate": 4.245122703210945e-05, "loss": 0.6507, "step": 2438 }, { "epoch": 0.5621110855035722, "grad_norm": 0.18422779440879822, "learning_rate": 4.241432821031664e-05, "loss": 0.6578, "step": 2439 }, { "epoch": 0.5623415533533073, "grad_norm": 0.1995001882314682, "learning_rate": 4.2377433616595244e-05, "loss": 0.6682, "step": 2440 }, { "epoch": 0.5625720212030422, "grad_norm": 0.1927601844072342, "learning_rate": 4.234054327150935e-05, "loss": 0.6568, "step": 2441 }, { "epoch": 0.5628024890527772, "grad_norm": 0.20397785305976868, "learning_rate": 4.2303657195620804e-05, "loss": 0.6562, "step": 2442 }, { "epoch": 0.5630329569025121, "grad_norm": 0.18410134315490723, "learning_rate": 4.2266775409489023e-05, "loss": 0.6654, "step": 2443 }, { "epoch": 0.5632634247522471, "grad_norm": 0.18105067312717438, "learning_rate": 4.2229897933671006e-05, "loss": 0.6571, "step": 2444 }, { "epoch": 0.563493892601982, "grad_norm": 0.16961467266082764, "learning_rate": 4.219302478872138e-05, "loss": 0.6645, "step": 2445 }, { "epoch": 0.563724360451717, "grad_norm": 0.19589442014694214, "learning_rate": 4.2156155995192364e-05, "loss": 0.6596, "step": 2446 }, { "epoch": 0.5639548283014519, "grad_norm": 0.17968355119228363, "learning_rate": 4.211929157363372e-05, "loss": 0.6652, "step": 2447 }, { "epoch": 0.564185296151187, "grad_norm": 0.19263651967048645, "learning_rate": 4.208243154459279e-05, "loss": 0.6628, "step": 2448 }, { "epoch": 0.5644157640009219, "grad_norm": 0.22454223036766052, "learning_rate": 4.20455759286145e-05, "loss": 0.6667, "step": 2449 }, { "epoch": 0.5646462318506569, "grad_norm": 0.18612834811210632, "learning_rate": 4.200872474624125e-05, "loss": 0.6652, "step": 2450 }, { "epoch": 0.5648766997003918, "grad_norm": 0.1997823268175125, "learning_rate": 4.197187801801301e-05, "loss": 0.6521, "step": 2451 }, { "epoch": 0.5651071675501268, "grad_norm": 0.1961279809474945, "learning_rate": 4.1935035764467306e-05, "loss": 0.6687, "step": 2452 }, { "epoch": 0.5653376353998617, "grad_norm": 0.20370718836784363, "learning_rate": 4.189819800613906e-05, "loss": 0.6631, "step": 2453 }, { "epoch": 0.5655681032495967, "grad_norm": 0.2019963413476944, "learning_rate": 4.1861364763560785e-05, "loss": 0.6546, "step": 2454 }, { "epoch": 0.5657985710993316, "grad_norm": 0.2010907530784607, "learning_rate": 4.182453605726247e-05, "loss": 0.6675, "step": 2455 }, { "epoch": 0.5660290389490666, "grad_norm": 0.19456058740615845, "learning_rate": 4.178771190777151e-05, "loss": 0.6623, "step": 2456 }, { "epoch": 0.5662595067988015, "grad_norm": 0.19006440043449402, "learning_rate": 4.175089233561282e-05, "loss": 0.6699, "step": 2457 }, { "epoch": 0.5664899746485366, "grad_norm": 0.19521141052246094, "learning_rate": 4.171407736130876e-05, "loss": 0.6509, "step": 2458 }, { "epoch": 0.5667204424982715, "grad_norm": 0.1771191656589508, "learning_rate": 4.167726700537909e-05, "loss": 0.6619, "step": 2459 }, { "epoch": 0.5669509103480065, "grad_norm": 0.21703976392745972, "learning_rate": 4.164046128834103e-05, "loss": 0.6601, "step": 2460 }, { "epoch": 0.5671813781977414, "grad_norm": 0.18353256583213806, "learning_rate": 4.160366023070921e-05, "loss": 0.6564, "step": 2461 }, { "epoch": 0.5674118460474764, "grad_norm": 0.20448799431324005, "learning_rate": 4.1566863852995635e-05, "loss": 0.6552, "step": 2462 }, { "epoch": 0.5676423138972113, "grad_norm": 0.18381868302822113, "learning_rate": 4.153007217570973e-05, "loss": 0.6629, "step": 2463 }, { "epoch": 0.5678727817469463, "grad_norm": 0.20256194472312927, "learning_rate": 4.149328521935832e-05, "loss": 0.662, "step": 2464 }, { "epoch": 0.5681032495966812, "grad_norm": 0.19661514461040497, "learning_rate": 4.145650300444553e-05, "loss": 0.6552, "step": 2465 }, { "epoch": 0.5683337174464163, "grad_norm": 0.19295766949653625, "learning_rate": 4.141972555147291e-05, "loss": 0.6603, "step": 2466 }, { "epoch": 0.5685641852961512, "grad_norm": 0.19877560436725616, "learning_rate": 4.1382952880939346e-05, "loss": 0.6532, "step": 2467 }, { "epoch": 0.5687946531458862, "grad_norm": 0.20013585686683655, "learning_rate": 4.1346185013340985e-05, "loss": 0.6658, "step": 2468 }, { "epoch": 0.5690251209956211, "grad_norm": 0.1775607168674469, "learning_rate": 4.13094219691714e-05, "loss": 0.6679, "step": 2469 }, { "epoch": 0.5692555888453561, "grad_norm": 0.1954258233308792, "learning_rate": 4.1272663768921426e-05, "loss": 0.6593, "step": 2470 }, { "epoch": 0.569486056695091, "grad_norm": 0.18076026439666748, "learning_rate": 4.123591043307918e-05, "loss": 0.66, "step": 2471 }, { "epoch": 0.569716524544826, "grad_norm": 0.18170256912708282, "learning_rate": 4.1199161982130105e-05, "loss": 0.6651, "step": 2472 }, { "epoch": 0.5699469923945609, "grad_norm": 0.19181379675865173, "learning_rate": 4.116241843655692e-05, "loss": 0.6556, "step": 2473 }, { "epoch": 0.5701774602442959, "grad_norm": 0.1833178848028183, "learning_rate": 4.1125679816839564e-05, "loss": 0.6556, "step": 2474 }, { "epoch": 0.5704079280940308, "grad_norm": 0.19886410236358643, "learning_rate": 4.108894614345527e-05, "loss": 0.6608, "step": 2475 }, { "epoch": 0.5706383959437659, "grad_norm": 0.18102464079856873, "learning_rate": 4.105221743687853e-05, "loss": 0.6604, "step": 2476 }, { "epoch": 0.5708688637935008, "grad_norm": 0.18585921823978424, "learning_rate": 4.101549371758101e-05, "loss": 0.6546, "step": 2477 }, { "epoch": 0.5710993316432358, "grad_norm": 0.18972377479076385, "learning_rate": 4.097877500603164e-05, "loss": 0.6664, "step": 2478 }, { "epoch": 0.5713297994929707, "grad_norm": 0.18075346946716309, "learning_rate": 4.0942061322696584e-05, "loss": 0.6573, "step": 2479 }, { "epoch": 0.5715602673427057, "grad_norm": 0.185418963432312, "learning_rate": 4.09053526880391e-05, "loss": 0.6631, "step": 2480 }, { "epoch": 0.5717907351924406, "grad_norm": 0.1810043305158615, "learning_rate": 4.0868649122519756e-05, "loss": 0.6557, "step": 2481 }, { "epoch": 0.5720212030421756, "grad_norm": 0.1840018481016159, "learning_rate": 4.083195064659622e-05, "loss": 0.6653, "step": 2482 }, { "epoch": 0.5722516708919105, "grad_norm": 0.19294288754463196, "learning_rate": 4.079525728072334e-05, "loss": 0.6607, "step": 2483 }, { "epoch": 0.5724821387416456, "grad_norm": 0.1916273981332779, "learning_rate": 4.0758569045353114e-05, "loss": 0.6646, "step": 2484 }, { "epoch": 0.5727126065913805, "grad_norm": 0.18346838653087616, "learning_rate": 4.07218859609347e-05, "loss": 0.6529, "step": 2485 }, { "epoch": 0.5729430744411155, "grad_norm": 0.19156713783740997, "learning_rate": 4.0685208047914346e-05, "loss": 0.6647, "step": 2486 }, { "epoch": 0.5731735422908504, "grad_norm": 0.18695339560508728, "learning_rate": 4.064853532673546e-05, "loss": 0.6557, "step": 2487 }, { "epoch": 0.5734040101405854, "grad_norm": 0.20183449983596802, "learning_rate": 4.061186781783853e-05, "loss": 0.6653, "step": 2488 }, { "epoch": 0.5736344779903203, "grad_norm": 0.19282028079032898, "learning_rate": 4.0575205541661135e-05, "loss": 0.6603, "step": 2489 }, { "epoch": 0.5738649458400553, "grad_norm": 0.20781488716602325, "learning_rate": 4.0538548518637944e-05, "loss": 0.6528, "step": 2490 }, { "epoch": 0.5740954136897902, "grad_norm": 0.21310502290725708, "learning_rate": 4.050189676920075e-05, "loss": 0.6605, "step": 2491 }, { "epoch": 0.5743258815395252, "grad_norm": 0.2098270207643509, "learning_rate": 4.046525031377829e-05, "loss": 0.6566, "step": 2492 }, { "epoch": 0.5745563493892601, "grad_norm": 0.19469839334487915, "learning_rate": 4.042860917279647e-05, "loss": 0.6613, "step": 2493 }, { "epoch": 0.5747868172389952, "grad_norm": 0.20969228446483612, "learning_rate": 4.039197336667816e-05, "loss": 0.661, "step": 2494 }, { "epoch": 0.5750172850887301, "grad_norm": 0.20014849305152893, "learning_rate": 4.035534291584328e-05, "loss": 0.6572, "step": 2495 }, { "epoch": 0.5752477529384651, "grad_norm": 0.20189215242862701, "learning_rate": 4.0318717840708766e-05, "loss": 0.6551, "step": 2496 }, { "epoch": 0.5754782207882001, "grad_norm": 0.20174747705459595, "learning_rate": 4.028209816168857e-05, "loss": 0.663, "step": 2497 }, { "epoch": 0.575708688637935, "grad_norm": 0.18536518514156342, "learning_rate": 4.0245483899193595e-05, "loss": 0.6579, "step": 2498 }, { "epoch": 0.57593915648767, "grad_norm": 0.2104630321264267, "learning_rate": 4.0208875073631767e-05, "loss": 0.6583, "step": 2499 }, { "epoch": 0.5761696243374049, "grad_norm": 0.20068304240703583, "learning_rate": 4.017227170540797e-05, "loss": 0.6642, "step": 2500 }, { "epoch": 0.57640009218714, "grad_norm": 0.2080548256635666, "learning_rate": 4.013567381492404e-05, "loss": 0.6574, "step": 2501 }, { "epoch": 0.5766305600368749, "grad_norm": 0.1887027621269226, "learning_rate": 4.009908142257875e-05, "loss": 0.6603, "step": 2502 }, { "epoch": 0.5768610278866099, "grad_norm": 0.2056475281715393, "learning_rate": 4.006249454876785e-05, "loss": 0.6586, "step": 2503 }, { "epoch": 0.5770914957363448, "grad_norm": 0.19129523634910583, "learning_rate": 4.002591321388395e-05, "loss": 0.6669, "step": 2504 }, { "epoch": 0.5773219635860798, "grad_norm": 0.20545504987239838, "learning_rate": 3.998933743831663e-05, "loss": 0.6603, "step": 2505 }, { "epoch": 0.5775524314358147, "grad_norm": 0.1966099590063095, "learning_rate": 3.9952767242452364e-05, "loss": 0.6575, "step": 2506 }, { "epoch": 0.5777828992855497, "grad_norm": 0.20709416270256042, "learning_rate": 3.9916202646674454e-05, "loss": 0.6619, "step": 2507 }, { "epoch": 0.5780133671352846, "grad_norm": 0.20298060774803162, "learning_rate": 3.987964367136317e-05, "loss": 0.6602, "step": 2508 }, { "epoch": 0.5782438349850196, "grad_norm": 0.19368208944797516, "learning_rate": 3.98430903368956e-05, "loss": 0.6637, "step": 2509 }, { "epoch": 0.5784743028347545, "grad_norm": 0.21980130672454834, "learning_rate": 3.980654266364569e-05, "loss": 0.6596, "step": 2510 }, { "epoch": 0.5787047706844896, "grad_norm": 0.20827548205852509, "learning_rate": 3.977000067198422e-05, "loss": 0.6553, "step": 2511 }, { "epoch": 0.5789352385342245, "grad_norm": 0.2053709775209427, "learning_rate": 3.9733464382278854e-05, "loss": 0.6532, "step": 2512 }, { "epoch": 0.5791657063839595, "grad_norm": 0.2115817666053772, "learning_rate": 3.969693381489401e-05, "loss": 0.6573, "step": 2513 }, { "epoch": 0.5793961742336944, "grad_norm": 0.20240476727485657, "learning_rate": 3.966040899019096e-05, "loss": 0.6572, "step": 2514 }, { "epoch": 0.5796266420834294, "grad_norm": 0.20553265511989594, "learning_rate": 3.962388992852778e-05, "loss": 0.6593, "step": 2515 }, { "epoch": 0.5798571099331643, "grad_norm": 0.20434992015361786, "learning_rate": 3.95873766502593e-05, "loss": 0.657, "step": 2516 }, { "epoch": 0.5800875777828993, "grad_norm": 0.19303986430168152, "learning_rate": 3.955086917573714e-05, "loss": 0.6632, "step": 2517 }, { "epoch": 0.5803180456326342, "grad_norm": 0.18680398166179657, "learning_rate": 3.951436752530973e-05, "loss": 0.6624, "step": 2518 }, { "epoch": 0.5805485134823692, "grad_norm": 0.19972355663776398, "learning_rate": 3.947787171932215e-05, "loss": 0.656, "step": 2519 }, { "epoch": 0.5807789813321041, "grad_norm": 0.18271125853061676, "learning_rate": 3.944138177811633e-05, "loss": 0.6557, "step": 2520 }, { "epoch": 0.5810094491818392, "grad_norm": 0.21198797225952148, "learning_rate": 3.9404897722030886e-05, "loss": 0.6563, "step": 2521 }, { "epoch": 0.5812399170315741, "grad_norm": 0.18243679404258728, "learning_rate": 3.936841957140115e-05, "loss": 0.6591, "step": 2522 }, { "epoch": 0.5814703848813091, "grad_norm": 0.21174080669879913, "learning_rate": 3.933194734655916e-05, "loss": 0.6634, "step": 2523 }, { "epoch": 0.581700852731044, "grad_norm": 0.19727849960327148, "learning_rate": 3.929548106783367e-05, "loss": 0.6504, "step": 2524 }, { "epoch": 0.581931320580779, "grad_norm": 0.19438689947128296, "learning_rate": 3.925902075555009e-05, "loss": 0.6678, "step": 2525 }, { "epoch": 0.5821617884305139, "grad_norm": 0.20954324305057526, "learning_rate": 3.9222566430030536e-05, "loss": 0.6592, "step": 2526 }, { "epoch": 0.5823922562802489, "grad_norm": 0.1915033459663391, "learning_rate": 3.918611811159379e-05, "loss": 0.6503, "step": 2527 }, { "epoch": 0.5826227241299838, "grad_norm": 0.21163514256477356, "learning_rate": 3.914967582055524e-05, "loss": 0.6506, "step": 2528 }, { "epoch": 0.5828531919797189, "grad_norm": 0.19895008206367493, "learning_rate": 3.911323957722694e-05, "loss": 0.659, "step": 2529 }, { "epoch": 0.5830836598294538, "grad_norm": 0.1981336772441864, "learning_rate": 3.907680940191764e-05, "loss": 0.6543, "step": 2530 }, { "epoch": 0.5833141276791888, "grad_norm": 0.19202502071857452, "learning_rate": 3.904038531493257e-05, "loss": 0.6523, "step": 2531 }, { "epoch": 0.5835445955289237, "grad_norm": 0.2164509892463684, "learning_rate": 3.900396733657366e-05, "loss": 0.6538, "step": 2532 }, { "epoch": 0.5837750633786587, "grad_norm": 0.18908166885375977, "learning_rate": 3.896755548713946e-05, "loss": 0.6575, "step": 2533 }, { "epoch": 0.5840055312283936, "grad_norm": 0.21118958294391632, "learning_rate": 3.8931149786925e-05, "loss": 0.6605, "step": 2534 }, { "epoch": 0.5842359990781286, "grad_norm": 0.1941760927438736, "learning_rate": 3.889475025622199e-05, "loss": 0.6668, "step": 2535 }, { "epoch": 0.5844664669278635, "grad_norm": 0.20846354961395264, "learning_rate": 3.885835691531865e-05, "loss": 0.6565, "step": 2536 }, { "epoch": 0.5846969347775985, "grad_norm": 0.19090569019317627, "learning_rate": 3.882196978449972e-05, "loss": 0.6618, "step": 2537 }, { "epoch": 0.5849274026273334, "grad_norm": 0.2103571891784668, "learning_rate": 3.878558888404655e-05, "loss": 0.6521, "step": 2538 }, { "epoch": 0.5851578704770685, "grad_norm": 0.19896583259105682, "learning_rate": 3.874921423423697e-05, "loss": 0.6568, "step": 2539 }, { "epoch": 0.5853883383268034, "grad_norm": 0.18624718487262726, "learning_rate": 3.8712845855345325e-05, "loss": 0.6519, "step": 2540 }, { "epoch": 0.5856188061765384, "grad_norm": 0.19909995794296265, "learning_rate": 3.867648376764248e-05, "loss": 0.6624, "step": 2541 }, { "epoch": 0.5858492740262733, "grad_norm": 0.18448121845722198, "learning_rate": 3.864012799139581e-05, "loss": 0.6622, "step": 2542 }, { "epoch": 0.5860797418760083, "grad_norm": 0.20322075486183167, "learning_rate": 3.860377854686913e-05, "loss": 0.6648, "step": 2543 }, { "epoch": 0.5863102097257432, "grad_norm": 0.19707633554935455, "learning_rate": 3.8567435454322745e-05, "loss": 0.6581, "step": 2544 }, { "epoch": 0.5865406775754782, "grad_norm": 0.18212544918060303, "learning_rate": 3.853109873401346e-05, "loss": 0.6537, "step": 2545 }, { "epoch": 0.5867711454252131, "grad_norm": 0.19013650715351105, "learning_rate": 3.849476840619443e-05, "loss": 0.6569, "step": 2546 }, { "epoch": 0.5870016132749482, "grad_norm": 0.20143313705921173, "learning_rate": 3.845844449111535e-05, "loss": 0.6615, "step": 2547 }, { "epoch": 0.5872320811246831, "grad_norm": 0.20364569127559662, "learning_rate": 3.842212700902231e-05, "loss": 0.6565, "step": 2548 }, { "epoch": 0.5874625489744181, "grad_norm": 0.1817772090435028, "learning_rate": 3.838581598015776e-05, "loss": 0.6533, "step": 2549 }, { "epoch": 0.587693016824153, "grad_norm": 0.1907220184803009, "learning_rate": 3.834951142476063e-05, "loss": 0.6604, "step": 2550 }, { "epoch": 0.587923484673888, "grad_norm": 0.19262264668941498, "learning_rate": 3.83132133630662e-05, "loss": 0.656, "step": 2551 }, { "epoch": 0.5881539525236229, "grad_norm": 0.18963393568992615, "learning_rate": 3.827692181530612e-05, "loss": 0.6608, "step": 2552 }, { "epoch": 0.5883844203733579, "grad_norm": 0.20289820432662964, "learning_rate": 3.8240636801708444e-05, "loss": 0.6643, "step": 2553 }, { "epoch": 0.5886148882230928, "grad_norm": 0.17837589979171753, "learning_rate": 3.8204358342497584e-05, "loss": 0.6626, "step": 2554 }, { "epoch": 0.5888453560728278, "grad_norm": 0.2154434323310852, "learning_rate": 3.816808645789425e-05, "loss": 0.6643, "step": 2555 }, { "epoch": 0.5890758239225629, "grad_norm": 0.19185711443424225, "learning_rate": 3.813182116811552e-05, "loss": 0.6568, "step": 2556 }, { "epoch": 0.5893062917722978, "grad_norm": 0.18890288472175598, "learning_rate": 3.809556249337484e-05, "loss": 0.652, "step": 2557 }, { "epoch": 0.5895367596220328, "grad_norm": 0.20426598191261292, "learning_rate": 3.805931045388188e-05, "loss": 0.6576, "step": 2558 }, { "epoch": 0.5897672274717677, "grad_norm": 0.18901459872722626, "learning_rate": 3.8023065069842653e-05, "loss": 0.6577, "step": 2559 }, { "epoch": 0.5899976953215027, "grad_norm": 0.19619275629520416, "learning_rate": 3.7986826361459524e-05, "loss": 0.6664, "step": 2560 }, { "epoch": 0.5902281631712376, "grad_norm": 0.1934560090303421, "learning_rate": 3.795059434893101e-05, "loss": 0.6597, "step": 2561 }, { "epoch": 0.5904586310209726, "grad_norm": 0.1980142444372177, "learning_rate": 3.7914369052452006e-05, "loss": 0.6619, "step": 2562 }, { "epoch": 0.5906890988707075, "grad_norm": 0.1985742449760437, "learning_rate": 3.787815049221361e-05, "loss": 0.6524, "step": 2563 }, { "epoch": 0.5909195667204425, "grad_norm": 0.1910460889339447, "learning_rate": 3.784193868840318e-05, "loss": 0.6597, "step": 2564 }, { "epoch": 0.5911500345701775, "grad_norm": 0.1858750432729721, "learning_rate": 3.7805733661204306e-05, "loss": 0.6636, "step": 2565 }, { "epoch": 0.5913805024199125, "grad_norm": 0.20883874595165253, "learning_rate": 3.77695354307968e-05, "loss": 0.6565, "step": 2566 }, { "epoch": 0.5916109702696474, "grad_norm": 0.18166543543338776, "learning_rate": 3.77333440173567e-05, "loss": 0.6525, "step": 2567 }, { "epoch": 0.5918414381193824, "grad_norm": 0.1856246143579483, "learning_rate": 3.7697159441056205e-05, "loss": 0.6615, "step": 2568 }, { "epoch": 0.5920719059691173, "grad_norm": 0.19537490606307983, "learning_rate": 3.7660981722063745e-05, "loss": 0.6563, "step": 2569 }, { "epoch": 0.5923023738188523, "grad_norm": 0.19945433735847473, "learning_rate": 3.762481088054393e-05, "loss": 0.6573, "step": 2570 }, { "epoch": 0.5925328416685872, "grad_norm": 0.20831739902496338, "learning_rate": 3.758864693665748e-05, "loss": 0.6633, "step": 2571 }, { "epoch": 0.5927633095183222, "grad_norm": 0.21567675471305847, "learning_rate": 3.7552489910561326e-05, "loss": 0.6572, "step": 2572 }, { "epoch": 0.5929937773680571, "grad_norm": 0.215605229139328, "learning_rate": 3.751633982240855e-05, "loss": 0.6625, "step": 2573 }, { "epoch": 0.5932242452177922, "grad_norm": 0.19509264826774597, "learning_rate": 3.7480196692348315e-05, "loss": 0.6619, "step": 2574 }, { "epoch": 0.5934547130675271, "grad_norm": 0.20573195815086365, "learning_rate": 3.744406054052594e-05, "loss": 0.6536, "step": 2575 }, { "epoch": 0.5936851809172621, "grad_norm": 0.18587082624435425, "learning_rate": 3.740793138708285e-05, "loss": 0.6453, "step": 2576 }, { "epoch": 0.593915648766997, "grad_norm": 0.20044218003749847, "learning_rate": 3.737180925215658e-05, "loss": 0.6622, "step": 2577 }, { "epoch": 0.594146116616732, "grad_norm": 0.20294342935085297, "learning_rate": 3.733569415588071e-05, "loss": 0.6649, "step": 2578 }, { "epoch": 0.5943765844664669, "grad_norm": 0.1900031715631485, "learning_rate": 3.729958611838496e-05, "loss": 0.6607, "step": 2579 }, { "epoch": 0.5946070523162019, "grad_norm": 0.21229608356952667, "learning_rate": 3.7263485159795075e-05, "loss": 0.6624, "step": 2580 }, { "epoch": 0.5948375201659368, "grad_norm": 0.18590863049030304, "learning_rate": 3.7227391300232845e-05, "loss": 0.6529, "step": 2581 }, { "epoch": 0.5950679880156718, "grad_norm": 0.20582017302513123, "learning_rate": 3.7191304559816165e-05, "loss": 0.6516, "step": 2582 }, { "epoch": 0.5952984558654068, "grad_norm": 0.18912175297737122, "learning_rate": 3.715522495865885e-05, "loss": 0.6669, "step": 2583 }, { "epoch": 0.5955289237151418, "grad_norm": 0.2016802579164505, "learning_rate": 3.711915251687086e-05, "loss": 0.6562, "step": 2584 }, { "epoch": 0.5957593915648767, "grad_norm": 0.18063144385814667, "learning_rate": 3.7083087254558116e-05, "loss": 0.6661, "step": 2585 }, { "epoch": 0.5959898594146117, "grad_norm": 0.20485232770442963, "learning_rate": 3.7047029191822455e-05, "loss": 0.6597, "step": 2586 }, { "epoch": 0.5962203272643466, "grad_norm": 0.17791825532913208, "learning_rate": 3.701097834876185e-05, "loss": 0.6555, "step": 2587 }, { "epoch": 0.5964507951140816, "grad_norm": 0.2008713036775589, "learning_rate": 3.697493474547016e-05, "loss": 0.6649, "step": 2588 }, { "epoch": 0.5966812629638165, "grad_norm": 0.19855642318725586, "learning_rate": 3.693889840203719e-05, "loss": 0.6608, "step": 2589 }, { "epoch": 0.5969117308135515, "grad_norm": 0.1890726089477539, "learning_rate": 3.690286933854877e-05, "loss": 0.6555, "step": 2590 }, { "epoch": 0.5971421986632864, "grad_norm": 0.19872720539569855, "learning_rate": 3.686684757508663e-05, "loss": 0.6596, "step": 2591 }, { "epoch": 0.5973726665130215, "grad_norm": 0.1832602471113205, "learning_rate": 3.683083313172841e-05, "loss": 0.6586, "step": 2592 }, { "epoch": 0.5976031343627564, "grad_norm": 0.2064426690340042, "learning_rate": 3.679482602854773e-05, "loss": 0.6593, "step": 2593 }, { "epoch": 0.5978336022124914, "grad_norm": 0.17292998731136322, "learning_rate": 3.675882628561408e-05, "loss": 0.6601, "step": 2594 }, { "epoch": 0.5980640700622263, "grad_norm": 0.2106057107448578, "learning_rate": 3.672283392299282e-05, "loss": 0.6503, "step": 2595 }, { "epoch": 0.5982945379119613, "grad_norm": 0.1862911581993103, "learning_rate": 3.6686848960745244e-05, "loss": 0.6562, "step": 2596 }, { "epoch": 0.5985250057616962, "grad_norm": 0.1884952336549759, "learning_rate": 3.6650871418928556e-05, "loss": 0.6618, "step": 2597 }, { "epoch": 0.5987554736114312, "grad_norm": 0.17884479463100433, "learning_rate": 3.66149013175957e-05, "loss": 0.6558, "step": 2598 }, { "epoch": 0.5989859414611661, "grad_norm": 0.1988133192062378, "learning_rate": 3.6578938676795575e-05, "loss": 0.6535, "step": 2599 }, { "epoch": 0.5992164093109011, "grad_norm": 0.18073412775993347, "learning_rate": 3.6542983516572915e-05, "loss": 0.6506, "step": 2600 }, { "epoch": 0.599446877160636, "grad_norm": 0.19410258531570435, "learning_rate": 3.650703585696824e-05, "loss": 0.6578, "step": 2601 }, { "epoch": 0.5996773450103711, "grad_norm": 0.1835651844739914, "learning_rate": 3.647109571801792e-05, "loss": 0.6606, "step": 2602 }, { "epoch": 0.599907812860106, "grad_norm": 0.21423934400081635, "learning_rate": 3.643516311975413e-05, "loss": 0.6515, "step": 2603 }, { "epoch": 0.600138280709841, "grad_norm": 0.19270770251750946, "learning_rate": 3.639923808220483e-05, "loss": 0.6521, "step": 2604 }, { "epoch": 0.6003687485595759, "grad_norm": 0.18155768513679504, "learning_rate": 3.636332062539378e-05, "loss": 0.6557, "step": 2605 }, { "epoch": 0.6005992164093109, "grad_norm": 0.2070186883211136, "learning_rate": 3.6327410769340505e-05, "loss": 0.6535, "step": 2606 }, { "epoch": 0.6008296842590458, "grad_norm": 0.17192067205905914, "learning_rate": 3.6291508534060296e-05, "loss": 0.6545, "step": 2607 }, { "epoch": 0.6010601521087808, "grad_norm": 0.20923547446727753, "learning_rate": 3.625561393956417e-05, "loss": 0.6645, "step": 2608 }, { "epoch": 0.6012906199585157, "grad_norm": 0.18093320727348328, "learning_rate": 3.621972700585898e-05, "loss": 0.653, "step": 2609 }, { "epoch": 0.6015210878082508, "grad_norm": 0.19666415452957153, "learning_rate": 3.618384775294718e-05, "loss": 0.6512, "step": 2610 }, { "epoch": 0.6017515556579857, "grad_norm": 0.1838379055261612, "learning_rate": 3.614797620082703e-05, "loss": 0.658, "step": 2611 }, { "epoch": 0.6019820235077207, "grad_norm": 0.19848011434078217, "learning_rate": 3.611211236949248e-05, "loss": 0.6495, "step": 2612 }, { "epoch": 0.6022124913574556, "grad_norm": 0.19375987350940704, "learning_rate": 3.6076256278933145e-05, "loss": 0.6578, "step": 2613 }, { "epoch": 0.6024429592071906, "grad_norm": 0.18482361733913422, "learning_rate": 3.604040794913437e-05, "loss": 0.6623, "step": 2614 }, { "epoch": 0.6026734270569255, "grad_norm": 0.18653304874897003, "learning_rate": 3.600456740007714e-05, "loss": 0.6625, "step": 2615 }, { "epoch": 0.6029038949066605, "grad_norm": 0.17585410177707672, "learning_rate": 3.5968734651738124e-05, "loss": 0.6578, "step": 2616 }, { "epoch": 0.6031343627563955, "grad_norm": 0.18889284133911133, "learning_rate": 3.5932909724089626e-05, "loss": 0.6487, "step": 2617 }, { "epoch": 0.6033648306061304, "grad_norm": 0.1867912858724594, "learning_rate": 3.589709263709963e-05, "loss": 0.65, "step": 2618 }, { "epoch": 0.6035952984558655, "grad_norm": 0.18602915108203888, "learning_rate": 3.586128341073167e-05, "loss": 0.6587, "step": 2619 }, { "epoch": 0.6038257663056004, "grad_norm": 0.16733336448669434, "learning_rate": 3.582548206494499e-05, "loss": 0.6489, "step": 2620 }, { "epoch": 0.6040562341553354, "grad_norm": 0.1732863187789917, "learning_rate": 3.578968861969441e-05, "loss": 0.6551, "step": 2621 }, { "epoch": 0.6042867020050703, "grad_norm": 0.18000958859920502, "learning_rate": 3.575390309493029e-05, "loss": 0.6646, "step": 2622 }, { "epoch": 0.6045171698548053, "grad_norm": 0.17645825445652008, "learning_rate": 3.5718125510598645e-05, "loss": 0.6579, "step": 2623 }, { "epoch": 0.6047476377045402, "grad_norm": 0.18449077010154724, "learning_rate": 3.5682355886641075e-05, "loss": 0.6467, "step": 2624 }, { "epoch": 0.6049781055542752, "grad_norm": 0.1781143993139267, "learning_rate": 3.5646594242994646e-05, "loss": 0.6607, "step": 2625 }, { "epoch": 0.6052085734040101, "grad_norm": 0.1856512576341629, "learning_rate": 3.5610840599592096e-05, "loss": 0.6588, "step": 2626 }, { "epoch": 0.6054390412537451, "grad_norm": 0.18362030386924744, "learning_rate": 3.5575094976361625e-05, "loss": 0.6636, "step": 2627 }, { "epoch": 0.60566950910348, "grad_norm": 0.172404944896698, "learning_rate": 3.553935739322698e-05, "loss": 0.6543, "step": 2628 }, { "epoch": 0.6058999769532151, "grad_norm": 0.18513377010822296, "learning_rate": 3.550362787010744e-05, "loss": 0.6549, "step": 2629 }, { "epoch": 0.60613044480295, "grad_norm": 0.1736796498298645, "learning_rate": 3.5467906426917795e-05, "loss": 0.6516, "step": 2630 }, { "epoch": 0.606360912652685, "grad_norm": 0.18415535986423492, "learning_rate": 3.54321930835683e-05, "loss": 0.6537, "step": 2631 }, { "epoch": 0.6065913805024199, "grad_norm": 0.1662503480911255, "learning_rate": 3.539648785996471e-05, "loss": 0.6554, "step": 2632 }, { "epoch": 0.6068218483521549, "grad_norm": 0.17874038219451904, "learning_rate": 3.536079077600829e-05, "loss": 0.6543, "step": 2633 }, { "epoch": 0.6070523162018898, "grad_norm": 0.17104828357696533, "learning_rate": 3.532510185159572e-05, "loss": 0.6532, "step": 2634 }, { "epoch": 0.6072827840516248, "grad_norm": 0.1689852625131607, "learning_rate": 3.5289421106619126e-05, "loss": 0.6549, "step": 2635 }, { "epoch": 0.6075132519013597, "grad_norm": 0.17767558991909027, "learning_rate": 3.525374856096616e-05, "loss": 0.6609, "step": 2636 }, { "epoch": 0.6077437197510948, "grad_norm": 0.1855451464653015, "learning_rate": 3.5218084234519764e-05, "loss": 0.6619, "step": 2637 }, { "epoch": 0.6079741876008297, "grad_norm": 0.17707620561122894, "learning_rate": 3.518242814715844e-05, "loss": 0.6615, "step": 2638 }, { "epoch": 0.6082046554505647, "grad_norm": 0.18121664226055145, "learning_rate": 3.514678031875602e-05, "loss": 0.663, "step": 2639 }, { "epoch": 0.6084351233002996, "grad_norm": 0.1841084361076355, "learning_rate": 3.5111140769181735e-05, "loss": 0.6526, "step": 2640 }, { "epoch": 0.6086655911500346, "grad_norm": 0.18913474678993225, "learning_rate": 3.5075509518300224e-05, "loss": 0.6576, "step": 2641 }, { "epoch": 0.6088960589997695, "grad_norm": 0.1743757128715515, "learning_rate": 3.5039886585971496e-05, "loss": 0.6534, "step": 2642 }, { "epoch": 0.6091265268495045, "grad_norm": 0.1718021035194397, "learning_rate": 3.500427199205091e-05, "loss": 0.667, "step": 2643 }, { "epoch": 0.6093569946992394, "grad_norm": 0.18834391236305237, "learning_rate": 3.4968665756389174e-05, "loss": 0.6608, "step": 2644 }, { "epoch": 0.6095874625489744, "grad_norm": 0.17941658198833466, "learning_rate": 3.4933067898832375e-05, "loss": 0.6586, "step": 2645 }, { "epoch": 0.6098179303987094, "grad_norm": 0.1901034712791443, "learning_rate": 3.489747843922189e-05, "loss": 0.6558, "step": 2646 }, { "epoch": 0.6100483982484444, "grad_norm": 0.1816999912261963, "learning_rate": 3.4861897397394415e-05, "loss": 0.6595, "step": 2647 }, { "epoch": 0.6102788660981793, "grad_norm": 0.187819242477417, "learning_rate": 3.482632479318201e-05, "loss": 0.6464, "step": 2648 }, { "epoch": 0.6105093339479143, "grad_norm": 0.20110177993774414, "learning_rate": 3.479076064641195e-05, "loss": 0.6544, "step": 2649 }, { "epoch": 0.6107398017976492, "grad_norm": 0.19999714195728302, "learning_rate": 3.475520497690684e-05, "loss": 0.6536, "step": 2650 }, { "epoch": 0.6109702696473842, "grad_norm": 0.20862232148647308, "learning_rate": 3.471965780448461e-05, "loss": 0.6516, "step": 2651 }, { "epoch": 0.6112007374971191, "grad_norm": 0.1882224977016449, "learning_rate": 3.4684119148958314e-05, "loss": 0.6561, "step": 2652 }, { "epoch": 0.6114312053468541, "grad_norm": 0.22021904587745667, "learning_rate": 3.464858903013641e-05, "loss": 0.6543, "step": 2653 }, { "epoch": 0.611661673196589, "grad_norm": 0.1685914695262909, "learning_rate": 3.461306746782253e-05, "loss": 0.6563, "step": 2654 }, { "epoch": 0.6118921410463241, "grad_norm": 0.19287706911563873, "learning_rate": 3.457755448181551e-05, "loss": 0.647, "step": 2655 }, { "epoch": 0.612122608896059, "grad_norm": 0.18482106924057007, "learning_rate": 3.454205009190945e-05, "loss": 0.6534, "step": 2656 }, { "epoch": 0.612353076745794, "grad_norm": 0.18831658363342285, "learning_rate": 3.450655431789366e-05, "loss": 0.6577, "step": 2657 }, { "epoch": 0.6125835445955289, "grad_norm": 0.1809016317129135, "learning_rate": 3.447106717955261e-05, "loss": 0.6562, "step": 2658 }, { "epoch": 0.6128140124452639, "grad_norm": 0.19347211718559265, "learning_rate": 3.443558869666598e-05, "loss": 0.649, "step": 2659 }, { "epoch": 0.6130444802949988, "grad_norm": 0.19009865820407867, "learning_rate": 3.4400118889008646e-05, "loss": 0.6468, "step": 2660 }, { "epoch": 0.6132749481447338, "grad_norm": 0.19617056846618652, "learning_rate": 3.4364657776350605e-05, "loss": 0.658, "step": 2661 }, { "epoch": 0.6135054159944687, "grad_norm": 0.19712841510772705, "learning_rate": 3.432920537845703e-05, "loss": 0.6584, "step": 2662 }, { "epoch": 0.6137358838442037, "grad_norm": 0.18756158649921417, "learning_rate": 3.429376171508827e-05, "loss": 0.6505, "step": 2663 }, { "epoch": 0.6139663516939387, "grad_norm": 0.19545352458953857, "learning_rate": 3.425832680599972e-05, "loss": 0.6567, "step": 2664 }, { "epoch": 0.6141968195436737, "grad_norm": 0.19188739359378815, "learning_rate": 3.4222900670941995e-05, "loss": 0.6664, "step": 2665 }, { "epoch": 0.6144272873934086, "grad_norm": 0.19670219719409943, "learning_rate": 3.4187483329660755e-05, "loss": 0.6572, "step": 2666 }, { "epoch": 0.6146577552431436, "grad_norm": 0.18227505683898926, "learning_rate": 3.415207480189676e-05, "loss": 0.6524, "step": 2667 }, { "epoch": 0.6148882230928785, "grad_norm": 0.19035394489765167, "learning_rate": 3.4116675107385885e-05, "loss": 0.6538, "step": 2668 }, { "epoch": 0.6151186909426135, "grad_norm": 0.18735790252685547, "learning_rate": 3.408128426585909e-05, "loss": 0.6554, "step": 2669 }, { "epoch": 0.6153491587923484, "grad_norm": 0.20038571953773499, "learning_rate": 3.4045902297042356e-05, "loss": 0.6627, "step": 2670 }, { "epoch": 0.6155796266420834, "grad_norm": 0.17390772700309753, "learning_rate": 3.401052922065675e-05, "loss": 0.6551, "step": 2671 }, { "epoch": 0.6158100944918183, "grad_norm": 0.19239278137683868, "learning_rate": 3.397516505641838e-05, "loss": 0.654, "step": 2672 }, { "epoch": 0.6160405623415534, "grad_norm": 0.1672012060880661, "learning_rate": 3.393980982403837e-05, "loss": 0.6537, "step": 2673 }, { "epoch": 0.6162710301912883, "grad_norm": 0.19565598666667938, "learning_rate": 3.3904463543222876e-05, "loss": 0.6523, "step": 2674 }, { "epoch": 0.6165014980410233, "grad_norm": 0.17734399437904358, "learning_rate": 3.386912623367311e-05, "loss": 0.6533, "step": 2675 }, { "epoch": 0.6167319658907583, "grad_norm": 0.18294768035411835, "learning_rate": 3.383379791508519e-05, "loss": 0.6492, "step": 2676 }, { "epoch": 0.6169624337404932, "grad_norm": 0.187822625041008, "learning_rate": 3.3798478607150274e-05, "loss": 0.6568, "step": 2677 }, { "epoch": 0.6171929015902282, "grad_norm": 0.17213211953639984, "learning_rate": 3.376316832955454e-05, "loss": 0.6634, "step": 2678 }, { "epoch": 0.6174233694399631, "grad_norm": 0.18802215158939362, "learning_rate": 3.3727867101979036e-05, "loss": 0.6512, "step": 2679 }, { "epoch": 0.6176538372896981, "grad_norm": 0.16777820885181427, "learning_rate": 3.369257494409985e-05, "loss": 0.6475, "step": 2680 }, { "epoch": 0.617884305139433, "grad_norm": 0.19203679263591766, "learning_rate": 3.3657291875587996e-05, "loss": 0.6552, "step": 2681 }, { "epoch": 0.6181147729891681, "grad_norm": 0.19129416346549988, "learning_rate": 3.3622017916109376e-05, "loss": 0.6645, "step": 2682 }, { "epoch": 0.618345240838903, "grad_norm": 0.17216765880584717, "learning_rate": 3.358675308532486e-05, "loss": 0.6611, "step": 2683 }, { "epoch": 0.618575708688638, "grad_norm": 0.18110480904579163, "learning_rate": 3.355149740289024e-05, "loss": 0.6505, "step": 2684 }, { "epoch": 0.6188061765383729, "grad_norm": 0.18171606957912445, "learning_rate": 3.3516250888456165e-05, "loss": 0.6439, "step": 2685 }, { "epoch": 0.6190366443881079, "grad_norm": 0.17510759830474854, "learning_rate": 3.348101356166819e-05, "loss": 0.6511, "step": 2686 }, { "epoch": 0.6192671122378428, "grad_norm": 0.18047381937503815, "learning_rate": 3.344578544216678e-05, "loss": 0.6561, "step": 2687 }, { "epoch": 0.6194975800875778, "grad_norm": 0.17559714615345, "learning_rate": 3.341056654958722e-05, "loss": 0.6538, "step": 2688 }, { "epoch": 0.6197280479373127, "grad_norm": 0.18052367866039276, "learning_rate": 3.3375356903559676e-05, "loss": 0.6521, "step": 2689 }, { "epoch": 0.6199585157870477, "grad_norm": 0.17809216678142548, "learning_rate": 3.3340156523709185e-05, "loss": 0.657, "step": 2690 }, { "epoch": 0.6201889836367827, "grad_norm": 0.18209248781204224, "learning_rate": 3.330496542965556e-05, "loss": 0.651, "step": 2691 }, { "epoch": 0.6204194514865177, "grad_norm": 0.1755290925502777, "learning_rate": 3.32697836410135e-05, "loss": 0.6581, "step": 2692 }, { "epoch": 0.6206499193362526, "grad_norm": 0.16924412548542023, "learning_rate": 3.323461117739248e-05, "loss": 0.6497, "step": 2693 }, { "epoch": 0.6208803871859876, "grad_norm": 0.18715137243270874, "learning_rate": 3.3199448058396786e-05, "loss": 0.6562, "step": 2694 }, { "epoch": 0.6211108550357225, "grad_norm": 0.17708511650562286, "learning_rate": 3.3164294303625486e-05, "loss": 0.6584, "step": 2695 }, { "epoch": 0.6213413228854575, "grad_norm": 0.1799190789461136, "learning_rate": 3.312914993267246e-05, "loss": 0.6591, "step": 2696 }, { "epoch": 0.6215717907351924, "grad_norm": 0.19582661986351013, "learning_rate": 3.309401496512633e-05, "loss": 0.6479, "step": 2697 }, { "epoch": 0.6218022585849274, "grad_norm": 0.17881685495376587, "learning_rate": 3.305888942057047e-05, "loss": 0.653, "step": 2698 }, { "epoch": 0.6220327264346623, "grad_norm": 0.1858767569065094, "learning_rate": 3.302377331858302e-05, "loss": 0.652, "step": 2699 }, { "epoch": 0.6222631942843974, "grad_norm": 0.16495144367218018, "learning_rate": 3.298866667873688e-05, "loss": 0.6534, "step": 2700 }, { "epoch": 0.6224936621341323, "grad_norm": 0.1828274130821228, "learning_rate": 3.2953569520599606e-05, "loss": 0.6452, "step": 2701 }, { "epoch": 0.6227241299838673, "grad_norm": 0.1840105652809143, "learning_rate": 3.291848186373353e-05, "loss": 0.6509, "step": 2702 }, { "epoch": 0.6229545978336022, "grad_norm": 0.1832989603281021, "learning_rate": 3.2883403727695694e-05, "loss": 0.6469, "step": 2703 }, { "epoch": 0.6231850656833372, "grad_norm": 0.18650995194911957, "learning_rate": 3.2848335132037763e-05, "loss": 0.654, "step": 2704 }, { "epoch": 0.6234155335330721, "grad_norm": 0.17295216023921967, "learning_rate": 3.2813276096306156e-05, "loss": 0.654, "step": 2705 }, { "epoch": 0.6236460013828071, "grad_norm": 0.17538554966449738, "learning_rate": 3.2778226640041956e-05, "loss": 0.6558, "step": 2706 }, { "epoch": 0.623876469232542, "grad_norm": 0.15956903994083405, "learning_rate": 3.274318678278085e-05, "loss": 0.6538, "step": 2707 }, { "epoch": 0.624106937082277, "grad_norm": 0.18525734543800354, "learning_rate": 3.270815654405324e-05, "loss": 0.6538, "step": 2708 }, { "epoch": 0.624337404932012, "grad_norm": 0.16766776144504547, "learning_rate": 3.267313594338414e-05, "loss": 0.6503, "step": 2709 }, { "epoch": 0.624567872781747, "grad_norm": 0.19396458566188812, "learning_rate": 3.2638125000293165e-05, "loss": 0.6553, "step": 2710 }, { "epoch": 0.6247983406314819, "grad_norm": 0.1728057563304901, "learning_rate": 3.2603123734294606e-05, "loss": 0.6621, "step": 2711 }, { "epoch": 0.6250288084812169, "grad_norm": 0.179381862282753, "learning_rate": 3.2568132164897316e-05, "loss": 0.6579, "step": 2712 }, { "epoch": 0.6252592763309518, "grad_norm": 0.1791330873966217, "learning_rate": 3.253315031160475e-05, "loss": 0.66, "step": 2713 }, { "epoch": 0.6254897441806868, "grad_norm": 0.18653972446918488, "learning_rate": 3.249817819391495e-05, "loss": 0.6593, "step": 2714 }, { "epoch": 0.6257202120304217, "grad_norm": 0.17987924814224243, "learning_rate": 3.246321583132058e-05, "loss": 0.6539, "step": 2715 }, { "epoch": 0.6259506798801567, "grad_norm": 0.19162432849407196, "learning_rate": 3.242826324330875e-05, "loss": 0.6572, "step": 2716 }, { "epoch": 0.6261811477298916, "grad_norm": 0.19111159443855286, "learning_rate": 3.2393320449361246e-05, "loss": 0.6546, "step": 2717 }, { "epoch": 0.6264116155796267, "grad_norm": 0.17735035717487335, "learning_rate": 3.235838746895434e-05, "loss": 0.6561, "step": 2718 }, { "epoch": 0.6266420834293616, "grad_norm": 0.1921262890100479, "learning_rate": 3.2323464321558804e-05, "loss": 0.6506, "step": 2719 }, { "epoch": 0.6268725512790966, "grad_norm": 0.1894858479499817, "learning_rate": 3.2288551026639986e-05, "loss": 0.6502, "step": 2720 }, { "epoch": 0.6271030191288315, "grad_norm": 0.19639429450035095, "learning_rate": 3.225364760365772e-05, "loss": 0.6546, "step": 2721 }, { "epoch": 0.6273334869785665, "grad_norm": 0.1766640543937683, "learning_rate": 3.221875407206633e-05, "loss": 0.66, "step": 2722 }, { "epoch": 0.6275639548283014, "grad_norm": 0.18973618745803833, "learning_rate": 3.2183870451314624e-05, "loss": 0.6616, "step": 2723 }, { "epoch": 0.6277944226780364, "grad_norm": 0.19568420946598053, "learning_rate": 3.2148996760845924e-05, "loss": 0.653, "step": 2724 }, { "epoch": 0.6280248905277713, "grad_norm": 0.18159765005111694, "learning_rate": 3.2114133020097945e-05, "loss": 0.6494, "step": 2725 }, { "epoch": 0.6282553583775063, "grad_norm": 0.2033732682466507, "learning_rate": 3.2079279248502925e-05, "loss": 0.6529, "step": 2726 }, { "epoch": 0.6284858262272413, "grad_norm": 0.17295779287815094, "learning_rate": 3.204443546548754e-05, "loss": 0.65, "step": 2727 }, { "epoch": 0.6287162940769763, "grad_norm": 0.1940968781709671, "learning_rate": 3.2009601690472834e-05, "loss": 0.6572, "step": 2728 }, { "epoch": 0.6289467619267112, "grad_norm": 0.17500554025173187, "learning_rate": 3.197477794287435e-05, "loss": 0.6498, "step": 2729 }, { "epoch": 0.6291772297764462, "grad_norm": 0.18749865889549255, "learning_rate": 3.1939964242102004e-05, "loss": 0.65, "step": 2730 }, { "epoch": 0.6294076976261811, "grad_norm": 0.19215604662895203, "learning_rate": 3.190516060756009e-05, "loss": 0.6679, "step": 2731 }, { "epoch": 0.6296381654759161, "grad_norm": 0.18513312935829163, "learning_rate": 3.187036705864736e-05, "loss": 0.6617, "step": 2732 }, { "epoch": 0.629868633325651, "grad_norm": 0.20599323511123657, "learning_rate": 3.183558361475687e-05, "loss": 0.652, "step": 2733 }, { "epoch": 0.630099101175386, "grad_norm": 0.18766285479068756, "learning_rate": 3.1800810295276095e-05, "loss": 0.6536, "step": 2734 }, { "epoch": 0.630329569025121, "grad_norm": 0.19500939548015594, "learning_rate": 3.1766047119586845e-05, "loss": 0.6591, "step": 2735 }, { "epoch": 0.630560036874856, "grad_norm": 0.19361793994903564, "learning_rate": 3.173129410706528e-05, "loss": 0.6434, "step": 2736 }, { "epoch": 0.630790504724591, "grad_norm": 0.17595571279525757, "learning_rate": 3.169655127708189e-05, "loss": 0.6405, "step": 2737 }, { "epoch": 0.6310209725743259, "grad_norm": 0.19558657705783844, "learning_rate": 3.1661818649001494e-05, "loss": 0.6524, "step": 2738 }, { "epoch": 0.6312514404240609, "grad_norm": 0.18385924398899078, "learning_rate": 3.1627096242183243e-05, "loss": 0.6614, "step": 2739 }, { "epoch": 0.6314819082737958, "grad_norm": 0.19142907857894897, "learning_rate": 3.159238407598054e-05, "loss": 0.657, "step": 2740 }, { "epoch": 0.6317123761235308, "grad_norm": 0.18829232454299927, "learning_rate": 3.155768216974112e-05, "loss": 0.6483, "step": 2741 }, { "epoch": 0.6319428439732657, "grad_norm": 0.1925613284111023, "learning_rate": 3.1522990542807046e-05, "loss": 0.6478, "step": 2742 }, { "epoch": 0.6321733118230007, "grad_norm": 0.17510046064853668, "learning_rate": 3.148830921451452e-05, "loss": 0.6551, "step": 2743 }, { "epoch": 0.6324037796727356, "grad_norm": 0.17866365611553192, "learning_rate": 3.1453638204194135e-05, "loss": 0.6585, "step": 2744 }, { "epoch": 0.6326342475224707, "grad_norm": 0.18686874210834503, "learning_rate": 3.141897753117067e-05, "loss": 0.6517, "step": 2745 }, { "epoch": 0.6328647153722056, "grad_norm": 0.16817151010036469, "learning_rate": 3.1384327214763135e-05, "loss": 0.6591, "step": 2746 }, { "epoch": 0.6330951832219406, "grad_norm": 0.1925475150346756, "learning_rate": 3.13496872742848e-05, "loss": 0.6576, "step": 2747 }, { "epoch": 0.6333256510716755, "grad_norm": 0.19002029299736023, "learning_rate": 3.131505772904314e-05, "loss": 0.6528, "step": 2748 }, { "epoch": 0.6335561189214105, "grad_norm": 0.1708815097808838, "learning_rate": 3.128043859833981e-05, "loss": 0.6493, "step": 2749 }, { "epoch": 0.6337865867711454, "grad_norm": 0.19003167748451233, "learning_rate": 3.12458299014707e-05, "loss": 0.6568, "step": 2750 }, { "epoch": 0.6340170546208804, "grad_norm": 0.1745605319738388, "learning_rate": 3.121123165772588e-05, "loss": 0.6547, "step": 2751 }, { "epoch": 0.6342475224706153, "grad_norm": 0.18086248636245728, "learning_rate": 3.1176643886389554e-05, "loss": 0.6454, "step": 2752 }, { "epoch": 0.6344779903203503, "grad_norm": 0.17058676481246948, "learning_rate": 3.114206660674011e-05, "loss": 0.6611, "step": 2753 }, { "epoch": 0.6347084581700853, "grad_norm": 0.1747319996356964, "learning_rate": 3.1107499838050146e-05, "loss": 0.6428, "step": 2754 }, { "epoch": 0.6349389260198203, "grad_norm": 0.17745546996593475, "learning_rate": 3.107294359958628e-05, "loss": 0.6602, "step": 2755 }, { "epoch": 0.6351693938695552, "grad_norm": 0.16670075058937073, "learning_rate": 3.103839791060937e-05, "loss": 0.6608, "step": 2756 }, { "epoch": 0.6353998617192902, "grad_norm": 0.1686505526304245, "learning_rate": 3.1003862790374364e-05, "loss": 0.6535, "step": 2757 }, { "epoch": 0.6356303295690251, "grad_norm": 0.1769195795059204, "learning_rate": 3.0969338258130274e-05, "loss": 0.6537, "step": 2758 }, { "epoch": 0.6358607974187601, "grad_norm": 0.16672547161579132, "learning_rate": 3.093482433312027e-05, "loss": 0.644, "step": 2759 }, { "epoch": 0.636091265268495, "grad_norm": 0.17630359530448914, "learning_rate": 3.090032103458159e-05, "loss": 0.6616, "step": 2760 }, { "epoch": 0.63632173311823, "grad_norm": 0.1749248057603836, "learning_rate": 3.086582838174551e-05, "loss": 0.6506, "step": 2761 }, { "epoch": 0.6365522009679649, "grad_norm": 0.16957643628120422, "learning_rate": 3.0831346393837445e-05, "loss": 0.6552, "step": 2762 }, { "epoch": 0.6367826688177, "grad_norm": 0.16888821125030518, "learning_rate": 3.079687509007682e-05, "loss": 0.6457, "step": 2763 }, { "epoch": 0.6370131366674349, "grad_norm": 0.17261339724063873, "learning_rate": 3.076241448967709e-05, "loss": 0.6568, "step": 2764 }, { "epoch": 0.6372436045171699, "grad_norm": 0.18425433337688446, "learning_rate": 3.072796461184579e-05, "loss": 0.6558, "step": 2765 }, { "epoch": 0.6374740723669048, "grad_norm": 0.1994054913520813, "learning_rate": 3.069352547578445e-05, "loss": 0.6559, "step": 2766 }, { "epoch": 0.6377045402166398, "grad_norm": 0.17605717480182648, "learning_rate": 3.0659097100688607e-05, "loss": 0.6496, "step": 2767 }, { "epoch": 0.6379350080663747, "grad_norm": 0.18694092333316803, "learning_rate": 3.062467950574781e-05, "loss": 0.6579, "step": 2768 }, { "epoch": 0.6381654759161097, "grad_norm": 0.18670520186424255, "learning_rate": 3.059027271014564e-05, "loss": 0.6445, "step": 2769 }, { "epoch": 0.6383959437658446, "grad_norm": 0.18028950691223145, "learning_rate": 3.055587673305955e-05, "loss": 0.6488, "step": 2770 }, { "epoch": 0.6386264116155796, "grad_norm": 0.18050305545330048, "learning_rate": 3.052149159366109e-05, "loss": 0.6566, "step": 2771 }, { "epoch": 0.6388568794653146, "grad_norm": 0.18738004565238953, "learning_rate": 3.0487117311115687e-05, "loss": 0.6465, "step": 2772 }, { "epoch": 0.6390873473150496, "grad_norm": 0.1994061917066574, "learning_rate": 3.0452753904582747e-05, "loss": 0.65, "step": 2773 }, { "epoch": 0.6393178151647845, "grad_norm": 0.1728476881980896, "learning_rate": 3.04184013932156e-05, "loss": 0.6544, "step": 2774 }, { "epoch": 0.6395482830145195, "grad_norm": 0.20216725766658783, "learning_rate": 3.0384059796161535e-05, "loss": 0.6532, "step": 2775 }, { "epoch": 0.6397787508642544, "grad_norm": 0.17996247112751007, "learning_rate": 3.03497291325617e-05, "loss": 0.6548, "step": 2776 }, { "epoch": 0.6400092187139894, "grad_norm": 0.20604853332042694, "learning_rate": 3.03154094215512e-05, "loss": 0.6472, "step": 2777 }, { "epoch": 0.6402396865637243, "grad_norm": 0.1871616393327713, "learning_rate": 3.0281100682259032e-05, "loss": 0.6537, "step": 2778 }, { "epoch": 0.6404701544134593, "grad_norm": 0.18597093224525452, "learning_rate": 3.024680293380804e-05, "loss": 0.6539, "step": 2779 }, { "epoch": 0.6407006222631942, "grad_norm": 0.2045731246471405, "learning_rate": 3.0212516195314955e-05, "loss": 0.6574, "step": 2780 }, { "epoch": 0.6409310901129293, "grad_norm": 0.19084343314170837, "learning_rate": 3.017824048589044e-05, "loss": 0.6615, "step": 2781 }, { "epoch": 0.6411615579626642, "grad_norm": 0.17811843752861023, "learning_rate": 3.0143975824638887e-05, "loss": 0.6502, "step": 2782 }, { "epoch": 0.6413920258123992, "grad_norm": 0.19356852769851685, "learning_rate": 3.0109722230658633e-05, "loss": 0.6544, "step": 2783 }, { "epoch": 0.6416224936621341, "grad_norm": 0.18267957866191864, "learning_rate": 3.0075479723041804e-05, "loss": 0.6531, "step": 2784 }, { "epoch": 0.6418529615118691, "grad_norm": 0.18512317538261414, "learning_rate": 3.0041248320874343e-05, "loss": 0.6616, "step": 2785 }, { "epoch": 0.642083429361604, "grad_norm": 0.17581506073474884, "learning_rate": 3.0007028043236013e-05, "loss": 0.6548, "step": 2786 }, { "epoch": 0.642313897211339, "grad_norm": 0.17305217683315277, "learning_rate": 2.9972818909200396e-05, "loss": 0.6558, "step": 2787 }, { "epoch": 0.6425443650610739, "grad_norm": 0.1679084450006485, "learning_rate": 2.9938620937834804e-05, "loss": 0.6515, "step": 2788 }, { "epoch": 0.642774832910809, "grad_norm": 0.16354484856128693, "learning_rate": 2.9904434148200395e-05, "loss": 0.6591, "step": 2789 }, { "epoch": 0.6430053007605439, "grad_norm": 0.171888068318367, "learning_rate": 2.9870258559352067e-05, "loss": 0.6477, "step": 2790 }, { "epoch": 0.6432357686102789, "grad_norm": 0.1731024980545044, "learning_rate": 2.9836094190338448e-05, "loss": 0.6491, "step": 2791 }, { "epoch": 0.6434662364600138, "grad_norm": 0.1673286110162735, "learning_rate": 2.9801941060201944e-05, "loss": 0.6474, "step": 2792 }, { "epoch": 0.6436967043097488, "grad_norm": 0.18038569390773773, "learning_rate": 2.976779918797873e-05, "loss": 0.6492, "step": 2793 }, { "epoch": 0.6439271721594838, "grad_norm": 0.1763104796409607, "learning_rate": 2.973366859269861e-05, "loss": 0.6451, "step": 2794 }, { "epoch": 0.6441576400092187, "grad_norm": 0.17095611989498138, "learning_rate": 2.9699549293385176e-05, "loss": 0.6561, "step": 2795 }, { "epoch": 0.6443881078589537, "grad_norm": 0.16650767624378204, "learning_rate": 2.966544130905574e-05, "loss": 0.665, "step": 2796 }, { "epoch": 0.6446185757086886, "grad_norm": 0.1730603128671646, "learning_rate": 2.9631344658721215e-05, "loss": 0.6566, "step": 2797 }, { "epoch": 0.6448490435584237, "grad_norm": 0.17055529356002808, "learning_rate": 2.9597259361386305e-05, "loss": 0.6559, "step": 2798 }, { "epoch": 0.6450795114081586, "grad_norm": 0.246446430683136, "learning_rate": 2.9563185436049323e-05, "loss": 0.659, "step": 2799 }, { "epoch": 0.6453099792578936, "grad_norm": 0.16206693649291992, "learning_rate": 2.9529122901702244e-05, "loss": 0.6473, "step": 2800 }, { "epoch": 0.6455404471076285, "grad_norm": 0.17527292668819427, "learning_rate": 2.9495071777330717e-05, "loss": 0.6558, "step": 2801 }, { "epoch": 0.6457709149573635, "grad_norm": 0.16923247277736664, "learning_rate": 2.946103208191403e-05, "loss": 0.6567, "step": 2802 }, { "epoch": 0.6460013828070984, "grad_norm": 0.18780246376991272, "learning_rate": 2.9427003834425075e-05, "loss": 0.6511, "step": 2803 }, { "epoch": 0.6462318506568334, "grad_norm": 0.18644435703754425, "learning_rate": 2.939298705383039e-05, "loss": 0.6495, "step": 2804 }, { "epoch": 0.6464623185065683, "grad_norm": 0.1641910821199417, "learning_rate": 2.935898175909012e-05, "loss": 0.65, "step": 2805 }, { "epoch": 0.6466927863563033, "grad_norm": 0.18704113364219666, "learning_rate": 2.9324987969157974e-05, "loss": 0.6479, "step": 2806 }, { "epoch": 0.6469232542060382, "grad_norm": 0.18336251378059387, "learning_rate": 2.9291005702981288e-05, "loss": 0.6506, "step": 2807 }, { "epoch": 0.6471537220557733, "grad_norm": 0.18807761371135712, "learning_rate": 2.9257034979500986e-05, "loss": 0.6477, "step": 2808 }, { "epoch": 0.6473841899055082, "grad_norm": 0.18197950720787048, "learning_rate": 2.922307581765149e-05, "loss": 0.6592, "step": 2809 }, { "epoch": 0.6476146577552432, "grad_norm": 0.20037148892879486, "learning_rate": 2.9189128236360852e-05, "loss": 0.6513, "step": 2810 }, { "epoch": 0.6478451256049781, "grad_norm": 0.17539845407009125, "learning_rate": 2.915519225455065e-05, "loss": 0.646, "step": 2811 }, { "epoch": 0.6480755934547131, "grad_norm": 0.19264990091323853, "learning_rate": 2.9121267891135952e-05, "loss": 0.6494, "step": 2812 }, { "epoch": 0.648306061304448, "grad_norm": 0.1686212420463562, "learning_rate": 2.908735516502537e-05, "loss": 0.6511, "step": 2813 }, { "epoch": 0.648536529154183, "grad_norm": 0.1705242097377777, "learning_rate": 2.905345409512112e-05, "loss": 0.6491, "step": 2814 }, { "epoch": 0.6487669970039179, "grad_norm": 0.1819016933441162, "learning_rate": 2.9019564700318768e-05, "loss": 0.6421, "step": 2815 }, { "epoch": 0.648997464853653, "grad_norm": 0.17861327528953552, "learning_rate": 2.8985686999507488e-05, "loss": 0.6532, "step": 2816 }, { "epoch": 0.6492279327033879, "grad_norm": 0.2011132538318634, "learning_rate": 2.89518210115699e-05, "loss": 0.6426, "step": 2817 }, { "epoch": 0.6494584005531229, "grad_norm": 0.16986913979053497, "learning_rate": 2.8917966755382048e-05, "loss": 0.6544, "step": 2818 }, { "epoch": 0.6496888684028578, "grad_norm": 0.19294098019599915, "learning_rate": 2.8884124249813526e-05, "loss": 0.655, "step": 2819 }, { "epoch": 0.6499193362525928, "grad_norm": 0.18572697043418884, "learning_rate": 2.885029351372735e-05, "loss": 0.6563, "step": 2820 }, { "epoch": 0.6501498041023277, "grad_norm": 0.19467751681804657, "learning_rate": 2.881647456597991e-05, "loss": 0.6606, "step": 2821 }, { "epoch": 0.6503802719520627, "grad_norm": 0.18023499846458435, "learning_rate": 2.8782667425421096e-05, "loss": 0.6527, "step": 2822 }, { "epoch": 0.6506107398017976, "grad_norm": 0.19630929827690125, "learning_rate": 2.87488721108942e-05, "loss": 0.6475, "step": 2823 }, { "epoch": 0.6508412076515326, "grad_norm": 0.17777912318706512, "learning_rate": 2.871508864123592e-05, "loss": 0.6543, "step": 2824 }, { "epoch": 0.6510716755012675, "grad_norm": 0.20080679655075073, "learning_rate": 2.868131703527635e-05, "loss": 0.6556, "step": 2825 }, { "epoch": 0.6513021433510026, "grad_norm": 0.1675284504890442, "learning_rate": 2.864755731183899e-05, "loss": 0.6482, "step": 2826 }, { "epoch": 0.6515326112007375, "grad_norm": 0.1921423077583313, "learning_rate": 2.8613809489740662e-05, "loss": 0.6511, "step": 2827 }, { "epoch": 0.6517630790504725, "grad_norm": 0.17145875096321106, "learning_rate": 2.8580073587791596e-05, "loss": 0.6484, "step": 2828 }, { "epoch": 0.6519935469002074, "grad_norm": 0.19045400619506836, "learning_rate": 2.8546349624795404e-05, "loss": 0.6555, "step": 2829 }, { "epoch": 0.6522240147499424, "grad_norm": 0.18223097920417786, "learning_rate": 2.8512637619549022e-05, "loss": 0.643, "step": 2830 }, { "epoch": 0.6524544825996773, "grad_norm": 0.17837011814117432, "learning_rate": 2.847893759084267e-05, "loss": 0.653, "step": 2831 }, { "epoch": 0.6526849504494123, "grad_norm": 0.17700053751468658, "learning_rate": 2.8445249557459953e-05, "loss": 0.6537, "step": 2832 }, { "epoch": 0.6529154182991472, "grad_norm": 0.17131763696670532, "learning_rate": 2.8411573538177772e-05, "loss": 0.6515, "step": 2833 }, { "epoch": 0.6531458861488822, "grad_norm": 0.1803225427865982, "learning_rate": 2.837790955176634e-05, "loss": 0.6526, "step": 2834 }, { "epoch": 0.6533763539986172, "grad_norm": 0.17444370687007904, "learning_rate": 2.8344257616989144e-05, "loss": 0.6564, "step": 2835 }, { "epoch": 0.6536068218483522, "grad_norm": 0.18952690064907074, "learning_rate": 2.8310617752602996e-05, "loss": 0.6535, "step": 2836 }, { "epoch": 0.6538372896980871, "grad_norm": 0.16605597734451294, "learning_rate": 2.8276989977357894e-05, "loss": 0.6439, "step": 2837 }, { "epoch": 0.6540677575478221, "grad_norm": 0.19188542664051056, "learning_rate": 2.8243374309997166e-05, "loss": 0.654, "step": 2838 }, { "epoch": 0.654298225397557, "grad_norm": 0.17647671699523926, "learning_rate": 2.8209770769257437e-05, "loss": 0.643, "step": 2839 }, { "epoch": 0.654528693247292, "grad_norm": 0.19883349537849426, "learning_rate": 2.8176179373868443e-05, "loss": 0.662, "step": 2840 }, { "epoch": 0.6547591610970269, "grad_norm": 0.17992442846298218, "learning_rate": 2.8142600142553254e-05, "loss": 0.6503, "step": 2841 }, { "epoch": 0.6549896289467619, "grad_norm": 0.19443680346012115, "learning_rate": 2.8109033094028126e-05, "loss": 0.6492, "step": 2842 }, { "epoch": 0.6552200967964968, "grad_norm": 0.18997105956077576, "learning_rate": 2.8075478247002518e-05, "loss": 0.6544, "step": 2843 }, { "epoch": 0.6554505646462319, "grad_norm": 0.2033330202102661, "learning_rate": 2.8041935620179105e-05, "loss": 0.6607, "step": 2844 }, { "epoch": 0.6556810324959668, "grad_norm": 0.18512104451656342, "learning_rate": 2.8008405232253752e-05, "loss": 0.6491, "step": 2845 }, { "epoch": 0.6559115003457018, "grad_norm": 0.1780814528465271, "learning_rate": 2.7974887101915458e-05, "loss": 0.6497, "step": 2846 }, { "epoch": 0.6561419681954367, "grad_norm": 0.20027370750904083, "learning_rate": 2.7941381247846453e-05, "loss": 0.6509, "step": 2847 }, { "epoch": 0.6563724360451717, "grad_norm": 0.1773580014705658, "learning_rate": 2.7907887688722085e-05, "loss": 0.6508, "step": 2848 }, { "epoch": 0.6566029038949066, "grad_norm": 0.20906241238117218, "learning_rate": 2.7874406443210866e-05, "loss": 0.6531, "step": 2849 }, { "epoch": 0.6568333717446416, "grad_norm": 0.17152054607868195, "learning_rate": 2.784093752997443e-05, "loss": 0.6519, "step": 2850 }, { "epoch": 0.6570638395943765, "grad_norm": 0.20871195197105408, "learning_rate": 2.7807480967667576e-05, "loss": 0.6517, "step": 2851 }, { "epoch": 0.6572943074441115, "grad_norm": 0.18561682105064392, "learning_rate": 2.7774036774938138e-05, "loss": 0.6447, "step": 2852 }, { "epoch": 0.6575247752938465, "grad_norm": 0.19581186771392822, "learning_rate": 2.774060497042712e-05, "loss": 0.6593, "step": 2853 }, { "epoch": 0.6577552431435815, "grad_norm": 0.1915295422077179, "learning_rate": 2.7707185572768656e-05, "loss": 0.6545, "step": 2854 }, { "epoch": 0.6579857109933165, "grad_norm": 0.19746430218219757, "learning_rate": 2.7673778600589862e-05, "loss": 0.6521, "step": 2855 }, { "epoch": 0.6582161788430514, "grad_norm": 0.17735208570957184, "learning_rate": 2.7640384072510994e-05, "loss": 0.6554, "step": 2856 }, { "epoch": 0.6584466466927864, "grad_norm": 0.19774432480335236, "learning_rate": 2.7607002007145377e-05, "loss": 0.6499, "step": 2857 }, { "epoch": 0.6586771145425213, "grad_norm": 0.17123498022556305, "learning_rate": 2.7573632423099355e-05, "loss": 0.6539, "step": 2858 }, { "epoch": 0.6589075823922563, "grad_norm": 0.19462165236473083, "learning_rate": 2.7540275338972343e-05, "loss": 0.6533, "step": 2859 }, { "epoch": 0.6591380502419912, "grad_norm": 0.1761290431022644, "learning_rate": 2.75069307733568e-05, "loss": 0.6443, "step": 2860 }, { "epoch": 0.6593685180917263, "grad_norm": 0.17915204167366028, "learning_rate": 2.7473598744838146e-05, "loss": 0.6526, "step": 2861 }, { "epoch": 0.6595989859414612, "grad_norm": 0.170929417014122, "learning_rate": 2.744027927199486e-05, "loss": 0.6531, "step": 2862 }, { "epoch": 0.6598294537911962, "grad_norm": 0.18236766755580902, "learning_rate": 2.7406972373398443e-05, "loss": 0.6508, "step": 2863 }, { "epoch": 0.6600599216409311, "grad_norm": 0.16858653724193573, "learning_rate": 2.737367806761334e-05, "loss": 0.6535, "step": 2864 }, { "epoch": 0.6602903894906661, "grad_norm": 0.1613864302635193, "learning_rate": 2.7340396373196996e-05, "loss": 0.6503, "step": 2865 }, { "epoch": 0.660520857340401, "grad_norm": 0.17449095845222473, "learning_rate": 2.7307127308699865e-05, "loss": 0.6549, "step": 2866 }, { "epoch": 0.660751325190136, "grad_norm": 0.16756795346736908, "learning_rate": 2.7273870892665253e-05, "loss": 0.6488, "step": 2867 }, { "epoch": 0.6609817930398709, "grad_norm": 0.16483251750469208, "learning_rate": 2.724062714362955e-05, "loss": 0.6564, "step": 2868 }, { "epoch": 0.6612122608896059, "grad_norm": 0.16283339262008667, "learning_rate": 2.7207396080122028e-05, "loss": 0.655, "step": 2869 }, { "epoch": 0.6614427287393408, "grad_norm": 0.16669294238090515, "learning_rate": 2.717417772066484e-05, "loss": 0.6508, "step": 2870 }, { "epoch": 0.6616731965890759, "grad_norm": 0.18657532334327698, "learning_rate": 2.7140972083773124e-05, "loss": 0.6463, "step": 2871 }, { "epoch": 0.6619036644388108, "grad_norm": 0.17012335360050201, "learning_rate": 2.7107779187954908e-05, "loss": 0.6552, "step": 2872 }, { "epoch": 0.6621341322885458, "grad_norm": 0.17444707453250885, "learning_rate": 2.7074599051711108e-05, "loss": 0.6454, "step": 2873 }, { "epoch": 0.6623646001382807, "grad_norm": 0.17587541043758392, "learning_rate": 2.704143169353554e-05, "loss": 0.6565, "step": 2874 }, { "epoch": 0.6625950679880157, "grad_norm": 0.16666634380817413, "learning_rate": 2.7008277131914916e-05, "loss": 0.6483, "step": 2875 }, { "epoch": 0.6628255358377506, "grad_norm": 0.16995498538017273, "learning_rate": 2.6975135385328743e-05, "loss": 0.6459, "step": 2876 }, { "epoch": 0.6630560036874856, "grad_norm": 0.17786766588687897, "learning_rate": 2.6942006472249447e-05, "loss": 0.6482, "step": 2877 }, { "epoch": 0.6632864715372205, "grad_norm": 0.15976811945438385, "learning_rate": 2.690889041114234e-05, "loss": 0.6426, "step": 2878 }, { "epoch": 0.6635169393869556, "grad_norm": 0.17242498695850372, "learning_rate": 2.6875787220465463e-05, "loss": 0.6514, "step": 2879 }, { "epoch": 0.6637474072366905, "grad_norm": 0.18254664540290833, "learning_rate": 2.6842696918669768e-05, "loss": 0.6541, "step": 2880 }, { "epoch": 0.6639778750864255, "grad_norm": 0.18394513428211212, "learning_rate": 2.6809619524199004e-05, "loss": 0.6491, "step": 2881 }, { "epoch": 0.6642083429361604, "grad_norm": 0.17171119153499603, "learning_rate": 2.677655505548966e-05, "loss": 0.6435, "step": 2882 }, { "epoch": 0.6644388107858954, "grad_norm": 0.17666159570217133, "learning_rate": 2.6743503530971138e-05, "loss": 0.6427, "step": 2883 }, { "epoch": 0.6646692786356303, "grad_norm": 0.18384835124015808, "learning_rate": 2.6710464969065563e-05, "loss": 0.6508, "step": 2884 }, { "epoch": 0.6648997464853653, "grad_norm": 0.16794157028198242, "learning_rate": 2.6677439388187796e-05, "loss": 0.6547, "step": 2885 }, { "epoch": 0.6651302143351002, "grad_norm": 0.17705510556697845, "learning_rate": 2.6644426806745526e-05, "loss": 0.6458, "step": 2886 }, { "epoch": 0.6653606821848352, "grad_norm": 0.17769016325473785, "learning_rate": 2.6611427243139166e-05, "loss": 0.6518, "step": 2887 }, { "epoch": 0.6655911500345701, "grad_norm": 0.16915744543075562, "learning_rate": 2.6578440715761894e-05, "loss": 0.65, "step": 2888 }, { "epoch": 0.6658216178843052, "grad_norm": 0.17128948867321014, "learning_rate": 2.65454672429996e-05, "loss": 0.6476, "step": 2889 }, { "epoch": 0.6660520857340401, "grad_norm": 0.16499844193458557, "learning_rate": 2.6512506843230922e-05, "loss": 0.661, "step": 2890 }, { "epoch": 0.6662825535837751, "grad_norm": 0.1791979968547821, "learning_rate": 2.6479559534827168e-05, "loss": 0.6513, "step": 2891 }, { "epoch": 0.66651302143351, "grad_norm": 0.16432271897792816, "learning_rate": 2.6446625336152364e-05, "loss": 0.6536, "step": 2892 }, { "epoch": 0.666743489283245, "grad_norm": 0.17049454152584076, "learning_rate": 2.641370426556331e-05, "loss": 0.6504, "step": 2893 }, { "epoch": 0.6669739571329799, "grad_norm": 0.17584246397018433, "learning_rate": 2.6380796341409364e-05, "loss": 0.6444, "step": 2894 }, { "epoch": 0.6672044249827149, "grad_norm": 0.1704198718070984, "learning_rate": 2.6347901582032623e-05, "loss": 0.6566, "step": 2895 }, { "epoch": 0.6674348928324498, "grad_norm": 0.1673116832971573, "learning_rate": 2.6315020005767843e-05, "loss": 0.6508, "step": 2896 }, { "epoch": 0.6676653606821848, "grad_norm": 0.16023887693881989, "learning_rate": 2.628215163094242e-05, "loss": 0.6522, "step": 2897 }, { "epoch": 0.6678958285319198, "grad_norm": 0.16509434580802917, "learning_rate": 2.6249296475876407e-05, "loss": 0.6573, "step": 2898 }, { "epoch": 0.6681262963816548, "grad_norm": 0.17283135652542114, "learning_rate": 2.6216454558882486e-05, "loss": 0.6567, "step": 2899 }, { "epoch": 0.6683567642313897, "grad_norm": 0.15373359620571136, "learning_rate": 2.6183625898265918e-05, "loss": 0.6509, "step": 2900 }, { "epoch": 0.6685872320811247, "grad_norm": 0.17244744300842285, "learning_rate": 2.6150810512324637e-05, "loss": 0.6472, "step": 2901 }, { "epoch": 0.6688176999308596, "grad_norm": 0.17626379430294037, "learning_rate": 2.6118008419349148e-05, "loss": 0.6575, "step": 2902 }, { "epoch": 0.6690481677805946, "grad_norm": 0.16538438200950623, "learning_rate": 2.6085219637622544e-05, "loss": 0.6507, "step": 2903 }, { "epoch": 0.6692786356303295, "grad_norm": 0.18583250045776367, "learning_rate": 2.605244418542051e-05, "loss": 0.6585, "step": 2904 }, { "epoch": 0.6695091034800645, "grad_norm": 0.17038129270076752, "learning_rate": 2.601968208101132e-05, "loss": 0.6591, "step": 2905 }, { "epoch": 0.6697395713297994, "grad_norm": 0.17771609127521515, "learning_rate": 2.5986933342655717e-05, "loss": 0.6568, "step": 2906 }, { "epoch": 0.6699700391795345, "grad_norm": 0.17149491608142853, "learning_rate": 2.5954197988607133e-05, "loss": 0.6535, "step": 2907 }, { "epoch": 0.6702005070292694, "grad_norm": 0.18247468769550323, "learning_rate": 2.5921476037111448e-05, "loss": 0.6533, "step": 2908 }, { "epoch": 0.6704309748790044, "grad_norm": 0.16240952908992767, "learning_rate": 2.5888767506407075e-05, "loss": 0.6556, "step": 2909 }, { "epoch": 0.6706614427287393, "grad_norm": 0.18257564306259155, "learning_rate": 2.5856072414724974e-05, "loss": 0.6539, "step": 2910 }, { "epoch": 0.6708919105784743, "grad_norm": 0.16340118646621704, "learning_rate": 2.5823390780288604e-05, "loss": 0.6605, "step": 2911 }, { "epoch": 0.6711223784282092, "grad_norm": 0.18919028341770172, "learning_rate": 2.5790722621313924e-05, "loss": 0.6558, "step": 2912 }, { "epoch": 0.6713528462779442, "grad_norm": 0.1639140248298645, "learning_rate": 2.575806795600938e-05, "loss": 0.6472, "step": 2913 }, { "epoch": 0.6715833141276792, "grad_norm": 0.18610194325447083, "learning_rate": 2.5725426802575925e-05, "loss": 0.6476, "step": 2914 }, { "epoch": 0.6718137819774141, "grad_norm": 0.18196745216846466, "learning_rate": 2.5692799179206906e-05, "loss": 0.6472, "step": 2915 }, { "epoch": 0.6720442498271492, "grad_norm": 0.1704276204109192, "learning_rate": 2.5660185104088207e-05, "loss": 0.6442, "step": 2916 }, { "epoch": 0.6722747176768841, "grad_norm": 0.1694510579109192, "learning_rate": 2.5627584595398125e-05, "loss": 0.6528, "step": 2917 }, { "epoch": 0.6725051855266191, "grad_norm": 0.16790097951889038, "learning_rate": 2.5594997671307397e-05, "loss": 0.6555, "step": 2918 }, { "epoch": 0.672735653376354, "grad_norm": 0.17335295677185059, "learning_rate": 2.5562424349979198e-05, "loss": 0.6553, "step": 2919 }, { "epoch": 0.672966121226089, "grad_norm": 0.1660207062959671, "learning_rate": 2.552986464956913e-05, "loss": 0.648, "step": 2920 }, { "epoch": 0.6731965890758239, "grad_norm": 0.1704881191253662, "learning_rate": 2.5497318588225137e-05, "loss": 0.6468, "step": 2921 }, { "epoch": 0.6734270569255589, "grad_norm": 0.17709209024906158, "learning_rate": 2.5464786184087665e-05, "loss": 0.6567, "step": 2922 }, { "epoch": 0.6736575247752938, "grad_norm": 0.16337838768959045, "learning_rate": 2.5432267455289503e-05, "loss": 0.6459, "step": 2923 }, { "epoch": 0.6738879926250289, "grad_norm": 0.1824062317609787, "learning_rate": 2.5399762419955764e-05, "loss": 0.6545, "step": 2924 }, { "epoch": 0.6741184604747638, "grad_norm": 0.16532284021377563, "learning_rate": 2.5367271096203994e-05, "loss": 0.6471, "step": 2925 }, { "epoch": 0.6743489283244988, "grad_norm": 0.18639203906059265, "learning_rate": 2.5334793502144077e-05, "loss": 0.6585, "step": 2926 }, { "epoch": 0.6745793961742337, "grad_norm": 0.1666010320186615, "learning_rate": 2.5302329655878244e-05, "loss": 0.656, "step": 2927 }, { "epoch": 0.6748098640239687, "grad_norm": 0.17926305532455444, "learning_rate": 2.5269879575501072e-05, "loss": 0.6496, "step": 2928 }, { "epoch": 0.6750403318737036, "grad_norm": 0.1780381202697754, "learning_rate": 2.523744327909947e-05, "loss": 0.6528, "step": 2929 }, { "epoch": 0.6752707997234386, "grad_norm": 0.17732974886894226, "learning_rate": 2.520502078475261e-05, "loss": 0.6516, "step": 2930 }, { "epoch": 0.6755012675731735, "grad_norm": 0.17732085287570953, "learning_rate": 2.5172612110532012e-05, "loss": 0.6591, "step": 2931 }, { "epoch": 0.6757317354229085, "grad_norm": 0.18524208664894104, "learning_rate": 2.5140217274501555e-05, "loss": 0.648, "step": 2932 }, { "epoch": 0.6759622032726434, "grad_norm": 0.18076317012310028, "learning_rate": 2.510783629471728e-05, "loss": 0.6565, "step": 2933 }, { "epoch": 0.6761926711223785, "grad_norm": 0.1893717348575592, "learning_rate": 2.5075469189227597e-05, "loss": 0.6421, "step": 2934 }, { "epoch": 0.6764231389721134, "grad_norm": 0.17758077383041382, "learning_rate": 2.5043115976073167e-05, "loss": 0.646, "step": 2935 }, { "epoch": 0.6766536068218484, "grad_norm": 0.17987452447414398, "learning_rate": 2.5010776673286834e-05, "loss": 0.6401, "step": 2936 }, { "epoch": 0.6768840746715833, "grad_norm": 0.1678553968667984, "learning_rate": 2.497845129889381e-05, "loss": 0.647, "step": 2937 }, { "epoch": 0.6771145425213183, "grad_norm": 0.1824767142534256, "learning_rate": 2.4946139870911482e-05, "loss": 0.6409, "step": 2938 }, { "epoch": 0.6773450103710532, "grad_norm": 0.1831919103860855, "learning_rate": 2.491384240734943e-05, "loss": 0.6454, "step": 2939 }, { "epoch": 0.6775754782207882, "grad_norm": 0.17052872478961945, "learning_rate": 2.48815589262095e-05, "loss": 0.6512, "step": 2940 }, { "epoch": 0.6778059460705231, "grad_norm": 0.17829042673110962, "learning_rate": 2.4849289445485723e-05, "loss": 0.6466, "step": 2941 }, { "epoch": 0.6780364139202582, "grad_norm": 0.170830637216568, "learning_rate": 2.4817033983164345e-05, "loss": 0.6609, "step": 2942 }, { "epoch": 0.6782668817699931, "grad_norm": 0.16118223965168, "learning_rate": 2.478479255722378e-05, "loss": 0.6512, "step": 2943 }, { "epoch": 0.6784973496197281, "grad_norm": 0.1625395268201828, "learning_rate": 2.4752565185634645e-05, "loss": 0.6502, "step": 2944 }, { "epoch": 0.678727817469463, "grad_norm": 0.17522037029266357, "learning_rate": 2.472035188635967e-05, "loss": 0.6458, "step": 2945 }, { "epoch": 0.678958285319198, "grad_norm": 0.16705302894115448, "learning_rate": 2.468815267735377e-05, "loss": 0.6545, "step": 2946 }, { "epoch": 0.6791887531689329, "grad_norm": 0.1653483510017395, "learning_rate": 2.4655967576564064e-05, "loss": 0.6473, "step": 2947 }, { "epoch": 0.6794192210186679, "grad_norm": 0.16879500448703766, "learning_rate": 2.4623796601929712e-05, "loss": 0.652, "step": 2948 }, { "epoch": 0.6796496888684028, "grad_norm": 0.16743235290050507, "learning_rate": 2.4591639771382043e-05, "loss": 0.6524, "step": 2949 }, { "epoch": 0.6798801567181378, "grad_norm": 0.17148716747760773, "learning_rate": 2.4559497102844514e-05, "loss": 0.6468, "step": 2950 }, { "epoch": 0.6801106245678727, "grad_norm": 0.15919731557369232, "learning_rate": 2.452736861423268e-05, "loss": 0.6566, "step": 2951 }, { "epoch": 0.6803410924176078, "grad_norm": 0.17846500873565674, "learning_rate": 2.449525432345418e-05, "loss": 0.6603, "step": 2952 }, { "epoch": 0.6805715602673427, "grad_norm": 0.16970974206924438, "learning_rate": 2.4463154248408777e-05, "loss": 0.6434, "step": 2953 }, { "epoch": 0.6808020281170777, "grad_norm": 0.1619907021522522, "learning_rate": 2.4431068406988238e-05, "loss": 0.6526, "step": 2954 }, { "epoch": 0.6810324959668126, "grad_norm": 0.1664748191833496, "learning_rate": 2.439899681707646e-05, "loss": 0.648, "step": 2955 }, { "epoch": 0.6812629638165476, "grad_norm": 0.16397912800312042, "learning_rate": 2.4366939496549378e-05, "loss": 0.6482, "step": 2956 }, { "epoch": 0.6814934316662825, "grad_norm": 0.16301095485687256, "learning_rate": 2.4334896463274976e-05, "loss": 0.6452, "step": 2957 }, { "epoch": 0.6817238995160175, "grad_norm": 0.16809646785259247, "learning_rate": 2.430286773511327e-05, "loss": 0.6518, "step": 2958 }, { "epoch": 0.6819543673657524, "grad_norm": 0.15831275284290314, "learning_rate": 2.4270853329916304e-05, "loss": 0.6463, "step": 2959 }, { "epoch": 0.6821848352154875, "grad_norm": 0.1629604548215866, "learning_rate": 2.4238853265528143e-05, "loss": 0.6586, "step": 2960 }, { "epoch": 0.6824153030652224, "grad_norm": 0.1589616984128952, "learning_rate": 2.4206867559784847e-05, "loss": 0.6457, "step": 2961 }, { "epoch": 0.6826457709149574, "grad_norm": 0.17117534577846527, "learning_rate": 2.417489623051448e-05, "loss": 0.6551, "step": 2962 }, { "epoch": 0.6828762387646923, "grad_norm": 0.1638975292444229, "learning_rate": 2.4142939295537126e-05, "loss": 0.649, "step": 2963 }, { "epoch": 0.6831067066144273, "grad_norm": 0.1592676341533661, "learning_rate": 2.411099677266476e-05, "loss": 0.6549, "step": 2964 }, { "epoch": 0.6833371744641622, "grad_norm": 0.1702510565519333, "learning_rate": 2.407906867970141e-05, "loss": 0.644, "step": 2965 }, { "epoch": 0.6835676423138972, "grad_norm": 0.17177124321460724, "learning_rate": 2.404715503444302e-05, "loss": 0.6515, "step": 2966 }, { "epoch": 0.6837981101636321, "grad_norm": 0.17113865911960602, "learning_rate": 2.4015255854677488e-05, "loss": 0.6572, "step": 2967 }, { "epoch": 0.6840285780133671, "grad_norm": 0.17294318974018097, "learning_rate": 2.398337115818466e-05, "loss": 0.6504, "step": 2968 }, { "epoch": 0.684259045863102, "grad_norm": 0.16647367179393768, "learning_rate": 2.3951500962736312e-05, "loss": 0.6593, "step": 2969 }, { "epoch": 0.6844895137128371, "grad_norm": 0.16758325695991516, "learning_rate": 2.391964528609609e-05, "loss": 0.6422, "step": 2970 }, { "epoch": 0.684719981562572, "grad_norm": 0.18237681686878204, "learning_rate": 2.388780414601959e-05, "loss": 0.6479, "step": 2971 }, { "epoch": 0.684950449412307, "grad_norm": 0.16424010694026947, "learning_rate": 2.3855977560254338e-05, "loss": 0.6441, "step": 2972 }, { "epoch": 0.685180917262042, "grad_norm": 0.1714371293783188, "learning_rate": 2.3824165546539673e-05, "loss": 0.6534, "step": 2973 }, { "epoch": 0.6854113851117769, "grad_norm": 0.17393812537193298, "learning_rate": 2.3792368122606856e-05, "loss": 0.65, "step": 2974 }, { "epoch": 0.6856418529615119, "grad_norm": 0.18081031739711761, "learning_rate": 2.3760585306179012e-05, "loss": 0.6493, "step": 2975 }, { "epoch": 0.6858723208112468, "grad_norm": 0.16579000651836395, "learning_rate": 2.3728817114971118e-05, "loss": 0.6448, "step": 2976 }, { "epoch": 0.6861027886609818, "grad_norm": 0.17078308761119843, "learning_rate": 2.3697063566689998e-05, "loss": 0.6526, "step": 2977 }, { "epoch": 0.6863332565107167, "grad_norm": 0.16518385708332062, "learning_rate": 2.3665324679034344e-05, "loss": 0.6374, "step": 2978 }, { "epoch": 0.6865637243604518, "grad_norm": 0.16557030379772186, "learning_rate": 2.3633600469694606e-05, "loss": 0.6436, "step": 2979 }, { "epoch": 0.6867941922101867, "grad_norm": 0.17224299907684326, "learning_rate": 2.3601890956353118e-05, "loss": 0.6588, "step": 2980 }, { "epoch": 0.6870246600599217, "grad_norm": 0.17414404451847076, "learning_rate": 2.3570196156684005e-05, "loss": 0.6473, "step": 2981 }, { "epoch": 0.6872551279096566, "grad_norm": 0.16957977414131165, "learning_rate": 2.353851608835318e-05, "loss": 0.6469, "step": 2982 }, { "epoch": 0.6874855957593916, "grad_norm": 0.16680394113063812, "learning_rate": 2.3506850769018363e-05, "loss": 0.648, "step": 2983 }, { "epoch": 0.6877160636091265, "grad_norm": 0.1786261647939682, "learning_rate": 2.3475200216329052e-05, "loss": 0.6516, "step": 2984 }, { "epoch": 0.6879465314588615, "grad_norm": 0.17838843166828156, "learning_rate": 2.3443564447926447e-05, "loss": 0.6538, "step": 2985 }, { "epoch": 0.6881769993085964, "grad_norm": 0.16465331614017487, "learning_rate": 2.3411943481443637e-05, "loss": 0.6512, "step": 2986 }, { "epoch": 0.6884074671583315, "grad_norm": 0.16617000102996826, "learning_rate": 2.3380337334505374e-05, "loss": 0.6529, "step": 2987 }, { "epoch": 0.6886379350080664, "grad_norm": 0.16464616358280182, "learning_rate": 2.3348746024728142e-05, "loss": 0.6582, "step": 2988 }, { "epoch": 0.6888684028578014, "grad_norm": 0.1769178956747055, "learning_rate": 2.3317169569720187e-05, "loss": 0.6525, "step": 2989 }, { "epoch": 0.6890988707075363, "grad_norm": 0.16962438821792603, "learning_rate": 2.328560798708147e-05, "loss": 0.65, "step": 2990 }, { "epoch": 0.6893293385572713, "grad_norm": 0.16247595846652985, "learning_rate": 2.3254061294403663e-05, "loss": 0.6484, "step": 2991 }, { "epoch": 0.6895598064070062, "grad_norm": 0.17956919968128204, "learning_rate": 2.322252950927013e-05, "loss": 0.6464, "step": 2992 }, { "epoch": 0.6897902742567412, "grad_norm": 0.17907127737998962, "learning_rate": 2.3191012649255956e-05, "loss": 0.653, "step": 2993 }, { "epoch": 0.6900207421064761, "grad_norm": 0.16521626710891724, "learning_rate": 2.3159510731927842e-05, "loss": 0.6548, "step": 2994 }, { "epoch": 0.6902512099562111, "grad_norm": 0.18841508030891418, "learning_rate": 2.3128023774844194e-05, "loss": 0.6502, "step": 2995 }, { "epoch": 0.690481677805946, "grad_norm": 0.17101146280765533, "learning_rate": 2.3096551795555155e-05, "loss": 0.6417, "step": 2996 }, { "epoch": 0.6907121456556811, "grad_norm": 0.19493061304092407, "learning_rate": 2.3065094811602383e-05, "loss": 0.6547, "step": 2997 }, { "epoch": 0.690942613505416, "grad_norm": 0.17434021830558777, "learning_rate": 2.3033652840519272e-05, "loss": 0.6511, "step": 2998 }, { "epoch": 0.691173081355151, "grad_norm": 0.17021694779396057, "learning_rate": 2.300222589983084e-05, "loss": 0.6542, "step": 2999 }, { "epoch": 0.6914035492048859, "grad_norm": 0.1962118148803711, "learning_rate": 2.297081400705365e-05, "loss": 0.6532, "step": 3000 }, { "epoch": 0.6916340170546209, "grad_norm": 0.17823426425457, "learning_rate": 2.293941717969599e-05, "loss": 0.6437, "step": 3001 }, { "epoch": 0.6918644849043558, "grad_norm": 0.1798575520515442, "learning_rate": 2.2908035435257706e-05, "loss": 0.6528, "step": 3002 }, { "epoch": 0.6920949527540908, "grad_norm": 0.1800197809934616, "learning_rate": 2.2876668791230193e-05, "loss": 0.6527, "step": 3003 }, { "epoch": 0.6923254206038257, "grad_norm": 0.17531868815422058, "learning_rate": 2.2845317265096474e-05, "loss": 0.651, "step": 3004 }, { "epoch": 0.6925558884535608, "grad_norm": 0.19876468181610107, "learning_rate": 2.2813980874331146e-05, "loss": 0.6485, "step": 3005 }, { "epoch": 0.6927863563032957, "grad_norm": 0.16582679748535156, "learning_rate": 2.2782659636400355e-05, "loss": 0.6532, "step": 3006 }, { "epoch": 0.6930168241530307, "grad_norm": 0.18158891797065735, "learning_rate": 2.2751353568761813e-05, "loss": 0.6509, "step": 3007 }, { "epoch": 0.6932472920027656, "grad_norm": 0.17845842242240906, "learning_rate": 2.272006268886479e-05, "loss": 0.6502, "step": 3008 }, { "epoch": 0.6934777598525006, "grad_norm": 0.17071473598480225, "learning_rate": 2.2688787014150027e-05, "loss": 0.6491, "step": 3009 }, { "epoch": 0.6937082277022355, "grad_norm": 0.17339655756950378, "learning_rate": 2.2657526562049836e-05, "loss": 0.6563, "step": 3010 }, { "epoch": 0.6939386955519705, "grad_norm": 0.16644999384880066, "learning_rate": 2.2626281349988103e-05, "loss": 0.6527, "step": 3011 }, { "epoch": 0.6941691634017054, "grad_norm": 0.16771528124809265, "learning_rate": 2.25950513953801e-05, "loss": 0.6455, "step": 3012 }, { "epoch": 0.6943996312514404, "grad_norm": 0.17318162322044373, "learning_rate": 2.2563836715632676e-05, "loss": 0.6634, "step": 3013 }, { "epoch": 0.6946300991011753, "grad_norm": 0.15834879875183105, "learning_rate": 2.253263732814414e-05, "loss": 0.6532, "step": 3014 }, { "epoch": 0.6948605669509104, "grad_norm": 0.1758836805820465, "learning_rate": 2.2501453250304283e-05, "loss": 0.6399, "step": 3015 }, { "epoch": 0.6950910348006453, "grad_norm": 0.1595585197210312, "learning_rate": 2.2470284499494364e-05, "loss": 0.6525, "step": 3016 }, { "epoch": 0.6953215026503803, "grad_norm": 0.1615588665008545, "learning_rate": 2.2439131093087113e-05, "loss": 0.643, "step": 3017 }, { "epoch": 0.6955519705001152, "grad_norm": 0.16813978552818298, "learning_rate": 2.2407993048446656e-05, "loss": 0.6448, "step": 3018 }, { "epoch": 0.6957824383498502, "grad_norm": 0.16986919939517975, "learning_rate": 2.2376870382928607e-05, "loss": 0.6473, "step": 3019 }, { "epoch": 0.6960129061995851, "grad_norm": 0.17389391362667084, "learning_rate": 2.2345763113879996e-05, "loss": 0.6468, "step": 3020 }, { "epoch": 0.6962433740493201, "grad_norm": 0.17148233950138092, "learning_rate": 2.2314671258639263e-05, "loss": 0.6585, "step": 3021 }, { "epoch": 0.696473841899055, "grad_norm": 0.18123234808444977, "learning_rate": 2.228359483453627e-05, "loss": 0.6487, "step": 3022 }, { "epoch": 0.69670430974879, "grad_norm": 0.16465289890766144, "learning_rate": 2.2252533858892277e-05, "loss": 0.6497, "step": 3023 }, { "epoch": 0.696934777598525, "grad_norm": 0.1766558438539505, "learning_rate": 2.2221488349019903e-05, "loss": 0.6505, "step": 3024 }, { "epoch": 0.69716524544826, "grad_norm": 0.16725115478038788, "learning_rate": 2.2190458322223163e-05, "loss": 0.6475, "step": 3025 }, { "epoch": 0.6973957132979949, "grad_norm": 0.17288514971733093, "learning_rate": 2.2159443795797497e-05, "loss": 0.6519, "step": 3026 }, { "epoch": 0.6976261811477299, "grad_norm": 0.165741428732872, "learning_rate": 2.2128444787029618e-05, "loss": 0.6421, "step": 3027 }, { "epoch": 0.6978566489974648, "grad_norm": 0.17508132755756378, "learning_rate": 2.209746131319764e-05, "loss": 0.6495, "step": 3028 }, { "epoch": 0.6980871168471998, "grad_norm": 0.1722734570503235, "learning_rate": 2.2066493391570996e-05, "loss": 0.6473, "step": 3029 }, { "epoch": 0.6983175846969347, "grad_norm": 0.16845974326133728, "learning_rate": 2.2035541039410483e-05, "loss": 0.6449, "step": 3030 }, { "epoch": 0.6985480525466697, "grad_norm": 0.18813900649547577, "learning_rate": 2.200460427396819e-05, "loss": 0.647, "step": 3031 }, { "epoch": 0.6987785203964048, "grad_norm": 0.18029405176639557, "learning_rate": 2.1973683112487544e-05, "loss": 0.6461, "step": 3032 }, { "epoch": 0.6990089882461397, "grad_norm": 0.18963518738746643, "learning_rate": 2.194277757220322e-05, "loss": 0.6387, "step": 3033 }, { "epoch": 0.6992394560958747, "grad_norm": 0.17914853990077972, "learning_rate": 2.191188767034125e-05, "loss": 0.642, "step": 3034 }, { "epoch": 0.6994699239456096, "grad_norm": 0.20282970368862152, "learning_rate": 2.1881013424118922e-05, "loss": 0.6574, "step": 3035 }, { "epoch": 0.6997003917953446, "grad_norm": 0.17879828810691833, "learning_rate": 2.18501548507448e-05, "loss": 0.6485, "step": 3036 }, { "epoch": 0.6999308596450795, "grad_norm": 0.18236006796360016, "learning_rate": 2.1819311967418697e-05, "loss": 0.6525, "step": 3037 }, { "epoch": 0.7001613274948145, "grad_norm": 0.18954585492610931, "learning_rate": 2.1788484791331738e-05, "loss": 0.6411, "step": 3038 }, { "epoch": 0.7003917953445494, "grad_norm": 0.19611108303070068, "learning_rate": 2.1757673339666178e-05, "loss": 0.6474, "step": 3039 }, { "epoch": 0.7006222631942844, "grad_norm": 0.1711956113576889, "learning_rate": 2.172687762959565e-05, "loss": 0.6458, "step": 3040 }, { "epoch": 0.7008527310440194, "grad_norm": 0.1677607148885727, "learning_rate": 2.169609767828493e-05, "loss": 0.6419, "step": 3041 }, { "epoch": 0.7010831988937544, "grad_norm": 0.19762775301933289, "learning_rate": 2.1665333502889994e-05, "loss": 0.6508, "step": 3042 }, { "epoch": 0.7013136667434893, "grad_norm": 0.1680401712656021, "learning_rate": 2.1634585120558078e-05, "loss": 0.6456, "step": 3043 }, { "epoch": 0.7015441345932243, "grad_norm": 0.1744004786014557, "learning_rate": 2.1603852548427582e-05, "loss": 0.6496, "step": 3044 }, { "epoch": 0.7017746024429592, "grad_norm": 0.179367795586586, "learning_rate": 2.1573135803628114e-05, "loss": 0.6438, "step": 3045 }, { "epoch": 0.7020050702926942, "grad_norm": 0.16695451736450195, "learning_rate": 2.154243490328044e-05, "loss": 0.6508, "step": 3046 }, { "epoch": 0.7022355381424291, "grad_norm": 0.18172797560691833, "learning_rate": 2.1511749864496534e-05, "loss": 0.6414, "step": 3047 }, { "epoch": 0.7024660059921641, "grad_norm": 0.17104046046733856, "learning_rate": 2.148108070437945e-05, "loss": 0.649, "step": 3048 }, { "epoch": 0.702696473841899, "grad_norm": 0.17437295615673065, "learning_rate": 2.1450427440023456e-05, "loss": 0.6541, "step": 3049 }, { "epoch": 0.702926941691634, "grad_norm": 0.1672581136226654, "learning_rate": 2.1419790088513998e-05, "loss": 0.6469, "step": 3050 }, { "epoch": 0.703157409541369, "grad_norm": 0.17412829399108887, "learning_rate": 2.138916866692754e-05, "loss": 0.6396, "step": 3051 }, { "epoch": 0.703387877391104, "grad_norm": 0.1715768724679947, "learning_rate": 2.1358563192331747e-05, "loss": 0.6532, "step": 3052 }, { "epoch": 0.7036183452408389, "grad_norm": 0.16486719250679016, "learning_rate": 2.1327973681785397e-05, "loss": 0.653, "step": 3053 }, { "epoch": 0.7038488130905739, "grad_norm": 0.1680760681629181, "learning_rate": 2.1297400152338286e-05, "loss": 0.6564, "step": 3054 }, { "epoch": 0.7040792809403088, "grad_norm": 0.16449017822742462, "learning_rate": 2.1266842621031434e-05, "loss": 0.6408, "step": 3055 }, { "epoch": 0.7043097487900438, "grad_norm": 0.1720770299434662, "learning_rate": 2.1236301104896866e-05, "loss": 0.6612, "step": 3056 }, { "epoch": 0.7045402166397787, "grad_norm": 0.15701450407505035, "learning_rate": 2.1205775620957652e-05, "loss": 0.6421, "step": 3057 }, { "epoch": 0.7047706844895137, "grad_norm": 0.17185088992118835, "learning_rate": 2.1175266186227987e-05, "loss": 0.6487, "step": 3058 }, { "epoch": 0.7050011523392486, "grad_norm": 0.18234078586101532, "learning_rate": 2.1144772817713103e-05, "loss": 0.6472, "step": 3059 }, { "epoch": 0.7052316201889837, "grad_norm": 0.16597682237625122, "learning_rate": 2.1114295532409263e-05, "loss": 0.6511, "step": 3060 }, { "epoch": 0.7054620880387186, "grad_norm": 0.17775195837020874, "learning_rate": 2.1083834347303772e-05, "loss": 0.6451, "step": 3061 }, { "epoch": 0.7056925558884536, "grad_norm": 0.1636880785226822, "learning_rate": 2.1053389279374987e-05, "loss": 0.6505, "step": 3062 }, { "epoch": 0.7059230237381885, "grad_norm": 0.16783006489276886, "learning_rate": 2.1022960345592223e-05, "loss": 0.6394, "step": 3063 }, { "epoch": 0.7061534915879235, "grad_norm": 0.17598304152488708, "learning_rate": 2.0992547562915838e-05, "loss": 0.6499, "step": 3064 }, { "epoch": 0.7063839594376584, "grad_norm": 0.16829919815063477, "learning_rate": 2.096215094829723e-05, "loss": 0.6499, "step": 3065 }, { "epoch": 0.7066144272873934, "grad_norm": 0.1717023104429245, "learning_rate": 2.0931770518678707e-05, "loss": 0.6479, "step": 3066 }, { "epoch": 0.7068448951371283, "grad_norm": 0.17776085436344147, "learning_rate": 2.0901406290993598e-05, "loss": 0.6604, "step": 3067 }, { "epoch": 0.7070753629868634, "grad_norm": 0.17955102026462555, "learning_rate": 2.087105828216619e-05, "loss": 0.6479, "step": 3068 }, { "epoch": 0.7073058308365983, "grad_norm": 0.17219555377960205, "learning_rate": 2.0840726509111748e-05, "loss": 0.6486, "step": 3069 }, { "epoch": 0.7075362986863333, "grad_norm": 0.16756534576416016, "learning_rate": 2.081041098873646e-05, "loss": 0.636, "step": 3070 }, { "epoch": 0.7077667665360682, "grad_norm": 0.17784403264522552, "learning_rate": 2.0780111737937497e-05, "loss": 0.6599, "step": 3071 }, { "epoch": 0.7079972343858032, "grad_norm": 0.16979734599590302, "learning_rate": 2.0749828773602898e-05, "loss": 0.6566, "step": 3072 }, { "epoch": 0.7082277022355381, "grad_norm": 0.18243227899074554, "learning_rate": 2.0719562112611675e-05, "loss": 0.6452, "step": 3073 }, { "epoch": 0.7084581700852731, "grad_norm": 0.16543544828891754, "learning_rate": 2.0689311771833737e-05, "loss": 0.6489, "step": 3074 }, { "epoch": 0.708688637935008, "grad_norm": 0.17076393961906433, "learning_rate": 2.0659077768129898e-05, "loss": 0.6472, "step": 3075 }, { "epoch": 0.708919105784743, "grad_norm": 0.17454828321933746, "learning_rate": 2.0628860118351874e-05, "loss": 0.6464, "step": 3076 }, { "epoch": 0.709149573634478, "grad_norm": 0.16081586480140686, "learning_rate": 2.0598658839342266e-05, "loss": 0.6488, "step": 3077 }, { "epoch": 0.709380041484213, "grad_norm": 0.17376914620399475, "learning_rate": 2.0568473947934498e-05, "loss": 0.6416, "step": 3078 }, { "epoch": 0.7096105093339479, "grad_norm": 0.17064812779426575, "learning_rate": 2.0538305460952945e-05, "loss": 0.6464, "step": 3079 }, { "epoch": 0.7098409771836829, "grad_norm": 0.18025130033493042, "learning_rate": 2.050815339521281e-05, "loss": 0.6501, "step": 3080 }, { "epoch": 0.7100714450334178, "grad_norm": 0.16442714631557465, "learning_rate": 2.0478017767520087e-05, "loss": 0.647, "step": 3081 }, { "epoch": 0.7103019128831528, "grad_norm": 0.1766822636127472, "learning_rate": 2.0447898594671667e-05, "loss": 0.6456, "step": 3082 }, { "epoch": 0.7105323807328877, "grad_norm": 0.17419351637363434, "learning_rate": 2.0417795893455265e-05, "loss": 0.6503, "step": 3083 }, { "epoch": 0.7107628485826227, "grad_norm": 0.1673542559146881, "learning_rate": 2.0387709680649397e-05, "loss": 0.6526, "step": 3084 }, { "epoch": 0.7109933164323576, "grad_norm": 0.1788138598203659, "learning_rate": 2.0357639973023396e-05, "loss": 0.6441, "step": 3085 }, { "epoch": 0.7112237842820927, "grad_norm": 0.16873040795326233, "learning_rate": 2.032758678733741e-05, "loss": 0.6467, "step": 3086 }, { "epoch": 0.7114542521318276, "grad_norm": 0.17903602123260498, "learning_rate": 2.0297550140342338e-05, "loss": 0.647, "step": 3087 }, { "epoch": 0.7116847199815626, "grad_norm": 0.15698370337486267, "learning_rate": 2.0267530048779896e-05, "loss": 0.6465, "step": 3088 }, { "epoch": 0.7119151878312975, "grad_norm": 0.15955950319766998, "learning_rate": 2.023752652938256e-05, "loss": 0.6401, "step": 3089 }, { "epoch": 0.7121456556810325, "grad_norm": 0.17143939435482025, "learning_rate": 2.020753959887358e-05, "loss": 0.6558, "step": 3090 }, { "epoch": 0.7123761235307675, "grad_norm": 0.16940918564796448, "learning_rate": 2.0177569273966945e-05, "loss": 0.6542, "step": 3091 }, { "epoch": 0.7126065913805024, "grad_norm": 0.1974901705980301, "learning_rate": 2.01476155713674e-05, "loss": 0.6397, "step": 3092 }, { "epoch": 0.7128370592302374, "grad_norm": 0.16214625537395477, "learning_rate": 2.0117678507770416e-05, "loss": 0.6492, "step": 3093 }, { "epoch": 0.7130675270799723, "grad_norm": 0.19105158746242523, "learning_rate": 2.0087758099862192e-05, "loss": 0.6497, "step": 3094 }, { "epoch": 0.7132979949297074, "grad_norm": 0.17240196466445923, "learning_rate": 2.0057854364319646e-05, "loss": 0.6336, "step": 3095 }, { "epoch": 0.7135284627794423, "grad_norm": 0.17806990444660187, "learning_rate": 2.0027967317810426e-05, "loss": 0.6529, "step": 3096 }, { "epoch": 0.7137589306291773, "grad_norm": 0.1702287495136261, "learning_rate": 1.9998096976992812e-05, "loss": 0.6559, "step": 3097 }, { "epoch": 0.7139893984789122, "grad_norm": 0.17224186658859253, "learning_rate": 1.9968243358515837e-05, "loss": 0.639, "step": 3098 }, { "epoch": 0.7142198663286472, "grad_norm": 0.18311919271945953, "learning_rate": 1.9938406479019183e-05, "loss": 0.6348, "step": 3099 }, { "epoch": 0.7144503341783821, "grad_norm": 0.1758183240890503, "learning_rate": 1.9908586355133223e-05, "loss": 0.6486, "step": 3100 }, { "epoch": 0.7146808020281171, "grad_norm": 0.1789383888244629, "learning_rate": 1.9878783003478975e-05, "loss": 0.649, "step": 3101 }, { "epoch": 0.714911269877852, "grad_norm": 0.18306902050971985, "learning_rate": 1.9848996440668123e-05, "loss": 0.6387, "step": 3102 }, { "epoch": 0.715141737727587, "grad_norm": 0.1843525767326355, "learning_rate": 1.981922668330293e-05, "loss": 0.6538, "step": 3103 }, { "epoch": 0.715372205577322, "grad_norm": 0.16771547496318817, "learning_rate": 1.9789473747976412e-05, "loss": 0.6386, "step": 3104 }, { "epoch": 0.715602673427057, "grad_norm": 0.19470912218093872, "learning_rate": 1.975973765127212e-05, "loss": 0.6467, "step": 3105 }, { "epoch": 0.7158331412767919, "grad_norm": 0.16712939739227295, "learning_rate": 1.9730018409764218e-05, "loss": 0.646, "step": 3106 }, { "epoch": 0.7160636091265269, "grad_norm": 0.18347413837909698, "learning_rate": 1.9700316040017515e-05, "loss": 0.6378, "step": 3107 }, { "epoch": 0.7162940769762618, "grad_norm": 0.18481610715389252, "learning_rate": 1.967063055858739e-05, "loss": 0.6492, "step": 3108 }, { "epoch": 0.7165245448259968, "grad_norm": 0.15738435089588165, "learning_rate": 1.9640961982019825e-05, "loss": 0.6417, "step": 3109 }, { "epoch": 0.7167550126757317, "grad_norm": 0.17788545787334442, "learning_rate": 1.9611310326851373e-05, "loss": 0.6416, "step": 3110 }, { "epoch": 0.7169854805254667, "grad_norm": 0.1677306592464447, "learning_rate": 1.9581675609609173e-05, "loss": 0.6449, "step": 3111 }, { "epoch": 0.7172159483752016, "grad_norm": 0.17086228728294373, "learning_rate": 1.9552057846810866e-05, "loss": 0.6473, "step": 3112 }, { "epoch": 0.7174464162249367, "grad_norm": 0.1743098348379135, "learning_rate": 1.9522457054964683e-05, "loss": 0.6488, "step": 3113 }, { "epoch": 0.7176768840746716, "grad_norm": 0.1679782271385193, "learning_rate": 1.949287325056945e-05, "loss": 0.6525, "step": 3114 }, { "epoch": 0.7179073519244066, "grad_norm": 0.15719559788703918, "learning_rate": 1.9463306450114416e-05, "loss": 0.6499, "step": 3115 }, { "epoch": 0.7181378197741415, "grad_norm": 0.16834014654159546, "learning_rate": 1.9433756670079423e-05, "loss": 0.6547, "step": 3116 }, { "epoch": 0.7183682876238765, "grad_norm": 0.17471790313720703, "learning_rate": 1.9404223926934828e-05, "loss": 0.6499, "step": 3117 }, { "epoch": 0.7185987554736114, "grad_norm": 0.16939441859722137, "learning_rate": 1.9374708237141413e-05, "loss": 0.6544, "step": 3118 }, { "epoch": 0.7188292233233464, "grad_norm": 0.17127317190170288, "learning_rate": 1.9345209617150577e-05, "loss": 0.6403, "step": 3119 }, { "epoch": 0.7190596911730813, "grad_norm": 0.16861754655838013, "learning_rate": 1.9315728083404145e-05, "loss": 0.6465, "step": 3120 }, { "epoch": 0.7192901590228163, "grad_norm": 0.17622019350528717, "learning_rate": 1.9286263652334368e-05, "loss": 0.6353, "step": 3121 }, { "epoch": 0.7195206268725512, "grad_norm": 0.16617770493030548, "learning_rate": 1.925681634036404e-05, "loss": 0.6502, "step": 3122 }, { "epoch": 0.7197510947222863, "grad_norm": 0.16550083458423615, "learning_rate": 1.922738616390639e-05, "loss": 0.6483, "step": 3123 }, { "epoch": 0.7199815625720212, "grad_norm": 0.15915584564208984, "learning_rate": 1.9197973139365083e-05, "loss": 0.6441, "step": 3124 }, { "epoch": 0.7202120304217562, "grad_norm": 0.16623058915138245, "learning_rate": 1.9168577283134232e-05, "loss": 0.6493, "step": 3125 }, { "epoch": 0.7204424982714911, "grad_norm": 0.16962306201457977, "learning_rate": 1.9139198611598404e-05, "loss": 0.6456, "step": 3126 }, { "epoch": 0.7206729661212261, "grad_norm": 0.1750573068857193, "learning_rate": 1.910983714113253e-05, "loss": 0.644, "step": 3127 }, { "epoch": 0.720903433970961, "grad_norm": 0.17163607478141785, "learning_rate": 1.9080492888101993e-05, "loss": 0.641, "step": 3128 }, { "epoch": 0.721133901820696, "grad_norm": 0.6170444488525391, "learning_rate": 1.9051165868862615e-05, "loss": 0.6669, "step": 3129 }, { "epoch": 0.7213643696704309, "grad_norm": 0.17929589748382568, "learning_rate": 1.9021856099760533e-05, "loss": 0.6479, "step": 3130 }, { "epoch": 0.721594837520166, "grad_norm": 0.16149266064167023, "learning_rate": 1.8992563597132323e-05, "loss": 0.6498, "step": 3131 }, { "epoch": 0.7218253053699009, "grad_norm": 0.16591522097587585, "learning_rate": 1.8963288377304916e-05, "loss": 0.6493, "step": 3132 }, { "epoch": 0.7220557732196359, "grad_norm": 0.1631591022014618, "learning_rate": 1.8934030456595625e-05, "loss": 0.6517, "step": 3133 }, { "epoch": 0.7222862410693708, "grad_norm": 0.1634044647216797, "learning_rate": 1.890478985131211e-05, "loss": 0.6413, "step": 3134 }, { "epoch": 0.7225167089191058, "grad_norm": 0.1616399884223938, "learning_rate": 1.88755665777524e-05, "loss": 0.6432, "step": 3135 }, { "epoch": 0.7227471767688407, "grad_norm": 0.16510508954524994, "learning_rate": 1.8846360652204816e-05, "loss": 0.6428, "step": 3136 }, { "epoch": 0.7229776446185757, "grad_norm": 0.15748094022274017, "learning_rate": 1.881717209094805e-05, "loss": 0.6463, "step": 3137 }, { "epoch": 0.7232081124683106, "grad_norm": 0.17742668092250824, "learning_rate": 1.8788000910251103e-05, "loss": 0.6451, "step": 3138 }, { "epoch": 0.7234385803180456, "grad_norm": 0.16781218349933624, "learning_rate": 1.8758847126373303e-05, "loss": 0.6499, "step": 3139 }, { "epoch": 0.7236690481677805, "grad_norm": 0.1616116166114807, "learning_rate": 1.8729710755564257e-05, "loss": 0.6435, "step": 3140 }, { "epoch": 0.7238995160175156, "grad_norm": 0.1795063465833664, "learning_rate": 1.8700591814063905e-05, "loss": 0.6404, "step": 3141 }, { "epoch": 0.7241299838672505, "grad_norm": 0.16432838141918182, "learning_rate": 1.867149031810241e-05, "loss": 0.641, "step": 3142 }, { "epoch": 0.7243604517169855, "grad_norm": 0.1743660271167755, "learning_rate": 1.864240628390024e-05, "loss": 0.6405, "step": 3143 }, { "epoch": 0.7245909195667204, "grad_norm": 0.17047765851020813, "learning_rate": 1.8613339727668194e-05, "loss": 0.6471, "step": 3144 }, { "epoch": 0.7248213874164554, "grad_norm": 0.15893389284610748, "learning_rate": 1.8584290665607228e-05, "loss": 0.6404, "step": 3145 }, { "epoch": 0.7250518552661903, "grad_norm": 0.20478779077529907, "learning_rate": 1.8555259113908597e-05, "loss": 0.6507, "step": 3146 }, { "epoch": 0.7252823231159253, "grad_norm": 0.17662855982780457, "learning_rate": 1.85262450887538e-05, "loss": 0.6537, "step": 3147 }, { "epoch": 0.7255127909656602, "grad_norm": 0.17941661179065704, "learning_rate": 1.849724860631456e-05, "loss": 0.6383, "step": 3148 }, { "epoch": 0.7257432588153953, "grad_norm": 0.15811479091644287, "learning_rate": 1.846826968275281e-05, "loss": 0.6488, "step": 3149 }, { "epoch": 0.7259737266651302, "grad_norm": 0.1792788952589035, "learning_rate": 1.843930833422073e-05, "loss": 0.6522, "step": 3150 }, { "epoch": 0.7262041945148652, "grad_norm": 0.16518691182136536, "learning_rate": 1.8410364576860646e-05, "loss": 0.6368, "step": 3151 }, { "epoch": 0.7264346623646002, "grad_norm": 0.16625241935253143, "learning_rate": 1.838143842680513e-05, "loss": 0.6339, "step": 3152 }, { "epoch": 0.7266651302143351, "grad_norm": 0.17410995066165924, "learning_rate": 1.8352529900176923e-05, "loss": 0.6421, "step": 3153 }, { "epoch": 0.7268955980640701, "grad_norm": 0.16226623952388763, "learning_rate": 1.832363901308895e-05, "loss": 0.6515, "step": 3154 }, { "epoch": 0.727126065913805, "grad_norm": 0.16737419366836548, "learning_rate": 1.8294765781644285e-05, "loss": 0.639, "step": 3155 }, { "epoch": 0.72735653376354, "grad_norm": 0.1635529100894928, "learning_rate": 1.8265910221936206e-05, "loss": 0.6342, "step": 3156 }, { "epoch": 0.7275870016132749, "grad_norm": 0.17363202571868896, "learning_rate": 1.823707235004805e-05, "loss": 0.6494, "step": 3157 }, { "epoch": 0.72781746946301, "grad_norm": 0.1653600037097931, "learning_rate": 1.8208252182053403e-05, "loss": 0.6471, "step": 3158 }, { "epoch": 0.7280479373127449, "grad_norm": 0.1543000489473343, "learning_rate": 1.8179449734015948e-05, "loss": 0.6399, "step": 3159 }, { "epoch": 0.7282784051624799, "grad_norm": 0.1642274111509323, "learning_rate": 1.8150665021989426e-05, "loss": 0.6496, "step": 3160 }, { "epoch": 0.7285088730122148, "grad_norm": 0.16651754081249237, "learning_rate": 1.812189806201778e-05, "loss": 0.6451, "step": 3161 }, { "epoch": 0.7287393408619498, "grad_norm": 0.1647575944662094, "learning_rate": 1.8093148870135e-05, "loss": 0.6464, "step": 3162 }, { "epoch": 0.7289698087116847, "grad_norm": 0.16285358369350433, "learning_rate": 1.8064417462365226e-05, "loss": 0.6495, "step": 3163 }, { "epoch": 0.7292002765614197, "grad_norm": 0.18453556299209595, "learning_rate": 1.8035703854722623e-05, "loss": 0.6457, "step": 3164 }, { "epoch": 0.7294307444111546, "grad_norm": 0.16734904050827026, "learning_rate": 1.800700806321151e-05, "loss": 0.6464, "step": 3165 }, { "epoch": 0.7296612122608896, "grad_norm": 0.167644202709198, "learning_rate": 1.7978330103826184e-05, "loss": 0.6494, "step": 3166 }, { "epoch": 0.7298916801106246, "grad_norm": 0.17859847843647003, "learning_rate": 1.7949669992551053e-05, "loss": 0.6481, "step": 3167 }, { "epoch": 0.7301221479603596, "grad_norm": 0.16591614484786987, "learning_rate": 1.792102774536063e-05, "loss": 0.6398, "step": 3168 }, { "epoch": 0.7303526158100945, "grad_norm": 0.15973828732967377, "learning_rate": 1.7892403378219364e-05, "loss": 0.6475, "step": 3169 }, { "epoch": 0.7305830836598295, "grad_norm": 0.16776901483535767, "learning_rate": 1.786379690708181e-05, "loss": 0.6442, "step": 3170 }, { "epoch": 0.7308135515095644, "grad_norm": 0.15858832001686096, "learning_rate": 1.7835208347892535e-05, "loss": 0.6528, "step": 3171 }, { "epoch": 0.7310440193592994, "grad_norm": 0.16048070788383484, "learning_rate": 1.7806637716586073e-05, "loss": 0.6393, "step": 3172 }, { "epoch": 0.7312744872090343, "grad_norm": 0.1663476973772049, "learning_rate": 1.777808502908706e-05, "loss": 0.6438, "step": 3173 }, { "epoch": 0.7315049550587693, "grad_norm": 0.1595594882965088, "learning_rate": 1.7749550301310074e-05, "loss": 0.6347, "step": 3174 }, { "epoch": 0.7317354229085042, "grad_norm": 0.16028767824172974, "learning_rate": 1.7721033549159655e-05, "loss": 0.6495, "step": 3175 }, { "epoch": 0.7319658907582393, "grad_norm": 0.16454143822193146, "learning_rate": 1.7692534788530374e-05, "loss": 0.653, "step": 3176 }, { "epoch": 0.7321963586079742, "grad_norm": 0.16430748999118805, "learning_rate": 1.7664054035306756e-05, "loss": 0.6383, "step": 3177 }, { "epoch": 0.7324268264577092, "grad_norm": 0.16365505754947662, "learning_rate": 1.7635591305363292e-05, "loss": 0.6416, "step": 3178 }, { "epoch": 0.7326572943074441, "grad_norm": 0.1731487661600113, "learning_rate": 1.7607146614564418e-05, "loss": 0.6551, "step": 3179 }, { "epoch": 0.7328877621571791, "grad_norm": 0.1614767163991928, "learning_rate": 1.7578719978764545e-05, "loss": 0.6446, "step": 3180 }, { "epoch": 0.733118230006914, "grad_norm": 0.16245704889297485, "learning_rate": 1.755031141380796e-05, "loss": 0.6451, "step": 3181 }, { "epoch": 0.733348697856649, "grad_norm": 0.15642160177230835, "learning_rate": 1.7521920935528917e-05, "loss": 0.6422, "step": 3182 }, { "epoch": 0.7335791657063839, "grad_norm": 0.162491574883461, "learning_rate": 1.749354855975164e-05, "loss": 0.6495, "step": 3183 }, { "epoch": 0.733809633556119, "grad_norm": 0.15852685272693634, "learning_rate": 1.746519430229015e-05, "loss": 0.6452, "step": 3184 }, { "epoch": 0.7340401014058539, "grad_norm": 0.16387516260147095, "learning_rate": 1.7436858178948457e-05, "loss": 0.6464, "step": 3185 }, { "epoch": 0.7342705692555889, "grad_norm": 0.15998558700084686, "learning_rate": 1.7408540205520436e-05, "loss": 0.648, "step": 3186 }, { "epoch": 0.7345010371053238, "grad_norm": 0.17251843214035034, "learning_rate": 1.7380240397789836e-05, "loss": 0.6492, "step": 3187 }, { "epoch": 0.7347315049550588, "grad_norm": 0.1601380854845047, "learning_rate": 1.7351958771530298e-05, "loss": 0.6511, "step": 3188 }, { "epoch": 0.7349619728047937, "grad_norm": 0.15078677237033844, "learning_rate": 1.7323695342505342e-05, "loss": 0.6492, "step": 3189 }, { "epoch": 0.7351924406545287, "grad_norm": 0.16088485717773438, "learning_rate": 1.729545012646828e-05, "loss": 0.6434, "step": 3190 }, { "epoch": 0.7354229085042636, "grad_norm": 0.17706431448459625, "learning_rate": 1.7267223139162342e-05, "loss": 0.6491, "step": 3191 }, { "epoch": 0.7356533763539986, "grad_norm": 0.15498369932174683, "learning_rate": 1.7239014396320574e-05, "loss": 0.646, "step": 3192 }, { "epoch": 0.7358838442037335, "grad_norm": 0.1685081273317337, "learning_rate": 1.7210823913665852e-05, "loss": 0.6454, "step": 3193 }, { "epoch": 0.7361143120534686, "grad_norm": 0.16466312110424042, "learning_rate": 1.718265170691087e-05, "loss": 0.6512, "step": 3194 }, { "epoch": 0.7363447799032035, "grad_norm": 0.1716887503862381, "learning_rate": 1.7154497791758157e-05, "loss": 0.6552, "step": 3195 }, { "epoch": 0.7365752477529385, "grad_norm": 0.17006585001945496, "learning_rate": 1.7126362183899986e-05, "loss": 0.6402, "step": 3196 }, { "epoch": 0.7368057156026734, "grad_norm": 0.15960989892482758, "learning_rate": 1.7098244899018512e-05, "loss": 0.6478, "step": 3197 }, { "epoch": 0.7370361834524084, "grad_norm": 0.16518181562423706, "learning_rate": 1.707014595278564e-05, "loss": 0.6585, "step": 3198 }, { "epoch": 0.7372666513021433, "grad_norm": 0.15796171128749847, "learning_rate": 1.7042065360863007e-05, "loss": 0.6493, "step": 3199 }, { "epoch": 0.7374971191518783, "grad_norm": 0.1603865772485733, "learning_rate": 1.7014003138902092e-05, "loss": 0.6361, "step": 3200 }, { "epoch": 0.7377275870016132, "grad_norm": 0.1591259092092514, "learning_rate": 1.698595930254409e-05, "loss": 0.6409, "step": 3201 }, { "epoch": 0.7379580548513482, "grad_norm": 0.16630783677101135, "learning_rate": 1.6957933867419966e-05, "loss": 0.6435, "step": 3202 }, { "epoch": 0.7381885227010831, "grad_norm": 0.17076456546783447, "learning_rate": 1.6929926849150428e-05, "loss": 0.6506, "step": 3203 }, { "epoch": 0.7384189905508182, "grad_norm": 0.16689413785934448, "learning_rate": 1.6901938263345934e-05, "loss": 0.6521, "step": 3204 }, { "epoch": 0.7386494584005531, "grad_norm": 0.16302435100078583, "learning_rate": 1.687396812560661e-05, "loss": 0.6446, "step": 3205 }, { "epoch": 0.7388799262502881, "grad_norm": 0.1644594967365265, "learning_rate": 1.6846016451522362e-05, "loss": 0.6389, "step": 3206 }, { "epoch": 0.739110394100023, "grad_norm": 0.16946260631084442, "learning_rate": 1.681808325667278e-05, "loss": 0.6493, "step": 3207 }, { "epoch": 0.739340861949758, "grad_norm": 0.16244158148765564, "learning_rate": 1.6790168556627156e-05, "loss": 0.6507, "step": 3208 }, { "epoch": 0.7395713297994929, "grad_norm": 0.18265312910079956, "learning_rate": 1.6762272366944472e-05, "loss": 0.6404, "step": 3209 }, { "epoch": 0.7398017976492279, "grad_norm": 0.16505154967308044, "learning_rate": 1.673439470317341e-05, "loss": 0.6464, "step": 3210 }, { "epoch": 0.740032265498963, "grad_norm": 0.1631191074848175, "learning_rate": 1.6706535580852267e-05, "loss": 0.6385, "step": 3211 }, { "epoch": 0.7402627333486979, "grad_norm": 0.17808805406093597, "learning_rate": 1.66786950155091e-05, "loss": 0.6458, "step": 3212 }, { "epoch": 0.7404932011984329, "grad_norm": 0.16241349279880524, "learning_rate": 1.6650873022661563e-05, "loss": 0.6487, "step": 3213 }, { "epoch": 0.7407236690481678, "grad_norm": 0.1839081048965454, "learning_rate": 1.662306961781694e-05, "loss": 0.6478, "step": 3214 }, { "epoch": 0.7409541368979028, "grad_norm": 0.16764585673809052, "learning_rate": 1.6595284816472195e-05, "loss": 0.6424, "step": 3215 }, { "epoch": 0.7411846047476377, "grad_norm": 0.15650008618831635, "learning_rate": 1.6567518634113916e-05, "loss": 0.6455, "step": 3216 }, { "epoch": 0.7414150725973727, "grad_norm": 0.17013835906982422, "learning_rate": 1.65397710862183e-05, "loss": 0.6422, "step": 3217 }, { "epoch": 0.7416455404471076, "grad_norm": 0.15841804444789886, "learning_rate": 1.6512042188251164e-05, "loss": 0.6402, "step": 3218 }, { "epoch": 0.7418760082968426, "grad_norm": 0.1625175029039383, "learning_rate": 1.6484331955667947e-05, "loss": 0.6491, "step": 3219 }, { "epoch": 0.7421064761465775, "grad_norm": 0.17238567769527435, "learning_rate": 1.6456640403913638e-05, "loss": 0.6331, "step": 3220 }, { "epoch": 0.7423369439963126, "grad_norm": 0.15374347567558289, "learning_rate": 1.642896754842284e-05, "loss": 0.656, "step": 3221 }, { "epoch": 0.7425674118460475, "grad_norm": 0.17570993304252625, "learning_rate": 1.640131340461978e-05, "loss": 0.641, "step": 3222 }, { "epoch": 0.7427978796957825, "grad_norm": 0.15289098024368286, "learning_rate": 1.6373677987918195e-05, "loss": 0.6542, "step": 3223 }, { "epoch": 0.7430283475455174, "grad_norm": 0.16534923017024994, "learning_rate": 1.634606131372139e-05, "loss": 0.6461, "step": 3224 }, { "epoch": 0.7432588153952524, "grad_norm": 0.1682467758655548, "learning_rate": 1.631846339742224e-05, "loss": 0.65, "step": 3225 }, { "epoch": 0.7434892832449873, "grad_norm": 0.15141364932060242, "learning_rate": 1.629088425440317e-05, "loss": 0.6427, "step": 3226 }, { "epoch": 0.7437197510947223, "grad_norm": 0.16520178318023682, "learning_rate": 1.6263323900036126e-05, "loss": 0.6427, "step": 3227 }, { "epoch": 0.7439502189444572, "grad_norm": 0.1559877246618271, "learning_rate": 1.6235782349682592e-05, "loss": 0.6452, "step": 3228 }, { "epoch": 0.7441806867941922, "grad_norm": 0.15489576756954193, "learning_rate": 1.6208259618693583e-05, "loss": 0.6445, "step": 3229 }, { "epoch": 0.7444111546439272, "grad_norm": 0.1593122035264969, "learning_rate": 1.618075572240957e-05, "loss": 0.6506, "step": 3230 }, { "epoch": 0.7446416224936622, "grad_norm": 0.15990042686462402, "learning_rate": 1.615327067616057e-05, "loss": 0.6488, "step": 3231 }, { "epoch": 0.7448720903433971, "grad_norm": 0.15468232333660126, "learning_rate": 1.612580449526614e-05, "loss": 0.6389, "step": 3232 }, { "epoch": 0.7451025581931321, "grad_norm": 0.15509067475795746, "learning_rate": 1.6098357195035212e-05, "loss": 0.6439, "step": 3233 }, { "epoch": 0.745333026042867, "grad_norm": 0.17395319044589996, "learning_rate": 1.6070928790766275e-05, "loss": 0.638, "step": 3234 }, { "epoch": 0.745563493892602, "grad_norm": 0.151578888297081, "learning_rate": 1.6043519297747285e-05, "loss": 0.6367, "step": 3235 }, { "epoch": 0.7457939617423369, "grad_norm": 0.15744104981422424, "learning_rate": 1.6016128731255575e-05, "loss": 0.6429, "step": 3236 }, { "epoch": 0.7460244295920719, "grad_norm": 0.17480209469795227, "learning_rate": 1.5988757106558043e-05, "loss": 0.6509, "step": 3237 }, { "epoch": 0.7462548974418068, "grad_norm": 0.16368581354618073, "learning_rate": 1.5961404438910976e-05, "loss": 0.6411, "step": 3238 }, { "epoch": 0.7464853652915419, "grad_norm": 0.15094834566116333, "learning_rate": 1.5934070743560065e-05, "loss": 0.64, "step": 3239 }, { "epoch": 0.7467158331412768, "grad_norm": 0.16196765005588531, "learning_rate": 1.590675603574046e-05, "loss": 0.6455, "step": 3240 }, { "epoch": 0.7469463009910118, "grad_norm": 0.17574729025363922, "learning_rate": 1.5879460330676743e-05, "loss": 0.6494, "step": 3241 }, { "epoch": 0.7471767688407467, "grad_norm": 0.1642741709947586, "learning_rate": 1.5852183643582868e-05, "loss": 0.6417, "step": 3242 }, { "epoch": 0.7474072366904817, "grad_norm": 0.1686857044696808, "learning_rate": 1.5824925989662216e-05, "loss": 0.6415, "step": 3243 }, { "epoch": 0.7476377045402166, "grad_norm": 0.17271287739276886, "learning_rate": 1.5797687384107558e-05, "loss": 0.6514, "step": 3244 }, { "epoch": 0.7478681723899516, "grad_norm": 0.16826404631137848, "learning_rate": 1.577046784210101e-05, "loss": 0.6418, "step": 3245 }, { "epoch": 0.7480986402396865, "grad_norm": 0.5926986932754517, "learning_rate": 1.574326737881409e-05, "loss": 0.6596, "step": 3246 }, { "epoch": 0.7483291080894215, "grad_norm": 0.16710159182548523, "learning_rate": 1.571608600940774e-05, "loss": 0.6324, "step": 3247 }, { "epoch": 0.7485595759391565, "grad_norm": 0.16913191974163055, "learning_rate": 1.568892374903214e-05, "loss": 0.6437, "step": 3248 }, { "epoch": 0.7487900437888915, "grad_norm": 0.21093370020389557, "learning_rate": 1.566178061282691e-05, "loss": 0.6346, "step": 3249 }, { "epoch": 0.7490205116386264, "grad_norm": 0.15978707373142242, "learning_rate": 1.5634656615920974e-05, "loss": 0.6546, "step": 3250 }, { "epoch": 0.7492509794883614, "grad_norm": 0.17602089047431946, "learning_rate": 1.56075517734326e-05, "loss": 0.6429, "step": 3251 }, { "epoch": 0.7494814473380963, "grad_norm": 0.16324278712272644, "learning_rate": 1.558046610046938e-05, "loss": 0.6439, "step": 3252 }, { "epoch": 0.7497119151878313, "grad_norm": 0.1663910150527954, "learning_rate": 1.5553399612128234e-05, "loss": 0.6506, "step": 3253 }, { "epoch": 0.7499423830375662, "grad_norm": 0.17245832085609436, "learning_rate": 1.5526352323495336e-05, "loss": 0.6397, "step": 3254 }, { "epoch": 0.7501728508873012, "grad_norm": 0.1658082902431488, "learning_rate": 1.549932424964622e-05, "loss": 0.6503, "step": 3255 }, { "epoch": 0.7504033187370361, "grad_norm": 0.16782449185848236, "learning_rate": 1.547231540564567e-05, "loss": 0.6483, "step": 3256 }, { "epoch": 0.7506337865867712, "grad_norm": 0.16004249453544617, "learning_rate": 1.5445325806547782e-05, "loss": 0.6485, "step": 3257 }, { "epoch": 0.7508642544365061, "grad_norm": 0.1629590541124344, "learning_rate": 1.5418355467395906e-05, "loss": 0.6478, "step": 3258 }, { "epoch": 0.7510947222862411, "grad_norm": 0.1687597781419754, "learning_rate": 1.5391404403222676e-05, "loss": 0.6482, "step": 3259 }, { "epoch": 0.751325190135976, "grad_norm": 0.16578345000743866, "learning_rate": 1.536447262904994e-05, "loss": 0.648, "step": 3260 }, { "epoch": 0.751555657985711, "grad_norm": 0.15404678881168365, "learning_rate": 1.533756015988882e-05, "loss": 0.6436, "step": 3261 }, { "epoch": 0.7517861258354459, "grad_norm": 0.18324708938598633, "learning_rate": 1.5310667010739726e-05, "loss": 0.6466, "step": 3262 }, { "epoch": 0.7520165936851809, "grad_norm": 0.16745442152023315, "learning_rate": 1.5283793196592212e-05, "loss": 0.6459, "step": 3263 }, { "epoch": 0.7522470615349158, "grad_norm": 0.16149534285068512, "learning_rate": 1.5256938732425107e-05, "loss": 0.6426, "step": 3264 }, { "epoch": 0.7524775293846508, "grad_norm": 0.14993827044963837, "learning_rate": 1.5230103633206449e-05, "loss": 0.6334, "step": 3265 }, { "epoch": 0.7527079972343858, "grad_norm": 0.15830254554748535, "learning_rate": 1.5203287913893478e-05, "loss": 0.6382, "step": 3266 }, { "epoch": 0.7529384650841208, "grad_norm": 0.15330778062343597, "learning_rate": 1.5176491589432628e-05, "loss": 0.6551, "step": 3267 }, { "epoch": 0.7531689329338557, "grad_norm": 0.15092666447162628, "learning_rate": 1.5149714674759546e-05, "loss": 0.6435, "step": 3268 }, { "epoch": 0.7533994007835907, "grad_norm": 0.16142503917217255, "learning_rate": 1.5122957184799007e-05, "loss": 0.6423, "step": 3269 }, { "epoch": 0.7536298686333257, "grad_norm": 0.15477819740772247, "learning_rate": 1.5096219134465017e-05, "loss": 0.6452, "step": 3270 }, { "epoch": 0.7538603364830606, "grad_norm": 0.16160400211811066, "learning_rate": 1.5069500538660713e-05, "loss": 0.6451, "step": 3271 }, { "epoch": 0.7540908043327956, "grad_norm": 0.17091768980026245, "learning_rate": 1.5042801412278412e-05, "loss": 0.6416, "step": 3272 }, { "epoch": 0.7543212721825305, "grad_norm": 0.1987750232219696, "learning_rate": 1.5016121770199553e-05, "loss": 0.64, "step": 3273 }, { "epoch": 0.7545517400322655, "grad_norm": 0.16774295270442963, "learning_rate": 1.4989461627294755e-05, "loss": 0.639, "step": 3274 }, { "epoch": 0.7547822078820005, "grad_norm": 0.1618596464395523, "learning_rate": 1.4962820998423683e-05, "loss": 0.6448, "step": 3275 }, { "epoch": 0.7550126757317355, "grad_norm": 0.1566198170185089, "learning_rate": 1.4936199898435238e-05, "loss": 0.642, "step": 3276 }, { "epoch": 0.7552431435814704, "grad_norm": 0.17685432732105255, "learning_rate": 1.4909598342167385e-05, "loss": 0.6434, "step": 3277 }, { "epoch": 0.7554736114312054, "grad_norm": 0.15080119669437408, "learning_rate": 1.488301634444716e-05, "loss": 0.6513, "step": 3278 }, { "epoch": 0.7557040792809403, "grad_norm": 0.15888544917106628, "learning_rate": 1.485645392009074e-05, "loss": 0.6462, "step": 3279 }, { "epoch": 0.7559345471306753, "grad_norm": 0.16779987514019012, "learning_rate": 1.4829911083903386e-05, "loss": 0.6443, "step": 3280 }, { "epoch": 0.7561650149804102, "grad_norm": 0.15703189373016357, "learning_rate": 1.4803387850679445e-05, "loss": 0.6497, "step": 3281 }, { "epoch": 0.7563954828301452, "grad_norm": 0.1584496796131134, "learning_rate": 1.477688423520232e-05, "loss": 0.6466, "step": 3282 }, { "epoch": 0.7566259506798801, "grad_norm": 0.16846555471420288, "learning_rate": 1.4750400252244511e-05, "loss": 0.6452, "step": 3283 }, { "epoch": 0.7568564185296152, "grad_norm": 0.1615644246339798, "learning_rate": 1.4723935916567522e-05, "loss": 0.6491, "step": 3284 }, { "epoch": 0.7570868863793501, "grad_norm": 0.15736454725265503, "learning_rate": 1.469749124292194e-05, "loss": 0.6436, "step": 3285 }, { "epoch": 0.7573173542290851, "grad_norm": 0.15067139267921448, "learning_rate": 1.4671066246047438e-05, "loss": 0.6353, "step": 3286 }, { "epoch": 0.75754782207882, "grad_norm": 0.16569077968597412, "learning_rate": 1.4644660940672627e-05, "loss": 0.6491, "step": 3287 }, { "epoch": 0.757778289928555, "grad_norm": 0.15051403641700745, "learning_rate": 1.461827534151521e-05, "loss": 0.6394, "step": 3288 }, { "epoch": 0.7580087577782899, "grad_norm": 0.15739889442920685, "learning_rate": 1.4591909463281894e-05, "loss": 0.6364, "step": 3289 }, { "epoch": 0.7582392256280249, "grad_norm": 0.16223600506782532, "learning_rate": 1.4565563320668346e-05, "loss": 0.6472, "step": 3290 }, { "epoch": 0.7584696934777598, "grad_norm": 0.159046471118927, "learning_rate": 1.4539236928359318e-05, "loss": 0.654, "step": 3291 }, { "epoch": 0.7587001613274948, "grad_norm": 0.16086021065711975, "learning_rate": 1.451293030102851e-05, "loss": 0.6456, "step": 3292 }, { "epoch": 0.7589306291772298, "grad_norm": 0.15306276082992554, "learning_rate": 1.4486643453338571e-05, "loss": 0.656, "step": 3293 }, { "epoch": 0.7591610970269648, "grad_norm": 0.16488851606845856, "learning_rate": 1.4460376399941184e-05, "loss": 0.6519, "step": 3294 }, { "epoch": 0.7593915648766997, "grad_norm": 0.16087326407432556, "learning_rate": 1.443412915547696e-05, "loss": 0.6482, "step": 3295 }, { "epoch": 0.7596220327264347, "grad_norm": 0.15743768215179443, "learning_rate": 1.4407901734575496e-05, "loss": 0.6472, "step": 3296 }, { "epoch": 0.7598525005761696, "grad_norm": 0.16286884248256683, "learning_rate": 1.4381694151855318e-05, "loss": 0.6464, "step": 3297 }, { "epoch": 0.7600829684259046, "grad_norm": 0.15299223363399506, "learning_rate": 1.4355506421923926e-05, "loss": 0.6535, "step": 3298 }, { "epoch": 0.7603134362756395, "grad_norm": 0.16617640852928162, "learning_rate": 1.4329338559377691e-05, "loss": 0.6402, "step": 3299 }, { "epoch": 0.7605439041253745, "grad_norm": 0.17199209332466125, "learning_rate": 1.4303190578801967e-05, "loss": 0.6406, "step": 3300 }, { "epoch": 0.7607743719751094, "grad_norm": 0.1543896347284317, "learning_rate": 1.4277062494771044e-05, "loss": 0.646, "step": 3301 }, { "epoch": 0.7610048398248445, "grad_norm": 0.16141347587108612, "learning_rate": 1.4250954321848043e-05, "loss": 0.6484, "step": 3302 }, { "epoch": 0.7612353076745794, "grad_norm": 0.17056810855865479, "learning_rate": 1.4224866074585053e-05, "loss": 0.641, "step": 3303 }, { "epoch": 0.7614657755243144, "grad_norm": 0.17288394272327423, "learning_rate": 1.4198797767523037e-05, "loss": 0.6421, "step": 3304 }, { "epoch": 0.7616962433740493, "grad_norm": 0.16343657672405243, "learning_rate": 1.4172749415191844e-05, "loss": 0.6313, "step": 3305 }, { "epoch": 0.7619267112237843, "grad_norm": 0.16413342952728271, "learning_rate": 1.41467210321102e-05, "loss": 0.6415, "step": 3306 }, { "epoch": 0.7621571790735192, "grad_norm": 0.17422489821910858, "learning_rate": 1.412071263278571e-05, "loss": 0.6417, "step": 3307 }, { "epoch": 0.7623876469232542, "grad_norm": 0.16758742928504944, "learning_rate": 1.4094724231714812e-05, "loss": 0.6429, "step": 3308 }, { "epoch": 0.7626181147729891, "grad_norm": 0.16555525362491608, "learning_rate": 1.406875584338282e-05, "loss": 0.641, "step": 3309 }, { "epoch": 0.7628485826227241, "grad_norm": 0.168875589966774, "learning_rate": 1.4042807482263904e-05, "loss": 0.6468, "step": 3310 }, { "epoch": 0.763079050472459, "grad_norm": 0.16183854639530182, "learning_rate": 1.4016879162821044e-05, "loss": 0.644, "step": 3311 }, { "epoch": 0.7633095183221941, "grad_norm": 0.15983159840106964, "learning_rate": 1.3990970899506072e-05, "loss": 0.6481, "step": 3312 }, { "epoch": 0.763539986171929, "grad_norm": 0.1633015125989914, "learning_rate": 1.3965082706759646e-05, "loss": 0.642, "step": 3313 }, { "epoch": 0.763770454021664, "grad_norm": 0.15642528235912323, "learning_rate": 1.3939214599011174e-05, "loss": 0.6431, "step": 3314 }, { "epoch": 0.7640009218713989, "grad_norm": 0.15615011751651764, "learning_rate": 1.3913366590678966e-05, "loss": 0.6502, "step": 3315 }, { "epoch": 0.7642313897211339, "grad_norm": 0.15808658301830292, "learning_rate": 1.3887538696170089e-05, "loss": 0.6522, "step": 3316 }, { "epoch": 0.7644618575708688, "grad_norm": 0.15133537352085114, "learning_rate": 1.3861730929880346e-05, "loss": 0.6448, "step": 3317 }, { "epoch": 0.7646923254206038, "grad_norm": 0.152401402592659, "learning_rate": 1.3835943306194393e-05, "loss": 0.6524, "step": 3318 }, { "epoch": 0.7649227932703387, "grad_norm": 0.15733560919761658, "learning_rate": 1.3810175839485628e-05, "loss": 0.6479, "step": 3319 }, { "epoch": 0.7651532611200738, "grad_norm": 0.16682898998260498, "learning_rate": 1.3784428544116218e-05, "loss": 0.6391, "step": 3320 }, { "epoch": 0.7653837289698087, "grad_norm": 0.14624935388565063, "learning_rate": 1.3758701434437088e-05, "loss": 0.6499, "step": 3321 }, { "epoch": 0.7656141968195437, "grad_norm": 0.1629732847213745, "learning_rate": 1.3732994524787935e-05, "loss": 0.6372, "step": 3322 }, { "epoch": 0.7658446646692786, "grad_norm": 0.1721165031194687, "learning_rate": 1.370730782949713e-05, "loss": 0.6376, "step": 3323 }, { "epoch": 0.7660751325190136, "grad_norm": 0.1540745496749878, "learning_rate": 1.3681641362881842e-05, "loss": 0.6442, "step": 3324 }, { "epoch": 0.7663056003687485, "grad_norm": 0.1577741801738739, "learning_rate": 1.365599513924794e-05, "loss": 0.6475, "step": 3325 }, { "epoch": 0.7665360682184835, "grad_norm": 0.15906648337841034, "learning_rate": 1.3630369172890017e-05, "loss": 0.6426, "step": 3326 }, { "epoch": 0.7667665360682184, "grad_norm": 0.14891204237937927, "learning_rate": 1.3604763478091375e-05, "loss": 0.6394, "step": 3327 }, { "epoch": 0.7669970039179534, "grad_norm": 0.1566314697265625, "learning_rate": 1.3579178069124021e-05, "loss": 0.6483, "step": 3328 }, { "epoch": 0.7672274717676885, "grad_norm": 0.16238120198249817, "learning_rate": 1.3553612960248607e-05, "loss": 0.6396, "step": 3329 }, { "epoch": 0.7674579396174234, "grad_norm": 0.16790124773979187, "learning_rate": 1.3528068165714552e-05, "loss": 0.6512, "step": 3330 }, { "epoch": 0.7676884074671584, "grad_norm": 0.15930506587028503, "learning_rate": 1.3502543699759917e-05, "loss": 0.6483, "step": 3331 }, { "epoch": 0.7679188753168933, "grad_norm": 0.15827548503875732, "learning_rate": 1.34770395766114e-05, "loss": 0.6445, "step": 3332 }, { "epoch": 0.7681493431666283, "grad_norm": 0.16675326228141785, "learning_rate": 1.3451555810484389e-05, "loss": 0.6528, "step": 3333 }, { "epoch": 0.7683798110163632, "grad_norm": 0.16960427165031433, "learning_rate": 1.3426092415582936e-05, "loss": 0.6565, "step": 3334 }, { "epoch": 0.7686102788660982, "grad_norm": 0.1545518934726715, "learning_rate": 1.3400649406099719e-05, "loss": 0.6461, "step": 3335 }, { "epoch": 0.7688407467158331, "grad_norm": 0.1682869791984558, "learning_rate": 1.337522679621606e-05, "loss": 0.6409, "step": 3336 }, { "epoch": 0.7690712145655682, "grad_norm": 0.1694023311138153, "learning_rate": 1.3349824600101934e-05, "loss": 0.6443, "step": 3337 }, { "epoch": 0.7693016824153031, "grad_norm": 0.15705664455890656, "learning_rate": 1.3324442831915878e-05, "loss": 0.6416, "step": 3338 }, { "epoch": 0.7695321502650381, "grad_norm": 0.16872623562812805, "learning_rate": 1.3299081505805088e-05, "loss": 0.6481, "step": 3339 }, { "epoch": 0.769762618114773, "grad_norm": 0.15904702246189117, "learning_rate": 1.3273740635905397e-05, "loss": 0.6493, "step": 3340 }, { "epoch": 0.769993085964508, "grad_norm": 0.165695458650589, "learning_rate": 1.3248420236341147e-05, "loss": 0.6417, "step": 3341 }, { "epoch": 0.7702235538142429, "grad_norm": 0.16277405619621277, "learning_rate": 1.3223120321225351e-05, "loss": 0.644, "step": 3342 }, { "epoch": 0.7704540216639779, "grad_norm": 0.15501618385314941, "learning_rate": 1.319784090465958e-05, "loss": 0.6462, "step": 3343 }, { "epoch": 0.7706844895137128, "grad_norm": 0.16321119666099548, "learning_rate": 1.317258200073393e-05, "loss": 0.6355, "step": 3344 }, { "epoch": 0.7709149573634478, "grad_norm": 0.1589282751083374, "learning_rate": 1.314734362352716e-05, "loss": 0.6329, "step": 3345 }, { "epoch": 0.7711454252131827, "grad_norm": 0.15726275742053986, "learning_rate": 1.3122125787106521e-05, "loss": 0.632, "step": 3346 }, { "epoch": 0.7713758930629178, "grad_norm": 0.17059746384620667, "learning_rate": 1.3096928505527811e-05, "loss": 0.6396, "step": 3347 }, { "epoch": 0.7716063609126527, "grad_norm": 0.15518136322498322, "learning_rate": 1.3071751792835402e-05, "loss": 0.651, "step": 3348 }, { "epoch": 0.7718368287623877, "grad_norm": 0.16409626603126526, "learning_rate": 1.3046595663062188e-05, "loss": 0.6485, "step": 3349 }, { "epoch": 0.7720672966121226, "grad_norm": 0.15897217392921448, "learning_rate": 1.3021460130229596e-05, "loss": 0.6549, "step": 3350 }, { "epoch": 0.7722977644618576, "grad_norm": 0.15244118869304657, "learning_rate": 1.2996345208347565e-05, "loss": 0.6445, "step": 3351 }, { "epoch": 0.7725282323115925, "grad_norm": 0.15054090321063995, "learning_rate": 1.2971250911414567e-05, "loss": 0.6433, "step": 3352 }, { "epoch": 0.7727587001613275, "grad_norm": 0.16574056446552277, "learning_rate": 1.2946177253417525e-05, "loss": 0.6394, "step": 3353 }, { "epoch": 0.7729891680110624, "grad_norm": 0.16119171679019928, "learning_rate": 1.2921124248331901e-05, "loss": 0.6408, "step": 3354 }, { "epoch": 0.7732196358607974, "grad_norm": 0.15423963963985443, "learning_rate": 1.2896091910121666e-05, "loss": 0.6454, "step": 3355 }, { "epoch": 0.7734501037105324, "grad_norm": 0.15891356766223907, "learning_rate": 1.2871080252739247e-05, "loss": 0.6464, "step": 3356 }, { "epoch": 0.7736805715602674, "grad_norm": 0.15453371405601501, "learning_rate": 1.2846089290125507e-05, "loss": 0.6365, "step": 3357 }, { "epoch": 0.7739110394100023, "grad_norm": 0.15373654663562775, "learning_rate": 1.2821119036209827e-05, "loss": 0.6484, "step": 3358 }, { "epoch": 0.7741415072597373, "grad_norm": 0.15394984185695648, "learning_rate": 1.2796169504910028e-05, "loss": 0.6376, "step": 3359 }, { "epoch": 0.7743719751094722, "grad_norm": 0.1592058390378952, "learning_rate": 1.2771240710132375e-05, "loss": 0.642, "step": 3360 }, { "epoch": 0.7746024429592072, "grad_norm": 0.1550869345664978, "learning_rate": 1.2746332665771587e-05, "loss": 0.6512, "step": 3361 }, { "epoch": 0.7748329108089421, "grad_norm": 0.16978594660758972, "learning_rate": 1.2721445385710818e-05, "loss": 0.6474, "step": 3362 }, { "epoch": 0.7750633786586771, "grad_norm": 0.16240744292736053, "learning_rate": 1.2696578883821614e-05, "loss": 0.6442, "step": 3363 }, { "epoch": 0.775293846508412, "grad_norm": 0.14979274570941925, "learning_rate": 1.2671733173963968e-05, "loss": 0.6478, "step": 3364 }, { "epoch": 0.7755243143581471, "grad_norm": 0.16255247592926025, "learning_rate": 1.2646908269986318e-05, "loss": 0.6445, "step": 3365 }, { "epoch": 0.775754782207882, "grad_norm": 0.15680548548698425, "learning_rate": 1.2622104185725441e-05, "loss": 0.6426, "step": 3366 }, { "epoch": 0.775985250057617, "grad_norm": 0.15975458920001984, "learning_rate": 1.259732093500654e-05, "loss": 0.6461, "step": 3367 }, { "epoch": 0.7762157179073519, "grad_norm": 0.15226735174655914, "learning_rate": 1.2572558531643208e-05, "loss": 0.6403, "step": 3368 }, { "epoch": 0.7764461857570869, "grad_norm": 0.1590985804796219, "learning_rate": 1.2547816989437416e-05, "loss": 0.645, "step": 3369 }, { "epoch": 0.7766766536068218, "grad_norm": 0.1611194908618927, "learning_rate": 1.2523096322179501e-05, "loss": 0.6515, "step": 3370 }, { "epoch": 0.7769071214565568, "grad_norm": 0.16382546722888947, "learning_rate": 1.2498396543648195e-05, "loss": 0.6497, "step": 3371 }, { "epoch": 0.7771375893062917, "grad_norm": 0.15387794375419617, "learning_rate": 1.2473717667610519e-05, "loss": 0.6458, "step": 3372 }, { "epoch": 0.7773680571560267, "grad_norm": 0.15444159507751465, "learning_rate": 1.2449059707821904e-05, "loss": 0.6327, "step": 3373 }, { "epoch": 0.7775985250057617, "grad_norm": 0.16839702427387238, "learning_rate": 1.2424422678026116e-05, "loss": 0.6397, "step": 3374 }, { "epoch": 0.7778289928554967, "grad_norm": 0.16093666851520538, "learning_rate": 1.2399806591955227e-05, "loss": 0.6428, "step": 3375 }, { "epoch": 0.7780594607052316, "grad_norm": 0.1614423543214798, "learning_rate": 1.2375211463329666e-05, "loss": 0.6453, "step": 3376 }, { "epoch": 0.7782899285549666, "grad_norm": 0.1571039855480194, "learning_rate": 1.2350637305858176e-05, "loss": 0.6286, "step": 3377 }, { "epoch": 0.7785203964047015, "grad_norm": 0.1751880794763565, "learning_rate": 1.2326084133237774e-05, "loss": 0.6371, "step": 3378 }, { "epoch": 0.7787508642544365, "grad_norm": 0.16066774725914001, "learning_rate": 1.2301551959153813e-05, "loss": 0.6501, "step": 3379 }, { "epoch": 0.7789813321041714, "grad_norm": 0.16211430728435516, "learning_rate": 1.2277040797279976e-05, "loss": 0.6437, "step": 3380 }, { "epoch": 0.7792117999539064, "grad_norm": 0.16450825333595276, "learning_rate": 1.2252550661278156e-05, "loss": 0.6409, "step": 3381 }, { "epoch": 0.7794422678036413, "grad_norm": 0.16443844139575958, "learning_rate": 1.2228081564798583e-05, "loss": 0.6472, "step": 3382 }, { "epoch": 0.7796727356533764, "grad_norm": 0.17358103394508362, "learning_rate": 1.2203633521479735e-05, "loss": 0.6389, "step": 3383 }, { "epoch": 0.7799032035031113, "grad_norm": 0.16539937257766724, "learning_rate": 1.2179206544948374e-05, "loss": 0.6404, "step": 3384 }, { "epoch": 0.7801336713528463, "grad_norm": 0.15908871591091156, "learning_rate": 1.2154800648819508e-05, "loss": 0.6396, "step": 3385 }, { "epoch": 0.7803641392025812, "grad_norm": 0.17083421349525452, "learning_rate": 1.2130415846696414e-05, "loss": 0.6408, "step": 3386 }, { "epoch": 0.7805946070523162, "grad_norm": 0.17554426193237305, "learning_rate": 1.2106052152170561e-05, "loss": 0.6402, "step": 3387 }, { "epoch": 0.7808250749020512, "grad_norm": 0.16431263089179993, "learning_rate": 1.2081709578821709e-05, "loss": 0.6394, "step": 3388 }, { "epoch": 0.7810555427517861, "grad_norm": 0.16662830114364624, "learning_rate": 1.2057388140217818e-05, "loss": 0.6456, "step": 3389 }, { "epoch": 0.7812860106015211, "grad_norm": 0.16625608503818512, "learning_rate": 1.2033087849915076e-05, "loss": 0.6474, "step": 3390 }, { "epoch": 0.781516478451256, "grad_norm": 0.1618298441171646, "learning_rate": 1.2008808721457881e-05, "loss": 0.642, "step": 3391 }, { "epoch": 0.7817469463009911, "grad_norm": 0.1568860411643982, "learning_rate": 1.1984550768378856e-05, "loss": 0.6519, "step": 3392 }, { "epoch": 0.781977414150726, "grad_norm": 0.15564152598381042, "learning_rate": 1.1960314004198752e-05, "loss": 0.6391, "step": 3393 }, { "epoch": 0.782207882000461, "grad_norm": 0.16593000292778015, "learning_rate": 1.1936098442426608e-05, "loss": 0.6472, "step": 3394 }, { "epoch": 0.7824383498501959, "grad_norm": 0.1633983701467514, "learning_rate": 1.1911904096559589e-05, "loss": 0.6497, "step": 3395 }, { "epoch": 0.7826688176999309, "grad_norm": 0.15584073960781097, "learning_rate": 1.1887730980083023e-05, "loss": 0.6437, "step": 3396 }, { "epoch": 0.7828992855496658, "grad_norm": 0.15618832409381866, "learning_rate": 1.1863579106470434e-05, "loss": 0.6372, "step": 3397 }, { "epoch": 0.7831297533994008, "grad_norm": 0.16305984556674957, "learning_rate": 1.1839448489183503e-05, "loss": 0.6409, "step": 3398 }, { "epoch": 0.7833602212491357, "grad_norm": 0.1538400650024414, "learning_rate": 1.181533914167205e-05, "loss": 0.6457, "step": 3399 }, { "epoch": 0.7835906890988708, "grad_norm": 0.15274596214294434, "learning_rate": 1.1791251077374043e-05, "loss": 0.6412, "step": 3400 }, { "epoch": 0.7838211569486057, "grad_norm": 0.1625516712665558, "learning_rate": 1.1767184309715618e-05, "loss": 0.6418, "step": 3401 }, { "epoch": 0.7840516247983407, "grad_norm": 0.14723870158195496, "learning_rate": 1.1743138852110969e-05, "loss": 0.6439, "step": 3402 }, { "epoch": 0.7842820926480756, "grad_norm": 0.15598557889461517, "learning_rate": 1.1719114717962476e-05, "loss": 0.638, "step": 3403 }, { "epoch": 0.7845125604978106, "grad_norm": 0.16605372726917267, "learning_rate": 1.169511192066064e-05, "loss": 0.6421, "step": 3404 }, { "epoch": 0.7847430283475455, "grad_norm": 0.1595214158296585, "learning_rate": 1.1671130473584013e-05, "loss": 0.6449, "step": 3405 }, { "epoch": 0.7849734961972805, "grad_norm": 0.15899643301963806, "learning_rate": 1.1647170390099283e-05, "loss": 0.6373, "step": 3406 }, { "epoch": 0.7852039640470154, "grad_norm": 0.16188432276248932, "learning_rate": 1.1623231683561247e-05, "loss": 0.6418, "step": 3407 }, { "epoch": 0.7854344318967504, "grad_norm": 0.16148293018341064, "learning_rate": 1.1599314367312725e-05, "loss": 0.6438, "step": 3408 }, { "epoch": 0.7856648997464853, "grad_norm": 0.14987516403198242, "learning_rate": 1.157541845468469e-05, "loss": 0.6383, "step": 3409 }, { "epoch": 0.7858953675962204, "grad_norm": 0.160349503159523, "learning_rate": 1.1551543958996148e-05, "loss": 0.6452, "step": 3410 }, { "epoch": 0.7861258354459553, "grad_norm": 0.1663265824317932, "learning_rate": 1.1527690893554156e-05, "loss": 0.6423, "step": 3411 }, { "epoch": 0.7863563032956903, "grad_norm": 0.15916943550109863, "learning_rate": 1.1503859271653839e-05, "loss": 0.6423, "step": 3412 }, { "epoch": 0.7865867711454252, "grad_norm": 0.16647377610206604, "learning_rate": 1.1480049106578377e-05, "loss": 0.652, "step": 3413 }, { "epoch": 0.7868172389951602, "grad_norm": 0.15861909091472626, "learning_rate": 1.1456260411598984e-05, "loss": 0.6446, "step": 3414 }, { "epoch": 0.7870477068448951, "grad_norm": 0.15791212022304535, "learning_rate": 1.143249319997491e-05, "loss": 0.6436, "step": 3415 }, { "epoch": 0.7872781746946301, "grad_norm": 0.17296838760375977, "learning_rate": 1.1408747484953442e-05, "loss": 0.6361, "step": 3416 }, { "epoch": 0.787508642544365, "grad_norm": 0.16724546253681183, "learning_rate": 1.138502327976984e-05, "loss": 0.6409, "step": 3417 }, { "epoch": 0.7877391103941, "grad_norm": 0.1602526307106018, "learning_rate": 1.1361320597647407e-05, "loss": 0.636, "step": 3418 }, { "epoch": 0.787969578243835, "grad_norm": 0.15437106788158417, "learning_rate": 1.1337639451797494e-05, "loss": 0.6457, "step": 3419 }, { "epoch": 0.78820004609357, "grad_norm": 0.15734903514385223, "learning_rate": 1.1313979855419359e-05, "loss": 0.6389, "step": 3420 }, { "epoch": 0.7884305139433049, "grad_norm": 0.16602694988250732, "learning_rate": 1.1290341821700313e-05, "loss": 0.6401, "step": 3421 }, { "epoch": 0.7886609817930399, "grad_norm": 0.1661614030599594, "learning_rate": 1.1266725363815623e-05, "loss": 0.655, "step": 3422 }, { "epoch": 0.7888914496427748, "grad_norm": 0.1512596160173416, "learning_rate": 1.1243130494928533e-05, "loss": 0.6383, "step": 3423 }, { "epoch": 0.7891219174925098, "grad_norm": 0.14931842684745789, "learning_rate": 1.1219557228190258e-05, "loss": 0.6418, "step": 3424 }, { "epoch": 0.7893523853422447, "grad_norm": 0.1699947863817215, "learning_rate": 1.1196005576739993e-05, "loss": 0.6467, "step": 3425 }, { "epoch": 0.7895828531919797, "grad_norm": 0.1627425104379654, "learning_rate": 1.1172475553704826e-05, "loss": 0.6307, "step": 3426 }, { "epoch": 0.7898133210417146, "grad_norm": 0.17060711979866028, "learning_rate": 1.1148967172199848e-05, "loss": 0.652, "step": 3427 }, { "epoch": 0.7900437888914497, "grad_norm": 0.16025568544864655, "learning_rate": 1.1125480445328057e-05, "loss": 0.6434, "step": 3428 }, { "epoch": 0.7902742567411846, "grad_norm": 0.1679077297449112, "learning_rate": 1.1102015386180404e-05, "loss": 0.6428, "step": 3429 }, { "epoch": 0.7905047245909196, "grad_norm": 0.1602405458688736, "learning_rate": 1.1078572007835735e-05, "loss": 0.6387, "step": 3430 }, { "epoch": 0.7907351924406545, "grad_norm": 0.14988547563552856, "learning_rate": 1.1055150323360852e-05, "loss": 0.6398, "step": 3431 }, { "epoch": 0.7909656602903895, "grad_norm": 0.16209550201892853, "learning_rate": 1.10317503458104e-05, "loss": 0.6433, "step": 3432 }, { "epoch": 0.7911961281401244, "grad_norm": 0.16234451532363892, "learning_rate": 1.1008372088226992e-05, "loss": 0.6415, "step": 3433 }, { "epoch": 0.7914265959898594, "grad_norm": 0.14663785696029663, "learning_rate": 1.098501556364112e-05, "loss": 0.6472, "step": 3434 }, { "epoch": 0.7916570638395943, "grad_norm": 0.15545038878917694, "learning_rate": 1.0961680785071116e-05, "loss": 0.6441, "step": 3435 }, { "epoch": 0.7918875316893293, "grad_norm": 0.17046032845973969, "learning_rate": 1.0938367765523244e-05, "loss": 0.6392, "step": 3436 }, { "epoch": 0.7921179995390643, "grad_norm": 0.15083369612693787, "learning_rate": 1.0915076517991617e-05, "loss": 0.6356, "step": 3437 }, { "epoch": 0.7923484673887993, "grad_norm": 0.169847771525383, "learning_rate": 1.0891807055458226e-05, "loss": 0.6369, "step": 3438 }, { "epoch": 0.7925789352385342, "grad_norm": 0.16202600300312042, "learning_rate": 1.0868559390892902e-05, "loss": 0.6494, "step": 3439 }, { "epoch": 0.7928094030882692, "grad_norm": 0.1544782817363739, "learning_rate": 1.0845333537253349e-05, "loss": 0.6499, "step": 3440 }, { "epoch": 0.7930398709380041, "grad_norm": 0.15906357765197754, "learning_rate": 1.082212950748508e-05, "loss": 0.6448, "step": 3441 }, { "epoch": 0.7932703387877391, "grad_norm": 0.1550484299659729, "learning_rate": 1.0798947314521468e-05, "loss": 0.6422, "step": 3442 }, { "epoch": 0.793500806637474, "grad_norm": 0.15442262589931488, "learning_rate": 1.0775786971283725e-05, "loss": 0.64, "step": 3443 }, { "epoch": 0.793731274487209, "grad_norm": 0.1611274778842926, "learning_rate": 1.0752648490680856e-05, "loss": 0.642, "step": 3444 }, { "epoch": 0.7939617423369439, "grad_norm": 0.15969206392765045, "learning_rate": 1.07295318856097e-05, "loss": 0.641, "step": 3445 }, { "epoch": 0.794192210186679, "grad_norm": 0.15087677538394928, "learning_rate": 1.0706437168954913e-05, "loss": 0.6345, "step": 3446 }, { "epoch": 0.7944226780364139, "grad_norm": 0.1505325436592102, "learning_rate": 1.0683364353588898e-05, "loss": 0.639, "step": 3447 }, { "epoch": 0.7946531458861489, "grad_norm": 0.16190530359745026, "learning_rate": 1.0660313452371922e-05, "loss": 0.6442, "step": 3448 }, { "epoch": 0.7948836137358839, "grad_norm": 0.15547344088554382, "learning_rate": 1.063728447815201e-05, "loss": 0.6475, "step": 3449 }, { "epoch": 0.7951140815856188, "grad_norm": 0.1529662311077118, "learning_rate": 1.0614277443764925e-05, "loss": 0.642, "step": 3450 }, { "epoch": 0.7953445494353538, "grad_norm": 0.15717142820358276, "learning_rate": 1.0591292362034255e-05, "loss": 0.6335, "step": 3451 }, { "epoch": 0.7955750172850887, "grad_norm": 0.15520958602428436, "learning_rate": 1.0568329245771336e-05, "loss": 0.6368, "step": 3452 }, { "epoch": 0.7958054851348237, "grad_norm": 0.16604743897914886, "learning_rate": 1.054538810777525e-05, "loss": 0.6456, "step": 3453 }, { "epoch": 0.7960359529845586, "grad_norm": 0.15997354686260223, "learning_rate": 1.0522468960832842e-05, "loss": 0.6438, "step": 3454 }, { "epoch": 0.7962664208342937, "grad_norm": 0.1553625613451004, "learning_rate": 1.0499571817718707e-05, "loss": 0.6391, "step": 3455 }, { "epoch": 0.7964968886840286, "grad_norm": 0.2511269748210907, "learning_rate": 1.0476696691195138e-05, "loss": 0.6387, "step": 3456 }, { "epoch": 0.7967273565337636, "grad_norm": 0.14997459948062897, "learning_rate": 1.0453843594012175e-05, "loss": 0.6303, "step": 3457 }, { "epoch": 0.7969578243834985, "grad_norm": 0.14934763312339783, "learning_rate": 1.0431012538907631e-05, "loss": 0.6377, "step": 3458 }, { "epoch": 0.7971882922332335, "grad_norm": 0.15451250970363617, "learning_rate": 1.0408203538606948e-05, "loss": 0.6484, "step": 3459 }, { "epoch": 0.7974187600829684, "grad_norm": 0.15496329963207245, "learning_rate": 1.0385416605823323e-05, "loss": 0.6356, "step": 3460 }, { "epoch": 0.7976492279327034, "grad_norm": 0.15835978090763092, "learning_rate": 1.0362651753257668e-05, "loss": 0.6446, "step": 3461 }, { "epoch": 0.7978796957824383, "grad_norm": 0.16143862903118134, "learning_rate": 1.0339908993598518e-05, "loss": 0.6367, "step": 3462 }, { "epoch": 0.7981101636321734, "grad_norm": 0.1462894082069397, "learning_rate": 1.0317188339522188e-05, "loss": 0.647, "step": 3463 }, { "epoch": 0.7983406314819083, "grad_norm": 0.14987273514270782, "learning_rate": 1.029448980369262e-05, "loss": 0.6468, "step": 3464 }, { "epoch": 0.7985710993316433, "grad_norm": 0.1489580124616623, "learning_rate": 1.0271813398761405e-05, "loss": 0.6474, "step": 3465 }, { "epoch": 0.7988015671813782, "grad_norm": 0.1617879867553711, "learning_rate": 1.0249159137367842e-05, "loss": 0.6429, "step": 3466 }, { "epoch": 0.7990320350311132, "grad_norm": 0.1757936030626297, "learning_rate": 1.0226527032138878e-05, "loss": 0.6353, "step": 3467 }, { "epoch": 0.7992625028808481, "grad_norm": 0.15790215134620667, "learning_rate": 1.0203917095689097e-05, "loss": 0.6442, "step": 3468 }, { "epoch": 0.7994929707305831, "grad_norm": 0.155640110373497, "learning_rate": 1.0181329340620743e-05, "loss": 0.643, "step": 3469 }, { "epoch": 0.799723438580318, "grad_norm": 0.16087128221988678, "learning_rate": 1.0158763779523695e-05, "loss": 0.6401, "step": 3470 }, { "epoch": 0.799953906430053, "grad_norm": 0.16374313831329346, "learning_rate": 1.0136220424975435e-05, "loss": 0.6386, "step": 3471 }, { "epoch": 0.800184374279788, "grad_norm": 0.16088031232357025, "learning_rate": 1.011369928954108e-05, "loss": 0.6392, "step": 3472 }, { "epoch": 0.800414842129523, "grad_norm": 0.1503087878227234, "learning_rate": 1.0091200385773408e-05, "loss": 0.6402, "step": 3473 }, { "epoch": 0.8006453099792579, "grad_norm": 0.16075649857521057, "learning_rate": 1.0068723726212742e-05, "loss": 0.6463, "step": 3474 }, { "epoch": 0.8008757778289929, "grad_norm": 0.16603581607341766, "learning_rate": 1.0046269323387036e-05, "loss": 0.643, "step": 3475 }, { "epoch": 0.8011062456787278, "grad_norm": 0.15744850039482117, "learning_rate": 1.0023837189811835e-05, "loss": 0.6437, "step": 3476 }, { "epoch": 0.8013367135284628, "grad_norm": 0.16175509989261627, "learning_rate": 1.0001427337990277e-05, "loss": 0.6434, "step": 3477 }, { "epoch": 0.8015671813781977, "grad_norm": 0.17071296274662018, "learning_rate": 9.979039780413068e-06, "loss": 0.6449, "step": 3478 }, { "epoch": 0.8017976492279327, "grad_norm": 0.15305480360984802, "learning_rate": 9.956674529558518e-06, "loss": 0.6427, "step": 3479 }, { "epoch": 0.8020281170776676, "grad_norm": 0.16541430354118347, "learning_rate": 9.934331597892448e-06, "loss": 0.6406, "step": 3480 }, { "epoch": 0.8022585849274027, "grad_norm": 0.15421070158481598, "learning_rate": 9.912010997868287e-06, "loss": 0.6473, "step": 3481 }, { "epoch": 0.8024890527771376, "grad_norm": 0.15333889424800873, "learning_rate": 9.889712741926998e-06, "loss": 0.6405, "step": 3482 }, { "epoch": 0.8027195206268726, "grad_norm": 0.15218707919120789, "learning_rate": 9.867436842497103e-06, "loss": 0.632, "step": 3483 }, { "epoch": 0.8029499884766075, "grad_norm": 0.15634161233901978, "learning_rate": 9.845183311994637e-06, "loss": 0.6378, "step": 3484 }, { "epoch": 0.8031804563263425, "grad_norm": 0.157097727060318, "learning_rate": 9.822952162823201e-06, "loss": 0.6448, "step": 3485 }, { "epoch": 0.8034109241760774, "grad_norm": 0.15480422973632812, "learning_rate": 9.800743407373896e-06, "loss": 0.6354, "step": 3486 }, { "epoch": 0.8036413920258124, "grad_norm": 0.15486778318881989, "learning_rate": 9.778557058025356e-06, "loss": 0.6309, "step": 3487 }, { "epoch": 0.8038718598755473, "grad_norm": 0.15796728432178497, "learning_rate": 9.756393127143709e-06, "loss": 0.6334, "step": 3488 }, { "epoch": 0.8041023277252823, "grad_norm": 0.15152916312217712, "learning_rate": 9.734251627082613e-06, "loss": 0.6429, "step": 3489 }, { "epoch": 0.8043327955750172, "grad_norm": 0.1551668345928192, "learning_rate": 9.71213257018319e-06, "loss": 0.6454, "step": 3490 }, { "epoch": 0.8045632634247523, "grad_norm": 0.16736368834972382, "learning_rate": 9.69003596877408e-06, "loss": 0.6362, "step": 3491 }, { "epoch": 0.8047937312744872, "grad_norm": 0.14687344431877136, "learning_rate": 9.667961835171402e-06, "loss": 0.6366, "step": 3492 }, { "epoch": 0.8050241991242222, "grad_norm": 0.1573791354894638, "learning_rate": 9.645910181678741e-06, "loss": 0.6338, "step": 3493 }, { "epoch": 0.8052546669739571, "grad_norm": 0.1550455242395401, "learning_rate": 9.62388102058716e-06, "loss": 0.6381, "step": 3494 }, { "epoch": 0.8054851348236921, "grad_norm": 0.15506303310394287, "learning_rate": 9.601874364175206e-06, "loss": 0.6391, "step": 3495 }, { "epoch": 0.805715602673427, "grad_norm": 0.15035480260849, "learning_rate": 9.579890224708826e-06, "loss": 0.6455, "step": 3496 }, { "epoch": 0.805946070523162, "grad_norm": 0.1552959680557251, "learning_rate": 9.557928614441458e-06, "loss": 0.641, "step": 3497 }, { "epoch": 0.8061765383728969, "grad_norm": 0.17842282354831696, "learning_rate": 9.535989545614016e-06, "loss": 0.6346, "step": 3498 }, { "epoch": 0.806407006222632, "grad_norm": 0.15210920572280884, "learning_rate": 9.514073030454762e-06, "loss": 0.6414, "step": 3499 }, { "epoch": 0.8066374740723669, "grad_norm": 0.16027803719043732, "learning_rate": 9.49217908117946e-06, "loss": 0.6539, "step": 3500 }, { "epoch": 0.8068679419221019, "grad_norm": 0.1576579064130783, "learning_rate": 9.470307709991267e-06, "loss": 0.6397, "step": 3501 }, { "epoch": 0.8070984097718368, "grad_norm": 0.15845666825771332, "learning_rate": 9.448458929080756e-06, "loss": 0.6395, "step": 3502 }, { "epoch": 0.8073288776215718, "grad_norm": 0.15758542716503143, "learning_rate": 9.426632750625918e-06, "loss": 0.6437, "step": 3503 }, { "epoch": 0.8075593454713067, "grad_norm": 0.15715070068836212, "learning_rate": 9.404829186792152e-06, "loss": 0.6347, "step": 3504 }, { "epoch": 0.8077898133210417, "grad_norm": 0.15392762422561646, "learning_rate": 9.383048249732217e-06, "loss": 0.6407, "step": 3505 }, { "epoch": 0.8080202811707766, "grad_norm": 0.15316665172576904, "learning_rate": 9.3612899515863e-06, "loss": 0.6394, "step": 3506 }, { "epoch": 0.8082507490205116, "grad_norm": 0.15614305436611176, "learning_rate": 9.339554304481951e-06, "loss": 0.6348, "step": 3507 }, { "epoch": 0.8084812168702467, "grad_norm": 0.16622847318649292, "learning_rate": 9.317841320534092e-06, "loss": 0.6378, "step": 3508 }, { "epoch": 0.8087116847199816, "grad_norm": 0.1508837789297104, "learning_rate": 9.296151011845034e-06, "loss": 0.6428, "step": 3509 }, { "epoch": 0.8089421525697166, "grad_norm": 0.15079739689826965, "learning_rate": 9.27448339050443e-06, "loss": 0.6462, "step": 3510 }, { "epoch": 0.8091726204194515, "grad_norm": 0.15400461852550507, "learning_rate": 9.252838468589265e-06, "loss": 0.6382, "step": 3511 }, { "epoch": 0.8094030882691865, "grad_norm": 0.16339372098445892, "learning_rate": 9.231216258163939e-06, "loss": 0.6392, "step": 3512 }, { "epoch": 0.8096335561189214, "grad_norm": 0.15596559643745422, "learning_rate": 9.209616771280139e-06, "loss": 0.6341, "step": 3513 }, { "epoch": 0.8098640239686564, "grad_norm": 0.1570686399936676, "learning_rate": 9.18804001997689e-06, "loss": 0.6424, "step": 3514 }, { "epoch": 0.8100944918183913, "grad_norm": 0.1516939401626587, "learning_rate": 9.166486016280562e-06, "loss": 0.6523, "step": 3515 }, { "epoch": 0.8103249596681263, "grad_norm": 0.15133339166641235, "learning_rate": 9.14495477220484e-06, "loss": 0.6376, "step": 3516 }, { "epoch": 0.8105554275178612, "grad_norm": 0.1549098789691925, "learning_rate": 9.12344629975072e-06, "loss": 0.6424, "step": 3517 }, { "epoch": 0.8107858953675963, "grad_norm": 0.1573808342218399, "learning_rate": 9.101960610906519e-06, "loss": 0.6441, "step": 3518 }, { "epoch": 0.8110163632173312, "grad_norm": 0.16286353766918182, "learning_rate": 9.08049771764784e-06, "loss": 0.639, "step": 3519 }, { "epoch": 0.8112468310670662, "grad_norm": 0.15851277112960815, "learning_rate": 9.059057631937567e-06, "loss": 0.642, "step": 3520 }, { "epoch": 0.8114772989168011, "grad_norm": 0.15502890944480896, "learning_rate": 9.037640365725897e-06, "loss": 0.6326, "step": 3521 }, { "epoch": 0.8117077667665361, "grad_norm": 0.15994207561016083, "learning_rate": 9.01624593095033e-06, "loss": 0.6448, "step": 3522 }, { "epoch": 0.811938234616271, "grad_norm": 0.1541290283203125, "learning_rate": 8.994874339535569e-06, "loss": 0.6481, "step": 3523 }, { "epoch": 0.812168702466006, "grad_norm": 0.14977748692035675, "learning_rate": 8.973525603393645e-06, "loss": 0.6415, "step": 3524 }, { "epoch": 0.8123991703157409, "grad_norm": 0.15313945710659027, "learning_rate": 8.952199734423843e-06, "loss": 0.6383, "step": 3525 }, { "epoch": 0.812629638165476, "grad_norm": 0.15545286238193512, "learning_rate": 8.930896744512652e-06, "loss": 0.6438, "step": 3526 }, { "epoch": 0.8128601060152109, "grad_norm": 0.15130577981472015, "learning_rate": 8.909616645533886e-06, "loss": 0.6466, "step": 3527 }, { "epoch": 0.8130905738649459, "grad_norm": 0.15956194698810577, "learning_rate": 8.888359449348555e-06, "loss": 0.6353, "step": 3528 }, { "epoch": 0.8133210417146808, "grad_norm": 0.15058216452598572, "learning_rate": 8.867125167804896e-06, "loss": 0.6374, "step": 3529 }, { "epoch": 0.8135515095644158, "grad_norm": 0.14475852251052856, "learning_rate": 8.845913812738394e-06, "loss": 0.6442, "step": 3530 }, { "epoch": 0.8137819774141507, "grad_norm": 0.14834396541118622, "learning_rate": 8.824725395971745e-06, "loss": 0.6469, "step": 3531 }, { "epoch": 0.8140124452638857, "grad_norm": 0.15428373217582703, "learning_rate": 8.803559929314869e-06, "loss": 0.6378, "step": 3532 }, { "epoch": 0.8142429131136206, "grad_norm": 0.15115933120250702, "learning_rate": 8.782417424564893e-06, "loss": 0.6415, "step": 3533 }, { "epoch": 0.8144733809633556, "grad_norm": 0.14465634524822235, "learning_rate": 8.761297893506149e-06, "loss": 0.6432, "step": 3534 }, { "epoch": 0.8147038488130905, "grad_norm": 0.15278743207454681, "learning_rate": 8.740201347910132e-06, "loss": 0.6305, "step": 3535 }, { "epoch": 0.8149343166628256, "grad_norm": 0.15596990287303925, "learning_rate": 8.719127799535547e-06, "loss": 0.6451, "step": 3536 }, { "epoch": 0.8151647845125605, "grad_norm": 0.15721940994262695, "learning_rate": 8.698077260128329e-06, "loss": 0.636, "step": 3537 }, { "epoch": 0.8153952523622955, "grad_norm": 0.15862002968788147, "learning_rate": 8.677049741421506e-06, "loss": 0.643, "step": 3538 }, { "epoch": 0.8156257202120304, "grad_norm": 0.15502506494522095, "learning_rate": 8.656045255135314e-06, "loss": 0.641, "step": 3539 }, { "epoch": 0.8158561880617654, "grad_norm": 0.1488857865333557, "learning_rate": 8.635063812977156e-06, "loss": 0.6389, "step": 3540 }, { "epoch": 0.8160866559115003, "grad_norm": 0.1511356681585312, "learning_rate": 8.61410542664159e-06, "loss": 0.6383, "step": 3541 }, { "epoch": 0.8163171237612353, "grad_norm": 0.15279202163219452, "learning_rate": 8.593170107810312e-06, "loss": 0.633, "step": 3542 }, { "epoch": 0.8165475916109702, "grad_norm": 0.1624889373779297, "learning_rate": 8.572257868152172e-06, "loss": 0.6456, "step": 3543 }, { "epoch": 0.8167780594607053, "grad_norm": 0.15917298197746277, "learning_rate": 8.551368719323139e-06, "loss": 0.6379, "step": 3544 }, { "epoch": 0.8170085273104402, "grad_norm": 0.16097372770309448, "learning_rate": 8.530502672966328e-06, "loss": 0.6475, "step": 3545 }, { "epoch": 0.8172389951601752, "grad_norm": 0.14943112432956696, "learning_rate": 8.509659740711973e-06, "loss": 0.646, "step": 3546 }, { "epoch": 0.8174694630099101, "grad_norm": 0.15080755949020386, "learning_rate": 8.488839934177422e-06, "loss": 0.6347, "step": 3547 }, { "epoch": 0.8176999308596451, "grad_norm": 0.16167816519737244, "learning_rate": 8.46804326496714e-06, "loss": 0.6356, "step": 3548 }, { "epoch": 0.81793039870938, "grad_norm": 0.15981729328632355, "learning_rate": 8.447269744672703e-06, "loss": 0.6338, "step": 3549 }, { "epoch": 0.818160866559115, "grad_norm": 0.1530756950378418, "learning_rate": 8.426519384872733e-06, "loss": 0.6392, "step": 3550 }, { "epoch": 0.8183913344088499, "grad_norm": 0.1670352965593338, "learning_rate": 8.405792197133022e-06, "loss": 0.6457, "step": 3551 }, { "epoch": 0.8186218022585849, "grad_norm": 0.1554369479417801, "learning_rate": 8.385088193006407e-06, "loss": 0.6376, "step": 3552 }, { "epoch": 0.8188522701083198, "grad_norm": 0.15348638594150543, "learning_rate": 8.364407384032775e-06, "loss": 0.6355, "step": 3553 }, { "epoch": 0.8190827379580549, "grad_norm": 0.15805724263191223, "learning_rate": 8.343749781739125e-06, "loss": 0.6443, "step": 3554 }, { "epoch": 0.8193132058077898, "grad_norm": 0.15739752352237701, "learning_rate": 8.323115397639513e-06, "loss": 0.6445, "step": 3555 }, { "epoch": 0.8195436736575248, "grad_norm": 0.16260041296482086, "learning_rate": 8.302504243235043e-06, "loss": 0.6431, "step": 3556 }, { "epoch": 0.8197741415072597, "grad_norm": 0.14841555058956146, "learning_rate": 8.281916330013889e-06, "loss": 0.6307, "step": 3557 }, { "epoch": 0.8200046093569947, "grad_norm": 0.15895935893058777, "learning_rate": 8.261351669451256e-06, "loss": 0.6315, "step": 3558 }, { "epoch": 0.8202350772067296, "grad_norm": 0.14655686914920807, "learning_rate": 8.240810273009381e-06, "loss": 0.6334, "step": 3559 }, { "epoch": 0.8204655450564646, "grad_norm": 0.1525675356388092, "learning_rate": 8.220292152137554e-06, "loss": 0.6474, "step": 3560 }, { "epoch": 0.8206960129061995, "grad_norm": 0.15452326834201813, "learning_rate": 8.199797318272085e-06, "loss": 0.6402, "step": 3561 }, { "epoch": 0.8209264807559346, "grad_norm": 0.15377068519592285, "learning_rate": 8.179325782836295e-06, "loss": 0.6416, "step": 3562 }, { "epoch": 0.8211569486056695, "grad_norm": 0.15333302319049835, "learning_rate": 8.158877557240529e-06, "loss": 0.6398, "step": 3563 }, { "epoch": 0.8213874164554045, "grad_norm": 0.14731299877166748, "learning_rate": 8.138452652882156e-06, "loss": 0.6459, "step": 3564 }, { "epoch": 0.8216178843051394, "grad_norm": 0.15332041680812836, "learning_rate": 8.118051081145484e-06, "loss": 0.6408, "step": 3565 }, { "epoch": 0.8218483521548744, "grad_norm": 0.15887399017810822, "learning_rate": 8.097672853401894e-06, "loss": 0.643, "step": 3566 }, { "epoch": 0.8220788200046094, "grad_norm": 0.14689992368221283, "learning_rate": 8.07731798100973e-06, "loss": 0.6382, "step": 3567 }, { "epoch": 0.8223092878543443, "grad_norm": 0.15063437819480896, "learning_rate": 8.056986475314283e-06, "loss": 0.6366, "step": 3568 }, { "epoch": 0.8225397557040793, "grad_norm": 0.15097613632678986, "learning_rate": 8.036678347647853e-06, "loss": 0.6489, "step": 3569 }, { "epoch": 0.8227702235538142, "grad_norm": 0.14924323558807373, "learning_rate": 8.016393609329703e-06, "loss": 0.6397, "step": 3570 }, { "epoch": 0.8230006914035493, "grad_norm": 0.14785458147525787, "learning_rate": 7.996132271666062e-06, "loss": 0.6405, "step": 3571 }, { "epoch": 0.8232311592532842, "grad_norm": 0.1523989886045456, "learning_rate": 7.975894345950114e-06, "loss": 0.6478, "step": 3572 }, { "epoch": 0.8234616271030192, "grad_norm": 0.152211531996727, "learning_rate": 7.955679843462005e-06, "loss": 0.6379, "step": 3573 }, { "epoch": 0.8236920949527541, "grad_norm": 0.14990997314453125, "learning_rate": 7.935488775468791e-06, "loss": 0.6435, "step": 3574 }, { "epoch": 0.8239225628024891, "grad_norm": 0.1513289213180542, "learning_rate": 7.915321153224487e-06, "loss": 0.6429, "step": 3575 }, { "epoch": 0.824153030652224, "grad_norm": 0.150692880153656, "learning_rate": 7.895176987970071e-06, "loss": 0.6436, "step": 3576 }, { "epoch": 0.824383498501959, "grad_norm": 0.14454329013824463, "learning_rate": 7.875056290933391e-06, "loss": 0.6294, "step": 3577 }, { "epoch": 0.8246139663516939, "grad_norm": 0.14884406328201294, "learning_rate": 7.85495907332925e-06, "loss": 0.639, "step": 3578 }, { "epoch": 0.8248444342014289, "grad_norm": 0.15509894490242004, "learning_rate": 7.83488534635936e-06, "loss": 0.6402, "step": 3579 }, { "epoch": 0.8250749020511638, "grad_norm": 0.15603148937225342, "learning_rate": 7.814835121212305e-06, "loss": 0.6336, "step": 3580 }, { "epoch": 0.8253053699008989, "grad_norm": 0.1463710367679596, "learning_rate": 7.794808409063636e-06, "loss": 0.6454, "step": 3581 }, { "epoch": 0.8255358377506338, "grad_norm": 0.15044938027858734, "learning_rate": 7.774805221075764e-06, "loss": 0.6402, "step": 3582 }, { "epoch": 0.8257663056003688, "grad_norm": 0.1514042317867279, "learning_rate": 7.754825568397955e-06, "loss": 0.6413, "step": 3583 }, { "epoch": 0.8259967734501037, "grad_norm": 0.15234024822711945, "learning_rate": 7.7348694621664e-06, "loss": 0.6444, "step": 3584 }, { "epoch": 0.8262272412998387, "grad_norm": 0.14573800563812256, "learning_rate": 7.71493691350416e-06, "loss": 0.6449, "step": 3585 }, { "epoch": 0.8264577091495736, "grad_norm": 0.14887578785419464, "learning_rate": 7.695027933521154e-06, "loss": 0.6321, "step": 3586 }, { "epoch": 0.8266881769993086, "grad_norm": 0.15681131184101105, "learning_rate": 7.675142533314172e-06, "loss": 0.6428, "step": 3587 }, { "epoch": 0.8269186448490435, "grad_norm": 0.15508340299129486, "learning_rate": 7.655280723966868e-06, "loss": 0.6383, "step": 3588 }, { "epoch": 0.8271491126987786, "grad_norm": 0.15364862978458405, "learning_rate": 7.635442516549719e-06, "loss": 0.636, "step": 3589 }, { "epoch": 0.8273795805485135, "grad_norm": 0.1485067903995514, "learning_rate": 7.615627922120056e-06, "loss": 0.6457, "step": 3590 }, { "epoch": 0.8276100483982485, "grad_norm": 0.14897732436656952, "learning_rate": 7.5958369517221075e-06, "loss": 0.6437, "step": 3591 }, { "epoch": 0.8278405162479834, "grad_norm": 0.1528944969177246, "learning_rate": 7.576069616386838e-06, "loss": 0.6447, "step": 3592 }, { "epoch": 0.8280709840977184, "grad_norm": 0.15931028127670288, "learning_rate": 7.556325927132102e-06, "loss": 0.6421, "step": 3593 }, { "epoch": 0.8283014519474533, "grad_norm": 0.15149252116680145, "learning_rate": 7.536605894962562e-06, "loss": 0.6398, "step": 3594 }, { "epoch": 0.8285319197971883, "grad_norm": 0.14745914936065674, "learning_rate": 7.5169095308696865e-06, "loss": 0.65, "step": 3595 }, { "epoch": 0.8287623876469232, "grad_norm": 0.1486995816230774, "learning_rate": 7.497236845831751e-06, "loss": 0.6366, "step": 3596 }, { "epoch": 0.8289928554966582, "grad_norm": 0.14624127745628357, "learning_rate": 7.4775878508138606e-06, "loss": 0.6405, "step": 3597 }, { "epoch": 0.8292233233463931, "grad_norm": 0.1482323855161667, "learning_rate": 7.457962556767866e-06, "loss": 0.6405, "step": 3598 }, { "epoch": 0.8294537911961282, "grad_norm": 0.14901240170001984, "learning_rate": 7.438360974632441e-06, "loss": 0.6402, "step": 3599 }, { "epoch": 0.8296842590458631, "grad_norm": 0.15405641496181488, "learning_rate": 7.418783115333045e-06, "loss": 0.6439, "step": 3600 }, { "epoch": 0.8299147268955981, "grad_norm": 0.14017413556575775, "learning_rate": 7.399228989781898e-06, "loss": 0.6411, "step": 3601 }, { "epoch": 0.830145194745333, "grad_norm": 0.14487187564373016, "learning_rate": 7.3796986088780105e-06, "loss": 0.6315, "step": 3602 }, { "epoch": 0.830375662595068, "grad_norm": 0.15042652189731598, "learning_rate": 7.360191983507153e-06, "loss": 0.6384, "step": 3603 }, { "epoch": 0.8306061304448029, "grad_norm": 0.15280528366565704, "learning_rate": 7.340709124541817e-06, "loss": 0.6446, "step": 3604 }, { "epoch": 0.8308365982945379, "grad_norm": 0.1426979899406433, "learning_rate": 7.321250042841316e-06, "loss": 0.6448, "step": 3605 }, { "epoch": 0.8310670661442728, "grad_norm": 0.14999282360076904, "learning_rate": 7.301814749251679e-06, "loss": 0.6387, "step": 3606 }, { "epoch": 0.8312975339940079, "grad_norm": 0.151020810008049, "learning_rate": 7.2824032546056354e-06, "loss": 0.6489, "step": 3607 }, { "epoch": 0.8315280018437428, "grad_norm": 0.15103960037231445, "learning_rate": 7.2630155697227146e-06, "loss": 0.6448, "step": 3608 }, { "epoch": 0.8317584696934778, "grad_norm": 0.14460238814353943, "learning_rate": 7.243651705409132e-06, "loss": 0.6409, "step": 3609 }, { "epoch": 0.8319889375432127, "grad_norm": 0.14994823932647705, "learning_rate": 7.2243116724578565e-06, "loss": 0.6336, "step": 3610 }, { "epoch": 0.8322194053929477, "grad_norm": 0.15003249049186707, "learning_rate": 7.2049954816485465e-06, "loss": 0.6397, "step": 3611 }, { "epoch": 0.8324498732426826, "grad_norm": 0.15217861533164978, "learning_rate": 7.185703143747596e-06, "loss": 0.6425, "step": 3612 }, { "epoch": 0.8326803410924176, "grad_norm": 0.1463039368391037, "learning_rate": 7.166434669508071e-06, "loss": 0.6341, "step": 3613 }, { "epoch": 0.8329108089421525, "grad_norm": 0.1474071443080902, "learning_rate": 7.147190069669768e-06, "loss": 0.6397, "step": 3614 }, { "epoch": 0.8331412767918875, "grad_norm": 0.14856012165546417, "learning_rate": 7.127969354959163e-06, "loss": 0.6444, "step": 3615 }, { "epoch": 0.8333717446416224, "grad_norm": 0.147612527012825, "learning_rate": 7.1087725360894195e-06, "loss": 0.6347, "step": 3616 }, { "epoch": 0.8336022124913575, "grad_norm": 0.15374769270420074, "learning_rate": 7.08959962376039e-06, "loss": 0.6435, "step": 3617 }, { "epoch": 0.8338326803410924, "grad_norm": 0.14860646426677704, "learning_rate": 7.070450628658592e-06, "loss": 0.646, "step": 3618 }, { "epoch": 0.8340631481908274, "grad_norm": 0.1480385959148407, "learning_rate": 7.051325561457217e-06, "loss": 0.6384, "step": 3619 }, { "epoch": 0.8342936160405623, "grad_norm": 0.1520072966814041, "learning_rate": 7.0322244328161144e-06, "loss": 0.6458, "step": 3620 }, { "epoch": 0.8345240838902973, "grad_norm": 0.14834314584732056, "learning_rate": 7.013147253381797e-06, "loss": 0.6341, "step": 3621 }, { "epoch": 0.8347545517400322, "grad_norm": 0.14721745252609253, "learning_rate": 6.99409403378744e-06, "loss": 0.6417, "step": 3622 }, { "epoch": 0.8349850195897672, "grad_norm": 0.1415952891111374, "learning_rate": 6.975064784652829e-06, "loss": 0.6443, "step": 3623 }, { "epoch": 0.8352154874395021, "grad_norm": 0.14462348818778992, "learning_rate": 6.9560595165844175e-06, "loss": 0.633, "step": 3624 }, { "epoch": 0.8354459552892372, "grad_norm": 0.1447225958108902, "learning_rate": 6.937078240175287e-06, "loss": 0.6442, "step": 3625 }, { "epoch": 0.8356764231389722, "grad_norm": 0.1561073660850525, "learning_rate": 6.918120966005148e-06, "loss": 0.6468, "step": 3626 }, { "epoch": 0.8359068909887071, "grad_norm": 0.1541508585214615, "learning_rate": 6.899187704640325e-06, "loss": 0.6415, "step": 3627 }, { "epoch": 0.8361373588384421, "grad_norm": 0.15150463581085205, "learning_rate": 6.880278466633783e-06, "loss": 0.6413, "step": 3628 }, { "epoch": 0.836367826688177, "grad_norm": 0.14670462906360626, "learning_rate": 6.861393262525035e-06, "loss": 0.6488, "step": 3629 }, { "epoch": 0.836598294537912, "grad_norm": 0.14568495750427246, "learning_rate": 6.842532102840277e-06, "loss": 0.6333, "step": 3630 }, { "epoch": 0.8368287623876469, "grad_norm": 0.15337829291820526, "learning_rate": 6.823694998092273e-06, "loss": 0.6408, "step": 3631 }, { "epoch": 0.8370592302373819, "grad_norm": 0.14713677763938904, "learning_rate": 6.80488195878034e-06, "loss": 0.6367, "step": 3632 }, { "epoch": 0.8372896980871168, "grad_norm": 0.14200221002101898, "learning_rate": 6.786092995390436e-06, "loss": 0.6303, "step": 3633 }, { "epoch": 0.8375201659368519, "grad_norm": 0.15284350514411926, "learning_rate": 6.7673281183950665e-06, "loss": 0.6438, "step": 3634 }, { "epoch": 0.8377506337865868, "grad_norm": 0.147255539894104, "learning_rate": 6.748587338253337e-06, "loss": 0.643, "step": 3635 }, { "epoch": 0.8379811016363218, "grad_norm": 0.14654171466827393, "learning_rate": 6.729870665410898e-06, "loss": 0.6404, "step": 3636 }, { "epoch": 0.8382115694860567, "grad_norm": 0.15386711061000824, "learning_rate": 6.711178110299993e-06, "loss": 0.6394, "step": 3637 }, { "epoch": 0.8384420373357917, "grad_norm": 0.15187443792819977, "learning_rate": 6.692509683339371e-06, "loss": 0.6411, "step": 3638 }, { "epoch": 0.8386725051855266, "grad_norm": 0.14688163995742798, "learning_rate": 6.673865394934376e-06, "loss": 0.6341, "step": 3639 }, { "epoch": 0.8389029730352616, "grad_norm": 0.15309026837348938, "learning_rate": 6.655245255476911e-06, "loss": 0.6373, "step": 3640 }, { "epoch": 0.8391334408849965, "grad_norm": 0.15117040276527405, "learning_rate": 6.6366492753453695e-06, "loss": 0.6554, "step": 3641 }, { "epoch": 0.8393639087347315, "grad_norm": 0.15723955631256104, "learning_rate": 6.61807746490471e-06, "loss": 0.6349, "step": 3642 }, { "epoch": 0.8395943765844665, "grad_norm": 0.15363402664661407, "learning_rate": 6.59952983450643e-06, "loss": 0.641, "step": 3643 }, { "epoch": 0.8398248444342015, "grad_norm": 0.14676257967948914, "learning_rate": 6.581006394488493e-06, "loss": 0.6338, "step": 3644 }, { "epoch": 0.8400553122839364, "grad_norm": 0.14586247503757477, "learning_rate": 6.562507155175457e-06, "loss": 0.6316, "step": 3645 }, { "epoch": 0.8402857801336714, "grad_norm": 0.1537342369556427, "learning_rate": 6.544032126878358e-06, "loss": 0.6407, "step": 3646 }, { "epoch": 0.8405162479834063, "grad_norm": 0.14989496767520905, "learning_rate": 6.525581319894703e-06, "loss": 0.6309, "step": 3647 }, { "epoch": 0.8407467158331413, "grad_norm": 0.15396440029144287, "learning_rate": 6.507154744508548e-06, "loss": 0.6486, "step": 3648 }, { "epoch": 0.8409771836828762, "grad_norm": 0.14274229109287262, "learning_rate": 6.488752410990417e-06, "loss": 0.6309, "step": 3649 }, { "epoch": 0.8412076515326112, "grad_norm": 0.14844496548175812, "learning_rate": 6.470374329597334e-06, "loss": 0.6343, "step": 3650 }, { "epoch": 0.8414381193823461, "grad_norm": 0.15259870886802673, "learning_rate": 6.452020510572798e-06, "loss": 0.6426, "step": 3651 }, { "epoch": 0.8416685872320812, "grad_norm": 0.1534135341644287, "learning_rate": 6.433690964146799e-06, "loss": 0.6334, "step": 3652 }, { "epoch": 0.8418990550818161, "grad_norm": 0.14689849317073822, "learning_rate": 6.415385700535764e-06, "loss": 0.6429, "step": 3653 }, { "epoch": 0.8421295229315511, "grad_norm": 0.14878277480602264, "learning_rate": 6.39710472994261e-06, "loss": 0.6417, "step": 3654 }, { "epoch": 0.842359990781286, "grad_norm": 0.1506912261247635, "learning_rate": 6.378848062556741e-06, "loss": 0.6453, "step": 3655 }, { "epoch": 0.842590458631021, "grad_norm": 0.15941332280635834, "learning_rate": 6.360615708553952e-06, "loss": 0.6397, "step": 3656 }, { "epoch": 0.8428209264807559, "grad_norm": 0.15402281284332275, "learning_rate": 6.342407678096534e-06, "loss": 0.6413, "step": 3657 }, { "epoch": 0.8430513943304909, "grad_norm": 0.14595894515514374, "learning_rate": 6.324223981333199e-06, "loss": 0.6448, "step": 3658 }, { "epoch": 0.8432818621802258, "grad_norm": 0.16061711311340332, "learning_rate": 6.3060646283991106e-06, "loss": 0.646, "step": 3659 }, { "epoch": 0.8435123300299608, "grad_norm": 0.16146208345890045, "learning_rate": 6.287929629415856e-06, "loss": 0.6377, "step": 3660 }, { "epoch": 0.8437427978796957, "grad_norm": 0.15115495026111603, "learning_rate": 6.269818994491455e-06, "loss": 0.6467, "step": 3661 }, { "epoch": 0.8439732657294308, "grad_norm": 0.13822171092033386, "learning_rate": 6.251732733720323e-06, "loss": 0.639, "step": 3662 }, { "epoch": 0.8442037335791657, "grad_norm": 0.14701151847839355, "learning_rate": 6.23367085718331e-06, "loss": 0.6439, "step": 3663 }, { "epoch": 0.8444342014289007, "grad_norm": 0.1508350670337677, "learning_rate": 6.215633374947683e-06, "loss": 0.6387, "step": 3664 }, { "epoch": 0.8446646692786356, "grad_norm": 0.15383285284042358, "learning_rate": 6.197620297067097e-06, "loss": 0.6405, "step": 3665 }, { "epoch": 0.8448951371283706, "grad_norm": 0.1498384326696396, "learning_rate": 6.179631633581612e-06, "loss": 0.6461, "step": 3666 }, { "epoch": 0.8451256049781055, "grad_norm": 0.15178535878658295, "learning_rate": 6.1616673945176836e-06, "loss": 0.6454, "step": 3667 }, { "epoch": 0.8453560728278405, "grad_norm": 0.15506048500537872, "learning_rate": 6.143727589888126e-06, "loss": 0.6449, "step": 3668 }, { "epoch": 0.8455865406775754, "grad_norm": 0.1453298181295395, "learning_rate": 6.125812229692162e-06, "loss": 0.6417, "step": 3669 }, { "epoch": 0.8458170085273105, "grad_norm": 0.1514149308204651, "learning_rate": 6.107921323915411e-06, "loss": 0.6433, "step": 3670 }, { "epoch": 0.8460474763770454, "grad_norm": 0.14425182342529297, "learning_rate": 6.0900548825298e-06, "loss": 0.6461, "step": 3671 }, { "epoch": 0.8462779442267804, "grad_norm": 0.1488070785999298, "learning_rate": 6.072212915493669e-06, "loss": 0.6473, "step": 3672 }, { "epoch": 0.8465084120765153, "grad_norm": 0.14930947124958038, "learning_rate": 6.054395432751703e-06, "loss": 0.6418, "step": 3673 }, { "epoch": 0.8467388799262503, "grad_norm": 0.15142978727817535, "learning_rate": 6.036602444234935e-06, "loss": 0.638, "step": 3674 }, { "epoch": 0.8469693477759852, "grad_norm": 0.13954748213291168, "learning_rate": 6.018833959860753e-06, "loss": 0.6415, "step": 3675 }, { "epoch": 0.8471998156257202, "grad_norm": 0.14654095470905304, "learning_rate": 6.001089989532893e-06, "loss": 0.6398, "step": 3676 }, { "epoch": 0.8474302834754551, "grad_norm": 0.14327792823314667, "learning_rate": 5.9833705431413975e-06, "loss": 0.6393, "step": 3677 }, { "epoch": 0.8476607513251901, "grad_norm": 0.15117095410823822, "learning_rate": 5.965675630562672e-06, "loss": 0.6375, "step": 3678 }, { "epoch": 0.847891219174925, "grad_norm": 0.14697414636611938, "learning_rate": 5.948005261659434e-06, "loss": 0.6385, "step": 3679 }, { "epoch": 0.8481216870246601, "grad_norm": 0.14706385135650635, "learning_rate": 5.930359446280726e-06, "loss": 0.6391, "step": 3680 }, { "epoch": 0.848352154874395, "grad_norm": 0.1475333720445633, "learning_rate": 5.912738194261902e-06, "loss": 0.636, "step": 3681 }, { "epoch": 0.84858262272413, "grad_norm": 0.15489518642425537, "learning_rate": 5.895141515424629e-06, "loss": 0.6409, "step": 3682 }, { "epoch": 0.8488130905738649, "grad_norm": 0.1442023515701294, "learning_rate": 5.87756941957685e-06, "loss": 0.6374, "step": 3683 }, { "epoch": 0.8490435584235999, "grad_norm": 0.14666606485843658, "learning_rate": 5.860021916512859e-06, "loss": 0.6388, "step": 3684 }, { "epoch": 0.8492740262733348, "grad_norm": 0.1419772058725357, "learning_rate": 5.842499016013209e-06, "loss": 0.6369, "step": 3685 }, { "epoch": 0.8495044941230698, "grad_norm": 0.14429770410060883, "learning_rate": 5.8250007278447205e-06, "loss": 0.637, "step": 3686 }, { "epoch": 0.8497349619728048, "grad_norm": 0.15203547477722168, "learning_rate": 5.807527061760543e-06, "loss": 0.6346, "step": 3687 }, { "epoch": 0.8499654298225398, "grad_norm": 0.14554864168167114, "learning_rate": 5.790078027500068e-06, "loss": 0.6423, "step": 3688 }, { "epoch": 0.8501958976722748, "grad_norm": 0.15379323065280914, "learning_rate": 5.772653634788971e-06, "loss": 0.645, "step": 3689 }, { "epoch": 0.8504263655220097, "grad_norm": 0.15001268684864044, "learning_rate": 5.755253893339185e-06, "loss": 0.6349, "step": 3690 }, { "epoch": 0.8506568333717447, "grad_norm": 0.15877242386341095, "learning_rate": 5.737878812848929e-06, "loss": 0.6364, "step": 3691 }, { "epoch": 0.8508873012214796, "grad_norm": 0.15002503991127014, "learning_rate": 5.720528403002634e-06, "loss": 0.6319, "step": 3692 }, { "epoch": 0.8511177690712146, "grad_norm": 0.1416318565607071, "learning_rate": 5.703202673470992e-06, "loss": 0.6432, "step": 3693 }, { "epoch": 0.8513482369209495, "grad_norm": 0.15145903825759888, "learning_rate": 5.685901633910989e-06, "loss": 0.6373, "step": 3694 }, { "epoch": 0.8515787047706845, "grad_norm": 0.1428060680627823, "learning_rate": 5.668625293965774e-06, "loss": 0.6411, "step": 3695 }, { "epoch": 0.8518091726204194, "grad_norm": 0.1449238806962967, "learning_rate": 5.6513736632647695e-06, "loss": 0.6353, "step": 3696 }, { "epoch": 0.8520396404701545, "grad_norm": 0.15147364139556885, "learning_rate": 5.634146751423647e-06, "loss": 0.6433, "step": 3697 }, { "epoch": 0.8522701083198894, "grad_norm": 0.14954763650894165, "learning_rate": 5.616944568044225e-06, "loss": 0.6279, "step": 3698 }, { "epoch": 0.8525005761696244, "grad_norm": 0.14778868854045868, "learning_rate": 5.599767122714627e-06, "loss": 0.6368, "step": 3699 }, { "epoch": 0.8527310440193593, "grad_norm": 0.14133252203464508, "learning_rate": 5.582614425009153e-06, "loss": 0.6416, "step": 3700 }, { "epoch": 0.8529615118690943, "grad_norm": 0.15429161489009857, "learning_rate": 5.565486484488275e-06, "loss": 0.6418, "step": 3701 }, { "epoch": 0.8531919797188292, "grad_norm": 0.1481894999742508, "learning_rate": 5.548383310698707e-06, "loss": 0.635, "step": 3702 }, { "epoch": 0.8534224475685642, "grad_norm": 0.14938104152679443, "learning_rate": 5.531304913173357e-06, "loss": 0.6503, "step": 3703 }, { "epoch": 0.8536529154182991, "grad_norm": 0.14781972765922546, "learning_rate": 5.514251301431306e-06, "loss": 0.6352, "step": 3704 }, { "epoch": 0.8538833832680341, "grad_norm": 0.1436912715435028, "learning_rate": 5.497222484977826e-06, "loss": 0.6331, "step": 3705 }, { "epoch": 0.854113851117769, "grad_norm": 0.14232562482357025, "learning_rate": 5.480218473304388e-06, "loss": 0.6355, "step": 3706 }, { "epoch": 0.8543443189675041, "grad_norm": 0.1478080451488495, "learning_rate": 5.4632392758885985e-06, "loss": 0.6371, "step": 3707 }, { "epoch": 0.854574786817239, "grad_norm": 0.14770181477069855, "learning_rate": 5.446284902194249e-06, "loss": 0.6365, "step": 3708 }, { "epoch": 0.854805254666974, "grad_norm": 0.14597497880458832, "learning_rate": 5.429355361671335e-06, "loss": 0.641, "step": 3709 }, { "epoch": 0.8550357225167089, "grad_norm": 0.1463644802570343, "learning_rate": 5.412450663755941e-06, "loss": 0.6343, "step": 3710 }, { "epoch": 0.8552661903664439, "grad_norm": 0.14388582110404968, "learning_rate": 5.39557081787036e-06, "loss": 0.6475, "step": 3711 }, { "epoch": 0.8554966582161788, "grad_norm": 0.1477576494216919, "learning_rate": 5.378715833423004e-06, "loss": 0.6357, "step": 3712 }, { "epoch": 0.8557271260659138, "grad_norm": 0.15977248549461365, "learning_rate": 5.36188571980844e-06, "loss": 0.6389, "step": 3713 }, { "epoch": 0.8559575939156487, "grad_norm": 0.14352811872959137, "learning_rate": 5.3450804864073665e-06, "loss": 0.6389, "step": 3714 }, { "epoch": 0.8561880617653838, "grad_norm": 0.13531458377838135, "learning_rate": 5.328300142586629e-06, "loss": 0.6385, "step": 3715 }, { "epoch": 0.8564185296151187, "grad_norm": 0.15272001922130585, "learning_rate": 5.311544697699172e-06, "loss": 0.635, "step": 3716 }, { "epoch": 0.8566489974648537, "grad_norm": 0.15149231255054474, "learning_rate": 5.294814161084083e-06, "loss": 0.634, "step": 3717 }, { "epoch": 0.8568794653145886, "grad_norm": 0.13878074288368225, "learning_rate": 5.278108542066562e-06, "loss": 0.6344, "step": 3718 }, { "epoch": 0.8571099331643236, "grad_norm": 0.143055722117424, "learning_rate": 5.261427849957928e-06, "loss": 0.6459, "step": 3719 }, { "epoch": 0.8573404010140585, "grad_norm": 0.14342109858989716, "learning_rate": 5.244772094055589e-06, "loss": 0.6344, "step": 3720 }, { "epoch": 0.8575708688637935, "grad_norm": 0.1506274938583374, "learning_rate": 5.228141283643073e-06, "loss": 0.6441, "step": 3721 }, { "epoch": 0.8578013367135284, "grad_norm": 0.13922744989395142, "learning_rate": 5.211535427989972e-06, "loss": 0.6469, "step": 3722 }, { "epoch": 0.8580318045632634, "grad_norm": 0.13600236177444458, "learning_rate": 5.194954536352021e-06, "loss": 0.6365, "step": 3723 }, { "epoch": 0.8582622724129984, "grad_norm": 0.140496164560318, "learning_rate": 5.178398617971003e-06, "loss": 0.6401, "step": 3724 }, { "epoch": 0.8584927402627334, "grad_norm": 0.14220167696475983, "learning_rate": 5.161867682074773e-06, "loss": 0.6382, "step": 3725 }, { "epoch": 0.8587232081124683, "grad_norm": 0.14413121342658997, "learning_rate": 5.145361737877291e-06, "loss": 0.6251, "step": 3726 }, { "epoch": 0.8589536759622033, "grad_norm": 0.14119820296764374, "learning_rate": 5.128880794578572e-06, "loss": 0.6368, "step": 3727 }, { "epoch": 0.8591841438119382, "grad_norm": 0.14928089082241058, "learning_rate": 5.112424861364701e-06, "loss": 0.6405, "step": 3728 }, { "epoch": 0.8594146116616732, "grad_norm": 0.1459737867116928, "learning_rate": 5.095993947407818e-06, "loss": 0.6333, "step": 3729 }, { "epoch": 0.8596450795114081, "grad_norm": 0.1493435502052307, "learning_rate": 5.079588061866125e-06, "loss": 0.635, "step": 3730 }, { "epoch": 0.8598755473611431, "grad_norm": 0.14926058053970337, "learning_rate": 5.0632072138838584e-06, "loss": 0.6535, "step": 3731 }, { "epoch": 0.860106015210878, "grad_norm": 0.14926740527153015, "learning_rate": 5.046851412591314e-06, "loss": 0.6416, "step": 3732 }, { "epoch": 0.860336483060613, "grad_norm": 0.148090198636055, "learning_rate": 5.030520667104821e-06, "loss": 0.637, "step": 3733 }, { "epoch": 0.860566950910348, "grad_norm": 0.14074482023715973, "learning_rate": 5.01421498652675e-06, "loss": 0.6391, "step": 3734 }, { "epoch": 0.860797418760083, "grad_norm": 0.14252926409244537, "learning_rate": 4.997934379945491e-06, "loss": 0.638, "step": 3735 }, { "epoch": 0.8610278866098179, "grad_norm": 0.14792828261852264, "learning_rate": 4.981678856435479e-06, "loss": 0.6389, "step": 3736 }, { "epoch": 0.8612583544595529, "grad_norm": 0.15630586445331573, "learning_rate": 4.965448425057118e-06, "loss": 0.6465, "step": 3737 }, { "epoch": 0.8614888223092878, "grad_norm": 0.14542563259601593, "learning_rate": 4.949243094856892e-06, "loss": 0.64, "step": 3738 }, { "epoch": 0.8617192901590228, "grad_norm": 0.1466866284608841, "learning_rate": 4.933062874867267e-06, "loss": 0.6441, "step": 3739 }, { "epoch": 0.8619497580087577, "grad_norm": 0.14459478855133057, "learning_rate": 4.916907774106683e-06, "loss": 0.6341, "step": 3740 }, { "epoch": 0.8621802258584927, "grad_norm": 0.152002215385437, "learning_rate": 4.90077780157962e-06, "loss": 0.6486, "step": 3741 }, { "epoch": 0.8624106937082276, "grad_norm": 0.14948220551013947, "learning_rate": 4.884672966276538e-06, "loss": 0.6398, "step": 3742 }, { "epoch": 0.8626411615579627, "grad_norm": 0.14576594531536102, "learning_rate": 4.868593277173878e-06, "loss": 0.6509, "step": 3743 }, { "epoch": 0.8628716294076976, "grad_norm": 0.1429005265235901, "learning_rate": 4.852538743234081e-06, "loss": 0.6391, "step": 3744 }, { "epoch": 0.8631020972574326, "grad_norm": 0.145622119307518, "learning_rate": 4.836509373405568e-06, "loss": 0.6404, "step": 3745 }, { "epoch": 0.8633325651071676, "grad_norm": 0.1396528035402298, "learning_rate": 4.820505176622697e-06, "loss": 0.6299, "step": 3746 }, { "epoch": 0.8635630329569025, "grad_norm": 0.14236707985401154, "learning_rate": 4.804526161805833e-06, "loss": 0.6399, "step": 3747 }, { "epoch": 0.8637935008066375, "grad_norm": 0.14210587739944458, "learning_rate": 4.788572337861313e-06, "loss": 0.6344, "step": 3748 }, { "epoch": 0.8640239686563724, "grad_norm": 0.1455959975719452, "learning_rate": 4.772643713681413e-06, "loss": 0.6388, "step": 3749 }, { "epoch": 0.8642544365061074, "grad_norm": 0.14806394279003143, "learning_rate": 4.756740298144346e-06, "loss": 0.6385, "step": 3750 }, { "epoch": 0.8644849043558424, "grad_norm": 0.13723453879356384, "learning_rate": 4.740862100114307e-06, "loss": 0.6385, "step": 3751 }, { "epoch": 0.8647153722055774, "grad_norm": 0.15280765295028687, "learning_rate": 4.725009128441421e-06, "loss": 0.6454, "step": 3752 }, { "epoch": 0.8649458400553123, "grad_norm": 0.14481478929519653, "learning_rate": 4.709181391961753e-06, "loss": 0.6413, "step": 3753 }, { "epoch": 0.8651763079050473, "grad_norm": 0.14188295602798462, "learning_rate": 4.693378899497303e-06, "loss": 0.6528, "step": 3754 }, { "epoch": 0.8654067757547822, "grad_norm": 0.1486392468214035, "learning_rate": 4.6776016598560124e-06, "loss": 0.6364, "step": 3755 }, { "epoch": 0.8656372436045172, "grad_norm": 0.1420852243900299, "learning_rate": 4.6618496818317145e-06, "loss": 0.6371, "step": 3756 }, { "epoch": 0.8658677114542521, "grad_norm": 0.1463935822248459, "learning_rate": 4.646122974204187e-06, "loss": 0.6368, "step": 3757 }, { "epoch": 0.8660981793039871, "grad_norm": 0.14199309051036835, "learning_rate": 4.630421545739144e-06, "loss": 0.6407, "step": 3758 }, { "epoch": 0.866328647153722, "grad_norm": 0.13984660804271698, "learning_rate": 4.6147454051881585e-06, "loss": 0.6389, "step": 3759 }, { "epoch": 0.8665591150034571, "grad_norm": 0.14572177827358246, "learning_rate": 4.5990945612887415e-06, "loss": 0.6353, "step": 3760 }, { "epoch": 0.866789582853192, "grad_norm": 0.14711903035640717, "learning_rate": 4.583469022764314e-06, "loss": 0.6408, "step": 3761 }, { "epoch": 0.867020050702927, "grad_norm": 0.14538122713565826, "learning_rate": 4.567868798324143e-06, "loss": 0.6373, "step": 3762 }, { "epoch": 0.8672505185526619, "grad_norm": 0.14264172315597534, "learning_rate": 4.552293896663451e-06, "loss": 0.6357, "step": 3763 }, { "epoch": 0.8674809864023969, "grad_norm": 0.14745338261127472, "learning_rate": 4.536744326463304e-06, "loss": 0.6415, "step": 3764 }, { "epoch": 0.8677114542521318, "grad_norm": 0.1417376846075058, "learning_rate": 4.521220096390655e-06, "loss": 0.6299, "step": 3765 }, { "epoch": 0.8679419221018668, "grad_norm": 0.13753369450569153, "learning_rate": 4.505721215098335e-06, "loss": 0.6364, "step": 3766 }, { "epoch": 0.8681723899516017, "grad_norm": 0.14812487363815308, "learning_rate": 4.490247691225058e-06, "loss": 0.6494, "step": 3767 }, { "epoch": 0.8684028578013367, "grad_norm": 0.14765094220638275, "learning_rate": 4.4747995333953855e-06, "loss": 0.6363, "step": 3768 }, { "epoch": 0.8686333256510717, "grad_norm": 0.14122313261032104, "learning_rate": 4.459376750219757e-06, "loss": 0.6319, "step": 3769 }, { "epoch": 0.8688637935008067, "grad_norm": 0.13697822391986847, "learning_rate": 4.443979350294463e-06, "loss": 0.6328, "step": 3770 }, { "epoch": 0.8690942613505416, "grad_norm": 0.1433285027742386, "learning_rate": 4.428607342201635e-06, "loss": 0.643, "step": 3771 }, { "epoch": 0.8693247292002766, "grad_norm": 0.13931584358215332, "learning_rate": 4.4132607345092555e-06, "loss": 0.6451, "step": 3772 }, { "epoch": 0.8695551970500115, "grad_norm": 0.1486472487449646, "learning_rate": 4.397939535771189e-06, "loss": 0.6352, "step": 3773 }, { "epoch": 0.8697856648997465, "grad_norm": 0.14353391528129578, "learning_rate": 4.382643754527072e-06, "loss": 0.6599, "step": 3774 }, { "epoch": 0.8700161327494814, "grad_norm": 0.14892083406448364, "learning_rate": 4.36737339930241e-06, "loss": 0.6315, "step": 3775 }, { "epoch": 0.8702466005992164, "grad_norm": 0.1448989063501358, "learning_rate": 4.352128478608541e-06, "loss": 0.6396, "step": 3776 }, { "epoch": 0.8704770684489513, "grad_norm": 0.1473805457353592, "learning_rate": 4.3369090009426185e-06, "loss": 0.6484, "step": 3777 }, { "epoch": 0.8707075362986864, "grad_norm": 0.14433009922504425, "learning_rate": 4.321714974787605e-06, "loss": 0.6427, "step": 3778 }, { "epoch": 0.8709380041484213, "grad_norm": 0.14538750052452087, "learning_rate": 4.306546408612306e-06, "loss": 0.6443, "step": 3779 }, { "epoch": 0.8711684719981563, "grad_norm": 0.13906364142894745, "learning_rate": 4.291403310871284e-06, "loss": 0.6368, "step": 3780 }, { "epoch": 0.8713989398478912, "grad_norm": 0.15096677839756012, "learning_rate": 4.276285690004961e-06, "loss": 0.6428, "step": 3781 }, { "epoch": 0.8716294076976262, "grad_norm": 0.14877794682979584, "learning_rate": 4.26119355443953e-06, "loss": 0.6374, "step": 3782 }, { "epoch": 0.8718598755473611, "grad_norm": 0.14585071802139282, "learning_rate": 4.24612691258699e-06, "loss": 0.6383, "step": 3783 }, { "epoch": 0.8720903433970961, "grad_norm": 0.14376723766326904, "learning_rate": 4.231085772845117e-06, "loss": 0.6423, "step": 3784 }, { "epoch": 0.872320811246831, "grad_norm": 0.1447305977344513, "learning_rate": 4.2160701435974945e-06, "loss": 0.6369, "step": 3785 }, { "epoch": 0.872551279096566, "grad_norm": 0.14197836816310883, "learning_rate": 4.201080033213456e-06, "loss": 0.6373, "step": 3786 }, { "epoch": 0.872781746946301, "grad_norm": 0.14709553122520447, "learning_rate": 4.186115450048128e-06, "loss": 0.6385, "step": 3787 }, { "epoch": 0.873012214796036, "grad_norm": 0.1427144557237625, "learning_rate": 4.171176402442445e-06, "loss": 0.6398, "step": 3788 }, { "epoch": 0.8732426826457709, "grad_norm": 0.14940115809440613, "learning_rate": 4.156262898723034e-06, "loss": 0.6333, "step": 3789 }, { "epoch": 0.8734731504955059, "grad_norm": 0.1392568051815033, "learning_rate": 4.141374947202336e-06, "loss": 0.6419, "step": 3790 }, { "epoch": 0.8737036183452408, "grad_norm": 0.1471542865037918, "learning_rate": 4.1265125561785465e-06, "loss": 0.6299, "step": 3791 }, { "epoch": 0.8739340861949758, "grad_norm": 0.14388377964496613, "learning_rate": 4.1116757339355995e-06, "loss": 0.6431, "step": 3792 }, { "epoch": 0.8741645540447107, "grad_norm": 0.1433270126581192, "learning_rate": 4.0968644887431795e-06, "loss": 0.6342, "step": 3793 }, { "epoch": 0.8743950218944457, "grad_norm": 0.13722054660320282, "learning_rate": 4.082078828856733e-06, "loss": 0.6373, "step": 3794 }, { "epoch": 0.8746254897441806, "grad_norm": 0.14640505611896515, "learning_rate": 4.0673187625174195e-06, "loss": 0.6395, "step": 3795 }, { "epoch": 0.8748559575939157, "grad_norm": 0.1450256109237671, "learning_rate": 4.052584297952145e-06, "loss": 0.6384, "step": 3796 }, { "epoch": 0.8750864254436506, "grad_norm": 0.14310050010681152, "learning_rate": 4.037875443373546e-06, "loss": 0.6361, "step": 3797 }, { "epoch": 0.8753168932933856, "grad_norm": 0.142232283949852, "learning_rate": 4.023192206979992e-06, "loss": 0.6378, "step": 3798 }, { "epoch": 0.8755473611431205, "grad_norm": 0.14268440008163452, "learning_rate": 4.008534596955565e-06, "loss": 0.6358, "step": 3799 }, { "epoch": 0.8757778289928555, "grad_norm": 0.14434240758419037, "learning_rate": 3.9939026214700695e-06, "loss": 0.6394, "step": 3800 }, { "epoch": 0.8760082968425904, "grad_norm": 0.14181116223335266, "learning_rate": 3.979296288678996e-06, "loss": 0.6414, "step": 3801 }, { "epoch": 0.8762387646923254, "grad_norm": 0.14815394580364227, "learning_rate": 3.964715606723585e-06, "loss": 0.6415, "step": 3802 }, { "epoch": 0.8764692325420603, "grad_norm": 0.14605048298835754, "learning_rate": 3.950160583730761e-06, "loss": 0.6353, "step": 3803 }, { "epoch": 0.8766997003917953, "grad_norm": 0.14189860224723816, "learning_rate": 3.93563122781313e-06, "loss": 0.6441, "step": 3804 }, { "epoch": 0.8769301682415304, "grad_norm": 0.14701896905899048, "learning_rate": 3.921127547069014e-06, "loss": 0.6457, "step": 3805 }, { "epoch": 0.8771606360912653, "grad_norm": 0.141061931848526, "learning_rate": 3.906649549582414e-06, "loss": 0.6324, "step": 3806 }, { "epoch": 0.8773911039410003, "grad_norm": 0.14271359145641327, "learning_rate": 3.8921972434230185e-06, "loss": 0.6447, "step": 3807 }, { "epoch": 0.8776215717907352, "grad_norm": 0.1448899656534195, "learning_rate": 3.8777706366462e-06, "loss": 0.6401, "step": 3808 }, { "epoch": 0.8778520396404702, "grad_norm": 0.13925962150096893, "learning_rate": 3.863369737293005e-06, "loss": 0.6411, "step": 3809 }, { "epoch": 0.8780825074902051, "grad_norm": 0.1407134234905243, "learning_rate": 3.848994553390134e-06, "loss": 0.6445, "step": 3810 }, { "epoch": 0.8783129753399401, "grad_norm": 0.1439376175403595, "learning_rate": 3.834645092949973e-06, "loss": 0.6367, "step": 3811 }, { "epoch": 0.878543443189675, "grad_norm": 0.14084570109844208, "learning_rate": 3.8203213639705915e-06, "loss": 0.6304, "step": 3812 }, { "epoch": 0.87877391103941, "grad_norm": 0.14097703993320465, "learning_rate": 3.8060233744356633e-06, "loss": 0.638, "step": 3813 }, { "epoch": 0.879004378889145, "grad_norm": 0.1431102752685547, "learning_rate": 3.7917511323145584e-06, "loss": 0.6446, "step": 3814 }, { "epoch": 0.87923484673888, "grad_norm": 0.14385107159614563, "learning_rate": 3.7775046455622855e-06, "loss": 0.6343, "step": 3815 }, { "epoch": 0.8794653145886149, "grad_norm": 0.14835472404956818, "learning_rate": 3.7632839221194706e-06, "loss": 0.6442, "step": 3816 }, { "epoch": 0.8796957824383499, "grad_norm": 0.1427791565656662, "learning_rate": 3.749088969912429e-06, "loss": 0.6434, "step": 3817 }, { "epoch": 0.8799262502880848, "grad_norm": 0.13777343928813934, "learning_rate": 3.734919796853087e-06, "loss": 0.6434, "step": 3818 }, { "epoch": 0.8801567181378198, "grad_norm": 0.15572573244571686, "learning_rate": 3.720776410838983e-06, "loss": 0.6361, "step": 3819 }, { "epoch": 0.8803871859875547, "grad_norm": 0.14312800765037537, "learning_rate": 3.7066588197533115e-06, "loss": 0.6331, "step": 3820 }, { "epoch": 0.8806176538372897, "grad_norm": 0.14728614687919617, "learning_rate": 3.6925670314648775e-06, "loss": 0.6391, "step": 3821 }, { "epoch": 0.8808481216870246, "grad_norm": 0.14271116256713867, "learning_rate": 3.6785010538281093e-06, "loss": 0.6359, "step": 3822 }, { "epoch": 0.8810785895367597, "grad_norm": 0.14498567581176758, "learning_rate": 3.664460894683036e-06, "loss": 0.6383, "step": 3823 }, { "epoch": 0.8813090573864946, "grad_norm": 0.14503896236419678, "learning_rate": 3.650446561855325e-06, "loss": 0.6293, "step": 3824 }, { "epoch": 0.8815395252362296, "grad_norm": 0.14249297976493835, "learning_rate": 3.6364580631562063e-06, "loss": 0.6329, "step": 3825 }, { "epoch": 0.8817699930859645, "grad_norm": 0.14754772186279297, "learning_rate": 3.622495406382531e-06, "loss": 0.6412, "step": 3826 }, { "epoch": 0.8820004609356995, "grad_norm": 0.17180697619915009, "learning_rate": 3.6085585993167805e-06, "loss": 0.6378, "step": 3827 }, { "epoch": 0.8822309287854344, "grad_norm": 0.14113938808441162, "learning_rate": 3.594647649726962e-06, "loss": 0.6425, "step": 3828 }, { "epoch": 0.8824613966351694, "grad_norm": 0.1466577649116516, "learning_rate": 3.5807625653667243e-06, "loss": 0.6484, "step": 3829 }, { "epoch": 0.8826918644849043, "grad_norm": 0.14053227007389069, "learning_rate": 3.566903353975276e-06, "loss": 0.6409, "step": 3830 }, { "epoch": 0.8829223323346393, "grad_norm": 0.14258506894111633, "learning_rate": 3.553070023277405e-06, "loss": 0.6373, "step": 3831 }, { "epoch": 0.8831528001843743, "grad_norm": 0.1399742215871811, "learning_rate": 3.5392625809834823e-06, "loss": 0.6461, "step": 3832 }, { "epoch": 0.8833832680341093, "grad_norm": 0.13795405626296997, "learning_rate": 3.525481034789446e-06, "loss": 0.6329, "step": 3833 }, { "epoch": 0.8836137358838442, "grad_norm": 0.14222726225852966, "learning_rate": 3.5117253923767967e-06, "loss": 0.6454, "step": 3834 }, { "epoch": 0.8838442037335792, "grad_norm": 0.14100506901741028, "learning_rate": 3.4979956614125953e-06, "loss": 0.6318, "step": 3835 }, { "epoch": 0.8840746715833141, "grad_norm": 0.13950549066066742, "learning_rate": 3.4842918495494646e-06, "loss": 0.6348, "step": 3836 }, { "epoch": 0.8843051394330491, "grad_norm": 0.17627927660942078, "learning_rate": 3.4706139644255897e-06, "loss": 0.639, "step": 3837 }, { "epoch": 0.884535607282784, "grad_norm": 0.1371656060218811, "learning_rate": 3.4569620136646886e-06, "loss": 0.64, "step": 3838 }, { "epoch": 0.884766075132519, "grad_norm": 0.13862690329551697, "learning_rate": 3.4433360048760357e-06, "loss": 0.6335, "step": 3839 }, { "epoch": 0.8849965429822539, "grad_norm": 0.14692457020282745, "learning_rate": 3.4297359456544276e-06, "loss": 0.629, "step": 3840 }, { "epoch": 0.885227010831989, "grad_norm": 0.13511526584625244, "learning_rate": 3.4161618435802233e-06, "loss": 0.6377, "step": 3841 }, { "epoch": 0.8854574786817239, "grad_norm": 0.14511561393737793, "learning_rate": 3.4026137062193097e-06, "loss": 0.645, "step": 3842 }, { "epoch": 0.8856879465314589, "grad_norm": 0.1404416412115097, "learning_rate": 3.389091541123074e-06, "loss": 0.6393, "step": 3843 }, { "epoch": 0.8859184143811938, "grad_norm": 0.14536434412002563, "learning_rate": 3.375595355828454e-06, "loss": 0.6376, "step": 3844 }, { "epoch": 0.8861488822309288, "grad_norm": 0.14059731364250183, "learning_rate": 3.362125157857904e-06, "loss": 0.6335, "step": 3845 }, { "epoch": 0.8863793500806637, "grad_norm": 0.14244748651981354, "learning_rate": 3.34868095471938e-06, "loss": 0.6391, "step": 3846 }, { "epoch": 0.8866098179303987, "grad_norm": 0.13621114194393158, "learning_rate": 3.335262753906371e-06, "loss": 0.6436, "step": 3847 }, { "epoch": 0.8868402857801336, "grad_norm": 0.14168091118335724, "learning_rate": 3.3218705628978554e-06, "loss": 0.6382, "step": 3848 }, { "epoch": 0.8870707536298686, "grad_norm": 0.14067873358726501, "learning_rate": 3.3085043891583125e-06, "loss": 0.6332, "step": 3849 }, { "epoch": 0.8873012214796036, "grad_norm": 0.14555051922798157, "learning_rate": 3.295164240137727e-06, "loss": 0.6424, "step": 3850 }, { "epoch": 0.8875316893293386, "grad_norm": 0.14118191599845886, "learning_rate": 3.2818501232715794e-06, "loss": 0.6465, "step": 3851 }, { "epoch": 0.8877621571790735, "grad_norm": 0.13933710753917694, "learning_rate": 3.268562045980844e-06, "loss": 0.6429, "step": 3852 }, { "epoch": 0.8879926250288085, "grad_norm": 0.1409381926059723, "learning_rate": 3.2553000156719747e-06, "loss": 0.6372, "step": 3853 }, { "epoch": 0.8882230928785434, "grad_norm": 0.13979056477546692, "learning_rate": 3.242064039736914e-06, "loss": 0.6342, "step": 3854 }, { "epoch": 0.8884535607282784, "grad_norm": 0.14258621633052826, "learning_rate": 3.2288541255530545e-06, "loss": 0.6412, "step": 3855 }, { "epoch": 0.8886840285780133, "grad_norm": 0.1503862887620926, "learning_rate": 3.215670280483307e-06, "loss": 0.6461, "step": 3856 }, { "epoch": 0.8889144964277483, "grad_norm": 0.14357925951480865, "learning_rate": 3.202512511876038e-06, "loss": 0.6381, "step": 3857 }, { "epoch": 0.8891449642774832, "grad_norm": 0.1408717930316925, "learning_rate": 3.189380827065047e-06, "loss": 0.6363, "step": 3858 }, { "epoch": 0.8893754321272183, "grad_norm": 0.14140430092811584, "learning_rate": 3.1762752333696297e-06, "loss": 0.6376, "step": 3859 }, { "epoch": 0.8896058999769532, "grad_norm": 0.14138497412204742, "learning_rate": 3.163195738094532e-06, "loss": 0.6396, "step": 3860 }, { "epoch": 0.8898363678266882, "grad_norm": 0.14000347256660461, "learning_rate": 3.150142348529955e-06, "loss": 0.6386, "step": 3861 }, { "epoch": 0.8900668356764231, "grad_norm": 0.14265434443950653, "learning_rate": 3.1371150719515354e-06, "loss": 0.6347, "step": 3862 }, { "epoch": 0.8902973035261581, "grad_norm": 0.14289766550064087, "learning_rate": 3.1241139156203746e-06, "loss": 0.6392, "step": 3863 }, { "epoch": 0.8905277713758931, "grad_norm": 0.14235663414001465, "learning_rate": 3.111138886782994e-06, "loss": 0.6365, "step": 3864 }, { "epoch": 0.890758239225628, "grad_norm": 0.1410188376903534, "learning_rate": 3.0981899926713574e-06, "loss": 0.6476, "step": 3865 }, { "epoch": 0.890988707075363, "grad_norm": 0.1419987976551056, "learning_rate": 3.0852672405028984e-06, "loss": 0.6372, "step": 3866 }, { "epoch": 0.891219174925098, "grad_norm": 0.14122051000595093, "learning_rate": 3.072370637480415e-06, "loss": 0.6286, "step": 3867 }, { "epoch": 0.891449642774833, "grad_norm": 0.13950307667255402, "learning_rate": 3.059500190792186e-06, "loss": 0.6365, "step": 3868 }, { "epoch": 0.8916801106245679, "grad_norm": 0.1390613466501236, "learning_rate": 3.0466559076118837e-06, "loss": 0.639, "step": 3869 }, { "epoch": 0.8919105784743029, "grad_norm": 0.14016972482204437, "learning_rate": 3.0338377950985875e-06, "loss": 0.6386, "step": 3870 }, { "epoch": 0.8921410463240378, "grad_norm": 0.14392688870429993, "learning_rate": 3.0210458603968263e-06, "loss": 0.6441, "step": 3871 }, { "epoch": 0.8923715141737728, "grad_norm": 0.13809806108474731, "learning_rate": 3.0082801106365187e-06, "loss": 0.6364, "step": 3872 }, { "epoch": 0.8926019820235077, "grad_norm": 0.14696134626865387, "learning_rate": 2.995540552932974e-06, "loss": 0.6413, "step": 3873 }, { "epoch": 0.8928324498732427, "grad_norm": 0.14118346571922302, "learning_rate": 2.982827194386917e-06, "loss": 0.6405, "step": 3874 }, { "epoch": 0.8930629177229776, "grad_norm": 0.14888674020767212, "learning_rate": 2.9701400420844737e-06, "loss": 0.6413, "step": 3875 }, { "epoch": 0.8932933855727127, "grad_norm": 0.14854340255260468, "learning_rate": 2.9574791030971604e-06, "loss": 0.6442, "step": 3876 }, { "epoch": 0.8935238534224476, "grad_norm": 0.1380283385515213, "learning_rate": 2.944844384481871e-06, "loss": 0.6342, "step": 3877 }, { "epoch": 0.8937543212721826, "grad_norm": 0.13916511833667755, "learning_rate": 2.9322358932809157e-06, "loss": 0.6426, "step": 3878 }, { "epoch": 0.8939847891219175, "grad_norm": 0.1366412490606308, "learning_rate": 2.919653636521935e-06, "loss": 0.6418, "step": 3879 }, { "epoch": 0.8942152569716525, "grad_norm": 0.13759376108646393, "learning_rate": 2.907097621217986e-06, "loss": 0.6409, "step": 3880 }, { "epoch": 0.8944457248213874, "grad_norm": 0.1421494036912918, "learning_rate": 2.894567854367508e-06, "loss": 0.6452, "step": 3881 }, { "epoch": 0.8946761926711224, "grad_norm": 0.13909095525741577, "learning_rate": 2.8820643429542825e-06, "loss": 0.6435, "step": 3882 }, { "epoch": 0.8949066605208573, "grad_norm": 0.1391996145248413, "learning_rate": 2.8695870939474624e-06, "loss": 0.6409, "step": 3883 }, { "epoch": 0.8951371283705923, "grad_norm": 0.13931603729724884, "learning_rate": 2.857136114301562e-06, "loss": 0.6327, "step": 3884 }, { "epoch": 0.8953675962203272, "grad_norm": 0.1370822936296463, "learning_rate": 2.844711410956469e-06, "loss": 0.642, "step": 3885 }, { "epoch": 0.8955980640700623, "grad_norm": 0.14227797091007233, "learning_rate": 2.83231299083741e-06, "loss": 0.6278, "step": 3886 }, { "epoch": 0.8958285319197972, "grad_norm": 0.1375594437122345, "learning_rate": 2.8199408608549695e-06, "loss": 0.6328, "step": 3887 }, { "epoch": 0.8960589997695322, "grad_norm": 0.14295582473278046, "learning_rate": 2.8075950279050855e-06, "loss": 0.646, "step": 3888 }, { "epoch": 0.8962894676192671, "grad_norm": 0.13916680216789246, "learning_rate": 2.7952754988690046e-06, "loss": 0.647, "step": 3889 }, { "epoch": 0.8965199354690021, "grad_norm": 0.1418418288230896, "learning_rate": 2.782982280613344e-06, "loss": 0.6377, "step": 3890 }, { "epoch": 0.896750403318737, "grad_norm": 0.1433732807636261, "learning_rate": 2.770715379990069e-06, "loss": 0.6426, "step": 3891 }, { "epoch": 0.896980871168472, "grad_norm": 0.1423547863960266, "learning_rate": 2.7584748038364303e-06, "loss": 0.635, "step": 3892 }, { "epoch": 0.8972113390182069, "grad_norm": 0.1434488594532013, "learning_rate": 2.7462605589750443e-06, "loss": 0.6382, "step": 3893 }, { "epoch": 0.897441806867942, "grad_norm": 0.14350055158138275, "learning_rate": 2.734072652213837e-06, "loss": 0.6414, "step": 3894 }, { "epoch": 0.8976722747176769, "grad_norm": 0.13868124783039093, "learning_rate": 2.7219110903460523e-06, "loss": 0.6353, "step": 3895 }, { "epoch": 0.8979027425674119, "grad_norm": 0.13998155295848846, "learning_rate": 2.7097758801502506e-06, "loss": 0.6369, "step": 3896 }, { "epoch": 0.8981332104171468, "grad_norm": 0.1414719521999359, "learning_rate": 2.6976670283903215e-06, "loss": 0.6281, "step": 3897 }, { "epoch": 0.8983636782668818, "grad_norm": 0.13889610767364502, "learning_rate": 2.685584541815428e-06, "loss": 0.6447, "step": 3898 }, { "epoch": 0.8985941461166167, "grad_norm": 0.14231473207473755, "learning_rate": 2.6735284271600657e-06, "loss": 0.6355, "step": 3899 }, { "epoch": 0.8988246139663517, "grad_norm": 0.1469811350107193, "learning_rate": 2.6614986911440264e-06, "loss": 0.6374, "step": 3900 }, { "epoch": 0.8990550818160866, "grad_norm": 0.13894438743591309, "learning_rate": 2.6494953404723965e-06, "loss": 0.6362, "step": 3901 }, { "epoch": 0.8992855496658216, "grad_norm": 0.1419995129108429, "learning_rate": 2.637518381835552e-06, "loss": 0.6354, "step": 3902 }, { "epoch": 0.8995160175155565, "grad_norm": 0.14471600949764252, "learning_rate": 2.625567821909175e-06, "loss": 0.6406, "step": 3903 }, { "epoch": 0.8997464853652916, "grad_norm": 0.1410285234451294, "learning_rate": 2.6136436673541986e-06, "loss": 0.6399, "step": 3904 }, { "epoch": 0.8999769532150265, "grad_norm": 0.14543531835079193, "learning_rate": 2.601745924816862e-06, "loss": 0.6349, "step": 3905 }, { "epoch": 0.9002074210647615, "grad_norm": 0.14382287859916687, "learning_rate": 2.58987460092871e-06, "loss": 0.6331, "step": 3906 }, { "epoch": 0.9004378889144964, "grad_norm": 0.14252907037734985, "learning_rate": 2.5780297023065057e-06, "loss": 0.6367, "step": 3907 }, { "epoch": 0.9006683567642314, "grad_norm": 0.1392444670200348, "learning_rate": 2.5662112355523183e-06, "loss": 0.6349, "step": 3908 }, { "epoch": 0.9008988246139663, "grad_norm": 0.13947899639606476, "learning_rate": 2.5544192072534835e-06, "loss": 0.6431, "step": 3909 }, { "epoch": 0.9011292924637013, "grad_norm": 0.14244791865348816, "learning_rate": 2.542653623982588e-06, "loss": 0.6393, "step": 3910 }, { "epoch": 0.9013597603134362, "grad_norm": 0.1407129168510437, "learning_rate": 2.530914492297487e-06, "loss": 0.637, "step": 3911 }, { "epoch": 0.9015902281631712, "grad_norm": 0.13949137926101685, "learning_rate": 2.519201818741301e-06, "loss": 0.6359, "step": 3912 }, { "epoch": 0.9018206960129062, "grad_norm": 0.13872455060482025, "learning_rate": 2.507515609842376e-06, "loss": 0.6415, "step": 3913 }, { "epoch": 0.9020511638626412, "grad_norm": 0.13934583961963654, "learning_rate": 2.495855872114333e-06, "loss": 0.6298, "step": 3914 }, { "epoch": 0.9022816317123761, "grad_norm": 0.13912633061408997, "learning_rate": 2.4842226120560255e-06, "loss": 0.6321, "step": 3915 }, { "epoch": 0.9025120995621111, "grad_norm": 0.1382398009300232, "learning_rate": 2.4726158361515593e-06, "loss": 0.6343, "step": 3916 }, { "epoch": 0.902742567411846, "grad_norm": 0.14063194394111633, "learning_rate": 2.461035550870272e-06, "loss": 0.6505, "step": 3917 }, { "epoch": 0.902973035261581, "grad_norm": 0.14703617990016937, "learning_rate": 2.4494817626667442e-06, "loss": 0.64, "step": 3918 }, { "epoch": 0.9032035031113159, "grad_norm": 0.14321446418762207, "learning_rate": 2.437954477980753e-06, "loss": 0.6332, "step": 3919 }, { "epoch": 0.9034339709610509, "grad_norm": 0.14084553718566895, "learning_rate": 2.426453703237358e-06, "loss": 0.6363, "step": 3920 }, { "epoch": 0.9036644388107858, "grad_norm": 0.14175479114055634, "learning_rate": 2.41497944484681e-06, "loss": 0.6319, "step": 3921 }, { "epoch": 0.9038949066605209, "grad_norm": 0.13902176916599274, "learning_rate": 2.4035317092045763e-06, "loss": 0.6345, "step": 3922 }, { "epoch": 0.9041253745102559, "grad_norm": 0.1377190500497818, "learning_rate": 2.3921105026913527e-06, "loss": 0.6291, "step": 3923 }, { "epoch": 0.9043558423599908, "grad_norm": 0.13996082544326782, "learning_rate": 2.380715831673047e-06, "loss": 0.6375, "step": 3924 }, { "epoch": 0.9045863102097258, "grad_norm": 0.1362973004579544, "learning_rate": 2.369347702500774e-06, "loss": 0.6454, "step": 3925 }, { "epoch": 0.9048167780594607, "grad_norm": 0.13769546151161194, "learning_rate": 2.3580061215108585e-06, "loss": 0.635, "step": 3926 }, { "epoch": 0.9050472459091957, "grad_norm": 0.13530834019184113, "learning_rate": 2.3466910950248332e-06, "loss": 0.634, "step": 3927 }, { "epoch": 0.9052777137589306, "grad_norm": 0.14218156039714813, "learning_rate": 2.3354026293494034e-06, "loss": 0.6334, "step": 3928 }, { "epoch": 0.9055081816086656, "grad_norm": 0.13567234575748444, "learning_rate": 2.324140730776497e-06, "loss": 0.6383, "step": 3929 }, { "epoch": 0.9057386494584005, "grad_norm": 0.14312051236629486, "learning_rate": 2.3129054055832376e-06, "loss": 0.6454, "step": 3930 }, { "epoch": 0.9059691173081356, "grad_norm": 0.13520599901676178, "learning_rate": 2.3016966600319154e-06, "loss": 0.6408, "step": 3931 }, { "epoch": 0.9061995851578705, "grad_norm": 0.13904651999473572, "learning_rate": 2.290514500370011e-06, "loss": 0.6371, "step": 3932 }, { "epoch": 0.9064300530076055, "grad_norm": 0.14134728908538818, "learning_rate": 2.2793589328302056e-06, "loss": 0.641, "step": 3933 }, { "epoch": 0.9066605208573404, "grad_norm": 0.13760726153850555, "learning_rate": 2.268229963630325e-06, "loss": 0.6408, "step": 3934 }, { "epoch": 0.9068909887070754, "grad_norm": 0.1370038092136383, "learning_rate": 2.2571275989734076e-06, "loss": 0.6401, "step": 3935 }, { "epoch": 0.9071214565568103, "grad_norm": 0.13965949416160583, "learning_rate": 2.2460518450476474e-06, "loss": 0.6404, "step": 3936 }, { "epoch": 0.9073519244065453, "grad_norm": 0.1396677941083908, "learning_rate": 2.2350027080263845e-06, "loss": 0.6266, "step": 3937 }, { "epoch": 0.9075823922562802, "grad_norm": 0.13712947070598602, "learning_rate": 2.223980194068159e-06, "loss": 0.6334, "step": 3938 }, { "epoch": 0.9078128601060153, "grad_norm": 0.1409660130739212, "learning_rate": 2.212984309316646e-06, "loss": 0.6407, "step": 3939 }, { "epoch": 0.9080433279557502, "grad_norm": 0.14090897142887115, "learning_rate": 2.2020150599006916e-06, "loss": 0.6377, "step": 3940 }, { "epoch": 0.9082737958054852, "grad_norm": 0.1404808610677719, "learning_rate": 2.191072451934295e-06, "loss": 0.6421, "step": 3941 }, { "epoch": 0.9085042636552201, "grad_norm": 0.1363547444343567, "learning_rate": 2.180156491516605e-06, "loss": 0.6403, "step": 3942 }, { "epoch": 0.9087347315049551, "grad_norm": 0.13701926171779633, "learning_rate": 2.1692671847319048e-06, "loss": 0.6341, "step": 3943 }, { "epoch": 0.90896519935469, "grad_norm": 0.19568116962909698, "learning_rate": 2.1584045376496385e-06, "loss": 0.6294, "step": 3944 }, { "epoch": 0.909195667204425, "grad_norm": 0.13929463922977448, "learning_rate": 2.147568556324392e-06, "loss": 0.6354, "step": 3945 }, { "epoch": 0.9094261350541599, "grad_norm": 0.14168402552604675, "learning_rate": 2.136759246795872e-06, "loss": 0.6367, "step": 3946 }, { "epoch": 0.9096566029038949, "grad_norm": 0.13594500720500946, "learning_rate": 2.125976615088926e-06, "loss": 0.6406, "step": 3947 }, { "epoch": 0.9098870707536298, "grad_norm": 0.14707335829734802, "learning_rate": 2.1152206672135465e-06, "loss": 0.6409, "step": 3948 }, { "epoch": 0.9101175386033649, "grad_norm": 0.13740447163581848, "learning_rate": 2.104491409164827e-06, "loss": 0.6325, "step": 3949 }, { "epoch": 0.9103480064530998, "grad_norm": 0.1381852775812149, "learning_rate": 2.0937888469230115e-06, "loss": 0.6356, "step": 3950 }, { "epoch": 0.9105784743028348, "grad_norm": 0.13955402374267578, "learning_rate": 2.083112986453445e-06, "loss": 0.6406, "step": 3951 }, { "epoch": 0.9108089421525697, "grad_norm": 0.14137601852416992, "learning_rate": 2.072463833706595e-06, "loss": 0.6395, "step": 3952 }, { "epoch": 0.9110394100023047, "grad_norm": 0.14015664160251617, "learning_rate": 2.061841394618036e-06, "loss": 0.6381, "step": 3953 }, { "epoch": 0.9112698778520396, "grad_norm": 0.1479710191488266, "learning_rate": 2.0512456751084763e-06, "loss": 0.6534, "step": 3954 }, { "epoch": 0.9115003457017746, "grad_norm": 0.14452481269836426, "learning_rate": 2.040676681083703e-06, "loss": 0.6372, "step": 3955 }, { "epoch": 0.9117308135515095, "grad_norm": 0.1414709836244583, "learning_rate": 2.030134418434626e-06, "loss": 0.6353, "step": 3956 }, { "epoch": 0.9119612814012446, "grad_norm": 0.1382758766412735, "learning_rate": 2.0196188930372563e-06, "loss": 0.6351, "step": 3957 }, { "epoch": 0.9121917492509795, "grad_norm": 0.14365412294864655, "learning_rate": 2.0091301107526774e-06, "loss": 0.6343, "step": 3958 }, { "epoch": 0.9124222171007145, "grad_norm": 0.13868463039398193, "learning_rate": 1.998668077427096e-06, "loss": 0.6444, "step": 3959 }, { "epoch": 0.9126526849504494, "grad_norm": 0.14234106242656708, "learning_rate": 1.9882327988918038e-06, "loss": 0.6295, "step": 3960 }, { "epoch": 0.9128831528001844, "grad_norm": 0.16360850632190704, "learning_rate": 1.977824280963164e-06, "loss": 0.6482, "step": 3961 }, { "epoch": 0.9131136206499193, "grad_norm": 0.13877460360527039, "learning_rate": 1.967442529442637e-06, "loss": 0.641, "step": 3962 }, { "epoch": 0.9133440884996543, "grad_norm": 0.13784171640872955, "learning_rate": 1.957087550116765e-06, "loss": 0.6391, "step": 3963 }, { "epoch": 0.9135745563493892, "grad_norm": 0.1386898308992386, "learning_rate": 1.94675934875716e-06, "loss": 0.6399, "step": 3964 }, { "epoch": 0.9138050241991242, "grad_norm": 0.14157116413116455, "learning_rate": 1.936457931120522e-06, "loss": 0.6383, "step": 3965 }, { "epoch": 0.9140354920488591, "grad_norm": 0.13940860331058502, "learning_rate": 1.9261833029486088e-06, "loss": 0.6441, "step": 3966 }, { "epoch": 0.9142659598985942, "grad_norm": 0.13591444492340088, "learning_rate": 1.9159354699682497e-06, "loss": 0.6382, "step": 3967 }, { "epoch": 0.9144964277483291, "grad_norm": 0.14465990662574768, "learning_rate": 1.9057144378913427e-06, "loss": 0.6278, "step": 3968 }, { "epoch": 0.9147268955980641, "grad_norm": 0.13699522614479065, "learning_rate": 1.895520212414842e-06, "loss": 0.6409, "step": 3969 }, { "epoch": 0.914957363447799, "grad_norm": 0.14070484042167664, "learning_rate": 1.8853527992207742e-06, "loss": 0.6438, "step": 3970 }, { "epoch": 0.915187831297534, "grad_norm": 0.1463005095720291, "learning_rate": 1.875212203976201e-06, "loss": 0.6356, "step": 3971 }, { "epoch": 0.9154182991472689, "grad_norm": 0.1436576545238495, "learning_rate": 1.8650984323332566e-06, "loss": 0.6466, "step": 3972 }, { "epoch": 0.9156487669970039, "grad_norm": 0.13998855650424957, "learning_rate": 1.8550114899290983e-06, "loss": 0.6379, "step": 3973 }, { "epoch": 0.9158792348467388, "grad_norm": 0.13579371571540833, "learning_rate": 1.8449513823859622e-06, "loss": 0.6415, "step": 3974 }, { "epoch": 0.9161097026964738, "grad_norm": 0.14221090078353882, "learning_rate": 1.8349181153111073e-06, "loss": 0.6376, "step": 3975 }, { "epoch": 0.9163401705462088, "grad_norm": 0.136023610830307, "learning_rate": 1.8249116942968325e-06, "loss": 0.6349, "step": 3976 }, { "epoch": 0.9165706383959438, "grad_norm": 0.1378968209028244, "learning_rate": 1.8149321249204765e-06, "loss": 0.6463, "step": 3977 }, { "epoch": 0.9168011062456787, "grad_norm": 0.13700707256793976, "learning_rate": 1.8049794127444119e-06, "loss": 0.6424, "step": 3978 }, { "epoch": 0.9170315740954137, "grad_norm": 0.1433679163455963, "learning_rate": 1.7950535633160403e-06, "loss": 0.6281, "step": 3979 }, { "epoch": 0.9172620419451486, "grad_norm": 0.14099539816379547, "learning_rate": 1.7851545821677973e-06, "loss": 0.6381, "step": 3980 }, { "epoch": 0.9174925097948836, "grad_norm": 0.13756389915943146, "learning_rate": 1.7752824748171415e-06, "loss": 0.6286, "step": 3981 }, { "epoch": 0.9177229776446185, "grad_norm": 0.13646264374256134, "learning_rate": 1.7654372467665325e-06, "loss": 0.626, "step": 3982 }, { "epoch": 0.9179534454943535, "grad_norm": 0.1390201449394226, "learning_rate": 1.7556189035034644e-06, "loss": 0.6433, "step": 3983 }, { "epoch": 0.9181839133440886, "grad_norm": 0.1405726969242096, "learning_rate": 1.7458274505004702e-06, "loss": 0.6378, "step": 3984 }, { "epoch": 0.9184143811938235, "grad_norm": 0.13295190036296844, "learning_rate": 1.7360628932150512e-06, "loss": 0.635, "step": 3985 }, { "epoch": 0.9186448490435585, "grad_norm": 0.13831880688667297, "learning_rate": 1.7263252370897377e-06, "loss": 0.6451, "step": 3986 }, { "epoch": 0.9188753168932934, "grad_norm": 0.1404305100440979, "learning_rate": 1.7166144875520763e-06, "loss": 0.6413, "step": 3987 }, { "epoch": 0.9191057847430284, "grad_norm": 0.13879506289958954, "learning_rate": 1.7069306500145875e-06, "loss": 0.6319, "step": 3988 }, { "epoch": 0.9193362525927633, "grad_norm": 0.13712599873542786, "learning_rate": 1.6972737298748266e-06, "loss": 0.6433, "step": 3989 }, { "epoch": 0.9195667204424983, "grad_norm": 0.13845130801200867, "learning_rate": 1.6876437325153261e-06, "loss": 0.6356, "step": 3990 }, { "epoch": 0.9197971882922332, "grad_norm": 0.13963979482650757, "learning_rate": 1.6780406633036095e-06, "loss": 0.6461, "step": 3991 }, { "epoch": 0.9200276561419682, "grad_norm": 0.1397661715745926, "learning_rate": 1.6684645275922007e-06, "loss": 0.6437, "step": 3992 }, { "epoch": 0.9202581239917031, "grad_norm": 0.13445524871349335, "learning_rate": 1.6589153307186078e-06, "loss": 0.6337, "step": 3993 }, { "epoch": 0.9204885918414382, "grad_norm": 0.14014729857444763, "learning_rate": 1.6493930780053235e-06, "loss": 0.6396, "step": 3994 }, { "epoch": 0.9207190596911731, "grad_norm": 0.14015179872512817, "learning_rate": 1.6398977747598243e-06, "loss": 0.6377, "step": 3995 }, { "epoch": 0.9209495275409081, "grad_norm": 0.13548614084720612, "learning_rate": 1.6304294262745656e-06, "loss": 0.635, "step": 3996 }, { "epoch": 0.921179995390643, "grad_norm": 0.1403760462999344, "learning_rate": 1.6209880378269705e-06, "loss": 0.6384, "step": 3997 }, { "epoch": 0.921410463240378, "grad_norm": 0.1437310427427292, "learning_rate": 1.6115736146794402e-06, "loss": 0.634, "step": 3998 }, { "epoch": 0.9216409310901129, "grad_norm": 0.1346195787191391, "learning_rate": 1.6021861620793666e-06, "loss": 0.6337, "step": 3999 }, { "epoch": 0.9218713989398479, "grad_norm": 0.13712115585803986, "learning_rate": 1.5928256852590751e-06, "loss": 0.6279, "step": 4000 }, { "epoch": 0.9221018667895828, "grad_norm": 0.13652116060256958, "learning_rate": 1.5834921894358701e-06, "loss": 0.644, "step": 4001 }, { "epoch": 0.9223323346393179, "grad_norm": 0.13506993651390076, "learning_rate": 1.574185679812029e-06, "loss": 0.6371, "step": 4002 }, { "epoch": 0.9225628024890528, "grad_norm": 0.13801835477352142, "learning_rate": 1.564906161574764e-06, "loss": 0.6401, "step": 4003 }, { "epoch": 0.9227932703387878, "grad_norm": 0.13854144513607025, "learning_rate": 1.555653639896265e-06, "loss": 0.6415, "step": 4004 }, { "epoch": 0.9230237381885227, "grad_norm": 0.13430531322956085, "learning_rate": 1.5464281199336683e-06, "loss": 0.6296, "step": 4005 }, { "epoch": 0.9232542060382577, "grad_norm": 0.13435374200344086, "learning_rate": 1.5372296068290493e-06, "loss": 0.6304, "step": 4006 }, { "epoch": 0.9234846738879926, "grad_norm": 0.13501442968845367, "learning_rate": 1.5280581057094346e-06, "loss": 0.6355, "step": 4007 }, { "epoch": 0.9237151417377276, "grad_norm": 0.13476891815662384, "learning_rate": 1.518913621686807e-06, "loss": 0.6398, "step": 4008 }, { "epoch": 0.9239456095874625, "grad_norm": 0.1361880600452423, "learning_rate": 1.5097961598580845e-06, "loss": 0.6492, "step": 4009 }, { "epoch": 0.9241760774371975, "grad_norm": 0.13403740525245667, "learning_rate": 1.5007057253051127e-06, "loss": 0.6257, "step": 4010 }, { "epoch": 0.9244065452869324, "grad_norm": 0.13556121289730072, "learning_rate": 1.4916423230946885e-06, "loss": 0.6413, "step": 4011 }, { "epoch": 0.9246370131366675, "grad_norm": 0.13702526688575745, "learning_rate": 1.4826059582785324e-06, "loss": 0.6427, "step": 4012 }, { "epoch": 0.9248674809864024, "grad_norm": 0.13721799850463867, "learning_rate": 1.473596635893293e-06, "loss": 0.6494, "step": 4013 }, { "epoch": 0.9250979488361374, "grad_norm": 0.1371307075023651, "learning_rate": 1.464614360960559e-06, "loss": 0.6482, "step": 4014 }, { "epoch": 0.9253284166858723, "grad_norm": 0.1363903433084488, "learning_rate": 1.4556591384868367e-06, "loss": 0.6343, "step": 4015 }, { "epoch": 0.9255588845356073, "grad_norm": 0.14017435908317566, "learning_rate": 1.4467309734635393e-06, "loss": 0.6399, "step": 4016 }, { "epoch": 0.9257893523853422, "grad_norm": 0.13649635016918182, "learning_rate": 1.437829870867019e-06, "loss": 0.6372, "step": 4017 }, { "epoch": 0.9260198202350772, "grad_norm": 0.13363580405712128, "learning_rate": 1.4289558356585353e-06, "loss": 0.6357, "step": 4018 }, { "epoch": 0.9262502880848121, "grad_norm": 0.1385689377784729, "learning_rate": 1.4201088727842648e-06, "loss": 0.6344, "step": 4019 }, { "epoch": 0.9264807559345472, "grad_norm": 0.13566775619983673, "learning_rate": 1.411288987175291e-06, "loss": 0.646, "step": 4020 }, { "epoch": 0.9267112237842821, "grad_norm": 0.13955044746398926, "learning_rate": 1.4024961837476092e-06, "loss": 0.6411, "step": 4021 }, { "epoch": 0.9269416916340171, "grad_norm": 0.13665254414081573, "learning_rate": 1.39373046740211e-06, "loss": 0.6417, "step": 4022 }, { "epoch": 0.927172159483752, "grad_norm": 0.13750925660133362, "learning_rate": 1.384991843024591e-06, "loss": 0.6351, "step": 4023 }, { "epoch": 0.927402627333487, "grad_norm": 0.1418118178844452, "learning_rate": 1.3762803154857729e-06, "loss": 0.6324, "step": 4024 }, { "epoch": 0.9276330951832219, "grad_norm": 0.13481222093105316, "learning_rate": 1.3675958896412267e-06, "loss": 0.6325, "step": 4025 }, { "epoch": 0.9278635630329569, "grad_norm": 0.1390499770641327, "learning_rate": 1.3589385703314529e-06, "loss": 0.6369, "step": 4026 }, { "epoch": 0.9280940308826918, "grad_norm": 0.13821762800216675, "learning_rate": 1.3503083623818412e-06, "loss": 0.6386, "step": 4027 }, { "epoch": 0.9283244987324268, "grad_norm": 0.141762837767601, "learning_rate": 1.34170527060265e-06, "loss": 0.6422, "step": 4028 }, { "epoch": 0.9285549665821617, "grad_norm": 0.1358966827392578, "learning_rate": 1.3331292997890377e-06, "loss": 0.6365, "step": 4029 }, { "epoch": 0.9287854344318968, "grad_norm": 0.13388477265834808, "learning_rate": 1.3245804547210582e-06, "loss": 0.6495, "step": 4030 }, { "epoch": 0.9290159022816317, "grad_norm": 0.1355268657207489, "learning_rate": 1.3160587401636171e-06, "loss": 0.6362, "step": 4031 }, { "epoch": 0.9292463701313667, "grad_norm": 0.1358853578567505, "learning_rate": 1.3075641608665202e-06, "loss": 0.6367, "step": 4032 }, { "epoch": 0.9294768379811016, "grad_norm": 0.1385107785463333, "learning_rate": 1.2990967215644412e-06, "loss": 0.6408, "step": 4033 }, { "epoch": 0.9297073058308366, "grad_norm": 0.1369771957397461, "learning_rate": 1.2906564269769217e-06, "loss": 0.6382, "step": 4034 }, { "epoch": 0.9299377736805715, "grad_norm": 0.14050287008285522, "learning_rate": 1.282243281808393e-06, "loss": 0.6412, "step": 4035 }, { "epoch": 0.9301682415303065, "grad_norm": 0.1366300880908966, "learning_rate": 1.2738572907481315e-06, "loss": 0.6419, "step": 4036 }, { "epoch": 0.9303987093800414, "grad_norm": 0.1353761851787567, "learning_rate": 1.2654984584702766e-06, "loss": 0.6393, "step": 4037 }, { "epoch": 0.9306291772297764, "grad_norm": 0.18008558452129364, "learning_rate": 1.2571667896338624e-06, "loss": 0.6354, "step": 4038 }, { "epoch": 0.9308596450795114, "grad_norm": 0.13674607872962952, "learning_rate": 1.2488622888827517e-06, "loss": 0.6426, "step": 4039 }, { "epoch": 0.9310901129292464, "grad_norm": 0.13963568210601807, "learning_rate": 1.24058496084567e-06, "loss": 0.6333, "step": 4040 }, { "epoch": 0.9313205807789813, "grad_norm": 0.1362845003604889, "learning_rate": 1.2323348101362043e-06, "loss": 0.6435, "step": 4041 }, { "epoch": 0.9315510486287163, "grad_norm": 0.1407993733882904, "learning_rate": 1.224111841352793e-06, "loss": 0.6403, "step": 4042 }, { "epoch": 0.9317815164784513, "grad_norm": 0.13926169276237488, "learning_rate": 1.2159160590787143e-06, "loss": 0.6412, "step": 4043 }, { "epoch": 0.9320119843281862, "grad_norm": 0.13823188841342926, "learning_rate": 1.2077474678821088e-06, "loss": 0.6452, "step": 4044 }, { "epoch": 0.9322424521779212, "grad_norm": 0.1374104917049408, "learning_rate": 1.1996060723159508e-06, "loss": 0.6343, "step": 4045 }, { "epoch": 0.9324729200276561, "grad_norm": 0.13301199674606323, "learning_rate": 1.1914918769180606e-06, "loss": 0.6373, "step": 4046 }, { "epoch": 0.9327033878773912, "grad_norm": 0.136099711060524, "learning_rate": 1.1834048862110814e-06, "loss": 0.6435, "step": 4047 }, { "epoch": 0.9329338557271261, "grad_norm": 0.140198215842247, "learning_rate": 1.17534510470253e-06, "loss": 0.6392, "step": 4048 }, { "epoch": 0.9331643235768611, "grad_norm": 0.1340055614709854, "learning_rate": 1.1673125368847238e-06, "loss": 0.6425, "step": 4049 }, { "epoch": 0.933394791426596, "grad_norm": 0.14197507500648499, "learning_rate": 1.1593071872348204e-06, "loss": 0.6369, "step": 4050 }, { "epoch": 0.933625259276331, "grad_norm": 0.13812677562236786, "learning_rate": 1.1513290602148174e-06, "loss": 0.6403, "step": 4051 }, { "epoch": 0.9338557271260659, "grad_norm": 0.1445024460554123, "learning_rate": 1.1433781602715189e-06, "loss": 0.6346, "step": 4052 }, { "epoch": 0.9340861949758009, "grad_norm": 0.13985450565814972, "learning_rate": 1.1354544918365795e-06, "loss": 0.643, "step": 4053 }, { "epoch": 0.9343166628255358, "grad_norm": 0.13591651618480682, "learning_rate": 1.1275580593264611e-06, "loss": 0.6331, "step": 4054 }, { "epoch": 0.9345471306752708, "grad_norm": 0.13689643144607544, "learning_rate": 1.1196888671424377e-06, "loss": 0.6402, "step": 4055 }, { "epoch": 0.9347775985250057, "grad_norm": 0.13568592071533203, "learning_rate": 1.111846919670606e-06, "loss": 0.6353, "step": 4056 }, { "epoch": 0.9350080663747408, "grad_norm": 0.13714367151260376, "learning_rate": 1.1040322212818922e-06, "loss": 0.636, "step": 4057 }, { "epoch": 0.9352385342244757, "grad_norm": 0.13835318386554718, "learning_rate": 1.096244776332006e-06, "loss": 0.6393, "step": 4058 }, { "epoch": 0.9354690020742107, "grad_norm": 0.14133679866790771, "learning_rate": 1.0884845891614925e-06, "loss": 0.6446, "step": 4059 }, { "epoch": 0.9356994699239456, "grad_norm": 0.13992494344711304, "learning_rate": 1.0807516640956972e-06, "loss": 0.6326, "step": 4060 }, { "epoch": 0.9359299377736806, "grad_norm": 0.13520614802837372, "learning_rate": 1.0730460054447612e-06, "loss": 0.6325, "step": 4061 }, { "epoch": 0.9361604056234155, "grad_norm": 0.13457529246807098, "learning_rate": 1.065367617503621e-06, "loss": 0.634, "step": 4062 }, { "epoch": 0.9363908734731505, "grad_norm": 0.1412680298089981, "learning_rate": 1.057716504552053e-06, "loss": 0.6391, "step": 4063 }, { "epoch": 0.9366213413228854, "grad_norm": 0.13816601037979126, "learning_rate": 1.0500926708545855e-06, "loss": 0.6334, "step": 4064 }, { "epoch": 0.9368518091726205, "grad_norm": 0.138847216963768, "learning_rate": 1.0424961206605632e-06, "loss": 0.6358, "step": 4065 }, { "epoch": 0.9370822770223554, "grad_norm": 0.13748426735401154, "learning_rate": 1.0349268582041161e-06, "loss": 0.6406, "step": 4066 }, { "epoch": 0.9373127448720904, "grad_norm": 0.13643766939640045, "learning_rate": 1.0273848877041802e-06, "loss": 0.6331, "step": 4067 }, { "epoch": 0.9375432127218253, "grad_norm": 0.1338902860879898, "learning_rate": 1.0198702133644656e-06, "loss": 0.639, "step": 4068 }, { "epoch": 0.9377736805715603, "grad_norm": 0.13689804077148438, "learning_rate": 1.0123828393734714e-06, "loss": 0.6414, "step": 4069 }, { "epoch": 0.9380041484212952, "grad_norm": 0.14404194056987762, "learning_rate": 1.0049227699044762e-06, "loss": 0.6427, "step": 4070 }, { "epoch": 0.9382346162710302, "grad_norm": 0.1352960616350174, "learning_rate": 9.974900091155425e-07, "loss": 0.6478, "step": 4071 }, { "epoch": 0.9384650841207651, "grad_norm": 0.1446170061826706, "learning_rate": 9.90084561149518e-07, "loss": 0.6336, "step": 4072 }, { "epoch": 0.9386955519705001, "grad_norm": 0.14939038455486298, "learning_rate": 9.827064301340228e-07, "loss": 0.6342, "step": 4073 }, { "epoch": 0.938926019820235, "grad_norm": 0.1382802277803421, "learning_rate": 9.75355620181445e-07, "loss": 0.6345, "step": 4074 }, { "epoch": 0.9391564876699701, "grad_norm": 0.1343778669834137, "learning_rate": 9.680321353889576e-07, "loss": 0.6438, "step": 4075 }, { "epoch": 0.939386955519705, "grad_norm": 0.13062545657157898, "learning_rate": 9.607359798384785e-07, "loss": 0.6332, "step": 4076 }, { "epoch": 0.93961742336944, "grad_norm": 0.1372269243001938, "learning_rate": 9.534671575967213e-07, "loss": 0.6382, "step": 4077 }, { "epoch": 0.9398478912191749, "grad_norm": 0.13971804082393646, "learning_rate": 9.46225672715162e-07, "loss": 0.6306, "step": 4078 }, { "epoch": 0.9400783590689099, "grad_norm": 0.13551059365272522, "learning_rate": 9.390115292300162e-07, "loss": 0.6438, "step": 4079 }, { "epoch": 0.9403088269186448, "grad_norm": 0.13725043833255768, "learning_rate": 9.318247311622785e-07, "loss": 0.6437, "step": 4080 }, { "epoch": 0.9405392947683798, "grad_norm": 0.13926532864570618, "learning_rate": 9.246652825176949e-07, "loss": 0.6427, "step": 4081 }, { "epoch": 0.9407697626181147, "grad_norm": 0.14660166203975677, "learning_rate": 9.175331872867732e-07, "loss": 0.6462, "step": 4082 }, { "epoch": 0.9410002304678498, "grad_norm": 0.13765586912631989, "learning_rate": 9.104284494447779e-07, "loss": 0.6419, "step": 4083 }, { "epoch": 0.9412306983175847, "grad_norm": 0.13550062477588654, "learning_rate": 9.033510729517136e-07, "loss": 0.6526, "step": 4084 }, { "epoch": 0.9414611661673197, "grad_norm": 0.20040088891983032, "learning_rate": 8.96301061752336e-07, "loss": 0.6438, "step": 4085 }, { "epoch": 0.9416916340170546, "grad_norm": 0.13654790818691254, "learning_rate": 8.892784197761572e-07, "loss": 0.629, "step": 4086 }, { "epoch": 0.9419221018667896, "grad_norm": 0.13696341216564178, "learning_rate": 8.822831509374297e-07, "loss": 0.6424, "step": 4087 }, { "epoch": 0.9421525697165245, "grad_norm": 0.13563202321529388, "learning_rate": 8.753152591351455e-07, "loss": 0.642, "step": 4088 }, { "epoch": 0.9423830375662595, "grad_norm": 0.13841907680034637, "learning_rate": 8.683747482530424e-07, "loss": 0.6395, "step": 4089 }, { "epoch": 0.9426135054159944, "grad_norm": 0.13629648089408875, "learning_rate": 8.614616221595983e-07, "loss": 0.6394, "step": 4090 }, { "epoch": 0.9428439732657294, "grad_norm": 0.13527196645736694, "learning_rate": 8.545758847080143e-07, "loss": 0.6384, "step": 4091 }, { "epoch": 0.9430744411154643, "grad_norm": 0.13530802726745605, "learning_rate": 8.47717539736237e-07, "loss": 0.6322, "step": 4092 }, { "epoch": 0.9433049089651994, "grad_norm": 0.1388748586177826, "learning_rate": 8.408865910669583e-07, "loss": 0.6369, "step": 4093 }, { "epoch": 0.9435353768149343, "grad_norm": 0.13674351572990417, "learning_rate": 8.340830425075663e-07, "loss": 0.6384, "step": 4094 }, { "epoch": 0.9437658446646693, "grad_norm": 0.13656572997570038, "learning_rate": 8.273068978501996e-07, "loss": 0.6455, "step": 4095 }, { "epoch": 0.9439963125144042, "grad_norm": 0.13279734551906586, "learning_rate": 8.205581608717261e-07, "loss": 0.633, "step": 4096 }, { "epoch": 0.9442267803641392, "grad_norm": 0.14017152786254883, "learning_rate": 8.138368353337255e-07, "loss": 0.6338, "step": 4097 }, { "epoch": 0.9444572482138741, "grad_norm": 0.13749238848686218, "learning_rate": 8.071429249825013e-07, "loss": 0.6366, "step": 4098 }, { "epoch": 0.9446877160636091, "grad_norm": 0.1360204666852951, "learning_rate": 8.004764335490856e-07, "loss": 0.6405, "step": 4099 }, { "epoch": 0.944918183913344, "grad_norm": 0.14251859486103058, "learning_rate": 7.938373647492115e-07, "loss": 0.6426, "step": 4100 }, { "epoch": 0.945148651763079, "grad_norm": 0.13560590147972107, "learning_rate": 7.872257222833357e-07, "loss": 0.6376, "step": 4101 }, { "epoch": 0.9453791196128141, "grad_norm": 0.1355055868625641, "learning_rate": 7.806415098366438e-07, "loss": 0.6379, "step": 4102 }, { "epoch": 0.945609587462549, "grad_norm": 0.13701874017715454, "learning_rate": 7.74084731079e-07, "loss": 0.6298, "step": 4103 }, { "epoch": 0.945840055312284, "grad_norm": 0.13840676844120026, "learning_rate": 7.67555389665009e-07, "loss": 0.629, "step": 4104 }, { "epoch": 0.9460705231620189, "grad_norm": 0.13396546244621277, "learning_rate": 7.61053489233965e-07, "loss": 0.6323, "step": 4105 }, { "epoch": 0.9463009910117539, "grad_norm": 0.1397581547498703, "learning_rate": 7.545790334098579e-07, "loss": 0.6427, "step": 4106 }, { "epoch": 0.9465314588614888, "grad_norm": 0.13133475184440613, "learning_rate": 7.481320258014124e-07, "loss": 0.6424, "step": 4107 }, { "epoch": 0.9467619267112238, "grad_norm": 0.13664543628692627, "learning_rate": 7.417124700020373e-07, "loss": 0.6419, "step": 4108 }, { "epoch": 0.9469923945609587, "grad_norm": 0.13687404990196228, "learning_rate": 7.353203695898203e-07, "loss": 0.631, "step": 4109 }, { "epoch": 0.9472228624106938, "grad_norm": 0.13796484470367432, "learning_rate": 7.289557281275782e-07, "loss": 0.6416, "step": 4110 }, { "epoch": 0.9474533302604287, "grad_norm": 0.13329102098941803, "learning_rate": 7.226185491628069e-07, "loss": 0.6261, "step": 4111 }, { "epoch": 0.9476837981101637, "grad_norm": 0.13241469860076904, "learning_rate": 7.163088362276971e-07, "loss": 0.6364, "step": 4112 }, { "epoch": 0.9479142659598986, "grad_norm": 0.13166265189647675, "learning_rate": 7.100265928391303e-07, "loss": 0.6389, "step": 4113 }, { "epoch": 0.9481447338096336, "grad_norm": 0.13284790515899658, "learning_rate": 7.037718224986833e-07, "loss": 0.6334, "step": 4114 }, { "epoch": 0.9483752016593685, "grad_norm": 0.13939420878887177, "learning_rate": 6.975445286926063e-07, "loss": 0.6227, "step": 4115 }, { "epoch": 0.9486056695091035, "grad_norm": 0.13803811371326447, "learning_rate": 6.913447148918506e-07, "loss": 0.6405, "step": 4116 }, { "epoch": 0.9488361373588384, "grad_norm": 0.13367731869220734, "learning_rate": 6.851723845520408e-07, "loss": 0.6311, "step": 4117 }, { "epoch": 0.9490666052085734, "grad_norm": 0.13658930361270905, "learning_rate": 6.790275411134861e-07, "loss": 0.6459, "step": 4118 }, { "epoch": 0.9492970730583083, "grad_norm": 0.13773563504219055, "learning_rate": 6.729101880011746e-07, "loss": 0.6382, "step": 4119 }, { "epoch": 0.9495275409080434, "grad_norm": 0.13418744504451752, "learning_rate": 6.668203286247732e-07, "loss": 0.6321, "step": 4120 }, { "epoch": 0.9497580087577783, "grad_norm": 0.13385219871997833, "learning_rate": 6.607579663786223e-07, "loss": 0.6343, "step": 4121 }, { "epoch": 0.9499884766075133, "grad_norm": 0.13633115589618683, "learning_rate": 6.547231046417357e-07, "loss": 0.6265, "step": 4122 }, { "epoch": 0.9502189444572482, "grad_norm": 0.13809369504451752, "learning_rate": 6.48715746777806e-07, "loss": 0.6444, "step": 4123 }, { "epoch": 0.9504494123069832, "grad_norm": 0.13296149671077728, "learning_rate": 6.42735896135177e-07, "loss": 0.6357, "step": 4124 }, { "epoch": 0.9506798801567181, "grad_norm": 0.13449513912200928, "learning_rate": 6.367835560468938e-07, "loss": 0.6401, "step": 4125 }, { "epoch": 0.9509103480064531, "grad_norm": 0.13610585033893585, "learning_rate": 6.308587298306301e-07, "loss": 0.6376, "step": 4126 }, { "epoch": 0.951140815856188, "grad_norm": 0.13235846161842346, "learning_rate": 6.2496142078875e-07, "loss": 0.6397, "step": 4127 }, { "epoch": 0.951371283705923, "grad_norm": 0.13302072882652283, "learning_rate": 6.19091632208274e-07, "loss": 0.634, "step": 4128 }, { "epoch": 0.951601751555658, "grad_norm": 0.13481658697128296, "learning_rate": 6.132493673608797e-07, "loss": 0.6375, "step": 4129 }, { "epoch": 0.951832219405393, "grad_norm": 0.139765664935112, "learning_rate": 6.074346295028955e-07, "loss": 0.6346, "step": 4130 }, { "epoch": 0.9520626872551279, "grad_norm": 0.13490067422389984, "learning_rate": 6.016474218753288e-07, "loss": 0.6376, "step": 4131 }, { "epoch": 0.9522931551048629, "grad_norm": 0.13335686922073364, "learning_rate": 5.958877477038327e-07, "loss": 0.6462, "step": 4132 }, { "epoch": 0.9525236229545978, "grad_norm": 0.1309853196144104, "learning_rate": 5.901556101987005e-07, "loss": 0.632, "step": 4133 }, { "epoch": 0.9527540908043328, "grad_norm": 0.13043320178985596, "learning_rate": 5.84451012554893e-07, "loss": 0.6474, "step": 4134 }, { "epoch": 0.9529845586540677, "grad_norm": 0.13605879247188568, "learning_rate": 5.787739579520113e-07, "loss": 0.6376, "step": 4135 }, { "epoch": 0.9532150265038027, "grad_norm": 0.13562782108783722, "learning_rate": 5.731244495543186e-07, "loss": 0.6292, "step": 4136 }, { "epoch": 0.9534454943535376, "grad_norm": 0.1376948058605194, "learning_rate": 5.675024905107129e-07, "loss": 0.641, "step": 4137 }, { "epoch": 0.9536759622032727, "grad_norm": 0.13430991768836975, "learning_rate": 5.619080839547375e-07, "loss": 0.6371, "step": 4138 }, { "epoch": 0.9539064300530076, "grad_norm": 0.1342536360025406, "learning_rate": 5.563412330045758e-07, "loss": 0.6447, "step": 4139 }, { "epoch": 0.9541368979027426, "grad_norm": 0.13564327359199524, "learning_rate": 5.508019407630572e-07, "loss": 0.6422, "step": 4140 }, { "epoch": 0.9543673657524775, "grad_norm": 0.1371186226606369, "learning_rate": 5.452902103176616e-07, "loss": 0.6401, "step": 4141 }, { "epoch": 0.9545978336022125, "grad_norm": 0.13882949948310852, "learning_rate": 5.398060447404818e-07, "loss": 0.6294, "step": 4142 }, { "epoch": 0.9548283014519474, "grad_norm": 0.14043927192687988, "learning_rate": 5.343494470882671e-07, "loss": 0.6451, "step": 4143 }, { "epoch": 0.9550587693016824, "grad_norm": 0.13908624649047852, "learning_rate": 5.289204204023957e-07, "loss": 0.6357, "step": 4144 }, { "epoch": 0.9552892371514173, "grad_norm": 0.13900399208068848, "learning_rate": 5.235189677088692e-07, "loss": 0.6432, "step": 4145 }, { "epoch": 0.9555197050011524, "grad_norm": 0.13600599765777588, "learning_rate": 5.18145092018335e-07, "loss": 0.6311, "step": 4146 }, { "epoch": 0.9557501728508873, "grad_norm": 0.1335585117340088, "learning_rate": 5.127987963260583e-07, "loss": 0.6355, "step": 4147 }, { "epoch": 0.9559806407006223, "grad_norm": 0.1321658492088318, "learning_rate": 5.074800836119442e-07, "loss": 0.6348, "step": 4148 }, { "epoch": 0.9562111085503572, "grad_norm": 0.13960915803909302, "learning_rate": 5.021889568404991e-07, "loss": 0.6346, "step": 4149 }, { "epoch": 0.9564415764000922, "grad_norm": 0.13837899267673492, "learning_rate": 4.969254189608863e-07, "loss": 0.6393, "step": 4150 }, { "epoch": 0.9566720442498271, "grad_norm": 0.13521605730056763, "learning_rate": 4.916894729068644e-07, "loss": 0.6338, "step": 4151 }, { "epoch": 0.9569025120995621, "grad_norm": 0.13600218296051025, "learning_rate": 4.864811215968324e-07, "loss": 0.6392, "step": 4152 }, { "epoch": 0.957132979949297, "grad_norm": 0.1411590576171875, "learning_rate": 4.813003679337957e-07, "loss": 0.6352, "step": 4153 }, { "epoch": 0.957363447799032, "grad_norm": 0.13098092377185822, "learning_rate": 4.761472148053836e-07, "loss": 0.637, "step": 4154 }, { "epoch": 0.957593915648767, "grad_norm": 0.13644741475582123, "learning_rate": 4.710216650838317e-07, "loss": 0.6348, "step": 4155 }, { "epoch": 0.957824383498502, "grad_norm": 0.13410955667495728, "learning_rate": 4.6592372162601037e-07, "loss": 0.6394, "step": 4156 }, { "epoch": 0.9580548513482369, "grad_norm": 0.13613279163837433, "learning_rate": 4.608533872733911e-07, "loss": 0.642, "step": 4157 }, { "epoch": 0.9582853191979719, "grad_norm": 0.13792695105075836, "learning_rate": 4.558106648520466e-07, "loss": 0.6296, "step": 4158 }, { "epoch": 0.9585157870477068, "grad_norm": 0.14079563319683075, "learning_rate": 4.5079555717267854e-07, "loss": 0.649, "step": 4159 }, { "epoch": 0.9587462548974418, "grad_norm": 0.13565976917743683, "learning_rate": 4.4580806703057865e-07, "loss": 0.6424, "step": 4160 }, { "epoch": 0.9589767227471768, "grad_norm": 0.13566036522388458, "learning_rate": 4.408481972056622e-07, "loss": 0.6388, "step": 4161 }, { "epoch": 0.9592071905969117, "grad_norm": 0.1372612714767456, "learning_rate": 4.3591595046243994e-07, "loss": 0.6375, "step": 4162 }, { "epoch": 0.9594376584466467, "grad_norm": 0.13461288809776306, "learning_rate": 4.3101132955002396e-07, "loss": 0.635, "step": 4163 }, { "epoch": 0.9596681262963817, "grad_norm": 0.1348971575498581, "learning_rate": 4.2613433720213316e-07, "loss": 0.6376, "step": 4164 }, { "epoch": 0.9598985941461167, "grad_norm": 0.13625575602054596, "learning_rate": 4.212849761370874e-07, "loss": 0.6393, "step": 4165 }, { "epoch": 0.9601290619958516, "grad_norm": 0.13304929435253143, "learning_rate": 4.16463249057808e-07, "loss": 0.6365, "step": 4166 }, { "epoch": 0.9603595298455866, "grad_norm": 0.13466964662075043, "learning_rate": 4.116691586518062e-07, "loss": 0.6374, "step": 4167 }, { "epoch": 0.9605899976953215, "grad_norm": 0.1362065076828003, "learning_rate": 4.0690270759119464e-07, "loss": 0.6364, "step": 4168 }, { "epoch": 0.9608204655450565, "grad_norm": 0.13293537497520447, "learning_rate": 4.021638985326759e-07, "loss": 0.6401, "step": 4169 }, { "epoch": 0.9610509333947914, "grad_norm": 0.13475392758846283, "learning_rate": 3.974527341175427e-07, "loss": 0.6275, "step": 4170 }, { "epoch": 0.9612814012445264, "grad_norm": 0.13077649474143982, "learning_rate": 3.9276921697169455e-07, "loss": 0.6325, "step": 4171 }, { "epoch": 0.9615118690942613, "grad_norm": 0.13671356439590454, "learning_rate": 3.8811334970561553e-07, "loss": 0.6435, "step": 4172 }, { "epoch": 0.9617423369439964, "grad_norm": 0.1355324536561966, "learning_rate": 3.834851349143631e-07, "loss": 0.6361, "step": 4173 }, { "epoch": 0.9619728047937313, "grad_norm": 0.13314592838287354, "learning_rate": 3.78884575177596e-07, "loss": 0.6362, "step": 4174 }, { "epoch": 0.9622032726434663, "grad_norm": 0.14188657701015472, "learning_rate": 3.743116730595575e-07, "loss": 0.6382, "step": 4175 }, { "epoch": 0.9624337404932012, "grad_norm": 0.133900985121727, "learning_rate": 3.697664311090754e-07, "loss": 0.6374, "step": 4176 }, { "epoch": 0.9626642083429362, "grad_norm": 0.1333308219909668, "learning_rate": 3.6524885185955647e-07, "loss": 0.6408, "step": 4177 }, { "epoch": 0.9628946761926711, "grad_norm": 0.13134267926216125, "learning_rate": 3.6075893782899217e-07, "loss": 0.632, "step": 4178 }, { "epoch": 0.9631251440424061, "grad_norm": 0.13069699704647064, "learning_rate": 3.5629669151994725e-07, "loss": 0.6364, "step": 4179 }, { "epoch": 0.963355611892141, "grad_norm": 0.1325685828924179, "learning_rate": 3.518621154195767e-07, "loss": 0.6371, "step": 4180 }, { "epoch": 0.963586079741876, "grad_norm": 0.13809578120708466, "learning_rate": 3.4745521199960884e-07, "loss": 0.6378, "step": 4181 }, { "epoch": 0.963816547591611, "grad_norm": 0.13212169706821442, "learning_rate": 3.4307598371633445e-07, "loss": 0.6327, "step": 4182 }, { "epoch": 0.964047015441346, "grad_norm": 0.1352756917476654, "learning_rate": 3.387244330106454e-07, "loss": 0.6443, "step": 4183 }, { "epoch": 0.9642774832910809, "grad_norm": 0.1354503035545349, "learning_rate": 3.3440056230797933e-07, "loss": 0.6386, "step": 4184 }, { "epoch": 0.9645079511408159, "grad_norm": 0.13389207422733307, "learning_rate": 3.30104374018364e-07, "loss": 0.6328, "step": 4185 }, { "epoch": 0.9647384189905508, "grad_norm": 0.13191154599189758, "learning_rate": 3.2583587053638955e-07, "loss": 0.6486, "step": 4186 }, { "epoch": 0.9649688868402858, "grad_norm": 0.1348099559545517, "learning_rate": 3.2159505424122495e-07, "loss": 0.6407, "step": 4187 }, { "epoch": 0.9651993546900207, "grad_norm": 0.135834202170372, "learning_rate": 3.1738192749658503e-07, "loss": 0.6347, "step": 4188 }, { "epoch": 0.9654298225397557, "grad_norm": 0.13616180419921875, "learning_rate": 3.131964926507747e-07, "loss": 0.6374, "step": 4189 }, { "epoch": 0.9656602903894906, "grad_norm": 0.13497985899448395, "learning_rate": 3.0903875203665556e-07, "loss": 0.6316, "step": 4190 }, { "epoch": 0.9658907582392257, "grad_norm": 0.13490994274616241, "learning_rate": 3.049087079716462e-07, "loss": 0.639, "step": 4191 }, { "epoch": 0.9661212260889606, "grad_norm": 0.13283471763134003, "learning_rate": 3.0080636275774397e-07, "loss": 0.6342, "step": 4192 }, { "epoch": 0.9663516939386956, "grad_norm": 0.1357053518295288, "learning_rate": 2.967317186814922e-07, "loss": 0.6337, "step": 4193 }, { "epoch": 0.9665821617884305, "grad_norm": 0.13123416900634766, "learning_rate": 2.926847780139907e-07, "loss": 0.6394, "step": 4194 }, { "epoch": 0.9668126296381655, "grad_norm": 0.13529904186725616, "learning_rate": 2.8866554301091866e-07, "loss": 0.6258, "step": 4195 }, { "epoch": 0.9670430974879004, "grad_norm": 0.13570614159107208, "learning_rate": 2.846740159125061e-07, "loss": 0.6341, "step": 4196 }, { "epoch": 0.9672735653376354, "grad_norm": 0.13483721017837524, "learning_rate": 2.807101989435179e-07, "loss": 0.6394, "step": 4197 }, { "epoch": 0.9675040331873703, "grad_norm": 0.13158808648586273, "learning_rate": 2.767740943133035e-07, "loss": 0.6317, "step": 4198 }, { "epoch": 0.9677345010371053, "grad_norm": 0.135604590177536, "learning_rate": 2.7286570421574677e-07, "loss": 0.6362, "step": 4199 }, { "epoch": 0.9679649688868402, "grad_norm": 0.13495594263076782, "learning_rate": 2.6898503082929406e-07, "loss": 0.6278, "step": 4200 }, { "epoch": 0.9681954367365753, "grad_norm": 0.13914699852466583, "learning_rate": 2.6513207631693184e-07, "loss": 0.6368, "step": 4201 }, { "epoch": 0.9684259045863102, "grad_norm": 0.1364966183900833, "learning_rate": 2.6130684282621995e-07, "loss": 0.6334, "step": 4202 }, { "epoch": 0.9686563724360452, "grad_norm": 0.1355530023574829, "learning_rate": 2.575093324892364e-07, "loss": 0.637, "step": 4203 }, { "epoch": 0.9688868402857801, "grad_norm": 0.1372985690832138, "learning_rate": 2.5373954742263227e-07, "loss": 0.6267, "step": 4204 }, { "epoch": 0.9691173081355151, "grad_norm": 0.1355789601802826, "learning_rate": 2.4999748972758805e-07, "loss": 0.6418, "step": 4205 }, { "epoch": 0.96934777598525, "grad_norm": 0.13223963975906372, "learning_rate": 2.462831614898409e-07, "loss": 0.6339, "step": 4206 }, { "epoch": 0.969578243834985, "grad_norm": 0.1328386813402176, "learning_rate": 2.42596564779668e-07, "loss": 0.6352, "step": 4207 }, { "epoch": 0.9698087116847199, "grad_norm": 0.13580402731895447, "learning_rate": 2.3893770165189235e-07, "loss": 0.634, "step": 4208 }, { "epoch": 0.970039179534455, "grad_norm": 0.13300515711307526, "learning_rate": 2.3530657414586598e-07, "loss": 0.6409, "step": 4209 }, { "epoch": 0.9702696473841899, "grad_norm": 0.13167451322078705, "learning_rate": 2.317031842855033e-07, "loss": 0.6353, "step": 4210 }, { "epoch": 0.9705001152339249, "grad_norm": 0.13538329303264618, "learning_rate": 2.281275340792477e-07, "loss": 0.6342, "step": 4211 }, { "epoch": 0.9707305830836598, "grad_norm": 0.1796192079782486, "learning_rate": 2.2457962552007162e-07, "loss": 0.6347, "step": 4212 }, { "epoch": 0.9709610509333948, "grad_norm": 0.13724969327449799, "learning_rate": 2.2105946058549876e-07, "loss": 0.634, "step": 4213 }, { "epoch": 0.9711915187831297, "grad_norm": 0.13621461391448975, "learning_rate": 2.1756704123758742e-07, "loss": 0.641, "step": 4214 }, { "epoch": 0.9714219866328647, "grad_norm": 0.13638457655906677, "learning_rate": 2.141023694229305e-07, "loss": 0.6366, "step": 4215 }, { "epoch": 0.9716524544825996, "grad_norm": 0.1362111121416092, "learning_rate": 2.1066544707264435e-07, "loss": 0.6483, "step": 4216 }, { "epoch": 0.9718829223323346, "grad_norm": 0.13554121553897858, "learning_rate": 2.0725627610239107e-07, "loss": 0.6455, "step": 4217 }, { "epoch": 0.9721133901820695, "grad_norm": 0.13165734708309174, "learning_rate": 2.038748584123562e-07, "loss": 0.6362, "step": 4218 }, { "epoch": 0.9723438580318046, "grad_norm": 0.13413430750370026, "learning_rate": 2.0052119588727103e-07, "loss": 0.6415, "step": 4219 }, { "epoch": 0.9725743258815395, "grad_norm": 0.13295698165893555, "learning_rate": 1.9719529039637919e-07, "loss": 0.6413, "step": 4220 }, { "epoch": 0.9728047937312745, "grad_norm": 0.1375885158777237, "learning_rate": 1.9389714379346446e-07, "loss": 0.6374, "step": 4221 }, { "epoch": 0.9730352615810095, "grad_norm": 0.1361759752035141, "learning_rate": 1.906267579168286e-07, "loss": 0.6349, "step": 4222 }, { "epoch": 0.9732657294307444, "grad_norm": 0.13398383557796478, "learning_rate": 1.8738413458931347e-07, "loss": 0.6336, "step": 4223 }, { "epoch": 0.9734961972804794, "grad_norm": 0.13173305988311768, "learning_rate": 1.8416927561827336e-07, "loss": 0.6304, "step": 4224 }, { "epoch": 0.9737266651302143, "grad_norm": 0.13570670783519745, "learning_rate": 1.8098218279559708e-07, "loss": 0.6381, "step": 4225 }, { "epoch": 0.9739571329799493, "grad_norm": 0.13517099618911743, "learning_rate": 1.7782285789769147e-07, "loss": 0.6361, "step": 4226 }, { "epoch": 0.9741876008296843, "grad_norm": 0.1324930042028427, "learning_rate": 1.7469130268549238e-07, "loss": 0.6377, "step": 4227 }, { "epoch": 0.9744180686794193, "grad_norm": 0.1336393505334854, "learning_rate": 1.7158751890444803e-07, "loss": 0.6356, "step": 4228 }, { "epoch": 0.9746485365291542, "grad_norm": 0.13054290413856506, "learning_rate": 1.6851150828453566e-07, "loss": 0.64, "step": 4229 }, { "epoch": 0.9748790043788892, "grad_norm": 0.130965456366539, "learning_rate": 1.6546327254025052e-07, "loss": 0.6411, "step": 4230 }, { "epoch": 0.9751094722286241, "grad_norm": 0.13424457609653473, "learning_rate": 1.6244281337060574e-07, "loss": 0.6407, "step": 4231 }, { "epoch": 0.9753399400783591, "grad_norm": 0.1327114850282669, "learning_rate": 1.5945013245913799e-07, "loss": 0.646, "step": 4232 }, { "epoch": 0.975570407928094, "grad_norm": 0.13304896652698517, "learning_rate": 1.5648523147388516e-07, "loss": 0.6394, "step": 4233 }, { "epoch": 0.975800875777829, "grad_norm": 0.1386221945285797, "learning_rate": 1.5354811206741427e-07, "loss": 0.645, "step": 4234 }, { "epoch": 0.9760313436275639, "grad_norm": 0.13558200001716614, "learning_rate": 1.5063877587681019e-07, "loss": 0.6303, "step": 4235 }, { "epoch": 0.976261811477299, "grad_norm": 0.13395509123802185, "learning_rate": 1.4775722452366468e-07, "loss": 0.6372, "step": 4236 }, { "epoch": 0.9764922793270339, "grad_norm": 0.1313483864068985, "learning_rate": 1.4490345961408746e-07, "loss": 0.634, "step": 4237 }, { "epoch": 0.9767227471767689, "grad_norm": 0.13300201296806335, "learning_rate": 1.4207748273868948e-07, "loss": 0.6448, "step": 4238 }, { "epoch": 0.9769532150265038, "grad_norm": 0.1316831409931183, "learning_rate": 1.3927929547261632e-07, "loss": 0.6384, "step": 4239 }, { "epoch": 0.9771836828762388, "grad_norm": 0.13369691371917725, "learning_rate": 1.365088993755037e-07, "loss": 0.6355, "step": 4240 }, { "epoch": 0.9774141507259737, "grad_norm": 0.13882844150066376, "learning_rate": 1.337662959914998e-07, "loss": 0.6482, "step": 4241 }, { "epoch": 0.9776446185757087, "grad_norm": 0.13777481019496918, "learning_rate": 1.3105148684927072e-07, "loss": 0.6451, "step": 4242 }, { "epoch": 0.9778750864254436, "grad_norm": 0.1354648768901825, "learning_rate": 1.283644734619893e-07, "loss": 0.6485, "step": 4243 }, { "epoch": 0.9781055542751786, "grad_norm": 0.13480645418167114, "learning_rate": 1.257052573273243e-07, "loss": 0.6384, "step": 4244 }, { "epoch": 0.9783360221249136, "grad_norm": 0.1347130984067917, "learning_rate": 1.2307383992746225e-07, "loss": 0.6335, "step": 4245 }, { "epoch": 0.9785664899746486, "grad_norm": 0.13522972166538239, "learning_rate": 1.2047022272909102e-07, "loss": 0.6338, "step": 4246 }, { "epoch": 0.9787969578243835, "grad_norm": 0.137363463640213, "learning_rate": 1.1789440718341093e-07, "loss": 0.6306, "step": 4247 }, { "epoch": 0.9790274256741185, "grad_norm": 0.13565847277641296, "learning_rate": 1.1534639472611242e-07, "loss": 0.6334, "step": 4248 }, { "epoch": 0.9792578935238534, "grad_norm": 0.1334226429462433, "learning_rate": 1.1282618677739831e-07, "loss": 0.6332, "step": 4249 }, { "epoch": 0.9794883613735884, "grad_norm": 0.13705900311470032, "learning_rate": 1.1033378474197276e-07, "loss": 0.6332, "step": 4250 }, { "epoch": 0.9797188292233233, "grad_norm": 0.1340656876564026, "learning_rate": 1.0786919000903562e-07, "loss": 0.6424, "step": 4251 }, { "epoch": 0.9799492970730583, "grad_norm": 0.13730952143669128, "learning_rate": 1.054324039523047e-07, "loss": 0.6377, "step": 4252 }, { "epoch": 0.9801797649227932, "grad_norm": 0.1353830099105835, "learning_rate": 1.0302342792997688e-07, "loss": 0.6389, "step": 4253 }, { "epoch": 0.9804102327725283, "grad_norm": 0.13165293633937836, "learning_rate": 1.0064226328476145e-07, "loss": 0.6366, "step": 4254 }, { "epoch": 0.9806407006222632, "grad_norm": 0.12924884259700775, "learning_rate": 9.828891134385786e-08, "loss": 0.6363, "step": 4255 }, { "epoch": 0.9808711684719982, "grad_norm": 0.13422457873821259, "learning_rate": 9.596337341897243e-08, "loss": 0.635, "step": 4256 }, { "epoch": 0.9811016363217331, "grad_norm": 0.1396060436964035, "learning_rate": 9.366565080630163e-08, "loss": 0.6328, "step": 4257 }, { "epoch": 0.9813321041714681, "grad_norm": 0.1325884759426117, "learning_rate": 9.139574478654322e-08, "loss": 0.6278, "step": 4258 }, { "epoch": 0.981562572021203, "grad_norm": 0.13009661436080933, "learning_rate": 8.915365662488518e-08, "loss": 0.6329, "step": 4259 }, { "epoch": 0.981793039870938, "grad_norm": 0.13182280957698822, "learning_rate": 8.693938757101672e-08, "loss": 0.6293, "step": 4260 }, { "epoch": 0.9820235077206729, "grad_norm": 0.13452081382274628, "learning_rate": 8.475293885911173e-08, "loss": 0.6298, "step": 4261 }, { "epoch": 0.982253975570408, "grad_norm": 0.13344185054302216, "learning_rate": 8.259431170785647e-08, "loss": 0.6335, "step": 4262 }, { "epoch": 0.9824844434201429, "grad_norm": 0.1369807869195938, "learning_rate": 8.046350732041075e-08, "loss": 0.632, "step": 4263 }, { "epoch": 0.9827149112698779, "grad_norm": 0.13430048525333405, "learning_rate": 7.836052688443007e-08, "loss": 0.6378, "step": 4264 }, { "epoch": 0.9829453791196128, "grad_norm": 0.13494747877120972, "learning_rate": 7.628537157207128e-08, "loss": 0.6394, "step": 4265 }, { "epoch": 0.9831758469693478, "grad_norm": 0.13669085502624512, "learning_rate": 7.423804253997579e-08, "loss": 0.6392, "step": 4266 }, { "epoch": 0.9834063148190827, "grad_norm": 0.13560469448566437, "learning_rate": 7.221854092926971e-08, "loss": 0.6522, "step": 4267 }, { "epoch": 0.9836367826688177, "grad_norm": 0.13016438484191895, "learning_rate": 7.022686786558042e-08, "loss": 0.6298, "step": 4268 }, { "epoch": 0.9838672505185526, "grad_norm": 0.13416080176830292, "learning_rate": 6.826302445901989e-08, "loss": 0.6433, "step": 4269 }, { "epoch": 0.9840977183682876, "grad_norm": 0.13181835412979126, "learning_rate": 6.632701180418476e-08, "loss": 0.6342, "step": 4270 }, { "epoch": 0.9843281862180225, "grad_norm": 0.13344010710716248, "learning_rate": 6.441883098015633e-08, "loss": 0.6337, "step": 4271 }, { "epoch": 0.9845586540677576, "grad_norm": 0.13779869675636292, "learning_rate": 6.253848305052268e-08, "loss": 0.6446, "step": 4272 }, { "epoch": 0.9847891219174925, "grad_norm": 0.13841082155704498, "learning_rate": 6.06859690633288e-08, "loss": 0.6393, "step": 4273 }, { "epoch": 0.9850195897672275, "grad_norm": 0.13432055711746216, "learning_rate": 5.886129005113206e-08, "loss": 0.6352, "step": 4274 }, { "epoch": 0.9852500576169624, "grad_norm": 0.1333775520324707, "learning_rate": 5.706444703096336e-08, "loss": 0.6288, "step": 4275 }, { "epoch": 0.9854805254666974, "grad_norm": 0.13299840688705444, "learning_rate": 5.5295441004332704e-08, "loss": 0.6288, "step": 4276 }, { "epoch": 0.9857109933164323, "grad_norm": 0.1346500962972641, "learning_rate": 5.355427295725135e-08, "loss": 0.6369, "step": 4277 }, { "epoch": 0.9859414611661673, "grad_norm": 0.13148584961891174, "learning_rate": 5.184094386019855e-08, "loss": 0.6326, "step": 4278 }, { "epoch": 0.9861719290159022, "grad_norm": 0.1373184323310852, "learning_rate": 5.0155454668149304e-08, "loss": 0.6345, "step": 4279 }, { "epoch": 0.9864023968656372, "grad_norm": 0.1355026662349701, "learning_rate": 4.849780632054657e-08, "loss": 0.6454, "step": 4280 }, { "epoch": 0.9866328647153723, "grad_norm": 0.13367265462875366, "learning_rate": 4.6867999741323496e-08, "loss": 0.6447, "step": 4281 }, { "epoch": 0.9868633325651072, "grad_norm": 0.134071946144104, "learning_rate": 4.5266035838903434e-08, "loss": 0.6395, "step": 4282 }, { "epoch": 0.9870938004148422, "grad_norm": 0.1350136399269104, "learning_rate": 4.3691915506177686e-08, "loss": 0.6453, "step": 4283 }, { "epoch": 0.9873242682645771, "grad_norm": 0.13533277809619904, "learning_rate": 4.21456396205222e-08, "loss": 0.6347, "step": 4284 }, { "epoch": 0.9875547361143121, "grad_norm": 0.13973018527030945, "learning_rate": 4.062720904379757e-08, "loss": 0.6393, "step": 4285 }, { "epoch": 0.987785203964047, "grad_norm": 0.13376709818840027, "learning_rate": 3.913662462233791e-08, "loss": 0.6352, "step": 4286 }, { "epoch": 0.988015671813782, "grad_norm": 0.13301095366477966, "learning_rate": 3.767388718696197e-08, "loss": 0.6446, "step": 4287 }, { "epoch": 0.9882461396635169, "grad_norm": 0.13637404143810272, "learning_rate": 3.6238997552956456e-08, "loss": 0.6414, "step": 4288 }, { "epoch": 0.988476607513252, "grad_norm": 0.13000015914440155, "learning_rate": 3.483195652010385e-08, "loss": 0.6326, "step": 4289 }, { "epoch": 0.9887070753629869, "grad_norm": 0.1287553757429123, "learning_rate": 3.3452764872649036e-08, "loss": 0.6359, "step": 4290 }, { "epoch": 0.9889375432127219, "grad_norm": 0.13659948110580444, "learning_rate": 3.210142337932709e-08, "loss": 0.6306, "step": 4291 }, { "epoch": 0.9891680110624568, "grad_norm": 0.13787119090557098, "learning_rate": 3.0777932793335516e-08, "loss": 0.6243, "step": 4292 }, { "epoch": 0.9893984789121918, "grad_norm": 0.13213881850242615, "learning_rate": 2.948229385236201e-08, "loss": 0.6381, "step": 4293 }, { "epoch": 0.9896289467619267, "grad_norm": 0.13811156153678894, "learning_rate": 2.8214507278556678e-08, "loss": 0.6412, "step": 4294 }, { "epoch": 0.9898594146116617, "grad_norm": 0.1367308348417282, "learning_rate": 2.6974573778565383e-08, "loss": 0.6411, "step": 4295 }, { "epoch": 0.9900898824613966, "grad_norm": 0.1384420096874237, "learning_rate": 2.5762494043485296e-08, "loss": 0.6355, "step": 4296 }, { "epoch": 0.9903203503111316, "grad_norm": 0.13190501928329468, "learning_rate": 2.4578268748909338e-08, "loss": 0.6296, "step": 4297 }, { "epoch": 0.9905508181608665, "grad_norm": 0.13569803535938263, "learning_rate": 2.3421898554892852e-08, "loss": 0.6342, "step": 4298 }, { "epoch": 0.9907812860106016, "grad_norm": 0.13529643416404724, "learning_rate": 2.229338410597026e-08, "loss": 0.6311, "step": 4299 }, { "epoch": 0.9910117538603365, "grad_norm": 0.13517087697982788, "learning_rate": 2.1192726031143974e-08, "loss": 0.6434, "step": 4300 }, { "epoch": 0.9912422217100715, "grad_norm": 0.13566431403160095, "learning_rate": 2.0119924943901025e-08, "loss": 0.6371, "step": 4301 }, { "epoch": 0.9914726895598064, "grad_norm": 0.1320231556892395, "learning_rate": 1.9074981442185336e-08, "loss": 0.6346, "step": 4302 }, { "epoch": 0.9917031574095414, "grad_norm": 0.13202235102653503, "learning_rate": 1.8057896108436558e-08, "loss": 0.6322, "step": 4303 }, { "epoch": 0.9919336252592763, "grad_norm": 0.13193956017494202, "learning_rate": 1.7068669509545665e-08, "loss": 0.6393, "step": 4304 }, { "epoch": 0.9921640931090113, "grad_norm": 0.13464799523353577, "learning_rate": 1.6107302196882724e-08, "loss": 0.6363, "step": 4305 }, { "epoch": 0.9923945609587462, "grad_norm": 0.13650983572006226, "learning_rate": 1.5173794706291324e-08, "loss": 0.6332, "step": 4306 }, { "epoch": 0.9926250288084812, "grad_norm": 0.1360529065132141, "learning_rate": 1.4268147558088585e-08, "loss": 0.6251, "step": 4307 }, { "epoch": 0.9928554966582162, "grad_norm": 0.1333739310503006, "learning_rate": 1.3390361257059614e-08, "loss": 0.6345, "step": 4308 }, { "epoch": 0.9930859645079512, "grad_norm": 0.13426019251346588, "learning_rate": 1.2540436292463043e-08, "loss": 0.6416, "step": 4309 }, { "epoch": 0.9933164323576861, "grad_norm": 0.13250836730003357, "learning_rate": 1.1718373138019933e-08, "loss": 0.6278, "step": 4310 }, { "epoch": 0.9935469002074211, "grad_norm": 0.13704052567481995, "learning_rate": 1.0924172251941534e-08, "loss": 0.6428, "step": 4311 }, { "epoch": 0.993777368057156, "grad_norm": 0.13682594895362854, "learning_rate": 1.0157834076879313e-08, "loss": 0.6392, "step": 4312 }, { "epoch": 0.994007835906891, "grad_norm": 0.13360513746738434, "learning_rate": 9.419359039986032e-09, "loss": 0.6351, "step": 4313 }, { "epoch": 0.9942383037566259, "grad_norm": 0.1286047101020813, "learning_rate": 8.70874755286577e-09, "loss": 0.6381, "step": 4314 }, { "epoch": 0.9944687716063609, "grad_norm": 0.13860009610652924, "learning_rate": 8.026000011596146e-09, "loss": 0.642, "step": 4315 }, { "epoch": 0.9946992394560958, "grad_norm": 0.1374213993549347, "learning_rate": 7.371116796717203e-09, "loss": 0.6356, "step": 4316 }, { "epoch": 0.9949297073058309, "grad_norm": 0.13426125049591064, "learning_rate": 6.74409827325917e-09, "loss": 0.6325, "step": 4317 }, { "epoch": 0.9951601751555658, "grad_norm": 0.13406482338905334, "learning_rate": 6.144944790692497e-09, "loss": 0.6422, "step": 4318 }, { "epoch": 0.9953906430053008, "grad_norm": 0.1396821290254593, "learning_rate": 5.573656682977824e-09, "loss": 0.6443, "step": 4319 }, { "epoch": 0.9956211108550357, "grad_norm": 0.12982842326164246, "learning_rate": 5.030234268543765e-09, "loss": 0.6245, "step": 4320 }, { "epoch": 0.9958515787047707, "grad_norm": 0.1334078311920166, "learning_rate": 4.514677850270266e-09, "loss": 0.6401, "step": 4321 }, { "epoch": 0.9960820465545056, "grad_norm": 0.13134385645389557, "learning_rate": 4.0269877155219016e-09, "loss": 0.6331, "step": 4322 }, { "epoch": 0.9963125144042406, "grad_norm": 0.1397440880537033, "learning_rate": 3.567164136120127e-09, "loss": 0.6252, "step": 4323 }, { "epoch": 0.9965429822539755, "grad_norm": 0.13442939519882202, "learning_rate": 3.1352073683654783e-09, "loss": 0.635, "step": 4324 }, { "epoch": 0.9967734501037105, "grad_norm": 0.13672947883605957, "learning_rate": 2.7311176530209203e-09, "loss": 0.6365, "step": 4325 }, { "epoch": 0.9970039179534455, "grad_norm": 0.13293464481830597, "learning_rate": 2.3548952153118476e-09, "loss": 0.6337, "step": 4326 }, { "epoch": 0.9972343858031805, "grad_norm": 0.1351083517074585, "learning_rate": 2.0065402649371845e-09, "loss": 0.633, "step": 4327 }, { "epoch": 0.9974648536529154, "grad_norm": 0.13411957025527954, "learning_rate": 1.6860529960638361e-09, "loss": 0.6376, "step": 4328 }, { "epoch": 0.9976953215026504, "grad_norm": 0.1313239187002182, "learning_rate": 1.3934335873155846e-09, "loss": 0.6344, "step": 4329 }, { "epoch": 0.9979257893523853, "grad_norm": 0.13476663827896118, "learning_rate": 1.1286822018008458e-09, "loss": 0.6432, "step": 4330 }, { "epoch": 0.9981562572021203, "grad_norm": 0.13341675698757172, "learning_rate": 8.917989870849131e-10, "loss": 0.6288, "step": 4331 }, { "epoch": 0.9983867250518552, "grad_norm": 0.13334329426288605, "learning_rate": 6.827840751955083e-10, "loss": 0.6403, "step": 4332 }, { "epoch": 0.9986171929015902, "grad_norm": 0.135373055934906, "learning_rate": 5.016375826394359e-10, "loss": 0.6377, "step": 4333 }, { "epoch": 0.9988476607513251, "grad_norm": 0.13281653821468353, "learning_rate": 3.4835961037482655e-10, "loss": 0.6261, "step": 4334 }, { "epoch": 0.9990781286010602, "grad_norm": 0.1364908516407013, "learning_rate": 2.2295024383889306e-10, "loss": 0.6386, "step": 4335 }, { "epoch": 0.9993085964507951, "grad_norm": 0.1349654197692871, "learning_rate": 1.2540955293682822e-10, "loss": 0.6366, "step": 4336 }, { "epoch": 0.9995390643005301, "grad_norm": 0.14018984138965607, "learning_rate": 5.573759202515127e-11, "loss": 0.6342, "step": 4337 }, { "epoch": 0.999769532150265, "grad_norm": 0.1340036541223526, "learning_rate": 1.3934399950565891e-11, "loss": 0.6367, "step": 4338 }, { "epoch": 1.0, "grad_norm": 0.13604800403118134, "learning_rate": 0.0, "loss": 0.6404, "step": 4339 }, { "epoch": 1.0, "step": 4339, "total_flos": 1.6526087271915035e+20, "train_loss": 0.7063895864925178, "train_runtime": 30118.2929, "train_samples_per_second": 589.978, "train_steps_per_second": 0.144 } ], "logging_steps": 1.0, "max_steps": 4339, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6526087271915035e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }