diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,20629 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 2941, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00034002040122407346, - "grad_norm": 7.957215845157106, - "learning_rate": 1.1235955056179776e-07, - "loss": 1.265, - "step": 1 - }, - { - "epoch": 0.0006800408024481469, - "grad_norm": 13.156657121104576, - "learning_rate": 2.247191011235955e-07, - "loss": 1.1163, - "step": 2 - }, - { - "epoch": 0.0010200612036722204, - "grad_norm": 8.234411807150657, - "learning_rate": 3.3707865168539325e-07, - "loss": 1.1117, - "step": 3 - }, - { - "epoch": 0.0013600816048962938, - "grad_norm": 8.378852124626784, - "learning_rate": 4.49438202247191e-07, - "loss": 1.0045, - "step": 4 - }, - { - "epoch": 0.0017001020061203672, - "grad_norm": 5.0871456226741785, - "learning_rate": 5.617977528089888e-07, - "loss": 1.1061, - "step": 5 - }, - { - "epoch": 0.002040122407344441, - "grad_norm": 12.113028237990287, - "learning_rate": 6.741573033707865e-07, - "loss": 0.978, - "step": 6 - }, - { - "epoch": 0.0023801428085685142, - "grad_norm": 7.182446599540591, - "learning_rate": 7.865168539325843e-07, - "loss": 1.2117, - "step": 7 - }, - { - "epoch": 0.0027201632097925877, - "grad_norm": 8.794165320435372, - "learning_rate": 8.98876404494382e-07, - "loss": 1.0302, - "step": 8 - }, - { - "epoch": 0.003060183611016661, - "grad_norm": 6.19432478948948, - "learning_rate": 1.01123595505618e-06, - "loss": 1.1328, - "step": 9 - }, - { - "epoch": 0.0034002040122407345, - "grad_norm": 8.815917265181259, - "learning_rate": 1.1235955056179777e-06, - "loss": 1.0271, - "step": 10 - }, - { - "epoch": 0.003740224413464808, - "grad_norm": 3.4552395124739923, - "learning_rate": 1.2359550561797752e-06, - "loss": 1.1775, - "step": 11 - }, - { - "epoch": 0.004080244814688882, - "grad_norm": 3.920780003164372, - "learning_rate": 1.348314606741573e-06, - "loss": 1.1197, - "step": 12 - }, - { - "epoch": 0.004420265215912955, - "grad_norm": 9.503601873541784, - "learning_rate": 1.4606741573033708e-06, - "loss": 1.0117, - "step": 13 - }, - { - "epoch": 0.0047602856171370285, - "grad_norm": 3.8392791998777698, - "learning_rate": 1.5730337078651686e-06, - "loss": 1.1943, - "step": 14 - }, - { - "epoch": 0.0051003060183611015, - "grad_norm": 16.880294672259833, - "learning_rate": 1.6853932584269663e-06, - "loss": 0.9484, - "step": 15 - }, - { - "epoch": 0.005440326419585175, - "grad_norm": 3.7585745209663135, - "learning_rate": 1.797752808988764e-06, - "loss": 1.0903, - "step": 16 - }, - { - "epoch": 0.005780346820809248, - "grad_norm": 4.0811771911781705, - "learning_rate": 1.910112359550562e-06, - "loss": 0.9225, - "step": 17 - }, - { - "epoch": 0.006120367222033322, - "grad_norm": 5.5863711220456835, - "learning_rate": 2.02247191011236e-06, - "loss": 1.0673, - "step": 18 - }, - { - "epoch": 0.006460387623257395, - "grad_norm": 3.1676452672990454, - "learning_rate": 2.1348314606741574e-06, - "loss": 0.9291, - "step": 19 - }, - { - "epoch": 0.006800408024481469, - "grad_norm": 3.611139125930507, - "learning_rate": 2.2471910112359554e-06, - "loss": 0.8766, - "step": 20 - }, - { - "epoch": 0.007140428425705543, - "grad_norm": 2.4985925611214093, - "learning_rate": 2.359550561797753e-06, - "loss": 0.8919, - "step": 21 - }, - { - "epoch": 0.007480448826929616, - "grad_norm": 3.1107777199577735, - "learning_rate": 2.4719101123595505e-06, - "loss": 0.9591, - "step": 22 - }, - { - "epoch": 0.007820469228153689, - "grad_norm": 2.4206200369165054, - "learning_rate": 2.584269662921349e-06, - "loss": 0.9344, - "step": 23 - }, - { - "epoch": 0.008160489629377763, - "grad_norm": 3.006651205729068, - "learning_rate": 2.696629213483146e-06, - "loss": 0.929, - "step": 24 - }, - { - "epoch": 0.008500510030601836, - "grad_norm": 4.211242050455176, - "learning_rate": 2.8089887640449444e-06, - "loss": 0.922, - "step": 25 - }, - { - "epoch": 0.00884053043182591, - "grad_norm": 2.928334204696846, - "learning_rate": 2.9213483146067416e-06, - "loss": 0.8109, - "step": 26 - }, - { - "epoch": 0.009180550833049982, - "grad_norm": 2.920501950315103, - "learning_rate": 3.03370786516854e-06, - "loss": 0.9918, - "step": 27 - }, - { - "epoch": 0.009520571234274057, - "grad_norm": 2.6627931747349463, - "learning_rate": 3.146067415730337e-06, - "loss": 1.0249, - "step": 28 - }, - { - "epoch": 0.00986059163549813, - "grad_norm": 2.2391523692025945, - "learning_rate": 3.258426966292135e-06, - "loss": 0.8657, - "step": 29 - }, - { - "epoch": 0.010200612036722203, - "grad_norm": 2.3439476348012924, - "learning_rate": 3.3707865168539327e-06, - "loss": 0.8669, - "step": 30 - }, - { - "epoch": 0.010540632437946278, - "grad_norm": 2.6299632657140646, - "learning_rate": 3.4831460674157306e-06, - "loss": 0.9279, - "step": 31 - }, - { - "epoch": 0.01088065283917035, - "grad_norm": 2.5299601625034702, - "learning_rate": 3.595505617977528e-06, - "loss": 0.9859, - "step": 32 - }, - { - "epoch": 0.011220673240394424, - "grad_norm": 1.869054216542114, - "learning_rate": 3.707865168539326e-06, - "loss": 0.9683, - "step": 33 - }, - { - "epoch": 0.011560693641618497, - "grad_norm": 2.641701125076694, - "learning_rate": 3.820224719101124e-06, - "loss": 0.9049, - "step": 34 - }, - { - "epoch": 0.011900714042842571, - "grad_norm": 2.3075105518731167, - "learning_rate": 3.932584269662922e-06, - "loss": 0.95, - "step": 35 - }, - { - "epoch": 0.012240734444066644, - "grad_norm": 2.3217690220079943, - "learning_rate": 4.04494382022472e-06, - "loss": 0.9225, - "step": 36 - }, - { - "epoch": 0.012580754845290717, - "grad_norm": 2.480896383955115, - "learning_rate": 4.157303370786518e-06, - "loss": 0.8839, - "step": 37 - }, - { - "epoch": 0.01292077524651479, - "grad_norm": 2.151833792105812, - "learning_rate": 4.269662921348315e-06, - "loss": 0.7775, - "step": 38 - }, - { - "epoch": 0.013260795647738865, - "grad_norm": 4.211040389687315, - "learning_rate": 4.382022471910113e-06, - "loss": 0.9096, - "step": 39 - }, - { - "epoch": 0.013600816048962938, - "grad_norm": 2.064947551303069, - "learning_rate": 4.494382022471911e-06, - "loss": 0.9284, - "step": 40 - }, - { - "epoch": 0.01394083645018701, - "grad_norm": 2.0660909038422526, - "learning_rate": 4.606741573033709e-06, - "loss": 0.925, - "step": 41 - }, - { - "epoch": 0.014280856851411085, - "grad_norm": 4.901236022148853, - "learning_rate": 4.719101123595506e-06, - "loss": 0.8714, - "step": 42 - }, - { - "epoch": 0.014620877252635158, - "grad_norm": 4.502266868053889, - "learning_rate": 4.831460674157304e-06, - "loss": 0.8047, - "step": 43 - }, - { - "epoch": 0.014960897653859231, - "grad_norm": 2.4077409650266337, - "learning_rate": 4.943820224719101e-06, - "loss": 0.913, - "step": 44 - }, - { - "epoch": 0.015300918055083304, - "grad_norm": 4.793105624055668, - "learning_rate": 5.0561797752809e-06, - "loss": 0.8195, - "step": 45 - }, - { - "epoch": 0.015640938456307377, - "grad_norm": 4.106559075152421, - "learning_rate": 5.168539325842698e-06, - "loss": 0.771, - "step": 46 - }, - { - "epoch": 0.015980958857531452, - "grad_norm": 2.95021911633751, - "learning_rate": 5.280898876404494e-06, - "loss": 0.8879, - "step": 47 - }, - { - "epoch": 0.016320979258755527, - "grad_norm": 2.5286647211984956, - "learning_rate": 5.393258426966292e-06, - "loss": 0.8658, - "step": 48 - }, - { - "epoch": 0.016660999659979598, - "grad_norm": 2.8336405976774373, - "learning_rate": 5.50561797752809e-06, - "loss": 0.8408, - "step": 49 - }, - { - "epoch": 0.017001020061203673, - "grad_norm": 2.1850180954836187, - "learning_rate": 5.617977528089889e-06, - "loss": 0.9112, - "step": 50 - }, - { - "epoch": 0.017341040462427744, - "grad_norm": 2.06887498219481, - "learning_rate": 5.730337078651685e-06, - "loss": 0.7897, - "step": 51 - }, - { - "epoch": 0.01768106086365182, - "grad_norm": 4.3341123167435125, - "learning_rate": 5.842696629213483e-06, - "loss": 0.811, - "step": 52 - }, - { - "epoch": 0.018021081264875893, - "grad_norm": 2.14419920475312, - "learning_rate": 5.955056179775281e-06, - "loss": 0.9512, - "step": 53 - }, - { - "epoch": 0.018361101666099965, - "grad_norm": 2.5786480499173177, - "learning_rate": 6.06741573033708e-06, - "loss": 0.9029, - "step": 54 - }, - { - "epoch": 0.01870112206732404, - "grad_norm": 3.3621255490379833, - "learning_rate": 6.179775280898876e-06, - "loss": 0.8179, - "step": 55 - }, - { - "epoch": 0.019041142468548114, - "grad_norm": 2.0145825477143675, - "learning_rate": 6.292134831460674e-06, - "loss": 0.924, - "step": 56 - }, - { - "epoch": 0.019381162869772185, - "grad_norm": 2.1614514347624625, - "learning_rate": 6.404494382022472e-06, - "loss": 0.8009, - "step": 57 - }, - { - "epoch": 0.01972118327099626, - "grad_norm": 2.056785775033519, - "learning_rate": 6.51685393258427e-06, - "loss": 0.8702, - "step": 58 - }, - { - "epoch": 0.020061203672220335, - "grad_norm": 2.3474912782953172, - "learning_rate": 6.629213483146067e-06, - "loss": 0.8707, - "step": 59 - }, - { - "epoch": 0.020401224073444406, - "grad_norm": 2.735017646466919, - "learning_rate": 6.741573033707865e-06, - "loss": 0.8172, - "step": 60 - }, - { - "epoch": 0.02074124447466848, - "grad_norm": 2.422521293220476, - "learning_rate": 6.853932584269663e-06, - "loss": 0.9169, - "step": 61 - }, - { - "epoch": 0.021081264875892555, - "grad_norm": 3.808006273477038, - "learning_rate": 6.966292134831461e-06, - "loss": 0.9115, - "step": 62 - }, - { - "epoch": 0.021421285277116626, - "grad_norm": 6.385476467042873, - "learning_rate": 7.078651685393258e-06, - "loss": 0.9251, - "step": 63 - }, - { - "epoch": 0.0217613056783407, - "grad_norm": 2.056529939521199, - "learning_rate": 7.191011235955056e-06, - "loss": 0.871, - "step": 64 - }, - { - "epoch": 0.022101326079564772, - "grad_norm": 3.488639073828488, - "learning_rate": 7.303370786516854e-06, - "loss": 0.8088, - "step": 65 - }, - { - "epoch": 0.022441346480788847, - "grad_norm": 2.085309229849527, - "learning_rate": 7.415730337078652e-06, - "loss": 0.9016, - "step": 66 - }, - { - "epoch": 0.022781366882012922, - "grad_norm": 5.044997989735368, - "learning_rate": 7.5280898876404495e-06, - "loss": 1.0085, - "step": 67 - }, - { - "epoch": 0.023121387283236993, - "grad_norm": 4.002898882880522, - "learning_rate": 7.640449438202247e-06, - "loss": 0.7977, - "step": 68 - }, - { - "epoch": 0.023461407684461068, - "grad_norm": 2.189417532279016, - "learning_rate": 7.752808988764046e-06, - "loss": 0.749, - "step": 69 - }, - { - "epoch": 0.023801428085685142, - "grad_norm": 1.9250509612156395, - "learning_rate": 7.865168539325843e-06, - "loss": 0.9402, - "step": 70 - }, - { - "epoch": 0.024141448486909214, - "grad_norm": 8.213532430857596, - "learning_rate": 7.97752808988764e-06, - "loss": 0.8401, - "step": 71 - }, - { - "epoch": 0.02448146888813329, - "grad_norm": 2.0693669751843085, - "learning_rate": 8.08988764044944e-06, - "loss": 0.8317, - "step": 72 - }, - { - "epoch": 0.024821489289357363, - "grad_norm": 4.854843546953979, - "learning_rate": 8.202247191011237e-06, - "loss": 0.7864, - "step": 73 - }, - { - "epoch": 0.025161509690581434, - "grad_norm": 1.9256245489125812, - "learning_rate": 8.314606741573035e-06, - "loss": 0.8605, - "step": 74 - }, - { - "epoch": 0.02550153009180551, - "grad_norm": 3.3854334242945736, - "learning_rate": 8.426966292134832e-06, - "loss": 0.994, - "step": 75 - }, - { - "epoch": 0.02584155049302958, - "grad_norm": 3.4183811142773135, - "learning_rate": 8.53932584269663e-06, - "loss": 0.8632, - "step": 76 - }, - { - "epoch": 0.026181570894253655, - "grad_norm": 2.2762352809156416, - "learning_rate": 8.651685393258428e-06, - "loss": 0.8118, - "step": 77 - }, - { - "epoch": 0.02652159129547773, - "grad_norm": 2.297584618749765, - "learning_rate": 8.764044943820226e-06, - "loss": 0.8672, - "step": 78 - }, - { - "epoch": 0.0268616116967018, - "grad_norm": 3.23414736176019, - "learning_rate": 8.876404494382023e-06, - "loss": 0.6984, - "step": 79 - }, - { - "epoch": 0.027201632097925876, - "grad_norm": 2.970866674193053, - "learning_rate": 8.988764044943822e-06, - "loss": 0.9025, - "step": 80 - }, - { - "epoch": 0.02754165249914995, - "grad_norm": 2.5731909072039727, - "learning_rate": 9.101123595505619e-06, - "loss": 0.8281, - "step": 81 - }, - { - "epoch": 0.02788167290037402, - "grad_norm": 2.087322150347932, - "learning_rate": 9.213483146067417e-06, - "loss": 0.9687, - "step": 82 - }, - { - "epoch": 0.028221693301598096, - "grad_norm": 2.1556977588073867, - "learning_rate": 9.325842696629213e-06, - "loss": 0.801, - "step": 83 - }, - { - "epoch": 0.02856171370282217, - "grad_norm": 2.0239695578983237, - "learning_rate": 9.438202247191012e-06, - "loss": 0.8507, - "step": 84 - }, - { - "epoch": 0.028901734104046242, - "grad_norm": 2.8375743876638633, - "learning_rate": 9.55056179775281e-06, - "loss": 0.8769, - "step": 85 - }, - { - "epoch": 0.029241754505270317, - "grad_norm": 2.258009150645209, - "learning_rate": 9.662921348314608e-06, - "loss": 0.7804, - "step": 86 - }, - { - "epoch": 0.029581774906494388, - "grad_norm": 2.468189158455857, - "learning_rate": 9.775280898876405e-06, - "loss": 0.879, - "step": 87 - }, - { - "epoch": 0.029921795307718463, - "grad_norm": 2.5014523937082482, - "learning_rate": 9.887640449438202e-06, - "loss": 0.8627, - "step": 88 - }, - { - "epoch": 0.030261815708942538, - "grad_norm": 2.134005133865966, - "learning_rate": 1e-05, - "loss": 0.7324, - "step": 89 - }, - { - "epoch": 0.03060183611016661, - "grad_norm": 3.2988329246550396, - "learning_rate": 9.999996966523272e-06, - "loss": 0.8279, - "step": 90 - }, - { - "epoch": 0.030941856511390683, - "grad_norm": 3.35459533797809, - "learning_rate": 9.999987866096762e-06, - "loss": 0.8842, - "step": 91 - }, - { - "epoch": 0.031281876912614755, - "grad_norm": 2.346746709615949, - "learning_rate": 9.999972698731516e-06, - "loss": 0.8541, - "step": 92 - }, - { - "epoch": 0.03162189731383883, - "grad_norm": 2.5077318815610816, - "learning_rate": 9.999951464445938e-06, - "loss": 0.874, - "step": 93 - }, - { - "epoch": 0.031961917715062904, - "grad_norm": 6.007848319671871, - "learning_rate": 9.99992416326579e-06, - "loss": 0.7933, - "step": 94 - }, - { - "epoch": 0.032301938116286975, - "grad_norm": 3.1450126786992882, - "learning_rate": 9.999890795224206e-06, - "loss": 0.8713, - "step": 95 - }, - { - "epoch": 0.032641958517511054, - "grad_norm": 2.0402024490282633, - "learning_rate": 9.999851360361666e-06, - "loss": 0.801, - "step": 96 - }, - { - "epoch": 0.032981978918735125, - "grad_norm": 4.045944747267887, - "learning_rate": 9.999805858726026e-06, - "loss": 0.8282, - "step": 97 - }, - { - "epoch": 0.033321999319959196, - "grad_norm": 2.7667039685581947, - "learning_rate": 9.999754290372496e-06, - "loss": 0.9823, - "step": 98 - }, - { - "epoch": 0.033662019721183274, - "grad_norm": 1.9693833148302338, - "learning_rate": 9.999696655363646e-06, - "loss": 0.8958, - "step": 99 - }, - { - "epoch": 0.034002040122407345, - "grad_norm": 1.8144739667930376, - "learning_rate": 9.999632953769413e-06, - "loss": 0.865, - "step": 100 - }, - { - "epoch": 0.03434206052363142, - "grad_norm": 2.054769480683066, - "learning_rate": 9.99956318566709e-06, - "loss": 0.7375, - "step": 101 - }, - { - "epoch": 0.03468208092485549, - "grad_norm": 2.3634788615414535, - "learning_rate": 9.999487351141333e-06, - "loss": 0.7926, - "step": 102 - }, - { - "epoch": 0.035022101326079566, - "grad_norm": 2.432946102833199, - "learning_rate": 9.999405450284161e-06, - "loss": 0.9227, - "step": 103 - }, - { - "epoch": 0.03536212172730364, - "grad_norm": 2.01036703041841, - "learning_rate": 9.999317483194948e-06, - "loss": 0.836, - "step": 104 - }, - { - "epoch": 0.03570214212852771, - "grad_norm": 11.065759310951304, - "learning_rate": 9.999223449980434e-06, - "loss": 0.8741, - "step": 105 - }, - { - "epoch": 0.03604216252975179, - "grad_norm": 3.1475568662058984, - "learning_rate": 9.999123350754722e-06, - "loss": 0.8076, - "step": 106 - }, - { - "epoch": 0.03638218293097586, - "grad_norm": 2.087445939358465, - "learning_rate": 9.999017185639266e-06, - "loss": 0.9328, - "step": 107 - }, - { - "epoch": 0.03672220333219993, - "grad_norm": 5.085608104666051, - "learning_rate": 9.99890495476289e-06, - "loss": 0.8641, - "step": 108 - }, - { - "epoch": 0.03706222373342401, - "grad_norm": 3.1741923963644827, - "learning_rate": 9.99878665826177e-06, - "loss": 0.8019, - "step": 109 - }, - { - "epoch": 0.03740224413464808, - "grad_norm": 2.355452993498913, - "learning_rate": 9.998662296279447e-06, - "loss": 0.8653, - "step": 110 - }, - { - "epoch": 0.03774226453587215, - "grad_norm": 2.413762001760075, - "learning_rate": 9.998531868966822e-06, - "loss": 0.7411, - "step": 111 - }, - { - "epoch": 0.03808228493709623, - "grad_norm": 2.0339220046950524, - "learning_rate": 9.998395376482152e-06, - "loss": 0.8552, - "step": 112 - }, - { - "epoch": 0.0384223053383203, - "grad_norm": 2.079277005071041, - "learning_rate": 9.998252818991062e-06, - "loss": 0.8222, - "step": 113 - }, - { - "epoch": 0.03876232573954437, - "grad_norm": 5.632785104475984, - "learning_rate": 9.99810419666652e-06, - "loss": 0.8776, - "step": 114 - }, - { - "epoch": 0.03910234614076845, - "grad_norm": 1.8509337002126818, - "learning_rate": 9.997949509688871e-06, - "loss": 0.8431, - "step": 115 - }, - { - "epoch": 0.03944236654199252, - "grad_norm": 2.573221870698461, - "learning_rate": 9.997788758245808e-06, - "loss": 0.9841, - "step": 116 - }, - { - "epoch": 0.03978238694321659, - "grad_norm": 2.572525731553236, - "learning_rate": 9.997621942532383e-06, - "loss": 0.8367, - "step": 117 - }, - { - "epoch": 0.04012240734444067, - "grad_norm": 2.2742345681308938, - "learning_rate": 9.997449062751012e-06, - "loss": 0.7897, - "step": 118 - }, - { - "epoch": 0.04046242774566474, - "grad_norm": 4.327044372060793, - "learning_rate": 9.997270119111467e-06, - "loss": 0.8457, - "step": 119 - }, - { - "epoch": 0.04080244814688881, - "grad_norm": 2.2772066238104376, - "learning_rate": 9.99708511183087e-06, - "loss": 0.8247, - "step": 120 - }, - { - "epoch": 0.04114246854811289, - "grad_norm": 2.601789407750634, - "learning_rate": 9.996894041133715e-06, - "loss": 0.825, - "step": 121 - }, - { - "epoch": 0.04148248894933696, - "grad_norm": 2.0585977344964284, - "learning_rate": 9.99669690725184e-06, - "loss": 0.7796, - "step": 122 - }, - { - "epoch": 0.04182250935056103, - "grad_norm": 1.8855361410264628, - "learning_rate": 9.996493710424447e-06, - "loss": 0.8304, - "step": 123 - }, - { - "epoch": 0.04216252975178511, - "grad_norm": 1.837257670311997, - "learning_rate": 9.996284450898093e-06, - "loss": 0.9445, - "step": 124 - }, - { - "epoch": 0.04250255015300918, - "grad_norm": 3.0656012239893964, - "learning_rate": 9.996069128926691e-06, - "loss": 0.8702, - "step": 125 - }, - { - "epoch": 0.04284257055423325, - "grad_norm": 2.438953538165285, - "learning_rate": 9.995847744771514e-06, - "loss": 0.7872, - "step": 126 - }, - { - "epoch": 0.043182590955457324, - "grad_norm": 2.279269747200056, - "learning_rate": 9.995620298701183e-06, - "loss": 0.8613, - "step": 127 - }, - { - "epoch": 0.0435226113566814, - "grad_norm": 3.213489993275219, - "learning_rate": 9.99538679099168e-06, - "loss": 0.7127, - "step": 128 - }, - { - "epoch": 0.043862631757905474, - "grad_norm": 2.5232149812128637, - "learning_rate": 9.995147221926343e-06, - "loss": 0.8698, - "step": 129 - }, - { - "epoch": 0.044202652159129545, - "grad_norm": 2.839717095850297, - "learning_rate": 9.994901591795863e-06, - "loss": 0.85, - "step": 130 - }, - { - "epoch": 0.04454267256035362, - "grad_norm": 7.648211408560256, - "learning_rate": 9.994649900898283e-06, - "loss": 0.9204, - "step": 131 - }, - { - "epoch": 0.044882692961577694, - "grad_norm": 1.8745457296274581, - "learning_rate": 9.994392149539003e-06, - "loss": 0.8267, - "step": 132 - }, - { - "epoch": 0.045222713362801766, - "grad_norm": 1.9836289186862535, - "learning_rate": 9.994128338030778e-06, - "loss": 0.9781, - "step": 133 - }, - { - "epoch": 0.045562733764025844, - "grad_norm": 2.126401684741174, - "learning_rate": 9.993858466693712e-06, - "loss": 1.0093, - "step": 134 - }, - { - "epoch": 0.045902754165249915, - "grad_norm": 1.983933252004279, - "learning_rate": 9.993582535855265e-06, - "loss": 0.8227, - "step": 135 - }, - { - "epoch": 0.046242774566473986, - "grad_norm": 2.088706279304247, - "learning_rate": 9.99330054585025e-06, - "loss": 0.8529, - "step": 136 - }, - { - "epoch": 0.046582794967698064, - "grad_norm": 2.4156135313351363, - "learning_rate": 9.993012497020831e-06, - "loss": 0.8666, - "step": 137 - }, - { - "epoch": 0.046922815368922136, - "grad_norm": 2.4388488859277584, - "learning_rate": 9.992718389716521e-06, - "loss": 0.7969, - "step": 138 - }, - { - "epoch": 0.04726283577014621, - "grad_norm": 2.169555997324188, - "learning_rate": 9.992418224294191e-06, - "loss": 0.893, - "step": 139 - }, - { - "epoch": 0.047602856171370285, - "grad_norm": 1.9240543034696431, - "learning_rate": 9.992112001118058e-06, - "loss": 0.9161, - "step": 140 - }, - { - "epoch": 0.047942876572594356, - "grad_norm": 3.4020949955907636, - "learning_rate": 9.991799720559687e-06, - "loss": 0.7416, - "step": 141 - }, - { - "epoch": 0.04828289697381843, - "grad_norm": 2.4604945354615153, - "learning_rate": 9.991481382998001e-06, - "loss": 0.9075, - "step": 142 - }, - { - "epoch": 0.048622917375042506, - "grad_norm": 2.255946457618047, - "learning_rate": 9.991156988819264e-06, - "loss": 0.9905, - "step": 143 - }, - { - "epoch": 0.04896293777626658, - "grad_norm": 2.024969438240585, - "learning_rate": 9.990826538417095e-06, - "loss": 0.9332, - "step": 144 - }, - { - "epoch": 0.04930295817749065, - "grad_norm": 2.2370360110433025, - "learning_rate": 9.99049003219246e-06, - "loss": 0.8148, - "step": 145 - }, - { - "epoch": 0.049642978578714726, - "grad_norm": 2.2743555041984407, - "learning_rate": 9.99014747055367e-06, - "loss": 0.8756, - "step": 146 - }, - { - "epoch": 0.0499829989799388, - "grad_norm": 3.95316960278758, - "learning_rate": 9.989798853916388e-06, - "loss": 0.8292, - "step": 147 - }, - { - "epoch": 0.05032301938116287, - "grad_norm": 3.6392856446793833, - "learning_rate": 9.989444182703623e-06, - "loss": 0.7666, - "step": 148 - }, - { - "epoch": 0.05066303978238694, - "grad_norm": 2.230929282152931, - "learning_rate": 9.989083457345727e-06, - "loss": 0.8501, - "step": 149 - }, - { - "epoch": 0.05100306018361102, - "grad_norm": 3.0061135776554826, - "learning_rate": 9.988716678280403e-06, - "loss": 0.9064, - "step": 150 - }, - { - "epoch": 0.05134308058483509, - "grad_norm": 2.1445457506712824, - "learning_rate": 9.988343845952697e-06, - "loss": 0.8971, - "step": 151 - }, - { - "epoch": 0.05168310098605916, - "grad_norm": 2.437139941905991, - "learning_rate": 9.987964960815e-06, - "loss": 0.9558, - "step": 152 - }, - { - "epoch": 0.05202312138728324, - "grad_norm": 4.873429755031727, - "learning_rate": 9.987580023327046e-06, - "loss": 0.8671, - "step": 153 - }, - { - "epoch": 0.05236314178850731, - "grad_norm": 3.944162605474988, - "learning_rate": 9.987189033955918e-06, - "loss": 0.8272, - "step": 154 - }, - { - "epoch": 0.05270316218973138, - "grad_norm": 2.8250974202447985, - "learning_rate": 9.986791993176035e-06, - "loss": 0.8342, - "step": 155 - }, - { - "epoch": 0.05304318259095546, - "grad_norm": 2.3354440033626345, - "learning_rate": 9.986388901469167e-06, - "loss": 0.7322, - "step": 156 - }, - { - "epoch": 0.05338320299217953, - "grad_norm": 3.5022852363593198, - "learning_rate": 9.985979759324418e-06, - "loss": 0.9182, - "step": 157 - }, - { - "epoch": 0.0537232233934036, - "grad_norm": 2.022608573939002, - "learning_rate": 9.985564567238237e-06, - "loss": 0.8555, - "step": 158 - }, - { - "epoch": 0.05406324379462768, - "grad_norm": 2.4546285068255744, - "learning_rate": 9.985143325714419e-06, - "loss": 0.8571, - "step": 159 - }, - { - "epoch": 0.05440326419585175, - "grad_norm": 1.7140624575103907, - "learning_rate": 9.984716035264089e-06, - "loss": 0.7834, - "step": 160 - }, - { - "epoch": 0.05474328459707582, - "grad_norm": 2.007966583007289, - "learning_rate": 9.98428269640572e-06, - "loss": 0.8762, - "step": 161 - }, - { - "epoch": 0.0550833049982999, - "grad_norm": 2.543305674356758, - "learning_rate": 9.983843309665122e-06, - "loss": 0.8685, - "step": 162 - }, - { - "epoch": 0.05542332539952397, - "grad_norm": 2.080393891117657, - "learning_rate": 9.983397875575442e-06, - "loss": 0.8911, - "step": 163 - }, - { - "epoch": 0.05576334580074804, - "grad_norm": 2.294684609497304, - "learning_rate": 9.982946394677165e-06, - "loss": 0.9734, - "step": 164 - }, - { - "epoch": 0.05610336620197212, - "grad_norm": 1.804144176865855, - "learning_rate": 9.982488867518112e-06, - "loss": 0.819, - "step": 165 - }, - { - "epoch": 0.05644338660319619, - "grad_norm": 3.657476774208594, - "learning_rate": 9.982025294653445e-06, - "loss": 0.8307, - "step": 166 - }, - { - "epoch": 0.056783407004420264, - "grad_norm": 2.074991736382199, - "learning_rate": 9.98155567664566e-06, - "loss": 0.8015, - "step": 167 - }, - { - "epoch": 0.05712342740564434, - "grad_norm": 2.9051744319346255, - "learning_rate": 9.981080014064584e-06, - "loss": 0.7693, - "step": 168 - }, - { - "epoch": 0.05746344780686841, - "grad_norm": 1.9099484180793749, - "learning_rate": 9.980598307487383e-06, - "loss": 0.7955, - "step": 169 - }, - { - "epoch": 0.057803468208092484, - "grad_norm": 2.146831708028932, - "learning_rate": 9.980110557498556e-06, - "loss": 0.8993, - "step": 170 - }, - { - "epoch": 0.058143488609316556, - "grad_norm": 2.61617139387018, - "learning_rate": 9.979616764689932e-06, - "loss": 0.8702, - "step": 171 - }, - { - "epoch": 0.058483509010540634, - "grad_norm": 2.1196184076767057, - "learning_rate": 9.979116929660677e-06, - "loss": 0.9441, - "step": 172 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 3.7671698090532866, - "learning_rate": 9.978611053017286e-06, - "loss": 0.9364, - "step": 173 - }, - { - "epoch": 0.059163549812988776, - "grad_norm": 2.6799358731624268, - "learning_rate": 9.978099135373584e-06, - "loss": 0.8686, - "step": 174 - }, - { - "epoch": 0.059503570214212854, - "grad_norm": 1.8432414885158408, - "learning_rate": 9.977581177350726e-06, - "loss": 0.8512, - "step": 175 - }, - { - "epoch": 0.059843590615436926, - "grad_norm": 2.6747034963076963, - "learning_rate": 9.977057179577199e-06, - "loss": 0.7921, - "step": 176 - }, - { - "epoch": 0.060183611016661, - "grad_norm": 3.038911682651883, - "learning_rate": 9.976527142688818e-06, - "loss": 0.849, - "step": 177 - }, - { - "epoch": 0.060523631417885075, - "grad_norm": 3.497473396206265, - "learning_rate": 9.975991067328722e-06, - "loss": 0.8514, - "step": 178 - }, - { - "epoch": 0.060863651819109146, - "grad_norm": 2.872664876776745, - "learning_rate": 9.975448954147383e-06, - "loss": 0.76, - "step": 179 - }, - { - "epoch": 0.06120367222033322, - "grad_norm": 2.265420234730481, - "learning_rate": 9.974900803802595e-06, - "loss": 0.8893, - "step": 180 - }, - { - "epoch": 0.061543692621557296, - "grad_norm": 2.2490770407309273, - "learning_rate": 9.974346616959476e-06, - "loss": 0.9017, - "step": 181 - }, - { - "epoch": 0.06188371302278137, - "grad_norm": 2.279036329894871, - "learning_rate": 9.973786394290475e-06, - "loss": 0.7729, - "step": 182 - }, - { - "epoch": 0.06222373342400544, - "grad_norm": 2.2273111686177693, - "learning_rate": 9.973220136475359e-06, - "loss": 0.9491, - "step": 183 - }, - { - "epoch": 0.06256375382522951, - "grad_norm": 1.8442525974273993, - "learning_rate": 9.97264784420122e-06, - "loss": 0.8584, - "step": 184 - }, - { - "epoch": 0.06290377422645359, - "grad_norm": 2.0159433278766876, - "learning_rate": 9.972069518162472e-06, - "loss": 0.855, - "step": 185 - }, - { - "epoch": 0.06324379462767767, - "grad_norm": 2.2639684727572336, - "learning_rate": 9.971485159060851e-06, - "loss": 0.9352, - "step": 186 - }, - { - "epoch": 0.06358381502890173, - "grad_norm": 2.5527740776494574, - "learning_rate": 9.970894767605412e-06, - "loss": 0.7912, - "step": 187 - }, - { - "epoch": 0.06392383543012581, - "grad_norm": 1.9379101706278465, - "learning_rate": 9.970298344512533e-06, - "loss": 0.8189, - "step": 188 - }, - { - "epoch": 0.06426385583134989, - "grad_norm": 3.1376756923399824, - "learning_rate": 9.969695890505904e-06, - "loss": 0.9007, - "step": 189 - }, - { - "epoch": 0.06460387623257395, - "grad_norm": 2.270221919510738, - "learning_rate": 9.96908740631654e-06, - "loss": 0.8274, - "step": 190 - }, - { - "epoch": 0.06494389663379803, - "grad_norm": 10.152532884466371, - "learning_rate": 9.96847289268277e-06, - "loss": 0.9628, - "step": 191 - }, - { - "epoch": 0.06528391703502211, - "grad_norm": 2.2780092143082395, - "learning_rate": 9.967852350350239e-06, - "loss": 0.8497, - "step": 192 - }, - { - "epoch": 0.06562393743624617, - "grad_norm": 2.212416833604225, - "learning_rate": 9.967225780071908e-06, - "loss": 0.8529, - "step": 193 - }, - { - "epoch": 0.06596395783747025, - "grad_norm": 2.5787177735483833, - "learning_rate": 9.966593182608048e-06, - "loss": 0.8931, - "step": 194 - }, - { - "epoch": 0.06630397823869433, - "grad_norm": 2.222235355514418, - "learning_rate": 9.965954558726249e-06, - "loss": 0.8334, - "step": 195 - }, - { - "epoch": 0.06664399863991839, - "grad_norm": 3.0045508316166742, - "learning_rate": 9.965309909201414e-06, - "loss": 0.8262, - "step": 196 - }, - { - "epoch": 0.06698401904114247, - "grad_norm": 2.071849506932709, - "learning_rate": 9.964659234815752e-06, - "loss": 0.9124, - "step": 197 - }, - { - "epoch": 0.06732403944236655, - "grad_norm": 2.67102771314228, - "learning_rate": 9.964002536358784e-06, - "loss": 0.8469, - "step": 198 - }, - { - "epoch": 0.06766405984359061, - "grad_norm": 3.45508048201135, - "learning_rate": 9.963339814627344e-06, - "loss": 0.862, - "step": 199 - }, - { - "epoch": 0.06800408024481469, - "grad_norm": 1.95482979584485, - "learning_rate": 9.962671070425573e-06, - "loss": 0.832, - "step": 200 - }, - { - "epoch": 0.06834410064603877, - "grad_norm": 2.6045722790965, - "learning_rate": 9.961996304564916e-06, - "loss": 0.8735, - "step": 201 - }, - { - "epoch": 0.06868412104726283, - "grad_norm": 2.198649716123807, - "learning_rate": 9.961315517864131e-06, - "loss": 0.8463, - "step": 202 - }, - { - "epoch": 0.06902414144848691, - "grad_norm": 3.00819531461859, - "learning_rate": 9.960628711149276e-06, - "loss": 0.7847, - "step": 203 - }, - { - "epoch": 0.06936416184971098, - "grad_norm": 2.7123733460067068, - "learning_rate": 9.959935885253715e-06, - "loss": 0.8133, - "step": 204 - }, - { - "epoch": 0.06970418225093505, - "grad_norm": 1.9582131537045544, - "learning_rate": 9.95923704101812e-06, - "loss": 0.9498, - "step": 205 - }, - { - "epoch": 0.07004420265215913, - "grad_norm": 2.0898364433787577, - "learning_rate": 9.958532179290458e-06, - "loss": 0.7157, - "step": 206 - }, - { - "epoch": 0.0703842230533832, - "grad_norm": 2.0874129936819688, - "learning_rate": 9.957821300926007e-06, - "loss": 0.8845, - "step": 207 - }, - { - "epoch": 0.07072424345460727, - "grad_norm": 2.565402516349025, - "learning_rate": 9.957104406787335e-06, - "loss": 0.8621, - "step": 208 - }, - { - "epoch": 0.07106426385583135, - "grad_norm": 3.070517613442607, - "learning_rate": 9.956381497744317e-06, - "loss": 0.8068, - "step": 209 - }, - { - "epoch": 0.07140428425705542, - "grad_norm": 1.7745161730599397, - "learning_rate": 9.955652574674122e-06, - "loss": 0.774, - "step": 210 - }, - { - "epoch": 0.0717443046582795, - "grad_norm": 2.2664861311135835, - "learning_rate": 9.954917638461221e-06, - "loss": 0.7881, - "step": 211 - }, - { - "epoch": 0.07208432505950357, - "grad_norm": 2.8326430149609103, - "learning_rate": 9.954176689997379e-06, - "loss": 0.8248, - "step": 212 - }, - { - "epoch": 0.07242434546072764, - "grad_norm": 5.345259266036334, - "learning_rate": 9.953429730181653e-06, - "loss": 0.8863, - "step": 213 - }, - { - "epoch": 0.07276436586195172, - "grad_norm": 2.0087383584289404, - "learning_rate": 9.952676759920401e-06, - "loss": 0.9046, - "step": 214 - }, - { - "epoch": 0.0731043862631758, - "grad_norm": 2.7067250680131503, - "learning_rate": 9.951917780127268e-06, - "loss": 0.7835, - "step": 215 - }, - { - "epoch": 0.07344440666439986, - "grad_norm": 2.0141720241519963, - "learning_rate": 9.951152791723193e-06, - "loss": 0.8934, - "step": 216 - }, - { - "epoch": 0.07378442706562394, - "grad_norm": 1.8777488263439555, - "learning_rate": 9.950381795636406e-06, - "loss": 0.8121, - "step": 217 - }, - { - "epoch": 0.07412444746684801, - "grad_norm": 3.255807583902176, - "learning_rate": 9.949604792802425e-06, - "loss": 0.8676, - "step": 218 - }, - { - "epoch": 0.07446446786807208, - "grad_norm": 2.67736933247142, - "learning_rate": 9.94882178416406e-06, - "loss": 0.7393, - "step": 219 - }, - { - "epoch": 0.07480448826929616, - "grad_norm": 4.008487179200106, - "learning_rate": 9.948032770671405e-06, - "loss": 0.8465, - "step": 220 - }, - { - "epoch": 0.07514450867052024, - "grad_norm": 2.940928392932845, - "learning_rate": 9.947237753281845e-06, - "loss": 0.8339, - "step": 221 - }, - { - "epoch": 0.0754845290717443, - "grad_norm": 2.051172980166151, - "learning_rate": 9.946436732960042e-06, - "loss": 0.9295, - "step": 222 - }, - { - "epoch": 0.07582454947296838, - "grad_norm": 2.424428707199302, - "learning_rate": 9.945629710677949e-06, - "loss": 0.8197, - "step": 223 - }, - { - "epoch": 0.07616456987419246, - "grad_norm": 2.0727299530697763, - "learning_rate": 9.9448166874148e-06, - "loss": 0.8643, - "step": 224 - }, - { - "epoch": 0.07650459027541652, - "grad_norm": 2.770127723386738, - "learning_rate": 9.943997664157108e-06, - "loss": 0.8465, - "step": 225 - }, - { - "epoch": 0.0768446106766406, - "grad_norm": 2.555973204555607, - "learning_rate": 9.943172641898669e-06, - "loss": 0.8517, - "step": 226 - }, - { - "epoch": 0.07718463107786468, - "grad_norm": 1.842390623944307, - "learning_rate": 9.942341621640558e-06, - "loss": 0.85, - "step": 227 - }, - { - "epoch": 0.07752465147908874, - "grad_norm": 2.128091117333538, - "learning_rate": 9.941504604391126e-06, - "loss": 0.8292, - "step": 228 - }, - { - "epoch": 0.07786467188031282, - "grad_norm": 1.7281395666728367, - "learning_rate": 9.940661591166003e-06, - "loss": 0.8231, - "step": 229 - }, - { - "epoch": 0.0782046922815369, - "grad_norm": 2.9385281630654285, - "learning_rate": 9.939812582988094e-06, - "loss": 0.7502, - "step": 230 - }, - { - "epoch": 0.07854471268276096, - "grad_norm": 2.1296804812289603, - "learning_rate": 9.938957580887575e-06, - "loss": 0.8717, - "step": 231 - }, - { - "epoch": 0.07888473308398504, - "grad_norm": 2.14717718646388, - "learning_rate": 9.9380965859019e-06, - "loss": 0.8894, - "step": 232 - }, - { - "epoch": 0.07922475348520912, - "grad_norm": 1.9538584630326372, - "learning_rate": 9.937229599075791e-06, - "loss": 0.8824, - "step": 233 - }, - { - "epoch": 0.07956477388643318, - "grad_norm": 1.7865365933867121, - "learning_rate": 9.936356621461243e-06, - "loss": 0.8454, - "step": 234 - }, - { - "epoch": 0.07990479428765726, - "grad_norm": 1.7416149475546014, - "learning_rate": 9.935477654117518e-06, - "loss": 0.8576, - "step": 235 - }, - { - "epoch": 0.08024481468888134, - "grad_norm": 1.768841666763347, - "learning_rate": 9.934592698111148e-06, - "loss": 0.9265, - "step": 236 - }, - { - "epoch": 0.0805848350901054, - "grad_norm": 2.4152772171644656, - "learning_rate": 9.933701754515928e-06, - "loss": 0.8519, - "step": 237 - }, - { - "epoch": 0.08092485549132948, - "grad_norm": 2.2519504055783814, - "learning_rate": 9.932804824412922e-06, - "loss": 0.9161, - "step": 238 - }, - { - "epoch": 0.08126487589255356, - "grad_norm": 2.450596425363767, - "learning_rate": 9.931901908890457e-06, - "loss": 0.8091, - "step": 239 - }, - { - "epoch": 0.08160489629377762, - "grad_norm": 1.8041419356227175, - "learning_rate": 9.930993009044123e-06, - "loss": 0.8561, - "step": 240 - }, - { - "epoch": 0.0819449166950017, - "grad_norm": 1.7358105710203156, - "learning_rate": 9.930078125976767e-06, - "loss": 0.9872, - "step": 241 - }, - { - "epoch": 0.08228493709622578, - "grad_norm": 1.9725454367197321, - "learning_rate": 9.929157260798504e-06, - "loss": 0.796, - "step": 242 - }, - { - "epoch": 0.08262495749744984, - "grad_norm": 2.0911715350232987, - "learning_rate": 9.9282304146267e-06, - "loss": 0.7842, - "step": 243 - }, - { - "epoch": 0.08296497789867392, - "grad_norm": 1.7083869843606982, - "learning_rate": 9.927297588585984e-06, - "loss": 0.7561, - "step": 244 - }, - { - "epoch": 0.083304998299898, - "grad_norm": 1.8485993258822613, - "learning_rate": 9.926358783808238e-06, - "loss": 0.7767, - "step": 245 - }, - { - "epoch": 0.08364501870112206, - "grad_norm": 2.4843083297610655, - "learning_rate": 9.925414001432599e-06, - "loss": 0.8209, - "step": 246 - }, - { - "epoch": 0.08398503910234614, - "grad_norm": 2.514523920882766, - "learning_rate": 9.924463242605454e-06, - "loss": 0.75, - "step": 247 - }, - { - "epoch": 0.08432505950357022, - "grad_norm": 2.038961464493961, - "learning_rate": 9.92350650848045e-06, - "loss": 0.8024, - "step": 248 - }, - { - "epoch": 0.08466507990479429, - "grad_norm": 5.967821891887257, - "learning_rate": 9.922543800218474e-06, - "loss": 0.8777, - "step": 249 - }, - { - "epoch": 0.08500510030601836, - "grad_norm": 2.1398795095312946, - "learning_rate": 9.921575118987672e-06, - "loss": 0.8732, - "step": 250 - }, - { - "epoch": 0.08534512070724243, - "grad_norm": 2.9133926895356277, - "learning_rate": 9.92060046596343e-06, - "loss": 0.7944, - "step": 251 - }, - { - "epoch": 0.0856851411084665, - "grad_norm": 1.9889465507980215, - "learning_rate": 9.919619842328383e-06, - "loss": 0.8085, - "step": 252 - }, - { - "epoch": 0.08602516150969058, - "grad_norm": 3.150534939883254, - "learning_rate": 9.918633249272412e-06, - "loss": 0.8113, - "step": 253 - }, - { - "epoch": 0.08636518191091465, - "grad_norm": 2.2835881883036335, - "learning_rate": 9.917640687992638e-06, - "loss": 0.787, - "step": 254 - }, - { - "epoch": 0.08670520231213873, - "grad_norm": 2.4230018775230904, - "learning_rate": 9.916642159693428e-06, - "loss": 0.8945, - "step": 255 - }, - { - "epoch": 0.0870452227133628, - "grad_norm": 2.2893689337255108, - "learning_rate": 9.915637665586386e-06, - "loss": 0.8661, - "step": 256 - }, - { - "epoch": 0.08738524311458687, - "grad_norm": 2.4221470788760713, - "learning_rate": 9.914627206890352e-06, - "loss": 0.8282, - "step": 257 - }, - { - "epoch": 0.08772526351581095, - "grad_norm": 2.376986510975116, - "learning_rate": 9.913610784831415e-06, - "loss": 0.8371, - "step": 258 - }, - { - "epoch": 0.08806528391703503, - "grad_norm": 2.4413551231465624, - "learning_rate": 9.912588400642884e-06, - "loss": 0.8826, - "step": 259 - }, - { - "epoch": 0.08840530431825909, - "grad_norm": 2.644042881659926, - "learning_rate": 9.911560055565316e-06, - "loss": 0.7355, - "step": 260 - }, - { - "epoch": 0.08874532471948317, - "grad_norm": 1.9367674986615202, - "learning_rate": 9.910525750846494e-06, - "loss": 0.8337, - "step": 261 - }, - { - "epoch": 0.08908534512070725, - "grad_norm": 3.6307557122329657, - "learning_rate": 9.909485487741432e-06, - "loss": 0.8818, - "step": 262 - }, - { - "epoch": 0.08942536552193131, - "grad_norm": 2.1386654842761468, - "learning_rate": 9.908439267512378e-06, - "loss": 0.8079, - "step": 263 - }, - { - "epoch": 0.08976538592315539, - "grad_norm": 2.05052351091725, - "learning_rate": 9.907387091428803e-06, - "loss": 0.808, - "step": 264 - }, - { - "epoch": 0.09010540632437947, - "grad_norm": 2.1166857344624455, - "learning_rate": 9.906328960767409e-06, - "loss": 0.8604, - "step": 265 - }, - { - "epoch": 0.09044542672560353, - "grad_norm": 2.179398767884701, - "learning_rate": 9.905264876812123e-06, - "loss": 0.7211, - "step": 266 - }, - { - "epoch": 0.09078544712682761, - "grad_norm": 2.8274324506005213, - "learning_rate": 9.904194840854094e-06, - "loss": 0.9274, - "step": 267 - }, - { - "epoch": 0.09112546752805169, - "grad_norm": 2.2046170596471315, - "learning_rate": 9.903118854191693e-06, - "loss": 0.8147, - "step": 268 - }, - { - "epoch": 0.09146548792927575, - "grad_norm": 2.7523217330730505, - "learning_rate": 9.902036918130514e-06, - "loss": 0.8264, - "step": 269 - }, - { - "epoch": 0.09180550833049983, - "grad_norm": 2.2273055464770812, - "learning_rate": 9.900949033983366e-06, - "loss": 0.866, - "step": 270 - }, - { - "epoch": 0.09214552873172391, - "grad_norm": 2.2499475354374607, - "learning_rate": 9.899855203070278e-06, - "loss": 0.818, - "step": 271 - }, - { - "epoch": 0.09248554913294797, - "grad_norm": 1.9616287518680806, - "learning_rate": 9.898755426718493e-06, - "loss": 0.8311, - "step": 272 - }, - { - "epoch": 0.09282556953417205, - "grad_norm": 2.5970292191674655, - "learning_rate": 9.897649706262474e-06, - "loss": 0.8518, - "step": 273 - }, - { - "epoch": 0.09316558993539613, - "grad_norm": 1.7660827251520181, - "learning_rate": 9.896538043043887e-06, - "loss": 0.8273, - "step": 274 - }, - { - "epoch": 0.09350561033662019, - "grad_norm": 2.6210906521485824, - "learning_rate": 9.895420438411616e-06, - "loss": 0.8951, - "step": 275 - }, - { - "epoch": 0.09384563073784427, - "grad_norm": 1.8800676289752767, - "learning_rate": 9.89429689372175e-06, - "loss": 0.8584, - "step": 276 - }, - { - "epoch": 0.09418565113906835, - "grad_norm": 3.1773516067582186, - "learning_rate": 9.893167410337591e-06, - "loss": 0.8535, - "step": 277 - }, - { - "epoch": 0.09452567154029241, - "grad_norm": 3.404016837148496, - "learning_rate": 9.892031989629642e-06, - "loss": 0.8279, - "step": 278 - }, - { - "epoch": 0.09486569194151649, - "grad_norm": 2.173356538675513, - "learning_rate": 9.890890632975612e-06, - "loss": 0.8635, - "step": 279 - }, - { - "epoch": 0.09520571234274057, - "grad_norm": 2.261938469413284, - "learning_rate": 9.889743341760412e-06, - "loss": 0.7996, - "step": 280 - }, - { - "epoch": 0.09554573274396463, - "grad_norm": 2.1477511262078233, - "learning_rate": 9.888590117376154e-06, - "loss": 0.9334, - "step": 281 - }, - { - "epoch": 0.09588575314518871, - "grad_norm": 2.458576128018538, - "learning_rate": 9.887430961222153e-06, - "loss": 0.88, - "step": 282 - }, - { - "epoch": 0.09622577354641279, - "grad_norm": 4.04358503504034, - "learning_rate": 9.886265874704914e-06, - "loss": 0.8699, - "step": 283 - }, - { - "epoch": 0.09656579394763685, - "grad_norm": 2.1028398707450293, - "learning_rate": 9.885094859238145e-06, - "loss": 1.0212, - "step": 284 - }, - { - "epoch": 0.09690581434886093, - "grad_norm": 2.208984051301497, - "learning_rate": 9.883917916242744e-06, - "loss": 0.8778, - "step": 285 - }, - { - "epoch": 0.09724583475008501, - "grad_norm": 5.2221607237155245, - "learning_rate": 9.882735047146803e-06, - "loss": 0.9002, - "step": 286 - }, - { - "epoch": 0.09758585515130908, - "grad_norm": 1.976265426760934, - "learning_rate": 9.881546253385603e-06, - "loss": 0.8457, - "step": 287 - }, - { - "epoch": 0.09792587555253315, - "grad_norm": 2.298479530013667, - "learning_rate": 9.880351536401617e-06, - "loss": 0.8554, - "step": 288 - }, - { - "epoch": 0.09826589595375723, - "grad_norm": 2.055015991219169, - "learning_rate": 9.879150897644504e-06, - "loss": 0.833, - "step": 289 - }, - { - "epoch": 0.0986059163549813, - "grad_norm": 2.147764160856317, - "learning_rate": 9.877944338571108e-06, - "loss": 0.8516, - "step": 290 - }, - { - "epoch": 0.09894593675620537, - "grad_norm": 1.9705786404454357, - "learning_rate": 9.876731860645454e-06, - "loss": 0.8118, - "step": 291 - }, - { - "epoch": 0.09928595715742945, - "grad_norm": 2.502835283880914, - "learning_rate": 9.875513465338754e-06, - "loss": 0.8403, - "step": 292 - }, - { - "epoch": 0.09962597755865352, - "grad_norm": 1.9113225330806665, - "learning_rate": 9.874289154129396e-06, - "loss": 0.8076, - "step": 293 - }, - { - "epoch": 0.0999659979598776, - "grad_norm": 2.282725376690771, - "learning_rate": 9.873058928502948e-06, - "loss": 0.9446, - "step": 294 - }, - { - "epoch": 0.10030601836110166, - "grad_norm": 2.2144235263707217, - "learning_rate": 9.871822789952155e-06, - "loss": 0.8268, - "step": 295 - }, - { - "epoch": 0.10064603876232574, - "grad_norm": 2.0635404740469525, - "learning_rate": 9.870580739976936e-06, - "loss": 0.8726, - "step": 296 - }, - { - "epoch": 0.10098605916354982, - "grad_norm": 1.9237725683316835, - "learning_rate": 9.869332780084383e-06, - "loss": 0.8556, - "step": 297 - }, - { - "epoch": 0.10132607956477388, - "grad_norm": 2.2476614190445514, - "learning_rate": 9.868078911788756e-06, - "loss": 0.9219, - "step": 298 - }, - { - "epoch": 0.10166609996599796, - "grad_norm": 2.5057546873897882, - "learning_rate": 9.866819136611492e-06, - "loss": 0.767, - "step": 299 - }, - { - "epoch": 0.10200612036722204, - "grad_norm": 2.355080917010462, - "learning_rate": 9.865553456081188e-06, - "loss": 0.7392, - "step": 300 - }, - { - "epoch": 0.1023461407684461, - "grad_norm": 2.1056856473326273, - "learning_rate": 9.864281871733608e-06, - "loss": 0.9198, - "step": 301 - }, - { - "epoch": 0.10268616116967018, - "grad_norm": 3.9555249729605917, - "learning_rate": 9.863004385111683e-06, - "loss": 0.7841, - "step": 302 - }, - { - "epoch": 0.10302618157089426, - "grad_norm": 2.2790309051441144, - "learning_rate": 9.8617209977655e-06, - "loss": 0.785, - "step": 303 - }, - { - "epoch": 0.10336620197211832, - "grad_norm": 2.117633945883531, - "learning_rate": 9.860431711252312e-06, - "loss": 0.8726, - "step": 304 - }, - { - "epoch": 0.1037062223733424, - "grad_norm": 2.5389939700471347, - "learning_rate": 9.859136527136525e-06, - "loss": 0.8982, - "step": 305 - }, - { - "epoch": 0.10404624277456648, - "grad_norm": 2.022741013697183, - "learning_rate": 9.857835446989708e-06, - "loss": 0.7907, - "step": 306 - }, - { - "epoch": 0.10438626317579054, - "grad_norm": 3.2009092222933715, - "learning_rate": 9.856528472390576e-06, - "loss": 0.6933, - "step": 307 - }, - { - "epoch": 0.10472628357701462, - "grad_norm": 2.2334333165472984, - "learning_rate": 9.855215604925e-06, - "loss": 0.8256, - "step": 308 - }, - { - "epoch": 0.1050663039782387, - "grad_norm": 2.7821304676829834, - "learning_rate": 9.853896846186e-06, - "loss": 0.8078, - "step": 309 - }, - { - "epoch": 0.10540632437946276, - "grad_norm": 1.960403992508658, - "learning_rate": 9.852572197773746e-06, - "loss": 0.7848, - "step": 310 - }, - { - "epoch": 0.10574634478068684, - "grad_norm": 2.0477907520683476, - "learning_rate": 9.851241661295558e-06, - "loss": 0.7813, - "step": 311 - }, - { - "epoch": 0.10608636518191092, - "grad_norm": 2.0911122056100164, - "learning_rate": 9.84990523836589e-06, - "loss": 0.8461, - "step": 312 - }, - { - "epoch": 0.10642638558313498, - "grad_norm": 2.0728170947217492, - "learning_rate": 9.848562930606353e-06, - "loss": 0.8832, - "step": 313 - }, - { - "epoch": 0.10676640598435906, - "grad_norm": 2.3673123838424, - "learning_rate": 9.847214739645684e-06, - "loss": 0.8177, - "step": 314 - }, - { - "epoch": 0.10710642638558314, - "grad_norm": 6.272345076835982, - "learning_rate": 9.845860667119769e-06, - "loss": 0.8795, - "step": 315 - }, - { - "epoch": 0.1074464467868072, - "grad_norm": 2.1579047581155466, - "learning_rate": 9.844500714671625e-06, - "loss": 0.7415, - "step": 316 - }, - { - "epoch": 0.10778646718803128, - "grad_norm": 1.9340927610624175, - "learning_rate": 9.843134883951405e-06, - "loss": 0.7208, - "step": 317 - }, - { - "epoch": 0.10812648758925536, - "grad_norm": 2.582635490684408, - "learning_rate": 9.8417631766164e-06, - "loss": 0.788, - "step": 318 - }, - { - "epoch": 0.10846650799047942, - "grad_norm": 2.5034059769021617, - "learning_rate": 9.840385594331022e-06, - "loss": 0.8107, - "step": 319 - }, - { - "epoch": 0.1088065283917035, - "grad_norm": 1.8885099464740926, - "learning_rate": 9.839002138766818e-06, - "loss": 0.8251, - "step": 320 - }, - { - "epoch": 0.10914654879292758, - "grad_norm": 2.1105158457858915, - "learning_rate": 9.837612811602462e-06, - "loss": 0.8193, - "step": 321 - }, - { - "epoch": 0.10948656919415165, - "grad_norm": 2.35579939491151, - "learning_rate": 9.836217614523747e-06, - "loss": 0.8366, - "step": 322 - }, - { - "epoch": 0.10982658959537572, - "grad_norm": 2.0838710319308174, - "learning_rate": 9.834816549223595e-06, - "loss": 0.8519, - "step": 323 - }, - { - "epoch": 0.1101666099965998, - "grad_norm": 1.9779789707319297, - "learning_rate": 9.833409617402044e-06, - "loss": 0.8505, - "step": 324 - }, - { - "epoch": 0.11050663039782387, - "grad_norm": 2.3766207833663784, - "learning_rate": 9.831996820766255e-06, - "loss": 0.8322, - "step": 325 - }, - { - "epoch": 0.11084665079904794, - "grad_norm": 2.1149686177574343, - "learning_rate": 9.830578161030498e-06, - "loss": 0.7337, - "step": 326 - }, - { - "epoch": 0.11118667120027202, - "grad_norm": 1.805489375141993, - "learning_rate": 9.829153639916162e-06, - "loss": 0.8365, - "step": 327 - }, - { - "epoch": 0.11152669160149609, - "grad_norm": 1.8967483785377255, - "learning_rate": 9.827723259151752e-06, - "loss": 0.8414, - "step": 328 - }, - { - "epoch": 0.11186671200272016, - "grad_norm": 1.797366163019355, - "learning_rate": 9.826287020472873e-06, - "loss": 0.7744, - "step": 329 - }, - { - "epoch": 0.11220673240394424, - "grad_norm": 2.0302882503199373, - "learning_rate": 9.82484492562225e-06, - "loss": 0.7147, - "step": 330 - }, - { - "epoch": 0.1125467528051683, - "grad_norm": 2.5063229156854723, - "learning_rate": 9.823396976349702e-06, - "loss": 0.9023, - "step": 331 - }, - { - "epoch": 0.11288677320639239, - "grad_norm": 1.8249477324783674, - "learning_rate": 9.821943174412159e-06, - "loss": 0.8074, - "step": 332 - }, - { - "epoch": 0.11322679360761646, - "grad_norm": 1.577389057439917, - "learning_rate": 9.82048352157365e-06, - "loss": 0.8605, - "step": 333 - }, - { - "epoch": 0.11356681400884053, - "grad_norm": 2.760876889647242, - "learning_rate": 9.819018019605306e-06, - "loss": 0.8667, - "step": 334 - }, - { - "epoch": 0.1139068344100646, - "grad_norm": 1.8963064743986586, - "learning_rate": 9.817546670285353e-06, - "loss": 0.7706, - "step": 335 - }, - { - "epoch": 0.11424685481128868, - "grad_norm": 2.2030659264137773, - "learning_rate": 9.816069475399113e-06, - "loss": 0.8123, - "step": 336 - }, - { - "epoch": 0.11458687521251275, - "grad_norm": 2.0354951728501685, - "learning_rate": 9.814586436738998e-06, - "loss": 0.8086, - "step": 337 - }, - { - "epoch": 0.11492689561373683, - "grad_norm": 1.9773497047211561, - "learning_rate": 9.813097556104514e-06, - "loss": 0.7746, - "step": 338 - }, - { - "epoch": 0.1152669160149609, - "grad_norm": 2.0274305580007628, - "learning_rate": 9.811602835302257e-06, - "loss": 0.8596, - "step": 339 - }, - { - "epoch": 0.11560693641618497, - "grad_norm": 1.8336317454746485, - "learning_rate": 9.810102276145907e-06, - "loss": 0.9853, - "step": 340 - }, - { - "epoch": 0.11594695681740905, - "grad_norm": 2.39198257850906, - "learning_rate": 9.808595880456226e-06, - "loss": 0.856, - "step": 341 - }, - { - "epoch": 0.11628697721863311, - "grad_norm": 2.245187215344967, - "learning_rate": 9.807083650061063e-06, - "loss": 0.8427, - "step": 342 - }, - { - "epoch": 0.11662699761985719, - "grad_norm": 2.972553693877441, - "learning_rate": 9.805565586795343e-06, - "loss": 0.7884, - "step": 343 - }, - { - "epoch": 0.11696701802108127, - "grad_norm": 2.1953868509755776, - "learning_rate": 9.804041692501071e-06, - "loss": 0.8486, - "step": 344 - }, - { - "epoch": 0.11730703842230533, - "grad_norm": 1.8166601796451167, - "learning_rate": 9.802511969027325e-06, - "loss": 0.8615, - "step": 345 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 1.9173045797680728, - "learning_rate": 9.800976418230257e-06, - "loss": 0.8542, - "step": 346 - }, - { - "epoch": 0.11798707922475349, - "grad_norm": 1.8134758747889816, - "learning_rate": 9.799435041973092e-06, - "loss": 0.8473, - "step": 347 - }, - { - "epoch": 0.11832709962597755, - "grad_norm": 4.312106023493087, - "learning_rate": 9.797887842126119e-06, - "loss": 0.8589, - "step": 348 - }, - { - "epoch": 0.11866712002720163, - "grad_norm": 2.0162211971459514, - "learning_rate": 9.796334820566697e-06, - "loss": 0.8679, - "step": 349 - }, - { - "epoch": 0.11900714042842571, - "grad_norm": 2.0564343236552873, - "learning_rate": 9.79477597917925e-06, - "loss": 0.8577, - "step": 350 - }, - { - "epoch": 0.11934716082964977, - "grad_norm": 2.133819909568558, - "learning_rate": 9.793211319855258e-06, - "loss": 0.8628, - "step": 351 - }, - { - "epoch": 0.11968718123087385, - "grad_norm": 1.9311795244885308, - "learning_rate": 9.791640844493267e-06, - "loss": 0.8469, - "step": 352 - }, - { - "epoch": 0.12002720163209793, - "grad_norm": 2.967519343303041, - "learning_rate": 9.790064554998875e-06, - "loss": 0.8362, - "step": 353 - }, - { - "epoch": 0.120367222033322, - "grad_norm": 4.208154882236529, - "learning_rate": 9.788482453284737e-06, - "loss": 0.9199, - "step": 354 - }, - { - "epoch": 0.12070724243454607, - "grad_norm": 2.617909166707279, - "learning_rate": 9.786894541270563e-06, - "loss": 0.7771, - "step": 355 - }, - { - "epoch": 0.12104726283577015, - "grad_norm": 1.8137174275503192, - "learning_rate": 9.785300820883108e-06, - "loss": 0.7556, - "step": 356 - }, - { - "epoch": 0.12138728323699421, - "grad_norm": 1.9432369984112166, - "learning_rate": 9.78370129405618e-06, - "loss": 0.8698, - "step": 357 - }, - { - "epoch": 0.12172730363821829, - "grad_norm": 2.5064308483815316, - "learning_rate": 9.782095962730628e-06, - "loss": 0.8607, - "step": 358 - }, - { - "epoch": 0.12206732403944237, - "grad_norm": 5.205967310399993, - "learning_rate": 9.780484828854346e-06, - "loss": 0.8711, - "step": 359 - }, - { - "epoch": 0.12240734444066644, - "grad_norm": 3.2761307002799462, - "learning_rate": 9.77886789438227e-06, - "loss": 0.8533, - "step": 360 - }, - { - "epoch": 0.12274736484189051, - "grad_norm": 1.8164118637880238, - "learning_rate": 9.777245161276372e-06, - "loss": 0.8407, - "step": 361 - }, - { - "epoch": 0.12308738524311459, - "grad_norm": 1.933148024538711, - "learning_rate": 9.775616631505663e-06, - "loss": 0.8629, - "step": 362 - }, - { - "epoch": 0.12342740564433866, - "grad_norm": 2.0021553446281395, - "learning_rate": 9.773982307046185e-06, - "loss": 0.7911, - "step": 363 - }, - { - "epoch": 0.12376742604556273, - "grad_norm": 2.4770275582280155, - "learning_rate": 9.772342189881012e-06, - "loss": 0.8485, - "step": 364 - }, - { - "epoch": 0.12410744644678681, - "grad_norm": 2.0858568863354474, - "learning_rate": 9.770696282000245e-06, - "loss": 0.8311, - "step": 365 - }, - { - "epoch": 0.12444746684801088, - "grad_norm": 1.8817539415546063, - "learning_rate": 9.769044585401017e-06, - "loss": 0.8769, - "step": 366 - }, - { - "epoch": 0.12478748724923495, - "grad_norm": 1.7596502920409212, - "learning_rate": 9.767387102087477e-06, - "loss": 0.8521, - "step": 367 - }, - { - "epoch": 0.12512750765045902, - "grad_norm": 4.783494590295243, - "learning_rate": 9.765723834070805e-06, - "loss": 0.8325, - "step": 368 - }, - { - "epoch": 0.1254675280516831, - "grad_norm": 4.166385331832633, - "learning_rate": 9.764054783369191e-06, - "loss": 0.862, - "step": 369 - }, - { - "epoch": 0.12580754845290718, - "grad_norm": 2.460165725461386, - "learning_rate": 9.762379952007847e-06, - "loss": 0.8256, - "step": 370 - }, - { - "epoch": 0.12614756885413125, - "grad_norm": 2.066282248436767, - "learning_rate": 9.760699342018997e-06, - "loss": 0.8975, - "step": 371 - }, - { - "epoch": 0.12648758925535533, - "grad_norm": 2.4732231692866793, - "learning_rate": 9.759012955441877e-06, - "loss": 0.8474, - "step": 372 - }, - { - "epoch": 0.12682760965657938, - "grad_norm": 2.1370791962949034, - "learning_rate": 9.757320794322736e-06, - "loss": 0.8541, - "step": 373 - }, - { - "epoch": 0.12716763005780346, - "grad_norm": 2.545444534776287, - "learning_rate": 9.755622860714824e-06, - "loss": 0.7436, - "step": 374 - }, - { - "epoch": 0.12750765045902754, - "grad_norm": 4.486941686450132, - "learning_rate": 9.753919156678397e-06, - "loss": 0.9077, - "step": 375 - }, - { - "epoch": 0.12784767086025162, - "grad_norm": 2.3634783435481004, - "learning_rate": 9.752209684280717e-06, - "loss": 0.9126, - "step": 376 - }, - { - "epoch": 0.1281876912614757, - "grad_norm": 2.155370098640075, - "learning_rate": 9.750494445596039e-06, - "loss": 0.9266, - "step": 377 - }, - { - "epoch": 0.12852771166269977, - "grad_norm": 3.380923933190232, - "learning_rate": 9.748773442705617e-06, - "loss": 0.859, - "step": 378 - }, - { - "epoch": 0.12886773206392382, - "grad_norm": 2.0842951308423805, - "learning_rate": 9.747046677697703e-06, - "loss": 0.8092, - "step": 379 - }, - { - "epoch": 0.1292077524651479, - "grad_norm": 2.2756970171219026, - "learning_rate": 9.745314152667532e-06, - "loss": 0.8278, - "step": 380 - }, - { - "epoch": 0.12954777286637198, - "grad_norm": 2.224154665108581, - "learning_rate": 9.743575869717343e-06, - "loss": 0.9301, - "step": 381 - }, - { - "epoch": 0.12988779326759606, - "grad_norm": 1.8618826446938581, - "learning_rate": 9.741831830956344e-06, - "loss": 0.7506, - "step": 382 - }, - { - "epoch": 0.13022781366882014, - "grad_norm": 2.359471897466931, - "learning_rate": 9.740082038500738e-06, - "loss": 0.8335, - "step": 383 - }, - { - "epoch": 0.13056783407004421, - "grad_norm": 2.2530265023397003, - "learning_rate": 9.738326494473708e-06, - "loss": 0.6634, - "step": 384 - }, - { - "epoch": 0.13090785447126826, - "grad_norm": 3.4483730372310313, - "learning_rate": 9.736565201005415e-06, - "loss": 0.796, - "step": 385 - }, - { - "epoch": 0.13124787487249234, - "grad_norm": 2.0533540536094264, - "learning_rate": 9.734798160232994e-06, - "loss": 0.7612, - "step": 386 - }, - { - "epoch": 0.13158789527371642, - "grad_norm": 2.0190748769698907, - "learning_rate": 9.733025374300556e-06, - "loss": 0.9188, - "step": 387 - }, - { - "epoch": 0.1319279156749405, - "grad_norm": 3.434702747485103, - "learning_rate": 9.731246845359187e-06, - "loss": 0.7621, - "step": 388 - }, - { - "epoch": 0.13226793607616458, - "grad_norm": 2.418994252917101, - "learning_rate": 9.729462575566931e-06, - "loss": 0.8452, - "step": 389 - }, - { - "epoch": 0.13260795647738866, - "grad_norm": 3.0894287674893337, - "learning_rate": 9.727672567088809e-06, - "loss": 0.9177, - "step": 390 - }, - { - "epoch": 0.1329479768786127, - "grad_norm": 2.087903312379049, - "learning_rate": 9.725876822096798e-06, - "loss": 0.8296, - "step": 391 - }, - { - "epoch": 0.13328799727983678, - "grad_norm": 1.9043558027810883, - "learning_rate": 9.724075342769841e-06, - "loss": 0.889, - "step": 392 - }, - { - "epoch": 0.13362801768106086, - "grad_norm": 2.455862323906618, - "learning_rate": 9.722268131293835e-06, - "loss": 0.7897, - "step": 393 - }, - { - "epoch": 0.13396803808228494, - "grad_norm": 2.0215817915535346, - "learning_rate": 9.720455189861634e-06, - "loss": 0.8734, - "step": 394 - }, - { - "epoch": 0.13430805848350902, - "grad_norm": 2.6158996648329347, - "learning_rate": 9.718636520673042e-06, - "loss": 0.7503, - "step": 395 - }, - { - "epoch": 0.1346480788847331, - "grad_norm": 1.9769125457115064, - "learning_rate": 9.716812125934818e-06, - "loss": 0.8262, - "step": 396 - }, - { - "epoch": 0.13498809928595715, - "grad_norm": 2.1779733445105514, - "learning_rate": 9.714982007860666e-06, - "loss": 0.859, - "step": 397 - }, - { - "epoch": 0.13532811968718123, - "grad_norm": 1.831021062504083, - "learning_rate": 9.713146168671229e-06, - "loss": 0.7766, - "step": 398 - }, - { - "epoch": 0.1356681400884053, - "grad_norm": 2.8959475128601224, - "learning_rate": 9.711304610594104e-06, - "loss": 0.7802, - "step": 399 - }, - { - "epoch": 0.13600816048962938, - "grad_norm": 2.1321321783640004, - "learning_rate": 9.709457335863815e-06, - "loss": 0.7941, - "step": 400 - }, - { - "epoch": 0.13634818089085346, - "grad_norm": 1.9938357040475916, - "learning_rate": 9.707604346721833e-06, - "loss": 0.8127, - "step": 401 - }, - { - "epoch": 0.13668820129207754, - "grad_norm": 1.7330981671482506, - "learning_rate": 9.705745645416553e-06, - "loss": 0.7544, - "step": 402 - }, - { - "epoch": 0.1370282216933016, - "grad_norm": 2.584162403732432, - "learning_rate": 9.703881234203309e-06, - "loss": 0.843, - "step": 403 - }, - { - "epoch": 0.13736824209452567, - "grad_norm": 2.3450929065189596, - "learning_rate": 9.702011115344359e-06, - "loss": 0.8568, - "step": 404 - }, - { - "epoch": 0.13770826249574974, - "grad_norm": 1.8906128272198697, - "learning_rate": 9.70013529110889e-06, - "loss": 0.7954, - "step": 405 - }, - { - "epoch": 0.13804828289697382, - "grad_norm": 2.005544234328828, - "learning_rate": 9.698253763773005e-06, - "loss": 0.8552, - "step": 406 - }, - { - "epoch": 0.1383883032981979, - "grad_norm": 2.1791404886973083, - "learning_rate": 9.696366535619735e-06, - "loss": 0.7682, - "step": 407 - }, - { - "epoch": 0.13872832369942195, - "grad_norm": 2.1254255310117216, - "learning_rate": 9.694473608939024e-06, - "loss": 0.9305, - "step": 408 - }, - { - "epoch": 0.13906834410064603, - "grad_norm": 2.065032167374917, - "learning_rate": 9.692574986027733e-06, - "loss": 0.8186, - "step": 409 - }, - { - "epoch": 0.1394083645018701, - "grad_norm": 1.976786103840094, - "learning_rate": 9.690670669189632e-06, - "loss": 0.9253, - "step": 410 - }, - { - "epoch": 0.13974838490309419, - "grad_norm": 9.33711461350008, - "learning_rate": 9.688760660735403e-06, - "loss": 0.8761, - "step": 411 - }, - { - "epoch": 0.14008840530431826, - "grad_norm": 2.0857389613573027, - "learning_rate": 9.68684496298263e-06, - "loss": 0.8343, - "step": 412 - }, - { - "epoch": 0.14042842570554234, - "grad_norm": 2.496185713769945, - "learning_rate": 9.684923578255806e-06, - "loss": 0.8012, - "step": 413 - }, - { - "epoch": 0.1407684461067664, - "grad_norm": 3.227539517727669, - "learning_rate": 9.682996508886318e-06, - "loss": 0.8353, - "step": 414 - }, - { - "epoch": 0.14110846650799047, - "grad_norm": 2.713651534237373, - "learning_rate": 9.681063757212455e-06, - "loss": 0.7775, - "step": 415 - }, - { - "epoch": 0.14144848690921455, - "grad_norm": 1.6905920998598611, - "learning_rate": 9.679125325579402e-06, - "loss": 0.79, - "step": 416 - }, - { - "epoch": 0.14178850731043863, - "grad_norm": 3.2275016714057947, - "learning_rate": 9.67718121633923e-06, - "loss": 0.8604, - "step": 417 - }, - { - "epoch": 0.1421285277116627, - "grad_norm": 2.6920966680993503, - "learning_rate": 9.675231431850907e-06, - "loss": 0.793, - "step": 418 - }, - { - "epoch": 0.14246854811288678, - "grad_norm": 2.6498430598374583, - "learning_rate": 9.673275974480282e-06, - "loss": 0.9103, - "step": 419 - }, - { - "epoch": 0.14280856851411083, - "grad_norm": 2.7892979782736864, - "learning_rate": 9.671314846600088e-06, - "loss": 0.826, - "step": 420 - }, - { - "epoch": 0.1431485889153349, - "grad_norm": 1.7387600466875632, - "learning_rate": 9.66934805058994e-06, - "loss": 0.7522, - "step": 421 - }, - { - "epoch": 0.143488609316559, - "grad_norm": 1.9252648709916258, - "learning_rate": 9.667375588836329e-06, - "loss": 0.9249, - "step": 422 - }, - { - "epoch": 0.14382862971778307, - "grad_norm": 2.0337042623648784, - "learning_rate": 9.665397463732623e-06, - "loss": 0.832, - "step": 423 - }, - { - "epoch": 0.14416865011900715, - "grad_norm": 2.355326076923748, - "learning_rate": 9.66341367767906e-06, - "loss": 0.7972, - "step": 424 - }, - { - "epoch": 0.14450867052023122, - "grad_norm": 4.252381455801173, - "learning_rate": 9.661424233082748e-06, - "loss": 0.8571, - "step": 425 - }, - { - "epoch": 0.14484869092145528, - "grad_norm": 2.2203148724753503, - "learning_rate": 9.65942913235766e-06, - "loss": 0.8049, - "step": 426 - }, - { - "epoch": 0.14518871132267935, - "grad_norm": 2.250620096493986, - "learning_rate": 9.657428377924632e-06, - "loss": 0.8665, - "step": 427 - }, - { - "epoch": 0.14552873172390343, - "grad_norm": 1.7641375389756913, - "learning_rate": 9.655421972211362e-06, - "loss": 0.8509, - "step": 428 - }, - { - "epoch": 0.1458687521251275, - "grad_norm": 1.917741070851843, - "learning_rate": 9.653409917652406e-06, - "loss": 0.8852, - "step": 429 - }, - { - "epoch": 0.1462087725263516, - "grad_norm": 2.1235005797818425, - "learning_rate": 9.651392216689167e-06, - "loss": 0.939, - "step": 430 - }, - { - "epoch": 0.14654879292757567, - "grad_norm": 2.423393500928274, - "learning_rate": 9.649368871769908e-06, - "loss": 0.7891, - "step": 431 - }, - { - "epoch": 0.14688881332879972, - "grad_norm": 2.29836823405829, - "learning_rate": 9.647339885349736e-06, - "loss": 0.8961, - "step": 432 - }, - { - "epoch": 0.1472288337300238, - "grad_norm": 3.041323557001672, - "learning_rate": 9.645305259890606e-06, - "loss": 0.6884, - "step": 433 - }, - { - "epoch": 0.14756885413124787, - "grad_norm": 2.1635297868724486, - "learning_rate": 9.643264997861312e-06, - "loss": 0.8554, - "step": 434 - }, - { - "epoch": 0.14790887453247195, - "grad_norm": 2.0623308876529896, - "learning_rate": 9.641219101737489e-06, - "loss": 0.8993, - "step": 435 - }, - { - "epoch": 0.14824889493369603, - "grad_norm": 1.9701367858930552, - "learning_rate": 9.639167574001608e-06, - "loss": 0.7581, - "step": 436 - }, - { - "epoch": 0.1485889153349201, - "grad_norm": 2.2500245249695365, - "learning_rate": 9.637110417142975e-06, - "loss": 0.7519, - "step": 437 - }, - { - "epoch": 0.14892893573614416, - "grad_norm": 2.2284060545707187, - "learning_rate": 9.635047633657723e-06, - "loss": 0.9183, - "step": 438 - }, - { - "epoch": 0.14926895613736824, - "grad_norm": 2.286832136889049, - "learning_rate": 9.632979226048816e-06, - "loss": 0.8386, - "step": 439 - }, - { - "epoch": 0.14960897653859231, - "grad_norm": 2.2951771970261143, - "learning_rate": 9.630905196826039e-06, - "loss": 0.8065, - "step": 440 - }, - { - "epoch": 0.1499489969398164, - "grad_norm": 2.131373688797792, - "learning_rate": 9.628825548506002e-06, - "loss": 0.7767, - "step": 441 - }, - { - "epoch": 0.15028901734104047, - "grad_norm": 2.091224107238558, - "learning_rate": 9.62674028361213e-06, - "loss": 0.8655, - "step": 442 - }, - { - "epoch": 0.15062903774226455, - "grad_norm": 3.2172828773538993, - "learning_rate": 9.624649404674661e-06, - "loss": 0.9147, - "step": 443 - }, - { - "epoch": 0.1509690581434886, - "grad_norm": 2.025151440715302, - "learning_rate": 9.622552914230655e-06, - "loss": 0.9121, - "step": 444 - }, - { - "epoch": 0.15130907854471268, - "grad_norm": 1.9528957154547468, - "learning_rate": 9.620450814823966e-06, - "loss": 0.8995, - "step": 445 - }, - { - "epoch": 0.15164909894593676, - "grad_norm": 2.2336331464426245, - "learning_rate": 9.618343109005266e-06, - "loss": 0.7953, - "step": 446 - }, - { - "epoch": 0.15198911934716083, - "grad_norm": 2.247828787686121, - "learning_rate": 9.616229799332026e-06, - "loss": 0.9126, - "step": 447 - }, - { - "epoch": 0.1523291397483849, - "grad_norm": 3.0710622235399967, - "learning_rate": 9.614110888368515e-06, - "loss": 0.7671, - "step": 448 - }, - { - "epoch": 0.152669160149609, - "grad_norm": 2.240205069427427, - "learning_rate": 9.6119863786858e-06, - "loss": 0.8534, - "step": 449 - }, - { - "epoch": 0.15300918055083304, - "grad_norm": 2.3440964305088374, - "learning_rate": 9.609856272861742e-06, - "loss": 0.8859, - "step": 450 - }, - { - "epoch": 0.15334920095205712, - "grad_norm": 2.947306903901322, - "learning_rate": 9.607720573480991e-06, - "loss": 0.8971, - "step": 451 - }, - { - "epoch": 0.1536892213532812, - "grad_norm": 1.9814248823269929, - "learning_rate": 9.605579283134985e-06, - "loss": 0.8666, - "step": 452 - }, - { - "epoch": 0.15402924175450527, - "grad_norm": 1.9414054426785363, - "learning_rate": 9.603432404421947e-06, - "loss": 0.83, - "step": 453 - }, - { - "epoch": 0.15436926215572935, - "grad_norm": 1.9508100977088108, - "learning_rate": 9.601279939946874e-06, - "loss": 0.7941, - "step": 454 - }, - { - "epoch": 0.1547092825569534, - "grad_norm": 2.738798819972234, - "learning_rate": 9.599121892321554e-06, - "loss": 0.7554, - "step": 455 - }, - { - "epoch": 0.15504930295817748, - "grad_norm": 2.2388309465744274, - "learning_rate": 9.59695826416454e-06, - "loss": 0.8047, - "step": 456 - }, - { - "epoch": 0.15538932335940156, - "grad_norm": 1.9102221923547757, - "learning_rate": 9.594789058101154e-06, - "loss": 0.7742, - "step": 457 - }, - { - "epoch": 0.15572934376062564, - "grad_norm": 2.176425662506411, - "learning_rate": 9.592614276763494e-06, - "loss": 0.8392, - "step": 458 - }, - { - "epoch": 0.15606936416184972, - "grad_norm": 2.075696818549719, - "learning_rate": 9.590433922790418e-06, - "loss": 0.8328, - "step": 459 - }, - { - "epoch": 0.1564093845630738, - "grad_norm": 2.45612251063091, - "learning_rate": 9.58824799882755e-06, - "loss": 0.8015, - "step": 460 - }, - { - "epoch": 0.15674940496429784, - "grad_norm": 1.8178552528317342, - "learning_rate": 9.586056507527266e-06, - "loss": 0.9039, - "step": 461 - }, - { - "epoch": 0.15708942536552192, - "grad_norm": 2.124214107555732, - "learning_rate": 9.583859451548703e-06, - "loss": 0.8113, - "step": 462 - }, - { - "epoch": 0.157429445766746, - "grad_norm": 1.5843205381627385, - "learning_rate": 9.581656833557749e-06, - "loss": 0.8248, - "step": 463 - }, - { - "epoch": 0.15776946616797008, - "grad_norm": 1.9306313527246615, - "learning_rate": 9.57944865622704e-06, - "loss": 0.7318, - "step": 464 - }, - { - "epoch": 0.15810948656919416, - "grad_norm": 2.9413967318596943, - "learning_rate": 9.577234922235954e-06, - "loss": 0.8524, - "step": 465 - }, - { - "epoch": 0.15844950697041824, - "grad_norm": 1.571426184030293, - "learning_rate": 9.575015634270619e-06, - "loss": 0.9224, - "step": 466 - }, - { - "epoch": 0.1587895273716423, - "grad_norm": 1.8730771122977774, - "learning_rate": 9.5727907950239e-06, - "loss": 0.7957, - "step": 467 - }, - { - "epoch": 0.15912954777286636, - "grad_norm": 2.376107493345504, - "learning_rate": 9.570560407195392e-06, - "loss": 0.7542, - "step": 468 - }, - { - "epoch": 0.15946956817409044, - "grad_norm": 1.9384094700182535, - "learning_rate": 9.568324473491431e-06, - "loss": 0.7407, - "step": 469 - }, - { - "epoch": 0.15980958857531452, - "grad_norm": 1.7937843614169016, - "learning_rate": 9.566082996625072e-06, - "loss": 0.7993, - "step": 470 - }, - { - "epoch": 0.1601496089765386, - "grad_norm": 3.3373628850176127, - "learning_rate": 9.56383597931611e-06, - "loss": 0.7848, - "step": 471 - }, - { - "epoch": 0.16048962937776268, - "grad_norm": 2.1659690728359697, - "learning_rate": 9.561583424291048e-06, - "loss": 0.8287, - "step": 472 - }, - { - "epoch": 0.16082964977898673, - "grad_norm": 2.2389421306989234, - "learning_rate": 9.55932533428312e-06, - "loss": 0.7587, - "step": 473 - }, - { - "epoch": 0.1611696701802108, - "grad_norm": 2.818398736688453, - "learning_rate": 9.557061712032269e-06, - "loss": 0.8222, - "step": 474 - }, - { - "epoch": 0.16150969058143488, - "grad_norm": 2.441252664404201, - "learning_rate": 9.554792560285152e-06, - "loss": 0.734, - "step": 475 - }, - { - "epoch": 0.16184971098265896, - "grad_norm": 2.295687270495865, - "learning_rate": 9.552517881795142e-06, - "loss": 0.8626, - "step": 476 - }, - { - "epoch": 0.16218973138388304, - "grad_norm": 13.760896349921383, - "learning_rate": 9.550237679322308e-06, - "loss": 0.8463, - "step": 477 - }, - { - "epoch": 0.16252975178510712, - "grad_norm": 2.1715258052291047, - "learning_rate": 9.547951955633428e-06, - "loss": 0.7491, - "step": 478 - }, - { - "epoch": 0.16286977218633117, - "grad_norm": 2.2255593479400906, - "learning_rate": 9.545660713501975e-06, - "loss": 0.9064, - "step": 479 - }, - { - "epoch": 0.16320979258755525, - "grad_norm": 2.21806151643282, - "learning_rate": 9.543363955708124e-06, - "loss": 0.8289, - "step": 480 - }, - { - "epoch": 0.16354981298877933, - "grad_norm": 2.188397294600766, - "learning_rate": 9.541061685038742e-06, - "loss": 0.8429, - "step": 481 - }, - { - "epoch": 0.1638898333900034, - "grad_norm": 2.166972985318867, - "learning_rate": 9.538753904287376e-06, - "loss": 0.9443, - "step": 482 - }, - { - "epoch": 0.16422985379122748, - "grad_norm": 2.455788846295091, - "learning_rate": 9.53644061625427e-06, - "loss": 0.7398, - "step": 483 - }, - { - "epoch": 0.16456987419245156, - "grad_norm": 2.4033301722625886, - "learning_rate": 9.534121823746348e-06, - "loss": 0.8728, - "step": 484 - }, - { - "epoch": 0.1649098945936756, - "grad_norm": 2.2276672135131634, - "learning_rate": 9.531797529577205e-06, - "loss": 0.9371, - "step": 485 - }, - { - "epoch": 0.1652499149948997, - "grad_norm": 2.434555018080122, - "learning_rate": 9.529467736567124e-06, - "loss": 0.9057, - "step": 486 - }, - { - "epoch": 0.16558993539612377, - "grad_norm": 2.427937801753027, - "learning_rate": 9.527132447543051e-06, - "loss": 0.8455, - "step": 487 - }, - { - "epoch": 0.16592995579734784, - "grad_norm": 3.4006562033751817, - "learning_rate": 9.524791665338606e-06, - "loss": 0.8247, - "step": 488 - }, - { - "epoch": 0.16626997619857192, - "grad_norm": 2.7613015303421466, - "learning_rate": 9.522445392794069e-06, - "loss": 0.8169, - "step": 489 - }, - { - "epoch": 0.166609996599796, - "grad_norm": 2.6236255693220323, - "learning_rate": 9.520093632756388e-06, - "loss": 0.7666, - "step": 490 - }, - { - "epoch": 0.16695001700102005, - "grad_norm": 2.389219152903732, - "learning_rate": 9.517736388079169e-06, - "loss": 0.8067, - "step": 491 - }, - { - "epoch": 0.16729003740224413, - "grad_norm": 1.7505382569470098, - "learning_rate": 9.515373661622665e-06, - "loss": 0.8714, - "step": 492 - }, - { - "epoch": 0.1676300578034682, - "grad_norm": 2.0417205501733795, - "learning_rate": 9.51300545625379e-06, - "loss": 0.8, - "step": 493 - }, - { - "epoch": 0.16797007820469229, - "grad_norm": 2.245402590882533, - "learning_rate": 9.510631774846099e-06, - "loss": 0.762, - "step": 494 - }, - { - "epoch": 0.16831009860591636, - "grad_norm": 2.82928636204283, - "learning_rate": 9.5082526202798e-06, - "loss": 0.6621, - "step": 495 - }, - { - "epoch": 0.16865011900714044, - "grad_norm": 2.387297511941106, - "learning_rate": 9.505867995441734e-06, - "loss": 0.8231, - "step": 496 - }, - { - "epoch": 0.1689901394083645, - "grad_norm": 3.8013261588105927, - "learning_rate": 9.503477903225382e-06, - "loss": 0.8885, - "step": 497 - }, - { - "epoch": 0.16933015980958857, - "grad_norm": 1.9582651742957375, - "learning_rate": 9.501082346530864e-06, - "loss": 0.7235, - "step": 498 - }, - { - "epoch": 0.16967018021081265, - "grad_norm": 2.0197184013240497, - "learning_rate": 9.498681328264919e-06, - "loss": 0.8888, - "step": 499 - }, - { - "epoch": 0.17001020061203673, - "grad_norm": 1.9383228814810216, - "learning_rate": 9.496274851340926e-06, - "loss": 0.7643, - "step": 500 - }, - { - "epoch": 0.1703502210132608, - "grad_norm": 2.2775734934604728, - "learning_rate": 9.49386291867888e-06, - "loss": 0.7317, - "step": 501 - }, - { - "epoch": 0.17069024141448486, - "grad_norm": 1.6625027584786152, - "learning_rate": 9.491445533205397e-06, - "loss": 0.8367, - "step": 502 - }, - { - "epoch": 0.17103026181570893, - "grad_norm": 2.1475307975125113, - "learning_rate": 9.48902269785371e-06, - "loss": 0.8449, - "step": 503 - }, - { - "epoch": 0.171370282216933, - "grad_norm": 2.347052710314186, - "learning_rate": 9.486594415563665e-06, - "loss": 0.867, - "step": 504 - }, - { - "epoch": 0.1717103026181571, - "grad_norm": 1.6939167607337662, - "learning_rate": 9.484160689281718e-06, - "loss": 0.8089, - "step": 505 - }, - { - "epoch": 0.17205032301938117, - "grad_norm": 2.7074248206478786, - "learning_rate": 9.48172152196093e-06, - "loss": 0.9275, - "step": 506 - }, - { - "epoch": 0.17239034342060525, - "grad_norm": 1.9623542563803935, - "learning_rate": 9.47927691656096e-06, - "loss": 0.7276, - "step": 507 - }, - { - "epoch": 0.1727303638218293, - "grad_norm": 2.2577590094858433, - "learning_rate": 9.476826876048076e-06, - "loss": 0.8322, - "step": 508 - }, - { - "epoch": 0.17307038422305338, - "grad_norm": 2.610689837249583, - "learning_rate": 9.474371403395129e-06, - "loss": 0.7989, - "step": 509 - }, - { - "epoch": 0.17341040462427745, - "grad_norm": 2.981168203823366, - "learning_rate": 9.47191050158157e-06, - "loss": 0.8787, - "step": 510 - }, - { - "epoch": 0.17375042502550153, - "grad_norm": 2.0910514651851937, - "learning_rate": 9.469444173593433e-06, - "loss": 0.8342, - "step": 511 - }, - { - "epoch": 0.1740904454267256, - "grad_norm": 2.327443732050833, - "learning_rate": 9.466972422423338e-06, - "loss": 0.7471, - "step": 512 - }, - { - "epoch": 0.1744304658279497, - "grad_norm": 1.8968205078548026, - "learning_rate": 9.464495251070483e-06, - "loss": 0.8071, - "step": 513 - }, - { - "epoch": 0.17477048622917374, - "grad_norm": 5.35775514905284, - "learning_rate": 9.462012662540645e-06, - "loss": 0.7672, - "step": 514 - }, - { - "epoch": 0.17511050663039782, - "grad_norm": 1.8205374867928585, - "learning_rate": 9.459524659846176e-06, - "loss": 0.8094, - "step": 515 - }, - { - "epoch": 0.1754505270316219, - "grad_norm": 2.70503593128861, - "learning_rate": 9.457031246005994e-06, - "loss": 0.8121, - "step": 516 - }, - { - "epoch": 0.17579054743284597, - "grad_norm": 2.097674274994687, - "learning_rate": 9.454532424045585e-06, - "loss": 0.7831, - "step": 517 - }, - { - "epoch": 0.17613056783407005, - "grad_norm": 2.541108967039887, - "learning_rate": 9.452028196996994e-06, - "loss": 0.7744, - "step": 518 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 2.448989357418417, - "learning_rate": 9.449518567898827e-06, - "loss": 0.8201, - "step": 519 - }, - { - "epoch": 0.17681060863651818, - "grad_norm": 2.4183579990716972, - "learning_rate": 9.44700353979625e-06, - "loss": 0.9491, - "step": 520 - }, - { - "epoch": 0.17715062903774226, - "grad_norm": 2.0371443097616204, - "learning_rate": 9.444483115740968e-06, - "loss": 0.8665, - "step": 521 - }, - { - "epoch": 0.17749064943896634, - "grad_norm": 2.417951434607015, - "learning_rate": 9.441957298791243e-06, - "loss": 0.7236, - "step": 522 - }, - { - "epoch": 0.17783066984019041, - "grad_norm": 3.2234895274833817, - "learning_rate": 9.439426092011877e-06, - "loss": 0.8275, - "step": 523 - }, - { - "epoch": 0.1781706902414145, - "grad_norm": 2.6553913479919773, - "learning_rate": 9.436889498474213e-06, - "loss": 0.8412, - "step": 524 - }, - { - "epoch": 0.17851071064263857, - "grad_norm": 3.1030470159238392, - "learning_rate": 9.434347521256131e-06, - "loss": 0.832, - "step": 525 - }, - { - "epoch": 0.17885073104386262, - "grad_norm": 1.8409627332088008, - "learning_rate": 9.431800163442043e-06, - "loss": 0.843, - "step": 526 - }, - { - "epoch": 0.1791907514450867, - "grad_norm": 3.186355638430994, - "learning_rate": 9.429247428122886e-06, - "loss": 0.707, - "step": 527 - }, - { - "epoch": 0.17953077184631078, - "grad_norm": 2.0444710139532516, - "learning_rate": 9.426689318396128e-06, - "loss": 0.8321, - "step": 528 - }, - { - "epoch": 0.17987079224753486, - "grad_norm": 1.7029237292350985, - "learning_rate": 9.424125837365754e-06, - "loss": 0.8387, - "step": 529 - }, - { - "epoch": 0.18021081264875893, - "grad_norm": 2.2003972951001427, - "learning_rate": 9.42155698814227e-06, - "loss": 0.7516, - "step": 530 - }, - { - "epoch": 0.180550833049983, - "grad_norm": 2.557957310252339, - "learning_rate": 9.41898277384269e-06, - "loss": 0.8797, - "step": 531 - }, - { - "epoch": 0.18089085345120706, - "grad_norm": 2.895779678922211, - "learning_rate": 9.416403197590547e-06, - "loss": 0.823, - "step": 532 - }, - { - "epoch": 0.18123087385243114, - "grad_norm": 2.0167366993376947, - "learning_rate": 9.41381826251587e-06, - "loss": 0.9268, - "step": 533 - }, - { - "epoch": 0.18157089425365522, - "grad_norm": 2.2879605857864265, - "learning_rate": 9.411227971755197e-06, - "loss": 0.9309, - "step": 534 - }, - { - "epoch": 0.1819109146548793, - "grad_norm": 5.454574823683741, - "learning_rate": 9.408632328451565e-06, - "loss": 0.8586, - "step": 535 - }, - { - "epoch": 0.18225093505610337, - "grad_norm": 2.145392976793967, - "learning_rate": 9.4060313357545e-06, - "loss": 0.8267, - "step": 536 - }, - { - "epoch": 0.18259095545732745, - "grad_norm": 2.4134835759822377, - "learning_rate": 9.403424996820024e-06, - "loss": 0.8951, - "step": 537 - }, - { - "epoch": 0.1829309758585515, - "grad_norm": 2.490795625384553, - "learning_rate": 9.400813314810644e-06, - "loss": 0.8217, - "step": 538 - }, - { - "epoch": 0.18327099625977558, - "grad_norm": 4.105744879893649, - "learning_rate": 9.39819629289535e-06, - "loss": 0.7641, - "step": 539 - }, - { - "epoch": 0.18361101666099966, - "grad_norm": 2.663813984562291, - "learning_rate": 9.395573934249614e-06, - "loss": 0.8811, - "step": 540 - }, - { - "epoch": 0.18395103706222374, - "grad_norm": 2.3379526523751037, - "learning_rate": 9.392946242055379e-06, - "loss": 0.8157, - "step": 541 - }, - { - "epoch": 0.18429105746344782, - "grad_norm": 2.460601877280579, - "learning_rate": 9.390313219501061e-06, - "loss": 0.8666, - "step": 542 - }, - { - "epoch": 0.18463107786467187, - "grad_norm": 2.4088692423050144, - "learning_rate": 9.38767486978155e-06, - "loss": 0.876, - "step": 543 - }, - { - "epoch": 0.18497109826589594, - "grad_norm": 1.5738286525981031, - "learning_rate": 9.385031196098194e-06, - "loss": 0.7488, - "step": 544 - }, - { - "epoch": 0.18531111866712002, - "grad_norm": 4.532392662900621, - "learning_rate": 9.3823822016588e-06, - "loss": 0.8693, - "step": 545 - }, - { - "epoch": 0.1856511390683441, - "grad_norm": 2.237189674784233, - "learning_rate": 9.379727889677632e-06, - "loss": 0.8958, - "step": 546 - }, - { - "epoch": 0.18599115946956818, - "grad_norm": 1.554107484360944, - "learning_rate": 9.377068263375411e-06, - "loss": 0.7866, - "step": 547 - }, - { - "epoch": 0.18633117987079226, - "grad_norm": 2.4467881402130125, - "learning_rate": 9.374403325979301e-06, - "loss": 0.8856, - "step": 548 - }, - { - "epoch": 0.1866712002720163, - "grad_norm": 2.4507416677378213, - "learning_rate": 9.371733080722911e-06, - "loss": 0.7532, - "step": 549 - }, - { - "epoch": 0.18701122067324039, - "grad_norm": 1.427343424523804, - "learning_rate": 9.369057530846294e-06, - "loss": 0.8418, - "step": 550 - }, - { - "epoch": 0.18735124107446446, - "grad_norm": 3.24841097172593, - "learning_rate": 9.366376679595936e-06, - "loss": 0.8738, - "step": 551 - }, - { - "epoch": 0.18769126147568854, - "grad_norm": 1.8294498367391419, - "learning_rate": 9.363690530224757e-06, - "loss": 0.9536, - "step": 552 - }, - { - "epoch": 0.18803128187691262, - "grad_norm": 3.147835292974788, - "learning_rate": 9.360999085992106e-06, - "loss": 0.9387, - "step": 553 - }, - { - "epoch": 0.1883713022781367, - "grad_norm": 1.7595455512097826, - "learning_rate": 9.358302350163758e-06, - "loss": 0.893, - "step": 554 - }, - { - "epoch": 0.18871132267936075, - "grad_norm": 2.0297149767885587, - "learning_rate": 9.355600326011903e-06, - "loss": 0.8648, - "step": 555 - }, - { - "epoch": 0.18905134308058483, - "grad_norm": 2.669731699705077, - "learning_rate": 9.352893016815155e-06, - "loss": 0.8835, - "step": 556 - }, - { - "epoch": 0.1893913634818089, - "grad_norm": 5.446794892812443, - "learning_rate": 9.350180425858538e-06, - "loss": 0.7767, - "step": 557 - }, - { - "epoch": 0.18973138388303298, - "grad_norm": 1.8268464771652575, - "learning_rate": 9.347462556433483e-06, - "loss": 0.7565, - "step": 558 - }, - { - "epoch": 0.19007140428425706, - "grad_norm": 1.642166985619522, - "learning_rate": 9.34473941183783e-06, - "loss": 0.8424, - "step": 559 - }, - { - "epoch": 0.19041142468548114, - "grad_norm": 2.0901831217312625, - "learning_rate": 9.342010995375811e-06, - "loss": 0.7805, - "step": 560 - }, - { - "epoch": 0.1907514450867052, - "grad_norm": 1.90871026730968, - "learning_rate": 9.33927731035807e-06, - "loss": 0.7668, - "step": 561 - }, - { - "epoch": 0.19109146548792927, - "grad_norm": 3.4307001624182316, - "learning_rate": 9.336538360101631e-06, - "loss": 0.8382, - "step": 562 - }, - { - "epoch": 0.19143148588915335, - "grad_norm": 2.393265808564416, - "learning_rate": 9.333794147929907e-06, - "loss": 0.7788, - "step": 563 - }, - { - "epoch": 0.19177150629037742, - "grad_norm": 2.022566960473697, - "learning_rate": 9.331044677172705e-06, - "loss": 0.744, - "step": 564 - }, - { - "epoch": 0.1921115266916015, - "grad_norm": 3.3062922036782836, - "learning_rate": 9.328289951166205e-06, - "loss": 0.8229, - "step": 565 - }, - { - "epoch": 0.19245154709282558, - "grad_norm": 3.0841654373647516, - "learning_rate": 9.325529973252967e-06, - "loss": 0.6495, - "step": 566 - }, - { - "epoch": 0.19279156749404963, - "grad_norm": 5.4892675893239105, - "learning_rate": 9.32276474678192e-06, - "loss": 0.8487, - "step": 567 - }, - { - "epoch": 0.1931315878952737, - "grad_norm": 2.013856083284362, - "learning_rate": 9.319994275108365e-06, - "loss": 0.9441, - "step": 568 - }, - { - "epoch": 0.1934716082964978, - "grad_norm": 2.4066098547440897, - "learning_rate": 9.31721856159397e-06, - "loss": 0.8576, - "step": 569 - }, - { - "epoch": 0.19381162869772187, - "grad_norm": 2.0530822158137023, - "learning_rate": 9.314437609606754e-06, - "loss": 0.7699, - "step": 570 - }, - { - "epoch": 0.19415164909894594, - "grad_norm": 1.7520902212648368, - "learning_rate": 9.311651422521103e-06, - "loss": 0.8794, - "step": 571 - }, - { - "epoch": 0.19449166950017002, - "grad_norm": 2.034565210374195, - "learning_rate": 9.308860003717748e-06, - "loss": 0.8773, - "step": 572 - }, - { - "epoch": 0.19483168990139407, - "grad_norm": 1.8451050446755255, - "learning_rate": 9.306063356583772e-06, - "loss": 0.7947, - "step": 573 - }, - { - "epoch": 0.19517171030261815, - "grad_norm": 2.010334432898869, - "learning_rate": 9.3032614845126e-06, - "loss": 0.8639, - "step": 574 - }, - { - "epoch": 0.19551173070384223, - "grad_norm": 1.8970535978324625, - "learning_rate": 9.300454390903999e-06, - "loss": 0.74, - "step": 575 - }, - { - "epoch": 0.1958517511050663, - "grad_norm": 2.3148877609826544, - "learning_rate": 9.297642079164067e-06, - "loss": 0.8328, - "step": 576 - }, - { - "epoch": 0.19619177150629039, - "grad_norm": 1.8964922349773508, - "learning_rate": 9.294824552705238e-06, - "loss": 0.7799, - "step": 577 - }, - { - "epoch": 0.19653179190751446, - "grad_norm": 1.970465125409274, - "learning_rate": 9.292001814946275e-06, - "loss": 0.8337, - "step": 578 - }, - { - "epoch": 0.19687181230873851, - "grad_norm": 1.8923831770943373, - "learning_rate": 9.289173869312259e-06, - "loss": 0.9365, - "step": 579 - }, - { - "epoch": 0.1972118327099626, - "grad_norm": 5.110858953274893, - "learning_rate": 9.286340719234592e-06, - "loss": 0.8185, - "step": 580 - }, - { - "epoch": 0.19755185311118667, - "grad_norm": 2.090608423644068, - "learning_rate": 9.283502368150996e-06, - "loss": 0.8934, - "step": 581 - }, - { - "epoch": 0.19789187351241075, - "grad_norm": 2.016487940198634, - "learning_rate": 9.280658819505495e-06, - "loss": 0.7756, - "step": 582 - }, - { - "epoch": 0.19823189391363483, - "grad_norm": 2.221575121411304, - "learning_rate": 9.277810076748427e-06, - "loss": 0.821, - "step": 583 - }, - { - "epoch": 0.1985719143148589, - "grad_norm": 1.8543548108937444, - "learning_rate": 9.274956143336433e-06, - "loss": 0.873, - "step": 584 - }, - { - "epoch": 0.19891193471608296, - "grad_norm": 2.259730694848992, - "learning_rate": 9.272097022732444e-06, - "loss": 0.8786, - "step": 585 - }, - { - "epoch": 0.19925195511730703, - "grad_norm": 1.848711170198966, - "learning_rate": 9.269232718405692e-06, - "loss": 0.8858, - "step": 586 - }, - { - "epoch": 0.1995919755185311, - "grad_norm": 1.866200683323318, - "learning_rate": 9.266363233831697e-06, - "loss": 0.8016, - "step": 587 - }, - { - "epoch": 0.1999319959197552, - "grad_norm": 14.97505212921413, - "learning_rate": 9.263488572492267e-06, - "loss": 0.7263, - "step": 588 - }, - { - "epoch": 0.20027201632097927, - "grad_norm": 2.15282534325055, - "learning_rate": 9.260608737875487e-06, - "loss": 0.83, - "step": 589 - }, - { - "epoch": 0.20061203672220332, - "grad_norm": 3.0728740665252308, - "learning_rate": 9.257723733475723e-06, - "loss": 0.8643, - "step": 590 - }, - { - "epoch": 0.2009520571234274, - "grad_norm": 1.9579947280234309, - "learning_rate": 9.25483356279361e-06, - "loss": 0.7628, - "step": 591 - }, - { - "epoch": 0.20129207752465147, - "grad_norm": 2.0595756759778645, - "learning_rate": 9.251938229336057e-06, - "loss": 0.7825, - "step": 592 - }, - { - "epoch": 0.20163209792587555, - "grad_norm": 3.9099946836071258, - "learning_rate": 9.249037736616235e-06, - "loss": 0.9511, - "step": 593 - }, - { - "epoch": 0.20197211832709963, - "grad_norm": 1.9453043208275265, - "learning_rate": 9.24613208815357e-06, - "loss": 0.8037, - "step": 594 - }, - { - "epoch": 0.2023121387283237, - "grad_norm": 2.067317387394065, - "learning_rate": 9.243221287473755e-06, - "loss": 0.8915, - "step": 595 - }, - { - "epoch": 0.20265215912954776, - "grad_norm": 1.7871868965358906, - "learning_rate": 9.240305338108726e-06, - "loss": 0.9012, - "step": 596 - }, - { - "epoch": 0.20299217953077184, - "grad_norm": 1.742758087393586, - "learning_rate": 9.237384243596667e-06, - "loss": 0.7241, - "step": 597 - }, - { - "epoch": 0.20333219993199592, - "grad_norm": 3.9506555587545105, - "learning_rate": 9.23445800748201e-06, - "loss": 0.8664, - "step": 598 - }, - { - "epoch": 0.20367222033322, - "grad_norm": 2.1447546142847704, - "learning_rate": 9.231526633315419e-06, - "loss": 0.8176, - "step": 599 - }, - { - "epoch": 0.20401224073444407, - "grad_norm": 2.0350086212804848, - "learning_rate": 9.2285901246538e-06, - "loss": 0.858, - "step": 600 - }, - { - "epoch": 0.20435226113566815, - "grad_norm": 2.0664182960390494, - "learning_rate": 9.225648485060283e-06, - "loss": 0.7872, - "step": 601 - }, - { - "epoch": 0.2046922815368922, - "grad_norm": 2.5061940461297714, - "learning_rate": 9.222701718104226e-06, - "loss": 0.7595, - "step": 602 - }, - { - "epoch": 0.20503230193811628, - "grad_norm": 2.0379087741894084, - "learning_rate": 9.21974982736121e-06, - "loss": 0.8303, - "step": 603 - }, - { - "epoch": 0.20537232233934036, - "grad_norm": 2.4684024083448697, - "learning_rate": 9.21679281641303e-06, - "loss": 0.7877, - "step": 604 - }, - { - "epoch": 0.20571234274056444, - "grad_norm": 1.541601484308322, - "learning_rate": 9.2138306888477e-06, - "loss": 0.8159, - "step": 605 - }, - { - "epoch": 0.2060523631417885, - "grad_norm": 2.855409839214273, - "learning_rate": 9.21086344825943e-06, - "loss": 0.892, - "step": 606 - }, - { - "epoch": 0.2063923835430126, - "grad_norm": 2.0094424248725584, - "learning_rate": 9.207891098248648e-06, - "loss": 0.8376, - "step": 607 - }, - { - "epoch": 0.20673240394423664, - "grad_norm": 2.3628874201292103, - "learning_rate": 9.204913642421977e-06, - "loss": 0.8384, - "step": 608 - }, - { - "epoch": 0.20707242434546072, - "grad_norm": 2.7503706346744408, - "learning_rate": 9.20193108439223e-06, - "loss": 0.8241, - "step": 609 - }, - { - "epoch": 0.2074124447466848, - "grad_norm": 1.8912481720048837, - "learning_rate": 9.198943427778415e-06, - "loss": 0.7518, - "step": 610 - }, - { - "epoch": 0.20775246514790888, - "grad_norm": 1.8807659764591915, - "learning_rate": 9.19595067620573e-06, - "loss": 0.9048, - "step": 611 - }, - { - "epoch": 0.20809248554913296, - "grad_norm": 2.5399683576595744, - "learning_rate": 9.19295283330555e-06, - "loss": 0.787, - "step": 612 - }, - { - "epoch": 0.20843250595035703, - "grad_norm": 2.2042041124830756, - "learning_rate": 9.189949902715432e-06, - "loss": 0.7788, - "step": 613 - }, - { - "epoch": 0.20877252635158108, - "grad_norm": 3.1080504073340185, - "learning_rate": 9.1869418880791e-06, - "loss": 0.9674, - "step": 614 - }, - { - "epoch": 0.20911254675280516, - "grad_norm": 4.302778372345475, - "learning_rate": 9.183928793046456e-06, - "loss": 0.8179, - "step": 615 - }, - { - "epoch": 0.20945256715402924, - "grad_norm": 4.5320039265679375, - "learning_rate": 9.180910621273555e-06, - "loss": 0.8254, - "step": 616 - }, - { - "epoch": 0.20979258755525332, - "grad_norm": 1.8992179347695721, - "learning_rate": 9.177887376422624e-06, - "loss": 0.7908, - "step": 617 - }, - { - "epoch": 0.2101326079564774, - "grad_norm": 1.9899814372282567, - "learning_rate": 9.174859062162037e-06, - "loss": 0.7912, - "step": 618 - }, - { - "epoch": 0.21047262835770147, - "grad_norm": 2.476667419018384, - "learning_rate": 9.171825682166325e-06, - "loss": 0.8038, - "step": 619 - }, - { - "epoch": 0.21081264875892552, - "grad_norm": 2.295861238237628, - "learning_rate": 9.168787240116162e-06, - "loss": 0.8047, - "step": 620 - }, - { - "epoch": 0.2111526691601496, - "grad_norm": 2.0894123097550104, - "learning_rate": 9.165743739698364e-06, - "loss": 0.8888, - "step": 621 - }, - { - "epoch": 0.21149268956137368, - "grad_norm": 1.8237207223507654, - "learning_rate": 9.162695184605887e-06, - "loss": 0.8017, - "step": 622 - }, - { - "epoch": 0.21183270996259776, - "grad_norm": 6.442654526204107, - "learning_rate": 9.15964157853782e-06, - "loss": 0.858, - "step": 623 - }, - { - "epoch": 0.21217273036382184, - "grad_norm": 1.7241002709899875, - "learning_rate": 9.15658292519938e-06, - "loss": 0.8993, - "step": 624 - }, - { - "epoch": 0.21251275076504592, - "grad_norm": 2.4396884010337767, - "learning_rate": 9.153519228301907e-06, - "loss": 0.8945, - "step": 625 - }, - { - "epoch": 0.21285277116626997, - "grad_norm": 3.270858990699396, - "learning_rate": 9.150450491562864e-06, - "loss": 0.8649, - "step": 626 - }, - { - "epoch": 0.21319279156749404, - "grad_norm": 3.3296457820914904, - "learning_rate": 9.147376718705825e-06, - "loss": 0.8044, - "step": 627 - }, - { - "epoch": 0.21353281196871812, - "grad_norm": 1.8050916328292812, - "learning_rate": 9.144297913460481e-06, - "loss": 0.7789, - "step": 628 - }, - { - "epoch": 0.2138728323699422, - "grad_norm": 1.6022202427177714, - "learning_rate": 9.141214079562624e-06, - "loss": 0.7811, - "step": 629 - }, - { - "epoch": 0.21421285277116628, - "grad_norm": 3.8320152900329987, - "learning_rate": 9.13812522075415e-06, - "loss": 0.8481, - "step": 630 - }, - { - "epoch": 0.21455287317239036, - "grad_norm": 2.968430851574413, - "learning_rate": 9.13503134078305e-06, - "loss": 0.8705, - "step": 631 - }, - { - "epoch": 0.2148928935736144, - "grad_norm": 2.7320514154269016, - "learning_rate": 9.13193244340341e-06, - "loss": 0.7574, - "step": 632 - }, - { - "epoch": 0.21523291397483849, - "grad_norm": 2.5381990072952987, - "learning_rate": 9.128828532375404e-06, - "loss": 0.758, - "step": 633 - }, - { - "epoch": 0.21557293437606256, - "grad_norm": 4.664824544519933, - "learning_rate": 9.125719611465287e-06, - "loss": 0.9002, - "step": 634 - }, - { - "epoch": 0.21591295477728664, - "grad_norm": 1.8271666953396724, - "learning_rate": 9.122605684445397e-06, - "loss": 0.8619, - "step": 635 - }, - { - "epoch": 0.21625297517851072, - "grad_norm": 1.9433634039375405, - "learning_rate": 9.119486755094143e-06, - "loss": 0.8429, - "step": 636 - }, - { - "epoch": 0.21659299557973477, - "grad_norm": 1.8859622430454264, - "learning_rate": 9.116362827196002e-06, - "loss": 0.7708, - "step": 637 - }, - { - "epoch": 0.21693301598095885, - "grad_norm": 2.211237168392557, - "learning_rate": 9.113233904541524e-06, - "loss": 0.8633, - "step": 638 - }, - { - "epoch": 0.21727303638218293, - "grad_norm": 1.767226866716121, - "learning_rate": 9.110099990927311e-06, - "loss": 0.9302, - "step": 639 - }, - { - "epoch": 0.217613056783407, - "grad_norm": 1.989789546199134, - "learning_rate": 9.106961090156026e-06, - "loss": 0.8603, - "step": 640 - }, - { - "epoch": 0.21795307718463108, - "grad_norm": 4.520549862271871, - "learning_rate": 9.103817206036383e-06, - "loss": 0.8902, - "step": 641 - }, - { - "epoch": 0.21829309758585516, - "grad_norm": 1.938477575541222, - "learning_rate": 9.100668342383138e-06, - "loss": 0.8366, - "step": 642 - }, - { - "epoch": 0.2186331179870792, - "grad_norm": 2.327565546501374, - "learning_rate": 9.097514503017098e-06, - "loss": 0.7551, - "step": 643 - }, - { - "epoch": 0.2189731383883033, - "grad_norm": 2.1109166908567936, - "learning_rate": 9.0943556917651e-06, - "loss": 0.8257, - "step": 644 - }, - { - "epoch": 0.21931315878952737, - "grad_norm": 1.8110479549556548, - "learning_rate": 9.091191912460014e-06, - "loss": 0.9507, - "step": 645 - }, - { - "epoch": 0.21965317919075145, - "grad_norm": 2.251697261146959, - "learning_rate": 9.088023168940743e-06, - "loss": 0.6991, - "step": 646 - }, - { - "epoch": 0.21999319959197552, - "grad_norm": 1.7773244864207038, - "learning_rate": 9.08484946505221e-06, - "loss": 0.7976, - "step": 647 - }, - { - "epoch": 0.2203332199931996, - "grad_norm": 2.3077357797688176, - "learning_rate": 9.08167080464536e-06, - "loss": 0.8666, - "step": 648 - }, - { - "epoch": 0.22067324039442365, - "grad_norm": 2.28086178140718, - "learning_rate": 9.078487191577146e-06, - "loss": 0.94, - "step": 649 - }, - { - "epoch": 0.22101326079564773, - "grad_norm": 1.5485886049410107, - "learning_rate": 9.075298629710536e-06, - "loss": 0.8475, - "step": 650 - }, - { - "epoch": 0.2213532811968718, - "grad_norm": 1.9290472754703258, - "learning_rate": 9.072105122914502e-06, - "loss": 0.8813, - "step": 651 - }, - { - "epoch": 0.2216933015980959, - "grad_norm": 8.21841805972392, - "learning_rate": 9.068906675064016e-06, - "loss": 0.7745, - "step": 652 - }, - { - "epoch": 0.22203332199931997, - "grad_norm": 2.9628280561922713, - "learning_rate": 9.065703290040043e-06, - "loss": 0.6788, - "step": 653 - }, - { - "epoch": 0.22237334240054404, - "grad_norm": 3.0384149610010076, - "learning_rate": 9.062494971729542e-06, - "loss": 0.8977, - "step": 654 - }, - { - "epoch": 0.2227133628017681, - "grad_norm": 3.001689658067599, - "learning_rate": 9.059281724025455e-06, - "loss": 0.7856, - "step": 655 - }, - { - "epoch": 0.22305338320299217, - "grad_norm": 2.4216473597244557, - "learning_rate": 9.056063550826708e-06, - "loss": 0.8248, - "step": 656 - }, - { - "epoch": 0.22339340360421625, - "grad_norm": 2.4916553224709115, - "learning_rate": 9.052840456038204e-06, - "loss": 0.8426, - "step": 657 - }, - { - "epoch": 0.22373342400544033, - "grad_norm": 2.030963651676548, - "learning_rate": 9.049612443570814e-06, - "loss": 0.8562, - "step": 658 - }, - { - "epoch": 0.2240734444066644, - "grad_norm": 2.8064288123845516, - "learning_rate": 9.046379517341378e-06, - "loss": 0.8298, - "step": 659 - }, - { - "epoch": 0.22441346480788849, - "grad_norm": 2.279614877119188, - "learning_rate": 9.0431416812727e-06, - "loss": 0.8515, - "step": 660 - }, - { - "epoch": 0.22475348520911254, - "grad_norm": 2.1516052872817553, - "learning_rate": 9.039898939293539e-06, - "loss": 0.7463, - "step": 661 - }, - { - "epoch": 0.2250935056103366, - "grad_norm": 2.343567497267246, - "learning_rate": 9.036651295338608e-06, - "loss": 0.8554, - "step": 662 - }, - { - "epoch": 0.2254335260115607, - "grad_norm": 2.6761262055968795, - "learning_rate": 9.033398753348569e-06, - "loss": 0.8184, - "step": 663 - }, - { - "epoch": 0.22577354641278477, - "grad_norm": 2.0464322670596493, - "learning_rate": 9.030141317270026e-06, - "loss": 0.7478, - "step": 664 - }, - { - "epoch": 0.22611356681400885, - "grad_norm": 2.2236911397789414, - "learning_rate": 9.026878991055521e-06, - "loss": 0.9156, - "step": 665 - }, - { - "epoch": 0.22645358721523293, - "grad_norm": 2.1101918658429124, - "learning_rate": 9.02361177866353e-06, - "loss": 0.782, - "step": 666 - }, - { - "epoch": 0.22679360761645698, - "grad_norm": 3.224573101223046, - "learning_rate": 9.020339684058459e-06, - "loss": 0.8831, - "step": 667 - }, - { - "epoch": 0.22713362801768106, - "grad_norm": 2.155983593275046, - "learning_rate": 9.017062711210638e-06, - "loss": 0.8461, - "step": 668 - }, - { - "epoch": 0.22747364841890513, - "grad_norm": 3.742735571520871, - "learning_rate": 9.013780864096313e-06, - "loss": 0.9233, - "step": 669 - }, - { - "epoch": 0.2278136688201292, - "grad_norm": 2.11951387339584, - "learning_rate": 9.010494146697648e-06, - "loss": 0.8415, - "step": 670 - }, - { - "epoch": 0.2281536892213533, - "grad_norm": 4.046208706917889, - "learning_rate": 9.007202563002715e-06, - "loss": 0.8367, - "step": 671 - }, - { - "epoch": 0.22849370962257737, - "grad_norm": 2.5726534936217353, - "learning_rate": 9.003906117005489e-06, - "loss": 0.7983, - "step": 672 - }, - { - "epoch": 0.22883373002380142, - "grad_norm": 2.3143058688116334, - "learning_rate": 9.000604812705854e-06, - "loss": 0.7471, - "step": 673 - }, - { - "epoch": 0.2291737504250255, - "grad_norm": 7.689486779358199, - "learning_rate": 8.997298654109573e-06, - "loss": 0.7961, - "step": 674 - }, - { - "epoch": 0.22951377082624957, - "grad_norm": 1.9154873476566034, - "learning_rate": 8.993987645228313e-06, - "loss": 0.8463, - "step": 675 - }, - { - "epoch": 0.22985379122747365, - "grad_norm": 1.9308286258150182, - "learning_rate": 8.99067179007962e-06, - "loss": 0.7293, - "step": 676 - }, - { - "epoch": 0.23019381162869773, - "grad_norm": 3.112154181656327, - "learning_rate": 8.987351092686923e-06, - "loss": 0.8588, - "step": 677 - }, - { - "epoch": 0.2305338320299218, - "grad_norm": 1.7834759216247464, - "learning_rate": 8.984025557079523e-06, - "loss": 0.8339, - "step": 678 - }, - { - "epoch": 0.23087385243114586, - "grad_norm": 2.316905251892595, - "learning_rate": 8.980695187292598e-06, - "loss": 0.7621, - "step": 679 - }, - { - "epoch": 0.23121387283236994, - "grad_norm": 3.101167924048224, - "learning_rate": 8.977359987367182e-06, - "loss": 0.8604, - "step": 680 - }, - { - "epoch": 0.23155389323359402, - "grad_norm": 2.2874629757251634, - "learning_rate": 8.97401996135018e-06, - "loss": 0.8787, - "step": 681 - }, - { - "epoch": 0.2318939136348181, - "grad_norm": 2.4403671006704677, - "learning_rate": 8.970675113294348e-06, - "loss": 0.9373, - "step": 682 - }, - { - "epoch": 0.23223393403604217, - "grad_norm": 2.4115030369064274, - "learning_rate": 8.967325447258292e-06, - "loss": 0.7396, - "step": 683 - }, - { - "epoch": 0.23257395443726622, - "grad_norm": 2.1720889668199708, - "learning_rate": 8.963970967306466e-06, - "loss": 0.843, - "step": 684 - }, - { - "epoch": 0.2329139748384903, - "grad_norm": 1.9401953765991744, - "learning_rate": 8.960611677509166e-06, - "loss": 0.8625, - "step": 685 - }, - { - "epoch": 0.23325399523971438, - "grad_norm": 2.068432286888123, - "learning_rate": 8.95724758194252e-06, - "loss": 0.8402, - "step": 686 - }, - { - "epoch": 0.23359401564093846, - "grad_norm": 1.591243068030512, - "learning_rate": 8.953878684688492e-06, - "loss": 0.7842, - "step": 687 - }, - { - "epoch": 0.23393403604216254, - "grad_norm": 1.7715469138687294, - "learning_rate": 8.950504989834873e-06, - "loss": 0.8833, - "step": 688 - }, - { - "epoch": 0.2342740564433866, - "grad_norm": 2.0026773241901537, - "learning_rate": 8.94712650147527e-06, - "loss": 0.8189, - "step": 689 - }, - { - "epoch": 0.23461407684461066, - "grad_norm": 2.8052765922906917, - "learning_rate": 8.943743223709109e-06, - "loss": 0.7157, - "step": 690 - }, - { - "epoch": 0.23495409724583474, - "grad_norm": 1.7526459634636724, - "learning_rate": 8.94035516064163e-06, - "loss": 0.7976, - "step": 691 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 1.5061919498395846, - "learning_rate": 8.936962316383876e-06, - "loss": 0.7932, - "step": 692 - }, - { - "epoch": 0.2356341380482829, - "grad_norm": 2.5099728137526736, - "learning_rate": 8.933564695052692e-06, - "loss": 0.7652, - "step": 693 - }, - { - "epoch": 0.23597415844950698, - "grad_norm": 1.8950918439469646, - "learning_rate": 8.930162300770721e-06, - "loss": 0.7014, - "step": 694 - }, - { - "epoch": 0.23631417885073105, - "grad_norm": 2.1074334438205358, - "learning_rate": 8.926755137666396e-06, - "loss": 0.8158, - "step": 695 - }, - { - "epoch": 0.2366541992519551, - "grad_norm": 2.093263958473567, - "learning_rate": 8.923343209873937e-06, - "loss": 0.8099, - "step": 696 - }, - { - "epoch": 0.23699421965317918, - "grad_norm": 1.8542225188561288, - "learning_rate": 8.919926521533346e-06, - "loss": 0.8189, - "step": 697 - }, - { - "epoch": 0.23733424005440326, - "grad_norm": 2.087963516864021, - "learning_rate": 8.9165050767904e-06, - "loss": 0.8313, - "step": 698 - }, - { - "epoch": 0.23767426045562734, - "grad_norm": 2.047018307961187, - "learning_rate": 8.913078879796648e-06, - "loss": 0.8662, - "step": 699 - }, - { - "epoch": 0.23801428085685142, - "grad_norm": 2.7094333692876837, - "learning_rate": 8.90964793470941e-06, - "loss": 0.8143, - "step": 700 - }, - { - "epoch": 0.2383543012580755, - "grad_norm": 2.7906165665573837, - "learning_rate": 8.906212245691755e-06, - "loss": 0.8905, - "step": 701 - }, - { - "epoch": 0.23869432165929955, - "grad_norm": 2.797092594783242, - "learning_rate": 8.902771816912521e-06, - "loss": 0.879, - "step": 702 - }, - { - "epoch": 0.23903434206052362, - "grad_norm": 2.5180099645451066, - "learning_rate": 8.899326652546292e-06, - "loss": 0.7547, - "step": 703 - }, - { - "epoch": 0.2393743624617477, - "grad_norm": 1.7769222125627648, - "learning_rate": 8.895876756773398e-06, - "loss": 0.9269, - "step": 704 - }, - { - "epoch": 0.23971438286297178, - "grad_norm": 1.8153973558094076, - "learning_rate": 8.89242213377991e-06, - "loss": 0.8157, - "step": 705 - }, - { - "epoch": 0.24005440326419586, - "grad_norm": 1.7008522196194176, - "learning_rate": 8.888962787757636e-06, - "loss": 0.8323, - "step": 706 - }, - { - "epoch": 0.24039442366541994, - "grad_norm": 3.122832621811037, - "learning_rate": 8.885498722904114e-06, - "loss": 0.8148, - "step": 707 - }, - { - "epoch": 0.240734444066644, - "grad_norm": 2.6291893036900045, - "learning_rate": 8.882029943422605e-06, - "loss": 0.8432, - "step": 708 - }, - { - "epoch": 0.24107446446786807, - "grad_norm": 2.273799903298918, - "learning_rate": 8.8785564535221e-06, - "loss": 0.8374, - "step": 709 - }, - { - "epoch": 0.24141448486909214, - "grad_norm": 1.8887964125341279, - "learning_rate": 8.875078257417294e-06, - "loss": 0.8395, - "step": 710 - }, - { - "epoch": 0.24175450527031622, - "grad_norm": 1.9167482586163092, - "learning_rate": 8.871595359328603e-06, - "loss": 0.8333, - "step": 711 - }, - { - "epoch": 0.2420945256715403, - "grad_norm": 2.0043550475154777, - "learning_rate": 8.868107763482137e-06, - "loss": 0.8465, - "step": 712 - }, - { - "epoch": 0.24243454607276438, - "grad_norm": 1.8537150982530552, - "learning_rate": 8.864615474109715e-06, - "loss": 0.7761, - "step": 713 - }, - { - "epoch": 0.24277456647398843, - "grad_norm": 2.2270421393649777, - "learning_rate": 8.861118495448847e-06, - "loss": 0.7535, - "step": 714 - }, - { - "epoch": 0.2431145868752125, - "grad_norm": 1.9397964001880972, - "learning_rate": 8.857616831742739e-06, - "loss": 0.751, - "step": 715 - }, - { - "epoch": 0.24345460727643659, - "grad_norm": 2.797785675978316, - "learning_rate": 8.854110487240275e-06, - "loss": 0.7928, - "step": 716 - }, - { - "epoch": 0.24379462767766066, - "grad_norm": 3.0971738774972604, - "learning_rate": 8.850599466196018e-06, - "loss": 0.7754, - "step": 717 - }, - { - "epoch": 0.24413464807888474, - "grad_norm": 3.154393286576707, - "learning_rate": 8.847083772870209e-06, - "loss": 0.7009, - "step": 718 - }, - { - "epoch": 0.24447466848010882, - "grad_norm": 2.605914049831787, - "learning_rate": 8.84356341152876e-06, - "loss": 0.7458, - "step": 719 - }, - { - "epoch": 0.24481468888133287, - "grad_norm": 2.3256897705301522, - "learning_rate": 8.840038386443243e-06, - "loss": 0.7355, - "step": 720 - }, - { - "epoch": 0.24515470928255695, - "grad_norm": 2.092998344309539, - "learning_rate": 8.836508701890892e-06, - "loss": 0.859, - "step": 721 - }, - { - "epoch": 0.24549472968378103, - "grad_norm": 1.8695733446918772, - "learning_rate": 8.832974362154592e-06, - "loss": 0.8425, - "step": 722 - }, - { - "epoch": 0.2458347500850051, - "grad_norm": 1.9623481565876253, - "learning_rate": 8.829435371522879e-06, - "loss": 0.7531, - "step": 723 - }, - { - "epoch": 0.24617477048622918, - "grad_norm": 1.7281382412711035, - "learning_rate": 8.82589173428993e-06, - "loss": 0.8975, - "step": 724 - }, - { - "epoch": 0.24651479088745323, - "grad_norm": 1.6071614173391668, - "learning_rate": 8.822343454755562e-06, - "loss": 0.8718, - "step": 725 - }, - { - "epoch": 0.2468548112886773, - "grad_norm": 1.9394291672670085, - "learning_rate": 8.818790537225224e-06, - "loss": 0.7458, - "step": 726 - }, - { - "epoch": 0.2471948316899014, - "grad_norm": 2.0539393000556343, - "learning_rate": 8.815232986009994e-06, - "loss": 0.8104, - "step": 727 - }, - { - "epoch": 0.24753485209112547, - "grad_norm": 2.047963973266287, - "learning_rate": 8.81167080542657e-06, - "loss": 0.8877, - "step": 728 - }, - { - "epoch": 0.24787487249234955, - "grad_norm": 3.0026729958135134, - "learning_rate": 8.80810399979727e-06, - "loss": 0.8977, - "step": 729 - }, - { - "epoch": 0.24821489289357362, - "grad_norm": 1.7334258967558374, - "learning_rate": 8.804532573450024e-06, - "loss": 0.7311, - "step": 730 - }, - { - "epoch": 0.24855491329479767, - "grad_norm": 2.2237717981886056, - "learning_rate": 8.800956530718365e-06, - "loss": 0.8934, - "step": 731 - }, - { - "epoch": 0.24889493369602175, - "grad_norm": 1.4722862638834173, - "learning_rate": 8.797375875941431e-06, - "loss": 0.7578, - "step": 732 - }, - { - "epoch": 0.24923495409724583, - "grad_norm": 2.076977010854489, - "learning_rate": 8.793790613463956e-06, - "loss": 0.8266, - "step": 733 - }, - { - "epoch": 0.2495749744984699, - "grad_norm": 1.9019007451038732, - "learning_rate": 8.790200747636261e-06, - "loss": 0.817, - "step": 734 - }, - { - "epoch": 0.249914994899694, - "grad_norm": 3.2523001391650603, - "learning_rate": 8.78660628281426e-06, - "loss": 0.823, - "step": 735 - }, - { - "epoch": 0.25025501530091804, - "grad_norm": 2.2606861111968706, - "learning_rate": 8.78300722335944e-06, - "loss": 0.7769, - "step": 736 - }, - { - "epoch": 0.25059503570214214, - "grad_norm": 2.4433629042102467, - "learning_rate": 8.77940357363887e-06, - "loss": 0.7904, - "step": 737 - }, - { - "epoch": 0.2509350561033662, - "grad_norm": 2.108341620407174, - "learning_rate": 8.77579533802518e-06, - "loss": 0.8316, - "step": 738 - }, - { - "epoch": 0.2512750765045903, - "grad_norm": 4.98477571143613, - "learning_rate": 8.772182520896573e-06, - "loss": 0.8266, - "step": 739 - }, - { - "epoch": 0.25161509690581435, - "grad_norm": 1.9399774055291894, - "learning_rate": 8.768565126636806e-06, - "loss": 0.8225, - "step": 740 - }, - { - "epoch": 0.2519551173070384, - "grad_norm": 2.594549449894867, - "learning_rate": 8.764943159635193e-06, - "loss": 0.7238, - "step": 741 - }, - { - "epoch": 0.2522951377082625, - "grad_norm": 2.6610598581449247, - "learning_rate": 8.761316624286593e-06, - "loss": 0.7797, - "step": 742 - }, - { - "epoch": 0.25263515810948656, - "grad_norm": 1.7706557977888584, - "learning_rate": 8.757685524991414e-06, - "loss": 0.8875, - "step": 743 - }, - { - "epoch": 0.25297517851071066, - "grad_norm": 2.367385917663463, - "learning_rate": 8.754049866155594e-06, - "loss": 0.8251, - "step": 744 - }, - { - "epoch": 0.2533151989119347, - "grad_norm": 2.0014441027718557, - "learning_rate": 8.750409652190609e-06, - "loss": 0.8519, - "step": 745 - }, - { - "epoch": 0.25365521931315876, - "grad_norm": 1.8473603569334116, - "learning_rate": 8.74676488751346e-06, - "loss": 0.8601, - "step": 746 - }, - { - "epoch": 0.25399523971438287, - "grad_norm": 2.477807792703976, - "learning_rate": 8.743115576546672e-06, - "loss": 0.9798, - "step": 747 - }, - { - "epoch": 0.2543352601156069, - "grad_norm": 2.1596648623116694, - "learning_rate": 8.739461723718286e-06, - "loss": 0.9241, - "step": 748 - }, - { - "epoch": 0.254675280516831, - "grad_norm": 2.271967660622451, - "learning_rate": 8.73580333346185e-06, - "loss": 0.9333, - "step": 749 - }, - { - "epoch": 0.2550153009180551, - "grad_norm": 1.89409525964846, - "learning_rate": 8.732140410216422e-06, - "loss": 0.9235, - "step": 750 - }, - { - "epoch": 0.2553553213192792, - "grad_norm": 1.9160319862426827, - "learning_rate": 8.72847295842656e-06, - "loss": 0.8362, - "step": 751 - }, - { - "epoch": 0.25569534172050323, - "grad_norm": 2.75041804529313, - "learning_rate": 8.724800982542313e-06, - "loss": 0.8281, - "step": 752 - }, - { - "epoch": 0.2560353621217273, - "grad_norm": 1.9660343049850402, - "learning_rate": 8.721124487019226e-06, - "loss": 0.8134, - "step": 753 - }, - { - "epoch": 0.2563753825229514, - "grad_norm": 1.7476864872494857, - "learning_rate": 8.717443476318322e-06, - "loss": 0.7963, - "step": 754 - }, - { - "epoch": 0.25671540292417544, - "grad_norm": 1.7181600952027278, - "learning_rate": 8.713757954906105e-06, - "loss": 0.6619, - "step": 755 - }, - { - "epoch": 0.25705542332539955, - "grad_norm": 4.689603340381868, - "learning_rate": 8.710067927254555e-06, - "loss": 0.8325, - "step": 756 - }, - { - "epoch": 0.2573954437266236, - "grad_norm": 2.0670743417962014, - "learning_rate": 8.706373397841114e-06, - "loss": 0.7841, - "step": 757 - }, - { - "epoch": 0.25773546412784765, - "grad_norm": 1.9345516631091482, - "learning_rate": 8.702674371148692e-06, - "loss": 0.7412, - "step": 758 - }, - { - "epoch": 0.25807548452907175, - "grad_norm": 2.3058554102539865, - "learning_rate": 8.698970851665652e-06, - "loss": 0.8672, - "step": 759 - }, - { - "epoch": 0.2584155049302958, - "grad_norm": 1.906875691115053, - "learning_rate": 8.695262843885812e-06, - "loss": 0.7907, - "step": 760 - }, - { - "epoch": 0.2587555253315199, - "grad_norm": 1.8081498930839859, - "learning_rate": 8.691550352308431e-06, - "loss": 0.7257, - "step": 761 - }, - { - "epoch": 0.25909554573274396, - "grad_norm": 2.0456832516321377, - "learning_rate": 8.687833381438215e-06, - "loss": 0.8767, - "step": 762 - }, - { - "epoch": 0.25943556613396807, - "grad_norm": 4.818955286864829, - "learning_rate": 8.684111935785299e-06, - "loss": 0.809, - "step": 763 - }, - { - "epoch": 0.2597755865351921, - "grad_norm": 1.6359696437957223, - "learning_rate": 8.680386019865253e-06, - "loss": 0.8736, - "step": 764 - }, - { - "epoch": 0.26011560693641617, - "grad_norm": 1.9275763227542202, - "learning_rate": 8.676655638199068e-06, - "loss": 0.7778, - "step": 765 - }, - { - "epoch": 0.26045562733764027, - "grad_norm": 1.5111168632740775, - "learning_rate": 8.67292079531315e-06, - "loss": 0.7518, - "step": 766 - }, - { - "epoch": 0.2607956477388643, - "grad_norm": 1.9868081030493614, - "learning_rate": 8.669181495739332e-06, - "loss": 0.876, - "step": 767 - }, - { - "epoch": 0.26113566814008843, - "grad_norm": 1.9632367709448835, - "learning_rate": 8.665437744014838e-06, - "loss": 0.7469, - "step": 768 - }, - { - "epoch": 0.2614756885413125, - "grad_norm": 2.658790741994479, - "learning_rate": 8.661689544682301e-06, - "loss": 0.8102, - "step": 769 - }, - { - "epoch": 0.26181570894253653, - "grad_norm": 1.9709245415214305, - "learning_rate": 8.657936902289756e-06, - "loss": 0.8966, - "step": 770 - }, - { - "epoch": 0.26215572934376064, - "grad_norm": 3.0701001889258515, - "learning_rate": 8.65417982139062e-06, - "loss": 0.9841, - "step": 771 - }, - { - "epoch": 0.2624957497449847, - "grad_norm": 4.54013202807214, - "learning_rate": 8.650418306543704e-06, - "loss": 0.8277, - "step": 772 - }, - { - "epoch": 0.2628357701462088, - "grad_norm": 1.6031790587096684, - "learning_rate": 8.646652362313193e-06, - "loss": 0.8168, - "step": 773 - }, - { - "epoch": 0.26317579054743284, - "grad_norm": 2.2725192507554857, - "learning_rate": 8.642881993268647e-06, - "loss": 0.8552, - "step": 774 - }, - { - "epoch": 0.2635158109486569, - "grad_norm": 1.9449496721499624, - "learning_rate": 8.639107203985e-06, - "loss": 0.8014, - "step": 775 - }, - { - "epoch": 0.263855831349881, - "grad_norm": 4.227240205226276, - "learning_rate": 8.635327999042543e-06, - "loss": 0.9003, - "step": 776 - }, - { - "epoch": 0.26419585175110505, - "grad_norm": 1.7770839323226375, - "learning_rate": 8.63154438302693e-06, - "loss": 0.8669, - "step": 777 - }, - { - "epoch": 0.26453587215232915, - "grad_norm": 4.668426873038303, - "learning_rate": 8.627756360529166e-06, - "loss": 0.861, - "step": 778 - }, - { - "epoch": 0.2648758925535532, - "grad_norm": 2.0048269343626663, - "learning_rate": 8.6239639361456e-06, - "loss": 0.7886, - "step": 779 - }, - { - "epoch": 0.2652159129547773, - "grad_norm": 2.793168861569981, - "learning_rate": 8.620167114477926e-06, - "loss": 0.8552, - "step": 780 - }, - { - "epoch": 0.26555593335600136, - "grad_norm": 1.832488110710129, - "learning_rate": 8.616365900133175e-06, - "loss": 0.8196, - "step": 781 - }, - { - "epoch": 0.2658959537572254, - "grad_norm": 4.023379445825273, - "learning_rate": 8.612560297723697e-06, - "loss": 0.7989, - "step": 782 - }, - { - "epoch": 0.2662359741584495, - "grad_norm": 2.332651801821611, - "learning_rate": 8.608750311867182e-06, - "loss": 0.7508, - "step": 783 - }, - { - "epoch": 0.26657599455967357, - "grad_norm": 6.696709614360918, - "learning_rate": 8.60493594718663e-06, - "loss": 0.8147, - "step": 784 - }, - { - "epoch": 0.2669160149608977, - "grad_norm": 1.8834108862545373, - "learning_rate": 8.601117208310351e-06, - "loss": 0.9059, - "step": 785 - }, - { - "epoch": 0.2672560353621217, - "grad_norm": 2.077771875039454, - "learning_rate": 8.597294099871974e-06, - "loss": 0.7673, - "step": 786 - }, - { - "epoch": 0.2675960557633458, - "grad_norm": 6.9046174801442675, - "learning_rate": 8.59346662651042e-06, - "loss": 0.8263, - "step": 787 - }, - { - "epoch": 0.2679360761645699, - "grad_norm": 2.77437081702886, - "learning_rate": 8.589634792869908e-06, - "loss": 0.8334, - "step": 788 - }, - { - "epoch": 0.26827609656579393, - "grad_norm": 2.308063436284089, - "learning_rate": 8.58579860359995e-06, - "loss": 0.8516, - "step": 789 - }, - { - "epoch": 0.26861611696701804, - "grad_norm": 1.5049106697591117, - "learning_rate": 8.581958063355344e-06, - "loss": 0.7896, - "step": 790 - }, - { - "epoch": 0.2689561373682421, - "grad_norm": 2.273632666328108, - "learning_rate": 8.578113176796165e-06, - "loss": 0.9209, - "step": 791 - }, - { - "epoch": 0.2692961577694662, - "grad_norm": 2.223581774886736, - "learning_rate": 8.574263948587762e-06, - "loss": 0.7586, - "step": 792 - }, - { - "epoch": 0.26963617817069024, - "grad_norm": 2.234320213681529, - "learning_rate": 8.570410383400754e-06, - "loss": 0.9106, - "step": 793 - }, - { - "epoch": 0.2699761985719143, - "grad_norm": 1.8073070815618781, - "learning_rate": 8.56655248591102e-06, - "loss": 0.8563, - "step": 794 - }, - { - "epoch": 0.2703162189731384, - "grad_norm": 2.216862016544106, - "learning_rate": 8.562690260799696e-06, - "loss": 0.8404, - "step": 795 - }, - { - "epoch": 0.27065623937436245, - "grad_norm": 3.3719814382949944, - "learning_rate": 8.558823712753171e-06, - "loss": 0.8676, - "step": 796 - }, - { - "epoch": 0.27099625977558656, - "grad_norm": 2.196633013307635, - "learning_rate": 8.554952846463081e-06, - "loss": 0.8648, - "step": 797 - }, - { - "epoch": 0.2713362801768106, - "grad_norm": 2.5165242077281595, - "learning_rate": 8.551077666626292e-06, - "loss": 0.7004, - "step": 798 - }, - { - "epoch": 0.27167630057803466, - "grad_norm": 1.7249240592256398, - "learning_rate": 8.54719817794492e-06, - "loss": 0.7373, - "step": 799 - }, - { - "epoch": 0.27201632097925876, - "grad_norm": 1.677617648661045, - "learning_rate": 8.543314385126296e-06, - "loss": 0.8333, - "step": 800 - }, - { - "epoch": 0.2723563413804828, - "grad_norm": 2.7130063773245223, - "learning_rate": 8.539426292882976e-06, - "loss": 0.7646, - "step": 801 - }, - { - "epoch": 0.2726963617817069, - "grad_norm": 3.5342476236653084, - "learning_rate": 8.535533905932739e-06, - "loss": 0.747, - "step": 802 - }, - { - "epoch": 0.27303638218293097, - "grad_norm": 2.6210750163057357, - "learning_rate": 8.531637228998569e-06, - "loss": 0.8778, - "step": 803 - }, - { - "epoch": 0.2733764025841551, - "grad_norm": 2.0347303242471853, - "learning_rate": 8.527736266808658e-06, - "loss": 0.769, - "step": 804 - }, - { - "epoch": 0.2737164229853791, - "grad_norm": 2.4027128478891915, - "learning_rate": 8.523831024096396e-06, - "loss": 0.8585, - "step": 805 - }, - { - "epoch": 0.2740564433866032, - "grad_norm": 1.8350089573984862, - "learning_rate": 8.519921505600368e-06, - "loss": 0.8113, - "step": 806 - }, - { - "epoch": 0.2743964637878273, - "grad_norm": 2.2589489484204908, - "learning_rate": 8.516007716064352e-06, - "loss": 0.8187, - "step": 807 - }, - { - "epoch": 0.27473648418905133, - "grad_norm": 1.858296448626169, - "learning_rate": 8.5120896602373e-06, - "loss": 0.9453, - "step": 808 - }, - { - "epoch": 0.27507650459027544, - "grad_norm": 2.355100329431512, - "learning_rate": 8.508167342873342e-06, - "loss": 0.8078, - "step": 809 - }, - { - "epoch": 0.2754165249914995, - "grad_norm": 1.9899667485080101, - "learning_rate": 8.504240768731787e-06, - "loss": 0.8554, - "step": 810 - }, - { - "epoch": 0.27575654539272354, - "grad_norm": 1.8787714779536697, - "learning_rate": 8.500309942577098e-06, - "loss": 0.8568, - "step": 811 - }, - { - "epoch": 0.27609656579394765, - "grad_norm": 2.068637813773282, - "learning_rate": 8.496374869178908e-06, - "loss": 0.848, - "step": 812 - }, - { - "epoch": 0.2764365861951717, - "grad_norm": 1.7791526260663866, - "learning_rate": 8.492435553311995e-06, - "loss": 0.8251, - "step": 813 - }, - { - "epoch": 0.2767766065963958, - "grad_norm": 1.8869896486229023, - "learning_rate": 8.48849199975629e-06, - "loss": 0.778, - "step": 814 - }, - { - "epoch": 0.27711662699761985, - "grad_norm": 2.2096789072585414, - "learning_rate": 8.484544213296864e-06, - "loss": 0.8346, - "step": 815 - }, - { - "epoch": 0.2774566473988439, - "grad_norm": 2.170554684057787, - "learning_rate": 8.480592198723922e-06, - "loss": 0.9079, - "step": 816 - }, - { - "epoch": 0.277796667800068, - "grad_norm": 1.9605343168925984, - "learning_rate": 8.476635960832805e-06, - "loss": 0.9024, - "step": 817 - }, - { - "epoch": 0.27813668820129206, - "grad_norm": 2.5300613494335638, - "learning_rate": 8.472675504423972e-06, - "loss": 0.7871, - "step": 818 - }, - { - "epoch": 0.27847670860251617, - "grad_norm": 2.8042077194040287, - "learning_rate": 8.468710834303007e-06, - "loss": 0.7785, - "step": 819 - }, - { - "epoch": 0.2788167290037402, - "grad_norm": 1.697967343522963, - "learning_rate": 8.464741955280603e-06, - "loss": 0.8535, - "step": 820 - }, - { - "epoch": 0.2791567494049643, - "grad_norm": 2.2291137255399303, - "learning_rate": 8.460768872172558e-06, - "loss": 0.8406, - "step": 821 - }, - { - "epoch": 0.27949676980618837, - "grad_norm": 1.919715258452679, - "learning_rate": 8.456791589799777e-06, - "loss": 0.8334, - "step": 822 - }, - { - "epoch": 0.2798367902074124, - "grad_norm": 1.9257825337008065, - "learning_rate": 8.45281011298826e-06, - "loss": 0.7674, - "step": 823 - }, - { - "epoch": 0.28017681060863653, - "grad_norm": 1.9491101623001321, - "learning_rate": 8.448824446569087e-06, - "loss": 0.8832, - "step": 824 - }, - { - "epoch": 0.2805168310098606, - "grad_norm": 1.870418445256147, - "learning_rate": 8.444834595378434e-06, - "loss": 0.8243, - "step": 825 - }, - { - "epoch": 0.2808568514110847, - "grad_norm": 15.898061851643817, - "learning_rate": 8.440840564257547e-06, - "loss": 0.9136, - "step": 826 - }, - { - "epoch": 0.28119687181230874, - "grad_norm": 4.472135160620738, - "learning_rate": 8.436842358052746e-06, - "loss": 0.7969, - "step": 827 - }, - { - "epoch": 0.2815368922135328, - "grad_norm": 1.761895889926857, - "learning_rate": 8.432839981615419e-06, - "loss": 0.7631, - "step": 828 - }, - { - "epoch": 0.2818769126147569, - "grad_norm": 2.3826293642920735, - "learning_rate": 8.428833439802012e-06, - "loss": 0.8369, - "step": 829 - }, - { - "epoch": 0.28221693301598094, - "grad_norm": 2.027771199922908, - "learning_rate": 8.424822737474023e-06, - "loss": 0.752, - "step": 830 - }, - { - "epoch": 0.28255695341720505, - "grad_norm": 2.5040444225047596, - "learning_rate": 8.420807879498002e-06, - "loss": 0.9132, - "step": 831 - }, - { - "epoch": 0.2828969738184291, - "grad_norm": 1.8159022349945535, - "learning_rate": 8.416788870745544e-06, - "loss": 0.8259, - "step": 832 - }, - { - "epoch": 0.2832369942196532, - "grad_norm": 2.620947283954682, - "learning_rate": 8.412765716093273e-06, - "loss": 0.8616, - "step": 833 - }, - { - "epoch": 0.28357701462087725, - "grad_norm": 1.985024515267911, - "learning_rate": 8.408738420422847e-06, - "loss": 0.8538, - "step": 834 - }, - { - "epoch": 0.2839170350221013, - "grad_norm": 1.7903361247800387, - "learning_rate": 8.40470698862095e-06, - "loss": 0.8478, - "step": 835 - }, - { - "epoch": 0.2842570554233254, - "grad_norm": 2.1338723210061974, - "learning_rate": 8.400671425579283e-06, - "loss": 0.7906, - "step": 836 - }, - { - "epoch": 0.28459707582454946, - "grad_norm": 1.9295239837495932, - "learning_rate": 8.396631736194563e-06, - "loss": 0.8481, - "step": 837 - }, - { - "epoch": 0.28493709622577357, - "grad_norm": 1.983034000002347, - "learning_rate": 8.39258792536851e-06, - "loss": 0.8847, - "step": 838 - }, - { - "epoch": 0.2852771166269976, - "grad_norm": 2.2803142495667035, - "learning_rate": 8.388539998007847e-06, - "loss": 0.9007, - "step": 839 - }, - { - "epoch": 0.28561713702822167, - "grad_norm": 2.7645969730004807, - "learning_rate": 8.384487959024293e-06, - "loss": 0.7356, - "step": 840 - }, - { - "epoch": 0.2859571574294458, - "grad_norm": 3.149778399577589, - "learning_rate": 8.380431813334548e-06, - "loss": 0.7855, - "step": 841 - }, - { - "epoch": 0.2862971778306698, - "grad_norm": 1.830554950516933, - "learning_rate": 8.37637156586031e-06, - "loss": 0.8831, - "step": 842 - }, - { - "epoch": 0.28663719823189393, - "grad_norm": 1.9793055946594367, - "learning_rate": 8.372307221528239e-06, - "loss": 0.8116, - "step": 843 - }, - { - "epoch": 0.286977218633118, - "grad_norm": 2.062954991657379, - "learning_rate": 8.368238785269976e-06, - "loss": 0.8563, - "step": 844 - }, - { - "epoch": 0.2873172390343421, - "grad_norm": 2.6403272415419834, - "learning_rate": 8.36416626202212e-06, - "loss": 0.8033, - "step": 845 - }, - { - "epoch": 0.28765725943556614, - "grad_norm": 2.1424920280150506, - "learning_rate": 8.360089656726238e-06, - "loss": 0.9417, - "step": 846 - }, - { - "epoch": 0.2879972798367902, - "grad_norm": 1.8495657823428482, - "learning_rate": 8.356008974328843e-06, - "loss": 0.8778, - "step": 847 - }, - { - "epoch": 0.2883373002380143, - "grad_norm": 1.964102211636596, - "learning_rate": 8.351924219781393e-06, - "loss": 0.8762, - "step": 848 - }, - { - "epoch": 0.28867732063923834, - "grad_norm": 5.329745330260959, - "learning_rate": 8.347835398040297e-06, - "loss": 0.8703, - "step": 849 - }, - { - "epoch": 0.28901734104046245, - "grad_norm": 6.872745204669564, - "learning_rate": 8.34374251406689e-06, - "loss": 0.9126, - "step": 850 - }, - { - "epoch": 0.2893573614416865, - "grad_norm": 3.021940703140289, - "learning_rate": 8.339645572827439e-06, - "loss": 0.8435, - "step": 851 - }, - { - "epoch": 0.28969738184291055, - "grad_norm": 1.7359605774084226, - "learning_rate": 8.335544579293138e-06, - "loss": 0.8956, - "step": 852 - }, - { - "epoch": 0.29003740224413466, - "grad_norm": 2.0495366847155645, - "learning_rate": 8.331439538440089e-06, - "loss": 0.8737, - "step": 853 - }, - { - "epoch": 0.2903774226453587, - "grad_norm": 1.7403332283198236, - "learning_rate": 8.327330455249316e-06, - "loss": 0.836, - "step": 854 - }, - { - "epoch": 0.2907174430465828, - "grad_norm": 1.765839927053788, - "learning_rate": 8.323217334706736e-06, - "loss": 0.7708, - "step": 855 - }, - { - "epoch": 0.29105746344780686, - "grad_norm": 2.561707026442392, - "learning_rate": 8.319100181803177e-06, - "loss": 0.8048, - "step": 856 - }, - { - "epoch": 0.29139748384903097, - "grad_norm": 1.893355173621553, - "learning_rate": 8.314979001534351e-06, - "loss": 0.8355, - "step": 857 - }, - { - "epoch": 0.291737504250255, - "grad_norm": 1.6626617126300058, - "learning_rate": 8.310853798900861e-06, - "loss": 0.8117, - "step": 858 - }, - { - "epoch": 0.29207752465147907, - "grad_norm": 2.2181642572480404, - "learning_rate": 8.306724578908187e-06, - "loss": 0.8809, - "step": 859 - }, - { - "epoch": 0.2924175450527032, - "grad_norm": 2.1608263105904237, - "learning_rate": 8.302591346566691e-06, - "loss": 0.9428, - "step": 860 - }, - { - "epoch": 0.2927575654539272, - "grad_norm": 2.2361606195465, - "learning_rate": 8.298454106891593e-06, - "loss": 0.8456, - "step": 861 - }, - { - "epoch": 0.29309758585515133, - "grad_norm": 2.161708305297874, - "learning_rate": 8.294312864902985e-06, - "loss": 0.7702, - "step": 862 - }, - { - "epoch": 0.2934376062563754, - "grad_norm": 1.9911157415642, - "learning_rate": 8.290167625625811e-06, - "loss": 0.8566, - "step": 863 - }, - { - "epoch": 0.29377762665759943, - "grad_norm": 2.204248530981356, - "learning_rate": 8.286018394089864e-06, - "loss": 0.785, - "step": 864 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 2.54115940873232, - "learning_rate": 8.281865175329783e-06, - "loss": 0.8669, - "step": 865 - }, - { - "epoch": 0.2944576674600476, - "grad_norm": 1.6985894379936504, - "learning_rate": 8.277707974385047e-06, - "loss": 0.8809, - "step": 866 - }, - { - "epoch": 0.2947976878612717, - "grad_norm": 1.9914761180754428, - "learning_rate": 8.273546796299962e-06, - "loss": 0.868, - "step": 867 - }, - { - "epoch": 0.29513770826249575, - "grad_norm": 1.9848345218936125, - "learning_rate": 8.269381646123666e-06, - "loss": 0.8266, - "step": 868 - }, - { - "epoch": 0.2954777286637198, - "grad_norm": 2.0822000899070674, - "learning_rate": 8.265212528910113e-06, - "loss": 0.9115, - "step": 869 - }, - { - "epoch": 0.2958177490649439, - "grad_norm": 1.9979737857871827, - "learning_rate": 8.261039449718068e-06, - "loss": 0.7968, - "step": 870 - }, - { - "epoch": 0.29615776946616795, - "grad_norm": 2.0218833894280532, - "learning_rate": 8.256862413611113e-06, - "loss": 0.8031, - "step": 871 - }, - { - "epoch": 0.29649778986739206, - "grad_norm": 2.006168397097048, - "learning_rate": 8.252681425657617e-06, - "loss": 0.8669, - "step": 872 - }, - { - "epoch": 0.2968378102686161, - "grad_norm": 2.2355962957542377, - "learning_rate": 8.248496490930753e-06, - "loss": 0.8274, - "step": 873 - }, - { - "epoch": 0.2971778306698402, - "grad_norm": 1.7904006258629988, - "learning_rate": 8.244307614508487e-06, - "loss": 0.7554, - "step": 874 - }, - { - "epoch": 0.29751785107106427, - "grad_norm": 2.0320747257565444, - "learning_rate": 8.240114801473558e-06, - "loss": 0.7651, - "step": 875 - }, - { - "epoch": 0.2978578714722883, - "grad_norm": 2.6665182314923412, - "learning_rate": 8.23591805691349e-06, - "loss": 0.8223, - "step": 876 - }, - { - "epoch": 0.2981978918735124, - "grad_norm": 1.8328124167485744, - "learning_rate": 8.23171738592057e-06, - "loss": 0.9082, - "step": 877 - }, - { - "epoch": 0.29853791227473647, - "grad_norm": 2.3699720185830757, - "learning_rate": 8.227512793591855e-06, - "loss": 0.9096, - "step": 878 - }, - { - "epoch": 0.2988779326759606, - "grad_norm": 1.9090567074503153, - "learning_rate": 8.223304285029159e-06, - "loss": 0.7705, - "step": 879 - }, - { - "epoch": 0.29921795307718463, - "grad_norm": 2.2190907511890368, - "learning_rate": 8.219091865339045e-06, - "loss": 0.7971, - "step": 880 - }, - { - "epoch": 0.2995579734784087, - "grad_norm": 2.253264189984432, - "learning_rate": 8.214875539632825e-06, - "loss": 0.7269, - "step": 881 - }, - { - "epoch": 0.2998979938796328, - "grad_norm": 2.0376522816245934, - "learning_rate": 8.21065531302655e-06, - "loss": 0.8329, - "step": 882 - }, - { - "epoch": 0.30023801428085684, - "grad_norm": 3.6223570808965007, - "learning_rate": 8.206431190641002e-06, - "loss": 0.8321, - "step": 883 - }, - { - "epoch": 0.30057803468208094, - "grad_norm": 3.8252210397062694, - "learning_rate": 8.202203177601693e-06, - "loss": 0.8164, - "step": 884 - }, - { - "epoch": 0.300918055083305, - "grad_norm": 1.776370483766253, - "learning_rate": 8.197971279038854e-06, - "loss": 0.8426, - "step": 885 - }, - { - "epoch": 0.3012580754845291, - "grad_norm": 2.385354175900532, - "learning_rate": 8.193735500087432e-06, - "loss": 0.7418, - "step": 886 - }, - { - "epoch": 0.30159809588575315, - "grad_norm": 1.6779774511826855, - "learning_rate": 8.189495845887083e-06, - "loss": 0.7568, - "step": 887 - }, - { - "epoch": 0.3019381162869772, - "grad_norm": 2.4863457173840544, - "learning_rate": 8.185252321582162e-06, - "loss": 0.8176, - "step": 888 - }, - { - "epoch": 0.3022781366882013, - "grad_norm": 4.0386363547881485, - "learning_rate": 8.18100493232172e-06, - "loss": 0.9485, - "step": 889 - }, - { - "epoch": 0.30261815708942535, - "grad_norm": 1.7173326803227138, - "learning_rate": 8.176753683259506e-06, - "loss": 0.7396, - "step": 890 - }, - { - "epoch": 0.30295817749064946, - "grad_norm": 2.3498732585677202, - "learning_rate": 8.172498579553939e-06, - "loss": 0.7183, - "step": 891 - }, - { - "epoch": 0.3032981978918735, - "grad_norm": 2.44411432379618, - "learning_rate": 8.168239626368126e-06, - "loss": 0.7807, - "step": 892 - }, - { - "epoch": 0.30363821829309756, - "grad_norm": 2.478384366935357, - "learning_rate": 8.16397682886984e-06, - "loss": 0.8315, - "step": 893 - }, - { - "epoch": 0.30397823869432167, - "grad_norm": 2.5774348066125894, - "learning_rate": 8.15971019223152e-06, - "loss": 0.8123, - "step": 894 - }, - { - "epoch": 0.3043182590955457, - "grad_norm": 1.7111325488707947, - "learning_rate": 8.155439721630265e-06, - "loss": 0.8263, - "step": 895 - }, - { - "epoch": 0.3046582794967698, - "grad_norm": 2.7448751652607553, - "learning_rate": 8.151165422247822e-06, - "loss": 0.8248, - "step": 896 - }, - { - "epoch": 0.3049982998979939, - "grad_norm": 2.1485101781392877, - "learning_rate": 8.146887299270585e-06, - "loss": 0.8035, - "step": 897 - }, - { - "epoch": 0.305338320299218, - "grad_norm": 1.776591747388704, - "learning_rate": 8.142605357889592e-06, - "loss": 0.8089, - "step": 898 - }, - { - "epoch": 0.30567834070044203, - "grad_norm": 3.2632560104335173, - "learning_rate": 8.13831960330051e-06, - "loss": 0.8202, - "step": 899 - }, - { - "epoch": 0.3060183611016661, - "grad_norm": 2.8739582827981347, - "learning_rate": 8.13403004070363e-06, - "loss": 0.9092, - "step": 900 - }, - { - "epoch": 0.3063583815028902, - "grad_norm": 2.1943148602179994, - "learning_rate": 8.129736675303873e-06, - "loss": 0.8322, - "step": 901 - }, - { - "epoch": 0.30669840190411424, - "grad_norm": 1.8531479477302115, - "learning_rate": 8.125439512310765e-06, - "loss": 0.7566, - "step": 902 - }, - { - "epoch": 0.30703842230533834, - "grad_norm": 1.7228875957473064, - "learning_rate": 8.121138556938444e-06, - "loss": 0.8078, - "step": 903 - }, - { - "epoch": 0.3073784427065624, - "grad_norm": 2.3898144961502745, - "learning_rate": 8.116833814405648e-06, - "loss": 0.8067, - "step": 904 - }, - { - "epoch": 0.30771846310778644, - "grad_norm": 1.802933531637354, - "learning_rate": 8.112525289935716e-06, - "loss": 0.7799, - "step": 905 - }, - { - "epoch": 0.30805848350901055, - "grad_norm": 2.5139323313707673, - "learning_rate": 8.108212988756568e-06, - "loss": 0.9037, - "step": 906 - }, - { - "epoch": 0.3083985039102346, - "grad_norm": 1.9251179471419289, - "learning_rate": 8.10389691610071e-06, - "loss": 0.8635, - "step": 907 - }, - { - "epoch": 0.3087385243114587, - "grad_norm": 2.193694058263112, - "learning_rate": 8.099577077205225e-06, - "loss": 0.8323, - "step": 908 - }, - { - "epoch": 0.30907854471268276, - "grad_norm": 2.0048196549770885, - "learning_rate": 8.095253477311765e-06, - "loss": 0.7756, - "step": 909 - }, - { - "epoch": 0.3094185651139068, - "grad_norm": 1.980230499045498, - "learning_rate": 8.090926121666547e-06, - "loss": 0.7977, - "step": 910 - }, - { - "epoch": 0.3097585855151309, - "grad_norm": 2.4883937143671564, - "learning_rate": 8.086595015520345e-06, - "loss": 0.8233, - "step": 911 - }, - { - "epoch": 0.31009860591635496, - "grad_norm": 2.1286197933597584, - "learning_rate": 8.08226016412848e-06, - "loss": 0.9729, - "step": 912 - }, - { - "epoch": 0.31043862631757907, - "grad_norm": 2.086146264470014, - "learning_rate": 8.07792157275082e-06, - "loss": 0.8914, - "step": 913 - }, - { - "epoch": 0.3107786467188031, - "grad_norm": 2.0772062973899423, - "learning_rate": 8.073579246651775e-06, - "loss": 0.945, - "step": 914 - }, - { - "epoch": 0.3111186671200272, - "grad_norm": 2.0695504749457143, - "learning_rate": 8.069233191100278e-06, - "loss": 0.8634, - "step": 915 - }, - { - "epoch": 0.3114586875212513, - "grad_norm": 2.0384939167389393, - "learning_rate": 8.064883411369799e-06, - "loss": 0.7785, - "step": 916 - }, - { - "epoch": 0.3117987079224753, - "grad_norm": 2.1885792727969138, - "learning_rate": 8.060529912738316e-06, - "loss": 0.8655, - "step": 917 - }, - { - "epoch": 0.31213872832369943, - "grad_norm": 2.1403065502782406, - "learning_rate": 8.056172700488324e-06, - "loss": 0.8965, - "step": 918 - }, - { - "epoch": 0.3124787487249235, - "grad_norm": 2.1339209234822647, - "learning_rate": 8.051811779906823e-06, - "loss": 0.7545, - "step": 919 - }, - { - "epoch": 0.3128187691261476, - "grad_norm": 1.8344955563510745, - "learning_rate": 8.047447156285314e-06, - "loss": 0.8804, - "step": 920 - }, - { - "epoch": 0.31315878952737164, - "grad_norm": 2.423743963035901, - "learning_rate": 8.043078834919792e-06, - "loss": 0.8068, - "step": 921 - }, - { - "epoch": 0.3134988099285957, - "grad_norm": 1.787239252052783, - "learning_rate": 8.038706821110738e-06, - "loss": 0.9271, - "step": 922 - }, - { - "epoch": 0.3138388303298198, - "grad_norm": 2.0084809664473684, - "learning_rate": 8.03433112016311e-06, - "loss": 0.8244, - "step": 923 - }, - { - "epoch": 0.31417885073104385, - "grad_norm": 2.0471692656266085, - "learning_rate": 8.029951737386345e-06, - "loss": 0.7478, - "step": 924 - }, - { - "epoch": 0.31451887113226795, - "grad_norm": 2.143535486588015, - "learning_rate": 8.025568678094346e-06, - "loss": 0.7579, - "step": 925 - }, - { - "epoch": 0.314858891533492, - "grad_norm": 1.835302258844517, - "learning_rate": 8.021181947605474e-06, - "loss": 0.771, - "step": 926 - }, - { - "epoch": 0.3151989119347161, - "grad_norm": 2.109820781590098, - "learning_rate": 8.016791551242548e-06, - "loss": 0.8985, - "step": 927 - }, - { - "epoch": 0.31553893233594016, - "grad_norm": 1.9465940317912624, - "learning_rate": 8.012397494332832e-06, - "loss": 0.9183, - "step": 928 - }, - { - "epoch": 0.3158789527371642, - "grad_norm": 1.9499330380070024, - "learning_rate": 8.00799978220804e-06, - "loss": 0.8158, - "step": 929 - }, - { - "epoch": 0.3162189731383883, - "grad_norm": 1.8510195207977735, - "learning_rate": 8.003598420204307e-06, - "loss": 0.8287, - "step": 930 - }, - { - "epoch": 0.31655899353961237, - "grad_norm": 2.2106314828328895, - "learning_rate": 7.99919341366221e-06, - "loss": 0.8159, - "step": 931 - }, - { - "epoch": 0.31689901394083647, - "grad_norm": 2.132400411746793, - "learning_rate": 7.994784767926743e-06, - "loss": 0.8686, - "step": 932 - }, - { - "epoch": 0.3172390343420605, - "grad_norm": 1.9835217982818234, - "learning_rate": 7.99037248834731e-06, - "loss": 0.7661, - "step": 933 - }, - { - "epoch": 0.3175790547432846, - "grad_norm": 1.8257541034651736, - "learning_rate": 7.985956580277738e-06, - "loss": 0.8968, - "step": 934 - }, - { - "epoch": 0.3179190751445087, - "grad_norm": 1.7806392840797605, - "learning_rate": 7.981537049076243e-06, - "loss": 0.8334, - "step": 935 - }, - { - "epoch": 0.31825909554573273, - "grad_norm": 1.946318339623462, - "learning_rate": 7.977113900105444e-06, - "loss": 0.8255, - "step": 936 - }, - { - "epoch": 0.31859911594695683, - "grad_norm": 2.5853186802795856, - "learning_rate": 7.972687138732352e-06, - "loss": 0.8669, - "step": 937 - }, - { - "epoch": 0.3189391363481809, - "grad_norm": 2.2143025012416913, - "learning_rate": 7.968256770328353e-06, - "loss": 0.7807, - "step": 938 - }, - { - "epoch": 0.319279156749405, - "grad_norm": 2.025109539800048, - "learning_rate": 7.96382280026922e-06, - "loss": 0.7668, - "step": 939 - }, - { - "epoch": 0.31961917715062904, - "grad_norm": 2.309014049320171, - "learning_rate": 7.959385233935087e-06, - "loss": 0.7586, - "step": 940 - }, - { - "epoch": 0.3199591975518531, - "grad_norm": 2.297274599241439, - "learning_rate": 7.954944076710457e-06, - "loss": 0.8962, - "step": 941 - }, - { - "epoch": 0.3202992179530772, - "grad_norm": 2.054541472734675, - "learning_rate": 7.95049933398419e-06, - "loss": 0.8203, - "step": 942 - }, - { - "epoch": 0.32063923835430125, - "grad_norm": 2.6215309252037873, - "learning_rate": 7.946051011149494e-06, - "loss": 0.8248, - "step": 943 - }, - { - "epoch": 0.32097925875552535, - "grad_norm": 2.711737030629169, - "learning_rate": 7.941599113603923e-06, - "loss": 0.8764, - "step": 944 - }, - { - "epoch": 0.3213192791567494, - "grad_norm": 2.2912349790018633, - "learning_rate": 7.937143646749367e-06, - "loss": 0.7335, - "step": 945 - }, - { - "epoch": 0.32165929955797345, - "grad_norm": 1.7908168705820537, - "learning_rate": 7.93268461599205e-06, - "loss": 0.8435, - "step": 946 - }, - { - "epoch": 0.32199931995919756, - "grad_norm": 4.946848177327164, - "learning_rate": 7.928222026742517e-06, - "loss": 0.8039, - "step": 947 - }, - { - "epoch": 0.3223393403604216, - "grad_norm": 2.0622056965822932, - "learning_rate": 7.923755884415634e-06, - "loss": 0.9067, - "step": 948 - }, - { - "epoch": 0.3226793607616457, - "grad_norm": 2.6667796950375715, - "learning_rate": 7.919286194430573e-06, - "loss": 0.7022, - "step": 949 - }, - { - "epoch": 0.32301938116286977, - "grad_norm": 1.9317481210027303, - "learning_rate": 7.914812962210819e-06, - "loss": 0.8264, - "step": 950 - }, - { - "epoch": 0.3233594015640938, - "grad_norm": 1.8020949558699724, - "learning_rate": 7.910336193184146e-06, - "loss": 0.7472, - "step": 951 - }, - { - "epoch": 0.3236994219653179, - "grad_norm": 1.9275396744198368, - "learning_rate": 7.905855892782625e-06, - "loss": 0.7309, - "step": 952 - }, - { - "epoch": 0.324039442366542, - "grad_norm": 1.8182685008859338, - "learning_rate": 7.901372066442615e-06, - "loss": 0.7625, - "step": 953 - }, - { - "epoch": 0.3243794627677661, - "grad_norm": 2.010794684369215, - "learning_rate": 7.89688471960474e-06, - "loss": 0.8687, - "step": 954 - }, - { - "epoch": 0.32471948316899013, - "grad_norm": 2.0540586819091997, - "learning_rate": 7.892393857713914e-06, - "loss": 0.8335, - "step": 955 - }, - { - "epoch": 0.32505950357021424, - "grad_norm": 2.325367093840662, - "learning_rate": 7.887899486219304e-06, - "loss": 0.783, - "step": 956 - }, - { - "epoch": 0.3253995239714383, - "grad_norm": 2.1873077332560156, - "learning_rate": 7.883401610574338e-06, - "loss": 0.8885, - "step": 957 - }, - { - "epoch": 0.32573954437266234, - "grad_norm": 2.177598891005239, - "learning_rate": 7.878900236236693e-06, - "loss": 0.763, - "step": 958 - }, - { - "epoch": 0.32607956477388644, - "grad_norm": 1.9372352426800454, - "learning_rate": 7.874395368668302e-06, - "loss": 0.8097, - "step": 959 - }, - { - "epoch": 0.3264195851751105, - "grad_norm": 1.8413146998215235, - "learning_rate": 7.869887013335324e-06, - "loss": 0.7083, - "step": 960 - }, - { - "epoch": 0.3267596055763346, - "grad_norm": 1.704925469624369, - "learning_rate": 7.865375175708158e-06, - "loss": 0.6822, - "step": 961 - }, - { - "epoch": 0.32709962597755865, - "grad_norm": 3.100437896445037, - "learning_rate": 7.860859861261423e-06, - "loss": 0.7932, - "step": 962 - }, - { - "epoch": 0.3274396463787827, - "grad_norm": 1.6975108379354062, - "learning_rate": 7.856341075473963e-06, - "loss": 0.7636, - "step": 963 - }, - { - "epoch": 0.3277796667800068, - "grad_norm": 1.7501297373086775, - "learning_rate": 7.851818823828828e-06, - "loss": 0.7754, - "step": 964 - }, - { - "epoch": 0.32811968718123086, - "grad_norm": 2.545140448497676, - "learning_rate": 7.847293111813276e-06, - "loss": 0.9082, - "step": 965 - }, - { - "epoch": 0.32845970758245496, - "grad_norm": 1.8008653754801884, - "learning_rate": 7.842763944918766e-06, - "loss": 0.83, - "step": 966 - }, - { - "epoch": 0.328799727983679, - "grad_norm": 2.6708500385698484, - "learning_rate": 7.838231328640945e-06, - "loss": 0.8698, - "step": 967 - }, - { - "epoch": 0.3291397483849031, - "grad_norm": 1.8002730480278057, - "learning_rate": 7.83369526847965e-06, - "loss": 0.8098, - "step": 968 - }, - { - "epoch": 0.32947976878612717, - "grad_norm": 2.032934392213337, - "learning_rate": 7.82915576993889e-06, - "loss": 0.841, - "step": 969 - }, - { - "epoch": 0.3298197891873512, - "grad_norm": 2.1268627260893473, - "learning_rate": 7.824612838526853e-06, - "loss": 0.8791, - "step": 970 - }, - { - "epoch": 0.3301598095885753, - "grad_norm": 1.9943092833782559, - "learning_rate": 7.82006647975589e-06, - "loss": 0.8746, - "step": 971 - }, - { - "epoch": 0.3304998299897994, - "grad_norm": 12.28140454443901, - "learning_rate": 7.81551669914251e-06, - "loss": 0.8952, - "step": 972 - }, - { - "epoch": 0.3308398503910235, - "grad_norm": 2.0328755642218055, - "learning_rate": 7.810963502207373e-06, - "loss": 0.7673, - "step": 973 - }, - { - "epoch": 0.33117987079224753, - "grad_norm": 1.7777944287029475, - "learning_rate": 7.806406894475286e-06, - "loss": 0.8826, - "step": 974 - }, - { - "epoch": 0.3315198911934716, - "grad_norm": 1.89053310342559, - "learning_rate": 7.801846881475199e-06, - "loss": 0.8305, - "step": 975 - }, - { - "epoch": 0.3318599115946957, - "grad_norm": 1.7916446125501533, - "learning_rate": 7.797283468740184e-06, - "loss": 0.7707, - "step": 976 - }, - { - "epoch": 0.33219993199591974, - "grad_norm": 1.583383697703338, - "learning_rate": 7.792716661807443e-06, - "loss": 0.7796, - "step": 977 - }, - { - "epoch": 0.33253995239714385, - "grad_norm": 2.4183910263443087, - "learning_rate": 7.788146466218301e-06, - "loss": 0.7304, - "step": 978 - }, - { - "epoch": 0.3328799727983679, - "grad_norm": 2.3658727843683907, - "learning_rate": 7.78357288751819e-06, - "loss": 0.7426, - "step": 979 - }, - { - "epoch": 0.333219993199592, - "grad_norm": 1.828116229828553, - "learning_rate": 7.778995931256646e-06, - "loss": 0.8078, - "step": 980 - }, - { - "epoch": 0.33356001360081605, - "grad_norm": 3.597726067768484, - "learning_rate": 7.774415602987304e-06, - "loss": 0.6857, - "step": 981 - }, - { - "epoch": 0.3339000340020401, - "grad_norm": 3.166303108924718, - "learning_rate": 7.769831908267896e-06, - "loss": 0.7904, - "step": 982 - }, - { - "epoch": 0.3342400544032642, - "grad_norm": 2.8933855627551304, - "learning_rate": 7.765244852660233e-06, - "loss": 0.8998, - "step": 983 - }, - { - "epoch": 0.33458007480448826, - "grad_norm": 1.9338110662323469, - "learning_rate": 7.760654441730202e-06, - "loss": 0.9007, - "step": 984 - }, - { - "epoch": 0.33492009520571236, - "grad_norm": 1.6591607534919344, - "learning_rate": 7.756060681047769e-06, - "loss": 0.8238, - "step": 985 - }, - { - "epoch": 0.3352601156069364, - "grad_norm": 3.243453226362474, - "learning_rate": 7.751463576186957e-06, - "loss": 0.7642, - "step": 986 - }, - { - "epoch": 0.33560013600816047, - "grad_norm": 2.7428152535139607, - "learning_rate": 7.746863132725856e-06, - "loss": 0.7282, - "step": 987 - }, - { - "epoch": 0.33594015640938457, - "grad_norm": 2.1236456873501144, - "learning_rate": 7.742259356246594e-06, - "loss": 0.7627, - "step": 988 - }, - { - "epoch": 0.3362801768106086, - "grad_norm": 1.6743079956506057, - "learning_rate": 7.737652252335356e-06, - "loss": 0.8406, - "step": 989 - }, - { - "epoch": 0.33662019721183273, - "grad_norm": 1.87268843836185, - "learning_rate": 7.733041826582357e-06, - "loss": 0.8455, - "step": 990 - }, - { - "epoch": 0.3369602176130568, - "grad_norm": 2.030664659551619, - "learning_rate": 7.728428084581844e-06, - "loss": 0.7965, - "step": 991 - }, - { - "epoch": 0.3373002380142809, - "grad_norm": 3.5752732157045215, - "learning_rate": 7.72381103193209e-06, - "loss": 0.7149, - "step": 992 - }, - { - "epoch": 0.33764025841550493, - "grad_norm": 2.8550170098863323, - "learning_rate": 7.719190674235383e-06, - "loss": 0.8308, - "step": 993 - }, - { - "epoch": 0.337980278816729, - "grad_norm": 2.590955515466377, - "learning_rate": 7.714567017098023e-06, - "loss": 0.902, - "step": 994 - }, - { - "epoch": 0.3383202992179531, - "grad_norm": 2.0307936229873467, - "learning_rate": 7.709940066130312e-06, - "loss": 0.8208, - "step": 995 - }, - { - "epoch": 0.33866031961917714, - "grad_norm": 1.4578787186534492, - "learning_rate": 7.705309826946547e-06, - "loss": 0.8051, - "step": 996 - }, - { - "epoch": 0.33900034002040125, - "grad_norm": 2.07437464133569, - "learning_rate": 7.70067630516502e-06, - "loss": 0.7707, - "step": 997 - }, - { - "epoch": 0.3393403604216253, - "grad_norm": 2.15530600462403, - "learning_rate": 7.696039506408001e-06, - "loss": 0.7745, - "step": 998 - }, - { - "epoch": 0.33968038082284935, - "grad_norm": 1.9609888444625139, - "learning_rate": 7.691399436301743e-06, - "loss": 0.7726, - "step": 999 - }, - { - "epoch": 0.34002040122407345, - "grad_norm": 1.7131939444884539, - "learning_rate": 7.686756100476458e-06, - "loss": 0.8546, - "step": 1000 - }, - { - "epoch": 0.3403604216252975, - "grad_norm": 2.0565210562712397, - "learning_rate": 7.68210950456633e-06, - "loss": 0.7028, - "step": 1001 - }, - { - "epoch": 0.3407004420265216, - "grad_norm": 2.1703917499812664, - "learning_rate": 7.677459654209493e-06, - "loss": 0.83, - "step": 1002 - }, - { - "epoch": 0.34104046242774566, - "grad_norm": 2.047040344782625, - "learning_rate": 7.672806555048034e-06, - "loss": 0.949, - "step": 1003 - }, - { - "epoch": 0.3413804828289697, - "grad_norm": 2.938822311667402, - "learning_rate": 7.66815021272798e-06, - "loss": 1.0131, - "step": 1004 - }, - { - "epoch": 0.3417205032301938, - "grad_norm": 2.5006641954997257, - "learning_rate": 7.663490632899293e-06, - "loss": 0.7971, - "step": 1005 - }, - { - "epoch": 0.34206052363141787, - "grad_norm": 2.1529578288278817, - "learning_rate": 7.658827821215863e-06, - "loss": 0.8715, - "step": 1006 - }, - { - "epoch": 0.342400544032642, - "grad_norm": 3.1033304500720442, - "learning_rate": 7.654161783335506e-06, - "loss": 0.7939, - "step": 1007 - }, - { - "epoch": 0.342740564433866, - "grad_norm": 2.484067747922095, - "learning_rate": 7.649492524919944e-06, - "loss": 0.8513, - "step": 1008 - }, - { - "epoch": 0.34308058483509013, - "grad_norm": 2.1827092693175256, - "learning_rate": 7.644820051634813e-06, - "loss": 0.8447, - "step": 1009 - }, - { - "epoch": 0.3434206052363142, - "grad_norm": 1.9307893267754808, - "learning_rate": 7.64014436914965e-06, - "loss": 0.7384, - "step": 1010 - }, - { - "epoch": 0.34376062563753823, - "grad_norm": 1.9144570087459527, - "learning_rate": 7.635465483137885e-06, - "loss": 0.8265, - "step": 1011 - }, - { - "epoch": 0.34410064603876234, - "grad_norm": 1.9292779787587635, - "learning_rate": 7.63078339927683e-06, - "loss": 0.7725, - "step": 1012 - }, - { - "epoch": 0.3444406664399864, - "grad_norm": 2.532262819572805, - "learning_rate": 7.626098123247691e-06, - "loss": 0.8484, - "step": 1013 - }, - { - "epoch": 0.3447806868412105, - "grad_norm": 1.516985451302082, - "learning_rate": 7.621409660735531e-06, - "loss": 0.7265, - "step": 1014 - }, - { - "epoch": 0.34512070724243454, - "grad_norm": 2.4387700274364335, - "learning_rate": 7.616718017429288e-06, - "loss": 0.8759, - "step": 1015 - }, - { - "epoch": 0.3454607276436586, - "grad_norm": 1.7038256750677205, - "learning_rate": 7.612023199021759e-06, - "loss": 0.8983, - "step": 1016 - }, - { - "epoch": 0.3458007480448827, - "grad_norm": 1.915028296278682, - "learning_rate": 7.607325211209593e-06, - "loss": 0.8207, - "step": 1017 - }, - { - "epoch": 0.34614076844610675, - "grad_norm": 5.327535008237301, - "learning_rate": 7.6026240596932854e-06, - "loss": 0.8199, - "step": 1018 - }, - { - "epoch": 0.34648078884733086, - "grad_norm": 2.0485629266374596, - "learning_rate": 7.597919750177168e-06, - "loss": 0.7972, - "step": 1019 - }, - { - "epoch": 0.3468208092485549, - "grad_norm": 2.5900475236404215, - "learning_rate": 7.593212288369408e-06, - "loss": 0.8245, - "step": 1020 - }, - { - "epoch": 0.347160829649779, - "grad_norm": 1.71454309402805, - "learning_rate": 7.588501679981997e-06, - "loss": 0.7877, - "step": 1021 - }, - { - "epoch": 0.34750085005100306, - "grad_norm": 1.7173724399881714, - "learning_rate": 7.583787930730737e-06, - "loss": 0.8098, - "step": 1022 - }, - { - "epoch": 0.3478408704522271, - "grad_norm": 1.3352281724662016, - "learning_rate": 7.579071046335256e-06, - "loss": 0.8151, - "step": 1023 - }, - { - "epoch": 0.3481808908534512, - "grad_norm": 2.2809433135453565, - "learning_rate": 7.57435103251897e-06, - "loss": 0.8194, - "step": 1024 - }, - { - "epoch": 0.34852091125467527, - "grad_norm": 1.776619177140561, - "learning_rate": 7.569627895009104e-06, - "loss": 0.7774, - "step": 1025 - }, - { - "epoch": 0.3488609316558994, - "grad_norm": 1.8490485181067193, - "learning_rate": 7.564901639536671e-06, - "loss": 0.7279, - "step": 1026 - }, - { - "epoch": 0.3492009520571234, - "grad_norm": 1.9994188958218952, - "learning_rate": 7.560172271836459e-06, - "loss": 0.8714, - "step": 1027 - }, - { - "epoch": 0.3495409724583475, - "grad_norm": 2.4785018885474903, - "learning_rate": 7.555439797647044e-06, - "loss": 0.7691, - "step": 1028 - }, - { - "epoch": 0.3498809928595716, - "grad_norm": 2.3412120228462427, - "learning_rate": 7.5507042227107655e-06, - "loss": 0.7584, - "step": 1029 - }, - { - "epoch": 0.35022101326079563, - "grad_norm": 2.165022439020042, - "learning_rate": 7.545965552773724e-06, - "loss": 0.8901, - "step": 1030 - }, - { - "epoch": 0.35056103366201974, - "grad_norm": 2.006216307009843, - "learning_rate": 7.54122379358578e-06, - "loss": 0.9007, - "step": 1031 - }, - { - "epoch": 0.3509010540632438, - "grad_norm": 4.543392686075566, - "learning_rate": 7.536478950900537e-06, - "loss": 0.8423, - "step": 1032 - }, - { - "epoch": 0.3512410744644679, - "grad_norm": 1.976663114216136, - "learning_rate": 7.531731030475345e-06, - "loss": 0.8405, - "step": 1033 - }, - { - "epoch": 0.35158109486569195, - "grad_norm": 2.117430140673597, - "learning_rate": 7.526980038071288e-06, - "loss": 0.8223, - "step": 1034 - }, - { - "epoch": 0.351921115266916, - "grad_norm": 1.9728797359425245, - "learning_rate": 7.52222597945317e-06, - "loss": 0.9271, - "step": 1035 - }, - { - "epoch": 0.3522611356681401, - "grad_norm": 3.175594333388401, - "learning_rate": 7.517468860389528e-06, - "loss": 0.6723, - "step": 1036 - }, - { - "epoch": 0.35260115606936415, - "grad_norm": 1.9729371741404098, - "learning_rate": 7.512708686652603e-06, - "loss": 0.8227, - "step": 1037 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 1.4699410732834897, - "learning_rate": 7.507945464018347e-06, - "loss": 0.85, - "step": 1038 - }, - { - "epoch": 0.3532811968718123, - "grad_norm": 2.4025141879021743, - "learning_rate": 7.5031791982664085e-06, - "loss": 0.8379, - "step": 1039 - }, - { - "epoch": 0.35362121727303636, - "grad_norm": 1.5826438634608264, - "learning_rate": 7.49840989518013e-06, - "loss": 0.8426, - "step": 1040 - }, - { - "epoch": 0.35396123767426046, - "grad_norm": 1.8143996630470258, - "learning_rate": 7.493637560546539e-06, - "loss": 0.8218, - "step": 1041 - }, - { - "epoch": 0.3543012580754845, - "grad_norm": 2.9819746909856875, - "learning_rate": 7.488862200156345e-06, - "loss": 0.8116, - "step": 1042 - }, - { - "epoch": 0.3546412784767086, - "grad_norm": 1.6025956087863886, - "learning_rate": 7.484083819803922e-06, - "loss": 0.8996, - "step": 1043 - }, - { - "epoch": 0.35498129887793267, - "grad_norm": 2.2569463838656394, - "learning_rate": 7.479302425287314e-06, - "loss": 0.793, - "step": 1044 - }, - { - "epoch": 0.3553213192791567, - "grad_norm": 1.7640317823457161, - "learning_rate": 7.4745180224082205e-06, - "loss": 0.9197, - "step": 1045 - }, - { - "epoch": 0.35566133968038083, - "grad_norm": 2.0978448016759796, - "learning_rate": 7.469730616971992e-06, - "loss": 0.8864, - "step": 1046 - }, - { - "epoch": 0.3560013600816049, - "grad_norm": 1.815417919798777, - "learning_rate": 7.464940214787622e-06, - "loss": 0.8489, - "step": 1047 - }, - { - "epoch": 0.356341380482829, - "grad_norm": 2.5179528675012306, - "learning_rate": 7.4601468216677375e-06, - "loss": 0.7985, - "step": 1048 - }, - { - "epoch": 0.35668140088405303, - "grad_norm": 2.0737400366436978, - "learning_rate": 7.455350443428598e-06, - "loss": 0.8621, - "step": 1049 - }, - { - "epoch": 0.35702142128527714, - "grad_norm": 1.9500143741002205, - "learning_rate": 7.450551085890087e-06, - "loss": 0.7252, - "step": 1050 - }, - { - "epoch": 0.3573614416865012, - "grad_norm": 2.487299951341808, - "learning_rate": 7.445748754875697e-06, - "loss": 0.8219, - "step": 1051 - }, - { - "epoch": 0.35770146208772524, - "grad_norm": 1.9293676887234388, - "learning_rate": 7.440943456212533e-06, - "loss": 0.8055, - "step": 1052 - }, - { - "epoch": 0.35804148248894935, - "grad_norm": 1.630631399721771, - "learning_rate": 7.4361351957313e-06, - "loss": 0.8086, - "step": 1053 - }, - { - "epoch": 0.3583815028901734, - "grad_norm": 1.993396157634338, - "learning_rate": 7.431323979266296e-06, - "loss": 0.8358, - "step": 1054 - }, - { - "epoch": 0.3587215232913975, - "grad_norm": 2.1096399659613434, - "learning_rate": 7.4265098126554065e-06, - "loss": 0.6622, - "step": 1055 - }, - { - "epoch": 0.35906154369262155, - "grad_norm": 2.0957274147043568, - "learning_rate": 7.421692701740095e-06, - "loss": 0.8535, - "step": 1056 - }, - { - "epoch": 0.3594015640938456, - "grad_norm": 2.280331567819238, - "learning_rate": 7.416872652365401e-06, - "loss": 0.7613, - "step": 1057 - }, - { - "epoch": 0.3597415844950697, - "grad_norm": 1.7088762504236967, - "learning_rate": 7.412049670379927e-06, - "loss": 0.8598, - "step": 1058 - }, - { - "epoch": 0.36008160489629376, - "grad_norm": 1.8455695752782821, - "learning_rate": 7.4072237616358356e-06, - "loss": 0.9097, - "step": 1059 - }, - { - "epoch": 0.36042162529751787, - "grad_norm": 2.4513346866686483, - "learning_rate": 7.402394931988836e-06, - "loss": 0.8521, - "step": 1060 - }, - { - "epoch": 0.3607616456987419, - "grad_norm": 1.7993496451007047, - "learning_rate": 7.397563187298189e-06, - "loss": 0.8043, - "step": 1061 - }, - { - "epoch": 0.361101666099966, - "grad_norm": 2.5033019796675777, - "learning_rate": 7.392728533426687e-06, - "loss": 0.6882, - "step": 1062 - }, - { - "epoch": 0.3614416865011901, - "grad_norm": 1.6959114046452552, - "learning_rate": 7.387890976240655e-06, - "loss": 0.7466, - "step": 1063 - }, - { - "epoch": 0.3617817069024141, - "grad_norm": 1.7439198675611804, - "learning_rate": 7.383050521609938e-06, - "loss": 0.8818, - "step": 1064 - }, - { - "epoch": 0.36212172730363823, - "grad_norm": 2.053598063714424, - "learning_rate": 7.378207175407899e-06, - "loss": 0.7483, - "step": 1065 - }, - { - "epoch": 0.3624617477048623, - "grad_norm": 2.781756547823472, - "learning_rate": 7.3733609435114096e-06, - "loss": 0.8445, - "step": 1066 - }, - { - "epoch": 0.3628017681060864, - "grad_norm": 2.09779855425177, - "learning_rate": 7.368511831800844e-06, - "loss": 0.851, - "step": 1067 - }, - { - "epoch": 0.36314178850731044, - "grad_norm": 3.020910053469312, - "learning_rate": 7.363659846160066e-06, - "loss": 0.8892, - "step": 1068 - }, - { - "epoch": 0.3634818089085345, - "grad_norm": 2.4787490690903944, - "learning_rate": 7.358804992476432e-06, - "loss": 0.8432, - "step": 1069 - }, - { - "epoch": 0.3638218293097586, - "grad_norm": 2.8989448273348573, - "learning_rate": 7.353947276640776e-06, - "loss": 0.6853, - "step": 1070 - }, - { - "epoch": 0.36416184971098264, - "grad_norm": 1.726759255900026, - "learning_rate": 7.349086704547408e-06, - "loss": 0.8831, - "step": 1071 - }, - { - "epoch": 0.36450187011220675, - "grad_norm": 1.8399420040429915, - "learning_rate": 7.344223282094095e-06, - "loss": 0.7882, - "step": 1072 - }, - { - "epoch": 0.3648418905134308, - "grad_norm": 1.806073228885492, - "learning_rate": 7.3393570151820714e-06, - "loss": 0.8412, - "step": 1073 - }, - { - "epoch": 0.3651819109146549, - "grad_norm": 1.6710296660184638, - "learning_rate": 7.334487909716021e-06, - "loss": 0.9138, - "step": 1074 - }, - { - "epoch": 0.36552193131587896, - "grad_norm": 2.1624300932227682, - "learning_rate": 7.329615971604071e-06, - "loss": 0.8227, - "step": 1075 - }, - { - "epoch": 0.365861951717103, - "grad_norm": 1.7009224007949533, - "learning_rate": 7.324741206757785e-06, - "loss": 0.8129, - "step": 1076 - }, - { - "epoch": 0.3662019721183271, - "grad_norm": 2.408114357062906, - "learning_rate": 7.3198636210921556e-06, - "loss": 0.8746, - "step": 1077 - }, - { - "epoch": 0.36654199251955116, - "grad_norm": 1.963920816993082, - "learning_rate": 7.314983220525604e-06, - "loss": 0.8986, - "step": 1078 - }, - { - "epoch": 0.36688201292077527, - "grad_norm": 2.625997972868134, - "learning_rate": 7.3101000109799616e-06, - "loss": 0.8639, - "step": 1079 - }, - { - "epoch": 0.3672220333219993, - "grad_norm": 2.089484971251662, - "learning_rate": 7.305213998380466e-06, - "loss": 0.9753, - "step": 1080 - }, - { - "epoch": 0.36756205372322337, - "grad_norm": 1.879765672023159, - "learning_rate": 7.300325188655762e-06, - "loss": 0.8868, - "step": 1081 - }, - { - "epoch": 0.3679020741244475, - "grad_norm": 2.742242310525079, - "learning_rate": 7.295433587737885e-06, - "loss": 0.8266, - "step": 1082 - }, - { - "epoch": 0.3682420945256715, - "grad_norm": 2.110886719188207, - "learning_rate": 7.29053920156226e-06, - "loss": 0.8568, - "step": 1083 - }, - { - "epoch": 0.36858211492689563, - "grad_norm": 1.6424790092242403, - "learning_rate": 7.285642036067687e-06, - "loss": 0.803, - "step": 1084 - }, - { - "epoch": 0.3689221353281197, - "grad_norm": 1.6786948404232462, - "learning_rate": 7.280742097196342e-06, - "loss": 0.7703, - "step": 1085 - }, - { - "epoch": 0.36926215572934373, - "grad_norm": 3.0606693350336154, - "learning_rate": 7.275839390893766e-06, - "loss": 0.7358, - "step": 1086 - }, - { - "epoch": 0.36960217613056784, - "grad_norm": 2.1162203963053146, - "learning_rate": 7.270933923108857e-06, - "loss": 0.7855, - "step": 1087 - }, - { - "epoch": 0.3699421965317919, - "grad_norm": 1.8016862422684354, - "learning_rate": 7.2660256997938635e-06, - "loss": 0.8466, - "step": 1088 - }, - { - "epoch": 0.370282216933016, - "grad_norm": 1.6606430113607151, - "learning_rate": 7.261114726904379e-06, - "loss": 0.8152, - "step": 1089 - }, - { - "epoch": 0.37062223733424005, - "grad_norm": 2.761851717407974, - "learning_rate": 7.25620101039933e-06, - "loss": 0.9042, - "step": 1090 - }, - { - "epoch": 0.37096225773546415, - "grad_norm": 1.9771042284445832, - "learning_rate": 7.2512845562409764e-06, - "loss": 0.7056, - "step": 1091 - }, - { - "epoch": 0.3713022781366882, - "grad_norm": 1.653746373879253, - "learning_rate": 7.246365370394896e-06, - "loss": 0.8067, - "step": 1092 - }, - { - "epoch": 0.37164229853791225, - "grad_norm": 1.8811326242800515, - "learning_rate": 7.241443458829985e-06, - "loss": 0.9338, - "step": 1093 - }, - { - "epoch": 0.37198231893913636, - "grad_norm": 2.7501158364403637, - "learning_rate": 7.236518827518444e-06, - "loss": 0.8809, - "step": 1094 - }, - { - "epoch": 0.3723223393403604, - "grad_norm": 2.5384080733406074, - "learning_rate": 7.231591482435777e-06, - "loss": 0.7763, - "step": 1095 - }, - { - "epoch": 0.3726623597415845, - "grad_norm": 2.228534473485439, - "learning_rate": 7.226661429560776e-06, - "loss": 0.7819, - "step": 1096 - }, - { - "epoch": 0.37300238014280857, - "grad_norm": 1.9926490104103836, - "learning_rate": 7.221728674875522e-06, - "loss": 0.886, - "step": 1097 - }, - { - "epoch": 0.3733424005440326, - "grad_norm": 2.362100344429819, - "learning_rate": 7.216793224365373e-06, - "loss": 0.8621, - "step": 1098 - }, - { - "epoch": 0.3736824209452567, - "grad_norm": 2.0797006061886103, - "learning_rate": 7.2118550840189605e-06, - "loss": 0.8351, - "step": 1099 - }, - { - "epoch": 0.37402244134648077, - "grad_norm": 1.9013645029122852, - "learning_rate": 7.206914259828177e-06, - "loss": 0.7349, - "step": 1100 - }, - { - "epoch": 0.3743624617477049, - "grad_norm": 2.096526511001182, - "learning_rate": 7.201970757788172e-06, - "loss": 0.8096, - "step": 1101 - }, - { - "epoch": 0.37470248214892893, - "grad_norm": 1.9487792698034627, - "learning_rate": 7.197024583897345e-06, - "loss": 0.7688, - "step": 1102 - }, - { - "epoch": 0.37504250255015303, - "grad_norm": 1.7616136379351248, - "learning_rate": 7.19207574415734e-06, - "loss": 0.877, - "step": 1103 - }, - { - "epoch": 0.3753825229513771, - "grad_norm": 2.216518466261683, - "learning_rate": 7.187124244573029e-06, - "loss": 0.7425, - "step": 1104 - }, - { - "epoch": 0.37572254335260113, - "grad_norm": 1.9420595220834904, - "learning_rate": 7.182170091152518e-06, - "loss": 0.8859, - "step": 1105 - }, - { - "epoch": 0.37606256375382524, - "grad_norm": 3.1843147726618892, - "learning_rate": 7.17721328990713e-06, - "loss": 0.7846, - "step": 1106 - }, - { - "epoch": 0.3764025841550493, - "grad_norm": 3.5406950144124134, - "learning_rate": 7.1722538468514015e-06, - "loss": 0.7288, - "step": 1107 - }, - { - "epoch": 0.3767426045562734, - "grad_norm": 1.9027244103388907, - "learning_rate": 7.167291768003075e-06, - "loss": 0.8369, - "step": 1108 - }, - { - "epoch": 0.37708262495749745, - "grad_norm": 2.229506384560749, - "learning_rate": 7.162327059383089e-06, - "loss": 0.8006, - "step": 1109 - }, - { - "epoch": 0.3774226453587215, - "grad_norm": 2.483789935208878, - "learning_rate": 7.157359727015578e-06, - "loss": 0.8531, - "step": 1110 - }, - { - "epoch": 0.3777626657599456, - "grad_norm": 2.0952967393905597, - "learning_rate": 7.152389776927855e-06, - "loss": 0.7728, - "step": 1111 - }, - { - "epoch": 0.37810268616116965, - "grad_norm": 2.8696175226987135, - "learning_rate": 7.147417215150411e-06, - "loss": 0.8312, - "step": 1112 - }, - { - "epoch": 0.37844270656239376, - "grad_norm": 2.0770110520679115, - "learning_rate": 7.142442047716905e-06, - "loss": 0.7291, - "step": 1113 - }, - { - "epoch": 0.3787827269636178, - "grad_norm": 4.9493875756484345, - "learning_rate": 7.13746428066416e-06, - "loss": 0.8679, - "step": 1114 - }, - { - "epoch": 0.3791227473648419, - "grad_norm": 1.4736356648123277, - "learning_rate": 7.132483920032154e-06, - "loss": 0.8668, - "step": 1115 - }, - { - "epoch": 0.37946276776606597, - "grad_norm": 2.1871010916988407, - "learning_rate": 7.127500971864008e-06, - "loss": 0.8253, - "step": 1116 - }, - { - "epoch": 0.37980278816729, - "grad_norm": 2.7048260870598395, - "learning_rate": 7.122515442205985e-06, - "loss": 0.8072, - "step": 1117 - }, - { - "epoch": 0.3801428085685141, - "grad_norm": 1.6568599508148707, - "learning_rate": 7.117527337107481e-06, - "loss": 0.883, - "step": 1118 - }, - { - "epoch": 0.3804828289697382, - "grad_norm": 1.7744379748328984, - "learning_rate": 7.112536662621017e-06, - "loss": 0.8028, - "step": 1119 - }, - { - "epoch": 0.3808228493709623, - "grad_norm": 1.9075136504794952, - "learning_rate": 7.10754342480223e-06, - "loss": 0.7285, - "step": 1120 - }, - { - "epoch": 0.38116286977218633, - "grad_norm": 1.9011664282216376, - "learning_rate": 7.102547629709867e-06, - "loss": 0.8961, - "step": 1121 - }, - { - "epoch": 0.3815028901734104, - "grad_norm": 3.3324355205350096, - "learning_rate": 7.097549283405782e-06, - "loss": 0.8518, - "step": 1122 - }, - { - "epoch": 0.3818429105746345, - "grad_norm": 1.7672003604856446, - "learning_rate": 7.092548391954919e-06, - "loss": 0.8808, - "step": 1123 - }, - { - "epoch": 0.38218293097585854, - "grad_norm": 2.013141247061694, - "learning_rate": 7.087544961425317e-06, - "loss": 0.725, - "step": 1124 - }, - { - "epoch": 0.38252295137708264, - "grad_norm": 1.985634648817913, - "learning_rate": 7.082538997888087e-06, - "loss": 0.75, - "step": 1125 - }, - { - "epoch": 0.3828629717783067, - "grad_norm": 1.9538380256881642, - "learning_rate": 7.077530507417423e-06, - "loss": 0.7453, - "step": 1126 - }, - { - "epoch": 0.3832029921795308, - "grad_norm": 1.4275684400693776, - "learning_rate": 7.07251949609058e-06, - "loss": 0.8418, - "step": 1127 - }, - { - "epoch": 0.38354301258075485, - "grad_norm": 1.876464428838775, - "learning_rate": 7.067505969987869e-06, - "loss": 0.8856, - "step": 1128 - }, - { - "epoch": 0.3838830329819789, - "grad_norm": 2.4676650596958134, - "learning_rate": 7.06248993519266e-06, - "loss": 0.7442, - "step": 1129 - }, - { - "epoch": 0.384223053383203, - "grad_norm": 1.9772400387901186, - "learning_rate": 7.05747139779136e-06, - "loss": 0.7476, - "step": 1130 - }, - { - "epoch": 0.38456307378442706, - "grad_norm": 1.7114896839198552, - "learning_rate": 7.0524503638734175e-06, - "loss": 0.7586, - "step": 1131 - }, - { - "epoch": 0.38490309418565116, - "grad_norm": 1.979719268713037, - "learning_rate": 7.047426839531308e-06, - "loss": 0.7806, - "step": 1132 - }, - { - "epoch": 0.3852431145868752, - "grad_norm": 2.573830551015435, - "learning_rate": 7.04240083086053e-06, - "loss": 0.804, - "step": 1133 - }, - { - "epoch": 0.38558313498809926, - "grad_norm": 2.0092570242754286, - "learning_rate": 7.037372343959592e-06, - "loss": 0.784, - "step": 1134 - }, - { - "epoch": 0.38592315538932337, - "grad_norm": 1.7021704495221963, - "learning_rate": 7.032341384930018e-06, - "loss": 0.8437, - "step": 1135 - }, - { - "epoch": 0.3862631757905474, - "grad_norm": 2.005711746099124, - "learning_rate": 7.0273079598763236e-06, - "loss": 0.8695, - "step": 1136 - }, - { - "epoch": 0.3866031961917715, - "grad_norm": 3.7689109362135897, - "learning_rate": 7.022272074906021e-06, - "loss": 0.8509, - "step": 1137 - }, - { - "epoch": 0.3869432165929956, - "grad_norm": 2.040973796481224, - "learning_rate": 7.017233736129606e-06, - "loss": 0.7938, - "step": 1138 - }, - { - "epoch": 0.3872832369942196, - "grad_norm": 1.6275779088061617, - "learning_rate": 7.012192949660552e-06, - "loss": 0.7431, - "step": 1139 - }, - { - "epoch": 0.38762325739544373, - "grad_norm": 3.289438767702211, - "learning_rate": 7.007149721615303e-06, - "loss": 0.8054, - "step": 1140 - }, - { - "epoch": 0.3879632777966678, - "grad_norm": 1.5783269725777442, - "learning_rate": 7.002104058113264e-06, - "loss": 0.7602, - "step": 1141 - }, - { - "epoch": 0.3883032981978919, - "grad_norm": 2.401935957785332, - "learning_rate": 6.997055965276796e-06, - "loss": 0.8494, - "step": 1142 - }, - { - "epoch": 0.38864331859911594, - "grad_norm": 2.2155438256160447, - "learning_rate": 6.9920054492312086e-06, - "loss": 0.8322, - "step": 1143 - }, - { - "epoch": 0.38898333900034004, - "grad_norm": 2.5303097895478426, - "learning_rate": 6.98695251610475e-06, - "loss": 0.7219, - "step": 1144 - }, - { - "epoch": 0.3893233594015641, - "grad_norm": 1.9492455847007166, - "learning_rate": 6.981897172028605e-06, - "loss": 0.7452, - "step": 1145 - }, - { - "epoch": 0.38966337980278815, - "grad_norm": 1.8888613087300397, - "learning_rate": 6.9768394231368765e-06, - "loss": 0.7079, - "step": 1146 - }, - { - "epoch": 0.39000340020401225, - "grad_norm": 1.8614707671392436, - "learning_rate": 6.971779275566593e-06, - "loss": 0.8869, - "step": 1147 - }, - { - "epoch": 0.3903434206052363, - "grad_norm": 2.196139754184654, - "learning_rate": 6.96671673545769e-06, - "loss": 0.8182, - "step": 1148 - }, - { - "epoch": 0.3906834410064604, - "grad_norm": 2.015645605594665, - "learning_rate": 6.961651808953008e-06, - "loss": 0.788, - "step": 1149 - }, - { - "epoch": 0.39102346140768446, - "grad_norm": 2.118358018353812, - "learning_rate": 6.956584502198278e-06, - "loss": 0.7944, - "step": 1150 - }, - { - "epoch": 0.3913634818089085, - "grad_norm": 2.5297802062767505, - "learning_rate": 6.9515148213421265e-06, - "loss": 0.7594, - "step": 1151 - }, - { - "epoch": 0.3917035022101326, - "grad_norm": 1.7235690561643628, - "learning_rate": 6.946442772536055e-06, - "loss": 0.8006, - "step": 1152 - }, - { - "epoch": 0.39204352261135667, - "grad_norm": 2.0605815364417723, - "learning_rate": 6.941368361934442e-06, - "loss": 0.7571, - "step": 1153 - }, - { - "epoch": 0.39238354301258077, - "grad_norm": 1.802107862787346, - "learning_rate": 6.9362915956945264e-06, - "loss": 0.7694, - "step": 1154 - }, - { - "epoch": 0.3927235634138048, - "grad_norm": 3.0394698418460373, - "learning_rate": 6.931212479976413e-06, - "loss": 0.8776, - "step": 1155 - }, - { - "epoch": 0.3930635838150289, - "grad_norm": 2.344468637665646, - "learning_rate": 6.9261310209430525e-06, - "loss": 0.7557, - "step": 1156 - }, - { - "epoch": 0.393403604216253, - "grad_norm": 2.1496119244458693, - "learning_rate": 6.921047224760239e-06, - "loss": 0.8703, - "step": 1157 - }, - { - "epoch": 0.39374362461747703, - "grad_norm": 1.8022051744909748, - "learning_rate": 6.9159610975966044e-06, - "loss": 0.9033, - "step": 1158 - }, - { - "epoch": 0.39408364501870113, - "grad_norm": 1.7062404002111817, - "learning_rate": 6.910872645623608e-06, - "loss": 0.8358, - "step": 1159 - }, - { - "epoch": 0.3944236654199252, - "grad_norm": 2.807991980988546, - "learning_rate": 6.905781875015529e-06, - "loss": 0.732, - "step": 1160 - }, - { - "epoch": 0.3947636858211493, - "grad_norm": 2.010080594688907, - "learning_rate": 6.900688791949463e-06, - "loss": 0.8275, - "step": 1161 - }, - { - "epoch": 0.39510370622237334, - "grad_norm": 2.543059620006072, - "learning_rate": 6.895593402605308e-06, - "loss": 0.8672, - "step": 1162 - }, - { - "epoch": 0.3954437266235974, - "grad_norm": 2.323489162154479, - "learning_rate": 6.890495713165761e-06, - "loss": 0.8112, - "step": 1163 - }, - { - "epoch": 0.3957837470248215, - "grad_norm": 2.181543784406478, - "learning_rate": 6.885395729816313e-06, - "loss": 0.9508, - "step": 1164 - }, - { - "epoch": 0.39612376742604555, - "grad_norm": 2.226593260830025, - "learning_rate": 6.880293458745237e-06, - "loss": 0.6444, - "step": 1165 - }, - { - "epoch": 0.39646378782726965, - "grad_norm": 1.634156363859306, - "learning_rate": 6.87518890614358e-06, - "loss": 0.8444, - "step": 1166 - }, - { - "epoch": 0.3968038082284937, - "grad_norm": 2.5459595291596493, - "learning_rate": 6.870082078205158e-06, - "loss": 0.8996, - "step": 1167 - }, - { - "epoch": 0.3971438286297178, - "grad_norm": 3.9234543111468567, - "learning_rate": 6.86497298112655e-06, - "loss": 0.9022, - "step": 1168 - }, - { - "epoch": 0.39748384903094186, - "grad_norm": 1.9173410435574607, - "learning_rate": 6.859861621107084e-06, - "loss": 0.8068, - "step": 1169 - }, - { - "epoch": 0.3978238694321659, - "grad_norm": 3.035334796056879, - "learning_rate": 6.85474800434884e-06, - "loss": 0.8787, - "step": 1170 - }, - { - "epoch": 0.39816388983339, - "grad_norm": 1.996336800913808, - "learning_rate": 6.849632137056631e-06, - "loss": 0.8218, - "step": 1171 - }, - { - "epoch": 0.39850391023461407, - "grad_norm": 2.4997036230867877, - "learning_rate": 6.844514025438003e-06, - "loss": 0.8944, - "step": 1172 - }, - { - "epoch": 0.3988439306358382, - "grad_norm": 2.058620980233996, - "learning_rate": 6.8393936757032255e-06, - "loss": 0.8202, - "step": 1173 - }, - { - "epoch": 0.3991839510370622, - "grad_norm": 2.0680642784567764, - "learning_rate": 6.834271094065284e-06, - "loss": 0.8465, - "step": 1174 - }, - { - "epoch": 0.3995239714382863, - "grad_norm": 1.8727199079644916, - "learning_rate": 6.82914628673987e-06, - "loss": 0.8527, - "step": 1175 - }, - { - "epoch": 0.3998639918395104, - "grad_norm": 1.590081791651527, - "learning_rate": 6.824019259945376e-06, - "loss": 0.8613, - "step": 1176 - }, - { - "epoch": 0.40020401224073443, - "grad_norm": 2.0260284044840278, - "learning_rate": 6.818890019902891e-06, - "loss": 0.7465, - "step": 1177 - }, - { - "epoch": 0.40054403264195854, - "grad_norm": 1.8398479387280182, - "learning_rate": 6.813758572836187e-06, - "loss": 0.7806, - "step": 1178 - }, - { - "epoch": 0.4008840530431826, - "grad_norm": 2.105917280765373, - "learning_rate": 6.808624924971711e-06, - "loss": 0.7141, - "step": 1179 - }, - { - "epoch": 0.40122407344440664, - "grad_norm": 1.9634355488466153, - "learning_rate": 6.803489082538586e-06, - "loss": 0.8055, - "step": 1180 - }, - { - "epoch": 0.40156409384563074, - "grad_norm": 1.8526489425120056, - "learning_rate": 6.798351051768597e-06, - "loss": 0.8832, - "step": 1181 - }, - { - "epoch": 0.4019041142468548, - "grad_norm": 2.3356716947930316, - "learning_rate": 6.79321083889618e-06, - "loss": 0.7484, - "step": 1182 - }, - { - "epoch": 0.4022441346480789, - "grad_norm": 2.0837152056282964, - "learning_rate": 6.788068450158422e-06, - "loss": 0.718, - "step": 1183 - }, - { - "epoch": 0.40258415504930295, - "grad_norm": 1.8999186943331179, - "learning_rate": 6.78292389179505e-06, - "loss": 0.7811, - "step": 1184 - }, - { - "epoch": 0.40292417545052706, - "grad_norm": 1.6658111499434904, - "learning_rate": 6.777777170048423e-06, - "loss": 0.8201, - "step": 1185 - }, - { - "epoch": 0.4032641958517511, - "grad_norm": 1.680917916696707, - "learning_rate": 6.772628291163527e-06, - "loss": 0.807, - "step": 1186 - }, - { - "epoch": 0.40360421625297516, - "grad_norm": 1.7407790818800217, - "learning_rate": 6.76747726138796e-06, - "loss": 0.8313, - "step": 1187 - }, - { - "epoch": 0.40394423665419926, - "grad_norm": 1.5617043074412897, - "learning_rate": 6.762324086971936e-06, - "loss": 0.9455, - "step": 1188 - }, - { - "epoch": 0.4042842570554233, - "grad_norm": 4.146596153131383, - "learning_rate": 6.75716877416827e-06, - "loss": 0.7997, - "step": 1189 - }, - { - "epoch": 0.4046242774566474, - "grad_norm": 2.0617653843196884, - "learning_rate": 6.752011329232369e-06, - "loss": 0.8153, - "step": 1190 - }, - { - "epoch": 0.40496429785787147, - "grad_norm": 2.076737017009885, - "learning_rate": 6.746851758422228e-06, - "loss": 0.8002, - "step": 1191 - }, - { - "epoch": 0.4053043182590955, - "grad_norm": 1.813071998464279, - "learning_rate": 6.741690067998423e-06, - "loss": 0.8347, - "step": 1192 - }, - { - "epoch": 0.4056443386603196, - "grad_norm": 1.9981326256931067, - "learning_rate": 6.736526264224101e-06, - "loss": 0.9294, - "step": 1193 - }, - { - "epoch": 0.4059843590615437, - "grad_norm": 1.8827868546011934, - "learning_rate": 6.731360353364975e-06, - "loss": 0.867, - "step": 1194 - }, - { - "epoch": 0.4063243794627678, - "grad_norm": 1.8243418052617972, - "learning_rate": 6.726192341689311e-06, - "loss": 0.8223, - "step": 1195 - }, - { - "epoch": 0.40666439986399183, - "grad_norm": 1.770350872789149, - "learning_rate": 6.721022235467926e-06, - "loss": 0.8619, - "step": 1196 - }, - { - "epoch": 0.40700442026521594, - "grad_norm": 1.8639174786964454, - "learning_rate": 6.7158500409741815e-06, - "loss": 0.9201, - "step": 1197 - }, - { - "epoch": 0.40734444066644, - "grad_norm": 2.0254058760681803, - "learning_rate": 6.710675764483968e-06, - "loss": 0.7695, - "step": 1198 - }, - { - "epoch": 0.40768446106766404, - "grad_norm": 1.760831957768078, - "learning_rate": 6.7054994122757046e-06, - "loss": 0.819, - "step": 1199 - }, - { - "epoch": 0.40802448146888814, - "grad_norm": 2.3849603429891997, - "learning_rate": 6.700320990630329e-06, - "loss": 0.8816, - "step": 1200 - }, - { - "epoch": 0.4083645018701122, - "grad_norm": 3.24791611718312, - "learning_rate": 6.69514050583129e-06, - "loss": 0.9234, - "step": 1201 - }, - { - "epoch": 0.4087045222713363, - "grad_norm": 1.7471455004845486, - "learning_rate": 6.689957964164539e-06, - "loss": 0.7623, - "step": 1202 - }, - { - "epoch": 0.40904454267256035, - "grad_norm": 2.785514967554598, - "learning_rate": 6.684773371918526e-06, - "loss": 0.7937, - "step": 1203 - }, - { - "epoch": 0.4093845630737844, - "grad_norm": 1.8030855427647954, - "learning_rate": 6.679586735384184e-06, - "loss": 0.8442, - "step": 1204 - }, - { - "epoch": 0.4097245834750085, - "grad_norm": 3.721825112571208, - "learning_rate": 6.674398060854931e-06, - "loss": 0.7539, - "step": 1205 - }, - { - "epoch": 0.41006460387623256, - "grad_norm": 1.80798373103189, - "learning_rate": 6.669207354626657e-06, - "loss": 0.8992, - "step": 1206 - }, - { - "epoch": 0.41040462427745666, - "grad_norm": 2.0935781969101352, - "learning_rate": 6.664014622997717e-06, - "loss": 0.8665, - "step": 1207 - }, - { - "epoch": 0.4107446446786807, - "grad_norm": 1.77781409293517, - "learning_rate": 6.65881987226892e-06, - "loss": 0.9314, - "step": 1208 - }, - { - "epoch": 0.4110846650799048, - "grad_norm": 2.0253755293143105, - "learning_rate": 6.65362310874353e-06, - "loss": 0.8807, - "step": 1209 - }, - { - "epoch": 0.41142468548112887, - "grad_norm": 3.8985316382602444, - "learning_rate": 6.648424338727254e-06, - "loss": 0.7557, - "step": 1210 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 2.9177556160015317, - "learning_rate": 6.643223568528228e-06, - "loss": 0.7773, - "step": 1211 - }, - { - "epoch": 0.412104726283577, - "grad_norm": 1.7364907122378959, - "learning_rate": 6.638020804457017e-06, - "loss": 0.7708, - "step": 1212 - }, - { - "epoch": 0.4124447466848011, - "grad_norm": 2.5902233868083107, - "learning_rate": 6.632816052826611e-06, - "loss": 0.8803, - "step": 1213 - }, - { - "epoch": 0.4127847670860252, - "grad_norm": 2.196962080702439, - "learning_rate": 6.627609319952404e-06, - "loss": 0.8413, - "step": 1214 - }, - { - "epoch": 0.41312478748724923, - "grad_norm": 1.7517859998624659, - "learning_rate": 6.622400612152199e-06, - "loss": 0.7581, - "step": 1215 - }, - { - "epoch": 0.4134648078884733, - "grad_norm": 2.725187748694304, - "learning_rate": 6.617189935746191e-06, - "loss": 0.8616, - "step": 1216 - }, - { - "epoch": 0.4138048282896974, - "grad_norm": 2.186121004596113, - "learning_rate": 6.6119772970569686e-06, - "loss": 0.867, - "step": 1217 - }, - { - "epoch": 0.41414484869092144, - "grad_norm": 1.7550877153581408, - "learning_rate": 6.606762702409499e-06, - "loss": 0.8189, - "step": 1218 - }, - { - "epoch": 0.41448486909214555, - "grad_norm": 1.911639257053032, - "learning_rate": 6.60154615813112e-06, - "loss": 0.8447, - "step": 1219 - }, - { - "epoch": 0.4148248894933696, - "grad_norm": 2.2359482219667566, - "learning_rate": 6.596327670551541e-06, - "loss": 0.7098, - "step": 1220 - }, - { - "epoch": 0.4151649098945937, - "grad_norm": 1.8270789562811462, - "learning_rate": 6.591107246002825e-06, - "loss": 0.8242, - "step": 1221 - }, - { - "epoch": 0.41550493029581775, - "grad_norm": 1.4119348213396106, - "learning_rate": 6.585884890819388e-06, - "loss": 0.9456, - "step": 1222 - }, - { - "epoch": 0.4158449506970418, - "grad_norm": 2.4831333073302115, - "learning_rate": 6.5806606113379855e-06, - "loss": 0.843, - "step": 1223 - }, - { - "epoch": 0.4161849710982659, - "grad_norm": 2.0195834149272907, - "learning_rate": 6.57543441389771e-06, - "loss": 0.7616, - "step": 1224 - }, - { - "epoch": 0.41652499149948996, - "grad_norm": 1.8560990970772, - "learning_rate": 6.570206304839979e-06, - "loss": 0.772, - "step": 1225 - }, - { - "epoch": 0.41686501190071407, - "grad_norm": 3.4777502532844364, - "learning_rate": 6.564976290508535e-06, - "loss": 0.7694, - "step": 1226 - }, - { - "epoch": 0.4172050323019381, - "grad_norm": 2.1141524879270337, - "learning_rate": 6.559744377249426e-06, - "loss": 0.8212, - "step": 1227 - }, - { - "epoch": 0.41754505270316217, - "grad_norm": 1.9995986640884873, - "learning_rate": 6.554510571411009e-06, - "loss": 0.7696, - "step": 1228 - }, - { - "epoch": 0.4178850731043863, - "grad_norm": 4.388048606323549, - "learning_rate": 6.549274879343932e-06, - "loss": 0.8521, - "step": 1229 - }, - { - "epoch": 0.4182250935056103, - "grad_norm": 2.512566436457032, - "learning_rate": 6.54403730740114e-06, - "loss": 0.8375, - "step": 1230 - }, - { - "epoch": 0.41856511390683443, - "grad_norm": 2.022875278093896, - "learning_rate": 6.53879786193785e-06, - "loss": 0.8287, - "step": 1231 - }, - { - "epoch": 0.4189051343080585, - "grad_norm": 2.1514715191325338, - "learning_rate": 6.533556549311557e-06, - "loss": 0.8248, - "step": 1232 - }, - { - "epoch": 0.41924515470928253, - "grad_norm": 1.903140049004442, - "learning_rate": 6.52831337588202e-06, - "loss": 0.8134, - "step": 1233 - }, - { - "epoch": 0.41958517511050664, - "grad_norm": 3.9459515916276815, - "learning_rate": 6.52306834801126e-06, - "loss": 0.8674, - "step": 1234 - }, - { - "epoch": 0.4199251955117307, - "grad_norm": 1.9243723260268402, - "learning_rate": 6.517821472063543e-06, - "loss": 0.8009, - "step": 1235 - }, - { - "epoch": 0.4202652159129548, - "grad_norm": 2.1622315952308484, - "learning_rate": 6.51257275440538e-06, - "loss": 0.8302, - "step": 1236 - }, - { - "epoch": 0.42060523631417884, - "grad_norm": 2.246688116810149, - "learning_rate": 6.507322201405515e-06, - "loss": 0.8518, - "step": 1237 - }, - { - "epoch": 0.42094525671540295, - "grad_norm": 1.7866795660553714, - "learning_rate": 6.502069819434921e-06, - "loss": 0.7996, - "step": 1238 - }, - { - "epoch": 0.421285277116627, - "grad_norm": 2.483859305848629, - "learning_rate": 6.496815614866792e-06, - "loss": 0.798, - "step": 1239 - }, - { - "epoch": 0.42162529751785105, - "grad_norm": 2.6273097801827743, - "learning_rate": 6.491559594076526e-06, - "loss": 0.7717, - "step": 1240 - }, - { - "epoch": 0.42196531791907516, - "grad_norm": 1.9705933045927748, - "learning_rate": 6.486301763441732e-06, - "loss": 0.8437, - "step": 1241 - }, - { - "epoch": 0.4223053383202992, - "grad_norm": 1.6987794156789002, - "learning_rate": 6.4810421293422124e-06, - "loss": 0.7846, - "step": 1242 - }, - { - "epoch": 0.4226453587215233, - "grad_norm": 1.7952873310252566, - "learning_rate": 6.475780698159959e-06, - "loss": 0.8228, - "step": 1243 - }, - { - "epoch": 0.42298537912274736, - "grad_norm": 2.3781383897255357, - "learning_rate": 6.470517476279143e-06, - "loss": 0.9275, - "step": 1244 - }, - { - "epoch": 0.4233253995239714, - "grad_norm": 1.7388598039077234, - "learning_rate": 6.465252470086109e-06, - "loss": 0.7543, - "step": 1245 - }, - { - "epoch": 0.4236654199251955, - "grad_norm": 3.847734904824095, - "learning_rate": 6.459985685969365e-06, - "loss": 0.7801, - "step": 1246 - }, - { - "epoch": 0.42400544032641957, - "grad_norm": 2.0015735326442123, - "learning_rate": 6.454717130319583e-06, - "loss": 0.8404, - "step": 1247 - }, - { - "epoch": 0.4243454607276437, - "grad_norm": 1.676873018907222, - "learning_rate": 6.449446809529573e-06, - "loss": 0.7616, - "step": 1248 - }, - { - "epoch": 0.4246854811288677, - "grad_norm": 1.9476313763300463, - "learning_rate": 6.444174729994295e-06, - "loss": 0.8572, - "step": 1249 - }, - { - "epoch": 0.42502550153009183, - "grad_norm": 3.039778353662543, - "learning_rate": 6.438900898110843e-06, - "loss": 0.6842, - "step": 1250 - }, - { - "epoch": 0.4253655219313159, - "grad_norm": 2.212503519532677, - "learning_rate": 6.433625320278435e-06, - "loss": 0.7895, - "step": 1251 - }, - { - "epoch": 0.42570554233253993, - "grad_norm": 3.696419960495062, - "learning_rate": 6.4283480028984065e-06, - "loss": 0.7889, - "step": 1252 - }, - { - "epoch": 0.42604556273376404, - "grad_norm": 1.8504613276269528, - "learning_rate": 6.423068952374208e-06, - "loss": 0.6952, - "step": 1253 - }, - { - "epoch": 0.4263855831349881, - "grad_norm": 2.011583690777513, - "learning_rate": 6.4177881751113854e-06, - "loss": 0.7343, - "step": 1254 - }, - { - "epoch": 0.4267256035362122, - "grad_norm": 1.9658764564932893, - "learning_rate": 6.412505677517592e-06, - "loss": 0.8955, - "step": 1255 - }, - { - "epoch": 0.42706562393743625, - "grad_norm": 1.7771550054870846, - "learning_rate": 6.4072214660025555e-06, - "loss": 0.794, - "step": 1256 - }, - { - "epoch": 0.4274056443386603, - "grad_norm": 2.170566029231825, - "learning_rate": 6.401935546978091e-06, - "loss": 0.8307, - "step": 1257 - }, - { - "epoch": 0.4277456647398844, - "grad_norm": 2.795606918335267, - "learning_rate": 6.396647926858082e-06, - "loss": 0.7408, - "step": 1258 - }, - { - "epoch": 0.42808568514110845, - "grad_norm": 1.9857919239067945, - "learning_rate": 6.391358612058479e-06, - "loss": 0.7435, - "step": 1259 - }, - { - "epoch": 0.42842570554233256, - "grad_norm": 4.183650214686305, - "learning_rate": 6.386067608997286e-06, - "loss": 0.8171, - "step": 1260 - }, - { - "epoch": 0.4287657259435566, - "grad_norm": 2.2866953692840517, - "learning_rate": 6.3807749240945594e-06, - "loss": 0.9282, - "step": 1261 - }, - { - "epoch": 0.4291057463447807, - "grad_norm": 1.723522832107471, - "learning_rate": 6.375480563772391e-06, - "loss": 0.8644, - "step": 1262 - }, - { - "epoch": 0.42944576674600476, - "grad_norm": 1.9376266728840439, - "learning_rate": 6.3701845344549105e-06, - "loss": 0.7975, - "step": 1263 - }, - { - "epoch": 0.4297857871472288, - "grad_norm": 2.0320058786563884, - "learning_rate": 6.3648868425682695e-06, - "loss": 0.8404, - "step": 1264 - }, - { - "epoch": 0.4301258075484529, - "grad_norm": 1.833242518517846, - "learning_rate": 6.359587494540638e-06, - "loss": 0.9727, - "step": 1265 - }, - { - "epoch": 0.43046582794967697, - "grad_norm": 2.0656866508193237, - "learning_rate": 6.354286496802195e-06, - "loss": 0.7088, - "step": 1266 - }, - { - "epoch": 0.4308058483509011, - "grad_norm": 2.333059652715563, - "learning_rate": 6.348983855785122e-06, - "loss": 0.7784, - "step": 1267 - }, - { - "epoch": 0.4311458687521251, - "grad_norm": 1.8812951830256721, - "learning_rate": 6.343679577923596e-06, - "loss": 0.8082, - "step": 1268 - }, - { - "epoch": 0.4314858891533492, - "grad_norm": 2.2757541276249103, - "learning_rate": 6.338373669653777e-06, - "loss": 0.8048, - "step": 1269 - }, - { - "epoch": 0.4318259095545733, - "grad_norm": 1.7616508457249394, - "learning_rate": 6.333066137413803e-06, - "loss": 0.7967, - "step": 1270 - }, - { - "epoch": 0.43216592995579733, - "grad_norm": 2.5839639191285912, - "learning_rate": 6.327756987643788e-06, - "loss": 0.8475, - "step": 1271 - }, - { - "epoch": 0.43250595035702144, - "grad_norm": 1.983977392970035, - "learning_rate": 6.322446226785803e-06, - "loss": 0.7688, - "step": 1272 - }, - { - "epoch": 0.4328459707582455, - "grad_norm": 1.920278640728409, - "learning_rate": 6.317133861283876e-06, - "loss": 0.8112, - "step": 1273 - }, - { - "epoch": 0.43318599115946954, - "grad_norm": 2.175785687192607, - "learning_rate": 6.311819897583981e-06, - "loss": 0.8807, - "step": 1274 - }, - { - "epoch": 0.43352601156069365, - "grad_norm": 2.1329693868490156, - "learning_rate": 6.306504342134032e-06, - "loss": 0.7646, - "step": 1275 - }, - { - "epoch": 0.4338660319619177, - "grad_norm": 2.7197062834600643, - "learning_rate": 6.301187201383876e-06, - "loss": 0.8924, - "step": 1276 - }, - { - "epoch": 0.4342060523631418, - "grad_norm": 2.0900606207430976, - "learning_rate": 6.295868481785281e-06, - "loss": 0.8063, - "step": 1277 - }, - { - "epoch": 0.43454607276436585, - "grad_norm": 1.7320405000379613, - "learning_rate": 6.290548189791932e-06, - "loss": 0.7871, - "step": 1278 - }, - { - "epoch": 0.43488609316558996, - "grad_norm": 3.246424780026875, - "learning_rate": 6.285226331859423e-06, - "loss": 0.7022, - "step": 1279 - }, - { - "epoch": 0.435226113566814, - "grad_norm": 3.2299354049530558, - "learning_rate": 6.279902914445246e-06, - "loss": 0.8512, - "step": 1280 - }, - { - "epoch": 0.43556613396803806, - "grad_norm": 1.7974856940773503, - "learning_rate": 6.274577944008785e-06, - "loss": 0.7445, - "step": 1281 - }, - { - "epoch": 0.43590615436926217, - "grad_norm": 1.7902884652613178, - "learning_rate": 6.26925142701131e-06, - "loss": 0.7549, - "step": 1282 - }, - { - "epoch": 0.4362461747704862, - "grad_norm": 1.7575937886544872, - "learning_rate": 6.263923369915968e-06, - "loss": 0.7033, - "step": 1283 - }, - { - "epoch": 0.4365861951717103, - "grad_norm": 2.003652418101978, - "learning_rate": 6.258593779187774e-06, - "loss": 0.7226, - "step": 1284 - }, - { - "epoch": 0.4369262155729344, - "grad_norm": 1.586460473657157, - "learning_rate": 6.2532626612936035e-06, - "loss": 0.7918, - "step": 1285 - }, - { - "epoch": 0.4372662359741584, - "grad_norm": 1.7790120795884707, - "learning_rate": 6.247930022702184e-06, - "loss": 0.7426, - "step": 1286 - }, - { - "epoch": 0.43760625637538253, - "grad_norm": 1.4845861438510766, - "learning_rate": 6.242595869884093e-06, - "loss": 0.75, - "step": 1287 - }, - { - "epoch": 0.4379462767766066, - "grad_norm": 2.586395804452128, - "learning_rate": 6.237260209311738e-06, - "loss": 0.7247, - "step": 1288 - }, - { - "epoch": 0.4382862971778307, - "grad_norm": 1.7488826727955935, - "learning_rate": 6.231923047459362e-06, - "loss": 0.7819, - "step": 1289 - }, - { - "epoch": 0.43862631757905474, - "grad_norm": 2.7476753864785306, - "learning_rate": 6.2265843908030255e-06, - "loss": 0.8755, - "step": 1290 - }, - { - "epoch": 0.43896633798027884, - "grad_norm": 2.04843978154324, - "learning_rate": 6.2212442458206065e-06, - "loss": 0.845, - "step": 1291 - }, - { - "epoch": 0.4393063583815029, - "grad_norm": 2.056402370171357, - "learning_rate": 6.215902618991789e-06, - "loss": 0.6932, - "step": 1292 - }, - { - "epoch": 0.43964637878272694, - "grad_norm": 2.3809614347105814, - "learning_rate": 6.21055951679805e-06, - "loss": 0.8301, - "step": 1293 - }, - { - "epoch": 0.43998639918395105, - "grad_norm": 2.216045492126213, - "learning_rate": 6.20521494572266e-06, - "loss": 0.8544, - "step": 1294 - }, - { - "epoch": 0.4403264195851751, - "grad_norm": 1.7506779190930466, - "learning_rate": 6.1998689122506765e-06, - "loss": 0.8289, - "step": 1295 - }, - { - "epoch": 0.4406664399863992, - "grad_norm": 1.904093376273434, - "learning_rate": 6.19452142286892e-06, - "loss": 0.7709, - "step": 1296 - }, - { - "epoch": 0.44100646038762326, - "grad_norm": 2.7089730576700664, - "learning_rate": 6.1891724840659895e-06, - "loss": 0.8263, - "step": 1297 - }, - { - "epoch": 0.4413464807888473, - "grad_norm": 1.7011604570765408, - "learning_rate": 6.183822102332234e-06, - "loss": 0.7318, - "step": 1298 - }, - { - "epoch": 0.4416865011900714, - "grad_norm": 1.7063499626312666, - "learning_rate": 6.17847028415976e-06, - "loss": 0.7845, - "step": 1299 - }, - { - "epoch": 0.44202652159129546, - "grad_norm": 2.5512420126857998, - "learning_rate": 6.1731170360424116e-06, - "loss": 0.8297, - "step": 1300 - }, - { - "epoch": 0.44236654199251957, - "grad_norm": 2.3308930708868614, - "learning_rate": 6.1677623644757715e-06, - "loss": 0.7281, - "step": 1301 - }, - { - "epoch": 0.4427065623937436, - "grad_norm": 1.9814345181679105, - "learning_rate": 6.162406275957147e-06, - "loss": 0.6841, - "step": 1302 - }, - { - "epoch": 0.4430465827949677, - "grad_norm": 1.8308664790589082, - "learning_rate": 6.157048776985568e-06, - "loss": 0.7597, - "step": 1303 - }, - { - "epoch": 0.4433866031961918, - "grad_norm": 1.9450975948070095, - "learning_rate": 6.151689874061773e-06, - "loss": 0.8809, - "step": 1304 - }, - { - "epoch": 0.4437266235974158, - "grad_norm": 2.783986050727857, - "learning_rate": 6.1463295736882045e-06, - "loss": 0.7678, - "step": 1305 - }, - { - "epoch": 0.44406664399863993, - "grad_norm": 2.654115532064757, - "learning_rate": 6.140967882369001e-06, - "loss": 0.7656, - "step": 1306 - }, - { - "epoch": 0.444406664399864, - "grad_norm": 2.1154238353595938, - "learning_rate": 6.135604806609988e-06, - "loss": 0.7393, - "step": 1307 - }, - { - "epoch": 0.4447466848010881, - "grad_norm": 1.8316694323729121, - "learning_rate": 6.130240352918675e-06, - "loss": 0.7955, - "step": 1308 - }, - { - "epoch": 0.44508670520231214, - "grad_norm": 1.7923597848780743, - "learning_rate": 6.1248745278042375e-06, - "loss": 0.7902, - "step": 1309 - }, - { - "epoch": 0.4454267256035362, - "grad_norm": 2.1469603822475882, - "learning_rate": 6.119507337777517e-06, - "loss": 0.8111, - "step": 1310 - }, - { - "epoch": 0.4457667460047603, - "grad_norm": 3.204925579816377, - "learning_rate": 6.114138789351015e-06, - "loss": 0.898, - "step": 1311 - }, - { - "epoch": 0.44610676640598435, - "grad_norm": 2.4583222181061735, - "learning_rate": 6.108768889038875e-06, - "loss": 0.8401, - "step": 1312 - }, - { - "epoch": 0.44644678680720845, - "grad_norm": 1.6306721028852273, - "learning_rate": 6.103397643356888e-06, - "loss": 0.8261, - "step": 1313 - }, - { - "epoch": 0.4467868072084325, - "grad_norm": 1.6997011922153806, - "learning_rate": 6.098025058822467e-06, - "loss": 0.8157, - "step": 1314 - }, - { - "epoch": 0.44712682760965655, - "grad_norm": 1.894873083473829, - "learning_rate": 6.092651141954663e-06, - "loss": 0.818, - "step": 1315 - }, - { - "epoch": 0.44746684801088066, - "grad_norm": 1.8152984091344468, - "learning_rate": 6.087275899274132e-06, - "loss": 0.846, - "step": 1316 - }, - { - "epoch": 0.4478068684121047, - "grad_norm": 1.8804247156065967, - "learning_rate": 6.081899337303148e-06, - "loss": 0.8775, - "step": 1317 - }, - { - "epoch": 0.4481468888133288, - "grad_norm": 2.2811320840226874, - "learning_rate": 6.076521462565575e-06, - "loss": 0.8405, - "step": 1318 - }, - { - "epoch": 0.44848690921455286, - "grad_norm": 1.9014030398430317, - "learning_rate": 6.071142281586883e-06, - "loss": 0.6665, - "step": 1319 - }, - { - "epoch": 0.44882692961577697, - "grad_norm": 1.7598473537668629, - "learning_rate": 6.0657618008941135e-06, - "loss": 0.8114, - "step": 1320 - }, - { - "epoch": 0.449166950017001, - "grad_norm": 1.701348824209668, - "learning_rate": 6.060380027015897e-06, - "loss": 0.9063, - "step": 1321 - }, - { - "epoch": 0.44950697041822507, - "grad_norm": 2.0151604743954192, - "learning_rate": 6.054996966482425e-06, - "loss": 0.7727, - "step": 1322 - }, - { - "epoch": 0.4498469908194492, - "grad_norm": 1.8240997834179458, - "learning_rate": 6.049612625825454e-06, - "loss": 0.6151, - "step": 1323 - }, - { - "epoch": 0.4501870112206732, - "grad_norm": 1.7621003478299089, - "learning_rate": 6.044227011578292e-06, - "loss": 0.8248, - "step": 1324 - }, - { - "epoch": 0.45052703162189733, - "grad_norm": 2.3449253350037647, - "learning_rate": 6.038840130275795e-06, - "loss": 0.8094, - "step": 1325 - }, - { - "epoch": 0.4508670520231214, - "grad_norm": 2.186940711291871, - "learning_rate": 6.033451988454352e-06, - "loss": 0.8526, - "step": 1326 - }, - { - "epoch": 0.45120707242434543, - "grad_norm": 1.8098545794134784, - "learning_rate": 6.0280625926518865e-06, - "loss": 0.8167, - "step": 1327 - }, - { - "epoch": 0.45154709282556954, - "grad_norm": 2.11938751373049, - "learning_rate": 6.02267194940784e-06, - "loss": 0.8615, - "step": 1328 - }, - { - "epoch": 0.4518871132267936, - "grad_norm": 5.798776280397436, - "learning_rate": 6.0172800652631706e-06, - "loss": 0.8126, - "step": 1329 - }, - { - "epoch": 0.4522271336280177, - "grad_norm": 1.9362659319180398, - "learning_rate": 6.011886946760337e-06, - "loss": 0.8515, - "step": 1330 - }, - { - "epoch": 0.45256715402924175, - "grad_norm": 1.9240359926336854, - "learning_rate": 6.006492600443301e-06, - "loss": 0.795, - "step": 1331 - }, - { - "epoch": 0.45290717443046585, - "grad_norm": 2.6312493040402223, - "learning_rate": 6.001097032857513e-06, - "loss": 0.9005, - "step": 1332 - }, - { - "epoch": 0.4532471948316899, - "grad_norm": 2.4588002159411975, - "learning_rate": 5.995700250549903e-06, - "loss": 0.9122, - "step": 1333 - }, - { - "epoch": 0.45358721523291395, - "grad_norm": 1.8102464748281866, - "learning_rate": 5.990302260068877e-06, - "loss": 0.7861, - "step": 1334 - }, - { - "epoch": 0.45392723563413806, - "grad_norm": 2.4499379979673654, - "learning_rate": 5.9849030679643075e-06, - "loss": 0.8793, - "step": 1335 - }, - { - "epoch": 0.4542672560353621, - "grad_norm": 1.643369045844105, - "learning_rate": 5.97950268078752e-06, - "loss": 0.9176, - "step": 1336 - }, - { - "epoch": 0.4546072764365862, - "grad_norm": 2.94241862218008, - "learning_rate": 5.9741011050913e-06, - "loss": 0.7631, - "step": 1337 - }, - { - "epoch": 0.45494729683781027, - "grad_norm": 2.260745557245212, - "learning_rate": 5.968698347429864e-06, - "loss": 0.8574, - "step": 1338 - }, - { - "epoch": 0.4552873172390343, - "grad_norm": 1.9189222697412902, - "learning_rate": 5.96329441435887e-06, - "loss": 0.8627, - "step": 1339 - }, - { - "epoch": 0.4556273376402584, - "grad_norm": 2.3145970729902725, - "learning_rate": 5.9578893124354e-06, - "loss": 0.8203, - "step": 1340 - }, - { - "epoch": 0.4559673580414825, - "grad_norm": 1.7097699263826742, - "learning_rate": 5.9524830482179565e-06, - "loss": 0.8143, - "step": 1341 - }, - { - "epoch": 0.4563073784427066, - "grad_norm": 1.7644511385918709, - "learning_rate": 5.9470756282664455e-06, - "loss": 0.8428, - "step": 1342 - }, - { - "epoch": 0.45664739884393063, - "grad_norm": 2.0834007860288835, - "learning_rate": 5.941667059142184e-06, - "loss": 0.8975, - "step": 1343 - }, - { - "epoch": 0.45698741924515474, - "grad_norm": 1.9264637214129008, - "learning_rate": 5.936257347407877e-06, - "loss": 0.7147, - "step": 1344 - }, - { - "epoch": 0.4573274396463788, - "grad_norm": 1.7108620469281093, - "learning_rate": 5.9308464996276195e-06, - "loss": 0.8773, - "step": 1345 - }, - { - "epoch": 0.45766746004760284, - "grad_norm": 1.8740444271707064, - "learning_rate": 5.925434522366884e-06, - "loss": 0.8765, - "step": 1346 - }, - { - "epoch": 0.45800748044882694, - "grad_norm": 2.9159889860260706, - "learning_rate": 5.920021422192512e-06, - "loss": 0.7429, - "step": 1347 - }, - { - "epoch": 0.458347500850051, - "grad_norm": 2.2683984119230964, - "learning_rate": 5.914607205672711e-06, - "loss": 0.8265, - "step": 1348 - }, - { - "epoch": 0.4586875212512751, - "grad_norm": 2.229706808343337, - "learning_rate": 5.909191879377041e-06, - "loss": 0.8355, - "step": 1349 - }, - { - "epoch": 0.45902754165249915, - "grad_norm": 2.284917272323551, - "learning_rate": 5.903775449876406e-06, - "loss": 0.706, - "step": 1350 - }, - { - "epoch": 0.4593675620537232, - "grad_norm": 1.9399879628948615, - "learning_rate": 5.898357923743052e-06, - "loss": 0.6978, - "step": 1351 - }, - { - "epoch": 0.4597075824549473, - "grad_norm": 2.0206889656325777, - "learning_rate": 5.892939307550556e-06, - "loss": 0.7937, - "step": 1352 - }, - { - "epoch": 0.46004760285617136, - "grad_norm": 2.0270151745480107, - "learning_rate": 5.887519607873815e-06, - "loss": 0.801, - "step": 1353 - }, - { - "epoch": 0.46038762325739546, - "grad_norm": 1.8581224024582177, - "learning_rate": 5.882098831289044e-06, - "loss": 0.8618, - "step": 1354 - }, - { - "epoch": 0.4607276436586195, - "grad_norm": 1.9755282021333846, - "learning_rate": 5.8766769843737604e-06, - "loss": 0.7721, - "step": 1355 - }, - { - "epoch": 0.4610676640598436, - "grad_norm": 1.8373059374517786, - "learning_rate": 5.8712540737067835e-06, - "loss": 0.7952, - "step": 1356 - }, - { - "epoch": 0.46140768446106767, - "grad_norm": 1.5959326369230662, - "learning_rate": 5.865830105868226e-06, - "loss": 0.7782, - "step": 1357 - }, - { - "epoch": 0.4617477048622917, - "grad_norm": 1.9988446870321313, - "learning_rate": 5.860405087439475e-06, - "loss": 0.8748, - "step": 1358 - }, - { - "epoch": 0.4620877252635158, - "grad_norm": 4.024311573353348, - "learning_rate": 5.8549790250032e-06, - "loss": 0.7804, - "step": 1359 - }, - { - "epoch": 0.4624277456647399, - "grad_norm": 1.7924867446440473, - "learning_rate": 5.849551925143334e-06, - "loss": 0.7366, - "step": 1360 - }, - { - "epoch": 0.462767766065964, - "grad_norm": 1.9333933529106293, - "learning_rate": 5.84412379444507e-06, - "loss": 0.7634, - "step": 1361 - }, - { - "epoch": 0.46310778646718803, - "grad_norm": 2.239027398676504, - "learning_rate": 5.838694639494852e-06, - "loss": 0.7516, - "step": 1362 - }, - { - "epoch": 0.4634478068684121, - "grad_norm": 2.564558822755495, - "learning_rate": 5.833264466880363e-06, - "loss": 0.7493, - "step": 1363 - }, - { - "epoch": 0.4637878272696362, - "grad_norm": 2.406933971641201, - "learning_rate": 5.827833283190527e-06, - "loss": 0.7643, - "step": 1364 - }, - { - "epoch": 0.46412784767086024, - "grad_norm": 1.6911599873633802, - "learning_rate": 5.8224010950154895e-06, - "loss": 0.8361, - "step": 1365 - }, - { - "epoch": 0.46446786807208434, - "grad_norm": 1.8143704851616336, - "learning_rate": 5.81696790894662e-06, - "loss": 0.8781, - "step": 1366 - }, - { - "epoch": 0.4648078884733084, - "grad_norm": 1.9049878875916997, - "learning_rate": 5.811533731576494e-06, - "loss": 0.883, - "step": 1367 - }, - { - "epoch": 0.46514790887453245, - "grad_norm": 1.6427893085515717, - "learning_rate": 5.806098569498892e-06, - "loss": 0.7631, - "step": 1368 - }, - { - "epoch": 0.46548792927575655, - "grad_norm": 1.950478915992292, - "learning_rate": 5.800662429308787e-06, - "loss": 0.7777, - "step": 1369 - }, - { - "epoch": 0.4658279496769806, - "grad_norm": 2.3387814639730995, - "learning_rate": 5.795225317602344e-06, - "loss": 0.7839, - "step": 1370 - }, - { - "epoch": 0.4661679700782047, - "grad_norm": 2.3136990487853306, - "learning_rate": 5.789787240976903e-06, - "loss": 0.8801, - "step": 1371 - }, - { - "epoch": 0.46650799047942876, - "grad_norm": 1.883331409095713, - "learning_rate": 5.784348206030974e-06, - "loss": 0.7718, - "step": 1372 - }, - { - "epoch": 0.46684801088065286, - "grad_norm": 1.5552364347893213, - "learning_rate": 5.778908219364234e-06, - "loss": 0.7953, - "step": 1373 - }, - { - "epoch": 0.4671880312818769, - "grad_norm": 1.8758755215294272, - "learning_rate": 5.77346728757751e-06, - "loss": 0.9304, - "step": 1374 - }, - { - "epoch": 0.46752805168310096, - "grad_norm": 2.8864202166172857, - "learning_rate": 5.768025417272779e-06, - "loss": 0.8601, - "step": 1375 - }, - { - "epoch": 0.46786807208432507, - "grad_norm": 1.7477299549548073, - "learning_rate": 5.762582615053155e-06, - "loss": 0.8618, - "step": 1376 - }, - { - "epoch": 0.4682080924855491, - "grad_norm": 1.6359643840822298, - "learning_rate": 5.757138887522884e-06, - "loss": 0.8735, - "step": 1377 - }, - { - "epoch": 0.4685481128867732, - "grad_norm": 1.9885997278079388, - "learning_rate": 5.751694241287336e-06, - "loss": 0.7201, - "step": 1378 - }, - { - "epoch": 0.4688881332879973, - "grad_norm": 2.024775175272147, - "learning_rate": 5.7462486829529895e-06, - "loss": 0.9019, - "step": 1379 - }, - { - "epoch": 0.46922815368922133, - "grad_norm": 2.1517611217388164, - "learning_rate": 5.7408022191274385e-06, - "loss": 0.7558, - "step": 1380 - }, - { - "epoch": 0.46956817409044543, - "grad_norm": 1.6071554576538785, - "learning_rate": 5.735354856419371e-06, - "loss": 0.7544, - "step": 1381 - }, - { - "epoch": 0.4699081944916695, - "grad_norm": 3.0679645169810588, - "learning_rate": 5.729906601438564e-06, - "loss": 0.6876, - "step": 1382 - }, - { - "epoch": 0.4702482148928936, - "grad_norm": 1.7186871226619356, - "learning_rate": 5.724457460795883e-06, - "loss": 0.9415, - "step": 1383 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 2.8408348818159648, - "learning_rate": 5.71900744110326e-06, - "loss": 0.7498, - "step": 1384 - }, - { - "epoch": 0.47092825569534175, - "grad_norm": 1.5730885516835262, - "learning_rate": 5.713556548973701e-06, - "loss": 0.8499, - "step": 1385 - }, - { - "epoch": 0.4712682760965658, - "grad_norm": 2.4054974564917146, - "learning_rate": 5.708104791021267e-06, - "loss": 0.7346, - "step": 1386 - }, - { - "epoch": 0.47160829649778985, - "grad_norm": 7.6135309739245605, - "learning_rate": 5.702652173861073e-06, - "loss": 0.9721, - "step": 1387 - }, - { - "epoch": 0.47194831689901395, - "grad_norm": 2.157238376626225, - "learning_rate": 5.697198704109269e-06, - "loss": 0.8068, - "step": 1388 - }, - { - "epoch": 0.472288337300238, - "grad_norm": 2.0366968869428206, - "learning_rate": 5.691744388383047e-06, - "loss": 0.8825, - "step": 1389 - }, - { - "epoch": 0.4726283577014621, - "grad_norm": 3.332007974752036, - "learning_rate": 5.686289233300625e-06, - "loss": 0.8573, - "step": 1390 - }, - { - "epoch": 0.47296837810268616, - "grad_norm": 2.996038777755443, - "learning_rate": 5.680833245481234e-06, - "loss": 0.7937, - "step": 1391 - }, - { - "epoch": 0.4733083985039102, - "grad_norm": 1.876848717220222, - "learning_rate": 5.6753764315451196e-06, - "loss": 0.8256, - "step": 1392 - }, - { - "epoch": 0.4736484189051343, - "grad_norm": 1.7456685592190304, - "learning_rate": 5.669918798113531e-06, - "loss": 0.781, - "step": 1393 - }, - { - "epoch": 0.47398843930635837, - "grad_norm": 2.1150837869236243, - "learning_rate": 5.664460351808706e-06, - "loss": 0.7863, - "step": 1394 - }, - { - "epoch": 0.4743284597075825, - "grad_norm": 2.106099742367351, - "learning_rate": 5.659001099253878e-06, - "loss": 0.8522, - "step": 1395 - }, - { - "epoch": 0.4746684801088065, - "grad_norm": 1.6430769745388225, - "learning_rate": 5.653541047073248e-06, - "loss": 0.8509, - "step": 1396 - }, - { - "epoch": 0.47500850051003063, - "grad_norm": 2.7934049226507893, - "learning_rate": 5.648080201891994e-06, - "loss": 0.6624, - "step": 1397 - }, - { - "epoch": 0.4753485209112547, - "grad_norm": 1.8064873975135194, - "learning_rate": 5.642618570336258e-06, - "loss": 0.7733, - "step": 1398 - }, - { - "epoch": 0.47568854131247873, - "grad_norm": 1.7201690606896272, - "learning_rate": 5.637156159033129e-06, - "loss": 0.7874, - "step": 1399 - }, - { - "epoch": 0.47602856171370284, - "grad_norm": 2.377775914178791, - "learning_rate": 5.631692974610647e-06, - "loss": 0.9048, - "step": 1400 - }, - { - "epoch": 0.4763685821149269, - "grad_norm": 1.9190322261379347, - "learning_rate": 5.626229023697789e-06, - "loss": 0.8154, - "step": 1401 - }, - { - "epoch": 0.476708602516151, - "grad_norm": 1.846727656609046, - "learning_rate": 5.6207643129244625e-06, - "loss": 0.7141, - "step": 1402 - }, - { - "epoch": 0.47704862291737504, - "grad_norm": 1.6992506612080924, - "learning_rate": 5.6152988489214985e-06, - "loss": 0.8167, - "step": 1403 - }, - { - "epoch": 0.4773886433185991, - "grad_norm": 1.8237274064366962, - "learning_rate": 5.609832638320637e-06, - "loss": 0.8415, - "step": 1404 - }, - { - "epoch": 0.4777286637198232, - "grad_norm": 2.046569580094362, - "learning_rate": 5.604365687754528e-06, - "loss": 0.8745, - "step": 1405 - }, - { - "epoch": 0.47806868412104725, - "grad_norm": 2.576331253597654, - "learning_rate": 5.59889800385672e-06, - "loss": 0.7876, - "step": 1406 - }, - { - "epoch": 0.47840870452227136, - "grad_norm": 1.696243517927065, - "learning_rate": 5.59342959326165e-06, - "loss": 0.6787, - "step": 1407 - }, - { - "epoch": 0.4787487249234954, - "grad_norm": 1.7387459270264907, - "learning_rate": 5.587960462604634e-06, - "loss": 0.7973, - "step": 1408 - }, - { - "epoch": 0.47908874532471946, - "grad_norm": 2.1302859398907934, - "learning_rate": 5.582490618521864e-06, - "loss": 0.7099, - "step": 1409 - }, - { - "epoch": 0.47942876572594356, - "grad_norm": 1.7380121057600302, - "learning_rate": 5.5770200676504e-06, - "loss": 0.877, - "step": 1410 - }, - { - "epoch": 0.4797687861271676, - "grad_norm": 2.327507521749447, - "learning_rate": 5.571548816628159e-06, - "loss": 0.7612, - "step": 1411 - }, - { - "epoch": 0.4801088065283917, - "grad_norm": 1.9212379412038696, - "learning_rate": 5.5660768720939e-06, - "loss": 0.8138, - "step": 1412 - }, - { - "epoch": 0.48044882692961577, - "grad_norm": 1.6433392321151539, - "learning_rate": 5.560604240687235e-06, - "loss": 0.8439, - "step": 1413 - }, - { - "epoch": 0.4807888473308399, - "grad_norm": 1.6964960554479955, - "learning_rate": 5.555130929048603e-06, - "loss": 0.8821, - "step": 1414 - }, - { - "epoch": 0.4811288677320639, - "grad_norm": 1.8194872677059573, - "learning_rate": 5.5496569438192695e-06, - "loss": 0.7899, - "step": 1415 - }, - { - "epoch": 0.481468888133288, - "grad_norm": 1.8072104696328097, - "learning_rate": 5.544182291641317e-06, - "loss": 0.7687, - "step": 1416 - }, - { - "epoch": 0.4818089085345121, - "grad_norm": 1.7790457934236203, - "learning_rate": 5.538706979157635e-06, - "loss": 0.7862, - "step": 1417 - }, - { - "epoch": 0.48214892893573613, - "grad_norm": 1.7951850721079083, - "learning_rate": 5.533231013011919e-06, - "loss": 0.7515, - "step": 1418 - }, - { - "epoch": 0.48248894933696024, - "grad_norm": 1.5561662456874903, - "learning_rate": 5.527754399848657e-06, - "loss": 0.8133, - "step": 1419 - }, - { - "epoch": 0.4828289697381843, - "grad_norm": 2.157936588540733, - "learning_rate": 5.522277146313117e-06, - "loss": 0.7538, - "step": 1420 - }, - { - "epoch": 0.48316899013940834, - "grad_norm": 2.4290999302724288, - "learning_rate": 5.51679925905135e-06, - "loss": 0.7814, - "step": 1421 - }, - { - "epoch": 0.48350901054063244, - "grad_norm": 1.841958993342082, - "learning_rate": 5.511320744710171e-06, - "loss": 0.8118, - "step": 1422 - }, - { - "epoch": 0.4838490309418565, - "grad_norm": 2.09445138995467, - "learning_rate": 5.505841609937162e-06, - "loss": 0.771, - "step": 1423 - }, - { - "epoch": 0.4841890513430806, - "grad_norm": 1.9025737655981094, - "learning_rate": 5.500361861380651e-06, - "loss": 0.8158, - "step": 1424 - }, - { - "epoch": 0.48452907174430465, - "grad_norm": 1.593238831589755, - "learning_rate": 5.494881505689714e-06, - "loss": 0.845, - "step": 1425 - }, - { - "epoch": 0.48486909214552876, - "grad_norm": 2.1406933443102902, - "learning_rate": 5.489400549514165e-06, - "loss": 0.7092, - "step": 1426 - }, - { - "epoch": 0.4852091125467528, - "grad_norm": 2.5531087262152354, - "learning_rate": 5.483918999504544e-06, - "loss": 0.7776, - "step": 1427 - }, - { - "epoch": 0.48554913294797686, - "grad_norm": 1.919105692583851, - "learning_rate": 5.478436862312113e-06, - "loss": 0.719, - "step": 1428 - }, - { - "epoch": 0.48588915334920096, - "grad_norm": 1.8829313213513676, - "learning_rate": 5.472954144588847e-06, - "loss": 0.7841, - "step": 1429 - }, - { - "epoch": 0.486229173750425, - "grad_norm": 1.4881374527192293, - "learning_rate": 5.467470852987424e-06, - "loss": 0.7724, - "step": 1430 - }, - { - "epoch": 0.4865691941516491, - "grad_norm": 1.924768602698392, - "learning_rate": 5.4619869941612204e-06, - "loss": 0.7726, - "step": 1431 - }, - { - "epoch": 0.48690921455287317, - "grad_norm": 1.5858390673137608, - "learning_rate": 5.456502574764299e-06, - "loss": 0.8339, - "step": 1432 - }, - { - "epoch": 0.4872492349540972, - "grad_norm": 2.009244140179676, - "learning_rate": 5.4510176014514e-06, - "loss": 0.8099, - "step": 1433 - }, - { - "epoch": 0.4875892553553213, - "grad_norm": 1.7556534967172963, - "learning_rate": 5.445532080877942e-06, - "loss": 0.8161, - "step": 1434 - }, - { - "epoch": 0.4879292757565454, - "grad_norm": 2.1544729558220315, - "learning_rate": 5.440046019700004e-06, - "loss": 0.822, - "step": 1435 - }, - { - "epoch": 0.4882692961577695, - "grad_norm": 2.050182429485166, - "learning_rate": 5.434559424574323e-06, - "loss": 0.7798, - "step": 1436 - }, - { - "epoch": 0.48860931655899353, - "grad_norm": 2.250279830505212, - "learning_rate": 5.429072302158279e-06, - "loss": 0.772, - "step": 1437 - }, - { - "epoch": 0.48894933696021764, - "grad_norm": 1.7152397535373827, - "learning_rate": 5.4235846591098995e-06, - "loss": 0.7366, - "step": 1438 - }, - { - "epoch": 0.4892893573614417, - "grad_norm": 1.99011884513494, - "learning_rate": 5.4180965020878365e-06, - "loss": 0.7173, - "step": 1439 - }, - { - "epoch": 0.48962937776266574, - "grad_norm": 2.173655615360162, - "learning_rate": 5.41260783775137e-06, - "loss": 0.7406, - "step": 1440 - }, - { - "epoch": 0.48996939816388985, - "grad_norm": 2.2971002978470576, - "learning_rate": 5.407118672760393e-06, - "loss": 0.9206, - "step": 1441 - }, - { - "epoch": 0.4903094185651139, - "grad_norm": 1.5023371228055133, - "learning_rate": 5.401629013775408e-06, - "loss": 0.8379, - "step": 1442 - }, - { - "epoch": 0.490649438966338, - "grad_norm": 2.0910378047987375, - "learning_rate": 5.396138867457517e-06, - "loss": 0.829, - "step": 1443 - }, - { - "epoch": 0.49098945936756205, - "grad_norm": 3.7490164898531675, - "learning_rate": 5.39064824046841e-06, - "loss": 0.9118, - "step": 1444 - }, - { - "epoch": 0.4913294797687861, - "grad_norm": 2.126644865691582, - "learning_rate": 5.385157139470365e-06, - "loss": 0.865, - "step": 1445 - }, - { - "epoch": 0.4916695001700102, - "grad_norm": 1.8516246611445681, - "learning_rate": 5.379665571126232e-06, - "loss": 0.8226, - "step": 1446 - }, - { - "epoch": 0.49200952057123426, - "grad_norm": 2.3484056426904467, - "learning_rate": 5.374173542099429e-06, - "loss": 0.7315, - "step": 1447 - }, - { - "epoch": 0.49234954097245837, - "grad_norm": 1.5288170614125434, - "learning_rate": 5.368681059053934e-06, - "loss": 0.791, - "step": 1448 - }, - { - "epoch": 0.4926895613736824, - "grad_norm": 1.9647101647201137, - "learning_rate": 5.363188128654272e-06, - "loss": 0.7512, - "step": 1449 - }, - { - "epoch": 0.49302958177490647, - "grad_norm": 1.5696810158458874, - "learning_rate": 5.357694757565515e-06, - "loss": 0.8299, - "step": 1450 - }, - { - "epoch": 0.4933696021761306, - "grad_norm": 2.0603657799295165, - "learning_rate": 5.352200952453268e-06, - "loss": 0.8204, - "step": 1451 - }, - { - "epoch": 0.4937096225773546, - "grad_norm": 1.9159683428169025, - "learning_rate": 5.3467067199836665e-06, - "loss": 0.832, - "step": 1452 - }, - { - "epoch": 0.49404964297857873, - "grad_norm": 2.2261677908581343, - "learning_rate": 5.341212066823356e-06, - "loss": 0.8217, - "step": 1453 - }, - { - "epoch": 0.4943896633798028, - "grad_norm": 1.81536044760645, - "learning_rate": 5.335716999639499e-06, - "loss": 0.7984, - "step": 1454 - }, - { - "epoch": 0.4947296837810269, - "grad_norm": 2.9109596077188447, - "learning_rate": 5.330221525099761e-06, - "loss": 0.7177, - "step": 1455 - }, - { - "epoch": 0.49506970418225094, - "grad_norm": 2.0524828104111554, - "learning_rate": 5.3247256498722985e-06, - "loss": 0.8618, - "step": 1456 - }, - { - "epoch": 0.495409724583475, - "grad_norm": 1.7519967928504512, - "learning_rate": 5.319229380625754e-06, - "loss": 0.8823, - "step": 1457 - }, - { - "epoch": 0.4957497449846991, - "grad_norm": 1.8637116727130303, - "learning_rate": 5.31373272402925e-06, - "loss": 0.7384, - "step": 1458 - }, - { - "epoch": 0.49608976538592314, - "grad_norm": 2.152147896428193, - "learning_rate": 5.308235686752379e-06, - "loss": 0.8812, - "step": 1459 - }, - { - "epoch": 0.49642978578714725, - "grad_norm": 2.078235845036691, - "learning_rate": 5.302738275465196e-06, - "loss": 0.7727, - "step": 1460 - }, - { - "epoch": 0.4967698061883713, - "grad_norm": 2.102266420017895, - "learning_rate": 5.297240496838206e-06, - "loss": 0.8562, - "step": 1461 - }, - { - "epoch": 0.49710982658959535, - "grad_norm": 1.8174835697796294, - "learning_rate": 5.291742357542364e-06, - "loss": 0.8048, - "step": 1462 - }, - { - "epoch": 0.49744984699081946, - "grad_norm": 1.754386383349789, - "learning_rate": 5.2862438642490634e-06, - "loss": 0.7872, - "step": 1463 - }, - { - "epoch": 0.4977898673920435, - "grad_norm": 4.158515391970638, - "learning_rate": 5.280745023630119e-06, - "loss": 0.7779, - "step": 1464 - }, - { - "epoch": 0.4981298877932676, - "grad_norm": 2.4054339081607767, - "learning_rate": 5.275245842357778e-06, - "loss": 0.7462, - "step": 1465 - }, - { - "epoch": 0.49846990819449166, - "grad_norm": 2.689734918133284, - "learning_rate": 5.269746327104693e-06, - "loss": 0.8174, - "step": 1466 - }, - { - "epoch": 0.49880992859571577, - "grad_norm": 2.295691045707937, - "learning_rate": 5.264246484543926e-06, - "loss": 0.7969, - "step": 1467 - }, - { - "epoch": 0.4991499489969398, - "grad_norm": 2.107973616659225, - "learning_rate": 5.258746321348934e-06, - "loss": 0.7944, - "step": 1468 - }, - { - "epoch": 0.49948996939816387, - "grad_norm": 1.930350616477822, - "learning_rate": 5.253245844193564e-06, - "loss": 0.8304, - "step": 1469 - }, - { - "epoch": 0.499829989799388, - "grad_norm": 1.7525142127819853, - "learning_rate": 5.247745059752044e-06, - "loss": 0.7762, - "step": 1470 - }, - { - "epoch": 0.500170010200612, - "grad_norm": 1.8529587101036085, - "learning_rate": 5.242243974698975e-06, - "loss": 0.8314, - "step": 1471 - }, - { - "epoch": 0.5005100306018361, - "grad_norm": 2.3903901423865457, - "learning_rate": 5.236742595709321e-06, - "loss": 0.7822, - "step": 1472 - }, - { - "epoch": 0.5008500510030602, - "grad_norm": 2.0348324811278777, - "learning_rate": 5.231240929458406e-06, - "loss": 0.7494, - "step": 1473 - }, - { - "epoch": 0.5011900714042843, - "grad_norm": 1.7162050480913733, - "learning_rate": 5.225738982621898e-06, - "loss": 0.9737, - "step": 1474 - }, - { - "epoch": 0.5015300918055083, - "grad_norm": 2.2064804559522093, - "learning_rate": 5.220236761875811e-06, - "loss": 0.7815, - "step": 1475 - }, - { - "epoch": 0.5018701122067324, - "grad_norm": 2.2251124219486798, - "learning_rate": 5.214734273896488e-06, - "loss": 0.7881, - "step": 1476 - }, - { - "epoch": 0.5022101326079564, - "grad_norm": 2.981440295612256, - "learning_rate": 5.209231525360594e-06, - "loss": 0.8, - "step": 1477 - }, - { - "epoch": 0.5025501530091806, - "grad_norm": 7.45515078847437, - "learning_rate": 5.203728522945115e-06, - "loss": 0.7911, - "step": 1478 - }, - { - "epoch": 0.5028901734104047, - "grad_norm": 1.92941189117845, - "learning_rate": 5.198225273327343e-06, - "loss": 0.7445, - "step": 1479 - }, - { - "epoch": 0.5032301938116287, - "grad_norm": 1.8975113113950242, - "learning_rate": 5.1927217831848685e-06, - "loss": 0.843, - "step": 1480 - }, - { - "epoch": 0.5035702142128528, - "grad_norm": 2.1094734040883893, - "learning_rate": 5.187218059195578e-06, - "loss": 0.8277, - "step": 1481 - }, - { - "epoch": 0.5039102346140768, - "grad_norm": 2.1248192008186964, - "learning_rate": 5.181714108037635e-06, - "loss": 0.7933, - "step": 1482 - }, - { - "epoch": 0.504250255015301, - "grad_norm": 1.8280812151104824, - "learning_rate": 5.176209936389485e-06, - "loss": 0.7447, - "step": 1483 - }, - { - "epoch": 0.504590275416525, - "grad_norm": 3.305850569207107, - "learning_rate": 5.17070555092984e-06, - "loss": 0.7644, - "step": 1484 - }, - { - "epoch": 0.5049302958177491, - "grad_norm": 2.714270205426286, - "learning_rate": 5.1652009583376676e-06, - "loss": 0.7827, - "step": 1485 - }, - { - "epoch": 0.5052703162189731, - "grad_norm": 3.091352082861896, - "learning_rate": 5.159696165292189e-06, - "loss": 0.8001, - "step": 1486 - }, - { - "epoch": 0.5056103366201972, - "grad_norm": 1.593603634259395, - "learning_rate": 5.154191178472873e-06, - "loss": 0.8329, - "step": 1487 - }, - { - "epoch": 0.5059503570214213, - "grad_norm": 2.1224743879515633, - "learning_rate": 5.148686004559412e-06, - "loss": 0.7409, - "step": 1488 - }, - { - "epoch": 0.5062903774226454, - "grad_norm": 3.01066198517414, - "learning_rate": 5.143180650231741e-06, - "loss": 0.8388, - "step": 1489 - }, - { - "epoch": 0.5066303978238694, - "grad_norm": 1.7136792637059917, - "learning_rate": 5.13767512217e-06, - "loss": 0.7341, - "step": 1490 - }, - { - "epoch": 0.5069704182250935, - "grad_norm": 2.0631670268496096, - "learning_rate": 5.1321694270545455e-06, - "loss": 0.7773, - "step": 1491 - }, - { - "epoch": 0.5073104386263175, - "grad_norm": 1.874567686624954, - "learning_rate": 5.12666357156594e-06, - "loss": 0.7862, - "step": 1492 - }, - { - "epoch": 0.5076504590275417, - "grad_norm": 6.715516555383307, - "learning_rate": 5.121157562384936e-06, - "loss": 0.8309, - "step": 1493 - }, - { - "epoch": 0.5079904794287657, - "grad_norm": 1.8974326591409743, - "learning_rate": 5.115651406192473e-06, - "loss": 0.8229, - "step": 1494 - }, - { - "epoch": 0.5083304998299898, - "grad_norm": 1.7753191132738406, - "learning_rate": 5.110145109669671e-06, - "loss": 0.7212, - "step": 1495 - }, - { - "epoch": 0.5086705202312138, - "grad_norm": 1.8871431993037888, - "learning_rate": 5.104638679497818e-06, - "loss": 0.7695, - "step": 1496 - }, - { - "epoch": 0.5090105406324379, - "grad_norm": 2.1209005798272993, - "learning_rate": 5.0991321223583655e-06, - "loss": 0.8439, - "step": 1497 - }, - { - "epoch": 0.509350561033662, - "grad_norm": 1.8469772744160644, - "learning_rate": 5.093625444932917e-06, - "loss": 0.806, - "step": 1498 - }, - { - "epoch": 0.5096905814348861, - "grad_norm": 2.281929279661747, - "learning_rate": 5.088118653903225e-06, - "loss": 0.8326, - "step": 1499 - }, - { - "epoch": 0.5100306018361102, - "grad_norm": 1.9613864164506285, - "learning_rate": 5.08261175595118e-06, - "loss": 0.6774, - "step": 1500 - }, - { - "epoch": 0.5103706222373342, - "grad_norm": 3.9527172939988366, - "learning_rate": 5.0771047577587995e-06, - "loss": 0.7849, - "step": 1501 - }, - { - "epoch": 0.5107106426385584, - "grad_norm": 3.9507534634382218, - "learning_rate": 5.071597666008223e-06, - "loss": 0.8327, - "step": 1502 - }, - { - "epoch": 0.5110506630397824, - "grad_norm": 2.3477801057322587, - "learning_rate": 5.066090487381705e-06, - "loss": 0.7659, - "step": 1503 - }, - { - "epoch": 0.5113906834410065, - "grad_norm": 1.6637921910839917, - "learning_rate": 5.060583228561604e-06, - "loss": 0.7807, - "step": 1504 - }, - { - "epoch": 0.5117307038422305, - "grad_norm": 1.9869572742030395, - "learning_rate": 5.055075896230379e-06, - "loss": 0.8009, - "step": 1505 - }, - { - "epoch": 0.5120707242434546, - "grad_norm": 2.7596350421531493, - "learning_rate": 5.0495684970705725e-06, - "loss": 0.8015, - "step": 1506 - }, - { - "epoch": 0.5124107446446787, - "grad_norm": 2.5992292669330306, - "learning_rate": 5.044061037764814e-06, - "loss": 0.7465, - "step": 1507 - }, - { - "epoch": 0.5127507650459028, - "grad_norm": 1.7505681170663439, - "learning_rate": 5.0385535249958015e-06, - "loss": 0.8182, - "step": 1508 - }, - { - "epoch": 0.5130907854471268, - "grad_norm": 2.1632485122473404, - "learning_rate": 5.033045965446303e-06, - "loss": 0.7183, - "step": 1509 - }, - { - "epoch": 0.5134308058483509, - "grad_norm": 1.753555699009244, - "learning_rate": 5.027538365799135e-06, - "loss": 0.7862, - "step": 1510 - }, - { - "epoch": 0.5137708262495749, - "grad_norm": 1.9243297591580018, - "learning_rate": 5.022030732737172e-06, - "loss": 0.8458, - "step": 1511 - }, - { - "epoch": 0.5141108466507991, - "grad_norm": 2.177617314164665, - "learning_rate": 5.016523072943321e-06, - "loss": 0.8359, - "step": 1512 - }, - { - "epoch": 0.5144508670520231, - "grad_norm": 1.4739366045699038, - "learning_rate": 5.011015393100529e-06, - "loss": 0.7725, - "step": 1513 - }, - { - "epoch": 0.5147908874532472, - "grad_norm": 1.9809330712521047, - "learning_rate": 5.00550769989176e-06, - "loss": 0.8364, - "step": 1514 - }, - { - "epoch": 0.5151309078544712, - "grad_norm": 1.7781499734908552, - "learning_rate": 5e-06, - "loss": 0.7835, - "step": 1515 - }, - { - "epoch": 0.5154709282556953, - "grad_norm": 1.8799570009883524, - "learning_rate": 4.994492300108241e-06, - "loss": 0.7751, - "step": 1516 - }, - { - "epoch": 0.5158109486569195, - "grad_norm": 3.073785329201592, - "learning_rate": 4.988984606899473e-06, - "loss": 0.7751, - "step": 1517 - }, - { - "epoch": 0.5161509690581435, - "grad_norm": 1.776054822698832, - "learning_rate": 4.9834769270566805e-06, - "loss": 0.853, - "step": 1518 - }, - { - "epoch": 0.5164909894593676, - "grad_norm": 1.7634367926127321, - "learning_rate": 4.977969267262829e-06, - "loss": 0.8076, - "step": 1519 - }, - { - "epoch": 0.5168310098605916, - "grad_norm": 4.086423843722984, - "learning_rate": 4.972461634200866e-06, - "loss": 0.9011, - "step": 1520 - }, - { - "epoch": 0.5171710302618157, - "grad_norm": 1.6421236623641557, - "learning_rate": 4.966954034553699e-06, - "loss": 0.8642, - "step": 1521 - }, - { - "epoch": 0.5175110506630398, - "grad_norm": 1.8584454764781853, - "learning_rate": 4.961446475004199e-06, - "loss": 0.7653, - "step": 1522 - }, - { - "epoch": 0.5178510710642639, - "grad_norm": 1.4656599468459788, - "learning_rate": 4.955938962235186e-06, - "loss": 0.8445, - "step": 1523 - }, - { - "epoch": 0.5181910914654879, - "grad_norm": 1.8519620362169094, - "learning_rate": 4.950431502929428e-06, - "loss": 0.7378, - "step": 1524 - }, - { - "epoch": 0.518531111866712, - "grad_norm": 2.3725424976917413, - "learning_rate": 4.944924103769623e-06, - "loss": 0.7779, - "step": 1525 - }, - { - "epoch": 0.5188711322679361, - "grad_norm": 1.7226943911959502, - "learning_rate": 4.939416771438397e-06, - "loss": 0.7654, - "step": 1526 - }, - { - "epoch": 0.5192111526691602, - "grad_norm": 1.8557184031747187, - "learning_rate": 4.933909512618298e-06, - "loss": 0.863, - "step": 1527 - }, - { - "epoch": 0.5195511730703842, - "grad_norm": 2.115844790979398, - "learning_rate": 4.928402333991777e-06, - "loss": 0.7592, - "step": 1528 - }, - { - "epoch": 0.5198911934716083, - "grad_norm": 1.6348805097468087, - "learning_rate": 4.922895242241202e-06, - "loss": 0.8855, - "step": 1529 - }, - { - "epoch": 0.5202312138728323, - "grad_norm": 2.3404047261939858, - "learning_rate": 4.91738824404882e-06, - "loss": 0.8327, - "step": 1530 - }, - { - "epoch": 0.5205712342740565, - "grad_norm": 1.8623557309255268, - "learning_rate": 4.9118813460967754e-06, - "loss": 0.8303, - "step": 1531 - }, - { - "epoch": 0.5209112546752805, - "grad_norm": 2.2490241499645847, - "learning_rate": 4.906374555067085e-06, - "loss": 0.8482, - "step": 1532 - }, - { - "epoch": 0.5212512750765046, - "grad_norm": 2.331643434530923, - "learning_rate": 4.900867877641636e-06, - "loss": 0.839, - "step": 1533 - }, - { - "epoch": 0.5215912954777286, - "grad_norm": 2.095247817090777, - "learning_rate": 4.895361320502185e-06, - "loss": 0.7988, - "step": 1534 - }, - { - "epoch": 0.5219313158789527, - "grad_norm": 1.723635906341325, - "learning_rate": 4.88985489033033e-06, - "loss": 0.7934, - "step": 1535 - }, - { - "epoch": 0.5222713362801769, - "grad_norm": 2.046788329469224, - "learning_rate": 4.8843485938075286e-06, - "loss": 0.817, - "step": 1536 - }, - { - "epoch": 0.5226113566814009, - "grad_norm": 2.0488449285989003, - "learning_rate": 4.878842437615065e-06, - "loss": 0.7112, - "step": 1537 - }, - { - "epoch": 0.522951377082625, - "grad_norm": 1.9845826095458905, - "learning_rate": 4.873336428434062e-06, - "loss": 0.759, - "step": 1538 - }, - { - "epoch": 0.523291397483849, - "grad_norm": 4.138763157740066, - "learning_rate": 4.8678305729454545e-06, - "loss": 0.8152, - "step": 1539 - }, - { - "epoch": 0.5236314178850731, - "grad_norm": 3.647861718118265, - "learning_rate": 4.862324877830003e-06, - "loss": 0.8438, - "step": 1540 - }, - { - "epoch": 0.5239714382862972, - "grad_norm": 2.2618049383095196, - "learning_rate": 4.856819349768262e-06, - "loss": 0.7159, - "step": 1541 - }, - { - "epoch": 0.5243114586875213, - "grad_norm": 1.5426259829995164, - "learning_rate": 4.851313995440589e-06, - "loss": 0.7474, - "step": 1542 - }, - { - "epoch": 0.5246514790887453, - "grad_norm": 1.9326266116251898, - "learning_rate": 4.845808821527131e-06, - "loss": 0.7739, - "step": 1543 - }, - { - "epoch": 0.5249914994899694, - "grad_norm": 1.838847556325622, - "learning_rate": 4.840303834707811e-06, - "loss": 0.7753, - "step": 1544 - }, - { - "epoch": 0.5253315198911934, - "grad_norm": 1.7052452591835734, - "learning_rate": 4.834799041662333e-06, - "loss": 0.6825, - "step": 1545 - }, - { - "epoch": 0.5256715402924176, - "grad_norm": 2.2889041372273056, - "learning_rate": 4.829294449070161e-06, - "loss": 0.8191, - "step": 1546 - }, - { - "epoch": 0.5260115606936416, - "grad_norm": 3.1528847052040416, - "learning_rate": 4.8237900636105154e-06, - "loss": 0.8092, - "step": 1547 - }, - { - "epoch": 0.5263515810948657, - "grad_norm": 4.090914456990795, - "learning_rate": 4.818285891962367e-06, - "loss": 0.8098, - "step": 1548 - }, - { - "epoch": 0.5266916014960897, - "grad_norm": 1.659291969185622, - "learning_rate": 4.812781940804424e-06, - "loss": 0.8033, - "step": 1549 - }, - { - "epoch": 0.5270316218973138, - "grad_norm": 2.134430251801482, - "learning_rate": 4.807278216815132e-06, - "loss": 0.8078, - "step": 1550 - }, - { - "epoch": 0.527371642298538, - "grad_norm": 3.1931811524180778, - "learning_rate": 4.801774726672658e-06, - "loss": 0.9237, - "step": 1551 - }, - { - "epoch": 0.527711662699762, - "grad_norm": 2.616206525270748, - "learning_rate": 4.796271477054887e-06, - "loss": 0.7764, - "step": 1552 - }, - { - "epoch": 0.528051683100986, - "grad_norm": 2.221145968614602, - "learning_rate": 4.790768474639407e-06, - "loss": 0.8206, - "step": 1553 - }, - { - "epoch": 0.5283917035022101, - "grad_norm": 2.892055480983333, - "learning_rate": 4.785265726103514e-06, - "loss": 0.7451, - "step": 1554 - }, - { - "epoch": 0.5287317239034343, - "grad_norm": 1.6659559597323008, - "learning_rate": 4.77976323812419e-06, - "loss": 0.8594, - "step": 1555 - }, - { - "epoch": 0.5290717443046583, - "grad_norm": 1.9584708522977028, - "learning_rate": 4.7742610173781025e-06, - "loss": 0.7449, - "step": 1556 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 3.613058182805375, - "learning_rate": 4.768759070541596e-06, - "loss": 0.8322, - "step": 1557 - }, - { - "epoch": 0.5297517851071064, - "grad_norm": 1.9177605042321149, - "learning_rate": 4.76325740429068e-06, - "loss": 0.8372, - "step": 1558 - }, - { - "epoch": 0.5300918055083305, - "grad_norm": 1.9899246030541402, - "learning_rate": 4.7577560253010275e-06, - "loss": 0.7641, - "step": 1559 - }, - { - "epoch": 0.5304318259095546, - "grad_norm": 2.3476977109929975, - "learning_rate": 4.752254940247956e-06, - "loss": 0.8484, - "step": 1560 - }, - { - "epoch": 0.5307718463107787, - "grad_norm": 1.8469551861867957, - "learning_rate": 4.746754155806437e-06, - "loss": 0.8196, - "step": 1561 - }, - { - "epoch": 0.5311118667120027, - "grad_norm": 2.1425767308127495, - "learning_rate": 4.741253678651067e-06, - "loss": 0.86, - "step": 1562 - }, - { - "epoch": 0.5314518871132268, - "grad_norm": 2.4488270324011054, - "learning_rate": 4.735753515456076e-06, - "loss": 0.801, - "step": 1563 - }, - { - "epoch": 0.5317919075144508, - "grad_norm": 2.5625039316596947, - "learning_rate": 4.7302536728953095e-06, - "loss": 0.7215, - "step": 1564 - }, - { - "epoch": 0.532131927915675, - "grad_norm": 2.330648518728265, - "learning_rate": 4.724754157642223e-06, - "loss": 0.8298, - "step": 1565 - }, - { - "epoch": 0.532471948316899, - "grad_norm": 1.6442430241473305, - "learning_rate": 4.719254976369882e-06, - "loss": 0.8346, - "step": 1566 - }, - { - "epoch": 0.5328119687181231, - "grad_norm": 2.06375287355313, - "learning_rate": 4.713756135750939e-06, - "loss": 0.8094, - "step": 1567 - }, - { - "epoch": 0.5331519891193471, - "grad_norm": 1.8032858583233626, - "learning_rate": 4.708257642457637e-06, - "loss": 0.7847, - "step": 1568 - }, - { - "epoch": 0.5334920095205712, - "grad_norm": 2.9072319400722106, - "learning_rate": 4.702759503161794e-06, - "loss": 0.7787, - "step": 1569 - }, - { - "epoch": 0.5338320299217953, - "grad_norm": 2.737668124200652, - "learning_rate": 4.697261724534805e-06, - "loss": 0.9145, - "step": 1570 - }, - { - "epoch": 0.5341720503230194, - "grad_norm": 1.5899160859318942, - "learning_rate": 4.691764313247621e-06, - "loss": 0.806, - "step": 1571 - }, - { - "epoch": 0.5345120707242434, - "grad_norm": 2.128696822075121, - "learning_rate": 4.686267275970751e-06, - "loss": 0.8027, - "step": 1572 - }, - { - "epoch": 0.5348520911254675, - "grad_norm": 6.327392776694456, - "learning_rate": 4.680770619374248e-06, - "loss": 0.9375, - "step": 1573 - }, - { - "epoch": 0.5351921115266915, - "grad_norm": 1.8507733306360983, - "learning_rate": 4.675274350127702e-06, - "loss": 0.6373, - "step": 1574 - }, - { - "epoch": 0.5355321319279157, - "grad_norm": 3.2014249362155764, - "learning_rate": 4.669778474900241e-06, - "loss": 0.7396, - "step": 1575 - }, - { - "epoch": 0.5358721523291398, - "grad_norm": 1.8220955607716283, - "learning_rate": 4.664283000360501e-06, - "loss": 0.7536, - "step": 1576 - }, - { - "epoch": 0.5362121727303638, - "grad_norm": 2.0103763713686202, - "learning_rate": 4.6587879331766465e-06, - "loss": 0.766, - "step": 1577 - }, - { - "epoch": 0.5365521931315879, - "grad_norm": 1.9882582274236145, - "learning_rate": 4.653293280016335e-06, - "loss": 0.7164, - "step": 1578 - }, - { - "epoch": 0.536892213532812, - "grad_norm": 2.332642295450942, - "learning_rate": 4.647799047546733e-06, - "loss": 0.804, - "step": 1579 - }, - { - "epoch": 0.5372322339340361, - "grad_norm": 2.1481887049056367, - "learning_rate": 4.642305242434488e-06, - "loss": 0.8621, - "step": 1580 - }, - { - "epoch": 0.5375722543352601, - "grad_norm": 2.233927473620768, - "learning_rate": 4.63681187134573e-06, - "loss": 0.8682, - "step": 1581 - }, - { - "epoch": 0.5379122747364842, - "grad_norm": 1.8595284687765963, - "learning_rate": 4.6313189409460694e-06, - "loss": 0.8078, - "step": 1582 - }, - { - "epoch": 0.5382522951377082, - "grad_norm": 1.6189064806741513, - "learning_rate": 4.625826457900573e-06, - "loss": 0.7825, - "step": 1583 - }, - { - "epoch": 0.5385923155389324, - "grad_norm": 1.7109496499151693, - "learning_rate": 4.62033442887377e-06, - "loss": 0.8204, - "step": 1584 - }, - { - "epoch": 0.5389323359401564, - "grad_norm": 1.9628012655876577, - "learning_rate": 4.614842860529636e-06, - "loss": 0.7718, - "step": 1585 - }, - { - "epoch": 0.5392723563413805, - "grad_norm": 2.215853939912509, - "learning_rate": 4.6093517595315906e-06, - "loss": 0.8478, - "step": 1586 - }, - { - "epoch": 0.5396123767426045, - "grad_norm": 1.7285326667444412, - "learning_rate": 4.603861132542484e-06, - "loss": 0.7447, - "step": 1587 - }, - { - "epoch": 0.5399523971438286, - "grad_norm": 1.8991399112924394, - "learning_rate": 4.598370986224594e-06, - "loss": 0.804, - "step": 1588 - }, - { - "epoch": 0.5402924175450527, - "grad_norm": 2.1876690348212477, - "learning_rate": 4.59288132723961e-06, - "loss": 0.827, - "step": 1589 - }, - { - "epoch": 0.5406324379462768, - "grad_norm": 3.388267144912403, - "learning_rate": 4.587392162248631e-06, - "loss": 0.9509, - "step": 1590 - }, - { - "epoch": 0.5409724583475009, - "grad_norm": 2.75901823504875, - "learning_rate": 4.581903497912164e-06, - "loss": 0.8255, - "step": 1591 - }, - { - "epoch": 0.5413124787487249, - "grad_norm": 3.0797491504488193, - "learning_rate": 4.576415340890101e-06, - "loss": 0.9066, - "step": 1592 - }, - { - "epoch": 0.541652499149949, - "grad_norm": 1.7281255669918878, - "learning_rate": 4.570927697841722e-06, - "loss": 0.8885, - "step": 1593 - }, - { - "epoch": 0.5419925195511731, - "grad_norm": 1.780260195431119, - "learning_rate": 4.565440575425678e-06, - "loss": 0.8186, - "step": 1594 - }, - { - "epoch": 0.5423325399523972, - "grad_norm": 1.698715980373987, - "learning_rate": 4.559953980299998e-06, - "loss": 0.7423, - "step": 1595 - }, - { - "epoch": 0.5426725603536212, - "grad_norm": 1.8233055697908436, - "learning_rate": 4.554467919122061e-06, - "loss": 0.7461, - "step": 1596 - }, - { - "epoch": 0.5430125807548453, - "grad_norm": 2.0732693883965747, - "learning_rate": 4.548982398548601e-06, - "loss": 0.8519, - "step": 1597 - }, - { - "epoch": 0.5433526011560693, - "grad_norm": 4.076606566555027, - "learning_rate": 4.543497425235705e-06, - "loss": 0.8375, - "step": 1598 - }, - { - "epoch": 0.5436926215572935, - "grad_norm": 3.726800785573923, - "learning_rate": 4.538013005838781e-06, - "loss": 0.8457, - "step": 1599 - }, - { - "epoch": 0.5440326419585175, - "grad_norm": 1.640942051570328, - "learning_rate": 4.532529147012578e-06, - "loss": 0.7555, - "step": 1600 - }, - { - "epoch": 0.5443726623597416, - "grad_norm": 2.1236371326115004, - "learning_rate": 4.527045855411153e-06, - "loss": 0.7701, - "step": 1601 - }, - { - "epoch": 0.5447126827609656, - "grad_norm": 1.7181777314185185, - "learning_rate": 4.521563137687889e-06, - "loss": 0.8164, - "step": 1602 - }, - { - "epoch": 0.5450527031621897, - "grad_norm": 2.448664581131007, - "learning_rate": 4.516081000495458e-06, - "loss": 0.8668, - "step": 1603 - }, - { - "epoch": 0.5453927235634138, - "grad_norm": 2.494208677148641, - "learning_rate": 4.510599450485838e-06, - "loss": 0.8405, - "step": 1604 - }, - { - "epoch": 0.5457327439646379, - "grad_norm": 1.9946946250715516, - "learning_rate": 4.505118494310289e-06, - "loss": 0.8654, - "step": 1605 - }, - { - "epoch": 0.5460727643658619, - "grad_norm": 2.016493132082253, - "learning_rate": 4.499638138619351e-06, - "loss": 0.7986, - "step": 1606 - }, - { - "epoch": 0.546412784767086, - "grad_norm": 5.5575317498533705, - "learning_rate": 4.49415839006284e-06, - "loss": 0.8583, - "step": 1607 - }, - { - "epoch": 0.5467528051683102, - "grad_norm": 1.5958424468455283, - "learning_rate": 4.488679255289829e-06, - "loss": 0.7993, - "step": 1608 - }, - { - "epoch": 0.5470928255695342, - "grad_norm": 3.075588169517648, - "learning_rate": 4.483200740948652e-06, - "loss": 0.6526, - "step": 1609 - }, - { - "epoch": 0.5474328459707583, - "grad_norm": 2.27438391686465, - "learning_rate": 4.477722853686883e-06, - "loss": 0.7749, - "step": 1610 - }, - { - "epoch": 0.5477728663719823, - "grad_norm": 1.899436303639678, - "learning_rate": 4.472245600151344e-06, - "loss": 0.7449, - "step": 1611 - }, - { - "epoch": 0.5481128867732064, - "grad_norm": 1.8159424708385177, - "learning_rate": 4.466768986988082e-06, - "loss": 0.7725, - "step": 1612 - }, - { - "epoch": 0.5484529071744305, - "grad_norm": 2.3206076275584118, - "learning_rate": 4.461293020842366e-06, - "loss": 0.8011, - "step": 1613 - }, - { - "epoch": 0.5487929275756546, - "grad_norm": 2.5334818132018735, - "learning_rate": 4.4558177083586855e-06, - "loss": 0.8291, - "step": 1614 - }, - { - "epoch": 0.5491329479768786, - "grad_norm": 1.824370986639498, - "learning_rate": 4.450343056180731e-06, - "loss": 0.8763, - "step": 1615 - }, - { - "epoch": 0.5494729683781027, - "grad_norm": 2.265085787284215, - "learning_rate": 4.444869070951398e-06, - "loss": 0.7383, - "step": 1616 - }, - { - "epoch": 0.5498129887793267, - "grad_norm": 1.5706339248830496, - "learning_rate": 4.439395759312765e-06, - "loss": 0.7321, - "step": 1617 - }, - { - "epoch": 0.5501530091805509, - "grad_norm": 1.8450960380829842, - "learning_rate": 4.433923127906101e-06, - "loss": 0.8253, - "step": 1618 - }, - { - "epoch": 0.5504930295817749, - "grad_norm": 1.8895954088436864, - "learning_rate": 4.428451183371844e-06, - "loss": 0.7584, - "step": 1619 - }, - { - "epoch": 0.550833049982999, - "grad_norm": 1.7647869933641376, - "learning_rate": 4.422979932349601e-06, - "loss": 0.8461, - "step": 1620 - }, - { - "epoch": 0.551173070384223, - "grad_norm": 1.726859257880695, - "learning_rate": 4.417509381478139e-06, - "loss": 0.9478, - "step": 1621 - }, - { - "epoch": 0.5515130907854471, - "grad_norm": 1.7830292603282358, - "learning_rate": 4.412039537395369e-06, - "loss": 0.8192, - "step": 1622 - }, - { - "epoch": 0.5518531111866712, - "grad_norm": 2.135528104500953, - "learning_rate": 4.4065704067383526e-06, - "loss": 0.789, - "step": 1623 - }, - { - "epoch": 0.5521931315878953, - "grad_norm": 2.2835744809721947, - "learning_rate": 4.401101996143281e-06, - "loss": 0.7897, - "step": 1624 - }, - { - "epoch": 0.5525331519891193, - "grad_norm": 1.9968779612406193, - "learning_rate": 4.395634312245473e-06, - "loss": 0.8017, - "step": 1625 - }, - { - "epoch": 0.5528731723903434, - "grad_norm": 1.7386489203169355, - "learning_rate": 4.390167361679363e-06, - "loss": 0.8258, - "step": 1626 - }, - { - "epoch": 0.5532131927915674, - "grad_norm": 2.3880896647187813, - "learning_rate": 4.384701151078502e-06, - "loss": 0.6548, - "step": 1627 - }, - { - "epoch": 0.5535532131927916, - "grad_norm": 2.3052787463523248, - "learning_rate": 4.379235687075538e-06, - "loss": 0.8939, - "step": 1628 - }, - { - "epoch": 0.5538932335940157, - "grad_norm": 1.9456406943708848, - "learning_rate": 4.373770976302212e-06, - "loss": 0.7207, - "step": 1629 - }, - { - "epoch": 0.5542332539952397, - "grad_norm": 2.233470695480692, - "learning_rate": 4.368307025389355e-06, - "loss": 0.9426, - "step": 1630 - }, - { - "epoch": 0.5545732743964638, - "grad_norm": 1.9001780195038485, - "learning_rate": 4.362843840966872e-06, - "loss": 0.7396, - "step": 1631 - }, - { - "epoch": 0.5549132947976878, - "grad_norm": 1.6170663156516558, - "learning_rate": 4.357381429663744e-06, - "loss": 0.7398, - "step": 1632 - }, - { - "epoch": 0.555253315198912, - "grad_norm": 1.7903568747147653, - "learning_rate": 4.351919798108006e-06, - "loss": 0.7973, - "step": 1633 - }, - { - "epoch": 0.555593335600136, - "grad_norm": 1.8519949395264552, - "learning_rate": 4.346458952926754e-06, - "loss": 0.7845, - "step": 1634 - }, - { - "epoch": 0.5559333560013601, - "grad_norm": 2.312852525979355, - "learning_rate": 4.340998900746123e-06, - "loss": 0.7661, - "step": 1635 - }, - { - "epoch": 0.5562733764025841, - "grad_norm": 1.9538845003355985, - "learning_rate": 4.335539648191295e-06, - "loss": 0.8089, - "step": 1636 - }, - { - "epoch": 0.5566133968038083, - "grad_norm": 2.0572388515661495, - "learning_rate": 4.330081201886473e-06, - "loss": 0.8594, - "step": 1637 - }, - { - "epoch": 0.5569534172050323, - "grad_norm": 1.7570956508285698, - "learning_rate": 4.324623568454881e-06, - "loss": 0.7019, - "step": 1638 - }, - { - "epoch": 0.5572934376062564, - "grad_norm": 2.6666409487156297, - "learning_rate": 4.319166754518768e-06, - "loss": 0.8802, - "step": 1639 - }, - { - "epoch": 0.5576334580074804, - "grad_norm": 8.161817414104679, - "learning_rate": 4.313710766699377e-06, - "loss": 0.8173, - "step": 1640 - }, - { - "epoch": 0.5579734784087045, - "grad_norm": 1.5517265216589782, - "learning_rate": 4.308255611616954e-06, - "loss": 0.7627, - "step": 1641 - }, - { - "epoch": 0.5583134988099286, - "grad_norm": 2.276892346390744, - "learning_rate": 4.302801295890731e-06, - "loss": 0.8266, - "step": 1642 - }, - { - "epoch": 0.5586535192111527, - "grad_norm": 1.5667650901676113, - "learning_rate": 4.297347826138929e-06, - "loss": 0.7707, - "step": 1643 - }, - { - "epoch": 0.5589935396123767, - "grad_norm": 1.6949899190570468, - "learning_rate": 4.291895208978734e-06, - "loss": 0.7413, - "step": 1644 - }, - { - "epoch": 0.5593335600136008, - "grad_norm": 2.006877875279477, - "learning_rate": 4.2864434510263e-06, - "loss": 0.7829, - "step": 1645 - }, - { - "epoch": 0.5596735804148248, - "grad_norm": 3.095263458414816, - "learning_rate": 4.280992558896742e-06, - "loss": 0.7722, - "step": 1646 - }, - { - "epoch": 0.560013600816049, - "grad_norm": 2.240127359014377, - "learning_rate": 4.275542539204118e-06, - "loss": 0.7562, - "step": 1647 - }, - { - "epoch": 0.5603536212172731, - "grad_norm": 2.1541576931196107, - "learning_rate": 4.270093398561437e-06, - "loss": 0.7223, - "step": 1648 - }, - { - "epoch": 0.5606936416184971, - "grad_norm": 1.890091367677857, - "learning_rate": 4.26464514358063e-06, - "loss": 0.7961, - "step": 1649 - }, - { - "epoch": 0.5610336620197212, - "grad_norm": 2.1168877878836514, - "learning_rate": 4.259197780872562e-06, - "loss": 0.8332, - "step": 1650 - }, - { - "epoch": 0.5613736824209452, - "grad_norm": 1.7140754905216478, - "learning_rate": 4.2537513170470105e-06, - "loss": 0.8327, - "step": 1651 - }, - { - "epoch": 0.5617137028221694, - "grad_norm": 2.441707848688923, - "learning_rate": 4.248305758712666e-06, - "loss": 0.7136, - "step": 1652 - }, - { - "epoch": 0.5620537232233934, - "grad_norm": 2.2663018210778483, - "learning_rate": 4.2428611124771184e-06, - "loss": 0.7338, - "step": 1653 - }, - { - "epoch": 0.5623937436246175, - "grad_norm": 1.7042736119872506, - "learning_rate": 4.237417384946846e-06, - "loss": 0.8221, - "step": 1654 - }, - { - "epoch": 0.5627337640258415, - "grad_norm": 1.7857448847407418, - "learning_rate": 4.231974582727223e-06, - "loss": 0.8938, - "step": 1655 - }, - { - "epoch": 0.5630737844270656, - "grad_norm": 8.534990207003116, - "learning_rate": 4.226532712422492e-06, - "loss": 0.8593, - "step": 1656 - }, - { - "epoch": 0.5634138048282897, - "grad_norm": 2.0702826945140456, - "learning_rate": 4.221091780635768e-06, - "loss": 0.8043, - "step": 1657 - }, - { - "epoch": 0.5637538252295138, - "grad_norm": 1.880149571201394, - "learning_rate": 4.215651793969026e-06, - "loss": 0.7408, - "step": 1658 - }, - { - "epoch": 0.5640938456307378, - "grad_norm": 1.9094946983165033, - "learning_rate": 4.210212759023099e-06, - "loss": 0.85, - "step": 1659 - }, - { - "epoch": 0.5644338660319619, - "grad_norm": 2.0826267533303144, - "learning_rate": 4.204774682397658e-06, - "loss": 0.7968, - "step": 1660 - }, - { - "epoch": 0.564773886433186, - "grad_norm": 1.9576152950783854, - "learning_rate": 4.199337570691214e-06, - "loss": 0.7934, - "step": 1661 - }, - { - "epoch": 0.5651139068344101, - "grad_norm": 1.8605000019574227, - "learning_rate": 4.1939014305011116e-06, - "loss": 0.7489, - "step": 1662 - }, - { - "epoch": 0.5654539272356341, - "grad_norm": 1.7452937853609298, - "learning_rate": 4.188466268423507e-06, - "loss": 0.798, - "step": 1663 - }, - { - "epoch": 0.5657939476368582, - "grad_norm": 2.2351881221107233, - "learning_rate": 4.183032091053381e-06, - "loss": 0.7977, - "step": 1664 - }, - { - "epoch": 0.5661339680380822, - "grad_norm": 1.949488664977891, - "learning_rate": 4.1775989049845105e-06, - "loss": 0.7882, - "step": 1665 - }, - { - "epoch": 0.5664739884393064, - "grad_norm": 1.9412918778769286, - "learning_rate": 4.172166716809475e-06, - "loss": 0.8033, - "step": 1666 - }, - { - "epoch": 0.5668140088405305, - "grad_norm": 1.8838839464107233, - "learning_rate": 4.166735533119638e-06, - "loss": 0.7347, - "step": 1667 - }, - { - "epoch": 0.5671540292417545, - "grad_norm": 2.4094539519885823, - "learning_rate": 4.16130536050515e-06, - "loss": 0.8985, - "step": 1668 - }, - { - "epoch": 0.5674940496429786, - "grad_norm": 1.832393433721923, - "learning_rate": 4.155876205554931e-06, - "loss": 0.7948, - "step": 1669 - }, - { - "epoch": 0.5678340700442026, - "grad_norm": 1.7842845158543639, - "learning_rate": 4.150448074856667e-06, - "loss": 0.856, - "step": 1670 - }, - { - "epoch": 0.5681740904454268, - "grad_norm": 5.093298965869656, - "learning_rate": 4.145020974996802e-06, - "loss": 0.8544, - "step": 1671 - }, - { - "epoch": 0.5685141108466508, - "grad_norm": 1.8288966692154494, - "learning_rate": 4.139594912560526e-06, - "loss": 0.7695, - "step": 1672 - }, - { - "epoch": 0.5688541312478749, - "grad_norm": 2.5087803047490196, - "learning_rate": 4.134169894131776e-06, - "loss": 0.8, - "step": 1673 - }, - { - "epoch": 0.5691941516490989, - "grad_norm": 3.9759397871617006, - "learning_rate": 4.1287459262932164e-06, - "loss": 0.8681, - "step": 1674 - }, - { - "epoch": 0.569534172050323, - "grad_norm": 1.7556306149575895, - "learning_rate": 4.123323015626241e-06, - "loss": 0.9425, - "step": 1675 - }, - { - "epoch": 0.5698741924515471, - "grad_norm": 1.822886620788618, - "learning_rate": 4.11790116871096e-06, - "loss": 0.8339, - "step": 1676 - }, - { - "epoch": 0.5702142128527712, - "grad_norm": 2.32577771957017, - "learning_rate": 4.112480392126187e-06, - "loss": 0.7799, - "step": 1677 - }, - { - "epoch": 0.5705542332539952, - "grad_norm": 2.3654748461284267, - "learning_rate": 4.107060692449447e-06, - "loss": 0.7794, - "step": 1678 - }, - { - "epoch": 0.5708942536552193, - "grad_norm": 2.1326059990159, - "learning_rate": 4.1016420762569496e-06, - "loss": 0.6922, - "step": 1679 - }, - { - "epoch": 0.5712342740564433, - "grad_norm": 1.6500811511500117, - "learning_rate": 4.096224550123597e-06, - "loss": 0.9321, - "step": 1680 - }, - { - "epoch": 0.5715742944576675, - "grad_norm": 2.288324199496334, - "learning_rate": 4.090808120622961e-06, - "loss": 0.8088, - "step": 1681 - }, - { - "epoch": 0.5719143148588915, - "grad_norm": 3.335932302163143, - "learning_rate": 4.08539279432729e-06, - "loss": 0.7918, - "step": 1682 - }, - { - "epoch": 0.5722543352601156, - "grad_norm": 2.018206897778278, - "learning_rate": 4.079978577807487e-06, - "loss": 0.8091, - "step": 1683 - }, - { - "epoch": 0.5725943556613396, - "grad_norm": 1.8917366828566053, - "learning_rate": 4.074565477633117e-06, - "loss": 0.8174, - "step": 1684 - }, - { - "epoch": 0.5729343760625637, - "grad_norm": 1.6840308709452019, - "learning_rate": 4.069153500372382e-06, - "loss": 0.794, - "step": 1685 - }, - { - "epoch": 0.5732743964637879, - "grad_norm": 2.0560413539850972, - "learning_rate": 4.063742652592125e-06, - "loss": 0.8338, - "step": 1686 - }, - { - "epoch": 0.5736144168650119, - "grad_norm": 2.715704335230041, - "learning_rate": 4.0583329408578185e-06, - "loss": 0.8608, - "step": 1687 - }, - { - "epoch": 0.573954437266236, - "grad_norm": 1.8308222638540965, - "learning_rate": 4.052924371733555e-06, - "loss": 0.7391, - "step": 1688 - }, - { - "epoch": 0.57429445766746, - "grad_norm": 3.93217327404691, - "learning_rate": 4.047516951782046e-06, - "loss": 0.8336, - "step": 1689 - }, - { - "epoch": 0.5746344780686842, - "grad_norm": 1.7155299358553424, - "learning_rate": 4.0421106875646e-06, - "loss": 0.7387, - "step": 1690 - }, - { - "epoch": 0.5749744984699082, - "grad_norm": 2.924796486558408, - "learning_rate": 4.036705585641131e-06, - "loss": 0.8656, - "step": 1691 - }, - { - "epoch": 0.5753145188711323, - "grad_norm": 2.1154010899917015, - "learning_rate": 4.031301652570139e-06, - "loss": 0.8103, - "step": 1692 - }, - { - "epoch": 0.5756545392723563, - "grad_norm": 1.7593975839358962, - "learning_rate": 4.0258988949087015e-06, - "loss": 0.7343, - "step": 1693 - }, - { - "epoch": 0.5759945596735804, - "grad_norm": 7.277324615448209, - "learning_rate": 4.020497319212482e-06, - "loss": 0.9342, - "step": 1694 - }, - { - "epoch": 0.5763345800748045, - "grad_norm": 1.8762066741865282, - "learning_rate": 4.015096932035695e-06, - "loss": 0.8569, - "step": 1695 - }, - { - "epoch": 0.5766746004760286, - "grad_norm": 2.5094661743116227, - "learning_rate": 4.009697739931125e-06, - "loss": 0.7803, - "step": 1696 - }, - { - "epoch": 0.5770146208772526, - "grad_norm": 2.368718744294282, - "learning_rate": 4.004299749450099e-06, - "loss": 0.7593, - "step": 1697 - }, - { - "epoch": 0.5773546412784767, - "grad_norm": 1.7379622792872598, - "learning_rate": 3.99890296714249e-06, - "loss": 0.8102, - "step": 1698 - }, - { - "epoch": 0.5776946616797007, - "grad_norm": 1.7950661253826894, - "learning_rate": 3.993507399556699e-06, - "loss": 0.8261, - "step": 1699 - }, - { - "epoch": 0.5780346820809249, - "grad_norm": 1.7895033857380052, - "learning_rate": 3.988113053239664e-06, - "loss": 0.7831, - "step": 1700 - }, - { - "epoch": 0.578374702482149, - "grad_norm": 2.513724608663744, - "learning_rate": 3.982719934736832e-06, - "loss": 0.7863, - "step": 1701 - }, - { - "epoch": 0.578714722883373, - "grad_norm": 1.7341110425502526, - "learning_rate": 3.977328050592161e-06, - "loss": 0.9247, - "step": 1702 - }, - { - "epoch": 0.579054743284597, - "grad_norm": 1.8845071679357839, - "learning_rate": 3.971937407348115e-06, - "loss": 0.8488, - "step": 1703 - }, - { - "epoch": 0.5793947636858211, - "grad_norm": 1.8072089808348282, - "learning_rate": 3.966548011545648e-06, - "loss": 0.7179, - "step": 1704 - }, - { - "epoch": 0.5797347840870453, - "grad_norm": 3.0702338796288307, - "learning_rate": 3.961159869724207e-06, - "loss": 0.7202, - "step": 1705 - }, - { - "epoch": 0.5800748044882693, - "grad_norm": 1.759787279998497, - "learning_rate": 3.955772988421709e-06, - "loss": 0.768, - "step": 1706 - }, - { - "epoch": 0.5804148248894934, - "grad_norm": 2.3433623345553034, - "learning_rate": 3.950387374174548e-06, - "loss": 0.6933, - "step": 1707 - }, - { - "epoch": 0.5807548452907174, - "grad_norm": 1.7657990260540302, - "learning_rate": 3.945003033517578e-06, - "loss": 0.7882, - "step": 1708 - }, - { - "epoch": 0.5810948656919415, - "grad_norm": 1.6247662467902115, - "learning_rate": 3.9396199729841044e-06, - "loss": 0.7497, - "step": 1709 - }, - { - "epoch": 0.5814348860931656, - "grad_norm": 4.31861063498843, - "learning_rate": 3.934238199105887e-06, - "loss": 0.6626, - "step": 1710 - }, - { - "epoch": 0.5817749064943897, - "grad_norm": 1.9596435537578243, - "learning_rate": 3.928857718413119e-06, - "loss": 0.6802, - "step": 1711 - }, - { - "epoch": 0.5821149268956137, - "grad_norm": 2.133099365831387, - "learning_rate": 3.9234785374344264e-06, - "loss": 0.8929, - "step": 1712 - }, - { - "epoch": 0.5824549472968378, - "grad_norm": 1.6595599877288754, - "learning_rate": 3.918100662696853e-06, - "loss": 0.9019, - "step": 1713 - }, - { - "epoch": 0.5827949676980619, - "grad_norm": 4.129797231985507, - "learning_rate": 3.9127241007258695e-06, - "loss": 0.704, - "step": 1714 - }, - { - "epoch": 0.583134988099286, - "grad_norm": 2.2096678154176854, - "learning_rate": 3.907348858045338e-06, - "loss": 0.767, - "step": 1715 - }, - { - "epoch": 0.58347500850051, - "grad_norm": 2.050103065477324, - "learning_rate": 3.9019749411775336e-06, - "loss": 0.7629, - "step": 1716 - }, - { - "epoch": 0.5838150289017341, - "grad_norm": 2.224362598207352, - "learning_rate": 3.8966023566431154e-06, - "loss": 0.7301, - "step": 1717 - }, - { - "epoch": 0.5841550493029581, - "grad_norm": 3.471505729243675, - "learning_rate": 3.891231110961126e-06, - "loss": 0.8771, - "step": 1718 - }, - { - "epoch": 0.5844950697041823, - "grad_norm": 2.2233454405320017, - "learning_rate": 3.885861210648987e-06, - "loss": 0.8412, - "step": 1719 - }, - { - "epoch": 0.5848350901054064, - "grad_norm": 1.8614586671959485, - "learning_rate": 3.880492662222483e-06, - "loss": 0.7183, - "step": 1720 - }, - { - "epoch": 0.5851751105066304, - "grad_norm": 3.3042359139392645, - "learning_rate": 3.875125472195764e-06, - "loss": 0.7574, - "step": 1721 - }, - { - "epoch": 0.5855151309078545, - "grad_norm": 1.6934396731563182, - "learning_rate": 3.869759647081326e-06, - "loss": 0.7454, - "step": 1722 - }, - { - "epoch": 0.5858551513090785, - "grad_norm": 1.9354501363208503, - "learning_rate": 3.8643951933900125e-06, - "loss": 0.8003, - "step": 1723 - }, - { - "epoch": 0.5861951717103027, - "grad_norm": 2.033785372249832, - "learning_rate": 3.859032117631002e-06, - "loss": 0.9099, - "step": 1724 - }, - { - "epoch": 0.5865351921115267, - "grad_norm": 1.6684392122823892, - "learning_rate": 3.853670426311797e-06, - "loss": 0.7391, - "step": 1725 - }, - { - "epoch": 0.5868752125127508, - "grad_norm": 2.023118010181095, - "learning_rate": 3.848310125938229e-06, - "loss": 0.8358, - "step": 1726 - }, - { - "epoch": 0.5872152329139748, - "grad_norm": 2.3269706588518604, - "learning_rate": 3.842951223014433e-06, - "loss": 0.8102, - "step": 1727 - }, - { - "epoch": 0.5875552533151989, - "grad_norm": 1.9400944184932607, - "learning_rate": 3.837593724042854e-06, - "loss": 0.7688, - "step": 1728 - }, - { - "epoch": 0.587895273716423, - "grad_norm": 1.6651797388262217, - "learning_rate": 3.832237635524229e-06, - "loss": 0.7588, - "step": 1729 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 1.5455784587197665, - "learning_rate": 3.826882963957589e-06, - "loss": 0.7464, - "step": 1730 - }, - { - "epoch": 0.5885753145188711, - "grad_norm": 1.85747121655971, - "learning_rate": 3.821529715840241e-06, - "loss": 0.7595, - "step": 1731 - }, - { - "epoch": 0.5889153349200952, - "grad_norm": 2.6341826537925495, - "learning_rate": 3.816177897667767e-06, - "loss": 0.7596, - "step": 1732 - }, - { - "epoch": 0.5892553553213192, - "grad_norm": 2.3495118501585717, - "learning_rate": 3.810827515934013e-06, - "loss": 0.7428, - "step": 1733 - }, - { - "epoch": 0.5895953757225434, - "grad_norm": 4.349211175598281, - "learning_rate": 3.8054785771310817e-06, - "loss": 0.7613, - "step": 1734 - }, - { - "epoch": 0.5899353961237674, - "grad_norm": 2.1363024851103876, - "learning_rate": 3.8001310877493265e-06, - "loss": 0.844, - "step": 1735 - }, - { - "epoch": 0.5902754165249915, - "grad_norm": 4.0769838432188505, - "learning_rate": 3.7947850542773396e-06, - "loss": 0.7463, - "step": 1736 - }, - { - "epoch": 0.5906154369262155, - "grad_norm": 1.6554083285238113, - "learning_rate": 3.7894404832019514e-06, - "loss": 0.8421, - "step": 1737 - }, - { - "epoch": 0.5909554573274396, - "grad_norm": 1.503047933162913, - "learning_rate": 3.784097381008212e-06, - "loss": 0.7792, - "step": 1738 - }, - { - "epoch": 0.5912954777286638, - "grad_norm": 5.221808674383001, - "learning_rate": 3.778755754179394e-06, - "loss": 0.9246, - "step": 1739 - }, - { - "epoch": 0.5916354981298878, - "grad_norm": 1.679707592266952, - "learning_rate": 3.7734156091969766e-06, - "loss": 0.8391, - "step": 1740 - }, - { - "epoch": 0.5919755185311119, - "grad_norm": 2.1866548043016145, - "learning_rate": 3.7680769525406398e-06, - "loss": 0.8404, - "step": 1741 - }, - { - "epoch": 0.5923155389323359, - "grad_norm": 1.9227767898016401, - "learning_rate": 3.762739790688264e-06, - "loss": 0.6675, - "step": 1742 - }, - { - "epoch": 0.5926555593335601, - "grad_norm": 2.2649866796850584, - "learning_rate": 3.757404130115909e-06, - "loss": 0.7401, - "step": 1743 - }, - { - "epoch": 0.5929955797347841, - "grad_norm": 1.972603607998956, - "learning_rate": 3.752069977297817e-06, - "loss": 0.7905, - "step": 1744 - }, - { - "epoch": 0.5933356001360082, - "grad_norm": 2.2111615655437995, - "learning_rate": 3.7467373387063973e-06, - "loss": 0.7023, - "step": 1745 - }, - { - "epoch": 0.5936756205372322, - "grad_norm": 1.9245235611037521, - "learning_rate": 3.741406220812227e-06, - "loss": 0.9047, - "step": 1746 - }, - { - "epoch": 0.5940156409384563, - "grad_norm": 1.733840029867424, - "learning_rate": 3.7360766300840323e-06, - "loss": 0.7679, - "step": 1747 - }, - { - "epoch": 0.5943556613396804, - "grad_norm": 1.679265103876112, - "learning_rate": 3.7307485729886917e-06, - "loss": 0.835, - "step": 1748 - }, - { - "epoch": 0.5946956817409045, - "grad_norm": 1.751220728165291, - "learning_rate": 3.725422055991218e-06, - "loss": 0.7547, - "step": 1749 - }, - { - "epoch": 0.5950357021421285, - "grad_norm": 1.8592254513227962, - "learning_rate": 3.720097085554756e-06, - "loss": 0.789, - "step": 1750 - }, - { - "epoch": 0.5953757225433526, - "grad_norm": 1.5578672257415482, - "learning_rate": 3.7147736681405784e-06, - "loss": 0.7403, - "step": 1751 - }, - { - "epoch": 0.5957157429445766, - "grad_norm": 1.7162665067510214, - "learning_rate": 3.709451810208068e-06, - "loss": 0.7607, - "step": 1752 - }, - { - "epoch": 0.5960557633458008, - "grad_norm": 1.8084933447047609, - "learning_rate": 3.7041315182147203e-06, - "loss": 0.7423, - "step": 1753 - }, - { - "epoch": 0.5963957837470248, - "grad_norm": 1.6360987508622518, - "learning_rate": 3.6988127986161247e-06, - "loss": 0.7364, - "step": 1754 - }, - { - "epoch": 0.5967358041482489, - "grad_norm": 1.620889477822385, - "learning_rate": 3.6934956578659697e-06, - "loss": 0.7419, - "step": 1755 - }, - { - "epoch": 0.5970758245494729, - "grad_norm": 2.005053928359656, - "learning_rate": 3.688180102416022e-06, - "loss": 0.8189, - "step": 1756 - }, - { - "epoch": 0.597415844950697, - "grad_norm": 2.49591536599233, - "learning_rate": 3.682866138716126e-06, - "loss": 0.7555, - "step": 1757 - }, - { - "epoch": 0.5977558653519212, - "grad_norm": 1.9099333716331244, - "learning_rate": 3.6775537732141986e-06, - "loss": 0.6334, - "step": 1758 - }, - { - "epoch": 0.5980958857531452, - "grad_norm": 1.6443659221282751, - "learning_rate": 3.6722430123562124e-06, - "loss": 0.8068, - "step": 1759 - }, - { - "epoch": 0.5984359061543693, - "grad_norm": 2.623550934402015, - "learning_rate": 3.6669338625861983e-06, - "loss": 0.8823, - "step": 1760 - }, - { - "epoch": 0.5987759265555933, - "grad_norm": 2.7847663723361222, - "learning_rate": 3.661626330346224e-06, - "loss": 0.7644, - "step": 1761 - }, - { - "epoch": 0.5991159469568174, - "grad_norm": 1.978249284886559, - "learning_rate": 3.656320422076406e-06, - "loss": 0.6911, - "step": 1762 - }, - { - "epoch": 0.5994559673580415, - "grad_norm": 1.6370605423957132, - "learning_rate": 3.6510161442148783e-06, - "loss": 0.7779, - "step": 1763 - }, - { - "epoch": 0.5997959877592656, - "grad_norm": 1.9618067800993437, - "learning_rate": 3.6457135031978077e-06, - "loss": 0.8111, - "step": 1764 - }, - { - "epoch": 0.6001360081604896, - "grad_norm": 2.0871896457836785, - "learning_rate": 3.6404125054593653e-06, - "loss": 0.6968, - "step": 1765 - }, - { - "epoch": 0.6004760285617137, - "grad_norm": 3.6449233310433375, - "learning_rate": 3.635113157431732e-06, - "loss": 0.9092, - "step": 1766 - }, - { - "epoch": 0.6008160489629377, - "grad_norm": 2.43520006058048, - "learning_rate": 3.629815465545091e-06, - "loss": 0.8429, - "step": 1767 - }, - { - "epoch": 0.6011560693641619, - "grad_norm": 2.1109713075027723, - "learning_rate": 3.62451943622761e-06, - "loss": 0.6774, - "step": 1768 - }, - { - "epoch": 0.6014960897653859, - "grad_norm": 1.7035409015911598, - "learning_rate": 3.6192250759054427e-06, - "loss": 0.8139, - "step": 1769 - }, - { - "epoch": 0.60183611016661, - "grad_norm": 2.7589386792158628, - "learning_rate": 3.6139323910027136e-06, - "loss": 0.7972, - "step": 1770 - }, - { - "epoch": 0.602176130567834, - "grad_norm": 2.02117501459737, - "learning_rate": 3.608641387941523e-06, - "loss": 0.8181, - "step": 1771 - }, - { - "epoch": 0.6025161509690582, - "grad_norm": 1.9303717752979608, - "learning_rate": 3.6033520731419214e-06, - "loss": 0.8203, - "step": 1772 - }, - { - "epoch": 0.6028561713702822, - "grad_norm": 1.884365094703858, - "learning_rate": 3.598064453021911e-06, - "loss": 0.7987, - "step": 1773 - }, - { - "epoch": 0.6031961917715063, - "grad_norm": 2.7068608465393287, - "learning_rate": 3.592778533997446e-06, - "loss": 0.7508, - "step": 1774 - }, - { - "epoch": 0.6035362121727303, - "grad_norm": 1.987215906609273, - "learning_rate": 3.5874943224824097e-06, - "loss": 0.6987, - "step": 1775 - }, - { - "epoch": 0.6038762325739544, - "grad_norm": 2.2665245253039443, - "learning_rate": 3.582211824888615e-06, - "loss": 0.7874, - "step": 1776 - }, - { - "epoch": 0.6042162529751786, - "grad_norm": 2.079144255350973, - "learning_rate": 3.5769310476257935e-06, - "loss": 0.8801, - "step": 1777 - }, - { - "epoch": 0.6045562733764026, - "grad_norm": 1.684740476192313, - "learning_rate": 3.5716519971015947e-06, - "loss": 0.8109, - "step": 1778 - }, - { - "epoch": 0.6048962937776267, - "grad_norm": 2.356591268153879, - "learning_rate": 3.5663746797215658e-06, - "loss": 0.7333, - "step": 1779 - }, - { - "epoch": 0.6052363141788507, - "grad_norm": 1.9527315149213702, - "learning_rate": 3.561099101889158e-06, - "loss": 0.8158, - "step": 1780 - }, - { - "epoch": 0.6055763345800748, - "grad_norm": 2.0298727319213175, - "learning_rate": 3.555825270005707e-06, - "loss": 0.8055, - "step": 1781 - }, - { - "epoch": 0.6059163549812989, - "grad_norm": 2.1789885450127557, - "learning_rate": 3.5505531904704287e-06, - "loss": 0.8846, - "step": 1782 - }, - { - "epoch": 0.606256375382523, - "grad_norm": 2.190682206775641, - "learning_rate": 3.5452828696804196e-06, - "loss": 0.8113, - "step": 1783 - }, - { - "epoch": 0.606596395783747, - "grad_norm": 1.7727471010909939, - "learning_rate": 3.5400143140306355e-06, - "loss": 0.8189, - "step": 1784 - }, - { - "epoch": 0.6069364161849711, - "grad_norm": 1.9034533372715055, - "learning_rate": 3.5347475299138932e-06, - "loss": 0.8361, - "step": 1785 - }, - { - "epoch": 0.6072764365861951, - "grad_norm": 1.7827761115461676, - "learning_rate": 3.5294825237208573e-06, - "loss": 0.7705, - "step": 1786 - }, - { - "epoch": 0.6076164569874193, - "grad_norm": 2.130570518035593, - "learning_rate": 3.524219301840043e-06, - "loss": 0.8345, - "step": 1787 - }, - { - "epoch": 0.6079564773886433, - "grad_norm": 2.377972509331428, - "learning_rate": 3.5189578706577896e-06, - "loss": 0.8651, - "step": 1788 - }, - { - "epoch": 0.6082964977898674, - "grad_norm": 2.0170332375599647, - "learning_rate": 3.5136982365582704e-06, - "loss": 0.7541, - "step": 1789 - }, - { - "epoch": 0.6086365181910914, - "grad_norm": 2.0487060631425535, - "learning_rate": 3.5084404059234773e-06, - "loss": 0.747, - "step": 1790 - }, - { - "epoch": 0.6089765385923155, - "grad_norm": 1.5497246080487372, - "learning_rate": 3.5031843851332105e-06, - "loss": 0.7551, - "step": 1791 - }, - { - "epoch": 0.6093165589935396, - "grad_norm": 1.8661412507465704, - "learning_rate": 3.4979301805650805e-06, - "loss": 0.7471, - "step": 1792 - }, - { - "epoch": 0.6096565793947637, - "grad_norm": 1.9081392430588895, - "learning_rate": 3.492677798594486e-06, - "loss": 0.6867, - "step": 1793 - }, - { - "epoch": 0.6099965997959877, - "grad_norm": 3.953731191700172, - "learning_rate": 3.4874272455946217e-06, - "loss": 0.863, - "step": 1794 - }, - { - "epoch": 0.6103366201972118, - "grad_norm": 1.7838153201393292, - "learning_rate": 3.4821785279364585e-06, - "loss": 0.9178, - "step": 1795 - }, - { - "epoch": 0.610676640598436, - "grad_norm": 1.9609099043585159, - "learning_rate": 3.476931651988742e-06, - "loss": 0.7292, - "step": 1796 - }, - { - "epoch": 0.61101666099966, - "grad_norm": 1.7849795981254395, - "learning_rate": 3.471686624117982e-06, - "loss": 0.8199, - "step": 1797 - }, - { - "epoch": 0.6113566814008841, - "grad_norm": 2.1270736856013337, - "learning_rate": 3.466443450688445e-06, - "loss": 0.875, - "step": 1798 - }, - { - "epoch": 0.6116967018021081, - "grad_norm": 2.2091957079882945, - "learning_rate": 3.461202138062153e-06, - "loss": 0.6927, - "step": 1799 - }, - { - "epoch": 0.6120367222033322, - "grad_norm": 1.5834963127345454, - "learning_rate": 3.4559626925988623e-06, - "loss": 0.7318, - "step": 1800 - }, - { - "epoch": 0.6123767426045563, - "grad_norm": 2.0496819681593506, - "learning_rate": 3.450725120656069e-06, - "loss": 0.7506, - "step": 1801 - }, - { - "epoch": 0.6127167630057804, - "grad_norm": 2.2013789066829, - "learning_rate": 3.4454894285889916e-06, - "loss": 0.8603, - "step": 1802 - }, - { - "epoch": 0.6130567834070044, - "grad_norm": 2.09843751252011, - "learning_rate": 3.4402556227505746e-06, - "loss": 0.7768, - "step": 1803 - }, - { - "epoch": 0.6133968038082285, - "grad_norm": 2.3451379947690607, - "learning_rate": 3.435023709491467e-06, - "loss": 0.8272, - "step": 1804 - }, - { - "epoch": 0.6137368242094525, - "grad_norm": 3.5327166657343456, - "learning_rate": 3.4297936951600217e-06, - "loss": 0.7466, - "step": 1805 - }, - { - "epoch": 0.6140768446106767, - "grad_norm": 1.905762345831794, - "learning_rate": 3.424565586102293e-06, - "loss": 0.8158, - "step": 1806 - }, - { - "epoch": 0.6144168650119007, - "grad_norm": 2.3123581203476804, - "learning_rate": 3.4193393886620153e-06, - "loss": 0.8462, - "step": 1807 - }, - { - "epoch": 0.6147568854131248, - "grad_norm": 1.946148455776188, - "learning_rate": 3.4141151091806134e-06, - "loss": 0.704, - "step": 1808 - }, - { - "epoch": 0.6150969058143488, - "grad_norm": 1.3443343547386855, - "learning_rate": 3.408892753997175e-06, - "loss": 0.693, - "step": 1809 - }, - { - "epoch": 0.6154369262155729, - "grad_norm": 2.704265394408822, - "learning_rate": 3.40367232944846e-06, - "loss": 0.7594, - "step": 1810 - }, - { - "epoch": 0.615776946616797, - "grad_norm": 1.8211554347276468, - "learning_rate": 3.3984538418688795e-06, - "loss": 0.669, - "step": 1811 - }, - { - "epoch": 0.6161169670180211, - "grad_norm": 2.0413514509876602, - "learning_rate": 3.3932372975905027e-06, - "loss": 0.868, - "step": 1812 - }, - { - "epoch": 0.6164569874192452, - "grad_norm": 2.121098812311723, - "learning_rate": 3.3880227029430335e-06, - "loss": 0.7582, - "step": 1813 - }, - { - "epoch": 0.6167970078204692, - "grad_norm": 1.6909069939174246, - "learning_rate": 3.3828100642538097e-06, - "loss": 0.7327, - "step": 1814 - }, - { - "epoch": 0.6171370282216933, - "grad_norm": 1.6334734116518446, - "learning_rate": 3.377599387847803e-06, - "loss": 0.8684, - "step": 1815 - }, - { - "epoch": 0.6174770486229174, - "grad_norm": 2.1432970505365043, - "learning_rate": 3.372390680047597e-06, - "loss": 0.8207, - "step": 1816 - }, - { - "epoch": 0.6178170690241415, - "grad_norm": 1.8165425685515983, - "learning_rate": 3.3671839471733906e-06, - "loss": 0.797, - "step": 1817 - }, - { - "epoch": 0.6181570894253655, - "grad_norm": 2.1615351800791354, - "learning_rate": 3.3619791955429826e-06, - "loss": 0.773, - "step": 1818 - }, - { - "epoch": 0.6184971098265896, - "grad_norm": 2.6904615455880614, - "learning_rate": 3.3567764314717744e-06, - "loss": 0.7907, - "step": 1819 - }, - { - "epoch": 0.6188371302278136, - "grad_norm": 2.834095748254144, - "learning_rate": 3.351575661272749e-06, - "loss": 0.8717, - "step": 1820 - }, - { - "epoch": 0.6191771506290378, - "grad_norm": 2.145877874306492, - "learning_rate": 3.346376891256471e-06, - "loss": 0.8086, - "step": 1821 - }, - { - "epoch": 0.6195171710302618, - "grad_norm": 2.023497317198109, - "learning_rate": 3.341180127731083e-06, - "loss": 0.8679, - "step": 1822 - }, - { - "epoch": 0.6198571914314859, - "grad_norm": 2.1023255103173564, - "learning_rate": 3.335985377002285e-06, - "loss": 0.8146, - "step": 1823 - }, - { - "epoch": 0.6201972118327099, - "grad_norm": 1.731210236073491, - "learning_rate": 3.330792645373344e-06, - "loss": 0.7683, - "step": 1824 - }, - { - "epoch": 0.6205372322339341, - "grad_norm": 1.9233595089290563, - "learning_rate": 3.3256019391450696e-06, - "loss": 0.7869, - "step": 1825 - }, - { - "epoch": 0.6208772526351581, - "grad_norm": 2.208075651046932, - "learning_rate": 3.320413264615817e-06, - "loss": 0.6999, - "step": 1826 - }, - { - "epoch": 0.6212172730363822, - "grad_norm": 1.8428253882643195, - "learning_rate": 3.315226628081475e-06, - "loss": 0.724, - "step": 1827 - }, - { - "epoch": 0.6215572934376062, - "grad_norm": 2.5874398711101665, - "learning_rate": 3.3100420358354614e-06, - "loss": 0.8652, - "step": 1828 - }, - { - "epoch": 0.6218973138388303, - "grad_norm": 1.673959608619458, - "learning_rate": 3.3048594941687117e-06, - "loss": 0.8139, - "step": 1829 - }, - { - "epoch": 0.6222373342400545, - "grad_norm": 1.8058623104229596, - "learning_rate": 3.299679009369672e-06, - "loss": 0.7287, - "step": 1830 - }, - { - "epoch": 0.6225773546412785, - "grad_norm": 1.7517624272915204, - "learning_rate": 3.2945005877242975e-06, - "loss": 0.767, - "step": 1831 - }, - { - "epoch": 0.6229173750425026, - "grad_norm": 1.9637001633251892, - "learning_rate": 3.2893242355160327e-06, - "loss": 0.7267, - "step": 1832 - }, - { - "epoch": 0.6232573954437266, - "grad_norm": 2.0442734243489378, - "learning_rate": 3.28414995902582e-06, - "loss": 0.7311, - "step": 1833 - }, - { - "epoch": 0.6235974158449507, - "grad_norm": 4.43268536813694, - "learning_rate": 3.2789777645320736e-06, - "loss": 0.6776, - "step": 1834 - }, - { - "epoch": 0.6239374362461748, - "grad_norm": 2.120174804949898, - "learning_rate": 3.2738076583106903e-06, - "loss": 0.8519, - "step": 1835 - }, - { - "epoch": 0.6242774566473989, - "grad_norm": 3.2551292864590056, - "learning_rate": 3.268639646635027e-06, - "loss": 0.9032, - "step": 1836 - }, - { - "epoch": 0.6246174770486229, - "grad_norm": 1.601086184729794, - "learning_rate": 3.2634737357758994e-06, - "loss": 0.8248, - "step": 1837 - }, - { - "epoch": 0.624957497449847, - "grad_norm": 2.240230510804796, - "learning_rate": 3.2583099320015787e-06, - "loss": 0.7506, - "step": 1838 - }, - { - "epoch": 0.625297517851071, - "grad_norm": 1.9608041776165144, - "learning_rate": 3.253148241577773e-06, - "loss": 0.7333, - "step": 1839 - }, - { - "epoch": 0.6256375382522952, - "grad_norm": 1.9849085294573612, - "learning_rate": 3.2479886707676323e-06, - "loss": 0.7508, - "step": 1840 - }, - { - "epoch": 0.6259775586535192, - "grad_norm": 1.8228816637234933, - "learning_rate": 3.2428312258317306e-06, - "loss": 0.7946, - "step": 1841 - }, - { - "epoch": 0.6263175790547433, - "grad_norm": 1.7804154421261742, - "learning_rate": 3.2376759130280644e-06, - "loss": 0.7698, - "step": 1842 - }, - { - "epoch": 0.6266575994559673, - "grad_norm": 1.8927973749913074, - "learning_rate": 3.23252273861204e-06, - "loss": 0.8284, - "step": 1843 - }, - { - "epoch": 0.6269976198571914, - "grad_norm": 1.8550081010363115, - "learning_rate": 3.2273717088364743e-06, - "loss": 0.7924, - "step": 1844 - }, - { - "epoch": 0.6273376402584155, - "grad_norm": 2.1922949801484966, - "learning_rate": 3.222222829951578e-06, - "loss": 0.8388, - "step": 1845 - }, - { - "epoch": 0.6276776606596396, - "grad_norm": 2.1626087410744264, - "learning_rate": 3.2170761082049504e-06, - "loss": 0.7447, - "step": 1846 - }, - { - "epoch": 0.6280176810608636, - "grad_norm": 1.956904781654036, - "learning_rate": 3.21193154984158e-06, - "loss": 0.6497, - "step": 1847 - }, - { - "epoch": 0.6283577014620877, - "grad_norm": 5.531479827665114, - "learning_rate": 3.2067891611038203e-06, - "loss": 0.8239, - "step": 1848 - }, - { - "epoch": 0.6286977218633119, - "grad_norm": 2.0361393715810956, - "learning_rate": 3.201648948231404e-06, - "loss": 0.805, - "step": 1849 - }, - { - "epoch": 0.6290377422645359, - "grad_norm": 2.4082749710623523, - "learning_rate": 3.196510917461414e-06, - "loss": 0.7744, - "step": 1850 - }, - { - "epoch": 0.62937776266576, - "grad_norm": 1.8187471571807, - "learning_rate": 3.191375075028291e-06, - "loss": 0.7981, - "step": 1851 - }, - { - "epoch": 0.629717783066984, - "grad_norm": 1.8526733968838733, - "learning_rate": 3.1862414271638163e-06, - "loss": 0.7936, - "step": 1852 - }, - { - "epoch": 0.630057803468208, - "grad_norm": 2.1219438112421853, - "learning_rate": 3.181109980097111e-06, - "loss": 0.8523, - "step": 1853 - }, - { - "epoch": 0.6303978238694322, - "grad_norm": 2.3455237931427178, - "learning_rate": 3.1759807400546266e-06, - "loss": 0.7498, - "step": 1854 - }, - { - "epoch": 0.6307378442706563, - "grad_norm": 2.126668312041449, - "learning_rate": 3.1708537132601324e-06, - "loss": 0.8679, - "step": 1855 - }, - { - "epoch": 0.6310778646718803, - "grad_norm": 2.298564439126398, - "learning_rate": 3.1657289059347184e-06, - "loss": 0.7885, - "step": 1856 - }, - { - "epoch": 0.6314178850731044, - "grad_norm": 1.7168007294839274, - "learning_rate": 3.1606063242967753e-06, - "loss": 0.866, - "step": 1857 - }, - { - "epoch": 0.6317579054743284, - "grad_norm": 1.8392023959375048, - "learning_rate": 3.1554859745619986e-06, - "loss": 0.7636, - "step": 1858 - }, - { - "epoch": 0.6320979258755526, - "grad_norm": 2.2699088048706235, - "learning_rate": 3.15036786294337e-06, - "loss": 0.837, - "step": 1859 - }, - { - "epoch": 0.6324379462767766, - "grad_norm": 2.9330773051419263, - "learning_rate": 3.145251995651162e-06, - "loss": 0.8315, - "step": 1860 - }, - { - "epoch": 0.6327779666780007, - "grad_norm": 2.1837196368742133, - "learning_rate": 3.1401383788929175e-06, - "loss": 0.7574, - "step": 1861 - }, - { - "epoch": 0.6331179870792247, - "grad_norm": 1.9146680400998761, - "learning_rate": 3.1350270188734523e-06, - "loss": 0.7177, - "step": 1862 - }, - { - "epoch": 0.6334580074804488, - "grad_norm": 1.3903306685323171, - "learning_rate": 3.129917921794844e-06, - "loss": 0.693, - "step": 1863 - }, - { - "epoch": 0.6337980278816729, - "grad_norm": 2.1306437683234325, - "learning_rate": 3.1248110938564202e-06, - "loss": 0.7523, - "step": 1864 - }, - { - "epoch": 0.634138048282897, - "grad_norm": 1.9181652277534635, - "learning_rate": 3.1197065412547632e-06, - "loss": 0.8886, - "step": 1865 - }, - { - "epoch": 0.634478068684121, - "grad_norm": 1.7562861259048224, - "learning_rate": 3.1146042701836865e-06, - "loss": 0.7553, - "step": 1866 - }, - { - "epoch": 0.6348180890853451, - "grad_norm": 1.7325396538375666, - "learning_rate": 3.10950428683424e-06, - "loss": 0.8295, - "step": 1867 - }, - { - "epoch": 0.6351581094865691, - "grad_norm": 1.8009663516914018, - "learning_rate": 3.1044065973946945e-06, - "loss": 0.7395, - "step": 1868 - }, - { - "epoch": 0.6354981298877933, - "grad_norm": 1.6392428927823481, - "learning_rate": 3.0993112080505383e-06, - "loss": 0.872, - "step": 1869 - }, - { - "epoch": 0.6358381502890174, - "grad_norm": 1.5989553503629714, - "learning_rate": 3.0942181249844726e-06, - "loss": 0.6544, - "step": 1870 - }, - { - "epoch": 0.6361781706902414, - "grad_norm": 2.6768868064774374, - "learning_rate": 3.089127354376393e-06, - "loss": 0.7539, - "step": 1871 - }, - { - "epoch": 0.6365181910914655, - "grad_norm": 4.5238927522554535, - "learning_rate": 3.084038902403398e-06, - "loss": 0.7274, - "step": 1872 - }, - { - "epoch": 0.6368582114926895, - "grad_norm": 1.6009647281884545, - "learning_rate": 3.0789527752397624e-06, - "loss": 0.7217, - "step": 1873 - }, - { - "epoch": 0.6371982318939137, - "grad_norm": 2.7730101673025573, - "learning_rate": 3.07386897905695e-06, - "loss": 0.8029, - "step": 1874 - }, - { - "epoch": 0.6375382522951377, - "grad_norm": 2.6935224464307757, - "learning_rate": 3.068787520023587e-06, - "loss": 0.8331, - "step": 1875 - }, - { - "epoch": 0.6378782726963618, - "grad_norm": 2.128963112443544, - "learning_rate": 3.0637084043054744e-06, - "loss": 0.8188, - "step": 1876 - }, - { - "epoch": 0.6382182930975858, - "grad_norm": 2.21461662243339, - "learning_rate": 3.058631638065561e-06, - "loss": 0.8867, - "step": 1877 - }, - { - "epoch": 0.63855831349881, - "grad_norm": 2.0004837364007018, - "learning_rate": 3.0535572274639456e-06, - "loss": 0.7891, - "step": 1878 - }, - { - "epoch": 0.638898333900034, - "grad_norm": 2.645633375201975, - "learning_rate": 3.048485178657875e-06, - "loss": 0.7086, - "step": 1879 - }, - { - "epoch": 0.6392383543012581, - "grad_norm": 1.9850643249437119, - "learning_rate": 3.0434154978017215e-06, - "loss": 0.797, - "step": 1880 - }, - { - "epoch": 0.6395783747024821, - "grad_norm": 1.4653698856711659, - "learning_rate": 3.0383481910469936e-06, - "loss": 0.793, - "step": 1881 - }, - { - "epoch": 0.6399183951037062, - "grad_norm": 2.4247940024767525, - "learning_rate": 3.03328326454231e-06, - "loss": 0.7956, - "step": 1882 - }, - { - "epoch": 0.6402584155049303, - "grad_norm": 1.7278252319233067, - "learning_rate": 3.0282207244334084e-06, - "loss": 0.7589, - "step": 1883 - }, - { - "epoch": 0.6405984359061544, - "grad_norm": 1.7440612715525123, - "learning_rate": 3.0231605768631256e-06, - "loss": 0.8077, - "step": 1884 - }, - { - "epoch": 0.6409384563073784, - "grad_norm": 1.7115699043404717, - "learning_rate": 3.018102827971397e-06, - "loss": 0.7597, - "step": 1885 - }, - { - "epoch": 0.6412784767086025, - "grad_norm": 1.5585975933489125, - "learning_rate": 3.0130474838952518e-06, - "loss": 0.77, - "step": 1886 - }, - { - "epoch": 0.6416184971098265, - "grad_norm": 1.6776789416972602, - "learning_rate": 3.007994550768793e-06, - "loss": 0.8699, - "step": 1887 - }, - { - "epoch": 0.6419585175110507, - "grad_norm": 2.0656741398519305, - "learning_rate": 3.0029440347232064e-06, - "loss": 0.7675, - "step": 1888 - }, - { - "epoch": 0.6422985379122748, - "grad_norm": 2.423344521347543, - "learning_rate": 2.997895941886737e-06, - "loss": 0.7608, - "step": 1889 - }, - { - "epoch": 0.6426385583134988, - "grad_norm": 3.3517642400306653, - "learning_rate": 2.9928502783846987e-06, - "loss": 0.7183, - "step": 1890 - }, - { - "epoch": 0.6429785787147229, - "grad_norm": 4.199360112371653, - "learning_rate": 2.9878070503394484e-06, - "loss": 0.8042, - "step": 1891 - }, - { - "epoch": 0.6433185991159469, - "grad_norm": 2.348727029739639, - "learning_rate": 2.982766263870395e-06, - "loss": 0.8536, - "step": 1892 - }, - { - "epoch": 0.6436586195171711, - "grad_norm": 2.3407286077425815, - "learning_rate": 2.977727925093981e-06, - "loss": 0.7965, - "step": 1893 - }, - { - "epoch": 0.6439986399183951, - "grad_norm": 2.2628555536887243, - "learning_rate": 2.972692040123678e-06, - "loss": 0.8354, - "step": 1894 - }, - { - "epoch": 0.6443386603196192, - "grad_norm": 1.9098209839122338, - "learning_rate": 2.9676586150699843e-06, - "loss": 0.7583, - "step": 1895 - }, - { - "epoch": 0.6446786807208432, - "grad_norm": 2.030613992719097, - "learning_rate": 2.962627656040408e-06, - "loss": 0.6792, - "step": 1896 - }, - { - "epoch": 0.6450187011220673, - "grad_norm": 2.206570139271304, - "learning_rate": 2.957599169139472e-06, - "loss": 0.7251, - "step": 1897 - }, - { - "epoch": 0.6453587215232914, - "grad_norm": 2.2690884747822615, - "learning_rate": 2.9525731604686925e-06, - "loss": 0.7452, - "step": 1898 - }, - { - "epoch": 0.6456987419245155, - "grad_norm": 1.6036554282102649, - "learning_rate": 2.9475496361265834e-06, - "loss": 0.8009, - "step": 1899 - }, - { - "epoch": 0.6460387623257395, - "grad_norm": 1.5691756233346612, - "learning_rate": 2.942528602208642e-06, - "loss": 0.7995, - "step": 1900 - }, - { - "epoch": 0.6463787827269636, - "grad_norm": 2.5262095313197785, - "learning_rate": 2.9375100648073413e-06, - "loss": 0.7977, - "step": 1901 - }, - { - "epoch": 0.6467188031281876, - "grad_norm": 2.329442430159881, - "learning_rate": 2.9324940300121325e-06, - "loss": 0.7225, - "step": 1902 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 2.2795978824555316, - "learning_rate": 2.9274805039094225e-06, - "loss": 0.7248, - "step": 1903 - }, - { - "epoch": 0.6473988439306358, - "grad_norm": 1.8928729552824115, - "learning_rate": 2.922469492582578e-06, - "loss": 0.7438, - "step": 1904 - }, - { - "epoch": 0.6477388643318599, - "grad_norm": 1.645826518622666, - "learning_rate": 2.9174610021119136e-06, - "loss": 0.7018, - "step": 1905 - }, - { - "epoch": 0.648078884733084, - "grad_norm": 1.887335955741179, - "learning_rate": 2.912455038574686e-06, - "loss": 0.7666, - "step": 1906 - }, - { - "epoch": 0.6484189051343081, - "grad_norm": 2.6577609754031966, - "learning_rate": 2.907451608045081e-06, - "loss": 0.7754, - "step": 1907 - }, - { - "epoch": 0.6487589255355322, - "grad_norm": 1.424048835180028, - "learning_rate": 2.9024507165942196e-06, - "loss": 0.8108, - "step": 1908 - }, - { - "epoch": 0.6490989459367562, - "grad_norm": 2.3645854173941014, - "learning_rate": 2.8974523702901346e-06, - "loss": 0.9007, - "step": 1909 - }, - { - "epoch": 0.6494389663379803, - "grad_norm": 1.9228751348732287, - "learning_rate": 2.892456575197771e-06, - "loss": 0.8843, - "step": 1910 - }, - { - "epoch": 0.6497789867392043, - "grad_norm": 1.7424474860546162, - "learning_rate": 2.8874633373789848e-06, - "loss": 0.812, - "step": 1911 - }, - { - "epoch": 0.6501190071404285, - "grad_norm": 2.183670810574612, - "learning_rate": 2.8824726628925204e-06, - "loss": 0.844, - "step": 1912 - }, - { - "epoch": 0.6504590275416525, - "grad_norm": 3.2905500209946386, - "learning_rate": 2.877484557794017e-06, - "loss": 0.7829, - "step": 1913 - }, - { - "epoch": 0.6507990479428766, - "grad_norm": 1.8504014727826574, - "learning_rate": 2.872499028135993e-06, - "loss": 0.8476, - "step": 1914 - }, - { - "epoch": 0.6511390683441006, - "grad_norm": 1.786637343753024, - "learning_rate": 2.8675160799678483e-06, - "loss": 0.7481, - "step": 1915 - }, - { - "epoch": 0.6514790887453247, - "grad_norm": 1.7737603638634836, - "learning_rate": 2.8625357193358416e-06, - "loss": 0.6805, - "step": 1916 - }, - { - "epoch": 0.6518191091465488, - "grad_norm": 2.3513830613944897, - "learning_rate": 2.8575579522830965e-06, - "loss": 0.8911, - "step": 1917 - }, - { - "epoch": 0.6521591295477729, - "grad_norm": 2.308686347074699, - "learning_rate": 2.8525827848495912e-06, - "loss": 0.7006, - "step": 1918 - }, - { - "epoch": 0.6524991499489969, - "grad_norm": 3.0666789675701036, - "learning_rate": 2.847610223072145e-06, - "loss": 0.7716, - "step": 1919 - }, - { - "epoch": 0.652839170350221, - "grad_norm": 1.6176969102311178, - "learning_rate": 2.842640272984422e-06, - "loss": 0.7157, - "step": 1920 - }, - { - "epoch": 0.653179190751445, - "grad_norm": 1.9114007467129297, - "learning_rate": 2.837672940616911e-06, - "loss": 0.8591, - "step": 1921 - }, - { - "epoch": 0.6535192111526692, - "grad_norm": 2.673597039865379, - "learning_rate": 2.8327082319969268e-06, - "loss": 0.7577, - "step": 1922 - }, - { - "epoch": 0.6538592315538932, - "grad_norm": 1.6371578518735068, - "learning_rate": 2.8277461531485985e-06, - "loss": 0.7447, - "step": 1923 - }, - { - "epoch": 0.6541992519551173, - "grad_norm": 1.6541414287928828, - "learning_rate": 2.8227867100928706e-06, - "loss": 0.7508, - "step": 1924 - }, - { - "epoch": 0.6545392723563414, - "grad_norm": 2.6084708691300764, - "learning_rate": 2.8178299088474836e-06, - "loss": 0.7826, - "step": 1925 - }, - { - "epoch": 0.6548792927575654, - "grad_norm": 2.8225766202041194, - "learning_rate": 2.8128757554269716e-06, - "loss": 0.9343, - "step": 1926 - }, - { - "epoch": 0.6552193131587896, - "grad_norm": 1.8679165197646086, - "learning_rate": 2.8079242558426612e-06, - "loss": 0.7858, - "step": 1927 - }, - { - "epoch": 0.6555593335600136, - "grad_norm": 1.6626905717574614, - "learning_rate": 2.8029754161026535e-06, - "loss": 0.8268, - "step": 1928 - }, - { - "epoch": 0.6558993539612377, - "grad_norm": 1.7902883623187187, - "learning_rate": 2.7980292422118282e-06, - "loss": 0.7367, - "step": 1929 - }, - { - "epoch": 0.6562393743624617, - "grad_norm": 2.28189192357985, - "learning_rate": 2.7930857401718244e-06, - "loss": 0.8161, - "step": 1930 - }, - { - "epoch": 0.6565793947636859, - "grad_norm": 1.776198319138567, - "learning_rate": 2.7881449159810416e-06, - "loss": 0.6802, - "step": 1931 - }, - { - "epoch": 0.6569194151649099, - "grad_norm": 2.2447524877488156, - "learning_rate": 2.7832067756346293e-06, - "loss": 0.765, - "step": 1932 - }, - { - "epoch": 0.657259435566134, - "grad_norm": 1.626806606726708, - "learning_rate": 2.7782713251244797e-06, - "loss": 0.786, - "step": 1933 - }, - { - "epoch": 0.657599455967358, - "grad_norm": 2.2354711587791236, - "learning_rate": 2.7733385704392257e-06, - "loss": 0.832, - "step": 1934 - }, - { - "epoch": 0.6579394763685821, - "grad_norm": 2.0096403282944912, - "learning_rate": 2.768408517564224e-06, - "loss": 0.7716, - "step": 1935 - }, - { - "epoch": 0.6582794967698062, - "grad_norm": 1.9832226393988026, - "learning_rate": 2.763481172481556e-06, - "loss": 0.7496, - "step": 1936 - }, - { - "epoch": 0.6586195171710303, - "grad_norm": 1.9113293186754516, - "learning_rate": 2.7585565411700164e-06, - "loss": 0.7712, - "step": 1937 - }, - { - "epoch": 0.6589595375722543, - "grad_norm": 1.800190920309849, - "learning_rate": 2.7536346296051063e-06, - "loss": 0.775, - "step": 1938 - }, - { - "epoch": 0.6592995579734784, - "grad_norm": 2.2869209794289835, - "learning_rate": 2.7487154437590252e-06, - "loss": 0.7114, - "step": 1939 - }, - { - "epoch": 0.6596395783747024, - "grad_norm": 1.752548967169787, - "learning_rate": 2.743798989600672e-06, - "loss": 0.8391, - "step": 1940 - }, - { - "epoch": 0.6599795987759266, - "grad_norm": 1.9696733758583753, - "learning_rate": 2.738885273095624e-06, - "loss": 0.6906, - "step": 1941 - }, - { - "epoch": 0.6603196191771507, - "grad_norm": 1.756956191108405, - "learning_rate": 2.733974300206137e-06, - "loss": 0.8137, - "step": 1942 - }, - { - "epoch": 0.6606596395783747, - "grad_norm": 2.5904794588208433, - "learning_rate": 2.7290660768911435e-06, - "loss": 0.8262, - "step": 1943 - }, - { - "epoch": 0.6609996599795988, - "grad_norm": 2.718207473803795, - "learning_rate": 2.7241606091062334e-06, - "loss": 0.7992, - "step": 1944 - }, - { - "epoch": 0.6613396803808228, - "grad_norm": 1.9225267669432748, - "learning_rate": 2.719257902803658e-06, - "loss": 0.8342, - "step": 1945 - }, - { - "epoch": 0.661679700782047, - "grad_norm": 1.8175234136298053, - "learning_rate": 2.7143579639323146e-06, - "loss": 0.7721, - "step": 1946 - }, - { - "epoch": 0.662019721183271, - "grad_norm": 2.2267444521145365, - "learning_rate": 2.7094607984377423e-06, - "loss": 0.7256, - "step": 1947 - }, - { - "epoch": 0.6623597415844951, - "grad_norm": 1.6676057305902945, - "learning_rate": 2.7045664122621173e-06, - "loss": 0.7588, - "step": 1948 - }, - { - "epoch": 0.6626997619857191, - "grad_norm": 1.9495109160790398, - "learning_rate": 2.6996748113442397e-06, - "loss": 0.7012, - "step": 1949 - }, - { - "epoch": 0.6630397823869432, - "grad_norm": 2.275323709986825, - "learning_rate": 2.6947860016195372e-06, - "loss": 0.809, - "step": 1950 - }, - { - "epoch": 0.6633798027881673, - "grad_norm": 1.7705415744996646, - "learning_rate": 2.6898999890200405e-06, - "loss": 0.7813, - "step": 1951 - }, - { - "epoch": 0.6637198231893914, - "grad_norm": 1.8201284706907614, - "learning_rate": 2.6850167794743966e-06, - "loss": 0.7378, - "step": 1952 - }, - { - "epoch": 0.6640598435906154, - "grad_norm": 1.7763246698039323, - "learning_rate": 2.680136378907845e-06, - "loss": 0.8054, - "step": 1953 - }, - { - "epoch": 0.6643998639918395, - "grad_norm": 2.1067341590101787, - "learning_rate": 2.6752587932422175e-06, - "loss": 0.8473, - "step": 1954 - }, - { - "epoch": 0.6647398843930635, - "grad_norm": 2.1826612345959284, - "learning_rate": 2.67038402839593e-06, - "loss": 0.8311, - "step": 1955 - }, - { - "epoch": 0.6650799047942877, - "grad_norm": 2.151119086744043, - "learning_rate": 2.6655120902839802e-06, - "loss": 0.7625, - "step": 1956 - }, - { - "epoch": 0.6654199251955117, - "grad_norm": 2.5514025172235963, - "learning_rate": 2.6606429848179306e-06, - "loss": 0.7488, - "step": 1957 - }, - { - "epoch": 0.6657599455967358, - "grad_norm": 2.4317656380313477, - "learning_rate": 2.655776717905906e-06, - "loss": 0.7954, - "step": 1958 - }, - { - "epoch": 0.6660999659979598, - "grad_norm": 2.3399416449946364, - "learning_rate": 2.6509132954525946e-06, - "loss": 0.7008, - "step": 1959 - }, - { - "epoch": 0.666439986399184, - "grad_norm": 2.7631747409481386, - "learning_rate": 2.6460527233592225e-06, - "loss": 0.7061, - "step": 1960 - }, - { - "epoch": 0.666780006800408, - "grad_norm": 1.9223571690932921, - "learning_rate": 2.641195007523568e-06, - "loss": 0.8037, - "step": 1961 - }, - { - "epoch": 0.6671200272016321, - "grad_norm": 2.0570084331599454, - "learning_rate": 2.636340153839935e-06, - "loss": 0.7771, - "step": 1962 - }, - { - "epoch": 0.6674600476028562, - "grad_norm": 1.73207328780135, - "learning_rate": 2.631488168199159e-06, - "loss": 0.8048, - "step": 1963 - }, - { - "epoch": 0.6678000680040802, - "grad_norm": 1.7955944649193327, - "learning_rate": 2.626639056488593e-06, - "loss": 0.7144, - "step": 1964 - }, - { - "epoch": 0.6681400884053044, - "grad_norm": 1.899052873587027, - "learning_rate": 2.621792824592103e-06, - "loss": 0.8188, - "step": 1965 - }, - { - "epoch": 0.6684801088065284, - "grad_norm": 1.669623057694321, - "learning_rate": 2.616949478390065e-06, - "loss": 0.7515, - "step": 1966 - }, - { - "epoch": 0.6688201292077525, - "grad_norm": 2.6012423751773817, - "learning_rate": 2.612109023759346e-06, - "loss": 0.724, - "step": 1967 - }, - { - "epoch": 0.6691601496089765, - "grad_norm": 1.7503290521049435, - "learning_rate": 2.6072714665733135e-06, - "loss": 0.7963, - "step": 1968 - }, - { - "epoch": 0.6695001700102006, - "grad_norm": 1.7646392017997337, - "learning_rate": 2.60243681270181e-06, - "loss": 0.7704, - "step": 1969 - }, - { - "epoch": 0.6698401904114247, - "grad_norm": 2.23434001820653, - "learning_rate": 2.597605068011163e-06, - "loss": 0.7679, - "step": 1970 - }, - { - "epoch": 0.6701802108126488, - "grad_norm": 2.2030201912789242, - "learning_rate": 2.5927762383641657e-06, - "loss": 0.8307, - "step": 1971 - }, - { - "epoch": 0.6705202312138728, - "grad_norm": 1.4053187069136137, - "learning_rate": 2.5879503296200736e-06, - "loss": 0.6733, - "step": 1972 - }, - { - "epoch": 0.6708602516150969, - "grad_norm": 2.0200673415876076, - "learning_rate": 2.583127347634601e-06, - "loss": 0.7548, - "step": 1973 - }, - { - "epoch": 0.6712002720163209, - "grad_norm": 7.747179478216865, - "learning_rate": 2.5783072982599057e-06, - "loss": 0.8986, - "step": 1974 - }, - { - "epoch": 0.6715402924175451, - "grad_norm": 4.347151217698582, - "learning_rate": 2.573490187344596e-06, - "loss": 0.8158, - "step": 1975 - }, - { - "epoch": 0.6718803128187691, - "grad_norm": 1.5960191169683489, - "learning_rate": 2.5686760207337045e-06, - "loss": 0.6319, - "step": 1976 - }, - { - "epoch": 0.6722203332199932, - "grad_norm": 2.0210012377694633, - "learning_rate": 2.563864804268701e-06, - "loss": 0.8267, - "step": 1977 - }, - { - "epoch": 0.6725603536212172, - "grad_norm": 2.121999429293373, - "learning_rate": 2.559056543787468e-06, - "loss": 0.7567, - "step": 1978 - }, - { - "epoch": 0.6729003740224413, - "grad_norm": 2.5273540123412044, - "learning_rate": 2.554251245124305e-06, - "loss": 0.6441, - "step": 1979 - }, - { - "epoch": 0.6732403944236655, - "grad_norm": 2.0741394840030853, - "learning_rate": 2.5494489141099155e-06, - "loss": 0.8274, - "step": 1980 - }, - { - "epoch": 0.6735804148248895, - "grad_norm": 1.67166625283495, - "learning_rate": 2.5446495565714024e-06, - "loss": 0.7647, - "step": 1981 - }, - { - "epoch": 0.6739204352261136, - "grad_norm": 1.915541986064764, - "learning_rate": 2.539853178332265e-06, - "loss": 0.8623, - "step": 1982 - }, - { - "epoch": 0.6742604556273376, - "grad_norm": 2.0838754278048075, - "learning_rate": 2.5350597852123798e-06, - "loss": 0.9025, - "step": 1983 - }, - { - "epoch": 0.6746004760285618, - "grad_norm": 1.8777603204555824, - "learning_rate": 2.530269383028009e-06, - "loss": 0.805, - "step": 1984 - }, - { - "epoch": 0.6749404964297858, - "grad_norm": 1.6370793848887677, - "learning_rate": 2.5254819775917795e-06, - "loss": 0.7331, - "step": 1985 - }, - { - "epoch": 0.6752805168310099, - "grad_norm": 1.9510489857076145, - "learning_rate": 2.5206975747126873e-06, - "loss": 0.6924, - "step": 1986 - }, - { - "epoch": 0.6756205372322339, - "grad_norm": 1.8840509441418891, - "learning_rate": 2.51591618019608e-06, - "loss": 0.7736, - "step": 1987 - }, - { - "epoch": 0.675960557633458, - "grad_norm": 2.1944785850385866, - "learning_rate": 2.511137799843658e-06, - "loss": 0.7507, - "step": 1988 - }, - { - "epoch": 0.6763005780346821, - "grad_norm": 2.563444560543499, - "learning_rate": 2.506362439453463e-06, - "loss": 0.8487, - "step": 1989 - }, - { - "epoch": 0.6766405984359062, - "grad_norm": 1.8896121725043236, - "learning_rate": 2.5015901048198716e-06, - "loss": 0.7212, - "step": 1990 - }, - { - "epoch": 0.6769806188371302, - "grad_norm": 1.6981617236104471, - "learning_rate": 2.4968208017335936e-06, - "loss": 0.8074, - "step": 1991 - }, - { - "epoch": 0.6773206392383543, - "grad_norm": 2.1743389190119093, - "learning_rate": 2.4920545359816533e-06, - "loss": 0.8572, - "step": 1992 - }, - { - "epoch": 0.6776606596395783, - "grad_norm": 1.7914808391383161, - "learning_rate": 2.487291313347397e-06, - "loss": 0.8223, - "step": 1993 - }, - { - "epoch": 0.6780006800408025, - "grad_norm": 1.715883330092319, - "learning_rate": 2.4825311396104727e-06, - "loss": 0.8229, - "step": 1994 - }, - { - "epoch": 0.6783407004420265, - "grad_norm": 1.9340151796766085, - "learning_rate": 2.477774020546831e-06, - "loss": 0.7705, - "step": 1995 - }, - { - "epoch": 0.6786807208432506, - "grad_norm": 2.1553476660518376, - "learning_rate": 2.473019961928716e-06, - "loss": 0.8944, - "step": 1996 - }, - { - "epoch": 0.6790207412444746, - "grad_norm": 2.3845638574577306, - "learning_rate": 2.4682689695246557e-06, - "loss": 0.7879, - "step": 1997 - }, - { - "epoch": 0.6793607616456987, - "grad_norm": 2.8669737688434775, - "learning_rate": 2.4635210490994648e-06, - "loss": 0.7056, - "step": 1998 - }, - { - "epoch": 0.6797007820469229, - "grad_norm": 3.049192996998203, - "learning_rate": 2.458776206414221e-06, - "loss": 0.8073, - "step": 1999 - }, - { - "epoch": 0.6800408024481469, - "grad_norm": 1.8798273806791783, - "learning_rate": 2.4540344472262766e-06, - "loss": 0.7979, - "step": 2000 - }, - { - "epoch": 0.680380822849371, - "grad_norm": 2.8826970776464167, - "learning_rate": 2.4492957772892345e-06, - "loss": 0.7671, - "step": 2001 - }, - { - "epoch": 0.680720843250595, - "grad_norm": 1.7248005566280082, - "learning_rate": 2.4445602023529558e-06, - "loss": 0.7626, - "step": 2002 - }, - { - "epoch": 0.6810608636518191, - "grad_norm": 1.6157884725720708, - "learning_rate": 2.439827728163542e-06, - "loss": 0.6729, - "step": 2003 - }, - { - "epoch": 0.6814008840530432, - "grad_norm": 2.015980236780887, - "learning_rate": 2.4350983604633323e-06, - "loss": 0.7427, - "step": 2004 - }, - { - "epoch": 0.6817409044542673, - "grad_norm": 2.213874167082533, - "learning_rate": 2.4303721049908973e-06, - "loss": 0.8243, - "step": 2005 - }, - { - "epoch": 0.6820809248554913, - "grad_norm": 1.8548248745686642, - "learning_rate": 2.425648967481031e-06, - "loss": 0.7634, - "step": 2006 - }, - { - "epoch": 0.6824209452567154, - "grad_norm": 1.652684543766916, - "learning_rate": 2.4209289536647467e-06, - "loss": 0.8613, - "step": 2007 - }, - { - "epoch": 0.6827609656579394, - "grad_norm": 1.9971828674188044, - "learning_rate": 2.4162120692692623e-06, - "loss": 0.7493, - "step": 2008 - }, - { - "epoch": 0.6831009860591636, - "grad_norm": 1.599253674855469, - "learning_rate": 2.4114983200180053e-06, - "loss": 0.7948, - "step": 2009 - }, - { - "epoch": 0.6834410064603876, - "grad_norm": 2.2583508234748653, - "learning_rate": 2.406787711630591e-06, - "loss": 0.7357, - "step": 2010 - }, - { - "epoch": 0.6837810268616117, - "grad_norm": 2.0791026003084396, - "learning_rate": 2.4020802498228333e-06, - "loss": 0.8317, - "step": 2011 - }, - { - "epoch": 0.6841210472628357, - "grad_norm": 1.9114254449996253, - "learning_rate": 2.3973759403067175e-06, - "loss": 0.8558, - "step": 2012 - }, - { - "epoch": 0.6844610676640599, - "grad_norm": 1.6266872694061658, - "learning_rate": 2.3926747887904084e-06, - "loss": 0.8107, - "step": 2013 - }, - { - "epoch": 0.684801088065284, - "grad_norm": 2.2202773218114706, - "learning_rate": 2.3879768009782434e-06, - "loss": 0.8187, - "step": 2014 - }, - { - "epoch": 0.685141108466508, - "grad_norm": 1.8724141832862042, - "learning_rate": 2.3832819825707136e-06, - "loss": 0.7582, - "step": 2015 - }, - { - "epoch": 0.685481128867732, - "grad_norm": 2.6048840426082043, - "learning_rate": 2.3785903392644714e-06, - "loss": 0.7355, - "step": 2016 - }, - { - "epoch": 0.6858211492689561, - "grad_norm": 2.4069817743883695, - "learning_rate": 2.37390187675231e-06, - "loss": 0.8101, - "step": 2017 - }, - { - "epoch": 0.6861611696701803, - "grad_norm": 2.3122636622751997, - "learning_rate": 2.3692166007231686e-06, - "loss": 0.796, - "step": 2018 - }, - { - "epoch": 0.6865011900714043, - "grad_norm": 2.7148809354845103, - "learning_rate": 2.364534516862117e-06, - "loss": 0.7821, - "step": 2019 - }, - { - "epoch": 0.6868412104726284, - "grad_norm": 1.7589729603840554, - "learning_rate": 2.359855630850352e-06, - "loss": 0.805, - "step": 2020 - }, - { - "epoch": 0.6871812308738524, - "grad_norm": 2.188045522331186, - "learning_rate": 2.3551799483651894e-06, - "loss": 0.7042, - "step": 2021 - }, - { - "epoch": 0.6875212512750765, - "grad_norm": 1.6884288952943125, - "learning_rate": 2.3505074750800585e-06, - "loss": 0.7188, - "step": 2022 - }, - { - "epoch": 0.6878612716763006, - "grad_norm": 1.6892834429534136, - "learning_rate": 2.3458382166644967e-06, - "loss": 0.6986, - "step": 2023 - }, - { - "epoch": 0.6882012920775247, - "grad_norm": 2.0889129257298324, - "learning_rate": 2.3411721787841363e-06, - "loss": 0.671, - "step": 2024 - }, - { - "epoch": 0.6885413124787487, - "grad_norm": 1.9926930177927726, - "learning_rate": 2.3365093671007078e-06, - "loss": 0.7946, - "step": 2025 - }, - { - "epoch": 0.6888813328799728, - "grad_norm": 2.050530946497787, - "learning_rate": 2.3318497872720193e-06, - "loss": 0.7665, - "step": 2026 - }, - { - "epoch": 0.6892213532811968, - "grad_norm": 3.331008521125105, - "learning_rate": 2.327193444951966e-06, - "loss": 0.7251, - "step": 2027 - }, - { - "epoch": 0.689561373682421, - "grad_norm": 1.558219400732492, - "learning_rate": 2.322540345790508e-06, - "loss": 0.8328, - "step": 2028 - }, - { - "epoch": 0.689901394083645, - "grad_norm": 1.6259596450082816, - "learning_rate": 2.3178904954336718e-06, - "loss": 0.7147, - "step": 2029 - }, - { - "epoch": 0.6902414144848691, - "grad_norm": 1.8488785938546937, - "learning_rate": 2.313243899523544e-06, - "loss": 0.8313, - "step": 2030 - }, - { - "epoch": 0.6905814348860931, - "grad_norm": 1.8394321602635277, - "learning_rate": 2.3086005636982582e-06, - "loss": 0.8232, - "step": 2031 - }, - { - "epoch": 0.6909214552873172, - "grad_norm": 1.9494231310016805, - "learning_rate": 2.303960493591999e-06, - "loss": 0.6783, - "step": 2032 - }, - { - "epoch": 0.6912614756885413, - "grad_norm": 2.0407917593034863, - "learning_rate": 2.29932369483498e-06, - "loss": 0.8164, - "step": 2033 - }, - { - "epoch": 0.6916014960897654, - "grad_norm": 1.5657183822956182, - "learning_rate": 2.2946901730534533e-06, - "loss": 0.8238, - "step": 2034 - }, - { - "epoch": 0.6919415164909895, - "grad_norm": 2.1179360693491556, - "learning_rate": 2.29005993386969e-06, - "loss": 0.6922, - "step": 2035 - }, - { - "epoch": 0.6922815368922135, - "grad_norm": 2.838283362200117, - "learning_rate": 2.285432982901979e-06, - "loss": 0.7736, - "step": 2036 - }, - { - "epoch": 0.6926215572934376, - "grad_norm": 1.9673971851860206, - "learning_rate": 2.2808093257646184e-06, - "loss": 0.8444, - "step": 2037 - }, - { - "epoch": 0.6929615776946617, - "grad_norm": 2.112143965612654, - "learning_rate": 2.2761889680679106e-06, - "loss": 0.7465, - "step": 2038 - }, - { - "epoch": 0.6933015980958858, - "grad_norm": 3.5658767108987313, - "learning_rate": 2.271571915418157e-06, - "loss": 0.7382, - "step": 2039 - }, - { - "epoch": 0.6936416184971098, - "grad_norm": 1.7890203918330567, - "learning_rate": 2.266958173417644e-06, - "loss": 0.7754, - "step": 2040 - }, - { - "epoch": 0.6939816388983339, - "grad_norm": 1.879135172415941, - "learning_rate": 2.2623477476646447e-06, - "loss": 0.9036, - "step": 2041 - }, - { - "epoch": 0.694321659299558, - "grad_norm": 2.469200512596644, - "learning_rate": 2.2577406437534055e-06, - "loss": 0.7346, - "step": 2042 - }, - { - "epoch": 0.6946616797007821, - "grad_norm": 1.7547922743933462, - "learning_rate": 2.253136867274146e-06, - "loss": 0.837, - "step": 2043 - }, - { - "epoch": 0.6950017001020061, - "grad_norm": 2.0761435680953837, - "learning_rate": 2.2485364238130435e-06, - "loss": 0.7821, - "step": 2044 - }, - { - "epoch": 0.6953417205032302, - "grad_norm": 1.868569645752557, - "learning_rate": 2.243939318952234e-06, - "loss": 0.8159, - "step": 2045 - }, - { - "epoch": 0.6956817409044542, - "grad_norm": 2.2407320960892743, - "learning_rate": 2.239345558269801e-06, - "loss": 0.8396, - "step": 2046 - }, - { - "epoch": 0.6960217613056784, - "grad_norm": 1.7022760601328137, - "learning_rate": 2.23475514733977e-06, - "loss": 0.817, - "step": 2047 - }, - { - "epoch": 0.6963617817069024, - "grad_norm": 1.6301569903177133, - "learning_rate": 2.230168091732106e-06, - "loss": 0.615, - "step": 2048 - }, - { - "epoch": 0.6967018021081265, - "grad_norm": 1.9046707956588385, - "learning_rate": 2.2255843970126957e-06, - "loss": 0.7858, - "step": 2049 - }, - { - "epoch": 0.6970418225093505, - "grad_norm": 1.9673929880121703, - "learning_rate": 2.221004068743356e-06, - "loss": 0.845, - "step": 2050 - }, - { - "epoch": 0.6973818429105746, - "grad_norm": 1.782595694166562, - "learning_rate": 2.2164271124818103e-06, - "loss": 0.7656, - "step": 2051 - }, - { - "epoch": 0.6977218633117988, - "grad_norm": 1.6386330711294346, - "learning_rate": 2.2118535337817003e-06, - "loss": 0.7537, - "step": 2052 - }, - { - "epoch": 0.6980618837130228, - "grad_norm": 1.4316705818253959, - "learning_rate": 2.207283338192559e-06, - "loss": 0.6975, - "step": 2053 - }, - { - "epoch": 0.6984019041142469, - "grad_norm": 1.5611707150455605, - "learning_rate": 2.2027165312598185e-06, - "loss": 0.6761, - "step": 2054 - }, - { - "epoch": 0.6987419245154709, - "grad_norm": 1.5355495259096092, - "learning_rate": 2.1981531185248034e-06, - "loss": 0.6972, - "step": 2055 - }, - { - "epoch": 0.699081944916695, - "grad_norm": 1.9515735201013045, - "learning_rate": 2.1935931055247127e-06, - "loss": 0.7739, - "step": 2056 - }, - { - "epoch": 0.6994219653179191, - "grad_norm": 1.707681607294952, - "learning_rate": 2.1890364977926283e-06, - "loss": 0.8014, - "step": 2057 - }, - { - "epoch": 0.6997619857191432, - "grad_norm": 1.9416088276270818, - "learning_rate": 2.18448330085749e-06, - "loss": 0.7066, - "step": 2058 - }, - { - "epoch": 0.7001020061203672, - "grad_norm": 1.816549981430375, - "learning_rate": 2.1799335202441104e-06, - "loss": 0.8464, - "step": 2059 - }, - { - "epoch": 0.7004420265215913, - "grad_norm": 1.9376066291343164, - "learning_rate": 2.1753871614731474e-06, - "loss": 0.7222, - "step": 2060 - }, - { - "epoch": 0.7007820469228153, - "grad_norm": 2.118492372862069, - "learning_rate": 2.1708442300611115e-06, - "loss": 0.7918, - "step": 2061 - }, - { - "epoch": 0.7011220673240395, - "grad_norm": 2.0839760418604376, - "learning_rate": 2.1663047315203533e-06, - "loss": 0.8174, - "step": 2062 - }, - { - "epoch": 0.7014620877252635, - "grad_norm": 2.3407523269538313, - "learning_rate": 2.1617686713590557e-06, - "loss": 0.7331, - "step": 2063 - }, - { - "epoch": 0.7018021081264876, - "grad_norm": 2.3029622189762757, - "learning_rate": 2.1572360550812354e-06, - "loss": 0.8031, - "step": 2064 - }, - { - "epoch": 0.7021421285277116, - "grad_norm": 2.1267697856713657, - "learning_rate": 2.1527068881867243e-06, - "loss": 0.7973, - "step": 2065 - }, - { - "epoch": 0.7024821489289358, - "grad_norm": 1.8215858343618574, - "learning_rate": 2.148181176171174e-06, - "loss": 0.8117, - "step": 2066 - }, - { - "epoch": 0.7028221693301598, - "grad_norm": 1.4157717285401026, - "learning_rate": 2.1436589245260375e-06, - "loss": 0.9047, - "step": 2067 - }, - { - "epoch": 0.7031621897313839, - "grad_norm": 1.7979159155184197, - "learning_rate": 2.1391401387385773e-06, - "loss": 0.8326, - "step": 2068 - }, - { - "epoch": 0.7035022101326079, - "grad_norm": 2.0914051675300835, - "learning_rate": 2.134624824291846e-06, - "loss": 0.8622, - "step": 2069 - }, - { - "epoch": 0.703842230533832, - "grad_norm": 2.1070541206963056, - "learning_rate": 2.1301129866646774e-06, - "loss": 0.8943, - "step": 2070 - }, - { - "epoch": 0.7041822509350562, - "grad_norm": 2.13463784164494, - "learning_rate": 2.1256046313317002e-06, - "loss": 0.8321, - "step": 2071 - }, - { - "epoch": 0.7045222713362802, - "grad_norm": 2.5136301018463754, - "learning_rate": 2.1210997637633067e-06, - "loss": 0.7691, - "step": 2072 - }, - { - "epoch": 0.7048622917375043, - "grad_norm": 2.2541601501295783, - "learning_rate": 2.1165983894256647e-06, - "loss": 0.7222, - "step": 2073 - }, - { - "epoch": 0.7052023121387283, - "grad_norm": 2.98901817786412, - "learning_rate": 2.1121005137806964e-06, - "loss": 0.7528, - "step": 2074 - }, - { - "epoch": 0.7055423325399524, - "grad_norm": 1.8580295018583073, - "learning_rate": 2.1076061422860862e-06, - "loss": 0.7779, - "step": 2075 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 2.2207128533601983, - "learning_rate": 2.1031152803952605e-06, - "loss": 0.7673, - "step": 2076 - }, - { - "epoch": 0.7062223733424006, - "grad_norm": 1.6735932994012201, - "learning_rate": 2.098627933557389e-06, - "loss": 0.8363, - "step": 2077 - }, - { - "epoch": 0.7065623937436246, - "grad_norm": 1.5680321057110584, - "learning_rate": 2.0941441072173766e-06, - "loss": 0.7077, - "step": 2078 - }, - { - "epoch": 0.7069024141448487, - "grad_norm": 1.8545485537631567, - "learning_rate": 2.089663806815856e-06, - "loss": 0.7437, - "step": 2079 - }, - { - "epoch": 0.7072424345460727, - "grad_norm": 1.700070379165538, - "learning_rate": 2.085187037789184e-06, - "loss": 0.7529, - "step": 2080 - }, - { - "epoch": 0.7075824549472969, - "grad_norm": 2.092387228234501, - "learning_rate": 2.080713805569427e-06, - "loss": 0.8072, - "step": 2081 - }, - { - "epoch": 0.7079224753485209, - "grad_norm": 1.6576023640763133, - "learning_rate": 2.0762441155843678e-06, - "loss": 0.8356, - "step": 2082 - }, - { - "epoch": 0.708262495749745, - "grad_norm": 2.096722406636028, - "learning_rate": 2.071777973257482e-06, - "loss": 0.6847, - "step": 2083 - }, - { - "epoch": 0.708602516150969, - "grad_norm": 2.647186031515588, - "learning_rate": 2.0673153840079502e-06, - "loss": 0.8209, - "step": 2084 - }, - { - "epoch": 0.7089425365521931, - "grad_norm": 1.772423654028428, - "learning_rate": 2.0628563532506334e-06, - "loss": 0.7487, - "step": 2085 - }, - { - "epoch": 0.7092825569534172, - "grad_norm": 2.006215820360962, - "learning_rate": 2.058400886396079e-06, - "loss": 0.7881, - "step": 2086 - }, - { - "epoch": 0.7096225773546413, - "grad_norm": 1.5471023818172427, - "learning_rate": 2.053948988850508e-06, - "loss": 0.8533, - "step": 2087 - }, - { - "epoch": 0.7099625977558653, - "grad_norm": 1.765248777545704, - "learning_rate": 2.0495006660158113e-06, - "loss": 0.6848, - "step": 2088 - }, - { - "epoch": 0.7103026181570894, - "grad_norm": 1.6747092561465118, - "learning_rate": 2.045055923289544e-06, - "loss": 0.7101, - "step": 2089 - }, - { - "epoch": 0.7106426385583134, - "grad_norm": 1.9042522058118778, - "learning_rate": 2.040614766064913e-06, - "loss": 0.7763, - "step": 2090 - }, - { - "epoch": 0.7109826589595376, - "grad_norm": 6.255027159940595, - "learning_rate": 2.036177199730781e-06, - "loss": 0.85, - "step": 2091 - }, - { - "epoch": 0.7113226793607617, - "grad_norm": 1.622002495881479, - "learning_rate": 2.0317432296716475e-06, - "loss": 0.727, - "step": 2092 - }, - { - "epoch": 0.7116626997619857, - "grad_norm": 1.691104238150303, - "learning_rate": 2.0273128612676506e-06, - "loss": 0.7537, - "step": 2093 - }, - { - "epoch": 0.7120027201632098, - "grad_norm": 1.7231893078223932, - "learning_rate": 2.0228860998945577e-06, - "loss": 0.7642, - "step": 2094 - }, - { - "epoch": 0.7123427405644339, - "grad_norm": 2.1757614848576345, - "learning_rate": 2.0184629509237583e-06, - "loss": 0.8258, - "step": 2095 - }, - { - "epoch": 0.712682760965658, - "grad_norm": 1.7874598523749379, - "learning_rate": 2.0140434197222647e-06, - "loss": 0.8447, - "step": 2096 - }, - { - "epoch": 0.713022781366882, - "grad_norm": 1.4437605429731617, - "learning_rate": 2.00962751165269e-06, - "loss": 0.8244, - "step": 2097 - }, - { - "epoch": 0.7133628017681061, - "grad_norm": 2.0064545785073773, - "learning_rate": 2.00521523207326e-06, - "loss": 0.7231, - "step": 2098 - }, - { - "epoch": 0.7137028221693301, - "grad_norm": 1.8662037760559647, - "learning_rate": 2.0008065863377903e-06, - "loss": 0.7298, - "step": 2099 - }, - { - "epoch": 0.7140428425705543, - "grad_norm": 1.864146642080098, - "learning_rate": 1.996401579795693e-06, - "loss": 0.8252, - "step": 2100 - }, - { - "epoch": 0.7143828629717783, - "grad_norm": 2.131314441947248, - "learning_rate": 1.9920002177919622e-06, - "loss": 0.8641, - "step": 2101 - }, - { - "epoch": 0.7147228833730024, - "grad_norm": 1.6478848825280967, - "learning_rate": 1.987602505667169e-06, - "loss": 0.8314, - "step": 2102 - }, - { - "epoch": 0.7150629037742264, - "grad_norm": 1.6965083554911622, - "learning_rate": 1.983208448757455e-06, - "loss": 0.7798, - "step": 2103 - }, - { - "epoch": 0.7154029241754505, - "grad_norm": 2.2399140677663514, - "learning_rate": 1.978818052394528e-06, - "loss": 0.7183, - "step": 2104 - }, - { - "epoch": 0.7157429445766746, - "grad_norm": 2.052486306229749, - "learning_rate": 1.974431321905656e-06, - "loss": 0.6849, - "step": 2105 - }, - { - "epoch": 0.7160829649778987, - "grad_norm": 2.3591721996625266, - "learning_rate": 1.9700482626136548e-06, - "loss": 0.8216, - "step": 2106 - }, - { - "epoch": 0.7164229853791227, - "grad_norm": 1.7604095436525182, - "learning_rate": 1.9656688798368905e-06, - "loss": 0.6536, - "step": 2107 - }, - { - "epoch": 0.7167630057803468, - "grad_norm": 1.9615351287468117, - "learning_rate": 1.9612931788892637e-06, - "loss": 0.7843, - "step": 2108 - }, - { - "epoch": 0.7171030261815708, - "grad_norm": 2.0592818898356997, - "learning_rate": 1.956921165080208e-06, - "loss": 0.7863, - "step": 2109 - }, - { - "epoch": 0.717443046582795, - "grad_norm": 2.8058446206181253, - "learning_rate": 1.9525528437146886e-06, - "loss": 0.7241, - "step": 2110 - }, - { - "epoch": 0.7177830669840191, - "grad_norm": 1.9945581002621289, - "learning_rate": 1.9481882200931794e-06, - "loss": 0.903, - "step": 2111 - }, - { - "epoch": 0.7181230873852431, - "grad_norm": 2.180468609375338, - "learning_rate": 1.94382729951168e-06, - "loss": 0.8265, - "step": 2112 - }, - { - "epoch": 0.7184631077864672, - "grad_norm": 2.623414185405911, - "learning_rate": 1.9394700872616856e-06, - "loss": 0.7801, - "step": 2113 - }, - { - "epoch": 0.7188031281876912, - "grad_norm": 1.7582604702565359, - "learning_rate": 1.9351165886302026e-06, - "loss": 0.802, - "step": 2114 - }, - { - "epoch": 0.7191431485889154, - "grad_norm": 2.5152283185802533, - "learning_rate": 1.9307668088997206e-06, - "loss": 0.8063, - "step": 2115 - }, - { - "epoch": 0.7194831689901394, - "grad_norm": 2.7666858070153437, - "learning_rate": 1.9264207533482264e-06, - "loss": 0.7285, - "step": 2116 - }, - { - "epoch": 0.7198231893913635, - "grad_norm": 2.069299953932944, - "learning_rate": 1.922078427249181e-06, - "loss": 0.6232, - "step": 2117 - }, - { - "epoch": 0.7201632097925875, - "grad_norm": 1.9669276644036997, - "learning_rate": 1.917739835871523e-06, - "loss": 0.8435, - "step": 2118 - }, - { - "epoch": 0.7205032301938117, - "grad_norm": 2.352513634957127, - "learning_rate": 1.9134049844796583e-06, - "loss": 0.7437, - "step": 2119 - }, - { - "epoch": 0.7208432505950357, - "grad_norm": 1.7010870421085014, - "learning_rate": 1.9090738783334535e-06, - "loss": 0.8334, - "step": 2120 - }, - { - "epoch": 0.7211832709962598, - "grad_norm": 1.8286429902827022, - "learning_rate": 1.904746522688236e-06, - "loss": 0.8278, - "step": 2121 - }, - { - "epoch": 0.7215232913974838, - "grad_norm": 2.204105230258611, - "learning_rate": 1.9004229227947752e-06, - "loss": 0.7401, - "step": 2122 - }, - { - "epoch": 0.7218633117987079, - "grad_norm": 2.1051613276517003, - "learning_rate": 1.896103083899291e-06, - "loss": 0.7424, - "step": 2123 - }, - { - "epoch": 0.722203332199932, - "grad_norm": 1.913948718818119, - "learning_rate": 1.891787011243434e-06, - "loss": 0.9045, - "step": 2124 - }, - { - "epoch": 0.7225433526011561, - "grad_norm": 1.5026786148699889, - "learning_rate": 1.8874747100642844e-06, - "loss": 0.7094, - "step": 2125 - }, - { - "epoch": 0.7228833730023801, - "grad_norm": 2.46070363197272, - "learning_rate": 1.8831661855943517e-06, - "loss": 0.7417, - "step": 2126 - }, - { - "epoch": 0.7232233934036042, - "grad_norm": 1.7241794429729518, - "learning_rate": 1.8788614430615582e-06, - "loss": 0.7184, - "step": 2127 - }, - { - "epoch": 0.7235634138048282, - "grad_norm": 1.7709850180647637, - "learning_rate": 1.8745604876892376e-06, - "loss": 0.8362, - "step": 2128 - }, - { - "epoch": 0.7239034342060524, - "grad_norm": 2.062995631686198, - "learning_rate": 1.8702633246961282e-06, - "loss": 0.7107, - "step": 2129 - }, - { - "epoch": 0.7242434546072765, - "grad_norm": 1.7639145613774367, - "learning_rate": 1.8659699592963705e-06, - "loss": 0.7337, - "step": 2130 - }, - { - "epoch": 0.7245834750085005, - "grad_norm": 1.6485206528522929, - "learning_rate": 1.8616803966994912e-06, - "loss": 0.7445, - "step": 2131 - }, - { - "epoch": 0.7249234954097246, - "grad_norm": 2.2432367551430485, - "learning_rate": 1.8573946421104082e-06, - "loss": 0.7921, - "step": 2132 - }, - { - "epoch": 0.7252635158109486, - "grad_norm": 2.4352161818113176, - "learning_rate": 1.8531127007294159e-06, - "loss": 0.8438, - "step": 2133 - }, - { - "epoch": 0.7256035362121728, - "grad_norm": 1.7595604425268574, - "learning_rate": 1.8488345777521804e-06, - "loss": 0.7116, - "step": 2134 - }, - { - "epoch": 0.7259435566133968, - "grad_norm": 1.9847525687521061, - "learning_rate": 1.8445602783697375e-06, - "loss": 0.7425, - "step": 2135 - }, - { - "epoch": 0.7262835770146209, - "grad_norm": 2.6735934681474363, - "learning_rate": 1.8402898077684806e-06, - "loss": 0.7244, - "step": 2136 - }, - { - "epoch": 0.7266235974158449, - "grad_norm": 1.9110660509672215, - "learning_rate": 1.8360231711301618e-06, - "loss": 0.8195, - "step": 2137 - }, - { - "epoch": 0.726963617817069, - "grad_norm": 2.047331681525153, - "learning_rate": 1.8317603736318746e-06, - "loss": 0.8365, - "step": 2138 - }, - { - "epoch": 0.7273036382182931, - "grad_norm": 1.6690303524368486, - "learning_rate": 1.8275014204460623e-06, - "loss": 0.7402, - "step": 2139 - }, - { - "epoch": 0.7276436586195172, - "grad_norm": 3.9059200473743965, - "learning_rate": 1.8232463167404968e-06, - "loss": 0.7426, - "step": 2140 - }, - { - "epoch": 0.7279836790207412, - "grad_norm": 1.932200172688164, - "learning_rate": 1.818995067678279e-06, - "loss": 0.7984, - "step": 2141 - }, - { - "epoch": 0.7283236994219653, - "grad_norm": 1.666195338393341, - "learning_rate": 1.8147476784178398e-06, - "loss": 0.8126, - "step": 2142 - }, - { - "epoch": 0.7286637198231893, - "grad_norm": 1.3626844223285068, - "learning_rate": 1.8105041541129187e-06, - "loss": 0.7733, - "step": 2143 - }, - { - "epoch": 0.7290037402244135, - "grad_norm": 2.5378369721423577, - "learning_rate": 1.8062644999125694e-06, - "loss": 0.7947, - "step": 2144 - }, - { - "epoch": 0.7293437606256375, - "grad_norm": 2.071953765717228, - "learning_rate": 1.8020287209611464e-06, - "loss": 0.7677, - "step": 2145 - }, - { - "epoch": 0.7296837810268616, - "grad_norm": 2.161502417367569, - "learning_rate": 1.7977968223983089e-06, - "loss": 0.8209, - "step": 2146 - }, - { - "epoch": 0.7300238014280857, - "grad_norm": 1.736684016690494, - "learning_rate": 1.7935688093589987e-06, - "loss": 0.7406, - "step": 2147 - }, - { - "epoch": 0.7303638218293098, - "grad_norm": 1.6090760426244202, - "learning_rate": 1.789344686973452e-06, - "loss": 0.7293, - "step": 2148 - }, - { - "epoch": 0.7307038422305339, - "grad_norm": 1.5720198527469798, - "learning_rate": 1.785124460367177e-06, - "loss": 0.7335, - "step": 2149 - }, - { - "epoch": 0.7310438626317579, - "grad_norm": 2.8678996816857842, - "learning_rate": 1.7809081346609574e-06, - "loss": 0.7563, - "step": 2150 - }, - { - "epoch": 0.731383883032982, - "grad_norm": 1.7011052505024924, - "learning_rate": 1.7766957149708442e-06, - "loss": 0.805, - "step": 2151 - }, - { - "epoch": 0.731723903434206, - "grad_norm": 2.179333898822292, - "learning_rate": 1.7724872064081461e-06, - "loss": 0.7768, - "step": 2152 - }, - { - "epoch": 0.7320639238354302, - "grad_norm": 1.9214766322552161, - "learning_rate": 1.768282614079432e-06, - "loss": 0.8926, - "step": 2153 - }, - { - "epoch": 0.7324039442366542, - "grad_norm": 1.6016132177586981, - "learning_rate": 1.7640819430865113e-06, - "loss": 0.7477, - "step": 2154 - }, - { - "epoch": 0.7327439646378783, - "grad_norm": 1.7429702206342037, - "learning_rate": 1.7598851985264426e-06, - "loss": 0.7465, - "step": 2155 - }, - { - "epoch": 0.7330839850391023, - "grad_norm": 3.780369551041426, - "learning_rate": 1.7556923854915148e-06, - "loss": 0.8328, - "step": 2156 - }, - { - "epoch": 0.7334240054403264, - "grad_norm": 5.582776086223021, - "learning_rate": 1.7515035090692466e-06, - "loss": 0.7591, - "step": 2157 - }, - { - "epoch": 0.7337640258415505, - "grad_norm": 1.75407838644776, - "learning_rate": 1.7473185743423853e-06, - "loss": 0.8702, - "step": 2158 - }, - { - "epoch": 0.7341040462427746, - "grad_norm": 3.88944154897513, - "learning_rate": 1.74313758638889e-06, - "loss": 0.7773, - "step": 2159 - }, - { - "epoch": 0.7344440666439986, - "grad_norm": 1.7976911792803083, - "learning_rate": 1.7389605502819324e-06, - "loss": 0.7521, - "step": 2160 - }, - { - "epoch": 0.7347840870452227, - "grad_norm": 2.645410770587803, - "learning_rate": 1.734787471089887e-06, - "loss": 0.7529, - "step": 2161 - }, - { - "epoch": 0.7351241074464467, - "grad_norm": 2.2156116956693133, - "learning_rate": 1.730618353876334e-06, - "loss": 0.7916, - "step": 2162 - }, - { - "epoch": 0.7354641278476709, - "grad_norm": 2.6170696483726616, - "learning_rate": 1.726453203700037e-06, - "loss": 0.8291, - "step": 2163 - }, - { - "epoch": 0.735804148248895, - "grad_norm": 1.7461843428392605, - "learning_rate": 1.7222920256149544e-06, - "loss": 0.8014, - "step": 2164 - }, - { - "epoch": 0.736144168650119, - "grad_norm": 2.1471971587915606, - "learning_rate": 1.7181348246702184e-06, - "loss": 0.908, - "step": 2165 - }, - { - "epoch": 0.736484189051343, - "grad_norm": 1.6110057508169202, - "learning_rate": 1.7139816059101372e-06, - "loss": 0.8735, - "step": 2166 - }, - { - "epoch": 0.7368242094525671, - "grad_norm": 1.7252936307832583, - "learning_rate": 1.7098323743741906e-06, - "loss": 0.8038, - "step": 2167 - }, - { - "epoch": 0.7371642298537913, - "grad_norm": 1.4751364286474202, - "learning_rate": 1.705687135097016e-06, - "loss": 0.7546, - "step": 2168 - }, - { - "epoch": 0.7375042502550153, - "grad_norm": 1.8792991681150337, - "learning_rate": 1.7015458931084084e-06, - "loss": 0.7799, - "step": 2169 - }, - { - "epoch": 0.7378442706562394, - "grad_norm": 1.9139596611161407, - "learning_rate": 1.69740865343331e-06, - "loss": 0.7647, - "step": 2170 - }, - { - "epoch": 0.7381842910574634, - "grad_norm": 2.90776955170561, - "learning_rate": 1.6932754210918133e-06, - "loss": 0.8511, - "step": 2171 - }, - { - "epoch": 0.7385243114586875, - "grad_norm": 1.593958716915834, - "learning_rate": 1.689146201099141e-06, - "loss": 0.7145, - "step": 2172 - }, - { - "epoch": 0.7388643318599116, - "grad_norm": 2.13248225387608, - "learning_rate": 1.6850209984656497e-06, - "loss": 0.8358, - "step": 2173 - }, - { - "epoch": 0.7392043522611357, - "grad_norm": 1.5208790512501984, - "learning_rate": 1.6808998181968238e-06, - "loss": 0.8051, - "step": 2174 - }, - { - "epoch": 0.7395443726623597, - "grad_norm": 1.6847664492358727, - "learning_rate": 1.6767826652932651e-06, - "loss": 0.6941, - "step": 2175 - }, - { - "epoch": 0.7398843930635838, - "grad_norm": 2.169697482882676, - "learning_rate": 1.6726695447506873e-06, - "loss": 0.6935, - "step": 2176 - }, - { - "epoch": 0.7402244134648079, - "grad_norm": 2.4175688054476634, - "learning_rate": 1.6685604615599117e-06, - "loss": 0.7608, - "step": 2177 - }, - { - "epoch": 0.740564433866032, - "grad_norm": 1.9499646893695743, - "learning_rate": 1.6644554207068642e-06, - "loss": 0.6843, - "step": 2178 - }, - { - "epoch": 0.740904454267256, - "grad_norm": 3.3649722434795937, - "learning_rate": 1.6603544271725607e-06, - "loss": 0.7127, - "step": 2179 - }, - { - "epoch": 0.7412444746684801, - "grad_norm": 1.9340583992381386, - "learning_rate": 1.656257485933111e-06, - "loss": 0.6918, - "step": 2180 - }, - { - "epoch": 0.7415844950697041, - "grad_norm": 2.1990875141504214, - "learning_rate": 1.652164601959705e-06, - "loss": 0.7244, - "step": 2181 - }, - { - "epoch": 0.7419245154709283, - "grad_norm": 1.6090430714552022, - "learning_rate": 1.648075780218607e-06, - "loss": 0.7215, - "step": 2182 - }, - { - "epoch": 0.7422645358721524, - "grad_norm": 2.1536726028566986, - "learning_rate": 1.6439910256711595e-06, - "loss": 0.7827, - "step": 2183 - }, - { - "epoch": 0.7426045562733764, - "grad_norm": 2.073708232272958, - "learning_rate": 1.6399103432737635e-06, - "loss": 0.7036, - "step": 2184 - }, - { - "epoch": 0.7429445766746005, - "grad_norm": 2.593918036362238, - "learning_rate": 1.635833737977881e-06, - "loss": 0.8041, - "step": 2185 - }, - { - "epoch": 0.7432845970758245, - "grad_norm": 2.958691737480463, - "learning_rate": 1.631761214730026e-06, - "loss": 0.7823, - "step": 2186 - }, - { - "epoch": 0.7436246174770487, - "grad_norm": 2.511544350856816, - "learning_rate": 1.6276927784717628e-06, - "loss": 0.8576, - "step": 2187 - }, - { - "epoch": 0.7439646378782727, - "grad_norm": 2.9344433102702556, - "learning_rate": 1.623628434139693e-06, - "loss": 0.8594, - "step": 2188 - }, - { - "epoch": 0.7443046582794968, - "grad_norm": 1.8620840535756518, - "learning_rate": 1.6195681866654517e-06, - "loss": 0.7979, - "step": 2189 - }, - { - "epoch": 0.7446446786807208, - "grad_norm": 2.3243589724742857, - "learning_rate": 1.6155120409757096e-06, - "loss": 0.8446, - "step": 2190 - }, - { - "epoch": 0.7449846990819449, - "grad_norm": 3.875803179972586, - "learning_rate": 1.6114600019921538e-06, - "loss": 0.8572, - "step": 2191 - }, - { - "epoch": 0.745324719483169, - "grad_norm": 1.7545550229086833, - "learning_rate": 1.6074120746314915e-06, - "loss": 0.8357, - "step": 2192 - }, - { - "epoch": 0.7456647398843931, - "grad_norm": 2.7660506150712343, - "learning_rate": 1.6033682638054376e-06, - "loss": 0.7566, - "step": 2193 - }, - { - "epoch": 0.7460047602856171, - "grad_norm": 2.208599221707356, - "learning_rate": 1.5993285744207183e-06, - "loss": 0.8451, - "step": 2194 - }, - { - "epoch": 0.7463447806868412, - "grad_norm": 1.6557633542709016, - "learning_rate": 1.5952930113790516e-06, - "loss": 0.7773, - "step": 2195 - }, - { - "epoch": 0.7466848010880652, - "grad_norm": 1.992062183353188, - "learning_rate": 1.5912615795771557e-06, - "loss": 0.8025, - "step": 2196 - }, - { - "epoch": 0.7470248214892894, - "grad_norm": 2.6192247388362597, - "learning_rate": 1.5872342839067305e-06, - "loss": 0.7836, - "step": 2197 - }, - { - "epoch": 0.7473648418905134, - "grad_norm": 1.6643325962419526, - "learning_rate": 1.5832111292544571e-06, - "loss": 0.6351, - "step": 2198 - }, - { - "epoch": 0.7477048622917375, - "grad_norm": 1.7740267079319336, - "learning_rate": 1.5791921205019984e-06, - "loss": 0.7505, - "step": 2199 - }, - { - "epoch": 0.7480448826929615, - "grad_norm": 3.3050676756035293, - "learning_rate": 1.5751772625259787e-06, - "loss": 0.871, - "step": 2200 - }, - { - "epoch": 0.7483849030941857, - "grad_norm": 2.1038805512690875, - "learning_rate": 1.571166560197991e-06, - "loss": 0.7385, - "step": 2201 - }, - { - "epoch": 0.7487249234954098, - "grad_norm": 2.7781533096347144, - "learning_rate": 1.567160018384582e-06, - "loss": 0.6283, - "step": 2202 - }, - { - "epoch": 0.7490649438966338, - "grad_norm": 1.8302184287905139, - "learning_rate": 1.563157641947255e-06, - "loss": 0.7385, - "step": 2203 - }, - { - "epoch": 0.7494049642978579, - "grad_norm": 2.5596522162134363, - "learning_rate": 1.5591594357424555e-06, - "loss": 0.8839, - "step": 2204 - }, - { - "epoch": 0.7497449846990819, - "grad_norm": 2.120816300981687, - "learning_rate": 1.555165404621567e-06, - "loss": 0.7999, - "step": 2205 - }, - { - "epoch": 0.7500850051003061, - "grad_norm": 2.3376616281310407, - "learning_rate": 1.5511755534309143e-06, - "loss": 0.8791, - "step": 2206 - }, - { - "epoch": 0.7504250255015301, - "grad_norm": 1.9094661848594028, - "learning_rate": 1.5471898870117414e-06, - "loss": 0.8485, - "step": 2207 - }, - { - "epoch": 0.7507650459027542, - "grad_norm": 1.7917052477602944, - "learning_rate": 1.5432084102002243e-06, - "loss": 0.7979, - "step": 2208 - }, - { - "epoch": 0.7511050663039782, - "grad_norm": 2.0422812529030425, - "learning_rate": 1.539231127827443e-06, - "loss": 0.789, - "step": 2209 - }, - { - "epoch": 0.7514450867052023, - "grad_norm": 2.671426125801568, - "learning_rate": 1.5352580447194e-06, - "loss": 0.7645, - "step": 2210 - }, - { - "epoch": 0.7517851071064264, - "grad_norm": 2.107626624470792, - "learning_rate": 1.5312891656969936e-06, - "loss": 0.7658, - "step": 2211 - }, - { - "epoch": 0.7521251275076505, - "grad_norm": 1.8158680120697543, - "learning_rate": 1.5273244955760286e-06, - "loss": 0.7953, - "step": 2212 - }, - { - "epoch": 0.7524651479088745, - "grad_norm": 1.978438021845548, - "learning_rate": 1.5233640391671973e-06, - "loss": 0.8217, - "step": 2213 - }, - { - "epoch": 0.7528051683100986, - "grad_norm": 2.2654036793743955, - "learning_rate": 1.5194078012760781e-06, - "loss": 0.7727, - "step": 2214 - }, - { - "epoch": 0.7531451887113226, - "grad_norm": 1.654479252128216, - "learning_rate": 1.5154557867031378e-06, - "loss": 0.7005, - "step": 2215 - }, - { - "epoch": 0.7534852091125468, - "grad_norm": 1.9670315140819592, - "learning_rate": 1.511508000243711e-06, - "loss": 0.8233, - "step": 2216 - }, - { - "epoch": 0.7538252295137708, - "grad_norm": 1.6308197393473027, - "learning_rate": 1.5075644466880063e-06, - "loss": 0.7718, - "step": 2217 - }, - { - "epoch": 0.7541652499149949, - "grad_norm": 1.9162779769547262, - "learning_rate": 1.5036251308210926e-06, - "loss": 0.7515, - "step": 2218 - }, - { - "epoch": 0.754505270316219, - "grad_norm": 2.9000813161046404, - "learning_rate": 1.4996900574229022e-06, - "loss": 0.8187, - "step": 2219 - }, - { - "epoch": 0.754845290717443, - "grad_norm": 2.216886994384, - "learning_rate": 1.4957592312682157e-06, - "loss": 0.7672, - "step": 2220 - }, - { - "epoch": 0.7551853111186672, - "grad_norm": 1.586989789116563, - "learning_rate": 1.4918326571266584e-06, - "loss": 0.7531, - "step": 2221 - }, - { - "epoch": 0.7555253315198912, - "grad_norm": 1.8489815625024457, - "learning_rate": 1.4879103397627027e-06, - "loss": 0.7646, - "step": 2222 - }, - { - "epoch": 0.7558653519211153, - "grad_norm": 1.8544337375478892, - "learning_rate": 1.4839922839356484e-06, - "loss": 0.7514, - "step": 2223 - }, - { - "epoch": 0.7562053723223393, - "grad_norm": 2.344521934054793, - "learning_rate": 1.4800784943996316e-06, - "loss": 0.7807, - "step": 2224 - }, - { - "epoch": 0.7565453927235634, - "grad_norm": 2.653629676503456, - "learning_rate": 1.4761689759036058e-06, - "loss": 0.8042, - "step": 2225 - }, - { - "epoch": 0.7568854131247875, - "grad_norm": 1.6279456912028547, - "learning_rate": 1.4722637331913447e-06, - "loss": 0.6855, - "step": 2226 - }, - { - "epoch": 0.7572254335260116, - "grad_norm": 2.235296588127983, - "learning_rate": 1.4683627710014325e-06, - "loss": 0.7996, - "step": 2227 - }, - { - "epoch": 0.7575654539272356, - "grad_norm": 1.9873774972236244, - "learning_rate": 1.4644660940672628e-06, - "loss": 0.7425, - "step": 2228 - }, - { - "epoch": 0.7579054743284597, - "grad_norm": 1.8195331106101174, - "learning_rate": 1.4605737071170257e-06, - "loss": 0.7902, - "step": 2229 - }, - { - "epoch": 0.7582454947296838, - "grad_norm": 2.0299601573336705, - "learning_rate": 1.4566856148737057e-06, - "loss": 0.6815, - "step": 2230 - }, - { - "epoch": 0.7585855151309079, - "grad_norm": 1.400935242806698, - "learning_rate": 1.452801822055081e-06, - "loss": 0.7916, - "step": 2231 - }, - { - "epoch": 0.7589255355321319, - "grad_norm": 1.7386391395292276, - "learning_rate": 1.4489223333737084e-06, - "loss": 0.8002, - "step": 2232 - }, - { - "epoch": 0.759265555933356, - "grad_norm": 1.73667800084222, - "learning_rate": 1.4450471535369225e-06, - "loss": 0.7085, - "step": 2233 - }, - { - "epoch": 0.75960557633458, - "grad_norm": 1.739383492547159, - "learning_rate": 1.44117628724683e-06, - "loss": 0.8655, - "step": 2234 - }, - { - "epoch": 0.7599455967358042, - "grad_norm": 1.8392317788494839, - "learning_rate": 1.437309739200306e-06, - "loss": 0.7253, - "step": 2235 - }, - { - "epoch": 0.7602856171370282, - "grad_norm": 1.546424794533531, - "learning_rate": 1.4334475140889813e-06, - "loss": 0.7947, - "step": 2236 - }, - { - "epoch": 0.7606256375382523, - "grad_norm": 2.339707672874242, - "learning_rate": 1.4295896165992473e-06, - "loss": 0.8063, - "step": 2237 - }, - { - "epoch": 0.7609656579394763, - "grad_norm": 1.6335877702583506, - "learning_rate": 1.4257360514122393e-06, - "loss": 0.7126, - "step": 2238 - }, - { - "epoch": 0.7613056783407004, - "grad_norm": 1.9927357472606342, - "learning_rate": 1.4218868232038351e-06, - "loss": 0.8169, - "step": 2239 - }, - { - "epoch": 0.7616456987419246, - "grad_norm": 1.9336783631243173, - "learning_rate": 1.4180419366446568e-06, - "loss": 0.8019, - "step": 2240 - }, - { - "epoch": 0.7619857191431486, - "grad_norm": 1.8763655884472532, - "learning_rate": 1.4142013964000513e-06, - "loss": 0.8054, - "step": 2241 - }, - { - "epoch": 0.7623257395443727, - "grad_norm": 2.025901859532698, - "learning_rate": 1.4103652071300945e-06, - "loss": 0.8657, - "step": 2242 - }, - { - "epoch": 0.7626657599455967, - "grad_norm": 1.9142987689118731, - "learning_rate": 1.4065333734895815e-06, - "loss": 0.8067, - "step": 2243 - }, - { - "epoch": 0.7630057803468208, - "grad_norm": 1.8673978585090811, - "learning_rate": 1.4027059001280269e-06, - "loss": 0.7602, - "step": 2244 - }, - { - "epoch": 0.7633458007480449, - "grad_norm": 1.5244016754272622, - "learning_rate": 1.3988827916896491e-06, - "loss": 0.761, - "step": 2245 - }, - { - "epoch": 0.763685821149269, - "grad_norm": 1.6762856131051267, - "learning_rate": 1.3950640528133713e-06, - "loss": 0.8457, - "step": 2246 - }, - { - "epoch": 0.764025841550493, - "grad_norm": 1.696468220368342, - "learning_rate": 1.3912496881328185e-06, - "loss": 0.6888, - "step": 2247 - }, - { - "epoch": 0.7643658619517171, - "grad_norm": 1.807989274352168, - "learning_rate": 1.3874397022763024e-06, - "loss": 0.7174, - "step": 2248 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 2.1049766319752674, - "learning_rate": 1.3836340998668284e-06, - "loss": 0.8443, - "step": 2249 - }, - { - "epoch": 0.7650459027541653, - "grad_norm": 4.147430162429283, - "learning_rate": 1.379832885522074e-06, - "loss": 0.8293, - "step": 2250 - }, - { - "epoch": 0.7653859231553893, - "grad_norm": 2.203536974887446, - "learning_rate": 1.3760360638544012e-06, - "loss": 0.766, - "step": 2251 - }, - { - "epoch": 0.7657259435566134, - "grad_norm": 2.84334808411539, - "learning_rate": 1.3722436394708349e-06, - "loss": 0.7397, - "step": 2252 - }, - { - "epoch": 0.7660659639578374, - "grad_norm": 2.4606094795939875, - "learning_rate": 1.3684556169730706e-06, - "loss": 0.8327, - "step": 2253 - }, - { - "epoch": 0.7664059843590616, - "grad_norm": 3.801030903497411, - "learning_rate": 1.3646720009574582e-06, - "loss": 0.7486, - "step": 2254 - }, - { - "epoch": 0.7667460047602856, - "grad_norm": 2.2370175205395, - "learning_rate": 1.3608927960150008e-06, - "loss": 0.8567, - "step": 2255 - }, - { - "epoch": 0.7670860251615097, - "grad_norm": 1.8816253225209865, - "learning_rate": 1.3571180067313539e-06, - "loss": 0.8999, - "step": 2256 - }, - { - "epoch": 0.7674260455627337, - "grad_norm": 2.383535780477955, - "learning_rate": 1.3533476376868088e-06, - "loss": 0.7714, - "step": 2257 - }, - { - "epoch": 0.7677660659639578, - "grad_norm": 1.672812334875347, - "learning_rate": 1.3495816934562976e-06, - "loss": 0.7594, - "step": 2258 - }, - { - "epoch": 0.768106086365182, - "grad_norm": 1.771870165986916, - "learning_rate": 1.3458201786093795e-06, - "loss": 0.8326, - "step": 2259 - }, - { - "epoch": 0.768446106766406, - "grad_norm": 1.681066649638471, - "learning_rate": 1.3420630977102455e-06, - "loss": 0.6953, - "step": 2260 - }, - { - "epoch": 0.7687861271676301, - "grad_norm": 2.0034594737419833, - "learning_rate": 1.3383104553177001e-06, - "loss": 0.8098, - "step": 2261 - }, - { - "epoch": 0.7691261475688541, - "grad_norm": 1.9711809712880015, - "learning_rate": 1.334562255985164e-06, - "loss": 0.6649, - "step": 2262 - }, - { - "epoch": 0.7694661679700782, - "grad_norm": 1.7828734904005346, - "learning_rate": 1.3308185042606698e-06, - "loss": 0.8103, - "step": 2263 - }, - { - "epoch": 0.7698061883713023, - "grad_norm": 1.8609944720601692, - "learning_rate": 1.3270792046868486e-06, - "loss": 0.8238, - "step": 2264 - }, - { - "epoch": 0.7701462087725264, - "grad_norm": 1.7934456884377525, - "learning_rate": 1.323344361800934e-06, - "loss": 0.7776, - "step": 2265 - }, - { - "epoch": 0.7704862291737504, - "grad_norm": 1.7275631998115004, - "learning_rate": 1.3196139801347485e-06, - "loss": 0.8547, - "step": 2266 - }, - { - "epoch": 0.7708262495749745, - "grad_norm": 2.7409306203098707, - "learning_rate": 1.3158880642147026e-06, - "loss": 0.7145, - "step": 2267 - }, - { - "epoch": 0.7711662699761985, - "grad_norm": 2.087778768589785, - "learning_rate": 1.3121666185617859e-06, - "loss": 0.726, - "step": 2268 - }, - { - "epoch": 0.7715062903774227, - "grad_norm": 1.780483085521547, - "learning_rate": 1.3084496476915698e-06, - "loss": 0.8217, - "step": 2269 - }, - { - "epoch": 0.7718463107786467, - "grad_norm": 2.8825219877755415, - "learning_rate": 1.3047371561141903e-06, - "loss": 0.8109, - "step": 2270 - }, - { - "epoch": 0.7721863311798708, - "grad_norm": 1.9288735242516986, - "learning_rate": 1.3010291483343478e-06, - "loss": 0.812, - "step": 2271 - }, - { - "epoch": 0.7725263515810948, - "grad_norm": 2.431431619077142, - "learning_rate": 1.2973256288513086e-06, - "loss": 0.8052, - "step": 2272 - }, - { - "epoch": 0.7728663719823189, - "grad_norm": 1.7830394043812279, - "learning_rate": 1.2936266021588872e-06, - "loss": 0.6701, - "step": 2273 - }, - { - "epoch": 0.773206392383543, - "grad_norm": 2.105756285208052, - "learning_rate": 1.2899320727454472e-06, - "loss": 0.82, - "step": 2274 - }, - { - "epoch": 0.7735464127847671, - "grad_norm": 5.425208376062677, - "learning_rate": 1.2862420450938955e-06, - "loss": 0.743, - "step": 2275 - }, - { - "epoch": 0.7738864331859912, - "grad_norm": 1.827529432109907, - "learning_rate": 1.28255652368168e-06, - "loss": 0.7528, - "step": 2276 - }, - { - "epoch": 0.7742264535872152, - "grad_norm": 2.0046945708038524, - "learning_rate": 1.2788755129807767e-06, - "loss": 0.8446, - "step": 2277 - }, - { - "epoch": 0.7745664739884393, - "grad_norm": 1.947473671747887, - "learning_rate": 1.2751990174576883e-06, - "loss": 0.7801, - "step": 2278 - }, - { - "epoch": 0.7749064943896634, - "grad_norm": 1.730669713034862, - "learning_rate": 1.2715270415734425e-06, - "loss": 0.7308, - "step": 2279 - }, - { - "epoch": 0.7752465147908875, - "grad_norm": 2.0448368740503335, - "learning_rate": 1.2678595897835788e-06, - "loss": 0.7239, - "step": 2280 - }, - { - "epoch": 0.7755865351921115, - "grad_norm": 1.7996589672894716, - "learning_rate": 1.2641966665381517e-06, - "loss": 0.8383, - "step": 2281 - }, - { - "epoch": 0.7759265555933356, - "grad_norm": 1.9501431444735777, - "learning_rate": 1.2605382762817164e-06, - "loss": 0.7416, - "step": 2282 - }, - { - "epoch": 0.7762665759945597, - "grad_norm": 2.44528318223147, - "learning_rate": 1.2568844234533294e-06, - "loss": 0.867, - "step": 2283 - }, - { - "epoch": 0.7766065963957838, - "grad_norm": 1.5135812296404374, - "learning_rate": 1.253235112486541e-06, - "loss": 0.7896, - "step": 2284 - }, - { - "epoch": 0.7769466167970078, - "grad_norm": 1.6910504402544813, - "learning_rate": 1.249590347809393e-06, - "loss": 0.8092, - "step": 2285 - }, - { - "epoch": 0.7772866371982319, - "grad_norm": 2.0907001787083623, - "learning_rate": 1.2459501338444085e-06, - "loss": 0.7999, - "step": 2286 - }, - { - "epoch": 0.7776266575994559, - "grad_norm": 1.7084265208677907, - "learning_rate": 1.2423144750085875e-06, - "loss": 0.8109, - "step": 2287 - }, - { - "epoch": 0.7779666780006801, - "grad_norm": 1.7912086159226683, - "learning_rate": 1.2386833757134076e-06, - "loss": 0.7468, - "step": 2288 - }, - { - "epoch": 0.7783066984019041, - "grad_norm": 1.9542261253674484, - "learning_rate": 1.2350568403648088e-06, - "loss": 0.7268, - "step": 2289 - }, - { - "epoch": 0.7786467188031282, - "grad_norm": 1.4777106097697732, - "learning_rate": 1.2314348733631958e-06, - "loss": 0.7642, - "step": 2290 - }, - { - "epoch": 0.7789867392043522, - "grad_norm": 2.1056584699669814, - "learning_rate": 1.2278174791034281e-06, - "loss": 0.8599, - "step": 2291 - }, - { - "epoch": 0.7793267596055763, - "grad_norm": 2.1244138537440156, - "learning_rate": 1.224204661974821e-06, - "loss": 0.7469, - "step": 2292 - }, - { - "epoch": 0.7796667800068005, - "grad_norm": 1.842651124665579, - "learning_rate": 1.2205964263611325e-06, - "loss": 0.7238, - "step": 2293 - }, - { - "epoch": 0.7800068004080245, - "grad_norm": 9.972026665603295, - "learning_rate": 1.2169927766405598e-06, - "loss": 0.753, - "step": 2294 - }, - { - "epoch": 0.7803468208092486, - "grad_norm": 1.8250005499709825, - "learning_rate": 1.2133937171857406e-06, - "loss": 0.7459, - "step": 2295 - }, - { - "epoch": 0.7806868412104726, - "grad_norm": 1.4938745812322698, - "learning_rate": 1.2097992523637387e-06, - "loss": 0.741, - "step": 2296 - }, - { - "epoch": 0.7810268616116967, - "grad_norm": 1.8847996493887464, - "learning_rate": 1.2062093865360458e-06, - "loss": 0.8275, - "step": 2297 - }, - { - "epoch": 0.7813668820129208, - "grad_norm": 1.7228031928755019, - "learning_rate": 1.2026241240585702e-06, - "loss": 0.75, - "step": 2298 - }, - { - "epoch": 0.7817069024141449, - "grad_norm": 2.501687535032944, - "learning_rate": 1.1990434692816367e-06, - "loss": 0.7823, - "step": 2299 - }, - { - "epoch": 0.7820469228153689, - "grad_norm": 2.7653925468074614, - "learning_rate": 1.1954674265499773e-06, - "loss": 0.681, - "step": 2300 - }, - { - "epoch": 0.782386943216593, - "grad_norm": 2.3143819049767864, - "learning_rate": 1.1918960002027308e-06, - "loss": 0.8237, - "step": 2301 - }, - { - "epoch": 0.782726963617817, - "grad_norm": 1.5824914893100979, - "learning_rate": 1.1883291945734315e-06, - "loss": 0.7691, - "step": 2302 - }, - { - "epoch": 0.7830669840190412, - "grad_norm": 2.2301539841962708, - "learning_rate": 1.1847670139900074e-06, - "loss": 0.7281, - "step": 2303 - }, - { - "epoch": 0.7834070044202652, - "grad_norm": 2.4915256532784738, - "learning_rate": 1.1812094627747777e-06, - "loss": 0.7732, - "step": 2304 - }, - { - "epoch": 0.7837470248214893, - "grad_norm": 2.2299044959118848, - "learning_rate": 1.1776565452444389e-06, - "loss": 0.7285, - "step": 2305 - }, - { - "epoch": 0.7840870452227133, - "grad_norm": 1.8798158077309424, - "learning_rate": 1.174108265710071e-06, - "loss": 0.8002, - "step": 2306 - }, - { - "epoch": 0.7844270656239375, - "grad_norm": 2.035534060132518, - "learning_rate": 1.1705646284771227e-06, - "loss": 0.729, - "step": 2307 - }, - { - "epoch": 0.7847670860251615, - "grad_norm": 1.7825151695794803, - "learning_rate": 1.1670256378454093e-06, - "loss": 0.7919, - "step": 2308 - }, - { - "epoch": 0.7851071064263856, - "grad_norm": 2.0755250446855404, - "learning_rate": 1.1634912981091096e-06, - "loss": 0.801, - "step": 2309 - }, - { - "epoch": 0.7854471268276096, - "grad_norm": 1.9145697285689294, - "learning_rate": 1.159961613556757e-06, - "loss": 0.7888, - "step": 2310 - }, - { - "epoch": 0.7857871472288337, - "grad_norm": 2.071282232646433, - "learning_rate": 1.1564365884712409e-06, - "loss": 0.8008, - "step": 2311 - }, - { - "epoch": 0.7861271676300579, - "grad_norm": 1.8475994726429972, - "learning_rate": 1.1529162271297912e-06, - "loss": 0.7505, - "step": 2312 - }, - { - "epoch": 0.7864671880312819, - "grad_norm": 1.8531724346083438, - "learning_rate": 1.1494005338039839e-06, - "loss": 0.7435, - "step": 2313 - }, - { - "epoch": 0.786807208432506, - "grad_norm": 2.362438597585831, - "learning_rate": 1.1458895127597275e-06, - "loss": 0.7681, - "step": 2314 - }, - { - "epoch": 0.78714722883373, - "grad_norm": 2.2288635557885463, - "learning_rate": 1.1423831682572623e-06, - "loss": 0.7871, - "step": 2315 - }, - { - "epoch": 0.7874872492349541, - "grad_norm": 1.5469503925739951, - "learning_rate": 1.1388815045511525e-06, - "loss": 0.7279, - "step": 2316 - }, - { - "epoch": 0.7878272696361782, - "grad_norm": 2.0731404202763626, - "learning_rate": 1.1353845258902867e-06, - "loss": 0.788, - "step": 2317 - }, - { - "epoch": 0.7881672900374023, - "grad_norm": 2.262319333024703, - "learning_rate": 1.131892236517866e-06, - "loss": 0.6889, - "step": 2318 - }, - { - "epoch": 0.7885073104386263, - "grad_norm": 2.157562568854031, - "learning_rate": 1.1284046406713994e-06, - "loss": 0.6274, - "step": 2319 - }, - { - "epoch": 0.7888473308398504, - "grad_norm": 1.8322331540862034, - "learning_rate": 1.1249217425827063e-06, - "loss": 0.7697, - "step": 2320 - }, - { - "epoch": 0.7891873512410744, - "grad_norm": 1.9705120789647137, - "learning_rate": 1.1214435464779006e-06, - "loss": 0.8014, - "step": 2321 - }, - { - "epoch": 0.7895273716422986, - "grad_norm": 1.620994185215544, - "learning_rate": 1.117970056577395e-06, - "loss": 0.7806, - "step": 2322 - }, - { - "epoch": 0.7898673920435226, - "grad_norm": 2.1814551902341357, - "learning_rate": 1.1145012770958885e-06, - "loss": 0.7728, - "step": 2323 - }, - { - "epoch": 0.7902074124447467, - "grad_norm": 2.2535509074314417, - "learning_rate": 1.1110372122423663e-06, - "loss": 0.7814, - "step": 2324 - }, - { - "epoch": 0.7905474328459707, - "grad_norm": 2.814674945307446, - "learning_rate": 1.107577866220092e-06, - "loss": 0.7463, - "step": 2325 - }, - { - "epoch": 0.7908874532471948, - "grad_norm": 1.9527766493519834, - "learning_rate": 1.104123243226603e-06, - "loss": 0.7806, - "step": 2326 - }, - { - "epoch": 0.7912274736484189, - "grad_norm": 2.6070147128679895, - "learning_rate": 1.1006733474537095e-06, - "loss": 0.802, - "step": 2327 - }, - { - "epoch": 0.791567494049643, - "grad_norm": 1.9013207094703433, - "learning_rate": 1.0972281830874794e-06, - "loss": 0.8648, - "step": 2328 - }, - { - "epoch": 0.791907514450867, - "grad_norm": 2.0738357697149494, - "learning_rate": 1.0937877543082464e-06, - "loss": 0.6966, - "step": 2329 - }, - { - "epoch": 0.7922475348520911, - "grad_norm": 3.3351856836112437, - "learning_rate": 1.090352065290593e-06, - "loss": 0.7704, - "step": 2330 - }, - { - "epoch": 0.7925875552533151, - "grad_norm": 2.4139686701986545, - "learning_rate": 1.086921120203353e-06, - "loss": 0.8781, - "step": 2331 - }, - { - "epoch": 0.7929275756545393, - "grad_norm": 2.0367586641793785, - "learning_rate": 1.0834949232096008e-06, - "loss": 0.7859, - "step": 2332 - }, - { - "epoch": 0.7932675960557634, - "grad_norm": 1.731703445699507, - "learning_rate": 1.0800734784666556e-06, - "loss": 0.654, - "step": 2333 - }, - { - "epoch": 0.7936076164569874, - "grad_norm": 1.9761986748758829, - "learning_rate": 1.076656790126065e-06, - "loss": 0.8221, - "step": 2334 - }, - { - "epoch": 0.7939476368582115, - "grad_norm": 2.089346557643535, - "learning_rate": 1.0732448623336057e-06, - "loss": 0.7591, - "step": 2335 - }, - { - "epoch": 0.7942876572594356, - "grad_norm": 2.0490768682382074, - "learning_rate": 1.0698376992292808e-06, - "loss": 0.8476, - "step": 2336 - }, - { - "epoch": 0.7946276776606597, - "grad_norm": 1.7404822303267078, - "learning_rate": 1.0664353049473085e-06, - "loss": 0.8059, - "step": 2337 - }, - { - "epoch": 0.7949676980618837, - "grad_norm": 1.9516697066879696, - "learning_rate": 1.0630376836161248e-06, - "loss": 0.7247, - "step": 2338 - }, - { - "epoch": 0.7953077184631078, - "grad_norm": 1.7179222033684476, - "learning_rate": 1.0596448393583709e-06, - "loss": 0.7071, - "step": 2339 - }, - { - "epoch": 0.7956477388643318, - "grad_norm": 2.089155897647102, - "learning_rate": 1.0562567762908915e-06, - "loss": 0.7622, - "step": 2340 - }, - { - "epoch": 0.795987759265556, - "grad_norm": 2.0146843017178906, - "learning_rate": 1.052873498524732e-06, - "loss": 0.7588, - "step": 2341 - }, - { - "epoch": 0.79632777966678, - "grad_norm": 1.8914540998106915, - "learning_rate": 1.0494950101651274e-06, - "loss": 0.7959, - "step": 2342 - }, - { - "epoch": 0.7966678000680041, - "grad_norm": 2.392413934717188, - "learning_rate": 1.046121315311508e-06, - "loss": 0.8566, - "step": 2343 - }, - { - "epoch": 0.7970078204692281, - "grad_norm": 1.763253874255017, - "learning_rate": 1.04275241805748e-06, - "loss": 0.7474, - "step": 2344 - }, - { - "epoch": 0.7973478408704522, - "grad_norm": 1.558308712843251, - "learning_rate": 1.0393883224908358e-06, - "loss": 0.6914, - "step": 2345 - }, - { - "epoch": 0.7976878612716763, - "grad_norm": 1.7605529498524175, - "learning_rate": 1.036029032693534e-06, - "loss": 0.837, - "step": 2346 - }, - { - "epoch": 0.7980278816729004, - "grad_norm": 1.78998260592103, - "learning_rate": 1.0326745527417098e-06, - "loss": 0.8327, - "step": 2347 - }, - { - "epoch": 0.7983679020741244, - "grad_norm": 1.7542085033348727, - "learning_rate": 1.0293248867056527e-06, - "loss": 0.8285, - "step": 2348 - }, - { - "epoch": 0.7987079224753485, - "grad_norm": 1.9678558489079983, - "learning_rate": 1.0259800386498204e-06, - "loss": 0.8867, - "step": 2349 - }, - { - "epoch": 0.7990479428765725, - "grad_norm": 2.214701849578641, - "learning_rate": 1.022640012632819e-06, - "loss": 0.9296, - "step": 2350 - }, - { - "epoch": 0.7993879632777967, - "grad_norm": 1.9468124426612539, - "learning_rate": 1.0193048127074034e-06, - "loss": 0.8632, - "step": 2351 - }, - { - "epoch": 0.7997279836790208, - "grad_norm": 1.5760592103068731, - "learning_rate": 1.0159744429204776e-06, - "loss": 0.8049, - "step": 2352 - }, - { - "epoch": 0.8000680040802448, - "grad_norm": 1.950530829583191, - "learning_rate": 1.0126489073130779e-06, - "loss": 0.6512, - "step": 2353 - }, - { - "epoch": 0.8004080244814689, - "grad_norm": 1.7143537928592565, - "learning_rate": 1.0093282099203805e-06, - "loss": 0.7408, - "step": 2354 - }, - { - "epoch": 0.8007480448826929, - "grad_norm": 3.702934075061963, - "learning_rate": 1.0060123547716888e-06, - "loss": 0.7784, - "step": 2355 - }, - { - "epoch": 0.8010880652839171, - "grad_norm": 1.599124103938154, - "learning_rate": 1.0027013458904288e-06, - "loss": 0.8521, - "step": 2356 - }, - { - "epoch": 0.8014280856851411, - "grad_norm": 1.713620640939179, - "learning_rate": 9.993951872941493e-07, - "loss": 0.8589, - "step": 2357 - }, - { - "epoch": 0.8017681060863652, - "grad_norm": 1.9033657273086837, - "learning_rate": 9.960938829945104e-07, - "loss": 0.7361, - "step": 2358 - }, - { - "epoch": 0.8021081264875892, - "grad_norm": 1.8996990852991276, - "learning_rate": 9.927974369972871e-07, - "loss": 0.7452, - "step": 2359 - }, - { - "epoch": 0.8024481468888133, - "grad_norm": 2.0379615163395193, - "learning_rate": 9.895058533023532e-07, - "loss": 0.5995, - "step": 2360 - }, - { - "epoch": 0.8027881672900374, - "grad_norm": 1.7890665810043467, - "learning_rate": 9.862191359036883e-07, - "loss": 0.9003, - "step": 2361 - }, - { - "epoch": 0.8031281876912615, - "grad_norm": 2.6456228617289748, - "learning_rate": 9.829372887893624e-07, - "loss": 0.7455, - "step": 2362 - }, - { - "epoch": 0.8034682080924855, - "grad_norm": 1.6805195436291074, - "learning_rate": 9.796603159415407e-07, - "loss": 0.7163, - "step": 2363 - }, - { - "epoch": 0.8038082284937096, - "grad_norm": 2.042414294670765, - "learning_rate": 9.763882213364705e-07, - "loss": 0.6174, - "step": 2364 - }, - { - "epoch": 0.8041482488949337, - "grad_norm": 1.8519040933711384, - "learning_rate": 9.731210089444803e-07, - "loss": 0.7669, - "step": 2365 - }, - { - "epoch": 0.8044882692961578, - "grad_norm": 2.2531030397503464, - "learning_rate": 9.69858682729976e-07, - "loss": 0.8395, - "step": 2366 - }, - { - "epoch": 0.8048282896973818, - "grad_norm": 2.0748617126240516, - "learning_rate": 9.66601246651432e-07, - "loss": 0.7717, - "step": 2367 - }, - { - "epoch": 0.8051683100986059, - "grad_norm": 3.841576998570947, - "learning_rate": 9.633487046613932e-07, - "loss": 0.8345, - "step": 2368 - }, - { - "epoch": 0.80550833049983, - "grad_norm": 1.7489227453665286, - "learning_rate": 9.60101060706462e-07, - "loss": 0.8575, - "step": 2369 - }, - { - "epoch": 0.8058483509010541, - "grad_norm": 1.7114506077871587, - "learning_rate": 9.568583187273018e-07, - "loss": 0.8861, - "step": 2370 - }, - { - "epoch": 0.8061883713022782, - "grad_norm": 2.629625241075618, - "learning_rate": 9.536204826586243e-07, - "loss": 0.707, - "step": 2371 - }, - { - "epoch": 0.8065283917035022, - "grad_norm": 2.451325517306834, - "learning_rate": 9.503875564291886e-07, - "loss": 0.7568, - "step": 2372 - }, - { - "epoch": 0.8068684121047263, - "grad_norm": 1.865454750034287, - "learning_rate": 9.471595439617986e-07, - "loss": 0.8517, - "step": 2373 - }, - { - "epoch": 0.8072084325059503, - "grad_norm": 2.0078901844899053, - "learning_rate": 9.439364491732927e-07, - "loss": 0.7792, - "step": 2374 - }, - { - "epoch": 0.8075484529071745, - "grad_norm": 2.1884086746809506, - "learning_rate": 9.407182759745464e-07, - "loss": 0.7711, - "step": 2375 - }, - { - "epoch": 0.8078884733083985, - "grad_norm": 2.359962377985305, - "learning_rate": 9.375050282704596e-07, - "loss": 0.7623, - "step": 2376 - }, - { - "epoch": 0.8082284937096226, - "grad_norm": 1.965811233572265, - "learning_rate": 9.342967099599587e-07, - "loss": 0.7636, - "step": 2377 - }, - { - "epoch": 0.8085685141108466, - "grad_norm": 2.1355877396954557, - "learning_rate": 9.31093324935985e-07, - "loss": 0.8835, - "step": 2378 - }, - { - "epoch": 0.8089085345120707, - "grad_norm": 1.725291600691821, - "learning_rate": 9.278948770854984e-07, - "loss": 0.8575, - "step": 2379 - }, - { - "epoch": 0.8092485549132948, - "grad_norm": 2.1970920436039725, - "learning_rate": 9.247013702894653e-07, - "loss": 0.7891, - "step": 2380 - }, - { - "epoch": 0.8095885753145189, - "grad_norm": 2.1911323854498304, - "learning_rate": 9.215128084228564e-07, - "loss": 0.7819, - "step": 2381 - }, - { - "epoch": 0.8099285957157429, - "grad_norm": 1.9062475395377723, - "learning_rate": 9.183291953546425e-07, - "loss": 0.7573, - "step": 2382 - }, - { - "epoch": 0.810268616116967, - "grad_norm": 1.5938084300323458, - "learning_rate": 9.151505349477901e-07, - "loss": 0.744, - "step": 2383 - }, - { - "epoch": 0.810608636518191, - "grad_norm": 2.4169596699907103, - "learning_rate": 9.11976831059258e-07, - "loss": 0.7026, - "step": 2384 - }, - { - "epoch": 0.8109486569194152, - "grad_norm": 2.6219709533237903, - "learning_rate": 9.088080875399862e-07, - "loss": 0.6643, - "step": 2385 - }, - { - "epoch": 0.8112886773206393, - "grad_norm": 2.01219370432533, - "learning_rate": 9.056443082349015e-07, - "loss": 0.7425, - "step": 2386 - }, - { - "epoch": 0.8116286977218633, - "grad_norm": 2.066167061827305, - "learning_rate": 9.024854969829016e-07, - "loss": 0.6546, - "step": 2387 - }, - { - "epoch": 0.8119687181230874, - "grad_norm": 1.4300615570476107, - "learning_rate": 8.993316576168626e-07, - "loss": 0.7899, - "step": 2388 - }, - { - "epoch": 0.8123087385243115, - "grad_norm": 1.7707829875888732, - "learning_rate": 8.961827939636198e-07, - "loss": 0.8382, - "step": 2389 - }, - { - "epoch": 0.8126487589255356, - "grad_norm": 2.1704787690301597, - "learning_rate": 8.930389098439751e-07, - "loss": 0.7779, - "step": 2390 - }, - { - "epoch": 0.8129887793267596, - "grad_norm": 1.6521448642583239, - "learning_rate": 8.899000090726905e-07, - "loss": 0.788, - "step": 2391 - }, - { - "epoch": 0.8133287997279837, - "grad_norm": 1.9493462394819676, - "learning_rate": 8.867660954584773e-07, - "loss": 0.8392, - "step": 2392 - }, - { - "epoch": 0.8136688201292077, - "grad_norm": 1.8331269676018516, - "learning_rate": 8.836371728039989e-07, - "loss": 0.78, - "step": 2393 - }, - { - "epoch": 0.8140088405304319, - "grad_norm": 2.026606682272262, - "learning_rate": 8.80513244905859e-07, - "loss": 0.8935, - "step": 2394 - }, - { - "epoch": 0.8143488609316559, - "grad_norm": 1.7620149620203838, - "learning_rate": 8.773943155546044e-07, - "loss": 0.6249, - "step": 2395 - }, - { - "epoch": 0.81468888133288, - "grad_norm": 2.7206868334859506, - "learning_rate": 8.74280388534714e-07, - "loss": 0.7804, - "step": 2396 - }, - { - "epoch": 0.815028901734104, - "grad_norm": 1.7308067133219402, - "learning_rate": 8.711714676245975e-07, - "loss": 0.7325, - "step": 2397 - }, - { - "epoch": 0.8153689221353281, - "grad_norm": 2.51943783841568, - "learning_rate": 8.680675565965918e-07, - "loss": 0.752, - "step": 2398 - }, - { - "epoch": 0.8157089425365522, - "grad_norm": 1.549980412078022, - "learning_rate": 8.64968659216951e-07, - "loss": 0.9601, - "step": 2399 - }, - { - "epoch": 0.8160489629377763, - "grad_norm": 2.0254215121489194, - "learning_rate": 8.618747792458515e-07, - "loss": 0.8119, - "step": 2400 - }, - { - "epoch": 0.8163889833390003, - "grad_norm": 1.9202778211704874, - "learning_rate": 8.58785920437376e-07, - "loss": 0.7911, - "step": 2401 - }, - { - "epoch": 0.8167290037402244, - "grad_norm": 1.6548864238372174, - "learning_rate": 8.557020865395194e-07, - "loss": 0.7711, - "step": 2402 - }, - { - "epoch": 0.8170690241414484, - "grad_norm": 1.5676328354601332, - "learning_rate": 8.526232812941748e-07, - "loss": 0.6984, - "step": 2403 - }, - { - "epoch": 0.8174090445426726, - "grad_norm": 1.7193145665504181, - "learning_rate": 8.49549508437138e-07, - "loss": 0.8111, - "step": 2404 - }, - { - "epoch": 0.8177490649438967, - "grad_norm": 1.68834796706292, - "learning_rate": 8.464807716980961e-07, - "loss": 0.7438, - "step": 2405 - }, - { - "epoch": 0.8180890853451207, - "grad_norm": 2.349332554637134, - "learning_rate": 8.434170748006226e-07, - "loss": 0.8144, - "step": 2406 - }, - { - "epoch": 0.8184291057463448, - "grad_norm": 7.841596102340736, - "learning_rate": 8.403584214621823e-07, - "loss": 0.7929, - "step": 2407 - }, - { - "epoch": 0.8187691261475688, - "grad_norm": 2.141322231062195, - "learning_rate": 8.373048153941144e-07, - "loss": 0.8196, - "step": 2408 - }, - { - "epoch": 0.819109146548793, - "grad_norm": 2.0397457850733884, - "learning_rate": 8.34256260301638e-07, - "loss": 0.7033, - "step": 2409 - }, - { - "epoch": 0.819449166950017, - "grad_norm": 2.205136821953891, - "learning_rate": 8.312127598838387e-07, - "loss": 0.7234, - "step": 2410 - }, - { - "epoch": 0.8197891873512411, - "grad_norm": 4.337708830724585, - "learning_rate": 8.281743178336754e-07, - "loss": 0.7171, - "step": 2411 - }, - { - "epoch": 0.8201292077524651, - "grad_norm": 1.6721477091051125, - "learning_rate": 8.251409378379638e-07, - "loss": 0.8007, - "step": 2412 - }, - { - "epoch": 0.8204692281536892, - "grad_norm": 3.8418421223750685, - "learning_rate": 8.22112623577378e-07, - "loss": 0.7635, - "step": 2413 - }, - { - "epoch": 0.8208092485549133, - "grad_norm": 1.6182376861006265, - "learning_rate": 8.19089378726447e-07, - "loss": 0.7876, - "step": 2414 - }, - { - "epoch": 0.8211492689561374, - "grad_norm": 2.2512602655558376, - "learning_rate": 8.160712069535464e-07, - "loss": 0.7364, - "step": 2415 - }, - { - "epoch": 0.8214892893573614, - "grad_norm": 1.5613182342547798, - "learning_rate": 8.130581119209008e-07, - "loss": 0.7997, - "step": 2416 - }, - { - "epoch": 0.8218293097585855, - "grad_norm": 2.123606402304467, - "learning_rate": 8.100500972845688e-07, - "loss": 0.7256, - "step": 2417 - }, - { - "epoch": 0.8221693301598096, - "grad_norm": 2.5565946650892846, - "learning_rate": 8.070471666944496e-07, - "loss": 0.7453, - "step": 2418 - }, - { - "epoch": 0.8225093505610337, - "grad_norm": 2.2217115142287667, - "learning_rate": 8.040493237942698e-07, - "loss": 0.8128, - "step": 2419 - }, - { - "epoch": 0.8228493709622577, - "grad_norm": 1.8879938400884209, - "learning_rate": 8.010565722215851e-07, - "loss": 0.7291, - "step": 2420 - }, - { - "epoch": 0.8231893913634818, - "grad_norm": 1.8601116173360819, - "learning_rate": 7.98068915607772e-07, - "loss": 0.801, - "step": 2421 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 1.7254034450571911, - "learning_rate": 7.950863575780249e-07, - "loss": 0.7592, - "step": 2422 - }, - { - "epoch": 0.82386943216593, - "grad_norm": 2.6122625161288586, - "learning_rate": 7.921089017513522e-07, - "loss": 0.8019, - "step": 2423 - }, - { - "epoch": 0.824209452567154, - "grad_norm": 1.6973528915543705, - "learning_rate": 7.891365517405702e-07, - "loss": 0.8974, - "step": 2424 - }, - { - "epoch": 0.8245494729683781, - "grad_norm": 1.7759681048749987, - "learning_rate": 7.861693111523022e-07, - "loss": 0.7917, - "step": 2425 - }, - { - "epoch": 0.8248894933696022, - "grad_norm": 1.7705878213685702, - "learning_rate": 7.832071835869687e-07, - "loss": 0.8071, - "step": 2426 - }, - { - "epoch": 0.8252295137708262, - "grad_norm": 2.156963508346104, - "learning_rate": 7.802501726387901e-07, - "loss": 0.7664, - "step": 2427 - }, - { - "epoch": 0.8255695341720504, - "grad_norm": 1.7276432522564236, - "learning_rate": 7.772982818957742e-07, - "loss": 0.7373, - "step": 2428 - }, - { - "epoch": 0.8259095545732744, - "grad_norm": 5.112931675706146, - "learning_rate": 7.743515149397185e-07, - "loss": 0.777, - "step": 2429 - }, - { - "epoch": 0.8262495749744985, - "grad_norm": 1.859215069146444, - "learning_rate": 7.714098753462018e-07, - "loss": 0.7991, - "step": 2430 - }, - { - "epoch": 0.8265895953757225, - "grad_norm": 2.0011677462274737, - "learning_rate": 7.684733666845812e-07, - "loss": 0.7925, - "step": 2431 - }, - { - "epoch": 0.8269296157769466, - "grad_norm": 2.7022442512396987, - "learning_rate": 7.655419925179919e-07, - "loss": 0.6235, - "step": 2432 - }, - { - "epoch": 0.8272696361781707, - "grad_norm": 2.0241160886895293, - "learning_rate": 7.626157564033332e-07, - "loss": 0.6865, - "step": 2433 - }, - { - "epoch": 0.8276096565793948, - "grad_norm": 1.906792937890856, - "learning_rate": 7.596946618912754e-07, - "loss": 0.8559, - "step": 2434 - }, - { - "epoch": 0.8279496769806188, - "grad_norm": 1.8797923348430752, - "learning_rate": 7.567787125262449e-07, - "loss": 0.6898, - "step": 2435 - }, - { - "epoch": 0.8282896973818429, - "grad_norm": 1.8757719817795944, - "learning_rate": 7.538679118464298e-07, - "loss": 0.7356, - "step": 2436 - }, - { - "epoch": 0.8286297177830669, - "grad_norm": 2.699911415427047, - "learning_rate": 7.509622633837671e-07, - "loss": 0.6198, - "step": 2437 - }, - { - "epoch": 0.8289697381842911, - "grad_norm": 1.8535353558494398, - "learning_rate": 7.480617706639442e-07, - "loss": 0.6603, - "step": 2438 - }, - { - "epoch": 0.8293097585855151, - "grad_norm": 2.0077754858561025, - "learning_rate": 7.451664372063916e-07, - "loss": 0.8192, - "step": 2439 - }, - { - "epoch": 0.8296497789867392, - "grad_norm": 2.3842511987435064, - "learning_rate": 7.422762665242788e-07, - "loss": 0.8319, - "step": 2440 - }, - { - "epoch": 0.8299897993879632, - "grad_norm": 2.3986998450988835, - "learning_rate": 7.393912621245142e-07, - "loss": 0.798, - "step": 2441 - }, - { - "epoch": 0.8303298197891874, - "grad_norm": 1.761347248138979, - "learning_rate": 7.365114275077334e-07, - "loss": 0.7448, - "step": 2442 - }, - { - "epoch": 0.8306698401904115, - "grad_norm": 1.9120511252582268, - "learning_rate": 7.33636766168303e-07, - "loss": 0.8883, - "step": 2443 - }, - { - "epoch": 0.8310098605916355, - "grad_norm": 1.8267075244116617, - "learning_rate": 7.307672815943084e-07, - "loss": 0.7732, - "step": 2444 - }, - { - "epoch": 0.8313498809928596, - "grad_norm": 1.9441639104439716, - "learning_rate": 7.279029772675572e-07, - "loss": 0.8854, - "step": 2445 - }, - { - "epoch": 0.8316899013940836, - "grad_norm": 2.1103415209340604, - "learning_rate": 7.250438566635692e-07, - "loss": 0.8216, - "step": 2446 - }, - { - "epoch": 0.8320299217953078, - "grad_norm": 1.8163467229280887, - "learning_rate": 7.221899232515727e-07, - "loss": 0.863, - "step": 2447 - }, - { - "epoch": 0.8323699421965318, - "grad_norm": 2.1150765935601474, - "learning_rate": 7.193411804945061e-07, - "loss": 0.6834, - "step": 2448 - }, - { - "epoch": 0.8327099625977559, - "grad_norm": 1.9172782529773438, - "learning_rate": 7.164976318490058e-07, - "loss": 0.8915, - "step": 2449 - }, - { - "epoch": 0.8330499829989799, - "grad_norm": 2.302226063344335, - "learning_rate": 7.136592807654085e-07, - "loss": 0.7917, - "step": 2450 - }, - { - "epoch": 0.833390003400204, - "grad_norm": 3.323395060515028, - "learning_rate": 7.108261306877423e-07, - "loss": 0.7571, - "step": 2451 - }, - { - "epoch": 0.8337300238014281, - "grad_norm": 2.044271938418409, - "learning_rate": 7.079981850537266e-07, - "loss": 0.8017, - "step": 2452 - }, - { - "epoch": 0.8340700442026522, - "grad_norm": 2.1409274203005193, - "learning_rate": 7.051754472947625e-07, - "loss": 0.7459, - "step": 2453 - }, - { - "epoch": 0.8344100646038762, - "grad_norm": 2.95578414657139, - "learning_rate": 7.023579208359349e-07, - "loss": 0.8399, - "step": 2454 - }, - { - "epoch": 0.8347500850051003, - "grad_norm": 1.7525330525619445, - "learning_rate": 6.995456090960034e-07, - "loss": 0.8179, - "step": 2455 - }, - { - "epoch": 0.8350901054063243, - "grad_norm": 3.5586869346230676, - "learning_rate": 6.967385154874001e-07, - "loss": 0.9779, - "step": 2456 - }, - { - "epoch": 0.8354301258075485, - "grad_norm": 1.7571946774112055, - "learning_rate": 6.939366434162287e-07, - "loss": 0.8006, - "step": 2457 - }, - { - "epoch": 0.8357701462087725, - "grad_norm": 2.4033098083942446, - "learning_rate": 6.911399962822518e-07, - "loss": 0.7554, - "step": 2458 - }, - { - "epoch": 0.8361101666099966, - "grad_norm": 1.869358589109052, - "learning_rate": 6.883485774788973e-07, - "loss": 0.7259, - "step": 2459 - }, - { - "epoch": 0.8364501870112206, - "grad_norm": 1.91414508963133, - "learning_rate": 6.855623903932457e-07, - "loss": 0.6757, - "step": 2460 - }, - { - "epoch": 0.8367902074124447, - "grad_norm": 1.7629021010412422, - "learning_rate": 6.82781438406031e-07, - "loss": 0.6845, - "step": 2461 - }, - { - "epoch": 0.8371302278136689, - "grad_norm": 1.733843016954561, - "learning_rate": 6.800057248916347e-07, - "loss": 0.7731, - "step": 2462 - }, - { - "epoch": 0.8374702482148929, - "grad_norm": 1.6233499660099797, - "learning_rate": 6.772352532180815e-07, - "loss": 0.7542, - "step": 2463 - }, - { - "epoch": 0.837810268616117, - "grad_norm": 1.9854817972998593, - "learning_rate": 6.74470026747035e-07, - "loss": 0.7532, - "step": 2464 - }, - { - "epoch": 0.838150289017341, - "grad_norm": 2.0817806024786454, - "learning_rate": 6.717100488337952e-07, - "loss": 0.7815, - "step": 2465 - }, - { - "epoch": 0.8384903094185651, - "grad_norm": 2.966599408068768, - "learning_rate": 6.689553228272955e-07, - "loss": 0.7962, - "step": 2466 - }, - { - "epoch": 0.8388303298197892, - "grad_norm": 2.0775168349055577, - "learning_rate": 6.662058520700926e-07, - "loss": 0.7808, - "step": 2467 - }, - { - "epoch": 0.8391703502210133, - "grad_norm": 2.4039263789618595, - "learning_rate": 6.634616398983712e-07, - "loss": 0.8221, - "step": 2468 - }, - { - "epoch": 0.8395103706222373, - "grad_norm": 1.622008521580856, - "learning_rate": 6.607226896419305e-07, - "loss": 0.7502, - "step": 2469 - }, - { - "epoch": 0.8398503910234614, - "grad_norm": 2.1962585919549564, - "learning_rate": 6.579890046241888e-07, - "loss": 0.7449, - "step": 2470 - }, - { - "epoch": 0.8401904114246855, - "grad_norm": 1.819758576227155, - "learning_rate": 6.552605881621732e-07, - "loss": 0.7057, - "step": 2471 - }, - { - "epoch": 0.8405304318259096, - "grad_norm": 1.9641795678417464, - "learning_rate": 6.525374435665183e-07, - "loss": 0.73, - "step": 2472 - }, - { - "epoch": 0.8408704522271336, - "grad_norm": 1.6323001552109164, - "learning_rate": 6.498195741414637e-07, - "loss": 0.7322, - "step": 2473 - }, - { - "epoch": 0.8412104726283577, - "grad_norm": 1.8961797022845666, - "learning_rate": 6.471069831848453e-07, - "loss": 0.721, - "step": 2474 - }, - { - "epoch": 0.8415504930295817, - "grad_norm": 1.811172379243824, - "learning_rate": 6.443996739880981e-07, - "loss": 0.7265, - "step": 2475 - }, - { - "epoch": 0.8418905134308059, - "grad_norm": 1.7705443438552309, - "learning_rate": 6.416976498362432e-07, - "loss": 0.641, - "step": 2476 - }, - { - "epoch": 0.84223053383203, - "grad_norm": 2.5591611017829186, - "learning_rate": 6.39000914007894e-07, - "loss": 0.8185, - "step": 2477 - }, - { - "epoch": 0.842570554233254, - "grad_norm": 1.500821178484566, - "learning_rate": 6.363094697752436e-07, - "loss": 0.8445, - "step": 2478 - }, - { - "epoch": 0.842910574634478, - "grad_norm": 1.8045464912453517, - "learning_rate": 6.336233204040654e-07, - "loss": 0.8186, - "step": 2479 - }, - { - "epoch": 0.8432505950357021, - "grad_norm": 2.1257740354966024, - "learning_rate": 6.309424691537075e-07, - "loss": 0.7636, - "step": 2480 - }, - { - "epoch": 0.8435906154369263, - "grad_norm": 2.1805061395433025, - "learning_rate": 6.282669192770896e-07, - "loss": 0.7369, - "step": 2481 - }, - { - "epoch": 0.8439306358381503, - "grad_norm": 2.794846227145433, - "learning_rate": 6.255966740207003e-07, - "loss": 0.7512, - "step": 2482 - }, - { - "epoch": 0.8442706562393744, - "grad_norm": 2.139417416344505, - "learning_rate": 6.229317366245891e-07, - "loss": 0.858, - "step": 2483 - }, - { - "epoch": 0.8446106766405984, - "grad_norm": 1.6794324817896975, - "learning_rate": 6.20272110322368e-07, - "loss": 0.7415, - "step": 2484 - }, - { - "epoch": 0.8449506970418225, - "grad_norm": 2.557876957181143, - "learning_rate": 6.176177983412013e-07, - "loss": 0.7493, - "step": 2485 - }, - { - "epoch": 0.8452907174430466, - "grad_norm": 1.6147587726820207, - "learning_rate": 6.14968803901807e-07, - "loss": 0.7066, - "step": 2486 - }, - { - "epoch": 0.8456307378442707, - "grad_norm": 6.264968025708578, - "learning_rate": 6.123251302184502e-07, - "loss": 0.7846, - "step": 2487 - }, - { - "epoch": 0.8459707582454947, - "grad_norm": 2.0720939669815297, - "learning_rate": 6.096867804989387e-07, - "loss": 0.8005, - "step": 2488 - }, - { - "epoch": 0.8463107786467188, - "grad_norm": 1.7818861934834522, - "learning_rate": 6.07053757944624e-07, - "loss": 0.8083, - "step": 2489 - }, - { - "epoch": 0.8466507990479428, - "grad_norm": 1.9663397712493058, - "learning_rate": 6.044260657503881e-07, - "loss": 0.7888, - "step": 2490 - }, - { - "epoch": 0.846990819449167, - "grad_norm": 1.7007116920435754, - "learning_rate": 6.018037071046518e-07, - "loss": 0.727, - "step": 2491 - }, - { - "epoch": 0.847330839850391, - "grad_norm": 2.01433634361555, - "learning_rate": 5.991866851893569e-07, - "loss": 0.7841, - "step": 2492 - }, - { - "epoch": 0.8476708602516151, - "grad_norm": 2.0056879931109104, - "learning_rate": 5.965750031799772e-07, - "loss": 0.7634, - "step": 2493 - }, - { - "epoch": 0.8480108806528391, - "grad_norm": 1.7817072170081534, - "learning_rate": 5.939686642455012e-07, - "loss": 0.7755, - "step": 2494 - }, - { - "epoch": 0.8483509010540632, - "grad_norm": 1.771192620612961, - "learning_rate": 5.913676715484363e-07, - "loss": 0.8514, - "step": 2495 - }, - { - "epoch": 0.8486909214552874, - "grad_norm": 1.9377408759282009, - "learning_rate": 5.887720282448034e-07, - "loss": 0.7875, - "step": 2496 - }, - { - "epoch": 0.8490309418565114, - "grad_norm": 1.875586498487033, - "learning_rate": 5.861817374841311e-07, - "loss": 0.7402, - "step": 2497 - }, - { - "epoch": 0.8493709622577355, - "grad_norm": 1.896813734307177, - "learning_rate": 5.835968024094551e-07, - "loss": 0.7494, - "step": 2498 - }, - { - "epoch": 0.8497109826589595, - "grad_norm": 1.746600133481283, - "learning_rate": 5.810172261573099e-07, - "loss": 0.7486, - "step": 2499 - }, - { - "epoch": 0.8500510030601837, - "grad_norm": 1.701793210043788, - "learning_rate": 5.784430118577322e-07, - "loss": 0.7742, - "step": 2500 - }, - { - "epoch": 0.8503910234614077, - "grad_norm": 1.8039181908755018, - "learning_rate": 5.758741626342479e-07, - "loss": 0.8416, - "step": 2501 - }, - { - "epoch": 0.8507310438626318, - "grad_norm": 1.7572654084027113, - "learning_rate": 5.733106816038736e-07, - "loss": 0.6848, - "step": 2502 - }, - { - "epoch": 0.8510710642638558, - "grad_norm": 2.018493313007424, - "learning_rate": 5.707525718771151e-07, - "loss": 0.8917, - "step": 2503 - }, - { - "epoch": 0.8514110846650799, - "grad_norm": 2.0723674784682644, - "learning_rate": 5.681998365579594e-07, - "loss": 0.8585, - "step": 2504 - }, - { - "epoch": 0.851751105066304, - "grad_norm": 1.8580629745251314, - "learning_rate": 5.6565247874387e-07, - "loss": 0.7809, - "step": 2505 - }, - { - "epoch": 0.8520911254675281, - "grad_norm": 1.8712383894476246, - "learning_rate": 5.631105015257871e-07, - "loss": 0.7901, - "step": 2506 - }, - { - "epoch": 0.8524311458687521, - "grad_norm": 1.574135923996909, - "learning_rate": 5.60573907988124e-07, - "loss": 0.7791, - "step": 2507 - }, - { - "epoch": 0.8527711662699762, - "grad_norm": 2.195303216051158, - "learning_rate": 5.58042701208758e-07, - "loss": 0.6425, - "step": 2508 - }, - { - "epoch": 0.8531111866712002, - "grad_norm": 2.0222539828068418, - "learning_rate": 5.55516884259033e-07, - "loss": 0.8305, - "step": 2509 - }, - { - "epoch": 0.8534512070724244, - "grad_norm": 2.0135511435332427, - "learning_rate": 5.529964602037519e-07, - "loss": 0.7716, - "step": 2510 - }, - { - "epoch": 0.8537912274736484, - "grad_norm": 1.7236389648693269, - "learning_rate": 5.504814321011732e-07, - "loss": 0.6894, - "step": 2511 - }, - { - "epoch": 0.8541312478748725, - "grad_norm": 1.8873379975752618, - "learning_rate": 5.479718030030084e-07, - "loss": 0.7636, - "step": 2512 - }, - { - "epoch": 0.8544712682760965, - "grad_norm": 1.7426057926099683, - "learning_rate": 5.454675759544176e-07, - "loss": 0.8053, - "step": 2513 - }, - { - "epoch": 0.8548112886773206, - "grad_norm": 1.8753643537027582, - "learning_rate": 5.429687539940076e-07, - "loss": 0.723, - "step": 2514 - }, - { - "epoch": 0.8551513090785448, - "grad_norm": 1.7277156698902645, - "learning_rate": 5.404753401538249e-07, - "loss": 0.7989, - "step": 2515 - }, - { - "epoch": 0.8554913294797688, - "grad_norm": 2.2677017882113675, - "learning_rate": 5.379873374593563e-07, - "loss": 0.7536, - "step": 2516 - }, - { - "epoch": 0.8558313498809929, - "grad_norm": 1.622977811890628, - "learning_rate": 5.355047489295195e-07, - "loss": 0.7579, - "step": 2517 - }, - { - "epoch": 0.8561713702822169, - "grad_norm": 1.8045814672152072, - "learning_rate": 5.330275775766642e-07, - "loss": 0.7795, - "step": 2518 - }, - { - "epoch": 0.856511390683441, - "grad_norm": 1.8483802620891205, - "learning_rate": 5.30555826406568e-07, - "loss": 0.8143, - "step": 2519 - }, - { - "epoch": 0.8568514110846651, - "grad_norm": 2.0653176883961075, - "learning_rate": 5.28089498418431e-07, - "loss": 0.8275, - "step": 2520 - }, - { - "epoch": 0.8571914314858892, - "grad_norm": 2.3816394206381752, - "learning_rate": 5.256285966048719e-07, - "loss": 0.7278, - "step": 2521 - }, - { - "epoch": 0.8575314518871132, - "grad_norm": 1.6020707124660172, - "learning_rate": 5.23173123951925e-07, - "loss": 0.8589, - "step": 2522 - }, - { - "epoch": 0.8578714722883373, - "grad_norm": 2.2904803070652684, - "learning_rate": 5.207230834390403e-07, - "loss": 0.7793, - "step": 2523 - }, - { - "epoch": 0.8582114926895614, - "grad_norm": 2.2504916928798666, - "learning_rate": 5.182784780390721e-07, - "loss": 0.7643, - "step": 2524 - }, - { - "epoch": 0.8585515130907855, - "grad_norm": 2.1813636901240074, - "learning_rate": 5.158393107182835e-07, - "loss": 0.7989, - "step": 2525 - }, - { - "epoch": 0.8588915334920095, - "grad_norm": 2.17526125079606, - "learning_rate": 5.134055844363367e-07, - "loss": 0.7287, - "step": 2526 - }, - { - "epoch": 0.8592315538932336, - "grad_norm": 2.0068173957653364, - "learning_rate": 5.109773021462921e-07, - "loss": 0.8449, - "step": 2527 - }, - { - "epoch": 0.8595715742944576, - "grad_norm": 1.9711345745234496, - "learning_rate": 5.085544667946057e-07, - "loss": 0.8109, - "step": 2528 - }, - { - "epoch": 0.8599115946956818, - "grad_norm": 2.3317026622050583, - "learning_rate": 5.061370813211219e-07, - "loss": 0.7172, - "step": 2529 - }, - { - "epoch": 0.8602516150969058, - "grad_norm": 1.757253994535814, - "learning_rate": 5.037251486590755e-07, - "loss": 0.7579, - "step": 2530 - }, - { - "epoch": 0.8605916354981299, - "grad_norm": 2.0285732960091543, - "learning_rate": 5.013186717350815e-07, - "loss": 0.796, - "step": 2531 - }, - { - "epoch": 0.8609316558993539, - "grad_norm": 1.659656431274867, - "learning_rate": 4.989176534691381e-07, - "loss": 0.7392, - "step": 2532 - }, - { - "epoch": 0.861271676300578, - "grad_norm": 2.0089159264540553, - "learning_rate": 4.965220967746181e-07, - "loss": 0.7919, - "step": 2533 - }, - { - "epoch": 0.8616116967018022, - "grad_norm": 2.473425505190949, - "learning_rate": 4.94132004558266e-07, - "loss": 0.7572, - "step": 2534 - }, - { - "epoch": 0.8619517171030262, - "grad_norm": 2.151920557303182, - "learning_rate": 4.917473797202005e-07, - "loss": 0.7254, - "step": 2535 - }, - { - "epoch": 0.8622917375042503, - "grad_norm": 2.232388197647897, - "learning_rate": 4.893682251539012e-07, - "loss": 0.6701, - "step": 2536 - }, - { - "epoch": 0.8626317579054743, - "grad_norm": 1.8999196855734106, - "learning_rate": 4.869945437462126e-07, - "loss": 0.7422, - "step": 2537 - }, - { - "epoch": 0.8629717783066984, - "grad_norm": 1.8645892417506236, - "learning_rate": 4.846263383773364e-07, - "loss": 0.7827, - "step": 2538 - }, - { - "epoch": 0.8633117987079225, - "grad_norm": 1.9633226448791292, - "learning_rate": 4.822636119208335e-07, - "loss": 0.8252, - "step": 2539 - }, - { - "epoch": 0.8636518191091466, - "grad_norm": 2.412498853567893, - "learning_rate": 4.799063672436111e-07, - "loss": 0.6881, - "step": 2540 - }, - { - "epoch": 0.8639918395103706, - "grad_norm": 2.0646181111873116, - "learning_rate": 4.775546072059311e-07, - "loss": 0.7813, - "step": 2541 - }, - { - "epoch": 0.8643318599115947, - "grad_norm": 1.811749527105386, - "learning_rate": 4.752083346613956e-07, - "loss": 0.804, - "step": 2542 - }, - { - "epoch": 0.8646718803128187, - "grad_norm": 2.0205742904901003, - "learning_rate": 4.728675524569487e-07, - "loss": 0.7478, - "step": 2543 - }, - { - "epoch": 0.8650119007140429, - "grad_norm": 1.6948710939652791, - "learning_rate": 4.7053226343287626e-07, - "loss": 0.7354, - "step": 2544 - }, - { - "epoch": 0.8653519211152669, - "grad_norm": 1.8571787878044181, - "learning_rate": 4.68202470422795e-07, - "loss": 0.7866, - "step": 2545 - }, - { - "epoch": 0.865691941516491, - "grad_norm": 4.332069466957595, - "learning_rate": 4.6587817625365406e-07, - "loss": 0.9335, - "step": 2546 - }, - { - "epoch": 0.866031961917715, - "grad_norm": 2.1059679315712083, - "learning_rate": 4.6355938374572975e-07, - "loss": 0.7504, - "step": 2547 - }, - { - "epoch": 0.8663719823189391, - "grad_norm": 2.411521023379306, - "learning_rate": 4.612460957126247e-07, - "loss": 0.7945, - "step": 2548 - }, - { - "epoch": 0.8667120027201632, - "grad_norm": 3.5331071294473904, - "learning_rate": 4.589383149612603e-07, - "loss": 0.7663, - "step": 2549 - }, - { - "epoch": 0.8670520231213873, - "grad_norm": 1.8408470267130377, - "learning_rate": 4.5663604429187547e-07, - "loss": 0.7752, - "step": 2550 - }, - { - "epoch": 0.8673920435226113, - "grad_norm": 1.8173297059452884, - "learning_rate": 4.543392864980256e-07, - "loss": 0.734, - "step": 2551 - }, - { - "epoch": 0.8677320639238354, - "grad_norm": 1.6548861190043966, - "learning_rate": 4.5204804436657423e-07, - "loss": 0.7518, - "step": 2552 - }, - { - "epoch": 0.8680720843250596, - "grad_norm": 1.8814419154796098, - "learning_rate": 4.4976232067769356e-07, - "loss": 0.8335, - "step": 2553 - }, - { - "epoch": 0.8684121047262836, - "grad_norm": 2.4824574969789017, - "learning_rate": 4.474821182048583e-07, - "loss": 0.7759, - "step": 2554 - }, - { - "epoch": 0.8687521251275077, - "grad_norm": 1.929420733997793, - "learning_rate": 4.45207439714847e-07, - "loss": 0.6974, - "step": 2555 - }, - { - "epoch": 0.8690921455287317, - "grad_norm": 1.810989892213861, - "learning_rate": 4.4293828796773133e-07, - "loss": 0.7086, - "step": 2556 - }, - { - "epoch": 0.8694321659299558, - "grad_norm": 3.143595670737321, - "learning_rate": 4.406746657168809e-07, - "loss": 0.8016, - "step": 2557 - }, - { - "epoch": 0.8697721863311799, - "grad_norm": 2.015317063610089, - "learning_rate": 4.384165757089526e-07, - "loss": 0.7969, - "step": 2558 - }, - { - "epoch": 0.870112206732404, - "grad_norm": 2.7863828548912277, - "learning_rate": 4.361640206838913e-07, - "loss": 0.7793, - "step": 2559 - }, - { - "epoch": 0.870452227133628, - "grad_norm": 1.9725121522194389, - "learning_rate": 4.339170033749279e-07, - "loss": 0.6607, - "step": 2560 - }, - { - "epoch": 0.8707922475348521, - "grad_norm": 1.7743293301120258, - "learning_rate": 4.316755265085715e-07, - "loss": 0.7992, - "step": 2561 - }, - { - "epoch": 0.8711322679360761, - "grad_norm": 4.284073115166657, - "learning_rate": 4.294395928046091e-07, - "loss": 0.6972, - "step": 2562 - }, - { - "epoch": 0.8714722883373003, - "grad_norm": 1.7376821947334347, - "learning_rate": 4.272092049761012e-07, - "loss": 0.7081, - "step": 2563 - }, - { - "epoch": 0.8718123087385243, - "grad_norm": 2.4003264376544524, - "learning_rate": 4.2498436572938117e-07, - "loss": 0.7366, - "step": 2564 - }, - { - "epoch": 0.8721523291397484, - "grad_norm": 1.7900283874803544, - "learning_rate": 4.227650777640474e-07, - "loss": 0.7543, - "step": 2565 - }, - { - "epoch": 0.8724923495409724, - "grad_norm": 2.121565874741983, - "learning_rate": 4.2055134377296245e-07, - "loss": 0.8625, - "step": 2566 - }, - { - "epoch": 0.8728323699421965, - "grad_norm": 2.065896891684966, - "learning_rate": 4.183431664422527e-07, - "loss": 0.8362, - "step": 2567 - }, - { - "epoch": 0.8731723903434206, - "grad_norm": 3.5516283553936154, - "learning_rate": 4.1614054845129814e-07, - "loss": 0.7923, - "step": 2568 - }, - { - "epoch": 0.8735124107446447, - "grad_norm": 1.7586409614005896, - "learning_rate": 4.139434924727359e-07, - "loss": 0.8087, - "step": 2569 - }, - { - "epoch": 0.8738524311458687, - "grad_norm": 1.763698578164718, - "learning_rate": 4.1175200117245127e-07, - "loss": 0.7511, - "step": 2570 - }, - { - "epoch": 0.8741924515470928, - "grad_norm": 1.9405876321607063, - "learning_rate": 4.095660772095822e-07, - "loss": 0.7895, - "step": 2571 - }, - { - "epoch": 0.8745324719483168, - "grad_norm": 2.2201530125382662, - "learning_rate": 4.0738572323650636e-07, - "loss": 0.7936, - "step": 2572 - }, - { - "epoch": 0.874872492349541, - "grad_norm": 2.698353558807428, - "learning_rate": 4.05210941898847e-07, - "loss": 0.7766, - "step": 2573 - }, - { - "epoch": 0.8752125127507651, - "grad_norm": 1.8695910167521532, - "learning_rate": 4.0304173583546214e-07, - "loss": 0.7827, - "step": 2574 - }, - { - "epoch": 0.8755525331519891, - "grad_norm": 1.787702184500189, - "learning_rate": 4.008781076784457e-07, - "loss": 0.8141, - "step": 2575 - }, - { - "epoch": 0.8758925535532132, - "grad_norm": 1.9350367343497943, - "learning_rate": 3.9872006005312545e-07, - "loss": 0.8147, - "step": 2576 - }, - { - "epoch": 0.8762325739544373, - "grad_norm": 1.9411047058406645, - "learning_rate": 3.965675955780551e-07, - "loss": 0.8205, - "step": 2577 - }, - { - "epoch": 0.8765725943556614, - "grad_norm": 2.083484099729529, - "learning_rate": 3.9442071686501605e-07, - "loss": 0.7374, - "step": 2578 - }, - { - "epoch": 0.8769126147568854, - "grad_norm": 1.484808045925216, - "learning_rate": 3.9227942651900943e-07, - "loss": 0.7934, - "step": 2579 - }, - { - "epoch": 0.8772526351581095, - "grad_norm": 1.6403262654823596, - "learning_rate": 3.901437271382591e-07, - "loss": 0.75, - "step": 2580 - }, - { - "epoch": 0.8775926555593335, - "grad_norm": 2.139199109393669, - "learning_rate": 3.8801362131420105e-07, - "loss": 0.7095, - "step": 2581 - }, - { - "epoch": 0.8779326759605577, - "grad_norm": 2.283437669529752, - "learning_rate": 3.858891116314861e-07, - "loss": 0.8167, - "step": 2582 - }, - { - "epoch": 0.8782726963617817, - "grad_norm": 1.777253272527263, - "learning_rate": 3.8377020066797557e-07, - "loss": 0.6707, - "step": 2583 - }, - { - "epoch": 0.8786127167630058, - "grad_norm": 2.032396863440268, - "learning_rate": 3.8165689099473436e-07, - "loss": 0.7875, - "step": 2584 - }, - { - "epoch": 0.8789527371642298, - "grad_norm": 2.1496667940512424, - "learning_rate": 3.7954918517603636e-07, - "loss": 0.7843, - "step": 2585 - }, - { - "epoch": 0.8792927575654539, - "grad_norm": 2.0953430909454838, - "learning_rate": 3.7744708576934795e-07, - "loss": 0.7498, - "step": 2586 - }, - { - "epoch": 0.879632777966678, - "grad_norm": 2.6645355484935, - "learning_rate": 3.7535059532533945e-07, - "loss": 0.7451, - "step": 2587 - }, - { - "epoch": 0.8799727983679021, - "grad_norm": 2.7910264296663567, - "learning_rate": 3.732597163878715e-07, - "loss": 0.761, - "step": 2588 - }, - { - "epoch": 0.8803128187691261, - "grad_norm": 2.1309852876618893, - "learning_rate": 3.711744514939991e-07, - "loss": 0.7839, - "step": 2589 - }, - { - "epoch": 0.8806528391703502, - "grad_norm": 2.0023508759747273, - "learning_rate": 3.690948031739622e-07, - "loss": 0.7626, - "step": 2590 - }, - { - "epoch": 0.8809928595715742, - "grad_norm": 2.1149106283441266, - "learning_rate": 3.67020773951185e-07, - "loss": 0.7549, - "step": 2591 - }, - { - "epoch": 0.8813328799727984, - "grad_norm": 1.785186211034344, - "learning_rate": 3.649523663422783e-07, - "loss": 0.7699, - "step": 2592 - }, - { - "epoch": 0.8816729003740225, - "grad_norm": 2.53236936356743, - "learning_rate": 3.6288958285702726e-07, - "loss": 0.7464, - "step": 2593 - }, - { - "epoch": 0.8820129207752465, - "grad_norm": 2.2182667953687516, - "learning_rate": 3.6083242599839365e-07, - "loss": 0.7926, - "step": 2594 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 2.0193533863123316, - "learning_rate": 3.587808982625124e-07, - "loss": 0.7586, - "step": 2595 - }, - { - "epoch": 0.8826929615776946, - "grad_norm": 1.8994718280339355, - "learning_rate": 3.567350021386895e-07, - "loss": 0.7463, - "step": 2596 - }, - { - "epoch": 0.8830329819789188, - "grad_norm": 1.9243015384876234, - "learning_rate": 3.546947401093953e-07, - "loss": 0.8557, - "step": 2597 - }, - { - "epoch": 0.8833730023801428, - "grad_norm": 1.8567845441632016, - "learning_rate": 3.5266011465026394e-07, - "loss": 0.8092, - "step": 2598 - }, - { - "epoch": 0.8837130227813669, - "grad_norm": 1.946576177975856, - "learning_rate": 3.506311282300934e-07, - "loss": 0.7336, - "step": 2599 - }, - { - "epoch": 0.8840530431825909, - "grad_norm": 1.7654478089333205, - "learning_rate": 3.486077833108342e-07, - "loss": 0.7989, - "step": 2600 - }, - { - "epoch": 0.884393063583815, - "grad_norm": 2.3364268494283444, - "learning_rate": 3.4659008234759597e-07, - "loss": 0.6956, - "step": 2601 - }, - { - "epoch": 0.8847330839850391, - "grad_norm": 2.1363703287698383, - "learning_rate": 3.4457802778863846e-07, - "loss": 0.7131, - "step": 2602 - }, - { - "epoch": 0.8850731043862632, - "grad_norm": 3.273937151667049, - "learning_rate": 3.4257162207536887e-07, - "loss": 0.821, - "step": 2603 - }, - { - "epoch": 0.8854131247874872, - "grad_norm": 1.6417747965293206, - "learning_rate": 3.405708676423408e-07, - "loss": 0.8703, - "step": 2604 - }, - { - "epoch": 0.8857531451887113, - "grad_norm": 1.7182489892417279, - "learning_rate": 3.3857576691725346e-07, - "loss": 0.7239, - "step": 2605 - }, - { - "epoch": 0.8860931655899354, - "grad_norm": 1.7767635345343673, - "learning_rate": 3.365863223209409e-07, - "loss": 0.7327, - "step": 2606 - }, - { - "epoch": 0.8864331859911595, - "grad_norm": 1.7793048886202218, - "learning_rate": 3.3460253626737774e-07, - "loss": 0.7237, - "step": 2607 - }, - { - "epoch": 0.8867732063923836, - "grad_norm": 2.404768196088338, - "learning_rate": 3.3262441116367174e-07, - "loss": 0.7197, - "step": 2608 - }, - { - "epoch": 0.8871132267936076, - "grad_norm": 2.5845357263659863, - "learning_rate": 3.306519494100618e-07, - "loss": 0.7361, - "step": 2609 - }, - { - "epoch": 0.8874532471948317, - "grad_norm": 1.7048562949785628, - "learning_rate": 3.286851533999136e-07, - "loss": 0.8217, - "step": 2610 - }, - { - "epoch": 0.8877932675960558, - "grad_norm": 1.8598354259713392, - "learning_rate": 3.2672402551971903e-07, - "loss": 0.7256, - "step": 2611 - }, - { - "epoch": 0.8881332879972799, - "grad_norm": 2.0484876787203614, - "learning_rate": 3.2476856814909364e-07, - "loss": 0.7733, - "step": 2612 - }, - { - "epoch": 0.8884733083985039, - "grad_norm": 4.050508948351152, - "learning_rate": 3.2281878366077046e-07, - "loss": 0.7087, - "step": 2613 - }, - { - "epoch": 0.888813328799728, - "grad_norm": 1.7444011978485319, - "learning_rate": 3.208746744205998e-07, - "loss": 0.8651, - "step": 2614 - }, - { - "epoch": 0.889153349200952, - "grad_norm": 1.7629638699985104, - "learning_rate": 3.1893624278754587e-07, - "loss": 0.7781, - "step": 2615 - }, - { - "epoch": 0.8894933696021762, - "grad_norm": 1.8022588695624528, - "learning_rate": 3.170034911136832e-07, - "loss": 0.8746, - "step": 2616 - }, - { - "epoch": 0.8898333900034002, - "grad_norm": 1.8390591568479497, - "learning_rate": 3.150764217441954e-07, - "loss": 0.6708, - "step": 2617 - }, - { - "epoch": 0.8901734104046243, - "grad_norm": 1.7855141453060075, - "learning_rate": 3.131550370173703e-07, - "loss": 0.7825, - "step": 2618 - }, - { - "epoch": 0.8905134308058483, - "grad_norm": 1.8373816915953056, - "learning_rate": 3.112393392645985e-07, - "loss": 0.7392, - "step": 2619 - }, - { - "epoch": 0.8908534512070724, - "grad_norm": 1.999434435687263, - "learning_rate": 3.093293308103679e-07, - "loss": 0.89, - "step": 2620 - }, - { - "epoch": 0.8911934716082965, - "grad_norm": 1.5593372875987372, - "learning_rate": 3.074250139722679e-07, - "loss": 0.7572, - "step": 2621 - }, - { - "epoch": 0.8915334920095206, - "grad_norm": 2.1392476418230184, - "learning_rate": 3.0552639106097684e-07, - "loss": 0.7994, - "step": 2622 - }, - { - "epoch": 0.8918735124107446, - "grad_norm": 2.4268837548387157, - "learning_rate": 3.0363346438026633e-07, - "loss": 0.8267, - "step": 2623 - }, - { - "epoch": 0.8922135328119687, - "grad_norm": 2.521048048719284, - "learning_rate": 3.0174623622699685e-07, - "loss": 0.7818, - "step": 2624 - }, - { - "epoch": 0.8925535532131927, - "grad_norm": 1.7313419670899555, - "learning_rate": 2.998647088911127e-07, - "loss": 0.7824, - "step": 2625 - }, - { - "epoch": 0.8928935736144169, - "grad_norm": 3.2464092926902364, - "learning_rate": 2.9798888465564226e-07, - "loss": 0.7654, - "step": 2626 - }, - { - "epoch": 0.893233594015641, - "grad_norm": 1.5731620759656288, - "learning_rate": 2.961187657966919e-07, - "loss": 0.8325, - "step": 2627 - }, - { - "epoch": 0.893573614416865, - "grad_norm": 2.6025793303197804, - "learning_rate": 2.942543545834475e-07, - "loss": 0.7288, - "step": 2628 - }, - { - "epoch": 0.893913634818089, - "grad_norm": 1.829882281307138, - "learning_rate": 2.923956532781691e-07, - "loss": 0.7506, - "step": 2629 - }, - { - "epoch": 0.8942536552193131, - "grad_norm": 2.3041651665798337, - "learning_rate": 2.9054266413618525e-07, - "loss": 0.7911, - "step": 2630 - }, - { - "epoch": 0.8945936756205373, - "grad_norm": 1.6349830871902187, - "learning_rate": 2.88695389405898e-07, - "loss": 0.7504, - "step": 2631 - }, - { - "epoch": 0.8949336960217613, - "grad_norm": 3.3922771842440587, - "learning_rate": 2.8685383132877163e-07, - "loss": 0.787, - "step": 2632 - }, - { - "epoch": 0.8952737164229854, - "grad_norm": 2.0614678915265525, - "learning_rate": 2.8501799213933646e-07, - "loss": 0.7534, - "step": 2633 - }, - { - "epoch": 0.8956137368242094, - "grad_norm": 1.5441939097294124, - "learning_rate": 2.831878740651833e-07, - "loss": 0.8937, - "step": 2634 - }, - { - "epoch": 0.8959537572254336, - "grad_norm": 2.0399976051493565, - "learning_rate": 2.8136347932695926e-07, - "loss": 0.6901, - "step": 2635 - }, - { - "epoch": 0.8962937776266576, - "grad_norm": 1.9641640372424263, - "learning_rate": 2.7954481013836744e-07, - "loss": 0.8211, - "step": 2636 - }, - { - "epoch": 0.8966337980278817, - "grad_norm": 2.7068467503326112, - "learning_rate": 2.7773186870616585e-07, - "loss": 0.8513, - "step": 2637 - }, - { - "epoch": 0.8969738184291057, - "grad_norm": 1.999992912921896, - "learning_rate": 2.759246572301599e-07, - "loss": 0.7835, - "step": 2638 - }, - { - "epoch": 0.8973138388303298, - "grad_norm": 1.4208853502746028, - "learning_rate": 2.741231779032022e-07, - "loss": 0.7349, - "step": 2639 - }, - { - "epoch": 0.8976538592315539, - "grad_norm": 1.803477923520495, - "learning_rate": 2.72327432911193e-07, - "loss": 0.744, - "step": 2640 - }, - { - "epoch": 0.897993879632778, - "grad_norm": 6.723981213587554, - "learning_rate": 2.7053742443307054e-07, - "loss": 0.718, - "step": 2641 - }, - { - "epoch": 0.898333900034002, - "grad_norm": 2.2565609405076814, - "learning_rate": 2.6875315464081566e-07, - "loss": 0.7945, - "step": 2642 - }, - { - "epoch": 0.8986739204352261, - "grad_norm": 2.046096256829784, - "learning_rate": 2.669746256994449e-07, - "loss": 0.73, - "step": 2643 - }, - { - "epoch": 0.8990139408364501, - "grad_norm": 1.656402743730646, - "learning_rate": 2.652018397670081e-07, - "loss": 0.7564, - "step": 2644 - }, - { - "epoch": 0.8993539612376743, - "grad_norm": 2.1387537466555, - "learning_rate": 2.6343479899458737e-07, - "loss": 0.7734, - "step": 2645 - }, - { - "epoch": 0.8996939816388984, - "grad_norm": 4.0058075533193325, - "learning_rate": 2.616735055262931e-07, - "loss": 0.7913, - "step": 2646 - }, - { - "epoch": 0.9000340020401224, - "grad_norm": 1.8790063917737332, - "learning_rate": 2.5991796149926306e-07, - "loss": 0.7609, - "step": 2647 - }, - { - "epoch": 0.9003740224413465, - "grad_norm": 1.7616231603038723, - "learning_rate": 2.5816816904365715e-07, - "loss": 0.6813, - "step": 2648 - }, - { - "epoch": 0.9007140428425705, - "grad_norm": 1.6567138767856624, - "learning_rate": 2.5642413028265867e-07, - "loss": 0.7752, - "step": 2649 - }, - { - "epoch": 0.9010540632437947, - "grad_norm": 1.5576675294760438, - "learning_rate": 2.546858473324676e-07, - "loss": 0.6574, - "step": 2650 - }, - { - "epoch": 0.9013940836450187, - "grad_norm": 1.8413136738169835, - "learning_rate": 2.529533223022995e-07, - "loss": 0.7272, - "step": 2651 - }, - { - "epoch": 0.9017341040462428, - "grad_norm": 1.7136660665303882, - "learning_rate": 2.5122655729438393e-07, - "loss": 0.8226, - "step": 2652 - }, - { - "epoch": 0.9020741244474668, - "grad_norm": 2.2558521762249355, - "learning_rate": 2.495055544039632e-07, - "loss": 0.812, - "step": 2653 - }, - { - "epoch": 0.9024141448486909, - "grad_norm": 1.7798515566519915, - "learning_rate": 2.477903157192846e-07, - "loss": 0.741, - "step": 2654 - }, - { - "epoch": 0.902754165249915, - "grad_norm": 2.098481810020344, - "learning_rate": 2.4608084332160277e-07, - "loss": 0.8253, - "step": 2655 - }, - { - "epoch": 0.9030941856511391, - "grad_norm": 1.6779439834154712, - "learning_rate": 2.443771392851768e-07, - "loss": 0.7023, - "step": 2656 - }, - { - "epoch": 0.9034342060523631, - "grad_norm": 1.9775008264088612, - "learning_rate": 2.4267920567726364e-07, - "loss": 0.7944, - "step": 2657 - }, - { - "epoch": 0.9037742264535872, - "grad_norm": 2.5593382037495216, - "learning_rate": 2.409870445581225e-07, - "loss": 0.7293, - "step": 2658 - }, - { - "epoch": 0.9041142468548113, - "grad_norm": 2.0166338772224086, - "learning_rate": 2.393006579810037e-07, - "loss": 0.7021, - "step": 2659 - }, - { - "epoch": 0.9044542672560354, - "grad_norm": 1.6435413275557214, - "learning_rate": 2.3762004799215422e-07, - "loss": 0.7309, - "step": 2660 - }, - { - "epoch": 0.9047942876572594, - "grad_norm": 2.4504668750280008, - "learning_rate": 2.3594521663081072e-07, - "loss": 0.7637, - "step": 2661 - }, - { - "epoch": 0.9051343080584835, - "grad_norm": 1.6120218788123009, - "learning_rate": 2.3427616592919587e-07, - "loss": 0.7751, - "step": 2662 - }, - { - "epoch": 0.9054743284597075, - "grad_norm": 2.086946712646895, - "learning_rate": 2.3261289791252306e-07, - "loss": 0.6903, - "step": 2663 - }, - { - "epoch": 0.9058143488609317, - "grad_norm": 1.7397600903656776, - "learning_rate": 2.3095541459898452e-07, - "loss": 0.7838, - "step": 2664 - }, - { - "epoch": 0.9061543692621558, - "grad_norm": 2.2776303771036366, - "learning_rate": 2.2930371799975593e-07, - "loss": 0.8619, - "step": 2665 - }, - { - "epoch": 0.9064943896633798, - "grad_norm": 1.6304572281291108, - "learning_rate": 2.2765781011899025e-07, - "loss": 0.8539, - "step": 2666 - }, - { - "epoch": 0.9068344100646039, - "grad_norm": 2.8418896975885586, - "learning_rate": 2.260176929538166e-07, - "loss": 0.9118, - "step": 2667 - }, - { - "epoch": 0.9071744304658279, - "grad_norm": 1.7963190573302756, - "learning_rate": 2.243833684943375e-07, - "loss": 0.8397, - "step": 2668 - }, - { - "epoch": 0.9075144508670521, - "grad_norm": 1.6299943334815419, - "learning_rate": 2.2275483872362835e-07, - "loss": 0.7385, - "step": 2669 - }, - { - "epoch": 0.9078544712682761, - "grad_norm": 1.9271317601269167, - "learning_rate": 2.2113210561773124e-07, - "loss": 0.7455, - "step": 2670 - }, - { - "epoch": 0.9081944916695002, - "grad_norm": 1.6366781921840725, - "learning_rate": 2.1951517114565446e-07, - "loss": 0.7428, - "step": 2671 - }, - { - "epoch": 0.9085345120707242, - "grad_norm": 1.6460432599583494, - "learning_rate": 2.179040372693736e-07, - "loss": 0.6801, - "step": 2672 - }, - { - "epoch": 0.9088745324719483, - "grad_norm": 3.0862578766801514, - "learning_rate": 2.162987059438204e-07, - "loss": 0.7899, - "step": 2673 - }, - { - "epoch": 0.9092145528731724, - "grad_norm": 2.0209153785335814, - "learning_rate": 2.1469917911689232e-07, - "loss": 0.8979, - "step": 2674 - }, - { - "epoch": 0.9095545732743965, - "grad_norm": 2.019796094061022, - "learning_rate": 2.1310545872943788e-07, - "loss": 0.7872, - "step": 2675 - }, - { - "epoch": 0.9098945936756205, - "grad_norm": 2.4953968703645515, - "learning_rate": 2.115175467152636e-07, - "loss": 0.7581, - "step": 2676 - }, - { - "epoch": 0.9102346140768446, - "grad_norm": 1.9678752909443469, - "learning_rate": 2.0993544500112706e-07, - "loss": 0.7204, - "step": 2677 - }, - { - "epoch": 0.9105746344780686, - "grad_norm": 2.4724784821104935, - "learning_rate": 2.0835915550673492e-07, - "loss": 0.8005, - "step": 2678 - }, - { - "epoch": 0.9109146548792928, - "grad_norm": 2.3974865270538066, - "learning_rate": 2.0678868014474328e-07, - "loss": 0.8121, - "step": 2679 - }, - { - "epoch": 0.9112546752805168, - "grad_norm": 2.626755117379274, - "learning_rate": 2.0522402082075121e-07, - "loss": 0.615, - "step": 2680 - }, - { - "epoch": 0.9115946956817409, - "grad_norm": 1.7814776861792805, - "learning_rate": 2.0366517943330278e-07, - "loss": 0.845, - "step": 2681 - }, - { - "epoch": 0.911934716082965, - "grad_norm": 1.9241174576916087, - "learning_rate": 2.0211215787388105e-07, - "loss": 0.8233, - "step": 2682 - }, - { - "epoch": 0.912274736484189, - "grad_norm": 1.6974694912568216, - "learning_rate": 2.0056495802690923e-07, - "loss": 0.8282, - "step": 2683 - }, - { - "epoch": 0.9126147568854132, - "grad_norm": 3.7787758396910336, - "learning_rate": 1.9902358176974335e-07, - "loss": 0.7483, - "step": 2684 - }, - { - "epoch": 0.9129547772866372, - "grad_norm": 2.0551170310858593, - "learning_rate": 1.974880309726762e-07, - "loss": 0.781, - "step": 2685 - }, - { - "epoch": 0.9132947976878613, - "grad_norm": 1.7924440273829534, - "learning_rate": 1.959583074989302e-07, - "loss": 0.7122, - "step": 2686 - }, - { - "epoch": 0.9136348180890853, - "grad_norm": 2.225503399226498, - "learning_rate": 1.9443441320465716e-07, - "loss": 0.7122, - "step": 2687 - }, - { - "epoch": 0.9139748384903095, - "grad_norm": 1.8911761329043806, - "learning_rate": 1.9291634993893803e-07, - "loss": 0.6713, - "step": 2688 - }, - { - "epoch": 0.9143148588915335, - "grad_norm": 1.8502644569783382, - "learning_rate": 1.9140411954377437e-07, - "loss": 0.6624, - "step": 2689 - }, - { - "epoch": 0.9146548792927576, - "grad_norm": 2.653961623513032, - "learning_rate": 1.8989772385409445e-07, - "loss": 0.8623, - "step": 2690 - }, - { - "epoch": 0.9149948996939816, - "grad_norm": 2.0652612784577316, - "learning_rate": 1.883971646977434e-07, - "loss": 0.7011, - "step": 2691 - }, - { - "epoch": 0.9153349200952057, - "grad_norm": 2.568246822202351, - "learning_rate": 1.8690244389548694e-07, - "loss": 0.6886, - "step": 2692 - }, - { - "epoch": 0.9156749404964298, - "grad_norm": 2.1817257448676104, - "learning_rate": 1.8541356326100436e-07, - "loss": 0.7835, - "step": 2693 - }, - { - "epoch": 0.9160149608976539, - "grad_norm": 1.5833475097932033, - "learning_rate": 1.8393052460088877e-07, - "loss": 0.7628, - "step": 2694 - }, - { - "epoch": 0.9163549812988779, - "grad_norm": 2.0794302266744555, - "learning_rate": 1.8245332971464803e-07, - "loss": 0.7234, - "step": 2695 - }, - { - "epoch": 0.916695001700102, - "grad_norm": 1.891383361699711, - "learning_rate": 1.8098198039469438e-07, - "loss": 0.8679, - "step": 2696 - }, - { - "epoch": 0.917035022101326, - "grad_norm": 1.847279800591813, - "learning_rate": 1.7951647842635035e-07, - "loss": 0.7993, - "step": 2697 - }, - { - "epoch": 0.9173750425025502, - "grad_norm": 1.672424819741688, - "learning_rate": 1.780568255878423e-07, - "loss": 0.7778, - "step": 2698 - }, - { - "epoch": 0.9177150629037742, - "grad_norm": 2.6871925509688985, - "learning_rate": 1.7660302365029969e-07, - "loss": 0.69, - "step": 2699 - }, - { - "epoch": 0.9180550833049983, - "grad_norm": 1.6768457562216357, - "learning_rate": 1.7515507437775193e-07, - "loss": 0.7657, - "step": 2700 - }, - { - "epoch": 0.9183951037062223, - "grad_norm": 1.809600285567411, - "learning_rate": 1.7371297952712752e-07, - "loss": 0.7147, - "step": 2701 - }, - { - "epoch": 0.9187351241074464, - "grad_norm": 1.9059976949501458, - "learning_rate": 1.722767408482501e-07, - "loss": 0.7172, - "step": 2702 - }, - { - "epoch": 0.9190751445086706, - "grad_norm": 1.9399888604391888, - "learning_rate": 1.7084636008383837e-07, - "loss": 0.7127, - "step": 2703 - }, - { - "epoch": 0.9194151649098946, - "grad_norm": 2.6413221231221455, - "learning_rate": 1.6942183896950458e-07, - "loss": 0.8735, - "step": 2704 - }, - { - "epoch": 0.9197551853111187, - "grad_norm": 2.052359841123048, - "learning_rate": 1.680031792337472e-07, - "loss": 0.7513, - "step": 2705 - }, - { - "epoch": 0.9200952057123427, - "grad_norm": 1.7351319515737746, - "learning_rate": 1.6659038259795644e-07, - "loss": 0.7591, - "step": 2706 - }, - { - "epoch": 0.9204352261135668, - "grad_norm": 3.5928987788596505, - "learning_rate": 1.6518345077640606e-07, - "loss": 0.865, - "step": 2707 - }, - { - "epoch": 0.9207752465147909, - "grad_norm": 2.475516242774065, - "learning_rate": 1.6378238547625436e-07, - "loss": 0.7076, - "step": 2708 - }, - { - "epoch": 0.921115266916015, - "grad_norm": 2.757378010289697, - "learning_rate": 1.6238718839753975e-07, - "loss": 0.8151, - "step": 2709 - }, - { - "epoch": 0.921455287317239, - "grad_norm": 2.839365711351715, - "learning_rate": 1.609978612331825e-07, - "loss": 0.7181, - "step": 2710 - }, - { - "epoch": 0.9217953077184631, - "grad_norm": 1.8003430537941647, - "learning_rate": 1.5961440566897913e-07, - "loss": 0.8018, - "step": 2711 - }, - { - "epoch": 0.9221353281196872, - "grad_norm": 2.3010251129273405, - "learning_rate": 1.582368233836007e-07, - "loss": 0.7241, - "step": 2712 - }, - { - "epoch": 0.9224753485209113, - "grad_norm": 1.7723077920834232, - "learning_rate": 1.5686511604859456e-07, - "loss": 0.8193, - "step": 2713 - }, - { - "epoch": 0.9228153689221353, - "grad_norm": 2.2601407328290364, - "learning_rate": 1.5549928532837544e-07, - "loss": 0.855, - "step": 2714 - }, - { - "epoch": 0.9231553893233594, - "grad_norm": 1.949904865719386, - "learning_rate": 1.5413933288023207e-07, - "loss": 0.8482, - "step": 2715 - }, - { - "epoch": 0.9234954097245834, - "grad_norm": 1.3958294128404651, - "learning_rate": 1.5278526035431673e-07, - "loss": 0.7979, - "step": 2716 - }, - { - "epoch": 0.9238354301258076, - "grad_norm": 3.1940088213187745, - "learning_rate": 1.5143706939364844e-07, - "loss": 0.7152, - "step": 2717 - }, - { - "epoch": 0.9241754505270316, - "grad_norm": 2.3293084372099138, - "learning_rate": 1.5009476163410975e-07, - "loss": 0.7087, - "step": 2718 - }, - { - "epoch": 0.9245154709282557, - "grad_norm": 1.724579154051739, - "learning_rate": 1.4875833870444334e-07, - "loss": 0.8299, - "step": 2719 - }, - { - "epoch": 0.9248554913294798, - "grad_norm": 1.8586372981224857, - "learning_rate": 1.474278022262543e-07, - "loss": 0.7033, - "step": 2720 - }, - { - "epoch": 0.9251955117307038, - "grad_norm": 1.53996008803897, - "learning_rate": 1.4610315381400175e-07, - "loss": 0.7594, - "step": 2721 - }, - { - "epoch": 0.925535532131928, - "grad_norm": 2.2916687247168483, - "learning_rate": 1.4478439507500218e-07, - "loss": 0.7009, - "step": 2722 - }, - { - "epoch": 0.925875552533152, - "grad_norm": 1.9641042638955648, - "learning_rate": 1.4347152760942507e-07, - "loss": 0.6479, - "step": 2723 - }, - { - "epoch": 0.9262155729343761, - "grad_norm": 1.9501129366148464, - "learning_rate": 1.4216455301029274e-07, - "loss": 0.7925, - "step": 2724 - }, - { - "epoch": 0.9265555933356001, - "grad_norm": 2.198382597904698, - "learning_rate": 1.4086347286347502e-07, - "loss": 0.857, - "step": 2725 - }, - { - "epoch": 0.9268956137368242, - "grad_norm": 2.132363720106857, - "learning_rate": 1.3956828874768901e-07, - "loss": 0.6655, - "step": 2726 - }, - { - "epoch": 0.9272356341380483, - "grad_norm": 1.8802823835704232, - "learning_rate": 1.3827900223450152e-07, - "loss": 0.8498, - "step": 2727 - }, - { - "epoch": 0.9275756545392724, - "grad_norm": 2.4695615148666104, - "learning_rate": 1.3699561488831892e-07, - "loss": 0.767, - "step": 2728 - }, - { - "epoch": 0.9279156749404964, - "grad_norm": 1.547770137207598, - "learning_rate": 1.357181282663933e-07, - "loss": 0.8417, - "step": 2729 - }, - { - "epoch": 0.9282556953417205, - "grad_norm": 1.8562537232375893, - "learning_rate": 1.3444654391881306e-07, - "loss": 0.7578, - "step": 2730 - }, - { - "epoch": 0.9285957157429445, - "grad_norm": 2.3415279377546647, - "learning_rate": 1.3318086338850843e-07, - "loss": 0.6844, - "step": 2731 - }, - { - "epoch": 0.9289357361441687, - "grad_norm": 2.056878813634059, - "learning_rate": 1.3192108821124428e-07, - "loss": 0.8104, - "step": 2732 - }, - { - "epoch": 0.9292757565453927, - "grad_norm": 1.800632904887497, - "learning_rate": 1.3066721991561891e-07, - "loss": 0.7732, - "step": 2733 - }, - { - "epoch": 0.9296157769466168, - "grad_norm": 1.801402555086326, - "learning_rate": 1.2941926002306536e-07, - "loss": 0.754, - "step": 2734 - }, - { - "epoch": 0.9299557973478408, - "grad_norm": 2.1441261210894607, - "learning_rate": 1.2817721004784568e-07, - "loss": 0.7945, - "step": 2735 - }, - { - "epoch": 0.9302958177490649, - "grad_norm": 2.5741151332940677, - "learning_rate": 1.2694107149705258e-07, - "loss": 0.7383, - "step": 2736 - }, - { - "epoch": 0.930635838150289, - "grad_norm": 1.5286432889504458, - "learning_rate": 1.2571084587060466e-07, - "loss": 0.6856, - "step": 2737 - }, - { - "epoch": 0.9309758585515131, - "grad_norm": 11.326187707959681, - "learning_rate": 1.2448653466124672e-07, - "loss": 0.8106, - "step": 2738 - }, - { - "epoch": 0.9313158789527372, - "grad_norm": 1.7511130965052701, - "learning_rate": 1.2326813935454596e-07, - "loss": 0.7444, - "step": 2739 - }, - { - "epoch": 0.9316558993539612, - "grad_norm": 1.7617651287605105, - "learning_rate": 1.2205566142889257e-07, - "loss": 0.854, - "step": 2740 - }, - { - "epoch": 0.9319959197551854, - "grad_norm": 2.145302434001505, - "learning_rate": 1.2084910235549586e-07, - "loss": 0.8164, - "step": 2741 - }, - { - "epoch": 0.9323359401564094, - "grad_norm": 1.5133580779256988, - "learning_rate": 1.19648463598383e-07, - "loss": 0.7909, - "step": 2742 - }, - { - "epoch": 0.9326759605576335, - "grad_norm": 1.7694718002515495, - "learning_rate": 1.1845374661439813e-07, - "loss": 0.7474, - "step": 2743 - }, - { - "epoch": 0.9330159809588575, - "grad_norm": 1.624165239983796, - "learning_rate": 1.1726495285319883e-07, - "loss": 0.7366, - "step": 2744 - }, - { - "epoch": 0.9333560013600816, - "grad_norm": 2.2415272140749227, - "learning_rate": 1.1608208375725794e-07, - "loss": 0.7531, - "step": 2745 - }, - { - "epoch": 0.9336960217613057, - "grad_norm": 1.806699065485192, - "learning_rate": 1.1490514076185621e-07, - "loss": 0.7165, - "step": 2746 - }, - { - "epoch": 0.9340360421625298, - "grad_norm": 2.1552408637827267, - "learning_rate": 1.1373412529508687e-07, - "loss": 0.7309, - "step": 2747 - }, - { - "epoch": 0.9343760625637538, - "grad_norm": 1.8671167963972226, - "learning_rate": 1.1256903877784886e-07, - "loss": 0.7929, - "step": 2748 - }, - { - "epoch": 0.9347160829649779, - "grad_norm": 2.3625092627025497, - "learning_rate": 1.1140988262384633e-07, - "loss": 0.7205, - "step": 2749 - }, - { - "epoch": 0.9350561033662019, - "grad_norm": 1.8238588793473012, - "learning_rate": 1.1025665823958975e-07, - "loss": 0.78, - "step": 2750 - }, - { - "epoch": 0.9353961237674261, - "grad_norm": 2.234751620091944, - "learning_rate": 1.0910936702438924e-07, - "loss": 0.7863, - "step": 2751 - }, - { - "epoch": 0.9357361441686501, - "grad_norm": 2.5286309526580717, - "learning_rate": 1.0796801037035898e-07, - "loss": 0.7947, - "step": 2752 - }, - { - "epoch": 0.9360761645698742, - "grad_norm": 1.4596449824820212, - "learning_rate": 1.068325896624095e-07, - "loss": 0.7242, - "step": 2753 - }, - { - "epoch": 0.9364161849710982, - "grad_norm": 1.8586921437706274, - "learning_rate": 1.0570310627825042e-07, - "loss": 0.7237, - "step": 2754 - }, - { - "epoch": 0.9367562053723223, - "grad_norm": 2.1973293560111355, - "learning_rate": 1.0457956158838545e-07, - "loss": 0.7914, - "step": 2755 - }, - { - "epoch": 0.9370962257735465, - "grad_norm": 1.9486083197205395, - "learning_rate": 1.0346195695611461e-07, - "loss": 0.6833, - "step": 2756 - }, - { - "epoch": 0.9374362461747705, - "grad_norm": 2.0127688563988406, - "learning_rate": 1.0235029373752758e-07, - "loss": 0.8055, - "step": 2757 - }, - { - "epoch": 0.9377762665759946, - "grad_norm": 1.9434824986065704, - "learning_rate": 1.0124457328150705e-07, - "loss": 0.7631, - "step": 2758 - }, - { - "epoch": 0.9381162869772186, - "grad_norm": 1.8861521568684736, - "learning_rate": 1.0014479692972368e-07, - "loss": 0.8138, - "step": 2759 - }, - { - "epoch": 0.9384563073784427, - "grad_norm": 2.4202833010762235, - "learning_rate": 9.905096601663556e-08, - "loss": 0.8974, - "step": 2760 - }, - { - "epoch": 0.9387963277796668, - "grad_norm": 1.7851590249860223, - "learning_rate": 9.796308186948711e-08, - "loss": 0.8358, - "step": 2761 - }, - { - "epoch": 0.9391363481808909, - "grad_norm": 2.4290115845610325, - "learning_rate": 9.688114580830688e-08, - "loss": 0.7979, - "step": 2762 - }, - { - "epoch": 0.9394763685821149, - "grad_norm": 1.5838175734575497, - "learning_rate": 9.580515914590637e-08, - "loss": 0.8471, - "step": 2763 - }, - { - "epoch": 0.939816388983339, - "grad_norm": 2.027267479543276, - "learning_rate": 9.473512318787681e-08, - "loss": 0.6754, - "step": 2764 - }, - { - "epoch": 0.940156409384563, - "grad_norm": 1.809277908733106, - "learning_rate": 9.367103923259124e-08, - "loss": 0.7902, - "step": 2765 - }, - { - "epoch": 0.9404964297857872, - "grad_norm": 4.2105282282100225, - "learning_rate": 9.261290857119853e-08, - "loss": 0.7979, - "step": 2766 - }, - { - "epoch": 0.9408364501870112, - "grad_norm": 2.1122972531258655, - "learning_rate": 9.156073248762387e-08, - "loss": 0.7509, - "step": 2767 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 1.9840880876989972, - "learning_rate": 9.051451225856877e-08, - "loss": 0.6946, - "step": 2768 - }, - { - "epoch": 0.9415164909894593, - "grad_norm": 2.524068083748069, - "learning_rate": 8.947424915350723e-08, - "loss": 0.7643, - "step": 2769 - }, - { - "epoch": 0.9418565113906835, - "grad_norm": 2.133668114970484, - "learning_rate": 8.843994443468451e-08, - "loss": 0.8265, - "step": 2770 - }, - { - "epoch": 0.9421965317919075, - "grad_norm": 1.4893159029193104, - "learning_rate": 8.741159935711563e-08, - "loss": 0.8069, - "step": 2771 - }, - { - "epoch": 0.9425365521931316, - "grad_norm": 2.1763296510811716, - "learning_rate": 8.638921516858634e-08, - "loss": 0.8413, - "step": 2772 - }, - { - "epoch": 0.9428765725943556, - "grad_norm": 2.498518002319564, - "learning_rate": 8.537279310964763e-08, - "loss": 0.8262, - "step": 2773 - }, - { - "epoch": 0.9432165929955797, - "grad_norm": 8.993724154572014, - "learning_rate": 8.436233441361629e-08, - "loss": 0.8378, - "step": 2774 - }, - { - "epoch": 0.9435566133968039, - "grad_norm": 1.576329182227026, - "learning_rate": 8.335784030657324e-08, - "loss": 0.8379, - "step": 2775 - }, - { - "epoch": 0.9438966337980279, - "grad_norm": 2.424881688164331, - "learning_rate": 8.235931200736235e-08, - "loss": 0.7671, - "step": 2776 - }, - { - "epoch": 0.944236654199252, - "grad_norm": 2.807110671874095, - "learning_rate": 8.136675072758948e-08, - "loss": 0.7003, - "step": 2777 - }, - { - "epoch": 0.944576674600476, - "grad_norm": 4.154937564787449, - "learning_rate": 8.038015767161789e-08, - "loss": 0.8277, - "step": 2778 - }, - { - "epoch": 0.9449166950017001, - "grad_norm": 1.674791036585208, - "learning_rate": 7.939953403657164e-08, - "loss": 0.7474, - "step": 2779 - }, - { - "epoch": 0.9452567154029242, - "grad_norm": 2.0417610679261635, - "learning_rate": 7.842488101232893e-08, - "loss": 0.7489, - "step": 2780 - }, - { - "epoch": 0.9455967358041483, - "grad_norm": 2.355200646804559, - "learning_rate": 7.745619978152653e-08, - "loss": 0.8185, - "step": 2781 - }, - { - "epoch": 0.9459367562053723, - "grad_norm": 2.1071996152685863, - "learning_rate": 7.649349151955199e-08, - "loss": 0.7427, - "step": 2782 - }, - { - "epoch": 0.9462767766065964, - "grad_norm": 2.5068513815147413, - "learning_rate": 7.553675739454647e-08, - "loss": 0.7599, - "step": 2783 - }, - { - "epoch": 0.9466167970078204, - "grad_norm": 2.137090570866946, - "learning_rate": 7.4585998567403e-08, - "loss": 0.7743, - "step": 2784 - }, - { - "epoch": 0.9469568174090446, - "grad_norm": 1.8432992230207976, - "learning_rate": 7.364121619176213e-08, - "loss": 0.7191, - "step": 2785 - }, - { - "epoch": 0.9472968378102686, - "grad_norm": 1.9437651826974969, - "learning_rate": 7.270241141401568e-08, - "loss": 0.8466, - "step": 2786 - }, - { - "epoch": 0.9476368582114927, - "grad_norm": 1.7799878186616265, - "learning_rate": 7.17695853732997e-08, - "loss": 0.77, - "step": 2787 - }, - { - "epoch": 0.9479768786127167, - "grad_norm": 1.7842807831458771, - "learning_rate": 7.084273920149654e-08, - "loss": 0.9092, - "step": 2788 - }, - { - "epoch": 0.9483168990139408, - "grad_norm": 1.7117275262466325, - "learning_rate": 6.99218740232338e-08, - "loss": 0.8042, - "step": 2789 - }, - { - "epoch": 0.948656919415165, - "grad_norm": 1.6622479904527474, - "learning_rate": 6.900699095587937e-08, - "loss": 0.7579, - "step": 2790 - }, - { - "epoch": 0.948996939816389, - "grad_norm": 1.8529558113196523, - "learning_rate": 6.809809110954413e-08, - "loss": 0.8636, - "step": 2791 - }, - { - "epoch": 0.949336960217613, - "grad_norm": 2.163551138095109, - "learning_rate": 6.719517558707922e-08, - "loss": 0.892, - "step": 2792 - }, - { - "epoch": 0.9496769806188371, - "grad_norm": 1.8487943962662825, - "learning_rate": 6.629824548407381e-08, - "loss": 0.7987, - "step": 2793 - }, - { - "epoch": 0.9500170010200613, - "grad_norm": 2.0154438939507497, - "learning_rate": 6.540730188885347e-08, - "loss": 0.7981, - "step": 2794 - }, - { - "epoch": 0.9503570214212853, - "grad_norm": 1.9570984130674787, - "learning_rate": 6.452234588248285e-08, - "loss": 0.7771, - "step": 2795 - }, - { - "epoch": 0.9506970418225094, - "grad_norm": 1.6604363995442626, - "learning_rate": 6.364337853875745e-08, - "loss": 0.654, - "step": 2796 - }, - { - "epoch": 0.9510370622237334, - "grad_norm": 1.8616496731827172, - "learning_rate": 6.277040092420916e-08, - "loss": 0.7682, - "step": 2797 - }, - { - "epoch": 0.9513770826249575, - "grad_norm": 1.4864670570007181, - "learning_rate": 6.190341409810063e-08, - "loss": 0.7729, - "step": 2798 - }, - { - "epoch": 0.9517171030261816, - "grad_norm": 2.167348761585454, - "learning_rate": 6.104241911242592e-08, - "loss": 0.8381, - "step": 2799 - }, - { - "epoch": 0.9520571234274057, - "grad_norm": 2.0214824467624606, - "learning_rate": 6.018741701190767e-08, - "loss": 0.8774, - "step": 2800 - }, - { - "epoch": 0.9523971438286297, - "grad_norm": 2.046012942264478, - "learning_rate": 5.933840883399766e-08, - "loss": 0.8185, - "step": 2801 - }, - { - "epoch": 0.9527371642298538, - "grad_norm": 1.6744820889133898, - "learning_rate": 5.8495395608874625e-08, - "loss": 0.7855, - "step": 2802 - }, - { - "epoch": 0.9530771846310778, - "grad_norm": 1.9991594031620603, - "learning_rate": 5.7658378359443104e-08, - "loss": 0.7734, - "step": 2803 - }, - { - "epoch": 0.953417205032302, - "grad_norm": 1.8896007551548046, - "learning_rate": 5.6827358101331774e-08, - "loss": 0.753, - "step": 2804 - }, - { - "epoch": 0.953757225433526, - "grad_norm": 2.499578946675649, - "learning_rate": 5.600233584289294e-08, - "loss": 0.7645, - "step": 2805 - }, - { - "epoch": 0.9540972458347501, - "grad_norm": 2.0291966720592898, - "learning_rate": 5.518331258520138e-08, - "loss": 0.7063, - "step": 2806 - }, - { - "epoch": 0.9544372662359741, - "grad_norm": 2.115533353024901, - "learning_rate": 5.437028932205213e-08, - "loss": 0.7353, - "step": 2807 - }, - { - "epoch": 0.9547772866371982, - "grad_norm": 2.230634267024501, - "learning_rate": 5.356326703995884e-08, - "loss": 0.7527, - "step": 2808 - }, - { - "epoch": 0.9551173070384223, - "grad_norm": 1.9500468263820696, - "learning_rate": 5.276224671815655e-08, - "loss": 0.7196, - "step": 2809 - }, - { - "epoch": 0.9554573274396464, - "grad_norm": 1.9933718903005513, - "learning_rate": 5.196722932859499e-08, - "loss": 0.7947, - "step": 2810 - }, - { - "epoch": 0.9557973478408704, - "grad_norm": 3.9589726805428658, - "learning_rate": 5.117821583594085e-08, - "loss": 0.8693, - "step": 2811 - }, - { - "epoch": 0.9561373682420945, - "grad_norm": 1.697109589336965, - "learning_rate": 5.0395207197575516e-08, - "loss": 0.8559, - "step": 2812 - }, - { - "epoch": 0.9564773886433185, - "grad_norm": 1.7784029867244318, - "learning_rate": 4.9618204363595656e-08, - "loss": 0.7196, - "step": 2813 - }, - { - "epoch": 0.9568174090445427, - "grad_norm": 1.7135140822695198, - "learning_rate": 4.8847208276808224e-08, - "loss": 0.7667, - "step": 2814 - }, - { - "epoch": 0.9571574294457668, - "grad_norm": 2.0635249556468676, - "learning_rate": 4.808221987273265e-08, - "loss": 0.7779, - "step": 2815 - }, - { - "epoch": 0.9574974498469908, - "grad_norm": 1.7283710035195046, - "learning_rate": 4.732324007959921e-08, - "loss": 0.7874, - "step": 2816 - }, - { - "epoch": 0.9578374702482149, - "grad_norm": 2.1716890834730247, - "learning_rate": 4.657026981834623e-08, - "loss": 0.776, - "step": 2817 - }, - { - "epoch": 0.9581774906494389, - "grad_norm": 1.8440991529889135, - "learning_rate": 4.5823310002621745e-08, - "loss": 0.7675, - "step": 2818 - }, - { - "epoch": 0.9585175110506631, - "grad_norm": 1.9867632028220694, - "learning_rate": 4.5082361538779095e-08, - "loss": 0.8159, - "step": 2819 - }, - { - "epoch": 0.9588575314518871, - "grad_norm": 1.6735880869924653, - "learning_rate": 4.434742532587855e-08, - "loss": 0.8242, - "step": 2820 - }, - { - "epoch": 0.9591975518531112, - "grad_norm": 1.9489756840557746, - "learning_rate": 4.3618502255684533e-08, - "loss": 0.7493, - "step": 2821 - }, - { - "epoch": 0.9595375722543352, - "grad_norm": 1.8908904594168388, - "learning_rate": 4.289559321266623e-08, - "loss": 0.7569, - "step": 2822 - }, - { - "epoch": 0.9598775926555594, - "grad_norm": 2.0307318453443663, - "learning_rate": 4.2178699073994744e-08, - "loss": 0.8468, - "step": 2823 - }, - { - "epoch": 0.9602176130567834, - "grad_norm": 2.155622909096085, - "learning_rate": 4.1467820709541474e-08, - "loss": 0.7998, - "step": 2824 - }, - { - "epoch": 0.9605576334580075, - "grad_norm": 3.045935542421655, - "learning_rate": 4.0762958981880876e-08, - "loss": 0.6656, - "step": 2825 - }, - { - "epoch": 0.9608976538592315, - "grad_norm": 1.462100425448403, - "learning_rate": 4.006411474628491e-08, - "loss": 0.7495, - "step": 2826 - }, - { - "epoch": 0.9612376742604556, - "grad_norm": 2.0395632850495873, - "learning_rate": 3.937128885072528e-08, - "loss": 0.7343, - "step": 2827 - }, - { - "epoch": 0.9615776946616797, - "grad_norm": 1.4943247088061522, - "learning_rate": 3.868448213587006e-08, - "loss": 0.7365, - "step": 2828 - }, - { - "epoch": 0.9619177150629038, - "grad_norm": 1.6462875432934096, - "learning_rate": 3.800369543508431e-08, - "loss": 0.7184, - "step": 2829 - }, - { - "epoch": 0.9622577354641279, - "grad_norm": 1.7317699680459382, - "learning_rate": 3.7328929574428354e-08, - "loss": 0.7887, - "step": 2830 - }, - { - "epoch": 0.9625977558653519, - "grad_norm": 2.0339479815422683, - "learning_rate": 3.6660185372656144e-08, - "loss": 0.6979, - "step": 2831 - }, - { - "epoch": 0.962937776266576, - "grad_norm": 2.024001193970488, - "learning_rate": 3.5997463641216925e-08, - "loss": 0.7664, - "step": 2832 - }, - { - "epoch": 0.9632777966678001, - "grad_norm": 2.7918239147503123, - "learning_rate": 3.534076518424967e-08, - "loss": 0.7498, - "step": 2833 - }, - { - "epoch": 0.9636178170690242, - "grad_norm": 1.8084785912714854, - "learning_rate": 3.469009079858698e-08, - "loss": 0.5801, - "step": 2834 - }, - { - "epoch": 0.9639578374702482, - "grad_norm": 1.6988220138175392, - "learning_rate": 3.404544127375064e-08, - "loss": 0.7183, - "step": 2835 - }, - { - "epoch": 0.9642978578714723, - "grad_norm": 1.7500355302390025, - "learning_rate": 3.340681739195328e-08, - "loss": 0.8591, - "step": 2836 - }, - { - "epoch": 0.9646378782726963, - "grad_norm": 2.0915983335526542, - "learning_rate": 3.277421992809448e-08, - "loss": 0.7898, - "step": 2837 - }, - { - "epoch": 0.9649778986739205, - "grad_norm": 1.9905149738010572, - "learning_rate": 3.2147649649761914e-08, - "loss": 0.7486, - "step": 2838 - }, - { - "epoch": 0.9653179190751445, - "grad_norm": 1.8204792814385986, - "learning_rate": 3.152710731723019e-08, - "loss": 0.7907, - "step": 2839 - }, - { - "epoch": 0.9656579394763686, - "grad_norm": 2.2038325461595125, - "learning_rate": 3.0912593683460336e-08, - "loss": 0.8221, - "step": 2840 - }, - { - "epoch": 0.9659979598775926, - "grad_norm": 2.432522698561901, - "learning_rate": 3.030410949409701e-08, - "loss": 0.8842, - "step": 2841 - }, - { - "epoch": 0.9663379802788167, - "grad_norm": 1.7618523901465664, - "learning_rate": 2.9701655487469062e-08, - "loss": 0.7653, - "step": 2842 - }, - { - "epoch": 0.9666780006800408, - "grad_norm": 2.0644672247287055, - "learning_rate": 2.9105232394588955e-08, - "loss": 0.7707, - "step": 2843 - }, - { - "epoch": 0.9670180210812649, - "grad_norm": 1.63818941154093, - "learning_rate": 2.8514840939150023e-08, - "loss": 0.8413, - "step": 2844 - }, - { - "epoch": 0.9673580414824889, - "grad_norm": 1.973884901127744, - "learning_rate": 2.793048183752922e-08, - "loss": 0.7836, - "step": 2845 - }, - { - "epoch": 0.967698061883713, - "grad_norm": 2.4371181181523354, - "learning_rate": 2.735215579878159e-08, - "loss": 0.7305, - "step": 2846 - }, - { - "epoch": 0.9680380822849372, - "grad_norm": 1.7853562497323494, - "learning_rate": 2.6779863524642458e-08, - "loss": 0.7943, - "step": 2847 - }, - { - "epoch": 0.9683781026861612, - "grad_norm": 1.6858828008976603, - "learning_rate": 2.6213605709525803e-08, - "loss": 0.7232, - "step": 2848 - }, - { - "epoch": 0.9687181230873853, - "grad_norm": 2.158947668013324, - "learning_rate": 2.5653383040524228e-08, - "loss": 0.8468, - "step": 2849 - }, - { - "epoch": 0.9690581434886093, - "grad_norm": 1.9632625315112604, - "learning_rate": 2.509919619740675e-08, - "loss": 0.7163, - "step": 2850 - }, - { - "epoch": 0.9693981638898334, - "grad_norm": 2.1695249086671757, - "learning_rate": 2.4551045852617694e-08, - "loss": 0.6472, - "step": 2851 - }, - { - "epoch": 0.9697381842910575, - "grad_norm": 2.7117847345646107, - "learning_rate": 2.4008932671277795e-08, - "loss": 0.8502, - "step": 2852 - }, - { - "epoch": 0.9700782046922816, - "grad_norm": 6.101355782708873, - "learning_rate": 2.3472857311183095e-08, - "loss": 0.8528, - "step": 2853 - }, - { - "epoch": 0.9704182250935056, - "grad_norm": 2.254936461781147, - "learning_rate": 2.294282042280105e-08, - "loss": 0.7906, - "step": 2854 - }, - { - "epoch": 0.9707582454947297, - "grad_norm": 1.8108204204000768, - "learning_rate": 2.2418822649274974e-08, - "loss": 0.7968, - "step": 2855 - }, - { - "epoch": 0.9710982658959537, - "grad_norm": 2.6951434301051402, - "learning_rate": 2.1900864626417385e-08, - "loss": 0.776, - "step": 2856 - }, - { - "epoch": 0.9714382862971779, - "grad_norm": 1.6020976275419296, - "learning_rate": 2.1388946982714986e-08, - "loss": 0.7704, - "step": 2857 - }, - { - "epoch": 0.9717783066984019, - "grad_norm": 2.4901873723353556, - "learning_rate": 2.088307033932313e-08, - "loss": 0.7724, - "step": 2858 - }, - { - "epoch": 0.972118327099626, - "grad_norm": 1.9510473315864627, - "learning_rate": 2.0383235310068027e-08, - "loss": 0.7161, - "step": 2859 - }, - { - "epoch": 0.97245834750085, - "grad_norm": 1.7537820967663633, - "learning_rate": 1.9889442501444533e-08, - "loss": 0.7845, - "step": 2860 - }, - { - "epoch": 0.9727983679020741, - "grad_norm": 2.770379816331149, - "learning_rate": 1.9401692512617254e-08, - "loss": 0.7648, - "step": 2861 - }, - { - "epoch": 0.9731383883032982, - "grad_norm": 1.559159771690986, - "learning_rate": 1.891998593541611e-08, - "loss": 0.8496, - "step": 2862 - }, - { - "epoch": 0.9734784087045223, - "grad_norm": 1.8239680223638182, - "learning_rate": 1.8444323354340765e-08, - "loss": 0.8466, - "step": 2863 - }, - { - "epoch": 0.9738184291057463, - "grad_norm": 2.097607953123935, - "learning_rate": 1.7974705346554543e-08, - "loss": 0.7707, - "step": 2864 - }, - { - "epoch": 0.9741584495069704, - "grad_norm": 15.595515977393774, - "learning_rate": 1.7511132481888293e-08, - "loss": 0.8303, - "step": 2865 - }, - { - "epoch": 0.9744984699081944, - "grad_norm": 2.2442397263635923, - "learning_rate": 1.7053605322837064e-08, - "loss": 0.8331, - "step": 2866 - }, - { - "epoch": 0.9748384903094186, - "grad_norm": 1.7730283098392012, - "learning_rate": 1.6602124424558998e-08, - "loss": 0.8179, - "step": 2867 - }, - { - "epoch": 0.9751785107106427, - "grad_norm": 1.9544407790331693, - "learning_rate": 1.6156690334878655e-08, - "loss": 0.8524, - "step": 2868 - }, - { - "epoch": 0.9755185311118667, - "grad_norm": 1.8253723434879328, - "learning_rate": 1.571730359427981e-08, - "loss": 0.7918, - "step": 2869 - }, - { - "epoch": 0.9758585515130908, - "grad_norm": 2.0309536185191175, - "learning_rate": 1.5283964735911537e-08, - "loss": 0.6513, - "step": 2870 - }, - { - "epoch": 0.9761985719143148, - "grad_norm": 1.838769072190465, - "learning_rate": 1.4856674285582128e-08, - "loss": 0.8031, - "step": 2871 - }, - { - "epoch": 0.976538592315539, - "grad_norm": 2.0950822778973692, - "learning_rate": 1.4435432761762958e-08, - "loss": 0.7676, - "step": 2872 - }, - { - "epoch": 0.976878612716763, - "grad_norm": 2.0893793425469873, - "learning_rate": 1.4020240675583496e-08, - "loss": 0.7125, - "step": 2873 - }, - { - "epoch": 0.9772186331179871, - "grad_norm": 2.4146069116338076, - "learning_rate": 1.3611098530834643e-08, - "loss": 0.7995, - "step": 2874 - }, - { - "epoch": 0.9775586535192111, - "grad_norm": 1.6888893378242318, - "learning_rate": 1.3208006823965391e-08, - "loss": 0.7872, - "step": 2875 - }, - { - "epoch": 0.9778986739204353, - "grad_norm": 1.8282106707284866, - "learning_rate": 1.2810966044083384e-08, - "loss": 0.7569, - "step": 2876 - }, - { - "epoch": 0.9782386943216593, - "grad_norm": 3.0888469815054056, - "learning_rate": 1.241997667295436e-08, - "loss": 0.7845, - "step": 2877 - }, - { - "epoch": 0.9785787147228834, - "grad_norm": 2.6085632259240135, - "learning_rate": 1.2035039185001595e-08, - "loss": 0.6891, - "step": 2878 - }, - { - "epoch": 0.9789187351241074, - "grad_norm": 1.6548350534770722, - "learning_rate": 1.1656154047303691e-08, - "loss": 0.776, - "step": 2879 - }, - { - "epoch": 0.9792587555253315, - "grad_norm": 2.2479317743891163, - "learning_rate": 1.128332171959734e-08, - "loss": 0.6986, - "step": 2880 - }, - { - "epoch": 0.9795987759265556, - "grad_norm": 1.5207555564257944, - "learning_rate": 1.0916542654273443e-08, - "loss": 0.7656, - "step": 2881 - }, - { - "epoch": 0.9799387963277797, - "grad_norm": 1.7179799866128536, - "learning_rate": 1.0555817296378223e-08, - "loss": 0.7789, - "step": 2882 - }, - { - "epoch": 0.9802788167290037, - "grad_norm": 3.5457830250161146, - "learning_rate": 1.0201146083612113e-08, - "loss": 0.6575, - "step": 2883 - }, - { - "epoch": 0.9806188371302278, - "grad_norm": 1.66796935458302, - "learning_rate": 9.852529446330306e-09, - "loss": 0.8078, - "step": 2884 - }, - { - "epoch": 0.9809588575314518, - "grad_norm": 1.6131158456304904, - "learning_rate": 9.509967807541098e-09, - "loss": 0.8186, - "step": 2885 - }, - { - "epoch": 0.981298877932676, - "grad_norm": 1.82468180774407, - "learning_rate": 9.17346158290533e-09, - "loss": 0.739, - "step": 2886 - }, - { - "epoch": 0.9816388983339001, - "grad_norm": 1.9688243210497778, - "learning_rate": 8.843011180736383e-09, - "loss": 0.7168, - "step": 2887 - }, - { - "epoch": 0.9819789187351241, - "grad_norm": 1.5022510538028564, - "learning_rate": 8.518617002000184e-09, - "loss": 0.7572, - "step": 2888 - }, - { - "epoch": 0.9823189391363482, - "grad_norm": 1.727052969368207, - "learning_rate": 8.200279440313541e-09, - "loss": 0.8521, - "step": 2889 - }, - { - "epoch": 0.9826589595375722, - "grad_norm": 2.2067244626842633, - "learning_rate": 7.88799888194358e-09, - "loss": 0.8, - "step": 2890 - }, - { - "epoch": 0.9829989799387964, - "grad_norm": 1.7065083718462186, - "learning_rate": 7.581775705809424e-09, - "loss": 0.7378, - "step": 2891 - }, - { - "epoch": 0.9833390003400204, - "grad_norm": 2.9586498581307064, - "learning_rate": 7.281610283479401e-09, - "loss": 0.6801, - "step": 2892 - }, - { - "epoch": 0.9836790207412445, - "grad_norm": 5.171238278863206, - "learning_rate": 6.987502979170502e-09, - "loss": 0.7629, - "step": 2893 - }, - { - "epoch": 0.9840190411424685, - "grad_norm": 1.90520126690055, - "learning_rate": 6.69945414975115e-09, - "loss": 0.8113, - "step": 2894 - }, - { - "epoch": 0.9843590615436926, - "grad_norm": 2.2503726394658616, - "learning_rate": 6.417464144736208e-09, - "loss": 0.7513, - "step": 2895 - }, - { - "epoch": 0.9846990819449167, - "grad_norm": 2.153027725123987, - "learning_rate": 6.141533306289749e-09, - "loss": 0.8205, - "step": 2896 - }, - { - "epoch": 0.9850391023461408, - "grad_norm": 1.736801672039933, - "learning_rate": 5.871661969223951e-09, - "loss": 0.7942, - "step": 2897 - }, - { - "epoch": 0.9853791227473648, - "grad_norm": 2.3675066320350475, - "learning_rate": 5.6078504609979874e-09, - "loss": 0.7259, - "step": 2898 - }, - { - "epoch": 0.9857191431485889, - "grad_norm": 2.017743519753625, - "learning_rate": 5.350099101718575e-09, - "loss": 0.7131, - "step": 2899 - }, - { - "epoch": 0.9860591635498129, - "grad_norm": 2.7453562229219703, - "learning_rate": 5.098408204138872e-09, - "loss": 0.7977, - "step": 2900 - }, - { - "epoch": 0.9863991839510371, - "grad_norm": 1.6134295512276415, - "learning_rate": 4.852778073657361e-09, - "loss": 0.7135, - "step": 2901 - }, - { - "epoch": 0.9867392043522611, - "grad_norm": 2.0405012351886276, - "learning_rate": 4.613209008320629e-09, - "loss": 0.7537, - "step": 2902 - }, - { - "epoch": 0.9870792247534852, - "grad_norm": 1.4871414756238235, - "learning_rate": 4.379701298818928e-09, - "loss": 0.7353, - "step": 2903 - }, - { - "epoch": 0.9874192451547092, - "grad_norm": 2.093132630125787, - "learning_rate": 4.152255228487834e-09, - "loss": 0.7546, - "step": 2904 - }, - { - "epoch": 0.9877592655559334, - "grad_norm": 1.7225547695335006, - "learning_rate": 3.9308710733093616e-09, - "loss": 0.7336, - "step": 2905 - }, - { - "epoch": 0.9880992859571575, - "grad_norm": 2.05442384710289, - "learning_rate": 3.715549101908633e-09, - "loss": 0.7587, - "step": 2906 - }, - { - "epoch": 0.9884393063583815, - "grad_norm": 5.175498096828514, - "learning_rate": 3.5062895755544337e-09, - "loss": 0.7976, - "step": 2907 - }, - { - "epoch": 0.9887793267596056, - "grad_norm": 2.1043805063081678, - "learning_rate": 3.3030927481614294e-09, - "loss": 0.8033, - "step": 2908 - }, - { - "epoch": 0.9891193471608296, - "grad_norm": 2.0514526782549884, - "learning_rate": 3.10595886628684e-09, - "loss": 0.7745, - "step": 2909 - }, - { - "epoch": 0.9894593675620538, - "grad_norm": 1.8077379168303007, - "learning_rate": 2.9148881691298812e-09, - "loss": 0.8329, - "step": 2910 - }, - { - "epoch": 0.9897993879632778, - "grad_norm": 2.7491338803401852, - "learning_rate": 2.7298808885350968e-09, - "loss": 0.7437, - "step": 2911 - }, - { - "epoch": 0.9901394083645019, - "grad_norm": 1.8239821715791795, - "learning_rate": 2.550937248987917e-09, - "loss": 0.752, - "step": 2912 - }, - { - "epoch": 0.9904794287657259, - "grad_norm": 1.9332024110813362, - "learning_rate": 2.378057467617434e-09, - "loss": 0.7285, - "step": 2913 - }, - { - "epoch": 0.99081944916695, - "grad_norm": 2.1222346021287284, - "learning_rate": 2.211241754193627e-09, - "loss": 0.7529, - "step": 2914 - }, - { - "epoch": 0.9911594695681741, - "grad_norm": 2.949500732515013, - "learning_rate": 2.050490311130138e-09, - "loss": 0.7769, - "step": 2915 - }, - { - "epoch": 0.9914994899693982, - "grad_norm": 2.2876307190277387, - "learning_rate": 1.8958033334803837e-09, - "loss": 0.7332, - "step": 2916 - }, - { - "epoch": 0.9918395103706222, - "grad_norm": 1.8659398649610075, - "learning_rate": 1.7471810089403352e-09, - "loss": 0.7737, - "step": 2917 - }, - { - "epoch": 0.9921795307718463, - "grad_norm": 4.208386889103378, - "learning_rate": 1.6046235178474034e-09, - "loss": 0.6483, - "step": 2918 - }, - { - "epoch": 0.9925195511730703, - "grad_norm": 2.7900149310075437, - "learning_rate": 1.4681310331787767e-09, - "loss": 0.7181, - "step": 2919 - }, - { - "epoch": 0.9928595715742945, - "grad_norm": 3.2570105193883836, - "learning_rate": 1.3377037205541954e-09, - "loss": 0.745, - "step": 2920 - }, - { - "epoch": 0.9931995919755185, - "grad_norm": 2.253063484591565, - "learning_rate": 1.2133417382320656e-09, - "loss": 0.7365, - "step": 2921 - }, - { - "epoch": 0.9935396123767426, - "grad_norm": 1.6986142527464747, - "learning_rate": 1.0950452371116805e-09, - "loss": 0.7249, - "step": 2922 - }, - { - "epoch": 0.9938796327779666, - "grad_norm": 2.1769362876424783, - "learning_rate": 9.828143607343298e-10, - "loss": 0.675, - "step": 2923 - }, - { - "epoch": 0.9942196531791907, - "grad_norm": 1.8623993129979777, - "learning_rate": 8.766492452783048e-10, - "loss": 0.7031, - "step": 2924 - }, - { - "epoch": 0.9945596735804149, - "grad_norm": 2.8223920826039577, - "learning_rate": 7.765500195650034e-10, - "loss": 0.7948, - "step": 2925 - }, - { - "epoch": 0.9948996939816389, - "grad_norm": 1.8686593078473168, - "learning_rate": 6.825168050528241e-10, - "loss": 0.727, - "step": 2926 - }, - { - "epoch": 0.995239714382863, - "grad_norm": 2.162089701353925, - "learning_rate": 5.945497158404979e-10, - "loss": 0.7878, - "step": 2927 - }, - { - "epoch": 0.995579734784087, - "grad_norm": 2.0618636520713265, - "learning_rate": 5.126488586676414e-10, - "loss": 0.681, - "step": 2928 - }, - { - "epoch": 0.9959197551853112, - "grad_norm": 2.0577534585653656, - "learning_rate": 4.368143329114283e-10, - "loss": 0.7572, - "step": 2929 - }, - { - "epoch": 0.9962597755865352, - "grad_norm": 1.9033317775907852, - "learning_rate": 3.6704623058825275e-10, - "loss": 0.725, - "step": 2930 - }, - { - "epoch": 0.9965997959877593, - "grad_norm": 1.7966473182708045, - "learning_rate": 3.033446363548409e-10, - "loss": 0.7165, - "step": 2931 - }, - { - "epoch": 0.9969398163889833, - "grad_norm": 1.9439610520049038, - "learning_rate": 2.4570962750547487e-10, - "loss": 0.806, - "step": 2932 - }, - { - "epoch": 0.9972798367902074, - "grad_norm": 1.8447978817356736, - "learning_rate": 1.9414127397476834e-10, - "loss": 0.6776, - "step": 2933 - }, - { - "epoch": 0.9976198571914315, - "grad_norm": 1.8689492566426245, - "learning_rate": 1.486396383343358e-10, - "loss": 0.7369, - "step": 2934 - }, - { - "epoch": 0.9979598775926556, - "grad_norm": 2.3593280079362984, - "learning_rate": 1.0920477579612342e-10, - "loss": 0.737, - "step": 2935 - }, - { - "epoch": 0.9982998979938796, - "grad_norm": 1.8130326623567172, - "learning_rate": 7.583673420963333e-11, - "loss": 0.7728, - "step": 2936 - }, - { - "epoch": 0.9986399183951037, - "grad_norm": 1.8422384775818308, - "learning_rate": 4.8535554063589006e-11, - "loss": 0.7864, - "step": 2937 - }, - { - "epoch": 0.9989799387963277, - "grad_norm": 2.6330920946601504, - "learning_rate": 2.7301268484825062e-11, - "loss": 0.7965, - "step": 2938 - }, - { - "epoch": 0.9993199591975519, - "grad_norm": 1.4728667367747035, - "learning_rate": 1.2133903238842337e-11, - "loss": 0.6569, - "step": 2939 - }, - { - "epoch": 0.999659979598776, - "grad_norm": 2.449775970829956, - "learning_rate": 3.033476729807916e-12, - "loss": 0.7428, - "step": 2940 - }, - { - "epoch": 1.0, - "grad_norm": 1.9956841641574536, - "learning_rate": 0.0, - "loss": 0.7876, - "step": 2941 - }, - { - "epoch": 1.0, - "step": 2941, - "total_flos": 3322796039995392.0, - "train_loss": 0.8056760676374892, - "train_runtime": 111614.1149, - "train_samples_per_second": 0.843, - "train_steps_per_second": 0.026 - } - ], - "logging_steps": 1.0, - "max_steps": 2941, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 400, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 3322796039995392.0, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}