{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22686392180443912, "eval_steps": 500, "global_step": 870000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.215262570216991e-05, "grad_norm": 196.05113220214844, "learning_rate": 2.6076639242734397e-08, "loss": 19.4606, "step": 200 }, { "epoch": 0.00010430525140433982, "grad_norm": 184.3112335205078, "learning_rate": 5.2153278485468794e-08, "loss": 19.0516, "step": 400 }, { "epoch": 0.00015645787710650975, "grad_norm": 126.78282928466797, "learning_rate": 7.822991772820321e-08, "loss": 17.3077, "step": 600 }, { "epoch": 0.00020861050280867964, "grad_norm": 59.83221435546875, "learning_rate": 1.0430655697093759e-07, "loss": 15.0746, "step": 800 }, { "epoch": 0.0002607631285108496, "grad_norm": 28.47006607055664, "learning_rate": 1.30383196213672e-07, "loss": 13.063, "step": 1000 }, { "epoch": 0.0003129157542130195, "grad_norm": 16.229631423950195, "learning_rate": 1.5645983545640642e-07, "loss": 11.636, "step": 1200 }, { "epoch": 0.0003650683799151894, "grad_norm": 14.528124809265137, "learning_rate": 1.825364746991408e-07, "loss": 10.7786, "step": 1400 }, { "epoch": 0.0004172210056173593, "grad_norm": 12.064294815063477, "learning_rate": 2.0861311394187518e-07, "loss": 10.2434, "step": 1600 }, { "epoch": 0.00046937363131952924, "grad_norm": 11.837329864501953, "learning_rate": 2.3468975318460956e-07, "loss": 9.9074, "step": 1800 }, { "epoch": 0.0005215262570216992, "grad_norm": 10.613241195678711, "learning_rate": 2.60766392427344e-07, "loss": 9.6339, "step": 2000 }, { "epoch": 0.000573678882723869, "grad_norm": 10.466361045837402, "learning_rate": 2.8684303167007837e-07, "loss": 9.4193, "step": 2200 }, { "epoch": 0.000625831508426039, "grad_norm": 10.19588565826416, "learning_rate": 3.1291967091281283e-07, "loss": 9.2786, "step": 2400 }, { "epoch": 0.0006779841341282089, "grad_norm": 9.127360343933105, "learning_rate": 3.389963101555472e-07, "loss": 9.1531, "step": 2600 }, { "epoch": 0.0007301367598303788, "grad_norm": 10.378647804260254, "learning_rate": 3.650729493982816e-07, "loss": 9.0132, "step": 2800 }, { "epoch": 0.0007822893855325487, "grad_norm": 10.13025951385498, "learning_rate": 3.91149588641016e-07, "loss": 8.876, "step": 3000 }, { "epoch": 0.0008344420112347186, "grad_norm": 8.717490196228027, "learning_rate": 4.1722622788375035e-07, "loss": 8.7792, "step": 3200 }, { "epoch": 0.0008865946369368885, "grad_norm": 8.891645431518555, "learning_rate": 4.4330286712648476e-07, "loss": 8.675, "step": 3400 }, { "epoch": 0.0009387472626390585, "grad_norm": 8.681458473205566, "learning_rate": 4.693795063692191e-07, "loss": 8.5239, "step": 3600 }, { "epoch": 0.0009908998883412284, "grad_norm": 9.971908569335938, "learning_rate": 4.954561456119536e-07, "loss": 8.4858, "step": 3800 }, { "epoch": 0.0010430525140433984, "grad_norm": 9.44089412689209, "learning_rate": 5.21532784854688e-07, "loss": 8.3296, "step": 4000 }, { "epoch": 0.0010952051397455681, "grad_norm": 8.453426361083984, "learning_rate": 5.476094240974224e-07, "loss": 8.2809, "step": 4200 }, { "epoch": 0.001147357765447738, "grad_norm": 8.532354354858398, "learning_rate": 5.736860633401567e-07, "loss": 8.1715, "step": 4400 }, { "epoch": 0.001199510391149908, "grad_norm": 8.517669677734375, "learning_rate": 5.997627025828912e-07, "loss": 8.0996, "step": 4600 }, { "epoch": 0.001251663016852078, "grad_norm": 9.150565147399902, "learning_rate": 6.258393418256257e-07, "loss": 8.0604, "step": 4800 }, { "epoch": 0.001303815642554248, "grad_norm": 8.797100067138672, "learning_rate": 6.519159810683599e-07, "loss": 7.9397, "step": 5000 }, { "epoch": 0.0013559682682564179, "grad_norm": 8.194568634033203, "learning_rate": 6.779926203110944e-07, "loss": 7.8885, "step": 5200 }, { "epoch": 0.0014081208939585876, "grad_norm": 8.242785453796387, "learning_rate": 7.040692595538287e-07, "loss": 7.8287, "step": 5400 }, { "epoch": 0.0014602735196607576, "grad_norm": 9.448251724243164, "learning_rate": 7.301458987965632e-07, "loss": 7.7276, "step": 5600 }, { "epoch": 0.0015124261453629275, "grad_norm": 7.932211875915527, "learning_rate": 7.562225380392975e-07, "loss": 7.6658, "step": 5800 }, { "epoch": 0.0015645787710650975, "grad_norm": 8.316240310668945, "learning_rate": 7.82299177282032e-07, "loss": 7.5645, "step": 6000 }, { "epoch": 0.0016167313967672674, "grad_norm": 8.247200965881348, "learning_rate": 8.083758165247665e-07, "loss": 7.508, "step": 6200 }, { "epoch": 0.0016688840224694372, "grad_norm": 8.689373970031738, "learning_rate": 8.344524557675007e-07, "loss": 7.4703, "step": 6400 }, { "epoch": 0.001721036648171607, "grad_norm": 7.99875545501709, "learning_rate": 8.605290950102352e-07, "loss": 7.3914, "step": 6600 }, { "epoch": 0.001773189273873777, "grad_norm": 8.472365379333496, "learning_rate": 8.866057342529695e-07, "loss": 7.3268, "step": 6800 }, { "epoch": 0.001825341899575947, "grad_norm": 8.410703659057617, "learning_rate": 9.12682373495704e-07, "loss": 7.2791, "step": 7000 }, { "epoch": 0.001877494525278117, "grad_norm": 9.348575592041016, "learning_rate": 9.387590127384382e-07, "loss": 7.2181, "step": 7200 }, { "epoch": 0.001929647150980287, "grad_norm": 8.044520378112793, "learning_rate": 9.648356519811728e-07, "loss": 7.1393, "step": 7400 }, { "epoch": 0.001981799776682457, "grad_norm": 8.222118377685547, "learning_rate": 9.909122912239071e-07, "loss": 7.1147, "step": 7600 }, { "epoch": 0.002033952402384627, "grad_norm": 8.315069198608398, "learning_rate": 1.0169889304666415e-06, "loss": 7.0527, "step": 7800 }, { "epoch": 0.0020861050280867968, "grad_norm": 8.228837013244629, "learning_rate": 1.043065569709376e-06, "loss": 6.9868, "step": 8000 }, { "epoch": 0.0021382576537889663, "grad_norm": 8.025296211242676, "learning_rate": 1.0691422089521102e-06, "loss": 6.9165, "step": 8200 }, { "epoch": 0.0021904102794911362, "grad_norm": 8.421102523803711, "learning_rate": 1.0952188481948448e-06, "loss": 6.8728, "step": 8400 }, { "epoch": 0.002242562905193306, "grad_norm": 8.444592475891113, "learning_rate": 1.1212954874375791e-06, "loss": 6.8894, "step": 8600 }, { "epoch": 0.002294715530895476, "grad_norm": 9.23551082611084, "learning_rate": 1.1473721266803135e-06, "loss": 6.8165, "step": 8800 }, { "epoch": 0.002346868156597646, "grad_norm": 7.454519748687744, "learning_rate": 1.173448765923048e-06, "loss": 6.7488, "step": 9000 }, { "epoch": 0.002399020782299816, "grad_norm": 8.644956588745117, "learning_rate": 1.1995254051657824e-06, "loss": 6.7656, "step": 9200 }, { "epoch": 0.002451173408001986, "grad_norm": 8.16921615600586, "learning_rate": 1.2256020444085168e-06, "loss": 6.6187, "step": 9400 }, { "epoch": 0.002503326033704156, "grad_norm": 8.748774528503418, "learning_rate": 1.2516786836512513e-06, "loss": 6.5993, "step": 9600 }, { "epoch": 0.002555478659406326, "grad_norm": 7.972047328948975, "learning_rate": 1.2777553228939855e-06, "loss": 6.6099, "step": 9800 }, { "epoch": 0.002607631285108496, "grad_norm": 7.958851337432861, "learning_rate": 1.3038319621367198e-06, "loss": 6.5708, "step": 10000 }, { "epoch": 0.002659783910810666, "grad_norm": 8.20793628692627, "learning_rate": 1.3299086013794544e-06, "loss": 6.5175, "step": 10200 }, { "epoch": 0.0027119365365128358, "grad_norm": 8.190882682800293, "learning_rate": 1.3559852406221887e-06, "loss": 6.4806, "step": 10400 }, { "epoch": 0.0027640891622150053, "grad_norm": 8.604300498962402, "learning_rate": 1.3820618798649233e-06, "loss": 6.4768, "step": 10600 }, { "epoch": 0.0028162417879171752, "grad_norm": 8.548392295837402, "learning_rate": 1.4081385191076574e-06, "loss": 6.4409, "step": 10800 }, { "epoch": 0.002868394413619345, "grad_norm": 7.730503559112549, "learning_rate": 1.4342151583503918e-06, "loss": 6.4013, "step": 11000 }, { "epoch": 0.002920547039321515, "grad_norm": 8.61767292022705, "learning_rate": 1.4602917975931264e-06, "loss": 6.3355, "step": 11200 }, { "epoch": 0.002972699665023685, "grad_norm": 7.736725330352783, "learning_rate": 1.4863684368358607e-06, "loss": 6.2992, "step": 11400 }, { "epoch": 0.003024852290725855, "grad_norm": 8.229381561279297, "learning_rate": 1.512445076078595e-06, "loss": 6.293, "step": 11600 }, { "epoch": 0.003077004916428025, "grad_norm": 8.532485008239746, "learning_rate": 1.5385217153213294e-06, "loss": 6.2469, "step": 11800 }, { "epoch": 0.003129157542130195, "grad_norm": 8.137749671936035, "learning_rate": 1.564598354564064e-06, "loss": 6.1848, "step": 12000 }, { "epoch": 0.003181310167832365, "grad_norm": 7.897096157073975, "learning_rate": 1.5906749938067983e-06, "loss": 6.1823, "step": 12200 }, { "epoch": 0.003233462793534535, "grad_norm": 7.656859874725342, "learning_rate": 1.616751633049533e-06, "loss": 6.1289, "step": 12400 }, { "epoch": 0.003285615419236705, "grad_norm": 8.03537654876709, "learning_rate": 1.642828272292267e-06, "loss": 6.1108, "step": 12600 }, { "epoch": 0.0033377680449388743, "grad_norm": 7.999448299407959, "learning_rate": 1.6689049115350014e-06, "loss": 6.0881, "step": 12800 }, { "epoch": 0.0033899206706410443, "grad_norm": 8.130730628967285, "learning_rate": 1.694981550777736e-06, "loss": 6.05, "step": 13000 }, { "epoch": 0.003442073296343214, "grad_norm": 8.437195777893066, "learning_rate": 1.7210581900204703e-06, "loss": 6.0568, "step": 13200 }, { "epoch": 0.003494225922045384, "grad_norm": 8.395051956176758, "learning_rate": 1.747134829263205e-06, "loss": 6.0069, "step": 13400 }, { "epoch": 0.003546378547747554, "grad_norm": 8.429478645324707, "learning_rate": 1.773211468505939e-06, "loss": 5.9598, "step": 13600 }, { "epoch": 0.003598531173449724, "grad_norm": 8.610182762145996, "learning_rate": 1.7992881077486734e-06, "loss": 5.9299, "step": 13800 }, { "epoch": 0.003650683799151894, "grad_norm": 7.439322471618652, "learning_rate": 1.825364746991408e-06, "loss": 5.913, "step": 14000 }, { "epoch": 0.003702836424854064, "grad_norm": 8.080249786376953, "learning_rate": 1.8514413862341423e-06, "loss": 5.9126, "step": 14200 }, { "epoch": 0.003754989050556234, "grad_norm": 7.7550201416015625, "learning_rate": 1.8775180254768765e-06, "loss": 5.8244, "step": 14400 }, { "epoch": 0.003807141676258404, "grad_norm": 8.251408576965332, "learning_rate": 1.903594664719611e-06, "loss": 5.8174, "step": 14600 }, { "epoch": 0.003859294301960574, "grad_norm": 7.648636817932129, "learning_rate": 1.9296713039623456e-06, "loss": 5.8122, "step": 14800 }, { "epoch": 0.003911446927662743, "grad_norm": 7.906362533569336, "learning_rate": 1.95574794320508e-06, "loss": 5.7618, "step": 15000 }, { "epoch": 0.003963599553364914, "grad_norm": 7.8534698486328125, "learning_rate": 1.9818245824478143e-06, "loss": 5.7498, "step": 15200 }, { "epoch": 0.004015752179067083, "grad_norm": 8.2284574508667, "learning_rate": 2.0079012216905486e-06, "loss": 5.7466, "step": 15400 }, { "epoch": 0.004067904804769254, "grad_norm": 8.237747192382812, "learning_rate": 2.033977860933283e-06, "loss": 5.7004, "step": 15600 }, { "epoch": 0.004120057430471423, "grad_norm": 7.435640335083008, "learning_rate": 2.0600545001760174e-06, "loss": 5.6729, "step": 15800 }, { "epoch": 0.0041722100561735935, "grad_norm": 7.641366958618164, "learning_rate": 2.086131139418752e-06, "loss": 5.6398, "step": 16000 }, { "epoch": 0.004224362681875763, "grad_norm": 7.673348426818848, "learning_rate": 2.1122077786614865e-06, "loss": 5.6584, "step": 16200 }, { "epoch": 0.004276515307577933, "grad_norm": 8.171424865722656, "learning_rate": 2.1382844179042204e-06, "loss": 5.5945, "step": 16400 }, { "epoch": 0.004328667933280103, "grad_norm": 7.752162933349609, "learning_rate": 2.164361057146955e-06, "loss": 5.6081, "step": 16600 }, { "epoch": 0.0043808205589822725, "grad_norm": 7.9565958976745605, "learning_rate": 2.1904376963896896e-06, "loss": 5.5563, "step": 16800 }, { "epoch": 0.004432973184684443, "grad_norm": 7.686338424682617, "learning_rate": 2.216514335632424e-06, "loss": 5.5785, "step": 17000 }, { "epoch": 0.004485125810386612, "grad_norm": 7.707006931304932, "learning_rate": 2.2425909748751583e-06, "loss": 5.57, "step": 17200 }, { "epoch": 0.004537278436088783, "grad_norm": 8.033032417297363, "learning_rate": 2.2686676141178926e-06, "loss": 5.5128, "step": 17400 }, { "epoch": 0.004589431061790952, "grad_norm": 8.142727851867676, "learning_rate": 2.294744253360627e-06, "loss": 5.4853, "step": 17600 }, { "epoch": 0.004641583687493123, "grad_norm": 7.563163757324219, "learning_rate": 2.3208208926033613e-06, "loss": 5.4514, "step": 17800 }, { "epoch": 0.004693736313195292, "grad_norm": 7.709859848022461, "learning_rate": 2.346897531846096e-06, "loss": 5.4332, "step": 18000 }, { "epoch": 0.004745888938897463, "grad_norm": 8.00818920135498, "learning_rate": 2.37297417108883e-06, "loss": 5.474, "step": 18200 }, { "epoch": 0.004798041564599632, "grad_norm": 8.24853229522705, "learning_rate": 2.399050810331565e-06, "loss": 5.4155, "step": 18400 }, { "epoch": 0.004850194190301802, "grad_norm": 8.416743278503418, "learning_rate": 2.425127449574299e-06, "loss": 5.3975, "step": 18600 }, { "epoch": 0.004902346816003972, "grad_norm": 8.024255752563477, "learning_rate": 2.4512040888170335e-06, "loss": 5.3896, "step": 18800 }, { "epoch": 0.0049544994417061415, "grad_norm": 7.6818132400512695, "learning_rate": 2.477280728059768e-06, "loss": 5.3643, "step": 19000 }, { "epoch": 0.005006652067408312, "grad_norm": 7.544449329376221, "learning_rate": 2.5033573673025026e-06, "loss": 5.3326, "step": 19200 }, { "epoch": 0.005058804693110481, "grad_norm": 7.725053787231445, "learning_rate": 2.5294340065452366e-06, "loss": 5.3108, "step": 19400 }, { "epoch": 0.005110957318812652, "grad_norm": 7.843037128448486, "learning_rate": 2.555510645787971e-06, "loss": 5.2766, "step": 19600 }, { "epoch": 0.005163109944514821, "grad_norm": 7.6276164054870605, "learning_rate": 2.5815872850307057e-06, "loss": 5.3071, "step": 19800 }, { "epoch": 0.005215262570216992, "grad_norm": 7.907887935638428, "learning_rate": 2.6076639242734396e-06, "loss": 5.2333, "step": 20000 }, { "epoch": 0.005267415195919161, "grad_norm": 7.383638858795166, "learning_rate": 2.6337405635161744e-06, "loss": 5.3013, "step": 20200 }, { "epoch": 0.005319567821621332, "grad_norm": 7.0909247398376465, "learning_rate": 2.6598172027589088e-06, "loss": 5.222, "step": 20400 }, { "epoch": 0.005371720447323501, "grad_norm": 8.317301750183105, "learning_rate": 2.6858938420016427e-06, "loss": 5.1935, "step": 20600 }, { "epoch": 0.0054238730730256715, "grad_norm": 7.909549713134766, "learning_rate": 2.7119704812443775e-06, "loss": 5.1732, "step": 20800 }, { "epoch": 0.005476025698727841, "grad_norm": 8.112817764282227, "learning_rate": 2.738047120487112e-06, "loss": 5.2078, "step": 21000 }, { "epoch": 0.0055281783244300105, "grad_norm": 7.593140125274658, "learning_rate": 2.7641237597298466e-06, "loss": 5.1478, "step": 21200 }, { "epoch": 0.005580330950132181, "grad_norm": 7.865172863006592, "learning_rate": 2.7902003989725805e-06, "loss": 5.1364, "step": 21400 }, { "epoch": 0.0056324835758343504, "grad_norm": 7.748229503631592, "learning_rate": 2.816277038215315e-06, "loss": 5.1232, "step": 21600 }, { "epoch": 0.005684636201536521, "grad_norm": 7.954893112182617, "learning_rate": 2.8423536774580497e-06, "loss": 5.138, "step": 21800 }, { "epoch": 0.00573678882723869, "grad_norm": 7.68300199508667, "learning_rate": 2.8684303167007836e-06, "loss": 5.1083, "step": 22000 }, { "epoch": 0.005788941452940861, "grad_norm": 9.848554611206055, "learning_rate": 2.8945069559435184e-06, "loss": 5.0713, "step": 22200 }, { "epoch": 0.00584109407864303, "grad_norm": 7.352326393127441, "learning_rate": 2.9205835951862527e-06, "loss": 5.0239, "step": 22400 }, { "epoch": 0.005893246704345201, "grad_norm": 8.010299682617188, "learning_rate": 2.9466602344289867e-06, "loss": 5.0562, "step": 22600 }, { "epoch": 0.00594539933004737, "grad_norm": 7.50053071975708, "learning_rate": 2.9727368736717214e-06, "loss": 4.9805, "step": 22800 }, { "epoch": 0.0059975519557495405, "grad_norm": 7.903580188751221, "learning_rate": 2.998813512914456e-06, "loss": 5.0251, "step": 23000 }, { "epoch": 0.00604970458145171, "grad_norm": 7.933001518249512, "learning_rate": 3.02489015215719e-06, "loss": 5.0268, "step": 23200 }, { "epoch": 0.00610185720715388, "grad_norm": 7.974423885345459, "learning_rate": 3.0509667913999245e-06, "loss": 5.0149, "step": 23400 }, { "epoch": 0.00615400983285605, "grad_norm": 7.946493148803711, "learning_rate": 3.077043430642659e-06, "loss": 4.9947, "step": 23600 }, { "epoch": 0.0062061624585582195, "grad_norm": 8.70067024230957, "learning_rate": 3.1031200698853936e-06, "loss": 4.9885, "step": 23800 }, { "epoch": 0.00625831508426039, "grad_norm": 7.401217937469482, "learning_rate": 3.129196709128128e-06, "loss": 4.9058, "step": 24000 }, { "epoch": 0.006310467709962559, "grad_norm": 8.371158599853516, "learning_rate": 3.155273348370862e-06, "loss": 4.9456, "step": 24200 }, { "epoch": 0.00636262033566473, "grad_norm": 7.775518894195557, "learning_rate": 3.1813499876135967e-06, "loss": 4.9482, "step": 24400 }, { "epoch": 0.006414772961366899, "grad_norm": 7.885232448577881, "learning_rate": 3.207426626856331e-06, "loss": 4.9251, "step": 24600 }, { "epoch": 0.00646692558706907, "grad_norm": 7.657386302947998, "learning_rate": 3.233503266099066e-06, "loss": 4.8979, "step": 24800 }, { "epoch": 0.006519078212771239, "grad_norm": 7.980145454406738, "learning_rate": 3.2595799053417998e-06, "loss": 4.8544, "step": 25000 }, { "epoch": 0.00657123083847341, "grad_norm": 7.923119068145752, "learning_rate": 3.285656544584534e-06, "loss": 4.8472, "step": 25200 }, { "epoch": 0.006623383464175579, "grad_norm": 7.500796318054199, "learning_rate": 3.311733183827269e-06, "loss": 4.8235, "step": 25400 }, { "epoch": 0.006675536089877749, "grad_norm": 7.593719005584717, "learning_rate": 3.337809823070003e-06, "loss": 4.8552, "step": 25600 }, { "epoch": 0.006727688715579919, "grad_norm": 7.597663879394531, "learning_rate": 3.3638864623127376e-06, "loss": 4.822, "step": 25800 }, { "epoch": 0.0067798413412820885, "grad_norm": 7.924131393432617, "learning_rate": 3.389963101555472e-06, "loss": 4.8013, "step": 26000 }, { "epoch": 0.006831993966984259, "grad_norm": 8.150907516479492, "learning_rate": 3.416039740798206e-06, "loss": 4.808, "step": 26200 }, { "epoch": 0.006884146592686428, "grad_norm": 8.553031921386719, "learning_rate": 3.4421163800409407e-06, "loss": 4.7903, "step": 26400 }, { "epoch": 0.006936299218388599, "grad_norm": 7.992140293121338, "learning_rate": 3.468193019283675e-06, "loss": 4.7669, "step": 26600 }, { "epoch": 0.006988451844090768, "grad_norm": 9.123887062072754, "learning_rate": 3.49426965852641e-06, "loss": 4.7772, "step": 26800 }, { "epoch": 0.007040604469792939, "grad_norm": 7.843687057495117, "learning_rate": 3.5203462977691437e-06, "loss": 4.7443, "step": 27000 }, { "epoch": 0.007092757095495108, "grad_norm": 7.194400787353516, "learning_rate": 3.546422937011878e-06, "loss": 4.6394, "step": 27200 }, { "epoch": 0.007144909721197279, "grad_norm": 8.256219863891602, "learning_rate": 3.572499576254613e-06, "loss": 4.7375, "step": 27400 }, { "epoch": 0.007197062346899448, "grad_norm": 7.898242473602295, "learning_rate": 3.5985762154973468e-06, "loss": 4.6597, "step": 27600 }, { "epoch": 0.0072492149726016185, "grad_norm": 8.30362606048584, "learning_rate": 3.6246528547400816e-06, "loss": 4.6724, "step": 27800 }, { "epoch": 0.007301367598303788, "grad_norm": 7.795161247253418, "learning_rate": 3.650729493982816e-06, "loss": 4.6485, "step": 28000 }, { "epoch": 0.0073535202240059576, "grad_norm": 7.712084770202637, "learning_rate": 3.67680613322555e-06, "loss": 4.6684, "step": 28200 }, { "epoch": 0.007405672849708128, "grad_norm": 7.2499165534973145, "learning_rate": 3.7028827724682846e-06, "loss": 4.6383, "step": 28400 }, { "epoch": 0.0074578254754102975, "grad_norm": 7.843800067901611, "learning_rate": 3.728959411711019e-06, "loss": 4.644, "step": 28600 }, { "epoch": 0.007509978101112468, "grad_norm": 8.171004295349121, "learning_rate": 3.755036050953753e-06, "loss": 4.5964, "step": 28800 }, { "epoch": 0.007562130726814637, "grad_norm": 8.430412292480469, "learning_rate": 3.7811126901964877e-06, "loss": 4.5651, "step": 29000 }, { "epoch": 0.007614283352516808, "grad_norm": 8.003270149230957, "learning_rate": 3.807189329439222e-06, "loss": 4.6326, "step": 29200 }, { "epoch": 0.007666435978218977, "grad_norm": 8.111075401306152, "learning_rate": 3.833265968681957e-06, "loss": 4.5397, "step": 29400 }, { "epoch": 0.007718588603921148, "grad_norm": 8.523173332214355, "learning_rate": 3.859342607924691e-06, "loss": 4.5506, "step": 29600 }, { "epoch": 0.007770741229623317, "grad_norm": 8.225560188293457, "learning_rate": 3.8854192471674255e-06, "loss": 4.5974, "step": 29800 }, { "epoch": 0.007822893855325487, "grad_norm": 8.14545726776123, "learning_rate": 3.91149588641016e-06, "loss": 4.5563, "step": 30000 }, { "epoch": 0.007875046481027657, "grad_norm": 8.375019073486328, "learning_rate": 3.937572525652894e-06, "loss": 4.513, "step": 30200 }, { "epoch": 0.007927199106729827, "grad_norm": 7.618561744689941, "learning_rate": 3.963649164895629e-06, "loss": 4.5606, "step": 30400 }, { "epoch": 0.007979351732431996, "grad_norm": 7.901875019073486, "learning_rate": 3.989725804138363e-06, "loss": 4.5278, "step": 30600 }, { "epoch": 0.008031504358134166, "grad_norm": 8.019709587097168, "learning_rate": 4.015802443381097e-06, "loss": 4.5119, "step": 30800 }, { "epoch": 0.008083656983836337, "grad_norm": 7.532272815704346, "learning_rate": 4.041879082623832e-06, "loss": 4.4828, "step": 31000 }, { "epoch": 0.008135809609538507, "grad_norm": 8.28953742980957, "learning_rate": 4.067955721866566e-06, "loss": 4.516, "step": 31200 }, { "epoch": 0.008187962235240676, "grad_norm": 8.234850883483887, "learning_rate": 4.094032361109301e-06, "loss": 4.5126, "step": 31400 }, { "epoch": 0.008240114860942846, "grad_norm": 7.353629112243652, "learning_rate": 4.120109000352035e-06, "loss": 4.4683, "step": 31600 }, { "epoch": 0.008292267486645017, "grad_norm": 7.64729118347168, "learning_rate": 4.146185639594769e-06, "loss": 4.4538, "step": 31800 }, { "epoch": 0.008344420112347187, "grad_norm": 7.47349214553833, "learning_rate": 4.172262278837504e-06, "loss": 4.4283, "step": 32000 }, { "epoch": 0.008396572738049356, "grad_norm": 8.011587142944336, "learning_rate": 4.198338918080238e-06, "loss": 4.414, "step": 32200 }, { "epoch": 0.008448725363751526, "grad_norm": 8.157088279724121, "learning_rate": 4.224415557322973e-06, "loss": 4.4103, "step": 32400 }, { "epoch": 0.008500877989453696, "grad_norm": 8.141148567199707, "learning_rate": 4.250492196565707e-06, "loss": 4.3756, "step": 32600 }, { "epoch": 0.008553030615155865, "grad_norm": 7.892102241516113, "learning_rate": 4.276568835808441e-06, "loss": 4.415, "step": 32800 }, { "epoch": 0.008605183240858036, "grad_norm": 8.264842987060547, "learning_rate": 4.302645475051176e-06, "loss": 4.4202, "step": 33000 }, { "epoch": 0.008657335866560206, "grad_norm": 7.995069980621338, "learning_rate": 4.32872211429391e-06, "loss": 4.407, "step": 33200 }, { "epoch": 0.008709488492262376, "grad_norm": 8.319681167602539, "learning_rate": 4.354798753536645e-06, "loss": 4.3725, "step": 33400 }, { "epoch": 0.008761641117964545, "grad_norm": 7.941286087036133, "learning_rate": 4.380875392779379e-06, "loss": 4.3491, "step": 33600 }, { "epoch": 0.008813793743666715, "grad_norm": 7.6486029624938965, "learning_rate": 4.4069520320221135e-06, "loss": 4.3385, "step": 33800 }, { "epoch": 0.008865946369368886, "grad_norm": 8.253968238830566, "learning_rate": 4.433028671264848e-06, "loss": 4.3506, "step": 34000 }, { "epoch": 0.008918098995071056, "grad_norm": 7.83550500869751, "learning_rate": 4.459105310507582e-06, "loss": 4.3523, "step": 34200 }, { "epoch": 0.008970251620773225, "grad_norm": 8.142230987548828, "learning_rate": 4.4851819497503165e-06, "loss": 4.3497, "step": 34400 }, { "epoch": 0.009022404246475395, "grad_norm": 8.258264541625977, "learning_rate": 4.511258588993051e-06, "loss": 4.334, "step": 34600 }, { "epoch": 0.009074556872177566, "grad_norm": 7.864555358886719, "learning_rate": 4.537335228235785e-06, "loss": 4.3043, "step": 34800 }, { "epoch": 0.009126709497879734, "grad_norm": 7.830788612365723, "learning_rate": 4.56341186747852e-06, "loss": 4.2891, "step": 35000 }, { "epoch": 0.009178862123581905, "grad_norm": 8.586557388305664, "learning_rate": 4.589488506721254e-06, "loss": 4.2071, "step": 35200 }, { "epoch": 0.009231014749284075, "grad_norm": 8.016130447387695, "learning_rate": 4.615565145963988e-06, "loss": 4.2689, "step": 35400 }, { "epoch": 0.009283167374986245, "grad_norm": 7.466185092926025, "learning_rate": 4.641641785206723e-06, "loss": 4.313, "step": 35600 }, { "epoch": 0.009335320000688414, "grad_norm": 7.97619104385376, "learning_rate": 4.667718424449457e-06, "loss": 4.2527, "step": 35800 }, { "epoch": 0.009387472626390584, "grad_norm": 8.14663028717041, "learning_rate": 4.693795063692192e-06, "loss": 4.2316, "step": 36000 }, { "epoch": 0.009439625252092755, "grad_norm": 7.500378608703613, "learning_rate": 4.7198717029349265e-06, "loss": 4.2477, "step": 36200 }, { "epoch": 0.009491777877794925, "grad_norm": 7.814052104949951, "learning_rate": 4.74594834217766e-06, "loss": 4.2685, "step": 36400 }, { "epoch": 0.009543930503497094, "grad_norm": 7.774244785308838, "learning_rate": 4.772024981420395e-06, "loss": 4.2282, "step": 36600 }, { "epoch": 0.009596083129199264, "grad_norm": 7.567872524261475, "learning_rate": 4.79810162066313e-06, "loss": 4.2201, "step": 36800 }, { "epoch": 0.009648235754901435, "grad_norm": 7.794075965881348, "learning_rate": 4.824178259905864e-06, "loss": 4.2564, "step": 37000 }, { "epoch": 0.009700388380603603, "grad_norm": 7.229556560516357, "learning_rate": 4.850254899148598e-06, "loss": 4.197, "step": 37200 }, { "epoch": 0.009752541006305774, "grad_norm": 7.552301406860352, "learning_rate": 4.876331538391333e-06, "loss": 4.1951, "step": 37400 }, { "epoch": 0.009804693632007944, "grad_norm": 7.268792152404785, "learning_rate": 4.902408177634067e-06, "loss": 4.1937, "step": 37600 }, { "epoch": 0.009856846257710114, "grad_norm": 8.427607536315918, "learning_rate": 4.928484816876801e-06, "loss": 4.1549, "step": 37800 }, { "epoch": 0.009908998883412283, "grad_norm": 7.394150733947754, "learning_rate": 4.954561456119536e-06, "loss": 4.1654, "step": 38000 }, { "epoch": 0.009961151509114453, "grad_norm": 8.243969917297363, "learning_rate": 4.98063809536227e-06, "loss": 4.1688, "step": 38200 }, { "epoch": 0.010013304134816624, "grad_norm": 7.82209587097168, "learning_rate": 5.006714734605005e-06, "loss": 4.164, "step": 38400 }, { "epoch": 0.010065456760518794, "grad_norm": 7.940906047821045, "learning_rate": 5.032791373847739e-06, "loss": 4.1694, "step": 38600 }, { "epoch": 0.010117609386220963, "grad_norm": 7.952986717224121, "learning_rate": 5.058868013090473e-06, "loss": 4.18, "step": 38800 }, { "epoch": 0.010169762011923133, "grad_norm": 8.000782012939453, "learning_rate": 5.084944652333208e-06, "loss": 4.1193, "step": 39000 }, { "epoch": 0.010221914637625304, "grad_norm": 7.911661148071289, "learning_rate": 5.111021291575942e-06, "loss": 4.0802, "step": 39200 }, { "epoch": 0.010274067263327474, "grad_norm": 7.344413757324219, "learning_rate": 5.137097930818676e-06, "loss": 4.0842, "step": 39400 }, { "epoch": 0.010326219889029643, "grad_norm": 8.170016288757324, "learning_rate": 5.163174570061411e-06, "loss": 4.111, "step": 39600 }, { "epoch": 0.010378372514731813, "grad_norm": 7.0969719886779785, "learning_rate": 5.189251209304146e-06, "loss": 4.1323, "step": 39800 }, { "epoch": 0.010430525140433983, "grad_norm": 7.286205291748047, "learning_rate": 5.215327848546879e-06, "loss": 4.0863, "step": 40000 }, { "epoch": 0.010482677766136152, "grad_norm": 7.330994129180908, "learning_rate": 5.2414044877896145e-06, "loss": 4.0936, "step": 40200 }, { "epoch": 0.010534830391838322, "grad_norm": 7.573282718658447, "learning_rate": 5.267481127032349e-06, "loss": 4.1067, "step": 40400 }, { "epoch": 0.010586983017540493, "grad_norm": 8.244158744812012, "learning_rate": 5.293557766275082e-06, "loss": 4.0502, "step": 40600 }, { "epoch": 0.010639135643242663, "grad_norm": 7.125401496887207, "learning_rate": 5.3196344055178175e-06, "loss": 4.0565, "step": 40800 }, { "epoch": 0.010691288268944832, "grad_norm": 8.068306922912598, "learning_rate": 5.345711044760552e-06, "loss": 4.0404, "step": 41000 }, { "epoch": 0.010743440894647002, "grad_norm": 7.503974437713623, "learning_rate": 5.371787684003285e-06, "loss": 4.063, "step": 41200 }, { "epoch": 0.010795593520349173, "grad_norm": 7.42249059677124, "learning_rate": 5.397864323246021e-06, "loss": 4.0053, "step": 41400 }, { "epoch": 0.010847746146051343, "grad_norm": 7.555575370788574, "learning_rate": 5.423940962488755e-06, "loss": 4.045, "step": 41600 }, { "epoch": 0.010899898771753512, "grad_norm": 7.423194885253906, "learning_rate": 5.4500176017314885e-06, "loss": 4.0292, "step": 41800 }, { "epoch": 0.010952051397455682, "grad_norm": 7.335684776306152, "learning_rate": 5.476094240974224e-06, "loss": 3.9726, "step": 42000 }, { "epoch": 0.011004204023157852, "grad_norm": 7.528399467468262, "learning_rate": 5.502170880216958e-06, "loss": 3.9907, "step": 42200 }, { "epoch": 0.011056356648860021, "grad_norm": 7.644163131713867, "learning_rate": 5.528247519459693e-06, "loss": 4.034, "step": 42400 }, { "epoch": 0.011108509274562191, "grad_norm": 8.300440788269043, "learning_rate": 5.554324158702427e-06, "loss": 4.0416, "step": 42600 }, { "epoch": 0.011160661900264362, "grad_norm": 7.345607280731201, "learning_rate": 5.580400797945161e-06, "loss": 3.9673, "step": 42800 }, { "epoch": 0.011212814525966532, "grad_norm": 7.909209251403809, "learning_rate": 5.606477437187896e-06, "loss": 3.9715, "step": 43000 }, { "epoch": 0.011264967151668701, "grad_norm": 7.389960289001465, "learning_rate": 5.63255407643063e-06, "loss": 3.9649, "step": 43200 }, { "epoch": 0.011317119777370871, "grad_norm": 7.785503387451172, "learning_rate": 5.658630715673364e-06, "loss": 3.9642, "step": 43400 }, { "epoch": 0.011369272403073042, "grad_norm": 7.3766984939575195, "learning_rate": 5.684707354916099e-06, "loss": 4.0052, "step": 43600 }, { "epoch": 0.011421425028775212, "grad_norm": 7.540368556976318, "learning_rate": 5.710783994158833e-06, "loss": 4.0034, "step": 43800 }, { "epoch": 0.01147357765447738, "grad_norm": 8.014230728149414, "learning_rate": 5.736860633401567e-06, "loss": 3.9643, "step": 44000 }, { "epoch": 0.011525730280179551, "grad_norm": 7.941577434539795, "learning_rate": 5.762937272644302e-06, "loss": 3.9858, "step": 44200 }, { "epoch": 0.011577882905881721, "grad_norm": 7.780430793762207, "learning_rate": 5.789013911887037e-06, "loss": 3.993, "step": 44400 }, { "epoch": 0.01163003553158389, "grad_norm": 8.022960662841797, "learning_rate": 5.81509055112977e-06, "loss": 3.9126, "step": 44600 }, { "epoch": 0.01168218815728606, "grad_norm": 8.14714527130127, "learning_rate": 5.8411671903725055e-06, "loss": 3.9218, "step": 44800 }, { "epoch": 0.011734340782988231, "grad_norm": 7.369551658630371, "learning_rate": 5.86724382961524e-06, "loss": 3.959, "step": 45000 }, { "epoch": 0.011786493408690401, "grad_norm": 8.105989456176758, "learning_rate": 5.893320468857973e-06, "loss": 3.8897, "step": 45200 }, { "epoch": 0.01183864603439257, "grad_norm": 8.191625595092773, "learning_rate": 5.9193971081007085e-06, "loss": 3.9187, "step": 45400 }, { "epoch": 0.01189079866009474, "grad_norm": 7.703822135925293, "learning_rate": 5.945473747343443e-06, "loss": 3.8947, "step": 45600 }, { "epoch": 0.01194295128579691, "grad_norm": 7.697946548461914, "learning_rate": 5.971550386586176e-06, "loss": 3.8795, "step": 45800 }, { "epoch": 0.011995103911499081, "grad_norm": 7.603724002838135, "learning_rate": 5.997627025828912e-06, "loss": 3.911, "step": 46000 }, { "epoch": 0.01204725653720125, "grad_norm": 8.207696914672852, "learning_rate": 6.023703665071646e-06, "loss": 3.896, "step": 46200 }, { "epoch": 0.01209940916290342, "grad_norm": 8.124335289001465, "learning_rate": 6.04978030431438e-06, "loss": 3.8306, "step": 46400 }, { "epoch": 0.01215156178860559, "grad_norm": 7.28653621673584, "learning_rate": 6.075856943557115e-06, "loss": 3.8624, "step": 46600 }, { "epoch": 0.01220371441430776, "grad_norm": 7.189661026000977, "learning_rate": 6.101933582799849e-06, "loss": 3.8445, "step": 46800 }, { "epoch": 0.01225586704000993, "grad_norm": 8.082289695739746, "learning_rate": 6.128010222042584e-06, "loss": 3.8956, "step": 47000 }, { "epoch": 0.0123080196657121, "grad_norm": 7.882737636566162, "learning_rate": 6.154086861285318e-06, "loss": 3.8779, "step": 47200 }, { "epoch": 0.01236017229141427, "grad_norm": 7.791324615478516, "learning_rate": 6.180163500528052e-06, "loss": 3.8339, "step": 47400 }, { "epoch": 0.012412324917116439, "grad_norm": 7.604274272918701, "learning_rate": 6.206240139770787e-06, "loss": 3.8552, "step": 47600 }, { "epoch": 0.01246447754281861, "grad_norm": 7.377195358276367, "learning_rate": 6.232316779013521e-06, "loss": 3.8075, "step": 47800 }, { "epoch": 0.01251663016852078, "grad_norm": 7.8386640548706055, "learning_rate": 6.258393418256256e-06, "loss": 3.7798, "step": 48000 }, { "epoch": 0.01256878279422295, "grad_norm": 7.123723983764648, "learning_rate": 6.28447005749899e-06, "loss": 3.8206, "step": 48200 }, { "epoch": 0.012620935419925119, "grad_norm": 8.309353828430176, "learning_rate": 6.310546696741724e-06, "loss": 3.8562, "step": 48400 }, { "epoch": 0.01267308804562729, "grad_norm": 7.630406379699707, "learning_rate": 6.336623335984459e-06, "loss": 3.829, "step": 48600 }, { "epoch": 0.01272524067132946, "grad_norm": 8.362411499023438, "learning_rate": 6.362699975227193e-06, "loss": 3.8011, "step": 48800 }, { "epoch": 0.012777393297031628, "grad_norm": 7.613163471221924, "learning_rate": 6.388776614469928e-06, "loss": 3.8119, "step": 49000 }, { "epoch": 0.012829545922733799, "grad_norm": 8.339622497558594, "learning_rate": 6.414853253712662e-06, "loss": 3.8347, "step": 49200 }, { "epoch": 0.012881698548435969, "grad_norm": 7.240667343139648, "learning_rate": 6.4409298929553965e-06, "loss": 3.7863, "step": 49400 }, { "epoch": 0.01293385117413814, "grad_norm": 7.500837326049805, "learning_rate": 6.467006532198132e-06, "loss": 3.78, "step": 49600 }, { "epoch": 0.012986003799840308, "grad_norm": 7.476478099822998, "learning_rate": 6.493083171440865e-06, "loss": 3.8424, "step": 49800 }, { "epoch": 0.013038156425542478, "grad_norm": 7.900282382965088, "learning_rate": 6.5191598106835995e-06, "loss": 3.757, "step": 50000 }, { "epoch": 0.013090309051244649, "grad_norm": 7.225290298461914, "learning_rate": 6.545236449926335e-06, "loss": 3.751, "step": 50200 }, { "epoch": 0.01314246167694682, "grad_norm": 7.843364715576172, "learning_rate": 6.571313089169068e-06, "loss": 3.8209, "step": 50400 }, { "epoch": 0.013194614302648988, "grad_norm": 7.373351573944092, "learning_rate": 6.597389728411803e-06, "loss": 3.8055, "step": 50600 }, { "epoch": 0.013246766928351158, "grad_norm": 8.107486724853516, "learning_rate": 6.623466367654538e-06, "loss": 3.7116, "step": 50800 }, { "epoch": 0.013298919554053329, "grad_norm": 7.618676662445068, "learning_rate": 6.649543006897272e-06, "loss": 3.7805, "step": 51000 }, { "epoch": 0.013351072179755497, "grad_norm": 7.945600509643555, "learning_rate": 6.675619646140006e-06, "loss": 3.7396, "step": 51200 }, { "epoch": 0.013403224805457668, "grad_norm": 7.414207935333252, "learning_rate": 6.701696285382741e-06, "loss": 3.7788, "step": 51400 }, { "epoch": 0.013455377431159838, "grad_norm": 8.000473022460938, "learning_rate": 6.727772924625475e-06, "loss": 3.7445, "step": 51600 }, { "epoch": 0.013507530056862008, "grad_norm": 7.633871555328369, "learning_rate": 6.753849563868209e-06, "loss": 3.7474, "step": 51800 }, { "epoch": 0.013559682682564177, "grad_norm": 7.910715103149414, "learning_rate": 6.779926203110944e-06, "loss": 3.7563, "step": 52000 }, { "epoch": 0.013611835308266347, "grad_norm": 8.315561294555664, "learning_rate": 6.806002842353678e-06, "loss": 3.7435, "step": 52200 }, { "epoch": 0.013663987933968518, "grad_norm": 7.666395664215088, "learning_rate": 6.832079481596412e-06, "loss": 3.7198, "step": 52400 }, { "epoch": 0.013716140559670688, "grad_norm": 7.776802062988281, "learning_rate": 6.858156120839147e-06, "loss": 3.7318, "step": 52600 }, { "epoch": 0.013768293185372857, "grad_norm": 7.258547782897949, "learning_rate": 6.884232760081881e-06, "loss": 3.7325, "step": 52800 }, { "epoch": 0.013820445811075027, "grad_norm": 7.867082595825195, "learning_rate": 6.910309399324615e-06, "loss": 3.7163, "step": 53000 }, { "epoch": 0.013872598436777198, "grad_norm": 7.452786922454834, "learning_rate": 6.93638603856735e-06, "loss": 3.7157, "step": 53200 }, { "epoch": 0.013924751062479366, "grad_norm": 7.4677042961120605, "learning_rate": 6.962462677810084e-06, "loss": 3.7251, "step": 53400 }, { "epoch": 0.013976903688181537, "grad_norm": 7.686807155609131, "learning_rate": 6.98853931705282e-06, "loss": 3.7158, "step": 53600 }, { "epoch": 0.014029056313883707, "grad_norm": 7.389094352722168, "learning_rate": 7.014615956295553e-06, "loss": 3.7139, "step": 53800 }, { "epoch": 0.014081208939585877, "grad_norm": 7.3070526123046875, "learning_rate": 7.0406925955382874e-06, "loss": 3.6748, "step": 54000 }, { "epoch": 0.014133361565288046, "grad_norm": 7.088414192199707, "learning_rate": 7.066769234781023e-06, "loss": 3.6817, "step": 54200 }, { "epoch": 0.014185514190990216, "grad_norm": 7.4261088371276855, "learning_rate": 7.092845874023756e-06, "loss": 3.7422, "step": 54400 }, { "epoch": 0.014237666816692387, "grad_norm": 7.321529388427734, "learning_rate": 7.1189225132664905e-06, "loss": 3.7282, "step": 54600 }, { "epoch": 0.014289819442394557, "grad_norm": 7.516880035400391, "learning_rate": 7.144999152509226e-06, "loss": 3.6875, "step": 54800 }, { "epoch": 0.014341972068096726, "grad_norm": 6.853364944458008, "learning_rate": 7.171075791751959e-06, "loss": 3.6896, "step": 55000 }, { "epoch": 0.014394124693798896, "grad_norm": 7.774241924285889, "learning_rate": 7.1971524309946936e-06, "loss": 3.6278, "step": 55200 }, { "epoch": 0.014446277319501067, "grad_norm": 8.340727806091309, "learning_rate": 7.223229070237429e-06, "loss": 3.6977, "step": 55400 }, { "epoch": 0.014498429945203237, "grad_norm": 7.890402793884277, "learning_rate": 7.249305709480163e-06, "loss": 3.6974, "step": 55600 }, { "epoch": 0.014550582570905406, "grad_norm": 7.747776985168457, "learning_rate": 7.275382348722897e-06, "loss": 3.6873, "step": 55800 }, { "epoch": 0.014602735196607576, "grad_norm": 6.7120184898376465, "learning_rate": 7.301458987965632e-06, "loss": 3.6961, "step": 56000 }, { "epoch": 0.014654887822309746, "grad_norm": 7.8532891273498535, "learning_rate": 7.327535627208366e-06, "loss": 3.6477, "step": 56200 }, { "epoch": 0.014707040448011915, "grad_norm": 8.437646865844727, "learning_rate": 7.3536122664511e-06, "loss": 3.6398, "step": 56400 }, { "epoch": 0.014759193073714085, "grad_norm": 7.008194446563721, "learning_rate": 7.379688905693835e-06, "loss": 3.6403, "step": 56600 }, { "epoch": 0.014811345699416256, "grad_norm": 7.821798801422119, "learning_rate": 7.405765544936569e-06, "loss": 3.6426, "step": 56800 }, { "epoch": 0.014863498325118426, "grad_norm": 6.9409356117248535, "learning_rate": 7.431842184179303e-06, "loss": 3.6323, "step": 57000 }, { "epoch": 0.014915650950820595, "grad_norm": 7.209104537963867, "learning_rate": 7.457918823422038e-06, "loss": 3.6272, "step": 57200 }, { "epoch": 0.014967803576522765, "grad_norm": 7.187128067016602, "learning_rate": 7.483995462664772e-06, "loss": 3.6558, "step": 57400 }, { "epoch": 0.015019956202224936, "grad_norm": 7.077954292297363, "learning_rate": 7.510072101907506e-06, "loss": 3.6594, "step": 57600 }, { "epoch": 0.015072108827927106, "grad_norm": 7.292501926422119, "learning_rate": 7.536148741150241e-06, "loss": 3.6413, "step": 57800 }, { "epoch": 0.015124261453629275, "grad_norm": 7.346433162689209, "learning_rate": 7.562225380392975e-06, "loss": 3.6647, "step": 58000 }, { "epoch": 0.015176414079331445, "grad_norm": 7.485527992248535, "learning_rate": 7.5883020196357106e-06, "loss": 3.6067, "step": 58200 }, { "epoch": 0.015228566705033615, "grad_norm": 7.4600629806518555, "learning_rate": 7.614378658878444e-06, "loss": 3.6154, "step": 58400 }, { "epoch": 0.015280719330735784, "grad_norm": 7.584334373474121, "learning_rate": 7.640455298121178e-06, "loss": 3.627, "step": 58600 }, { "epoch": 0.015332871956437955, "grad_norm": 6.661746978759766, "learning_rate": 7.666531937363914e-06, "loss": 3.6439, "step": 58800 }, { "epoch": 0.015385024582140125, "grad_norm": 7.055318355560303, "learning_rate": 7.692608576606647e-06, "loss": 3.6274, "step": 59000 }, { "epoch": 0.015437177207842295, "grad_norm": 7.231868267059326, "learning_rate": 7.718685215849382e-06, "loss": 3.6557, "step": 59200 }, { "epoch": 0.015489329833544464, "grad_norm": 7.788670063018799, "learning_rate": 7.744761855092116e-06, "loss": 3.6534, "step": 59400 }, { "epoch": 0.015541482459246634, "grad_norm": 7.5201263427734375, "learning_rate": 7.770838494334851e-06, "loss": 3.6055, "step": 59600 }, { "epoch": 0.015593635084948805, "grad_norm": 6.89668083190918, "learning_rate": 7.796915133577585e-06, "loss": 3.5638, "step": 59800 }, { "epoch": 0.015645787710650973, "grad_norm": 7.163186550140381, "learning_rate": 7.82299177282032e-06, "loss": 3.6157, "step": 60000 }, { "epoch": 0.015697940336353144, "grad_norm": 6.937023639678955, "learning_rate": 7.849068412063055e-06, "loss": 3.58, "step": 60200 }, { "epoch": 0.015750092962055314, "grad_norm": 7.33142614364624, "learning_rate": 7.875145051305788e-06, "loss": 3.5582, "step": 60400 }, { "epoch": 0.015802245587757485, "grad_norm": 7.158875465393066, "learning_rate": 7.901221690548524e-06, "loss": 3.5748, "step": 60600 }, { "epoch": 0.015854398213459655, "grad_norm": 7.254717826843262, "learning_rate": 7.927298329791257e-06, "loss": 3.5821, "step": 60800 }, { "epoch": 0.015906550839161825, "grad_norm": 7.033998966217041, "learning_rate": 7.95337496903399e-06, "loss": 3.5704, "step": 61000 }, { "epoch": 0.015958703464863992, "grad_norm": 8.059544563293457, "learning_rate": 7.979451608276726e-06, "loss": 3.5057, "step": 61200 }, { "epoch": 0.016010856090566163, "grad_norm": 6.929595470428467, "learning_rate": 8.005528247519461e-06, "loss": 3.5442, "step": 61400 }, { "epoch": 0.016063008716268333, "grad_norm": 7.488486289978027, "learning_rate": 8.031604886762195e-06, "loss": 3.5421, "step": 61600 }, { "epoch": 0.016115161341970503, "grad_norm": 7.025427341461182, "learning_rate": 8.05768152600493e-06, "loss": 3.5727, "step": 61800 }, { "epoch": 0.016167313967672674, "grad_norm": 7.745316982269287, "learning_rate": 8.083758165247663e-06, "loss": 3.5274, "step": 62000 }, { "epoch": 0.016219466593374844, "grad_norm": 8.315921783447266, "learning_rate": 8.109834804490397e-06, "loss": 3.5662, "step": 62200 }, { "epoch": 0.016271619219077015, "grad_norm": 7.587878704071045, "learning_rate": 8.135911443733132e-06, "loss": 3.5412, "step": 62400 }, { "epoch": 0.01632377184477918, "grad_norm": 7.723850250244141, "learning_rate": 8.161988082975867e-06, "loss": 3.5607, "step": 62600 }, { "epoch": 0.016375924470481352, "grad_norm": 7.842193603515625, "learning_rate": 8.188064722218602e-06, "loss": 3.5359, "step": 62800 }, { "epoch": 0.016428077096183522, "grad_norm": 7.071596145629883, "learning_rate": 8.214141361461336e-06, "loss": 3.5514, "step": 63000 }, { "epoch": 0.016480229721885693, "grad_norm": 7.444825649261475, "learning_rate": 8.24021800070407e-06, "loss": 3.5449, "step": 63200 }, { "epoch": 0.016532382347587863, "grad_norm": 7.005112648010254, "learning_rate": 8.266294639946805e-06, "loss": 3.5011, "step": 63400 }, { "epoch": 0.016584534973290033, "grad_norm": 7.4845290184021, "learning_rate": 8.292371279189538e-06, "loss": 3.5421, "step": 63600 }, { "epoch": 0.016636687598992204, "grad_norm": 7.57586669921875, "learning_rate": 8.318447918432273e-06, "loss": 3.4857, "step": 63800 }, { "epoch": 0.016688840224694374, "grad_norm": 6.911862850189209, "learning_rate": 8.344524557675009e-06, "loss": 3.5319, "step": 64000 }, { "epoch": 0.01674099285039654, "grad_norm": 6.8444061279296875, "learning_rate": 8.370601196917742e-06, "loss": 3.5423, "step": 64200 }, { "epoch": 0.01679314547609871, "grad_norm": 6.937068462371826, "learning_rate": 8.396677836160476e-06, "loss": 3.5273, "step": 64400 }, { "epoch": 0.016845298101800882, "grad_norm": 7.857777118682861, "learning_rate": 8.42275447540321e-06, "loss": 3.539, "step": 64600 }, { "epoch": 0.016897450727503052, "grad_norm": 6.760194778442383, "learning_rate": 8.448831114645946e-06, "loss": 3.545, "step": 64800 }, { "epoch": 0.016949603353205223, "grad_norm": 7.323217868804932, "learning_rate": 8.47490775388868e-06, "loss": 3.554, "step": 65000 }, { "epoch": 0.017001755978907393, "grad_norm": 7.225944519042969, "learning_rate": 8.500984393131415e-06, "loss": 3.5109, "step": 65200 }, { "epoch": 0.017053908604609563, "grad_norm": 7.282871246337891, "learning_rate": 8.527061032374148e-06, "loss": 3.5113, "step": 65400 }, { "epoch": 0.01710606123031173, "grad_norm": 6.699639320373535, "learning_rate": 8.553137671616882e-06, "loss": 3.555, "step": 65600 }, { "epoch": 0.0171582138560139, "grad_norm": 7.353262901306152, "learning_rate": 8.579214310859617e-06, "loss": 3.4825, "step": 65800 }, { "epoch": 0.01721036648171607, "grad_norm": 7.2611284255981445, "learning_rate": 8.605290950102352e-06, "loss": 3.5099, "step": 66000 }, { "epoch": 0.01726251910741824, "grad_norm": 7.1428680419921875, "learning_rate": 8.631367589345086e-06, "loss": 3.4868, "step": 66200 }, { "epoch": 0.017314671733120412, "grad_norm": 7.161787986755371, "learning_rate": 8.65744422858782e-06, "loss": 3.4979, "step": 66400 }, { "epoch": 0.017366824358822582, "grad_norm": 7.3627142906188965, "learning_rate": 8.683520867830554e-06, "loss": 3.4826, "step": 66600 }, { "epoch": 0.017418976984524753, "grad_norm": 7.277604103088379, "learning_rate": 8.70959750707329e-06, "loss": 3.4848, "step": 66800 }, { "epoch": 0.017471129610226923, "grad_norm": 6.910572052001953, "learning_rate": 8.735674146316023e-06, "loss": 3.4787, "step": 67000 }, { "epoch": 0.01752328223592909, "grad_norm": 7.803658962249756, "learning_rate": 8.761750785558758e-06, "loss": 3.4327, "step": 67200 }, { "epoch": 0.01757543486163126, "grad_norm": 7.014376640319824, "learning_rate": 8.787827424801493e-06, "loss": 3.4453, "step": 67400 }, { "epoch": 0.01762758748733343, "grad_norm": 6.735202312469482, "learning_rate": 8.813904064044227e-06, "loss": 3.4588, "step": 67600 }, { "epoch": 0.0176797401130356, "grad_norm": 7.092906951904297, "learning_rate": 8.83998070328696e-06, "loss": 3.4441, "step": 67800 }, { "epoch": 0.01773189273873777, "grad_norm": 7.564960956573486, "learning_rate": 8.866057342529696e-06, "loss": 3.4894, "step": 68000 }, { "epoch": 0.017784045364439942, "grad_norm": 7.072451591491699, "learning_rate": 8.892133981772429e-06, "loss": 3.4665, "step": 68200 }, { "epoch": 0.017836197990142112, "grad_norm": 7.669380187988281, "learning_rate": 8.918210621015164e-06, "loss": 3.4343, "step": 68400 }, { "epoch": 0.01788835061584428, "grad_norm": 6.885750770568848, "learning_rate": 8.9442872602579e-06, "loss": 3.4742, "step": 68600 }, { "epoch": 0.01794050324154645, "grad_norm": 6.932456016540527, "learning_rate": 8.970363899500633e-06, "loss": 3.5029, "step": 68800 }, { "epoch": 0.01799265586724862, "grad_norm": 7.914639472961426, "learning_rate": 8.996440538743367e-06, "loss": 3.4828, "step": 69000 }, { "epoch": 0.01804480849295079, "grad_norm": 7.070594310760498, "learning_rate": 9.022517177986102e-06, "loss": 3.474, "step": 69200 }, { "epoch": 0.01809696111865296, "grad_norm": 7.355897426605225, "learning_rate": 9.048593817228837e-06, "loss": 3.4265, "step": 69400 }, { "epoch": 0.01814911374435513, "grad_norm": 7.521884441375732, "learning_rate": 9.07467045647157e-06, "loss": 3.4737, "step": 69600 }, { "epoch": 0.0182012663700573, "grad_norm": 7.33009672164917, "learning_rate": 9.100747095714306e-06, "loss": 3.4509, "step": 69800 }, { "epoch": 0.01825341899575947, "grad_norm": 7.302325248718262, "learning_rate": 9.12682373495704e-06, "loss": 3.4412, "step": 70000 }, { "epoch": 0.01830557162146164, "grad_norm": 7.756179332733154, "learning_rate": 9.152900374199773e-06, "loss": 3.4473, "step": 70200 }, { "epoch": 0.01835772424716381, "grad_norm": 7.5515336990356445, "learning_rate": 9.178977013442508e-06, "loss": 3.4144, "step": 70400 }, { "epoch": 0.01840987687286598, "grad_norm": 7.913027286529541, "learning_rate": 9.205053652685243e-06, "loss": 3.4406, "step": 70600 }, { "epoch": 0.01846202949856815, "grad_norm": 7.048377990722656, "learning_rate": 9.231130291927977e-06, "loss": 3.4356, "step": 70800 }, { "epoch": 0.01851418212427032, "grad_norm": 7.313281536102295, "learning_rate": 9.257206931170712e-06, "loss": 3.4147, "step": 71000 }, { "epoch": 0.01856633474997249, "grad_norm": 7.301833629608154, "learning_rate": 9.283283570413445e-06, "loss": 3.4198, "step": 71200 }, { "epoch": 0.01861848737567466, "grad_norm": 6.697605133056641, "learning_rate": 9.30936020965618e-06, "loss": 3.4352, "step": 71400 }, { "epoch": 0.018670640001376828, "grad_norm": 6.958870887756348, "learning_rate": 9.335436848898914e-06, "loss": 3.4299, "step": 71600 }, { "epoch": 0.018722792627079, "grad_norm": 7.359804153442383, "learning_rate": 9.36151348814165e-06, "loss": 3.4789, "step": 71800 }, { "epoch": 0.01877494525278117, "grad_norm": 6.766062259674072, "learning_rate": 9.387590127384384e-06, "loss": 3.4272, "step": 72000 }, { "epoch": 0.01882709787848334, "grad_norm": 7.107087135314941, "learning_rate": 9.413666766627118e-06, "loss": 3.3802, "step": 72200 }, { "epoch": 0.01887925050418551, "grad_norm": 7.090783596038818, "learning_rate": 9.439743405869853e-06, "loss": 3.4318, "step": 72400 }, { "epoch": 0.01893140312988768, "grad_norm": 6.893887519836426, "learning_rate": 9.465820045112587e-06, "loss": 3.4297, "step": 72600 }, { "epoch": 0.01898355575558985, "grad_norm": 6.953131198883057, "learning_rate": 9.49189668435532e-06, "loss": 3.4238, "step": 72800 }, { "epoch": 0.019035708381292017, "grad_norm": 6.934586524963379, "learning_rate": 9.517973323598055e-06, "loss": 3.4275, "step": 73000 }, { "epoch": 0.019087861006994188, "grad_norm": 7.763511657714844, "learning_rate": 9.54404996284079e-06, "loss": 3.3778, "step": 73200 }, { "epoch": 0.019140013632696358, "grad_norm": 6.7908806800842285, "learning_rate": 9.570126602083524e-06, "loss": 3.4133, "step": 73400 }, { "epoch": 0.01919216625839853, "grad_norm": 6.870711326599121, "learning_rate": 9.59620324132626e-06, "loss": 3.4281, "step": 73600 }, { "epoch": 0.0192443188841007, "grad_norm": 7.813330173492432, "learning_rate": 9.622279880568993e-06, "loss": 3.424, "step": 73800 }, { "epoch": 0.01929647150980287, "grad_norm": 7.28371524810791, "learning_rate": 9.648356519811728e-06, "loss": 3.4238, "step": 74000 }, { "epoch": 0.01934862413550504, "grad_norm": 7.129782676696777, "learning_rate": 9.674433159054461e-06, "loss": 3.4407, "step": 74200 }, { "epoch": 0.019400776761207206, "grad_norm": 7.057884216308594, "learning_rate": 9.700509798297197e-06, "loss": 3.3992, "step": 74400 }, { "epoch": 0.019452929386909377, "grad_norm": 7.445634841918945, "learning_rate": 9.726586437539932e-06, "loss": 3.4258, "step": 74600 }, { "epoch": 0.019505082012611547, "grad_norm": 7.498905658721924, "learning_rate": 9.752663076782665e-06, "loss": 3.4097, "step": 74800 }, { "epoch": 0.019557234638313718, "grad_norm": 8.201305389404297, "learning_rate": 9.778739716025399e-06, "loss": 3.3639, "step": 75000 }, { "epoch": 0.019609387264015888, "grad_norm": 7.614349365234375, "learning_rate": 9.804816355268134e-06, "loss": 3.3741, "step": 75200 }, { "epoch": 0.01966153988971806, "grad_norm": 6.917168140411377, "learning_rate": 9.830892994510868e-06, "loss": 3.4018, "step": 75400 }, { "epoch": 0.01971369251542023, "grad_norm": 6.967098236083984, "learning_rate": 9.856969633753603e-06, "loss": 3.3677, "step": 75600 }, { "epoch": 0.0197658451411224, "grad_norm": 7.0407795906066895, "learning_rate": 9.883046272996338e-06, "loss": 3.4018, "step": 75800 }, { "epoch": 0.019817997766824566, "grad_norm": 6.73028564453125, "learning_rate": 9.909122912239071e-06, "loss": 3.3787, "step": 76000 }, { "epoch": 0.019870150392526736, "grad_norm": 6.180406093597412, "learning_rate": 9.935199551481805e-06, "loss": 3.3538, "step": 76200 }, { "epoch": 0.019922303018228907, "grad_norm": 7.133777618408203, "learning_rate": 9.96127619072454e-06, "loss": 3.3845, "step": 76400 }, { "epoch": 0.019974455643931077, "grad_norm": 6.977362632751465, "learning_rate": 9.987352829967275e-06, "loss": 3.3386, "step": 76600 }, { "epoch": 0.020026608269633248, "grad_norm": 7.869418621063232, "learning_rate": 9.999999981466639e-06, "loss": 3.355, "step": 76800 }, { "epoch": 0.020078760895335418, "grad_norm": 6.740837574005127, "learning_rate": 9.999999839614538e-06, "loss": 3.4371, "step": 77000 }, { "epoch": 0.02013091352103759, "grad_norm": 7.533843994140625, "learning_rate": 9.999999558006677e-06, "loss": 3.3576, "step": 77200 }, { "epoch": 0.020183066146739755, "grad_norm": 6.733776092529297, "learning_rate": 9.999999136643062e-06, "loss": 3.3787, "step": 77400 }, { "epoch": 0.020235218772441926, "grad_norm": 7.021015644073486, "learning_rate": 9.999998575523706e-06, "loss": 3.3798, "step": 77600 }, { "epoch": 0.020287371398144096, "grad_norm": 7.0405755043029785, "learning_rate": 9.999997874648624e-06, "loss": 3.3451, "step": 77800 }, { "epoch": 0.020339524023846266, "grad_norm": 7.242607116699219, "learning_rate": 9.999997034017837e-06, "loss": 3.3643, "step": 78000 }, { "epoch": 0.020391676649548437, "grad_norm": 7.021972179412842, "learning_rate": 9.999996053631368e-06, "loss": 3.3861, "step": 78200 }, { "epoch": 0.020443829275250607, "grad_norm": 7.320739269256592, "learning_rate": 9.999994933489244e-06, "loss": 3.3503, "step": 78400 }, { "epoch": 0.020495981900952778, "grad_norm": 7.283355712890625, "learning_rate": 9.999993673591494e-06, "loss": 3.393, "step": 78600 }, { "epoch": 0.020548134526654948, "grad_norm": 7.413722515106201, "learning_rate": 9.999992273938159e-06, "loss": 3.3393, "step": 78800 }, { "epoch": 0.020600287152357115, "grad_norm": 7.285412788391113, "learning_rate": 9.999990734529274e-06, "loss": 3.3531, "step": 79000 }, { "epoch": 0.020652439778059285, "grad_norm": 6.342610836029053, "learning_rate": 9.999989055364881e-06, "loss": 3.3199, "step": 79200 }, { "epoch": 0.020704592403761456, "grad_norm": 7.5218892097473145, "learning_rate": 9.999987236445031e-06, "loss": 3.3587, "step": 79400 }, { "epoch": 0.020756745029463626, "grad_norm": 6.564230918884277, "learning_rate": 9.99998527776977e-06, "loss": 3.3877, "step": 79600 }, { "epoch": 0.020808897655165796, "grad_norm": 6.8725786209106445, "learning_rate": 9.999983179339158e-06, "loss": 3.3915, "step": 79800 }, { "epoch": 0.020861050280867967, "grad_norm": 7.533461093902588, "learning_rate": 9.999980941153249e-06, "loss": 3.3572, "step": 80000 }, { "epoch": 0.020913202906570137, "grad_norm": 7.089059829711914, "learning_rate": 9.999978563212107e-06, "loss": 3.3238, "step": 80200 }, { "epoch": 0.020965355532272304, "grad_norm": 7.241924285888672, "learning_rate": 9.999976045515802e-06, "loss": 3.3311, "step": 80400 }, { "epoch": 0.021017508157974474, "grad_norm": 6.942836761474609, "learning_rate": 9.9999733880644e-06, "loss": 3.3374, "step": 80600 }, { "epoch": 0.021069660783676645, "grad_norm": 7.0181884765625, "learning_rate": 9.999970590857975e-06, "loss": 3.3009, "step": 80800 }, { "epoch": 0.021121813409378815, "grad_norm": 7.321265697479248, "learning_rate": 9.999967653896607e-06, "loss": 3.3696, "step": 81000 }, { "epoch": 0.021173966035080986, "grad_norm": 6.867557525634766, "learning_rate": 9.999964577180379e-06, "loss": 3.3183, "step": 81200 }, { "epoch": 0.021226118660783156, "grad_norm": 7.854918956756592, "learning_rate": 9.999961360709376e-06, "loss": 3.3517, "step": 81400 }, { "epoch": 0.021278271286485326, "grad_norm": 7.4617438316345215, "learning_rate": 9.999958004483687e-06, "loss": 3.3323, "step": 81600 }, { "epoch": 0.021330423912187493, "grad_norm": 7.94450569152832, "learning_rate": 9.999954508503407e-06, "loss": 3.3163, "step": 81800 }, { "epoch": 0.021382576537889664, "grad_norm": 6.6632184982299805, "learning_rate": 9.999950872768633e-06, "loss": 3.3429, "step": 82000 }, { "epoch": 0.021434729163591834, "grad_norm": 6.834831237792969, "learning_rate": 9.999947097279468e-06, "loss": 3.3278, "step": 82200 }, { "epoch": 0.021486881789294004, "grad_norm": 7.85338020324707, "learning_rate": 9.999943182036017e-06, "loss": 3.3649, "step": 82400 }, { "epoch": 0.021539034414996175, "grad_norm": 7.1437811851501465, "learning_rate": 9.999939127038387e-06, "loss": 3.3501, "step": 82600 }, { "epoch": 0.021591187040698345, "grad_norm": 7.180891990661621, "learning_rate": 9.999934932286692e-06, "loss": 3.3236, "step": 82800 }, { "epoch": 0.021643339666400516, "grad_norm": 6.1819353103637695, "learning_rate": 9.999930597781054e-06, "loss": 3.2915, "step": 83000 }, { "epoch": 0.021695492292102686, "grad_norm": 6.358526229858398, "learning_rate": 9.999926123521588e-06, "loss": 3.2759, "step": 83200 }, { "epoch": 0.021747644917804853, "grad_norm": 7.643764495849609, "learning_rate": 9.999921509508424e-06, "loss": 3.3382, "step": 83400 }, { "epoch": 0.021799797543507023, "grad_norm": 6.65496826171875, "learning_rate": 9.999916755741687e-06, "loss": 3.3042, "step": 83600 }, { "epoch": 0.021851950169209194, "grad_norm": 6.801998138427734, "learning_rate": 9.999911862221512e-06, "loss": 3.3193, "step": 83800 }, { "epoch": 0.021904102794911364, "grad_norm": 7.052958011627197, "learning_rate": 9.999906828948035e-06, "loss": 3.2707, "step": 84000 }, { "epoch": 0.021956255420613534, "grad_norm": 6.636064529418945, "learning_rate": 9.999901655921398e-06, "loss": 3.317, "step": 84200 }, { "epoch": 0.022008408046315705, "grad_norm": 6.989995956420898, "learning_rate": 9.999896343141742e-06, "loss": 3.3297, "step": 84400 }, { "epoch": 0.022060560672017875, "grad_norm": 6.694401741027832, "learning_rate": 9.999890890609221e-06, "loss": 3.2781, "step": 84600 }, { "epoch": 0.022112713297720042, "grad_norm": 6.958297252655029, "learning_rate": 9.999885298323984e-06, "loss": 3.3058, "step": 84800 }, { "epoch": 0.022164865923422213, "grad_norm": 7.147956371307373, "learning_rate": 9.999879566286187e-06, "loss": 3.3504, "step": 85000 }, { "epoch": 0.022217018549124383, "grad_norm": 7.238255023956299, "learning_rate": 9.999873694495991e-06, "loss": 3.3022, "step": 85200 }, { "epoch": 0.022269171174826553, "grad_norm": 7.0600056648254395, "learning_rate": 9.999867682953562e-06, "loss": 3.2688, "step": 85400 }, { "epoch": 0.022321323800528724, "grad_norm": 6.889624118804932, "learning_rate": 9.999861531659063e-06, "loss": 3.2854, "step": 85600 }, { "epoch": 0.022373476426230894, "grad_norm": 6.706833362579346, "learning_rate": 9.99985524061267e-06, "loss": 3.3073, "step": 85800 }, { "epoch": 0.022425629051933064, "grad_norm": 6.433074951171875, "learning_rate": 9.99984880981456e-06, "loss": 3.3223, "step": 86000 }, { "epoch": 0.02247778167763523, "grad_norm": 6.203549385070801, "learning_rate": 9.99984223926491e-06, "loss": 3.324, "step": 86200 }, { "epoch": 0.022529934303337402, "grad_norm": 6.171205043792725, "learning_rate": 9.999835528963905e-06, "loss": 3.2961, "step": 86400 }, { "epoch": 0.022582086929039572, "grad_norm": 7.106544017791748, "learning_rate": 9.999828678911729e-06, "loss": 3.2777, "step": 86600 }, { "epoch": 0.022634239554741743, "grad_norm": 6.84821081161499, "learning_rate": 9.99982168910858e-06, "loss": 3.3162, "step": 86800 }, { "epoch": 0.022686392180443913, "grad_norm": 7.026462078094482, "learning_rate": 9.999814559554648e-06, "loss": 3.2632, "step": 87000 }, { "epoch": 0.022738544806146083, "grad_norm": 6.290987014770508, "learning_rate": 9.999807290250133e-06, "loss": 3.2957, "step": 87200 }, { "epoch": 0.022790697431848254, "grad_norm": 6.156450271606445, "learning_rate": 9.99979988119524e-06, "loss": 3.2679, "step": 87400 }, { "epoch": 0.022842850057550424, "grad_norm": 6.672393321990967, "learning_rate": 9.999792332390177e-06, "loss": 3.2777, "step": 87600 }, { "epoch": 0.02289500268325259, "grad_norm": 6.625957489013672, "learning_rate": 9.99978464383515e-06, "loss": 3.2897, "step": 87800 }, { "epoch": 0.02294715530895476, "grad_norm": 6.969167232513428, "learning_rate": 9.99977681553038e-06, "loss": 3.2615, "step": 88000 }, { "epoch": 0.022999307934656932, "grad_norm": 6.787604331970215, "learning_rate": 9.999768847476084e-06, "loss": 3.2896, "step": 88200 }, { "epoch": 0.023051460560359102, "grad_norm": 7.575397491455078, "learning_rate": 9.999760739672481e-06, "loss": 3.2926, "step": 88400 }, { "epoch": 0.023103613186061273, "grad_norm": 6.810946941375732, "learning_rate": 9.9997524921198e-06, "loss": 3.2828, "step": 88600 }, { "epoch": 0.023155765811763443, "grad_norm": 6.538626194000244, "learning_rate": 9.999744104818275e-06, "loss": 3.2672, "step": 88800 }, { "epoch": 0.023207918437465613, "grad_norm": 6.649213790893555, "learning_rate": 9.999735577768135e-06, "loss": 3.3231, "step": 89000 }, { "epoch": 0.02326007106316778, "grad_norm": 6.549954414367676, "learning_rate": 9.999726910969621e-06, "loss": 3.2322, "step": 89200 }, { "epoch": 0.02331222368886995, "grad_norm": 6.339663982391357, "learning_rate": 9.999718104422977e-06, "loss": 3.2849, "step": 89400 }, { "epoch": 0.02336437631457212, "grad_norm": 7.996878147125244, "learning_rate": 9.999709158128444e-06, "loss": 3.2478, "step": 89600 }, { "epoch": 0.02341652894027429, "grad_norm": 6.590426921844482, "learning_rate": 9.999700072086277e-06, "loss": 3.257, "step": 89800 }, { "epoch": 0.023468681565976462, "grad_norm": 6.166894435882568, "learning_rate": 9.999690846296728e-06, "loss": 3.266, "step": 90000 }, { "epoch": 0.023520834191678632, "grad_norm": 6.546900749206543, "learning_rate": 9.999681480760054e-06, "loss": 3.3065, "step": 90200 }, { "epoch": 0.023572986817380803, "grad_norm": 7.30242919921875, "learning_rate": 9.99967197547652e-06, "loss": 3.2692, "step": 90400 }, { "epoch": 0.02362513944308297, "grad_norm": 6.420165061950684, "learning_rate": 9.999662330446387e-06, "loss": 3.2749, "step": 90600 }, { "epoch": 0.02367729206878514, "grad_norm": 6.860097408294678, "learning_rate": 9.999652545669926e-06, "loss": 3.2887, "step": 90800 }, { "epoch": 0.02372944469448731, "grad_norm": 6.4746599197387695, "learning_rate": 9.999642621147414e-06, "loss": 3.2687, "step": 91000 }, { "epoch": 0.02378159732018948, "grad_norm": 6.279762268066406, "learning_rate": 9.999632556879127e-06, "loss": 3.2578, "step": 91200 }, { "epoch": 0.02383374994589165, "grad_norm": 6.2368268966674805, "learning_rate": 9.999622352865342e-06, "loss": 3.2125, "step": 91400 }, { "epoch": 0.02388590257159382, "grad_norm": 6.1834869384765625, "learning_rate": 9.999612009106349e-06, "loss": 3.2436, "step": 91600 }, { "epoch": 0.023938055197295992, "grad_norm": 7.046905517578125, "learning_rate": 9.999601525602436e-06, "loss": 3.2105, "step": 91800 }, { "epoch": 0.023990207822998162, "grad_norm": 6.951870918273926, "learning_rate": 9.999590902353895e-06, "loss": 3.2179, "step": 92000 }, { "epoch": 0.02404236044870033, "grad_norm": 6.480534553527832, "learning_rate": 9.999580139361023e-06, "loss": 3.2562, "step": 92200 }, { "epoch": 0.0240945130744025, "grad_norm": 6.898007869720459, "learning_rate": 9.999569236624122e-06, "loss": 3.2618, "step": 92400 }, { "epoch": 0.02414666570010467, "grad_norm": 7.365018367767334, "learning_rate": 9.999558194143497e-06, "loss": 3.2709, "step": 92600 }, { "epoch": 0.02419881832580684, "grad_norm": 6.268816947937012, "learning_rate": 9.999547011919454e-06, "loss": 3.2585, "step": 92800 }, { "epoch": 0.02425097095150901, "grad_norm": 7.152459621429443, "learning_rate": 9.999535689952309e-06, "loss": 3.2336, "step": 93000 }, { "epoch": 0.02430312357721118, "grad_norm": 7.479735851287842, "learning_rate": 9.999524228242376e-06, "loss": 3.2201, "step": 93200 }, { "epoch": 0.02435527620291335, "grad_norm": 7.143455982208252, "learning_rate": 9.999512626789977e-06, "loss": 3.248, "step": 93400 }, { "epoch": 0.02440742882861552, "grad_norm": 6.253408432006836, "learning_rate": 9.999500885595435e-06, "loss": 3.2142, "step": 93600 }, { "epoch": 0.02445958145431769, "grad_norm": 7.234580039978027, "learning_rate": 9.999489004659077e-06, "loss": 3.2751, "step": 93800 }, { "epoch": 0.02451173408001986, "grad_norm": 6.902135372161865, "learning_rate": 9.999476983981238e-06, "loss": 3.2603, "step": 94000 }, { "epoch": 0.02456388670572203, "grad_norm": 6.845031261444092, "learning_rate": 9.999464823562253e-06, "loss": 3.2149, "step": 94200 }, { "epoch": 0.0246160393314242, "grad_norm": 6.742398738861084, "learning_rate": 9.999452523402461e-06, "loss": 3.2196, "step": 94400 }, { "epoch": 0.02466819195712637, "grad_norm": 7.369746208190918, "learning_rate": 9.999440083502206e-06, "loss": 3.2394, "step": 94600 }, { "epoch": 0.02472034458282854, "grad_norm": 7.084136962890625, "learning_rate": 9.999427503861836e-06, "loss": 3.1992, "step": 94800 }, { "epoch": 0.02477249720853071, "grad_norm": 6.0851054191589355, "learning_rate": 9.999414784481705e-06, "loss": 3.2615, "step": 95000 }, { "epoch": 0.024824649834232878, "grad_norm": 6.620251178741455, "learning_rate": 9.999401925362164e-06, "loss": 3.2164, "step": 95200 }, { "epoch": 0.02487680245993505, "grad_norm": 6.744817733764648, "learning_rate": 9.999388926503576e-06, "loss": 3.247, "step": 95400 }, { "epoch": 0.02492895508563722, "grad_norm": 7.277287006378174, "learning_rate": 9.999375787906301e-06, "loss": 3.2377, "step": 95600 }, { "epoch": 0.02498110771133939, "grad_norm": 6.504336357116699, "learning_rate": 9.999362509570709e-06, "loss": 3.2138, "step": 95800 }, { "epoch": 0.02503326033704156, "grad_norm": 6.75746488571167, "learning_rate": 9.999349091497173e-06, "loss": 3.2087, "step": 96000 }, { "epoch": 0.02508541296274373, "grad_norm": 6.542148590087891, "learning_rate": 9.999335533686061e-06, "loss": 3.1869, "step": 96200 }, { "epoch": 0.0251375655884459, "grad_norm": 6.434966087341309, "learning_rate": 9.999321836137759e-06, "loss": 3.2454, "step": 96400 }, { "epoch": 0.025189718214148067, "grad_norm": 6.355975151062012, "learning_rate": 9.999307998852648e-06, "loss": 3.2172, "step": 96600 }, { "epoch": 0.025241870839850238, "grad_norm": 6.389385223388672, "learning_rate": 9.999294021831112e-06, "loss": 3.238, "step": 96800 }, { "epoch": 0.025294023465552408, "grad_norm": 6.235101222991943, "learning_rate": 9.999279905073544e-06, "loss": 3.2511, "step": 97000 }, { "epoch": 0.02534617609125458, "grad_norm": 7.004864692687988, "learning_rate": 9.99926564858034e-06, "loss": 3.1807, "step": 97200 }, { "epoch": 0.02539832871695675, "grad_norm": 6.632132530212402, "learning_rate": 9.999251252351896e-06, "loss": 3.2, "step": 97400 }, { "epoch": 0.02545048134265892, "grad_norm": 7.012777328491211, "learning_rate": 9.999236716388614e-06, "loss": 3.2303, "step": 97600 }, { "epoch": 0.02550263396836109, "grad_norm": 7.067131519317627, "learning_rate": 9.999222040690901e-06, "loss": 3.1878, "step": 97800 }, { "epoch": 0.025554786594063256, "grad_norm": 6.799013137817383, "learning_rate": 9.99920722525917e-06, "loss": 3.1748, "step": 98000 }, { "epoch": 0.025606939219765427, "grad_norm": 6.825366497039795, "learning_rate": 9.999192270093832e-06, "loss": 3.2289, "step": 98200 }, { "epoch": 0.025659091845467597, "grad_norm": 7.0657782554626465, "learning_rate": 9.999177175195305e-06, "loss": 3.2448, "step": 98400 }, { "epoch": 0.025711244471169768, "grad_norm": 6.592565059661865, "learning_rate": 9.99916194056401e-06, "loss": 3.2417, "step": 98600 }, { "epoch": 0.025763397096871938, "grad_norm": 6.476219177246094, "learning_rate": 9.999146566200378e-06, "loss": 3.1744, "step": 98800 }, { "epoch": 0.02581554972257411, "grad_norm": 6.580258846282959, "learning_rate": 9.999131052104834e-06, "loss": 3.2692, "step": 99000 }, { "epoch": 0.02586770234827628, "grad_norm": 6.556769371032715, "learning_rate": 9.999115398277812e-06, "loss": 3.189, "step": 99200 }, { "epoch": 0.02591985497397845, "grad_norm": 7.036558151245117, "learning_rate": 9.999099604719751e-06, "loss": 3.2138, "step": 99400 }, { "epoch": 0.025972007599680616, "grad_norm": 6.926954746246338, "learning_rate": 9.999083671431092e-06, "loss": 3.2066, "step": 99600 }, { "epoch": 0.026024160225382786, "grad_norm": 6.171466827392578, "learning_rate": 9.999067598412279e-06, "loss": 3.1867, "step": 99800 }, { "epoch": 0.026076312851084957, "grad_norm": 6.851698398590088, "learning_rate": 9.999051385663765e-06, "loss": 3.2022, "step": 100000 }, { "epoch": 0.026128465476787127, "grad_norm": 6.199542045593262, "learning_rate": 9.999035033185998e-06, "loss": 3.205, "step": 100200 }, { "epoch": 0.026180618102489298, "grad_norm": 6.503675937652588, "learning_rate": 9.99901854097944e-06, "loss": 3.2172, "step": 100400 }, { "epoch": 0.026232770728191468, "grad_norm": 6.505238056182861, "learning_rate": 9.999001909044548e-06, "loss": 3.2047, "step": 100600 }, { "epoch": 0.02628492335389364, "grad_norm": 6.692224979400635, "learning_rate": 9.99898513738179e-06, "loss": 3.1518, "step": 100800 }, { "epoch": 0.026337075979595805, "grad_norm": 6.675695419311523, "learning_rate": 9.998968225991632e-06, "loss": 3.1623, "step": 101000 }, { "epoch": 0.026389228605297976, "grad_norm": 6.617093086242676, "learning_rate": 9.998951174874548e-06, "loss": 3.2322, "step": 101200 }, { "epoch": 0.026441381231000146, "grad_norm": 6.3754119873046875, "learning_rate": 9.998933984031016e-06, "loss": 3.2037, "step": 101400 }, { "epoch": 0.026493533856702316, "grad_norm": 6.500361442565918, "learning_rate": 9.998916653461515e-06, "loss": 3.2131, "step": 101600 }, { "epoch": 0.026545686482404487, "grad_norm": 6.8905415534973145, "learning_rate": 9.998899183166529e-06, "loss": 3.1613, "step": 101800 }, { "epoch": 0.026597839108106657, "grad_norm": 6.983830451965332, "learning_rate": 9.998881573146546e-06, "loss": 3.1294, "step": 102000 }, { "epoch": 0.026649991733808828, "grad_norm": 6.894038677215576, "learning_rate": 9.99886382340206e-06, "loss": 3.2047, "step": 102200 }, { "epoch": 0.026702144359510994, "grad_norm": 6.187081336975098, "learning_rate": 9.998845933933565e-06, "loss": 3.1967, "step": 102400 }, { "epoch": 0.026754296985213165, "grad_norm": 7.0318708419799805, "learning_rate": 9.998827904741563e-06, "loss": 3.2053, "step": 102600 }, { "epoch": 0.026806449610915335, "grad_norm": 6.508965969085693, "learning_rate": 9.998809735826559e-06, "loss": 3.1671, "step": 102800 }, { "epoch": 0.026858602236617506, "grad_norm": 6.52016019821167, "learning_rate": 9.998791427189058e-06, "loss": 3.1954, "step": 103000 }, { "epoch": 0.026910754862319676, "grad_norm": 6.182394981384277, "learning_rate": 9.998772978829571e-06, "loss": 3.1369, "step": 103200 }, { "epoch": 0.026962907488021846, "grad_norm": 6.727584362030029, "learning_rate": 9.998754390748617e-06, "loss": 3.1729, "step": 103400 }, { "epoch": 0.027015060113724017, "grad_norm": 6.066624641418457, "learning_rate": 9.998735662946715e-06, "loss": 3.1742, "step": 103600 }, { "epoch": 0.027067212739426187, "grad_norm": 6.274039268493652, "learning_rate": 9.998716795424385e-06, "loss": 3.1548, "step": 103800 }, { "epoch": 0.027119365365128354, "grad_norm": 6.5091352462768555, "learning_rate": 9.998697788182158e-06, "loss": 3.1464, "step": 104000 }, { "epoch": 0.027171517990830524, "grad_norm": 7.099887847900391, "learning_rate": 9.998678641220564e-06, "loss": 3.1755, "step": 104200 }, { "epoch": 0.027223670616532695, "grad_norm": 6.529077529907227, "learning_rate": 9.99865935454014e-06, "loss": 3.1749, "step": 104400 }, { "epoch": 0.027275823242234865, "grad_norm": 6.756040573120117, "learning_rate": 9.998639928141422e-06, "loss": 3.192, "step": 104600 }, { "epoch": 0.027327975867937036, "grad_norm": 6.822609901428223, "learning_rate": 9.998620362024954e-06, "loss": 3.2029, "step": 104800 }, { "epoch": 0.027380128493639206, "grad_norm": 6.252408504486084, "learning_rate": 9.998600656191284e-06, "loss": 3.1596, "step": 105000 }, { "epoch": 0.027432281119341376, "grad_norm": 6.114434719085693, "learning_rate": 9.99858081064096e-06, "loss": 3.2181, "step": 105200 }, { "epoch": 0.027484433745043543, "grad_norm": 6.77893590927124, "learning_rate": 9.998560825374542e-06, "loss": 3.1836, "step": 105400 }, { "epoch": 0.027536586370745714, "grad_norm": 5.703882217407227, "learning_rate": 9.998540700392583e-06, "loss": 3.1661, "step": 105600 }, { "epoch": 0.027588738996447884, "grad_norm": 6.503817558288574, "learning_rate": 9.99852043569565e-06, "loss": 3.1212, "step": 105800 }, { "epoch": 0.027640891622150054, "grad_norm": 6.397846221923828, "learning_rate": 9.998500031284304e-06, "loss": 3.1341, "step": 106000 }, { "epoch": 0.027693044247852225, "grad_norm": 6.377574443817139, "learning_rate": 9.998479487159121e-06, "loss": 3.1652, "step": 106200 }, { "epoch": 0.027745196873554395, "grad_norm": 6.59285831451416, "learning_rate": 9.998458803320671e-06, "loss": 3.1326, "step": 106400 }, { "epoch": 0.027797349499256566, "grad_norm": 5.924184322357178, "learning_rate": 9.998437979769536e-06, "loss": 3.1707, "step": 106600 }, { "epoch": 0.027849502124958733, "grad_norm": 6.608452320098877, "learning_rate": 9.998417016506296e-06, "loss": 3.176, "step": 106800 }, { "epoch": 0.027901654750660903, "grad_norm": 7.678200721740723, "learning_rate": 9.998395913531536e-06, "loss": 3.1778, "step": 107000 }, { "epoch": 0.027953807376363073, "grad_norm": 7.167779922485352, "learning_rate": 9.998374670845847e-06, "loss": 3.181, "step": 107200 }, { "epoch": 0.028005960002065244, "grad_norm": 6.5178937911987305, "learning_rate": 9.998353288449823e-06, "loss": 3.1868, "step": 107400 }, { "epoch": 0.028058112627767414, "grad_norm": 5.7756218910217285, "learning_rate": 9.998331766344062e-06, "loss": 3.1752, "step": 107600 }, { "epoch": 0.028110265253469584, "grad_norm": 6.773250579833984, "learning_rate": 9.998310104529163e-06, "loss": 3.1601, "step": 107800 }, { "epoch": 0.028162417879171755, "grad_norm": 7.053867816925049, "learning_rate": 9.998288303005734e-06, "loss": 3.181, "step": 108000 }, { "epoch": 0.028214570504873925, "grad_norm": 6.301112174987793, "learning_rate": 9.998266361774385e-06, "loss": 3.1717, "step": 108200 }, { "epoch": 0.028266723130576092, "grad_norm": 6.55330753326416, "learning_rate": 9.998244280835728e-06, "loss": 3.1113, "step": 108400 }, { "epoch": 0.028318875756278263, "grad_norm": 6.428907871246338, "learning_rate": 9.998222060190377e-06, "loss": 3.1395, "step": 108600 }, { "epoch": 0.028371028381980433, "grad_norm": 6.372659683227539, "learning_rate": 9.998199699838959e-06, "loss": 3.2008, "step": 108800 }, { "epoch": 0.028423181007682603, "grad_norm": 6.7776689529418945, "learning_rate": 9.998177199782095e-06, "loss": 3.214, "step": 109000 }, { "epoch": 0.028475333633384774, "grad_norm": 6.657548427581787, "learning_rate": 9.998154560020417e-06, "loss": 3.1759, "step": 109200 }, { "epoch": 0.028527486259086944, "grad_norm": 7.199187278747559, "learning_rate": 9.998131780554554e-06, "loss": 3.1263, "step": 109400 }, { "epoch": 0.028579638884789114, "grad_norm": 6.797530651092529, "learning_rate": 9.998108861385145e-06, "loss": 3.1394, "step": 109600 }, { "epoch": 0.02863179151049128, "grad_norm": 7.414548873901367, "learning_rate": 9.998085802512832e-06, "loss": 3.116, "step": 109800 }, { "epoch": 0.028683944136193452, "grad_norm": 6.82701301574707, "learning_rate": 9.998062603938255e-06, "loss": 3.1589, "step": 110000 }, { "epoch": 0.028736096761895622, "grad_norm": 6.446861267089844, "learning_rate": 9.998039265662067e-06, "loss": 3.1494, "step": 110200 }, { "epoch": 0.028788249387597793, "grad_norm": 6.727176189422607, "learning_rate": 9.998015787684919e-06, "loss": 3.1162, "step": 110400 }, { "epoch": 0.028840402013299963, "grad_norm": 7.0100226402282715, "learning_rate": 9.997992170007464e-06, "loss": 3.1154, "step": 110600 }, { "epoch": 0.028892554639002133, "grad_norm": 5.6385722160339355, "learning_rate": 9.997968412630368e-06, "loss": 3.1144, "step": 110800 }, { "epoch": 0.028944707264704304, "grad_norm": 6.636441230773926, "learning_rate": 9.997944515554291e-06, "loss": 3.1642, "step": 111000 }, { "epoch": 0.028996859890406474, "grad_norm": 6.764395236968994, "learning_rate": 9.997920478779901e-06, "loss": 3.1758, "step": 111200 }, { "epoch": 0.02904901251610864, "grad_norm": 7.620389938354492, "learning_rate": 9.997896302307872e-06, "loss": 3.1518, "step": 111400 }, { "epoch": 0.02910116514181081, "grad_norm": 6.4152021408081055, "learning_rate": 9.997871986138878e-06, "loss": 3.1043, "step": 111600 }, { "epoch": 0.029153317767512982, "grad_norm": 5.856719493865967, "learning_rate": 9.9978475302736e-06, "loss": 3.1493, "step": 111800 }, { "epoch": 0.029205470393215152, "grad_norm": 6.708147048950195, "learning_rate": 9.99782293471272e-06, "loss": 3.1646, "step": 112000 }, { "epoch": 0.029257623018917323, "grad_norm": 6.783796310424805, "learning_rate": 9.997798199456927e-06, "loss": 3.1477, "step": 112200 }, { "epoch": 0.029309775644619493, "grad_norm": 6.652106761932373, "learning_rate": 9.997773324506912e-06, "loss": 3.107, "step": 112400 }, { "epoch": 0.029361928270321663, "grad_norm": 6.451571464538574, "learning_rate": 9.997748309863367e-06, "loss": 3.1245, "step": 112600 }, { "epoch": 0.02941408089602383, "grad_norm": 6.6614203453063965, "learning_rate": 9.997723155526998e-06, "loss": 3.1216, "step": 112800 }, { "epoch": 0.029466233521726, "grad_norm": 6.864782333374023, "learning_rate": 9.9976978614985e-06, "loss": 3.1465, "step": 113000 }, { "epoch": 0.02951838614742817, "grad_norm": 7.249753952026367, "learning_rate": 9.997672427778588e-06, "loss": 3.1112, "step": 113200 }, { "epoch": 0.02957053877313034, "grad_norm": 6.135322093963623, "learning_rate": 9.997646854367968e-06, "loss": 3.1491, "step": 113400 }, { "epoch": 0.029622691398832512, "grad_norm": 6.486145496368408, "learning_rate": 9.997621141267355e-06, "loss": 3.1672, "step": 113600 }, { "epoch": 0.029674844024534682, "grad_norm": 7.196221828460693, "learning_rate": 9.99759528847747e-06, "loss": 3.1266, "step": 113800 }, { "epoch": 0.029726996650236853, "grad_norm": 6.458970546722412, "learning_rate": 9.997569295999032e-06, "loss": 3.1566, "step": 114000 }, { "epoch": 0.02977914927593902, "grad_norm": 6.437240123748779, "learning_rate": 9.997543163832772e-06, "loss": 3.0992, "step": 114200 }, { "epoch": 0.02983130190164119, "grad_norm": 6.01683235168457, "learning_rate": 9.997516891979418e-06, "loss": 3.1501, "step": 114400 }, { "epoch": 0.02988345452734336, "grad_norm": 6.238487720489502, "learning_rate": 9.997490480439705e-06, "loss": 3.1628, "step": 114600 }, { "epoch": 0.02993560715304553, "grad_norm": 6.446498870849609, "learning_rate": 9.997463929214368e-06, "loss": 3.1472, "step": 114800 }, { "epoch": 0.0299877597787477, "grad_norm": 7.348095417022705, "learning_rate": 9.997437238304154e-06, "loss": 3.171, "step": 115000 }, { "epoch": 0.03003991240444987, "grad_norm": 6.736360549926758, "learning_rate": 9.997410407709806e-06, "loss": 3.1841, "step": 115200 }, { "epoch": 0.030092065030152042, "grad_norm": 6.533570289611816, "learning_rate": 9.997383437432075e-06, "loss": 3.1261, "step": 115400 }, { "epoch": 0.030144217655854212, "grad_norm": 6.483672618865967, "learning_rate": 9.997356327471716e-06, "loss": 3.1736, "step": 115600 }, { "epoch": 0.03019637028155638, "grad_norm": 5.92385721206665, "learning_rate": 9.997329077829484e-06, "loss": 3.0937, "step": 115800 }, { "epoch": 0.03024852290725855, "grad_norm": 6.332182884216309, "learning_rate": 9.997301688506143e-06, "loss": 3.0874, "step": 116000 }, { "epoch": 0.03030067553296072, "grad_norm": 6.6715521812438965, "learning_rate": 9.997274159502457e-06, "loss": 3.1354, "step": 116200 }, { "epoch": 0.03035282815866289, "grad_norm": 6.579791069030762, "learning_rate": 9.997246490819197e-06, "loss": 3.1421, "step": 116400 }, { "epoch": 0.03040498078436506, "grad_norm": 7.069493770599365, "learning_rate": 9.997218682457135e-06, "loss": 3.1171, "step": 116600 }, { "epoch": 0.03045713341006723, "grad_norm": 6.911351680755615, "learning_rate": 9.997190734417048e-06, "loss": 3.0969, "step": 116800 }, { "epoch": 0.0305092860357694, "grad_norm": 6.206057548522949, "learning_rate": 9.99716264669972e-06, "loss": 3.1368, "step": 117000 }, { "epoch": 0.03056143866147157, "grad_norm": 6.447131156921387, "learning_rate": 9.997134419305933e-06, "loss": 3.1165, "step": 117200 }, { "epoch": 0.03061359128717374, "grad_norm": 6.3493804931640625, "learning_rate": 9.997106052236475e-06, "loss": 3.1207, "step": 117400 }, { "epoch": 0.03066574391287591, "grad_norm": 6.453786849975586, "learning_rate": 9.997077545492144e-06, "loss": 3.1396, "step": 117600 }, { "epoch": 0.03071789653857808, "grad_norm": 6.4193010330200195, "learning_rate": 9.997048899073734e-06, "loss": 3.129, "step": 117800 }, { "epoch": 0.03077004916428025, "grad_norm": 6.173766136169434, "learning_rate": 9.997020112982043e-06, "loss": 3.157, "step": 118000 }, { "epoch": 0.03082220178998242, "grad_norm": 6.576676368713379, "learning_rate": 9.99699118721788e-06, "loss": 3.1641, "step": 118200 }, { "epoch": 0.03087435441568459, "grad_norm": 6.761925220489502, "learning_rate": 9.99696212178205e-06, "loss": 3.101, "step": 118400 }, { "epoch": 0.030926507041386758, "grad_norm": 6.417123317718506, "learning_rate": 9.996932916675368e-06, "loss": 3.0993, "step": 118600 }, { "epoch": 0.030978659667088928, "grad_norm": 6.432227611541748, "learning_rate": 9.996903571898649e-06, "loss": 3.1012, "step": 118800 }, { "epoch": 0.0310308122927911, "grad_norm": 6.400918006896973, "learning_rate": 9.996874087452714e-06, "loss": 3.1017, "step": 119000 }, { "epoch": 0.03108296491849327, "grad_norm": 6.3425984382629395, "learning_rate": 9.996844463338387e-06, "loss": 3.0497, "step": 119200 }, { "epoch": 0.03113511754419544, "grad_norm": 6.788375377655029, "learning_rate": 9.996814699556494e-06, "loss": 3.1498, "step": 119400 }, { "epoch": 0.03118727016989761, "grad_norm": 6.5473761558532715, "learning_rate": 9.99678479610787e-06, "loss": 3.0913, "step": 119600 }, { "epoch": 0.03123942279559978, "grad_norm": 6.9028191566467285, "learning_rate": 9.996754752993348e-06, "loss": 3.1111, "step": 119800 }, { "epoch": 0.03129157542130195, "grad_norm": 6.383526802062988, "learning_rate": 9.99672457021377e-06, "loss": 3.1439, "step": 120000 }, { "epoch": 0.03134372804700412, "grad_norm": 6.976934909820557, "learning_rate": 9.996694247769979e-06, "loss": 3.1025, "step": 120200 }, { "epoch": 0.03139588067270629, "grad_norm": 6.561999320983887, "learning_rate": 9.996663785662823e-06, "loss": 3.1209, "step": 120400 }, { "epoch": 0.03144803329840846, "grad_norm": 6.6694817543029785, "learning_rate": 9.996633183893152e-06, "loss": 3.0911, "step": 120600 }, { "epoch": 0.03150018592411063, "grad_norm": 6.184081077575684, "learning_rate": 9.996602442461823e-06, "loss": 3.1038, "step": 120800 }, { "epoch": 0.0315523385498128, "grad_norm": 6.33581018447876, "learning_rate": 9.996571561369692e-06, "loss": 3.08, "step": 121000 }, { "epoch": 0.03160449117551497, "grad_norm": 6.361224174499512, "learning_rate": 9.996540540617628e-06, "loss": 3.1018, "step": 121200 }, { "epoch": 0.03165664380121714, "grad_norm": 5.576674461364746, "learning_rate": 9.996509380206491e-06, "loss": 3.1165, "step": 121400 }, { "epoch": 0.03170879642691931, "grad_norm": 6.4147844314575195, "learning_rate": 9.996478080137158e-06, "loss": 3.0525, "step": 121600 }, { "epoch": 0.03176094905262148, "grad_norm": 7.201076030731201, "learning_rate": 9.996446640410502e-06, "loss": 3.0917, "step": 121800 }, { "epoch": 0.03181310167832365, "grad_norm": 6.715845584869385, "learning_rate": 9.9964150610274e-06, "loss": 3.0954, "step": 122000 }, { "epoch": 0.03186525430402582, "grad_norm": 6.023102283477783, "learning_rate": 9.996383341988736e-06, "loss": 3.0728, "step": 122200 }, { "epoch": 0.031917406929727984, "grad_norm": 7.259410381317139, "learning_rate": 9.996351483295396e-06, "loss": 3.0708, "step": 122400 }, { "epoch": 0.031969559555430155, "grad_norm": 6.183089256286621, "learning_rate": 9.996319484948273e-06, "loss": 3.1023, "step": 122600 }, { "epoch": 0.032021712181132325, "grad_norm": 6.622340202331543, "learning_rate": 9.996287346948258e-06, "loss": 3.0743, "step": 122800 }, { "epoch": 0.032073864806834496, "grad_norm": 5.829206943511963, "learning_rate": 9.996255069296251e-06, "loss": 3.1051, "step": 123000 }, { "epoch": 0.032126017432536666, "grad_norm": 6.266514301300049, "learning_rate": 9.996222651993153e-06, "loss": 3.0887, "step": 123200 }, { "epoch": 0.032178170058238836, "grad_norm": 5.771326065063477, "learning_rate": 9.996190095039874e-06, "loss": 3.0605, "step": 123400 }, { "epoch": 0.03223032268394101, "grad_norm": 6.461459636688232, "learning_rate": 9.996157398437319e-06, "loss": 3.089, "step": 123600 }, { "epoch": 0.03228247530964318, "grad_norm": 6.701711177825928, "learning_rate": 9.996124562186402e-06, "loss": 3.0984, "step": 123800 }, { "epoch": 0.03233462793534535, "grad_norm": 6.251786708831787, "learning_rate": 9.996091586288045e-06, "loss": 3.0881, "step": 124000 }, { "epoch": 0.03238678056104752, "grad_norm": 6.822304725646973, "learning_rate": 9.996058470743167e-06, "loss": 3.1225, "step": 124200 }, { "epoch": 0.03243893318674969, "grad_norm": 6.3806257247924805, "learning_rate": 9.996025215552694e-06, "loss": 3.0939, "step": 124400 }, { "epoch": 0.03249108581245186, "grad_norm": 7.634413242340088, "learning_rate": 9.995991820717557e-06, "loss": 3.0543, "step": 124600 }, { "epoch": 0.03254323843815403, "grad_norm": 7.31152868270874, "learning_rate": 9.995958286238685e-06, "loss": 3.1145, "step": 124800 }, { "epoch": 0.0325953910638562, "grad_norm": 6.632618427276611, "learning_rate": 9.99592461211702e-06, "loss": 3.0899, "step": 125000 }, { "epoch": 0.03264754368955836, "grad_norm": 6.440008640289307, "learning_rate": 9.995890798353503e-06, "loss": 3.0536, "step": 125200 }, { "epoch": 0.03269969631526053, "grad_norm": 6.341098785400391, "learning_rate": 9.995856844949075e-06, "loss": 3.1024, "step": 125400 }, { "epoch": 0.032751848940962704, "grad_norm": 6.386812686920166, "learning_rate": 9.99582275190469e-06, "loss": 3.0769, "step": 125600 }, { "epoch": 0.032804001566664874, "grad_norm": 6.581667900085449, "learning_rate": 9.995788519221297e-06, "loss": 3.0737, "step": 125800 }, { "epoch": 0.032856154192367044, "grad_norm": 7.009296894073486, "learning_rate": 9.995754146899856e-06, "loss": 3.0732, "step": 126000 }, { "epoch": 0.032908306818069215, "grad_norm": 6.533790588378906, "learning_rate": 9.995719634941325e-06, "loss": 3.1034, "step": 126200 }, { "epoch": 0.032960459443771385, "grad_norm": 6.344036102294922, "learning_rate": 9.99568498334667e-06, "loss": 3.0929, "step": 126400 }, { "epoch": 0.033012612069473556, "grad_norm": 6.279763221740723, "learning_rate": 9.995650192116862e-06, "loss": 3.068, "step": 126600 }, { "epoch": 0.033064764695175726, "grad_norm": 7.0318922996521, "learning_rate": 9.995615261252868e-06, "loss": 3.061, "step": 126800 }, { "epoch": 0.033116917320877896, "grad_norm": 6.694097518920898, "learning_rate": 9.995580190755667e-06, "loss": 3.0943, "step": 127000 }, { "epoch": 0.03316906994658007, "grad_norm": 5.494612693786621, "learning_rate": 9.995544980626241e-06, "loss": 3.1181, "step": 127200 }, { "epoch": 0.03322122257228224, "grad_norm": 6.504891395568848, "learning_rate": 9.995509630865573e-06, "loss": 3.0556, "step": 127400 }, { "epoch": 0.03327337519798441, "grad_norm": 6.207190036773682, "learning_rate": 9.99547414147465e-06, "loss": 3.106, "step": 127600 }, { "epoch": 0.03332552782368658, "grad_norm": 6.687653064727783, "learning_rate": 9.995438512454465e-06, "loss": 3.1383, "step": 127800 }, { "epoch": 0.03337768044938875, "grad_norm": 5.720125198364258, "learning_rate": 9.995402743806012e-06, "loss": 3.062, "step": 128000 }, { "epoch": 0.03342983307509091, "grad_norm": 5.79355001449585, "learning_rate": 9.995366835530295e-06, "loss": 3.0938, "step": 128200 }, { "epoch": 0.03348198570079308, "grad_norm": 6.339210510253906, "learning_rate": 9.995330787628315e-06, "loss": 3.0733, "step": 128400 }, { "epoch": 0.03353413832649525, "grad_norm": 6.438317775726318, "learning_rate": 9.995294600101077e-06, "loss": 3.0625, "step": 128600 }, { "epoch": 0.03358629095219742, "grad_norm": 6.823408126831055, "learning_rate": 9.995258272949597e-06, "loss": 3.1021, "step": 128800 }, { "epoch": 0.03363844357789959, "grad_norm": 5.777518272399902, "learning_rate": 9.995221806174888e-06, "loss": 3.0813, "step": 129000 }, { "epoch": 0.033690596203601764, "grad_norm": 6.753819942474365, "learning_rate": 9.99518519977797e-06, "loss": 3.0556, "step": 129200 }, { "epoch": 0.033742748829303934, "grad_norm": 6.781210422515869, "learning_rate": 9.995148453759866e-06, "loss": 3.1216, "step": 129400 }, { "epoch": 0.033794901455006104, "grad_norm": 6.243967533111572, "learning_rate": 9.995111568121605e-06, "loss": 3.0294, "step": 129600 }, { "epoch": 0.033847054080708275, "grad_norm": 6.602694511413574, "learning_rate": 9.995074542864215e-06, "loss": 3.0701, "step": 129800 }, { "epoch": 0.033899206706410445, "grad_norm": 6.756824016571045, "learning_rate": 9.99503737798873e-06, "loss": 3.0907, "step": 130000 }, { "epoch": 0.033951359332112616, "grad_norm": 6.042007923126221, "learning_rate": 9.995000073496192e-06, "loss": 3.0582, "step": 130200 }, { "epoch": 0.034003511957814786, "grad_norm": 7.078050136566162, "learning_rate": 9.994962629387643e-06, "loss": 3.0474, "step": 130400 }, { "epoch": 0.034055664583516956, "grad_norm": 5.917229652404785, "learning_rate": 9.994925045664127e-06, "loss": 3.0569, "step": 130600 }, { "epoch": 0.03410781720921913, "grad_norm": 6.29770565032959, "learning_rate": 9.994887322326698e-06, "loss": 3.0739, "step": 130800 }, { "epoch": 0.0341599698349213, "grad_norm": 5.848755836486816, "learning_rate": 9.99484945937641e-06, "loss": 3.0496, "step": 131000 }, { "epoch": 0.03421212246062346, "grad_norm": 6.648880481719971, "learning_rate": 9.99481145681432e-06, "loss": 3.0691, "step": 131200 }, { "epoch": 0.03426427508632563, "grad_norm": 6.360013961791992, "learning_rate": 9.99477331464149e-06, "loss": 3.084, "step": 131400 }, { "epoch": 0.0343164277120278, "grad_norm": 6.162940979003906, "learning_rate": 9.994735032858987e-06, "loss": 3.1258, "step": 131600 }, { "epoch": 0.03436858033772997, "grad_norm": 6.413846492767334, "learning_rate": 9.99469661146788e-06, "loss": 3.0983, "step": 131800 }, { "epoch": 0.03442073296343214, "grad_norm": 6.364840030670166, "learning_rate": 9.994658050469243e-06, "loss": 3.0567, "step": 132000 }, { "epoch": 0.03447288558913431, "grad_norm": 6.655250072479248, "learning_rate": 9.994619349864156e-06, "loss": 3.0799, "step": 132200 }, { "epoch": 0.03452503821483648, "grad_norm": 5.781773090362549, "learning_rate": 9.9945805096537e-06, "loss": 3.0514, "step": 132400 }, { "epoch": 0.03457719084053865, "grad_norm": 7.000985145568848, "learning_rate": 9.994541529838957e-06, "loss": 3.0704, "step": 132600 }, { "epoch": 0.034629343466240824, "grad_norm": 6.9071550369262695, "learning_rate": 9.99450241042102e-06, "loss": 3.0856, "step": 132800 }, { "epoch": 0.034681496091942994, "grad_norm": 5.725367546081543, "learning_rate": 9.994463151400984e-06, "loss": 3.0851, "step": 133000 }, { "epoch": 0.034733648717645164, "grad_norm": 6.823086738586426, "learning_rate": 9.994423752779942e-06, "loss": 3.0728, "step": 133200 }, { "epoch": 0.034785801343347335, "grad_norm": 6.22132682800293, "learning_rate": 9.994384214558999e-06, "loss": 3.0625, "step": 133400 }, { "epoch": 0.034837953969049505, "grad_norm": 6.658827304840088, "learning_rate": 9.994344536739256e-06, "loss": 3.0398, "step": 133600 }, { "epoch": 0.034890106594751676, "grad_norm": 6.231532096862793, "learning_rate": 9.994304719321825e-06, "loss": 3.0462, "step": 133800 }, { "epoch": 0.034942259220453846, "grad_norm": 6.183959484100342, "learning_rate": 9.994264762307821e-06, "loss": 3.0558, "step": 134000 }, { "epoch": 0.03499441184615601, "grad_norm": 6.413631439208984, "learning_rate": 9.994224665698356e-06, "loss": 3.0879, "step": 134200 }, { "epoch": 0.03504656447185818, "grad_norm": 6.981194496154785, "learning_rate": 9.994184429494554e-06, "loss": 3.0688, "step": 134400 }, { "epoch": 0.03509871709756035, "grad_norm": 6.0282135009765625, "learning_rate": 9.994144053697539e-06, "loss": 3.0768, "step": 134600 }, { "epoch": 0.03515086972326252, "grad_norm": 5.871272563934326, "learning_rate": 9.994103538308437e-06, "loss": 3.0281, "step": 134800 }, { "epoch": 0.03520302234896469, "grad_norm": 6.554361343383789, "learning_rate": 9.994062883328384e-06, "loss": 3.054, "step": 135000 }, { "epoch": 0.03525517497466686, "grad_norm": 6.451972007751465, "learning_rate": 9.994022088758515e-06, "loss": 3.0884, "step": 135200 }, { "epoch": 0.03530732760036903, "grad_norm": 6.5166168212890625, "learning_rate": 9.993981154599972e-06, "loss": 3.034, "step": 135400 }, { "epoch": 0.0353594802260712, "grad_norm": 6.459281921386719, "learning_rate": 9.993940080853895e-06, "loss": 3.0411, "step": 135600 }, { "epoch": 0.03541163285177337, "grad_norm": 6.400155544281006, "learning_rate": 9.993898867521435e-06, "loss": 3.0376, "step": 135800 }, { "epoch": 0.03546378547747554, "grad_norm": 6.980037212371826, "learning_rate": 9.993857514603744e-06, "loss": 3.0522, "step": 136000 }, { "epoch": 0.03551593810317771, "grad_norm": 7.635961532592773, "learning_rate": 9.99381602210198e-06, "loss": 3.0653, "step": 136200 }, { "epoch": 0.035568090728879884, "grad_norm": 7.077882289886475, "learning_rate": 9.993774390017294e-06, "loss": 3.0062, "step": 136400 }, { "epoch": 0.035620243354582054, "grad_norm": 6.433791637420654, "learning_rate": 9.99373261835086e-06, "loss": 3.0285, "step": 136600 }, { "epoch": 0.035672395980284224, "grad_norm": 6.266335964202881, "learning_rate": 9.99369070710384e-06, "loss": 3.0393, "step": 136800 }, { "epoch": 0.03572454860598639, "grad_norm": 7.584403038024902, "learning_rate": 9.993648656277409e-06, "loss": 3.0379, "step": 137000 }, { "epoch": 0.03577670123168856, "grad_norm": 5.1457133293151855, "learning_rate": 9.993606465872737e-06, "loss": 3.025, "step": 137200 }, { "epoch": 0.03582885385739073, "grad_norm": 6.755367279052734, "learning_rate": 9.993564135891007e-06, "loss": 3.039, "step": 137400 }, { "epoch": 0.0358810064830929, "grad_norm": 6.786823272705078, "learning_rate": 9.993521666333404e-06, "loss": 3.0242, "step": 137600 }, { "epoch": 0.03593315910879507, "grad_norm": 6.774286270141602, "learning_rate": 9.99347905720111e-06, "loss": 3.0289, "step": 137800 }, { "epoch": 0.03598531173449724, "grad_norm": 6.3922200202941895, "learning_rate": 9.99343630849532e-06, "loss": 3.0061, "step": 138000 }, { "epoch": 0.03603746436019941, "grad_norm": 6.802679061889648, "learning_rate": 9.993393420217229e-06, "loss": 3.0451, "step": 138200 }, { "epoch": 0.03608961698590158, "grad_norm": 6.436954021453857, "learning_rate": 9.993350392368031e-06, "loss": 3.0221, "step": 138400 }, { "epoch": 0.03614176961160375, "grad_norm": 6.1146721839904785, "learning_rate": 9.993307224948934e-06, "loss": 3.0119, "step": 138600 }, { "epoch": 0.03619392223730592, "grad_norm": 6.813344478607178, "learning_rate": 9.993263917961142e-06, "loss": 3.0613, "step": 138800 }, { "epoch": 0.03624607486300809, "grad_norm": 5.840065002441406, "learning_rate": 9.993220471405866e-06, "loss": 3.0639, "step": 139000 }, { "epoch": 0.03629822748871026, "grad_norm": 7.216462135314941, "learning_rate": 9.99317688528432e-06, "loss": 3.0472, "step": 139200 }, { "epoch": 0.03635038011441243, "grad_norm": 6.292632102966309, "learning_rate": 9.993133159597724e-06, "loss": 3.0252, "step": 139400 }, { "epoch": 0.0364025327401146, "grad_norm": 6.57106351852417, "learning_rate": 9.993089294347297e-06, "loss": 3.0342, "step": 139600 }, { "epoch": 0.03645468536581677, "grad_norm": 6.309388160705566, "learning_rate": 9.99304528953427e-06, "loss": 3.0658, "step": 139800 }, { "epoch": 0.03650683799151894, "grad_norm": 6.843193054199219, "learning_rate": 9.993001145159867e-06, "loss": 3.0457, "step": 140000 }, { "epoch": 0.03655899061722111, "grad_norm": 6.428710460662842, "learning_rate": 9.992956861225325e-06, "loss": 3.108, "step": 140200 }, { "epoch": 0.03661114324292328, "grad_norm": 5.9581499099731445, "learning_rate": 9.992912437731884e-06, "loss": 3.0241, "step": 140400 }, { "epoch": 0.03666329586862545, "grad_norm": 6.154149055480957, "learning_rate": 9.99286787468078e-06, "loss": 3.0459, "step": 140600 }, { "epoch": 0.03671544849432762, "grad_norm": 6.878948211669922, "learning_rate": 9.992823172073264e-06, "loss": 3.0133, "step": 140800 }, { "epoch": 0.03676760112002979, "grad_norm": 7.1989264488220215, "learning_rate": 9.992778329910585e-06, "loss": 3.1056, "step": 141000 }, { "epoch": 0.03681975374573196, "grad_norm": 6.319765567779541, "learning_rate": 9.992733348193993e-06, "loss": 2.9999, "step": 141200 }, { "epoch": 0.03687190637143413, "grad_norm": 6.223499298095703, "learning_rate": 9.992688226924747e-06, "loss": 3.0706, "step": 141400 }, { "epoch": 0.0369240589971363, "grad_norm": 5.92350435256958, "learning_rate": 9.992642966104107e-06, "loss": 3.0198, "step": 141600 }, { "epoch": 0.03697621162283847, "grad_norm": 6.555150032043457, "learning_rate": 9.992597565733341e-06, "loss": 3.0331, "step": 141800 }, { "epoch": 0.03702836424854064, "grad_norm": 6.806456089019775, "learning_rate": 9.992552025813716e-06, "loss": 3.042, "step": 142000 }, { "epoch": 0.03708051687424281, "grad_norm": 6.848278045654297, "learning_rate": 9.992506346346505e-06, "loss": 3.0455, "step": 142200 }, { "epoch": 0.03713266949994498, "grad_norm": 6.233341693878174, "learning_rate": 9.992460527332986e-06, "loss": 3.0627, "step": 142400 }, { "epoch": 0.03718482212564715, "grad_norm": 6.317975044250488, "learning_rate": 9.992414568774438e-06, "loss": 3.0447, "step": 142600 }, { "epoch": 0.03723697475134932, "grad_norm": 6.190425395965576, "learning_rate": 9.992368470672146e-06, "loss": 3.0495, "step": 142800 }, { "epoch": 0.037289127377051486, "grad_norm": 6.360509395599365, "learning_rate": 9.992322233027398e-06, "loss": 2.9939, "step": 143000 }, { "epoch": 0.037341280002753656, "grad_norm": 6.74845552444458, "learning_rate": 9.99227585584149e-06, "loss": 2.9787, "step": 143200 }, { "epoch": 0.037393432628455826, "grad_norm": 6.610714912414551, "learning_rate": 9.992229339115714e-06, "loss": 3.0016, "step": 143400 }, { "epoch": 0.037445585254158, "grad_norm": 6.472560405731201, "learning_rate": 9.99218268285137e-06, "loss": 2.9786, "step": 143600 }, { "epoch": 0.03749773787986017, "grad_norm": 7.01295804977417, "learning_rate": 9.992135887049766e-06, "loss": 3.0622, "step": 143800 }, { "epoch": 0.03754989050556234, "grad_norm": 5.686848163604736, "learning_rate": 9.992088951712207e-06, "loss": 3.0528, "step": 144000 }, { "epoch": 0.03760204313126451, "grad_norm": 6.806542873382568, "learning_rate": 9.992041876840007e-06, "loss": 3.0256, "step": 144200 }, { "epoch": 0.03765419575696668, "grad_norm": 6.5922088623046875, "learning_rate": 9.991994662434478e-06, "loss": 3.046, "step": 144400 }, { "epoch": 0.03770634838266885, "grad_norm": 6.281486988067627, "learning_rate": 9.991947308496945e-06, "loss": 3.0209, "step": 144600 }, { "epoch": 0.03775850100837102, "grad_norm": 6.36431360244751, "learning_rate": 9.991899815028728e-06, "loss": 2.9928, "step": 144800 }, { "epoch": 0.03781065363407319, "grad_norm": 6.694343090057373, "learning_rate": 9.991852182031153e-06, "loss": 3.0287, "step": 145000 }, { "epoch": 0.03786280625977536, "grad_norm": 6.216729164123535, "learning_rate": 9.991804409505557e-06, "loss": 3.0095, "step": 145200 }, { "epoch": 0.03791495888547753, "grad_norm": 6.089059352874756, "learning_rate": 9.99175649745327e-06, "loss": 3.0268, "step": 145400 }, { "epoch": 0.0379671115111797, "grad_norm": 5.32235050201416, "learning_rate": 9.991708445875633e-06, "loss": 3.066, "step": 145600 }, { "epoch": 0.03801926413688187, "grad_norm": 7.004727363586426, "learning_rate": 9.991660254773989e-06, "loss": 3.0154, "step": 145800 }, { "epoch": 0.038071416762584034, "grad_norm": 5.930303573608398, "learning_rate": 9.991611924149687e-06, "loss": 3.0313, "step": 146000 }, { "epoch": 0.038123569388286205, "grad_norm": 6.0669145584106445, "learning_rate": 9.991563454004076e-06, "loss": 3.0479, "step": 146200 }, { "epoch": 0.038175722013988375, "grad_norm": 6.367830276489258, "learning_rate": 9.991514844338509e-06, "loss": 3.0386, "step": 146400 }, { "epoch": 0.038227874639690546, "grad_norm": 6.8309221267700195, "learning_rate": 9.991466095154348e-06, "loss": 3.0073, "step": 146600 }, { "epoch": 0.038280027265392716, "grad_norm": 5.97960090637207, "learning_rate": 9.991417206452953e-06, "loss": 3.0035, "step": 146800 }, { "epoch": 0.038332179891094886, "grad_norm": 6.310183048248291, "learning_rate": 9.991368178235695e-06, "loss": 3.0623, "step": 147000 }, { "epoch": 0.03838433251679706, "grad_norm": 5.791366100311279, "learning_rate": 9.991319010503938e-06, "loss": 2.9937, "step": 147200 }, { "epoch": 0.03843648514249923, "grad_norm": 6.281528949737549, "learning_rate": 9.991269703259061e-06, "loss": 3.0342, "step": 147400 }, { "epoch": 0.0384886377682014, "grad_norm": 6.502980709075928, "learning_rate": 9.99122025650244e-06, "loss": 3.0311, "step": 147600 }, { "epoch": 0.03854079039390357, "grad_norm": 6.228078842163086, "learning_rate": 9.991170670235456e-06, "loss": 3.0259, "step": 147800 }, { "epoch": 0.03859294301960574, "grad_norm": 6.6315083503723145, "learning_rate": 9.9911209444595e-06, "loss": 3.0557, "step": 148000 }, { "epoch": 0.03864509564530791, "grad_norm": 5.9421916007995605, "learning_rate": 9.991071079175958e-06, "loss": 3.0471, "step": 148200 }, { "epoch": 0.03869724827101008, "grad_norm": 5.794018745422363, "learning_rate": 9.991021074386222e-06, "loss": 3.0512, "step": 148400 }, { "epoch": 0.03874940089671225, "grad_norm": 6.507724761962891, "learning_rate": 9.990970930091695e-06, "loss": 3.0823, "step": 148600 }, { "epoch": 0.03880155352241441, "grad_norm": 6.405786514282227, "learning_rate": 9.990920646293773e-06, "loss": 2.9923, "step": 148800 }, { "epoch": 0.03885370614811658, "grad_norm": 6.925950527191162, "learning_rate": 9.990870222993867e-06, "loss": 3.003, "step": 149000 }, { "epoch": 0.038905858773818754, "grad_norm": 6.7786030769348145, "learning_rate": 9.990819660193383e-06, "loss": 2.9691, "step": 149200 }, { "epoch": 0.038958011399520924, "grad_norm": 6.323172569274902, "learning_rate": 9.990768957893732e-06, "loss": 3.0457, "step": 149400 }, { "epoch": 0.039010164025223094, "grad_norm": 5.784226894378662, "learning_rate": 9.990718116096336e-06, "loss": 3.0644, "step": 149600 }, { "epoch": 0.039062316650925265, "grad_norm": 6.236383438110352, "learning_rate": 9.990667134802616e-06, "loss": 3.0147, "step": 149800 }, { "epoch": 0.039114469276627435, "grad_norm": 6.505099773406982, "learning_rate": 9.990616014013992e-06, "loss": 3.0315, "step": 150000 }, { "epoch": 0.039166621902329606, "grad_norm": 5.979858875274658, "learning_rate": 9.990564753731898e-06, "loss": 2.9863, "step": 150200 }, { "epoch": 0.039218774528031776, "grad_norm": 6.28549861907959, "learning_rate": 9.990513353957765e-06, "loss": 3.0117, "step": 150400 }, { "epoch": 0.039270927153733946, "grad_norm": 6.456376552581787, "learning_rate": 9.990461814693028e-06, "loss": 3.0182, "step": 150600 }, { "epoch": 0.03932307977943612, "grad_norm": 6.814133644104004, "learning_rate": 9.99041013593913e-06, "loss": 2.9962, "step": 150800 }, { "epoch": 0.03937523240513829, "grad_norm": 5.923751354217529, "learning_rate": 9.990358317697513e-06, "loss": 2.9773, "step": 151000 }, { "epoch": 0.03942738503084046, "grad_norm": 6.386137962341309, "learning_rate": 9.990306359969629e-06, "loss": 3.0187, "step": 151200 }, { "epoch": 0.03947953765654263, "grad_norm": 6.146199703216553, "learning_rate": 9.990254262756926e-06, "loss": 3.0153, "step": 151400 }, { "epoch": 0.0395316902822448, "grad_norm": 6.410252571105957, "learning_rate": 9.990202026060864e-06, "loss": 2.9887, "step": 151600 }, { "epoch": 0.03958384290794696, "grad_norm": 5.882859230041504, "learning_rate": 9.990149649882902e-06, "loss": 3.026, "step": 151800 }, { "epoch": 0.03963599553364913, "grad_norm": 6.177847862243652, "learning_rate": 9.990097134224503e-06, "loss": 2.9983, "step": 152000 }, { "epoch": 0.0396881481593513, "grad_norm": 6.150598049163818, "learning_rate": 9.990044479087134e-06, "loss": 2.9855, "step": 152200 }, { "epoch": 0.03974030078505347, "grad_norm": 6.874467372894287, "learning_rate": 9.989991684472269e-06, "loss": 3.0343, "step": 152400 }, { "epoch": 0.03979245341075564, "grad_norm": 6.260432720184326, "learning_rate": 9.989938750381383e-06, "loss": 3.0507, "step": 152600 }, { "epoch": 0.039844606036457814, "grad_norm": 6.114156246185303, "learning_rate": 9.989885676815955e-06, "loss": 3.0064, "step": 152800 }, { "epoch": 0.039896758662159984, "grad_norm": 6.27534294128418, "learning_rate": 9.989832463777469e-06, "loss": 2.9889, "step": 153000 }, { "epoch": 0.039948911287862154, "grad_norm": 6.149242877960205, "learning_rate": 9.989779111267411e-06, "loss": 2.9928, "step": 153200 }, { "epoch": 0.040001063913564325, "grad_norm": 6.631002902984619, "learning_rate": 9.989725619287276e-06, "loss": 2.9901, "step": 153400 }, { "epoch": 0.040053216539266495, "grad_norm": 6.320735454559326, "learning_rate": 9.989671987838554e-06, "loss": 3.0054, "step": 153600 }, { "epoch": 0.040105369164968666, "grad_norm": 6.417031764984131, "learning_rate": 9.989618216922747e-06, "loss": 3.0356, "step": 153800 }, { "epoch": 0.040157521790670836, "grad_norm": 5.854264736175537, "learning_rate": 9.989564306541359e-06, "loss": 2.9719, "step": 154000 }, { "epoch": 0.040209674416373006, "grad_norm": 6.106746196746826, "learning_rate": 9.989510256695893e-06, "loss": 3.0148, "step": 154200 }, { "epoch": 0.04026182704207518, "grad_norm": 6.044825553894043, "learning_rate": 9.989456067387864e-06, "loss": 2.9664, "step": 154400 }, { "epoch": 0.04031397966777735, "grad_norm": 6.020196437835693, "learning_rate": 9.989401738618785e-06, "loss": 2.9737, "step": 154600 }, { "epoch": 0.04036613229347951, "grad_norm": 5.970254898071289, "learning_rate": 9.989347270390174e-06, "loss": 3.0247, "step": 154800 }, { "epoch": 0.04041828491918168, "grad_norm": 6.415491104125977, "learning_rate": 9.989292662703554e-06, "loss": 3.0039, "step": 155000 }, { "epoch": 0.04047043754488385, "grad_norm": 6.5930094718933105, "learning_rate": 9.98923791556045e-06, "loss": 2.9937, "step": 155200 }, { "epoch": 0.04052259017058602, "grad_norm": 6.674596786499023, "learning_rate": 9.989183028962395e-06, "loss": 3.0017, "step": 155400 }, { "epoch": 0.04057474279628819, "grad_norm": 6.060914039611816, "learning_rate": 9.989128002910922e-06, "loss": 3.0152, "step": 155600 }, { "epoch": 0.04062689542199036, "grad_norm": 5.70841121673584, "learning_rate": 9.989072837407567e-06, "loss": 3.0362, "step": 155800 }, { "epoch": 0.04067904804769253, "grad_norm": 5.961905479431152, "learning_rate": 9.989017532453876e-06, "loss": 2.9848, "step": 156000 }, { "epoch": 0.0407312006733947, "grad_norm": 5.478226184844971, "learning_rate": 9.988962088051389e-06, "loss": 2.9478, "step": 156200 }, { "epoch": 0.040783353299096874, "grad_norm": 6.810139179229736, "learning_rate": 9.98890650420166e-06, "loss": 3.0032, "step": 156400 }, { "epoch": 0.040835505924799044, "grad_norm": 6.793480396270752, "learning_rate": 9.988850780906242e-06, "loss": 2.9757, "step": 156600 }, { "epoch": 0.040887658550501214, "grad_norm": 6.606081962585449, "learning_rate": 9.988794918166695e-06, "loss": 3.0407, "step": 156800 }, { "epoch": 0.040939811176203385, "grad_norm": 6.329009532928467, "learning_rate": 9.988738915984575e-06, "loss": 2.9683, "step": 157000 }, { "epoch": 0.040991963801905555, "grad_norm": 6.828449249267578, "learning_rate": 9.988682774361451e-06, "loss": 3.0212, "step": 157200 }, { "epoch": 0.041044116427607726, "grad_norm": 7.731756210327148, "learning_rate": 9.98862649329889e-06, "loss": 2.9905, "step": 157400 }, { "epoch": 0.041096269053309896, "grad_norm": 6.351221561431885, "learning_rate": 9.98857007279847e-06, "loss": 2.9857, "step": 157600 }, { "epoch": 0.04114842167901206, "grad_norm": 6.527481555938721, "learning_rate": 9.988513512861761e-06, "loss": 2.9985, "step": 157800 }, { "epoch": 0.04120057430471423, "grad_norm": 6.648956298828125, "learning_rate": 9.988456813490348e-06, "loss": 2.972, "step": 158000 }, { "epoch": 0.0412527269304164, "grad_norm": 6.268485069274902, "learning_rate": 9.988399974685815e-06, "loss": 2.9893, "step": 158200 }, { "epoch": 0.04130487955611857, "grad_norm": 6.1307806968688965, "learning_rate": 9.988342996449751e-06, "loss": 2.9647, "step": 158400 }, { "epoch": 0.04135703218182074, "grad_norm": 7.105797290802002, "learning_rate": 9.988285878783748e-06, "loss": 2.9828, "step": 158600 }, { "epoch": 0.04140918480752291, "grad_norm": 6.388288974761963, "learning_rate": 9.988228621689403e-06, "loss": 3.0058, "step": 158800 }, { "epoch": 0.04146133743322508, "grad_norm": 6.2273850440979, "learning_rate": 9.988171225168318e-06, "loss": 2.9956, "step": 159000 }, { "epoch": 0.04151349005892725, "grad_norm": 6.558915138244629, "learning_rate": 9.988113689222094e-06, "loss": 2.9879, "step": 159200 }, { "epoch": 0.04156564268462942, "grad_norm": 5.836716651916504, "learning_rate": 9.988056013852343e-06, "loss": 2.9391, "step": 159400 }, { "epoch": 0.04161779531033159, "grad_norm": 6.490838050842285, "learning_rate": 9.987998199060674e-06, "loss": 3.0215, "step": 159600 }, { "epoch": 0.04166994793603376, "grad_norm": 6.8759589195251465, "learning_rate": 9.987940244848701e-06, "loss": 3.0393, "step": 159800 }, { "epoch": 0.041722100561735934, "grad_norm": 6.900877952575684, "learning_rate": 9.987882151218052e-06, "loss": 2.9726, "step": 160000 }, { "epoch": 0.041774253187438104, "grad_norm": 6.450899124145508, "learning_rate": 9.987823918170343e-06, "loss": 3.0096, "step": 160200 }, { "epoch": 0.041826405813140274, "grad_norm": 6.636816024780273, "learning_rate": 9.987765545707202e-06, "loss": 2.9626, "step": 160400 }, { "epoch": 0.04187855843884244, "grad_norm": 6.257472515106201, "learning_rate": 9.987707033830266e-06, "loss": 2.9946, "step": 160600 }, { "epoch": 0.04193071106454461, "grad_norm": 7.453077793121338, "learning_rate": 9.987648382541167e-06, "loss": 3.0467, "step": 160800 }, { "epoch": 0.04198286369024678, "grad_norm": 6.978682041168213, "learning_rate": 9.987589591841545e-06, "loss": 2.9799, "step": 161000 }, { "epoch": 0.04203501631594895, "grad_norm": 6.9115095138549805, "learning_rate": 9.98753066173304e-06, "loss": 3.0193, "step": 161200 }, { "epoch": 0.04208716894165112, "grad_norm": 6.3997392654418945, "learning_rate": 9.987471592217306e-06, "loss": 3.0198, "step": 161400 }, { "epoch": 0.04213932156735329, "grad_norm": 5.600056171417236, "learning_rate": 9.987412383295988e-06, "loss": 3.0084, "step": 161600 }, { "epoch": 0.04219147419305546, "grad_norm": 6.3253984451293945, "learning_rate": 9.987353034970743e-06, "loss": 2.964, "step": 161800 }, { "epoch": 0.04224362681875763, "grad_norm": 7.4514384269714355, "learning_rate": 9.987293547243231e-06, "loss": 3.0007, "step": 162000 }, { "epoch": 0.0422957794444598, "grad_norm": 6.729308605194092, "learning_rate": 9.987233920115114e-06, "loss": 2.9636, "step": 162200 }, { "epoch": 0.04234793207016197, "grad_norm": 6.6917195320129395, "learning_rate": 9.987174153588058e-06, "loss": 2.9993, "step": 162400 }, { "epoch": 0.04240008469586414, "grad_norm": 6.379083156585693, "learning_rate": 9.987114247663734e-06, "loss": 2.9828, "step": 162600 }, { "epoch": 0.04245223732156631, "grad_norm": 7.085773944854736, "learning_rate": 9.987054202343817e-06, "loss": 2.9738, "step": 162800 }, { "epoch": 0.04250438994726848, "grad_norm": 6.782082557678223, "learning_rate": 9.986994017629983e-06, "loss": 2.9808, "step": 163000 }, { "epoch": 0.04255654257297065, "grad_norm": 5.9306182861328125, "learning_rate": 9.986933693523919e-06, "loss": 2.981, "step": 163200 }, { "epoch": 0.04260869519867282, "grad_norm": 6.100393295288086, "learning_rate": 9.986873230027305e-06, "loss": 2.9452, "step": 163400 }, { "epoch": 0.04266084782437499, "grad_norm": 6.214852809906006, "learning_rate": 9.986812627141836e-06, "loss": 2.9452, "step": 163600 }, { "epoch": 0.04271300045007716, "grad_norm": 6.460691928863525, "learning_rate": 9.986751884869204e-06, "loss": 2.9781, "step": 163800 }, { "epoch": 0.04276515307577933, "grad_norm": 5.98699951171875, "learning_rate": 9.986691003211106e-06, "loss": 2.9453, "step": 164000 }, { "epoch": 0.0428173057014815, "grad_norm": 6.0534281730651855, "learning_rate": 9.986629982169244e-06, "loss": 2.978, "step": 164200 }, { "epoch": 0.04286945832718367, "grad_norm": 6.20578145980835, "learning_rate": 9.986568821745327e-06, "loss": 2.9576, "step": 164400 }, { "epoch": 0.04292161095288584, "grad_norm": 6.547789573669434, "learning_rate": 9.986507521941058e-06, "loss": 3.0062, "step": 164600 }, { "epoch": 0.04297376357858801, "grad_norm": 6.358675956726074, "learning_rate": 9.986446082758157e-06, "loss": 2.9771, "step": 164800 }, { "epoch": 0.04302591620429018, "grad_norm": 6.335031509399414, "learning_rate": 9.986384504198336e-06, "loss": 2.9796, "step": 165000 }, { "epoch": 0.04307806882999235, "grad_norm": 6.8711090087890625, "learning_rate": 9.98632278626332e-06, "loss": 2.9605, "step": 165200 }, { "epoch": 0.04313022145569452, "grad_norm": 5.9179911613464355, "learning_rate": 9.986260928954833e-06, "loss": 2.9654, "step": 165400 }, { "epoch": 0.04318237408139669, "grad_norm": 6.011270046234131, "learning_rate": 9.986198932274601e-06, "loss": 2.9727, "step": 165600 }, { "epoch": 0.04323452670709886, "grad_norm": 6.755000591278076, "learning_rate": 9.986136796224363e-06, "loss": 2.9796, "step": 165800 }, { "epoch": 0.04328667933280103, "grad_norm": 6.37379264831543, "learning_rate": 9.986074520805853e-06, "loss": 3.0062, "step": 166000 }, { "epoch": 0.0433388319585032, "grad_norm": 6.6591877937316895, "learning_rate": 9.986012106020808e-06, "loss": 2.9866, "step": 166200 }, { "epoch": 0.04339098458420537, "grad_norm": 6.0572614669799805, "learning_rate": 9.985949551870977e-06, "loss": 3.014, "step": 166400 }, { "epoch": 0.043443137209907536, "grad_norm": 5.694347381591797, "learning_rate": 9.985886858358105e-06, "loss": 2.9979, "step": 166600 }, { "epoch": 0.043495289835609706, "grad_norm": 6.983020782470703, "learning_rate": 9.98582402548395e-06, "loss": 2.9455, "step": 166800 }, { "epoch": 0.043547442461311876, "grad_norm": 5.710437297821045, "learning_rate": 9.985761053250261e-06, "loss": 2.9718, "step": 167000 }, { "epoch": 0.04359959508701405, "grad_norm": 6.083151817321777, "learning_rate": 9.985697941658803e-06, "loss": 2.9651, "step": 167200 }, { "epoch": 0.04365174771271622, "grad_norm": 6.084566116333008, "learning_rate": 9.985634690711339e-06, "loss": 2.9767, "step": 167400 }, { "epoch": 0.04370390033841839, "grad_norm": 5.805023193359375, "learning_rate": 9.985571300409637e-06, "loss": 2.9637, "step": 167600 }, { "epoch": 0.04375605296412056, "grad_norm": 6.559878826141357, "learning_rate": 9.985507770755469e-06, "loss": 2.9828, "step": 167800 }, { "epoch": 0.04380820558982273, "grad_norm": 6.1664347648620605, "learning_rate": 9.98544410175061e-06, "loss": 2.967, "step": 168000 }, { "epoch": 0.0438603582155249, "grad_norm": 6.0472564697265625, "learning_rate": 9.985380293396839e-06, "loss": 2.9384, "step": 168200 }, { "epoch": 0.04391251084122707, "grad_norm": 6.2360029220581055, "learning_rate": 9.985316345695941e-06, "loss": 2.9687, "step": 168400 }, { "epoch": 0.04396466346692924, "grad_norm": 6.714407920837402, "learning_rate": 9.985252258649702e-06, "loss": 2.9763, "step": 168600 }, { "epoch": 0.04401681609263141, "grad_norm": 7.117247104644775, "learning_rate": 9.985188032259917e-06, "loss": 2.9749, "step": 168800 }, { "epoch": 0.04406896871833358, "grad_norm": 5.8086981773376465, "learning_rate": 9.985123666528376e-06, "loss": 2.9501, "step": 169000 }, { "epoch": 0.04412112134403575, "grad_norm": 6.109192848205566, "learning_rate": 9.98505916145688e-06, "loss": 3.0017, "step": 169200 }, { "epoch": 0.044173273969737914, "grad_norm": 6.363615989685059, "learning_rate": 9.984994517047234e-06, "loss": 2.9637, "step": 169400 }, { "epoch": 0.044225426595440084, "grad_norm": 5.879502773284912, "learning_rate": 9.984929733301243e-06, "loss": 2.9474, "step": 169600 }, { "epoch": 0.044277579221142255, "grad_norm": 6.141928672790527, "learning_rate": 9.984864810220717e-06, "loss": 2.982, "step": 169800 }, { "epoch": 0.044329731846844425, "grad_norm": 6.232560157775879, "learning_rate": 9.984799747807473e-06, "loss": 2.9348, "step": 170000 }, { "epoch": 0.044381884472546596, "grad_norm": 6.826907634735107, "learning_rate": 9.984734546063328e-06, "loss": 3.0119, "step": 170200 }, { "epoch": 0.044434037098248766, "grad_norm": 6.180233955383301, "learning_rate": 9.984669204990104e-06, "loss": 2.9685, "step": 170400 }, { "epoch": 0.044486189723950936, "grad_norm": 6.817476272583008, "learning_rate": 9.98460372458963e-06, "loss": 2.9706, "step": 170600 }, { "epoch": 0.04453834234965311, "grad_norm": 6.815362930297852, "learning_rate": 9.984538104863732e-06, "loss": 2.9585, "step": 170800 }, { "epoch": 0.04459049497535528, "grad_norm": 6.279226779937744, "learning_rate": 9.98447234581425e-06, "loss": 2.9819, "step": 171000 }, { "epoch": 0.04464264760105745, "grad_norm": 5.946432590484619, "learning_rate": 9.984406447443015e-06, "loss": 2.92, "step": 171200 }, { "epoch": 0.04469480022675962, "grad_norm": 6.094913482666016, "learning_rate": 9.984340409751875e-06, "loss": 2.9627, "step": 171400 }, { "epoch": 0.04474695285246179, "grad_norm": 6.792775630950928, "learning_rate": 9.984274232742673e-06, "loss": 2.9564, "step": 171600 }, { "epoch": 0.04479910547816396, "grad_norm": 6.441338062286377, "learning_rate": 9.984207916417257e-06, "loss": 2.9794, "step": 171800 }, { "epoch": 0.04485125810386613, "grad_norm": 6.825250625610352, "learning_rate": 9.984141460777484e-06, "loss": 2.9504, "step": 172000 }, { "epoch": 0.0449034107295683, "grad_norm": 6.110622882843018, "learning_rate": 9.98407486582521e-06, "loss": 2.9836, "step": 172200 }, { "epoch": 0.04495556335527046, "grad_norm": 7.110909461975098, "learning_rate": 9.984008131562299e-06, "loss": 2.9565, "step": 172400 }, { "epoch": 0.04500771598097263, "grad_norm": 6.6831488609313965, "learning_rate": 9.983941257990611e-06, "loss": 2.9093, "step": 172600 }, { "epoch": 0.045059868606674804, "grad_norm": 6.000646114349365, "learning_rate": 9.983874245112019e-06, "loss": 2.9815, "step": 172800 }, { "epoch": 0.045112021232376974, "grad_norm": 5.678177356719971, "learning_rate": 9.983807092928396e-06, "loss": 3.0084, "step": 173000 }, { "epoch": 0.045164173858079144, "grad_norm": 5.743597030639648, "learning_rate": 9.983739801441618e-06, "loss": 2.937, "step": 173200 }, { "epoch": 0.045216326483781315, "grad_norm": 6.317743301391602, "learning_rate": 9.983672370653565e-06, "loss": 2.9439, "step": 173400 }, { "epoch": 0.045268479109483485, "grad_norm": 6.100225448608398, "learning_rate": 9.983604800566124e-06, "loss": 2.9455, "step": 173600 }, { "epoch": 0.045320631735185656, "grad_norm": 5.574644088745117, "learning_rate": 9.98353709118118e-06, "loss": 2.9886, "step": 173800 }, { "epoch": 0.045372784360887826, "grad_norm": 6.011674880981445, "learning_rate": 9.983469242500632e-06, "loss": 2.9362, "step": 174000 }, { "epoch": 0.045424936986589996, "grad_norm": 6.598609447479248, "learning_rate": 9.98340125452637e-06, "loss": 2.9851, "step": 174200 }, { "epoch": 0.04547708961229217, "grad_norm": 7.741209030151367, "learning_rate": 9.983333127260299e-06, "loss": 2.917, "step": 174400 }, { "epoch": 0.04552924223799434, "grad_norm": 6.492852687835693, "learning_rate": 9.98326486070432e-06, "loss": 2.936, "step": 174600 }, { "epoch": 0.04558139486369651, "grad_norm": 6.744225978851318, "learning_rate": 9.98319645486034e-06, "loss": 2.953, "step": 174800 }, { "epoch": 0.04563354748939868, "grad_norm": 6.86396598815918, "learning_rate": 9.983127909730275e-06, "loss": 2.9551, "step": 175000 }, { "epoch": 0.04568570011510085, "grad_norm": 6.352712631225586, "learning_rate": 9.983059225316042e-06, "loss": 2.9466, "step": 175200 }, { "epoch": 0.04573785274080301, "grad_norm": 6.346860408782959, "learning_rate": 9.982990401619554e-06, "loss": 2.9497, "step": 175400 }, { "epoch": 0.04579000536650518, "grad_norm": 7.074236869812012, "learning_rate": 9.982921438642739e-06, "loss": 2.9437, "step": 175600 }, { "epoch": 0.04584215799220735, "grad_norm": 5.964405536651611, "learning_rate": 9.982852336387525e-06, "loss": 2.9576, "step": 175800 }, { "epoch": 0.04589431061790952, "grad_norm": 6.961352348327637, "learning_rate": 9.982783094855844e-06, "loss": 2.9587, "step": 176000 }, { "epoch": 0.04594646324361169, "grad_norm": 6.538314342498779, "learning_rate": 9.982713714049627e-06, "loss": 2.996, "step": 176200 }, { "epoch": 0.045998615869313864, "grad_norm": 5.937180519104004, "learning_rate": 9.98264419397082e-06, "loss": 2.9536, "step": 176400 }, { "epoch": 0.046050768495016034, "grad_norm": 5.624795913696289, "learning_rate": 9.98257453462136e-06, "loss": 2.9593, "step": 176600 }, { "epoch": 0.046102921120718204, "grad_norm": 6.610598087310791, "learning_rate": 9.982504736003198e-06, "loss": 2.9338, "step": 176800 }, { "epoch": 0.046155073746420375, "grad_norm": 6.419288158416748, "learning_rate": 9.982434798118283e-06, "loss": 2.9388, "step": 177000 }, { "epoch": 0.046207226372122545, "grad_norm": 6.405141830444336, "learning_rate": 9.982364720968572e-06, "loss": 2.9473, "step": 177200 }, { "epoch": 0.046259378997824716, "grad_norm": 6.353682994842529, "learning_rate": 9.98229450455602e-06, "loss": 2.9132, "step": 177400 }, { "epoch": 0.046311531623526886, "grad_norm": 5.974079608917236, "learning_rate": 9.982224148882593e-06, "loss": 2.9748, "step": 177600 }, { "epoch": 0.046363684249229056, "grad_norm": 5.714940071105957, "learning_rate": 9.982153653950254e-06, "loss": 2.9446, "step": 177800 }, { "epoch": 0.04641583687493123, "grad_norm": 6.13021183013916, "learning_rate": 9.982083019760978e-06, "loss": 2.9442, "step": 178000 }, { "epoch": 0.0464679895006334, "grad_norm": 6.623483180999756, "learning_rate": 9.982012246316737e-06, "loss": 2.9269, "step": 178200 }, { "epoch": 0.04652014212633556, "grad_norm": 6.2891364097595215, "learning_rate": 9.981941333619509e-06, "loss": 2.9979, "step": 178400 }, { "epoch": 0.04657229475203773, "grad_norm": 6.528567314147949, "learning_rate": 9.981870281671277e-06, "loss": 2.9436, "step": 178600 }, { "epoch": 0.0466244473777399, "grad_norm": 6.393155574798584, "learning_rate": 9.981799090474024e-06, "loss": 2.9763, "step": 178800 }, { "epoch": 0.04667660000344207, "grad_norm": 6.487037181854248, "learning_rate": 9.981727760029745e-06, "loss": 2.9561, "step": 179000 }, { "epoch": 0.04672875262914424, "grad_norm": 5.926519393920898, "learning_rate": 9.981656290340429e-06, "loss": 2.9715, "step": 179200 }, { "epoch": 0.04678090525484641, "grad_norm": 6.406266689300537, "learning_rate": 9.981584681408076e-06, "loss": 2.9343, "step": 179400 }, { "epoch": 0.04683305788054858, "grad_norm": 6.141971588134766, "learning_rate": 9.981512933234688e-06, "loss": 2.9437, "step": 179600 }, { "epoch": 0.04688521050625075, "grad_norm": 5.708194732666016, "learning_rate": 9.98144104582227e-06, "loss": 2.927, "step": 179800 }, { "epoch": 0.046937363131952924, "grad_norm": 6.583145618438721, "learning_rate": 9.981369019172829e-06, "loss": 2.9649, "step": 180000 }, { "epoch": 0.046989515757655094, "grad_norm": 6.8080363273620605, "learning_rate": 9.981296853288382e-06, "loss": 2.9718, "step": 180200 }, { "epoch": 0.047041668383357264, "grad_norm": 5.960728645324707, "learning_rate": 9.981224548170942e-06, "loss": 2.9552, "step": 180400 }, { "epoch": 0.047093821009059435, "grad_norm": 6.71892786026001, "learning_rate": 9.981152103822535e-06, "loss": 2.878, "step": 180600 }, { "epoch": 0.047145973634761605, "grad_norm": 6.228333950042725, "learning_rate": 9.98107952024518e-06, "loss": 2.9596, "step": 180800 }, { "epoch": 0.047198126260463776, "grad_norm": 6.656292915344238, "learning_rate": 9.981006797440913e-06, "loss": 2.9251, "step": 181000 }, { "epoch": 0.04725027888616594, "grad_norm": 6.142068386077881, "learning_rate": 9.98093393541176e-06, "loss": 2.9004, "step": 181200 }, { "epoch": 0.04730243151186811, "grad_norm": 6.923412799835205, "learning_rate": 9.980860934159761e-06, "loss": 2.9182, "step": 181400 }, { "epoch": 0.04735458413757028, "grad_norm": 7.025545597076416, "learning_rate": 9.980787793686955e-06, "loss": 2.9484, "step": 181600 }, { "epoch": 0.04740673676327245, "grad_norm": 6.994638442993164, "learning_rate": 9.980714513995389e-06, "loss": 2.9093, "step": 181800 }, { "epoch": 0.04745888938897462, "grad_norm": 6.052518367767334, "learning_rate": 9.980641095087107e-06, "loss": 2.9469, "step": 182000 }, { "epoch": 0.04751104201467679, "grad_norm": 6.930753707885742, "learning_rate": 9.980567536964166e-06, "loss": 2.916, "step": 182200 }, { "epoch": 0.04756319464037896, "grad_norm": 5.495729923248291, "learning_rate": 9.980493839628618e-06, "loss": 2.9307, "step": 182400 }, { "epoch": 0.04761534726608113, "grad_norm": 5.72206974029541, "learning_rate": 9.980420003082524e-06, "loss": 2.9098, "step": 182600 }, { "epoch": 0.0476674998917833, "grad_norm": 6.242524147033691, "learning_rate": 9.980346027327948e-06, "loss": 2.889, "step": 182800 }, { "epoch": 0.04771965251748547, "grad_norm": 6.146420955657959, "learning_rate": 9.980271912366959e-06, "loss": 2.942, "step": 183000 }, { "epoch": 0.04777180514318764, "grad_norm": 6.690853595733643, "learning_rate": 9.980197658201627e-06, "loss": 2.9214, "step": 183200 }, { "epoch": 0.04782395776888981, "grad_norm": 5.759744644165039, "learning_rate": 9.98012326483403e-06, "loss": 2.9074, "step": 183400 }, { "epoch": 0.047876110394591984, "grad_norm": 7.025085926055908, "learning_rate": 9.980048732266243e-06, "loss": 2.9566, "step": 183600 }, { "epoch": 0.047928263020294154, "grad_norm": 6.400143623352051, "learning_rate": 9.979974060500353e-06, "loss": 2.9725, "step": 183800 }, { "epoch": 0.047980415645996324, "grad_norm": 6.825561046600342, "learning_rate": 9.979899249538445e-06, "loss": 2.8797, "step": 184000 }, { "epoch": 0.04803256827169849, "grad_norm": 6.444281578063965, "learning_rate": 9.979824299382612e-06, "loss": 2.9194, "step": 184200 }, { "epoch": 0.04808472089740066, "grad_norm": 5.93040132522583, "learning_rate": 9.979749210034948e-06, "loss": 2.9068, "step": 184400 }, { "epoch": 0.04813687352310283, "grad_norm": 5.90882682800293, "learning_rate": 9.97967398149755e-06, "loss": 2.894, "step": 184600 }, { "epoch": 0.048189026148805, "grad_norm": 5.31546688079834, "learning_rate": 9.979598613772523e-06, "loss": 2.9244, "step": 184800 }, { "epoch": 0.04824117877450717, "grad_norm": 6.048388957977295, "learning_rate": 9.979523106861974e-06, "loss": 2.8599, "step": 185000 }, { "epoch": 0.04829333140020934, "grad_norm": 6.296876430511475, "learning_rate": 9.979447460768012e-06, "loss": 2.9213, "step": 185200 }, { "epoch": 0.04834548402591151, "grad_norm": 6.221283435821533, "learning_rate": 9.979371675492753e-06, "loss": 2.9058, "step": 185400 }, { "epoch": 0.04839763665161368, "grad_norm": 7.359024524688721, "learning_rate": 9.979295751038313e-06, "loss": 2.9192, "step": 185600 }, { "epoch": 0.04844978927731585, "grad_norm": 6.607453346252441, "learning_rate": 9.979219687406816e-06, "loss": 3.0031, "step": 185800 }, { "epoch": 0.04850194190301802, "grad_norm": 6.028778553009033, "learning_rate": 9.979143484600387e-06, "loss": 2.9068, "step": 186000 }, { "epoch": 0.04855409452872019, "grad_norm": 5.630479335784912, "learning_rate": 9.979067142621157e-06, "loss": 2.9278, "step": 186200 }, { "epoch": 0.04860624715442236, "grad_norm": 6.049453258514404, "learning_rate": 9.978990661471257e-06, "loss": 2.891, "step": 186400 }, { "epoch": 0.04865839978012453, "grad_norm": 6.467658519744873, "learning_rate": 9.97891404115283e-06, "loss": 2.9207, "step": 186600 }, { "epoch": 0.0487105524058267, "grad_norm": 6.942977428436279, "learning_rate": 9.978837281668013e-06, "loss": 2.9211, "step": 186800 }, { "epoch": 0.04876270503152887, "grad_norm": 6.686459541320801, "learning_rate": 9.978760383018953e-06, "loss": 2.9207, "step": 187000 }, { "epoch": 0.04881485765723104, "grad_norm": 6.126353740692139, "learning_rate": 9.978683345207802e-06, "loss": 2.9331, "step": 187200 }, { "epoch": 0.04886701028293321, "grad_norm": 6.489861488342285, "learning_rate": 9.97860616823671e-06, "loss": 2.9333, "step": 187400 }, { "epoch": 0.04891916290863538, "grad_norm": 6.95447301864624, "learning_rate": 9.978528852107833e-06, "loss": 2.9839, "step": 187600 }, { "epoch": 0.04897131553433755, "grad_norm": 5.979517459869385, "learning_rate": 9.978451396823334e-06, "loss": 2.9585, "step": 187800 }, { "epoch": 0.04902346816003972, "grad_norm": 6.67393684387207, "learning_rate": 9.97837380238538e-06, "loss": 2.9367, "step": 188000 }, { "epoch": 0.04907562078574189, "grad_norm": 5.634337425231934, "learning_rate": 9.978296068796138e-06, "loss": 2.9027, "step": 188200 }, { "epoch": 0.04912777341144406, "grad_norm": 6.696192264556885, "learning_rate": 9.97821819605778e-06, "loss": 2.8881, "step": 188400 }, { "epoch": 0.04917992603714623, "grad_norm": 5.69584321975708, "learning_rate": 9.978140184172482e-06, "loss": 2.9149, "step": 188600 }, { "epoch": 0.0492320786628484, "grad_norm": 6.718496322631836, "learning_rate": 9.978062033142429e-06, "loss": 2.8973, "step": 188800 }, { "epoch": 0.04928423128855057, "grad_norm": 6.430253505706787, "learning_rate": 9.977983742969798e-06, "loss": 2.9127, "step": 189000 }, { "epoch": 0.04933638391425274, "grad_norm": 6.769885063171387, "learning_rate": 9.977905313656785e-06, "loss": 2.9418, "step": 189200 }, { "epoch": 0.04938853653995491, "grad_norm": 6.998106956481934, "learning_rate": 9.977826745205578e-06, "loss": 2.9484, "step": 189400 }, { "epoch": 0.04944068916565708, "grad_norm": 6.269783020019531, "learning_rate": 9.977748037618374e-06, "loss": 2.9273, "step": 189600 }, { "epoch": 0.04949284179135925, "grad_norm": 6.623769283294678, "learning_rate": 9.977669190897372e-06, "loss": 2.9459, "step": 189800 }, { "epoch": 0.04954499441706142, "grad_norm": 6.445021152496338, "learning_rate": 9.977590205044776e-06, "loss": 2.932, "step": 190000 }, { "epoch": 0.049597147042763585, "grad_norm": 6.26726770401001, "learning_rate": 9.977511080062797e-06, "loss": 2.9177, "step": 190200 }, { "epoch": 0.049649299668465756, "grad_norm": 5.96261739730835, "learning_rate": 9.977431815953642e-06, "loss": 2.9476, "step": 190400 }, { "epoch": 0.049701452294167926, "grad_norm": 6.362253189086914, "learning_rate": 9.977352412719528e-06, "loss": 2.9598, "step": 190600 }, { "epoch": 0.0497536049198701, "grad_norm": 6.155995845794678, "learning_rate": 9.977272870362676e-06, "loss": 2.8895, "step": 190800 }, { "epoch": 0.04980575754557227, "grad_norm": 6.854986667633057, "learning_rate": 9.977193188885307e-06, "loss": 2.9139, "step": 191000 }, { "epoch": 0.04985791017127444, "grad_norm": 5.611915588378906, "learning_rate": 9.97711336828965e-06, "loss": 2.9157, "step": 191200 }, { "epoch": 0.04991006279697661, "grad_norm": 6.135472774505615, "learning_rate": 9.977033408577936e-06, "loss": 2.8925, "step": 191400 }, { "epoch": 0.04996221542267878, "grad_norm": 6.853118419647217, "learning_rate": 9.976953309752401e-06, "loss": 2.9362, "step": 191600 }, { "epoch": 0.05001436804838095, "grad_norm": 6.357486248016357, "learning_rate": 9.97687307181528e-06, "loss": 2.9344, "step": 191800 }, { "epoch": 0.05006652067408312, "grad_norm": 6.773046016693115, "learning_rate": 9.97679269476882e-06, "loss": 2.9625, "step": 192000 }, { "epoch": 0.05011867329978529, "grad_norm": 6.14764404296875, "learning_rate": 9.976712178615264e-06, "loss": 2.9519, "step": 192200 }, { "epoch": 0.05017082592548746, "grad_norm": 6.529992580413818, "learning_rate": 9.976631523356866e-06, "loss": 2.9013, "step": 192400 }, { "epoch": 0.05022297855118963, "grad_norm": 5.915500164031982, "learning_rate": 9.976550728995877e-06, "loss": 2.9096, "step": 192600 }, { "epoch": 0.0502751311768918, "grad_norm": 6.39959716796875, "learning_rate": 9.976469795534557e-06, "loss": 2.9478, "step": 192800 }, { "epoch": 0.050327283802593964, "grad_norm": 6.220710277557373, "learning_rate": 9.976388722975169e-06, "loss": 2.9282, "step": 193000 }, { "epoch": 0.050379436428296134, "grad_norm": 5.63620662689209, "learning_rate": 9.976307511319979e-06, "loss": 2.9344, "step": 193200 }, { "epoch": 0.050431589053998305, "grad_norm": 6.4898600578308105, "learning_rate": 9.976226160571254e-06, "loss": 2.9223, "step": 193400 }, { "epoch": 0.050483741679700475, "grad_norm": 5.993746757507324, "learning_rate": 9.97614467073127e-06, "loss": 2.9314, "step": 193600 }, { "epoch": 0.050535894305402645, "grad_norm": 6.51394510269165, "learning_rate": 9.976063041802306e-06, "loss": 2.9471, "step": 193800 }, { "epoch": 0.050588046931104816, "grad_norm": 5.766570091247559, "learning_rate": 9.975981273786643e-06, "loss": 2.954, "step": 194000 }, { "epoch": 0.050640199556806986, "grad_norm": 6.516523838043213, "learning_rate": 9.975899366686564e-06, "loss": 2.9265, "step": 194200 }, { "epoch": 0.05069235218250916, "grad_norm": 6.924788475036621, "learning_rate": 9.975817320504362e-06, "loss": 2.8861, "step": 194400 }, { "epoch": 0.05074450480821133, "grad_norm": 6.611748218536377, "learning_rate": 9.975735135242328e-06, "loss": 2.9391, "step": 194600 }, { "epoch": 0.0507966574339135, "grad_norm": 6.635772228240967, "learning_rate": 9.975652810902759e-06, "loss": 2.9112, "step": 194800 }, { "epoch": 0.05084881005961567, "grad_norm": 6.5964741706848145, "learning_rate": 9.975570347487958e-06, "loss": 2.8749, "step": 195000 }, { "epoch": 0.05090096268531784, "grad_norm": 5.7668986320495605, "learning_rate": 9.975487745000228e-06, "loss": 2.9284, "step": 195200 }, { "epoch": 0.05095311531102001, "grad_norm": 6.671104907989502, "learning_rate": 9.975405003441877e-06, "loss": 2.924, "step": 195400 }, { "epoch": 0.05100526793672218, "grad_norm": 6.551841735839844, "learning_rate": 9.975322122815221e-06, "loss": 2.9267, "step": 195600 }, { "epoch": 0.05105742056242435, "grad_norm": 6.587834358215332, "learning_rate": 9.975239103122576e-06, "loss": 2.9144, "step": 195800 }, { "epoch": 0.05110957318812651, "grad_norm": 6.745659828186035, "learning_rate": 9.97515594436626e-06, "loss": 2.9037, "step": 196000 }, { "epoch": 0.05116172581382868, "grad_norm": 6.943085670471191, "learning_rate": 9.975072646548597e-06, "loss": 2.9468, "step": 196200 }, { "epoch": 0.051213878439530854, "grad_norm": 6.360815525054932, "learning_rate": 9.97498920967192e-06, "loss": 2.9095, "step": 196400 }, { "epoch": 0.051266031065233024, "grad_norm": 6.90670108795166, "learning_rate": 9.974905633738559e-06, "loss": 2.8835, "step": 196600 }, { "epoch": 0.051318183690935194, "grad_norm": 5.816658020019531, "learning_rate": 9.974821918750846e-06, "loss": 2.9313, "step": 196800 }, { "epoch": 0.051370336316637365, "grad_norm": 6.233535289764404, "learning_rate": 9.974738064711125e-06, "loss": 2.9081, "step": 197000 }, { "epoch": 0.051422488942339535, "grad_norm": 6.274383068084717, "learning_rate": 9.97465407162174e-06, "loss": 2.9143, "step": 197200 }, { "epoch": 0.051474641568041705, "grad_norm": 6.0712738037109375, "learning_rate": 9.974569939485038e-06, "loss": 2.9224, "step": 197400 }, { "epoch": 0.051526794193743876, "grad_norm": 6.007919788360596, "learning_rate": 9.974485668303369e-06, "loss": 2.9035, "step": 197600 }, { "epoch": 0.051578946819446046, "grad_norm": 6.441709518432617, "learning_rate": 9.974401258079092e-06, "loss": 2.938, "step": 197800 }, { "epoch": 0.05163109944514822, "grad_norm": 5.86509895324707, "learning_rate": 9.974316708814562e-06, "loss": 2.9291, "step": 198000 }, { "epoch": 0.05168325207085039, "grad_norm": 6.994657039642334, "learning_rate": 9.974232020512144e-06, "loss": 2.8551, "step": 198200 }, { "epoch": 0.05173540469655256, "grad_norm": 6.415815353393555, "learning_rate": 9.974147193174205e-06, "loss": 2.9468, "step": 198400 }, { "epoch": 0.05178755732225473, "grad_norm": 5.555944919586182, "learning_rate": 9.97406222680312e-06, "loss": 2.8942, "step": 198600 }, { "epoch": 0.0518397099479569, "grad_norm": 6.089178085327148, "learning_rate": 9.973977121401258e-06, "loss": 2.9051, "step": 198800 }, { "epoch": 0.05189186257365906, "grad_norm": 6.0938496589660645, "learning_rate": 9.973891876971e-06, "loss": 2.9038, "step": 199000 }, { "epoch": 0.05194401519936123, "grad_norm": 6.132476806640625, "learning_rate": 9.97380649351473e-06, "loss": 2.912, "step": 199200 }, { "epoch": 0.0519961678250634, "grad_norm": 6.650424957275391, "learning_rate": 9.973720971034834e-06, "loss": 2.9345, "step": 199400 }, { "epoch": 0.05204832045076557, "grad_norm": 5.777918338775635, "learning_rate": 9.9736353095337e-06, "loss": 2.9157, "step": 199600 }, { "epoch": 0.05210047307646774, "grad_norm": 6.441890239715576, "learning_rate": 9.973549509013727e-06, "loss": 2.8985, "step": 199800 }, { "epoch": 0.052152625702169914, "grad_norm": 6.862900733947754, "learning_rate": 9.973463569477309e-06, "loss": 2.9326, "step": 200000 }, { "epoch": 0.052204778327872084, "grad_norm": 6.423320770263672, "learning_rate": 9.973377490926848e-06, "loss": 2.8986, "step": 200200 }, { "epoch": 0.052256930953574254, "grad_norm": 6.303603649139404, "learning_rate": 9.973291273364754e-06, "loss": 2.885, "step": 200400 }, { "epoch": 0.052309083579276425, "grad_norm": 6.71627140045166, "learning_rate": 9.973204916793433e-06, "loss": 2.8799, "step": 200600 }, { "epoch": 0.052361236204978595, "grad_norm": 6.168727397918701, "learning_rate": 9.9731184212153e-06, "loss": 2.8999, "step": 200800 }, { "epoch": 0.052413388830680765, "grad_norm": 6.115705966949463, "learning_rate": 9.973031786632773e-06, "loss": 2.9156, "step": 201000 }, { "epoch": 0.052465541456382936, "grad_norm": 5.8287200927734375, "learning_rate": 9.972945013048275e-06, "loss": 2.8945, "step": 201200 }, { "epoch": 0.052517694082085106, "grad_norm": 6.276069164276123, "learning_rate": 9.972858100464229e-06, "loss": 2.9383, "step": 201400 }, { "epoch": 0.05256984670778728, "grad_norm": 5.857385635375977, "learning_rate": 9.972771048883065e-06, "loss": 2.895, "step": 201600 }, { "epoch": 0.05262199933348944, "grad_norm": 7.052547931671143, "learning_rate": 9.972683858307217e-06, "loss": 2.8879, "step": 201800 }, { "epoch": 0.05267415195919161, "grad_norm": 6.282922267913818, "learning_rate": 9.97259652873912e-06, "loss": 2.8945, "step": 202000 }, { "epoch": 0.05272630458489378, "grad_norm": 6.320275783538818, "learning_rate": 9.972509060181218e-06, "loss": 2.9122, "step": 202200 }, { "epoch": 0.05277845721059595, "grad_norm": 5.877612113952637, "learning_rate": 9.972421452635954e-06, "loss": 2.8862, "step": 202400 }, { "epoch": 0.05283060983629812, "grad_norm": 6.659661769866943, "learning_rate": 9.972333706105777e-06, "loss": 2.9418, "step": 202600 }, { "epoch": 0.05288276246200029, "grad_norm": 6.702484130859375, "learning_rate": 9.972245820593138e-06, "loss": 2.9173, "step": 202800 }, { "epoch": 0.05293491508770246, "grad_norm": 6.761118412017822, "learning_rate": 9.972157796100497e-06, "loss": 2.9208, "step": 203000 }, { "epoch": 0.05298706771340463, "grad_norm": 5.8187642097473145, "learning_rate": 9.972069632630312e-06, "loss": 2.8982, "step": 203200 }, { "epoch": 0.0530392203391068, "grad_norm": 6.023776531219482, "learning_rate": 9.971981330185047e-06, "loss": 2.9081, "step": 203400 }, { "epoch": 0.053091372964808974, "grad_norm": 6.4479451179504395, "learning_rate": 9.971892888767172e-06, "loss": 2.9146, "step": 203600 }, { "epoch": 0.053143525590511144, "grad_norm": 6.822802543640137, "learning_rate": 9.971804308379156e-06, "loss": 2.8861, "step": 203800 }, { "epoch": 0.053195678216213314, "grad_norm": 6.120795726776123, "learning_rate": 9.971715589023478e-06, "loss": 2.8868, "step": 204000 }, { "epoch": 0.053247830841915485, "grad_norm": 5.959011077880859, "learning_rate": 9.971626730702617e-06, "loss": 2.886, "step": 204200 }, { "epoch": 0.053299983467617655, "grad_norm": 5.855833530426025, "learning_rate": 9.971537733419057e-06, "loss": 2.9224, "step": 204400 }, { "epoch": 0.053352136093319825, "grad_norm": 6.570242881774902, "learning_rate": 9.971448597175284e-06, "loss": 2.9392, "step": 204600 }, { "epoch": 0.05340428871902199, "grad_norm": 5.748819828033447, "learning_rate": 9.97135932197379e-06, "loss": 2.8955, "step": 204800 }, { "epoch": 0.05345644134472416, "grad_norm": 6.4593000411987305, "learning_rate": 9.97126990781707e-06, "loss": 2.878, "step": 205000 }, { "epoch": 0.05350859397042633, "grad_norm": 5.999743461608887, "learning_rate": 9.971180354707627e-06, "loss": 2.9366, "step": 205200 }, { "epoch": 0.0535607465961285, "grad_norm": 6.419029235839844, "learning_rate": 9.97109066264796e-06, "loss": 2.9191, "step": 205400 }, { "epoch": 0.05361289922183067, "grad_norm": 6.093469142913818, "learning_rate": 9.971000831640576e-06, "loss": 2.925, "step": 205600 }, { "epoch": 0.05366505184753284, "grad_norm": 6.597023010253906, "learning_rate": 9.970910861687988e-06, "loss": 2.905, "step": 205800 }, { "epoch": 0.05371720447323501, "grad_norm": 6.599836349487305, "learning_rate": 9.97082075279271e-06, "loss": 2.8901, "step": 206000 }, { "epoch": 0.05376935709893718, "grad_norm": 7.024128437042236, "learning_rate": 9.970730504957258e-06, "loss": 2.8819, "step": 206200 }, { "epoch": 0.05382150972463935, "grad_norm": 5.909964561462402, "learning_rate": 9.970640118184158e-06, "loss": 2.9062, "step": 206400 }, { "epoch": 0.05387366235034152, "grad_norm": 6.118581771850586, "learning_rate": 9.970549592475936e-06, "loss": 2.9099, "step": 206600 }, { "epoch": 0.05392581497604369, "grad_norm": 5.8928327560424805, "learning_rate": 9.970458927835122e-06, "loss": 2.8966, "step": 206800 }, { "epoch": 0.05397796760174586, "grad_norm": 6.539867877960205, "learning_rate": 9.970368124264249e-06, "loss": 2.9375, "step": 207000 }, { "epoch": 0.054030120227448034, "grad_norm": 6.851650714874268, "learning_rate": 9.970277181765858e-06, "loss": 2.863, "step": 207200 }, { "epoch": 0.054082272853150204, "grad_norm": 6.393698215484619, "learning_rate": 9.970186100342486e-06, "loss": 2.9317, "step": 207400 }, { "epoch": 0.054134425478852374, "grad_norm": 5.992748260498047, "learning_rate": 9.970094879996683e-06, "loss": 2.8793, "step": 207600 }, { "epoch": 0.05418657810455454, "grad_norm": 5.8636298179626465, "learning_rate": 9.970003520730997e-06, "loss": 2.925, "step": 207800 }, { "epoch": 0.05423873073025671, "grad_norm": 6.160617351531982, "learning_rate": 9.96991202254798e-06, "loss": 2.9411, "step": 208000 }, { "epoch": 0.05429088335595888, "grad_norm": 7.532545566558838, "learning_rate": 9.969820385450195e-06, "loss": 2.8771, "step": 208200 }, { "epoch": 0.05434303598166105, "grad_norm": 6.879082202911377, "learning_rate": 9.969728609440197e-06, "loss": 2.9334, "step": 208400 }, { "epoch": 0.05439518860736322, "grad_norm": 6.399229049682617, "learning_rate": 9.969636694520556e-06, "loss": 2.8777, "step": 208600 }, { "epoch": 0.05444734123306539, "grad_norm": 6.303351402282715, "learning_rate": 9.969544640693838e-06, "loss": 2.8949, "step": 208800 }, { "epoch": 0.05449949385876756, "grad_norm": 6.815507888793945, "learning_rate": 9.969452447962617e-06, "loss": 2.876, "step": 209000 }, { "epoch": 0.05455164648446973, "grad_norm": 6.317996978759766, "learning_rate": 9.969360116329472e-06, "loss": 2.8672, "step": 209200 }, { "epoch": 0.0546037991101719, "grad_norm": 6.783117294311523, "learning_rate": 9.96926764579698e-06, "loss": 2.8434, "step": 209400 }, { "epoch": 0.05465595173587407, "grad_norm": 6.445932865142822, "learning_rate": 9.969175036367728e-06, "loss": 2.9331, "step": 209600 }, { "epoch": 0.05470810436157624, "grad_norm": 6.53240966796875, "learning_rate": 9.969082288044304e-06, "loss": 2.9213, "step": 209800 }, { "epoch": 0.05476025698727841, "grad_norm": 6.389587879180908, "learning_rate": 9.968989400829301e-06, "loss": 2.9206, "step": 210000 }, { "epoch": 0.05481240961298058, "grad_norm": 6.929988384246826, "learning_rate": 9.968896374725314e-06, "loss": 2.8684, "step": 210200 }, { "epoch": 0.05486456223868275, "grad_norm": 6.338830471038818, "learning_rate": 9.968803209734944e-06, "loss": 2.8601, "step": 210400 }, { "epoch": 0.05491671486438492, "grad_norm": 6.379602909088135, "learning_rate": 9.968709905860796e-06, "loss": 2.8824, "step": 210600 }, { "epoch": 0.05496886749008709, "grad_norm": 6.1573710441589355, "learning_rate": 9.968616463105476e-06, "loss": 2.85, "step": 210800 }, { "epoch": 0.05502102011578926, "grad_norm": 5.6570611000061035, "learning_rate": 9.968522881471599e-06, "loss": 2.8345, "step": 211000 }, { "epoch": 0.05507317274149143, "grad_norm": 6.473025798797607, "learning_rate": 9.968429160961776e-06, "loss": 2.9062, "step": 211200 }, { "epoch": 0.0551253253671936, "grad_norm": 5.675154209136963, "learning_rate": 9.968335301578629e-06, "loss": 2.9005, "step": 211400 }, { "epoch": 0.05517747799289577, "grad_norm": 6.401944160461426, "learning_rate": 9.968241303324783e-06, "loss": 2.8383, "step": 211600 }, { "epoch": 0.05522963061859794, "grad_norm": 5.85010290145874, "learning_rate": 9.968147166202864e-06, "loss": 2.8892, "step": 211800 }, { "epoch": 0.05528178324430011, "grad_norm": 5.4949517250061035, "learning_rate": 9.968052890215502e-06, "loss": 2.8813, "step": 212000 }, { "epoch": 0.05533393587000228, "grad_norm": 6.284722328186035, "learning_rate": 9.967958475365334e-06, "loss": 2.877, "step": 212200 }, { "epoch": 0.05538608849570445, "grad_norm": 6.463416576385498, "learning_rate": 9.967863921654998e-06, "loss": 2.8947, "step": 212400 }, { "epoch": 0.05543824112140662, "grad_norm": 6.696000576019287, "learning_rate": 9.967769229087138e-06, "loss": 2.8707, "step": 212600 }, { "epoch": 0.05549039374710879, "grad_norm": 6.760770320892334, "learning_rate": 9.9676743976644e-06, "loss": 2.8864, "step": 212800 }, { "epoch": 0.05554254637281096, "grad_norm": 5.885373115539551, "learning_rate": 9.967579427389434e-06, "loss": 2.8919, "step": 213000 }, { "epoch": 0.05559469899851313, "grad_norm": 5.777275085449219, "learning_rate": 9.967484318264895e-06, "loss": 2.8508, "step": 213200 }, { "epoch": 0.0556468516242153, "grad_norm": 6.474213123321533, "learning_rate": 9.967389070293442e-06, "loss": 2.875, "step": 213400 }, { "epoch": 0.055699004249917465, "grad_norm": 6.556694030761719, "learning_rate": 9.967293683477737e-06, "loss": 2.9205, "step": 213600 }, { "epoch": 0.055751156875619635, "grad_norm": 6.6564202308654785, "learning_rate": 9.967198157820445e-06, "loss": 2.8501, "step": 213800 }, { "epoch": 0.055803309501321806, "grad_norm": 6.465703964233398, "learning_rate": 9.967102493324239e-06, "loss": 2.9072, "step": 214000 }, { "epoch": 0.055855462127023976, "grad_norm": 6.2116780281066895, "learning_rate": 9.96700668999179e-06, "loss": 2.8542, "step": 214200 }, { "epoch": 0.05590761475272615, "grad_norm": 6.195470333099365, "learning_rate": 9.966910747825775e-06, "loss": 2.8891, "step": 214400 }, { "epoch": 0.05595976737842832, "grad_norm": 6.002094745635986, "learning_rate": 9.96681466682888e-06, "loss": 2.8885, "step": 214600 }, { "epoch": 0.05601192000413049, "grad_norm": 6.3701653480529785, "learning_rate": 9.966718447003787e-06, "loss": 2.9023, "step": 214800 }, { "epoch": 0.05606407262983266, "grad_norm": 6.410655975341797, "learning_rate": 9.966622088353189e-06, "loss": 2.9003, "step": 215000 }, { "epoch": 0.05611622525553483, "grad_norm": 6.293005466461182, "learning_rate": 9.966525590879774e-06, "loss": 2.8997, "step": 215200 }, { "epoch": 0.056168377881237, "grad_norm": 7.010239124298096, "learning_rate": 9.966428954586243e-06, "loss": 2.9075, "step": 215400 }, { "epoch": 0.05622053050693917, "grad_norm": 7.058774948120117, "learning_rate": 9.966332179475296e-06, "loss": 2.8649, "step": 215600 }, { "epoch": 0.05627268313264134, "grad_norm": 6.797397613525391, "learning_rate": 9.966235265549637e-06, "loss": 2.8828, "step": 215800 }, { "epoch": 0.05632483575834351, "grad_norm": 6.849272727966309, "learning_rate": 9.96613821281198e-06, "loss": 2.8903, "step": 216000 }, { "epoch": 0.05637698838404568, "grad_norm": 5.813775062561035, "learning_rate": 9.96604102126503e-06, "loss": 2.8662, "step": 216200 }, { "epoch": 0.05642914100974785, "grad_norm": 6.535881996154785, "learning_rate": 9.965943690911509e-06, "loss": 2.8643, "step": 216400 }, { "epoch": 0.056481293635450014, "grad_norm": 6.4796576499938965, "learning_rate": 9.965846221754135e-06, "loss": 2.8863, "step": 216600 }, { "epoch": 0.056533446261152184, "grad_norm": 6.315826892852783, "learning_rate": 9.965748613795633e-06, "loss": 2.8721, "step": 216800 }, { "epoch": 0.056585598886854355, "grad_norm": 6.6356635093688965, "learning_rate": 9.965650867038732e-06, "loss": 2.8823, "step": 217000 }, { "epoch": 0.056637751512556525, "grad_norm": 6.826320648193359, "learning_rate": 9.965552981486163e-06, "loss": 2.8792, "step": 217200 }, { "epoch": 0.056689904138258695, "grad_norm": 5.991021633148193, "learning_rate": 9.965454957140665e-06, "loss": 2.8717, "step": 217400 }, { "epoch": 0.056742056763960866, "grad_norm": 5.6707048416137695, "learning_rate": 9.965356794004974e-06, "loss": 2.898, "step": 217600 }, { "epoch": 0.056794209389663036, "grad_norm": 6.135042667388916, "learning_rate": 9.965258492081835e-06, "loss": 2.862, "step": 217800 }, { "epoch": 0.05684636201536521, "grad_norm": 6.86878776550293, "learning_rate": 9.965160051373996e-06, "loss": 2.8626, "step": 218000 }, { "epoch": 0.05689851464106738, "grad_norm": 6.301908016204834, "learning_rate": 9.96506147188421e-06, "loss": 2.865, "step": 218200 }, { "epoch": 0.05695066726676955, "grad_norm": 6.453223705291748, "learning_rate": 9.96496275361523e-06, "loss": 2.8613, "step": 218400 }, { "epoch": 0.05700281989247172, "grad_norm": 6.500892639160156, "learning_rate": 9.964863896569816e-06, "loss": 2.8533, "step": 218600 }, { "epoch": 0.05705497251817389, "grad_norm": 5.618515491485596, "learning_rate": 9.964764900750731e-06, "loss": 2.872, "step": 218800 }, { "epoch": 0.05710712514387606, "grad_norm": 6.596179485321045, "learning_rate": 9.964665766160744e-06, "loss": 2.8752, "step": 219000 }, { "epoch": 0.05715927776957823, "grad_norm": 6.309721946716309, "learning_rate": 9.964566492802623e-06, "loss": 2.8882, "step": 219200 }, { "epoch": 0.0572114303952804, "grad_norm": 6.222306251525879, "learning_rate": 9.964467080679145e-06, "loss": 2.8676, "step": 219400 }, { "epoch": 0.05726358302098256, "grad_norm": 6.931632041931152, "learning_rate": 9.964367529793086e-06, "loss": 2.8618, "step": 219600 }, { "epoch": 0.05731573564668473, "grad_norm": 6.057002544403076, "learning_rate": 9.964267840147232e-06, "loss": 2.8378, "step": 219800 }, { "epoch": 0.057367888272386904, "grad_norm": 6.065264701843262, "learning_rate": 9.964168011744367e-06, "loss": 2.9028, "step": 220000 }, { "epoch": 0.057420040898089074, "grad_norm": 6.861353874206543, "learning_rate": 9.964068044587283e-06, "loss": 2.8933, "step": 220200 }, { "epoch": 0.057472193523791244, "grad_norm": 6.993216037750244, "learning_rate": 9.963967938678774e-06, "loss": 2.8741, "step": 220400 }, { "epoch": 0.057524346149493415, "grad_norm": 6.941470146179199, "learning_rate": 9.963867694021637e-06, "loss": 2.8534, "step": 220600 }, { "epoch": 0.057576498775195585, "grad_norm": 6.341226100921631, "learning_rate": 9.963767310618673e-06, "loss": 2.8837, "step": 220800 }, { "epoch": 0.057628651400897755, "grad_norm": 6.165623188018799, "learning_rate": 9.96366678847269e-06, "loss": 2.9066, "step": 221000 }, { "epoch": 0.057680804026599926, "grad_norm": 5.758754730224609, "learning_rate": 9.963566127586497e-06, "loss": 2.8871, "step": 221200 }, { "epoch": 0.057732956652302096, "grad_norm": 6.3193206787109375, "learning_rate": 9.963465327962907e-06, "loss": 2.852, "step": 221400 }, { "epoch": 0.05778510927800427, "grad_norm": 6.58033561706543, "learning_rate": 9.963364389604739e-06, "loss": 2.8514, "step": 221600 }, { "epoch": 0.05783726190370644, "grad_norm": 5.843617916107178, "learning_rate": 9.963263312514812e-06, "loss": 2.8494, "step": 221800 }, { "epoch": 0.05788941452940861, "grad_norm": 6.093498706817627, "learning_rate": 9.963162096695954e-06, "loss": 2.9139, "step": 222000 }, { "epoch": 0.05794156715511078, "grad_norm": 6.31635856628418, "learning_rate": 9.963060742150992e-06, "loss": 2.8631, "step": 222200 }, { "epoch": 0.05799371978081295, "grad_norm": 6.643308639526367, "learning_rate": 9.96295924888276e-06, "loss": 2.9046, "step": 222400 }, { "epoch": 0.05804587240651511, "grad_norm": 6.348966121673584, "learning_rate": 9.962857616894095e-06, "loss": 2.87, "step": 222600 }, { "epoch": 0.05809802503221728, "grad_norm": 6.411705017089844, "learning_rate": 9.962755846187834e-06, "loss": 2.8712, "step": 222800 }, { "epoch": 0.05815017765791945, "grad_norm": 6.178708553314209, "learning_rate": 9.962653936766828e-06, "loss": 2.8583, "step": 223000 }, { "epoch": 0.05820233028362162, "grad_norm": 6.825628757476807, "learning_rate": 9.962551888633923e-06, "loss": 2.8627, "step": 223200 }, { "epoch": 0.05825448290932379, "grad_norm": 5.721690654754639, "learning_rate": 9.96244970179197e-06, "loss": 2.8824, "step": 223400 }, { "epoch": 0.058306635535025964, "grad_norm": 6.0849127769470215, "learning_rate": 9.962347376243824e-06, "loss": 2.8654, "step": 223600 }, { "epoch": 0.058358788160728134, "grad_norm": 6.45623254776001, "learning_rate": 9.962244911992347e-06, "loss": 2.9017, "step": 223800 }, { "epoch": 0.058410940786430304, "grad_norm": 6.159735679626465, "learning_rate": 9.962142309040405e-06, "loss": 2.9074, "step": 224000 }, { "epoch": 0.058463093412132475, "grad_norm": 6.511104106903076, "learning_rate": 9.962039567390863e-06, "loss": 2.8768, "step": 224200 }, { "epoch": 0.058515246037834645, "grad_norm": 6.441580772399902, "learning_rate": 9.961936687046595e-06, "loss": 2.8928, "step": 224400 }, { "epoch": 0.058567398663536815, "grad_norm": 5.743595123291016, "learning_rate": 9.961833668010473e-06, "loss": 2.9014, "step": 224600 }, { "epoch": 0.058619551289238986, "grad_norm": 6.290615558624268, "learning_rate": 9.961730510285379e-06, "loss": 2.8954, "step": 224800 }, { "epoch": 0.058671703914941156, "grad_norm": 6.227867126464844, "learning_rate": 9.961627213874198e-06, "loss": 2.858, "step": 225000 }, { "epoch": 0.05872385654064333, "grad_norm": 6.234389305114746, "learning_rate": 9.961523778779814e-06, "loss": 2.8905, "step": 225200 }, { "epoch": 0.05877600916634549, "grad_norm": 6.24118709564209, "learning_rate": 9.961420205005118e-06, "loss": 2.8632, "step": 225400 }, { "epoch": 0.05882816179204766, "grad_norm": 6.4152750968933105, "learning_rate": 9.96131649255301e-06, "loss": 2.857, "step": 225600 }, { "epoch": 0.05888031441774983, "grad_norm": 7.344254493713379, "learning_rate": 9.961212641426384e-06, "loss": 2.8742, "step": 225800 }, { "epoch": 0.058932467043452, "grad_norm": 6.300400257110596, "learning_rate": 9.961108651628144e-06, "loss": 2.893, "step": 226000 }, { "epoch": 0.05898461966915417, "grad_norm": 5.992100715637207, "learning_rate": 9.961004523161197e-06, "loss": 2.8256, "step": 226200 }, { "epoch": 0.05903677229485634, "grad_norm": 5.90500545501709, "learning_rate": 9.960900256028453e-06, "loss": 2.9001, "step": 226400 }, { "epoch": 0.05908892492055851, "grad_norm": 6.329569339752197, "learning_rate": 9.960795850232827e-06, "loss": 2.8679, "step": 226600 }, { "epoch": 0.05914107754626068, "grad_norm": 6.701547622680664, "learning_rate": 9.960691305777235e-06, "loss": 2.8826, "step": 226800 }, { "epoch": 0.05919323017196285, "grad_norm": 7.074747085571289, "learning_rate": 9.960586622664603e-06, "loss": 2.8816, "step": 227000 }, { "epoch": 0.059245382797665024, "grad_norm": 5.853117942810059, "learning_rate": 9.960481800897855e-06, "loss": 2.8941, "step": 227200 }, { "epoch": 0.059297535423367194, "grad_norm": 6.21675968170166, "learning_rate": 9.960376840479922e-06, "loss": 2.9143, "step": 227400 }, { "epoch": 0.059349688049069364, "grad_norm": 6.886922836303711, "learning_rate": 9.960271741413737e-06, "loss": 2.8215, "step": 227600 }, { "epoch": 0.059401840674771535, "grad_norm": 6.113399028778076, "learning_rate": 9.960166503702234e-06, "loss": 2.8652, "step": 227800 }, { "epoch": 0.059453993300473705, "grad_norm": 6.7419304847717285, "learning_rate": 9.96006112734836e-06, "loss": 2.8997, "step": 228000 }, { "epoch": 0.059506145926175875, "grad_norm": 5.719422817230225, "learning_rate": 9.959955612355059e-06, "loss": 2.8397, "step": 228200 }, { "epoch": 0.05955829855187804, "grad_norm": 6.3306732177734375, "learning_rate": 9.959849958725278e-06, "loss": 2.8576, "step": 228400 }, { "epoch": 0.05961045117758021, "grad_norm": 6.876055717468262, "learning_rate": 9.959744166461973e-06, "loss": 2.8646, "step": 228600 }, { "epoch": 0.05966260380328238, "grad_norm": 6.750349998474121, "learning_rate": 9.9596382355681e-06, "loss": 2.8445, "step": 228800 }, { "epoch": 0.05971475642898455, "grad_norm": 5.776258945465088, "learning_rate": 9.959532166046619e-06, "loss": 2.8702, "step": 229000 }, { "epoch": 0.05976690905468672, "grad_norm": 6.348162651062012, "learning_rate": 9.959425957900497e-06, "loss": 2.8475, "step": 229200 }, { "epoch": 0.05981906168038889, "grad_norm": 6.766139507293701, "learning_rate": 9.959319611132698e-06, "loss": 2.8688, "step": 229400 }, { "epoch": 0.05987121430609106, "grad_norm": 6.318508625030518, "learning_rate": 9.959213125746198e-06, "loss": 2.8977, "step": 229600 }, { "epoch": 0.05992336693179323, "grad_norm": 6.733820915222168, "learning_rate": 9.959106501743975e-06, "loss": 2.8547, "step": 229800 }, { "epoch": 0.0599755195574954, "grad_norm": 7.468286037445068, "learning_rate": 9.958999739129006e-06, "loss": 2.9255, "step": 230000 }, { "epoch": 0.06002767218319757, "grad_norm": 6.781874179840088, "learning_rate": 9.958892837904277e-06, "loss": 2.8473, "step": 230200 }, { "epoch": 0.06007982480889974, "grad_norm": 6.273848056793213, "learning_rate": 9.958785798072775e-06, "loss": 2.8736, "step": 230400 }, { "epoch": 0.06013197743460191, "grad_norm": 6.0436320304870605, "learning_rate": 9.958678619637491e-06, "loss": 2.8747, "step": 230600 }, { "epoch": 0.060184130060304084, "grad_norm": 6.5935378074646, "learning_rate": 9.958571302601425e-06, "loss": 2.8568, "step": 230800 }, { "epoch": 0.060236282686006254, "grad_norm": 6.040035724639893, "learning_rate": 9.958463846967572e-06, "loss": 2.8901, "step": 231000 }, { "epoch": 0.060288435311708424, "grad_norm": 6.27731466293335, "learning_rate": 9.958356252738937e-06, "loss": 2.8297, "step": 231200 }, { "epoch": 0.06034058793741059, "grad_norm": 6.9736199378967285, "learning_rate": 9.958248519918527e-06, "loss": 2.8432, "step": 231400 }, { "epoch": 0.06039274056311276, "grad_norm": 6.537654876708984, "learning_rate": 9.958140648509354e-06, "loss": 2.907, "step": 231600 }, { "epoch": 0.06044489318881493, "grad_norm": 6.521362781524658, "learning_rate": 9.958032638514435e-06, "loss": 2.832, "step": 231800 }, { "epoch": 0.0604970458145171, "grad_norm": 6.518667697906494, "learning_rate": 9.957924489936783e-06, "loss": 2.8741, "step": 232000 }, { "epoch": 0.06054919844021927, "grad_norm": 6.39730167388916, "learning_rate": 9.957816202779429e-06, "loss": 2.841, "step": 232200 }, { "epoch": 0.06060135106592144, "grad_norm": 6.15274715423584, "learning_rate": 9.957707777045392e-06, "loss": 2.8499, "step": 232400 }, { "epoch": 0.06065350369162361, "grad_norm": 6.834228515625, "learning_rate": 9.957599212737707e-06, "loss": 2.8709, "step": 232600 }, { "epoch": 0.06070565631732578, "grad_norm": 6.3925251960754395, "learning_rate": 9.957490509859409e-06, "loss": 2.8808, "step": 232800 }, { "epoch": 0.06075780894302795, "grad_norm": 6.036910533905029, "learning_rate": 9.957381668413535e-06, "loss": 2.8857, "step": 233000 }, { "epoch": 0.06080996156873012, "grad_norm": 6.22501802444458, "learning_rate": 9.957272688403126e-06, "loss": 2.8427, "step": 233200 }, { "epoch": 0.06086211419443229, "grad_norm": 5.928175449371338, "learning_rate": 9.95716356983123e-06, "loss": 2.8687, "step": 233400 }, { "epoch": 0.06091426682013446, "grad_norm": 6.415838241577148, "learning_rate": 9.957054312700897e-06, "loss": 2.8375, "step": 233600 }, { "epoch": 0.06096641944583663, "grad_norm": 6.5070013999938965, "learning_rate": 9.956944917015179e-06, "loss": 2.8811, "step": 233800 }, { "epoch": 0.0610185720715388, "grad_norm": 5.6311821937561035, "learning_rate": 9.956835382777137e-06, "loss": 2.8823, "step": 234000 }, { "epoch": 0.061070724697240966, "grad_norm": 5.70000696182251, "learning_rate": 9.956725709989829e-06, "loss": 2.881, "step": 234200 }, { "epoch": 0.06112287732294314, "grad_norm": 6.649327754974365, "learning_rate": 9.956615898656322e-06, "loss": 2.8617, "step": 234400 }, { "epoch": 0.06117502994864531, "grad_norm": 6.379722595214844, "learning_rate": 9.956505948779687e-06, "loss": 2.8547, "step": 234600 }, { "epoch": 0.06122718257434748, "grad_norm": 6.3186116218566895, "learning_rate": 9.956395860362992e-06, "loss": 2.8306, "step": 234800 }, { "epoch": 0.06127933520004965, "grad_norm": 6.583649158477783, "learning_rate": 9.956285633409322e-06, "loss": 2.8816, "step": 235000 }, { "epoch": 0.06133148782575182, "grad_norm": 6.297317981719971, "learning_rate": 9.956175267921752e-06, "loss": 2.8649, "step": 235200 }, { "epoch": 0.06138364045145399, "grad_norm": 7.1909871101379395, "learning_rate": 9.956064763903368e-06, "loss": 2.8988, "step": 235400 }, { "epoch": 0.06143579307715616, "grad_norm": 6.038774490356445, "learning_rate": 9.955954121357262e-06, "loss": 2.8585, "step": 235600 }, { "epoch": 0.06148794570285833, "grad_norm": 6.198418140411377, "learning_rate": 9.95584334028652e-06, "loss": 2.8393, "step": 235800 }, { "epoch": 0.0615400983285605, "grad_norm": 6.438840389251709, "learning_rate": 9.955732420694244e-06, "loss": 2.8219, "step": 236000 }, { "epoch": 0.06159225095426267, "grad_norm": 6.378972053527832, "learning_rate": 9.955621362583533e-06, "loss": 2.8501, "step": 236200 }, { "epoch": 0.06164440357996484, "grad_norm": 6.684047222137451, "learning_rate": 9.95551016595749e-06, "loss": 2.8735, "step": 236400 }, { "epoch": 0.06169655620566701, "grad_norm": 6.312347412109375, "learning_rate": 9.955398830819225e-06, "loss": 2.852, "step": 236600 }, { "epoch": 0.06174870883136918, "grad_norm": 6.520938873291016, "learning_rate": 9.955287357171848e-06, "loss": 2.8494, "step": 236800 }, { "epoch": 0.06180086145707135, "grad_norm": 6.856761932373047, "learning_rate": 9.955175745018477e-06, "loss": 2.8285, "step": 237000 }, { "epoch": 0.061853014082773515, "grad_norm": 6.1591620445251465, "learning_rate": 9.955063994362229e-06, "loss": 2.8291, "step": 237200 }, { "epoch": 0.061905166708475685, "grad_norm": 5.8291850090026855, "learning_rate": 9.95495210520623e-06, "loss": 2.8288, "step": 237400 }, { "epoch": 0.061957319334177856, "grad_norm": 6.742002964019775, "learning_rate": 9.954840077553604e-06, "loss": 2.8435, "step": 237600 }, { "epoch": 0.062009471959880026, "grad_norm": 6.073098659515381, "learning_rate": 9.954727911407489e-06, "loss": 2.8344, "step": 237800 }, { "epoch": 0.0620616245855822, "grad_norm": 7.29561710357666, "learning_rate": 9.954615606771012e-06, "loss": 2.8352, "step": 238000 }, { "epoch": 0.06211377721128437, "grad_norm": 6.251704216003418, "learning_rate": 9.954503163647319e-06, "loss": 2.8576, "step": 238200 }, { "epoch": 0.06216592983698654, "grad_norm": 5.640725612640381, "learning_rate": 9.954390582039545e-06, "loss": 2.8439, "step": 238400 }, { "epoch": 0.06221808246268871, "grad_norm": 6.10871696472168, "learning_rate": 9.954277861950847e-06, "loss": 2.8231, "step": 238600 }, { "epoch": 0.06227023508839088, "grad_norm": 7.135128974914551, "learning_rate": 9.954165003384367e-06, "loss": 2.857, "step": 238800 }, { "epoch": 0.06232238771409305, "grad_norm": 6.5773773193359375, "learning_rate": 9.954052006343264e-06, "loss": 2.8329, "step": 239000 }, { "epoch": 0.06237454033979522, "grad_norm": 6.622820854187012, "learning_rate": 9.953938870830696e-06, "loss": 2.8687, "step": 239200 }, { "epoch": 0.06242669296549739, "grad_norm": 6.241696834564209, "learning_rate": 9.953825596849823e-06, "loss": 2.8318, "step": 239400 }, { "epoch": 0.06247884559119956, "grad_norm": 5.970136642456055, "learning_rate": 9.953712184403811e-06, "loss": 2.8651, "step": 239600 }, { "epoch": 0.06253099821690172, "grad_norm": 5.942645072937012, "learning_rate": 9.953598633495835e-06, "loss": 2.8463, "step": 239800 }, { "epoch": 0.0625831508426039, "grad_norm": 5.821661949157715, "learning_rate": 9.953484944129064e-06, "loss": 2.8955, "step": 240000 }, { "epoch": 0.06263530346830606, "grad_norm": 6.606387138366699, "learning_rate": 9.953371116306678e-06, "loss": 2.8606, "step": 240200 }, { "epoch": 0.06268745609400823, "grad_norm": 6.661681175231934, "learning_rate": 9.953257150031857e-06, "loss": 2.8399, "step": 240400 }, { "epoch": 0.0627396087197104, "grad_norm": 6.761116027832031, "learning_rate": 9.953143045307788e-06, "loss": 2.8202, "step": 240600 }, { "epoch": 0.06279176134541258, "grad_norm": 6.392307281494141, "learning_rate": 9.953028802137658e-06, "loss": 2.8542, "step": 240800 }, { "epoch": 0.06284391397111475, "grad_norm": 6.313865661621094, "learning_rate": 9.952914420524663e-06, "loss": 2.8388, "step": 241000 }, { "epoch": 0.06289606659681692, "grad_norm": 6.371045112609863, "learning_rate": 9.952799900472e-06, "loss": 2.8371, "step": 241200 }, { "epoch": 0.06294821922251909, "grad_norm": 5.998501777648926, "learning_rate": 9.952685241982867e-06, "loss": 2.7792, "step": 241400 }, { "epoch": 0.06300037184822126, "grad_norm": 6.239037036895752, "learning_rate": 9.952570445060472e-06, "loss": 2.8392, "step": 241600 }, { "epoch": 0.06305252447392343, "grad_norm": 6.2942304611206055, "learning_rate": 9.95245550970802e-06, "loss": 2.8724, "step": 241800 }, { "epoch": 0.0631046770996256, "grad_norm": 6.233669281005859, "learning_rate": 9.95234043592873e-06, "loss": 2.8326, "step": 242000 }, { "epoch": 0.06315682972532777, "grad_norm": 6.2911577224731445, "learning_rate": 9.952225223725814e-06, "loss": 2.8716, "step": 242200 }, { "epoch": 0.06320898235102994, "grad_norm": 6.646729946136475, "learning_rate": 9.95210987310249e-06, "loss": 2.8266, "step": 242400 }, { "epoch": 0.06326113497673211, "grad_norm": 6.116643905639648, "learning_rate": 9.951994384061988e-06, "loss": 2.8853, "step": 242600 }, { "epoch": 0.06331328760243428, "grad_norm": 6.70005464553833, "learning_rate": 9.951878756607532e-06, "loss": 2.8247, "step": 242800 }, { "epoch": 0.06336544022813645, "grad_norm": 5.81242036819458, "learning_rate": 9.951762990742356e-06, "loss": 2.8165, "step": 243000 }, { "epoch": 0.06341759285383862, "grad_norm": 6.2581329345703125, "learning_rate": 9.951647086469694e-06, "loss": 2.883, "step": 243200 }, { "epoch": 0.06346974547954079, "grad_norm": 6.079429626464844, "learning_rate": 9.951531043792785e-06, "loss": 2.8643, "step": 243400 }, { "epoch": 0.06352189810524296, "grad_norm": 6.490660667419434, "learning_rate": 9.951414862714877e-06, "loss": 2.8605, "step": 243600 }, { "epoch": 0.06357405073094513, "grad_norm": 6.3001909255981445, "learning_rate": 9.951298543239213e-06, "loss": 2.8898, "step": 243800 }, { "epoch": 0.0636262033566473, "grad_norm": 6.262456893920898, "learning_rate": 9.951182085369044e-06, "loss": 2.8698, "step": 244000 }, { "epoch": 0.06367835598234947, "grad_norm": 7.45053243637085, "learning_rate": 9.951065489107628e-06, "loss": 2.8388, "step": 244200 }, { "epoch": 0.06373050860805164, "grad_norm": 6.364867210388184, "learning_rate": 9.950948754458222e-06, "loss": 2.8592, "step": 244400 }, { "epoch": 0.0637826612337538, "grad_norm": 6.993120193481445, "learning_rate": 9.950831881424092e-06, "loss": 2.824, "step": 244600 }, { "epoch": 0.06383481385945597, "grad_norm": 6.528937339782715, "learning_rate": 9.9507148700085e-06, "loss": 2.8447, "step": 244800 }, { "epoch": 0.06388696648515814, "grad_norm": 6.995550155639648, "learning_rate": 9.950597720214721e-06, "loss": 2.8018, "step": 245000 }, { "epoch": 0.06393911911086031, "grad_norm": 6.352587699890137, "learning_rate": 9.950480432046025e-06, "loss": 2.8619, "step": 245200 }, { "epoch": 0.06399127173656248, "grad_norm": 6.427020072937012, "learning_rate": 9.950363005505695e-06, "loss": 2.854, "step": 245400 }, { "epoch": 0.06404342436226465, "grad_norm": 6.2600603103637695, "learning_rate": 9.950245440597011e-06, "loss": 2.8246, "step": 245600 }, { "epoch": 0.06409557698796682, "grad_norm": 5.7311906814575195, "learning_rate": 9.950127737323258e-06, "loss": 2.8132, "step": 245800 }, { "epoch": 0.06414772961366899, "grad_norm": 5.9210920333862305, "learning_rate": 9.950009895687727e-06, "loss": 2.8446, "step": 246000 }, { "epoch": 0.06419988223937116, "grad_norm": 5.30364990234375, "learning_rate": 9.949891915693712e-06, "loss": 2.8175, "step": 246200 }, { "epoch": 0.06425203486507333, "grad_norm": 6.701354503631592, "learning_rate": 9.949773797344511e-06, "loss": 2.8503, "step": 246400 }, { "epoch": 0.0643041874907755, "grad_norm": 6.559759140014648, "learning_rate": 9.949655540643425e-06, "loss": 2.8999, "step": 246600 }, { "epoch": 0.06435634011647767, "grad_norm": 6.051071643829346, "learning_rate": 9.949537145593759e-06, "loss": 2.8777, "step": 246800 }, { "epoch": 0.06440849274217984, "grad_norm": 5.7465972900390625, "learning_rate": 9.949418612198822e-06, "loss": 2.8398, "step": 247000 }, { "epoch": 0.06446064536788201, "grad_norm": 6.2163872718811035, "learning_rate": 9.94929994046193e-06, "loss": 2.8245, "step": 247200 }, { "epoch": 0.06451279799358418, "grad_norm": 6.6872639656066895, "learning_rate": 9.949181130386396e-06, "loss": 2.8519, "step": 247400 }, { "epoch": 0.06456495061928635, "grad_norm": 5.855112552642822, "learning_rate": 9.949062181975544e-06, "loss": 2.8286, "step": 247600 }, { "epoch": 0.06461710324498852, "grad_norm": 6.136778354644775, "learning_rate": 9.948943095232697e-06, "loss": 2.8164, "step": 247800 }, { "epoch": 0.0646692558706907, "grad_norm": 5.666250705718994, "learning_rate": 9.948823870161184e-06, "loss": 2.8247, "step": 248000 }, { "epoch": 0.06472140849639287, "grad_norm": 6.700359344482422, "learning_rate": 9.948704506764336e-06, "loss": 2.7989, "step": 248200 }, { "epoch": 0.06477356112209504, "grad_norm": 6.757811546325684, "learning_rate": 9.948585005045495e-06, "loss": 2.868, "step": 248400 }, { "epoch": 0.0648257137477972, "grad_norm": 6.108405113220215, "learning_rate": 9.948465365007995e-06, "loss": 2.85, "step": 248600 }, { "epoch": 0.06487786637349938, "grad_norm": 6.390014171600342, "learning_rate": 9.948345586655181e-06, "loss": 2.8826, "step": 248800 }, { "epoch": 0.06493001899920155, "grad_norm": 6.4399733543396, "learning_rate": 9.948225669990404e-06, "loss": 2.7958, "step": 249000 }, { "epoch": 0.06498217162490372, "grad_norm": 6.0152058601379395, "learning_rate": 9.948105615017014e-06, "loss": 2.8615, "step": 249200 }, { "epoch": 0.06503432425060589, "grad_norm": 6.281088352203369, "learning_rate": 9.947985421738366e-06, "loss": 2.8323, "step": 249400 }, { "epoch": 0.06508647687630806, "grad_norm": 7.041154384613037, "learning_rate": 9.94786509015782e-06, "loss": 2.812, "step": 249600 }, { "epoch": 0.06513862950201023, "grad_norm": 6.751956462860107, "learning_rate": 9.94774462027874e-06, "loss": 2.8045, "step": 249800 }, { "epoch": 0.0651907821277124, "grad_norm": 6.53183126449585, "learning_rate": 9.947624012104494e-06, "loss": 2.8629, "step": 250000 }, { "epoch": 0.06524293475341457, "grad_norm": 6.1272969245910645, "learning_rate": 9.947503265638449e-06, "loss": 2.8009, "step": 250200 }, { "epoch": 0.06529508737911673, "grad_norm": 6.2636942863464355, "learning_rate": 9.947382380883985e-06, "loss": 2.8502, "step": 250400 }, { "epoch": 0.0653472400048189, "grad_norm": 5.978120803833008, "learning_rate": 9.94726135784448e-06, "loss": 2.8581, "step": 250600 }, { "epoch": 0.06539939263052107, "grad_norm": 6.1595778465271, "learning_rate": 9.947140196523312e-06, "loss": 2.8445, "step": 250800 }, { "epoch": 0.06545154525622324, "grad_norm": 6.8264923095703125, "learning_rate": 9.947018896923875e-06, "loss": 2.8439, "step": 251000 }, { "epoch": 0.06550369788192541, "grad_norm": 5.948680877685547, "learning_rate": 9.946897459049553e-06, "loss": 2.8293, "step": 251200 }, { "epoch": 0.06555585050762758, "grad_norm": 6.657248497009277, "learning_rate": 9.946775882903745e-06, "loss": 2.8726, "step": 251400 }, { "epoch": 0.06560800313332975, "grad_norm": 5.8180155754089355, "learning_rate": 9.946654168489847e-06, "loss": 2.8181, "step": 251600 }, { "epoch": 0.06566015575903192, "grad_norm": 6.403216361999512, "learning_rate": 9.946532315811262e-06, "loss": 2.806, "step": 251800 }, { "epoch": 0.06571230838473409, "grad_norm": 6.092254638671875, "learning_rate": 9.946410324871394e-06, "loss": 2.8513, "step": 252000 }, { "epoch": 0.06576446101043626, "grad_norm": 5.990058422088623, "learning_rate": 9.946288195673654e-06, "loss": 2.8147, "step": 252200 }, { "epoch": 0.06581661363613843, "grad_norm": 6.334691047668457, "learning_rate": 9.946165928221456e-06, "loss": 2.8539, "step": 252400 }, { "epoch": 0.0658687662618406, "grad_norm": 5.580613613128662, "learning_rate": 9.946043522518217e-06, "loss": 2.8393, "step": 252600 }, { "epoch": 0.06592091888754277, "grad_norm": 6.962843418121338, "learning_rate": 9.945920978567357e-06, "loss": 2.8269, "step": 252800 }, { "epoch": 0.06597307151324494, "grad_norm": 5.977869510650635, "learning_rate": 9.945798296372303e-06, "loss": 2.8493, "step": 253000 }, { "epoch": 0.06602522413894711, "grad_norm": 5.998539447784424, "learning_rate": 9.945675475936487e-06, "loss": 2.8413, "step": 253200 }, { "epoch": 0.06607737676464928, "grad_norm": 6.014648914337158, "learning_rate": 9.945552517263336e-06, "loss": 2.7819, "step": 253400 }, { "epoch": 0.06612952939035145, "grad_norm": 6.409533977508545, "learning_rate": 9.945429420356291e-06, "loss": 2.8191, "step": 253600 }, { "epoch": 0.06618168201605362, "grad_norm": 7.013908386230469, "learning_rate": 9.94530618521879e-06, "loss": 2.8302, "step": 253800 }, { "epoch": 0.06623383464175579, "grad_norm": 6.836598873138428, "learning_rate": 9.945182811854282e-06, "loss": 2.8531, "step": 254000 }, { "epoch": 0.06628598726745796, "grad_norm": 5.761704921722412, "learning_rate": 9.945059300266209e-06, "loss": 2.8364, "step": 254200 }, { "epoch": 0.06633813989316013, "grad_norm": 5.95949125289917, "learning_rate": 9.944935650458028e-06, "loss": 2.8268, "step": 254400 }, { "epoch": 0.0663902925188623, "grad_norm": 7.2780985832214355, "learning_rate": 9.944811862433194e-06, "loss": 2.8539, "step": 254600 }, { "epoch": 0.06644244514456447, "grad_norm": 6.372018814086914, "learning_rate": 9.944687936195168e-06, "loss": 2.8406, "step": 254800 }, { "epoch": 0.06649459777026664, "grad_norm": 7.005834579467773, "learning_rate": 9.94456387174741e-06, "loss": 2.8495, "step": 255000 }, { "epoch": 0.06654675039596882, "grad_norm": 6.090085983276367, "learning_rate": 9.944439669093393e-06, "loss": 2.8185, "step": 255200 }, { "epoch": 0.06659890302167099, "grad_norm": 6.5275750160217285, "learning_rate": 9.944315328236585e-06, "loss": 2.8444, "step": 255400 }, { "epoch": 0.06665105564737316, "grad_norm": 6.6855316162109375, "learning_rate": 9.944190849180464e-06, "loss": 2.8106, "step": 255600 }, { "epoch": 0.06670320827307533, "grad_norm": 5.903449058532715, "learning_rate": 9.944066231928506e-06, "loss": 2.8174, "step": 255800 }, { "epoch": 0.0667553608987775, "grad_norm": 5.807356357574463, "learning_rate": 9.943941476484195e-06, "loss": 2.8591, "step": 256000 }, { "epoch": 0.06680751352447967, "grad_norm": 6.048740386962891, "learning_rate": 9.943816582851023e-06, "loss": 2.8281, "step": 256200 }, { "epoch": 0.06685966615018182, "grad_norm": 5.910772800445557, "learning_rate": 9.943691551032472e-06, "loss": 2.8024, "step": 256400 }, { "epoch": 0.066911818775884, "grad_norm": 6.064548492431641, "learning_rate": 9.943566381032047e-06, "loss": 2.8232, "step": 256600 }, { "epoch": 0.06696397140158616, "grad_norm": 6.429322719573975, "learning_rate": 9.943441072853239e-06, "loss": 2.8604, "step": 256800 }, { "epoch": 0.06701612402728833, "grad_norm": 6.18205451965332, "learning_rate": 9.943315626499552e-06, "loss": 2.8166, "step": 257000 }, { "epoch": 0.0670682766529905, "grad_norm": 6.421455383300781, "learning_rate": 9.943190041974495e-06, "loss": 2.8616, "step": 257200 }, { "epoch": 0.06712042927869268, "grad_norm": 6.15507173538208, "learning_rate": 9.943064319281576e-06, "loss": 2.878, "step": 257400 }, { "epoch": 0.06717258190439485, "grad_norm": 6.6916728019714355, "learning_rate": 9.94293845842431e-06, "loss": 2.8272, "step": 257600 }, { "epoch": 0.06722473453009702, "grad_norm": 6.15769100189209, "learning_rate": 9.942812459406215e-06, "loss": 2.8024, "step": 257800 }, { "epoch": 0.06727688715579919, "grad_norm": 6.359096527099609, "learning_rate": 9.942686322230812e-06, "loss": 2.8265, "step": 258000 }, { "epoch": 0.06732903978150136, "grad_norm": 5.924700736999512, "learning_rate": 9.942560046901628e-06, "loss": 2.8314, "step": 258200 }, { "epoch": 0.06738119240720353, "grad_norm": 6.2472968101501465, "learning_rate": 9.94243363342219e-06, "loss": 2.8348, "step": 258400 }, { "epoch": 0.0674333450329057, "grad_norm": 6.63815450668335, "learning_rate": 9.942307081796034e-06, "loss": 2.8363, "step": 258600 }, { "epoch": 0.06748549765860787, "grad_norm": 7.617677211761475, "learning_rate": 9.942180392026697e-06, "loss": 2.8584, "step": 258800 }, { "epoch": 0.06753765028431004, "grad_norm": 6.234862327575684, "learning_rate": 9.942053564117718e-06, "loss": 2.8885, "step": 259000 }, { "epoch": 0.06758980291001221, "grad_norm": 6.803557395935059, "learning_rate": 9.941926598072644e-06, "loss": 2.8659, "step": 259200 }, { "epoch": 0.06764195553571438, "grad_norm": 6.141203880310059, "learning_rate": 9.941799493895024e-06, "loss": 2.8241, "step": 259400 }, { "epoch": 0.06769410816141655, "grad_norm": 6.708746910095215, "learning_rate": 9.94167225158841e-06, "loss": 2.8405, "step": 259600 }, { "epoch": 0.06774626078711872, "grad_norm": 6.132614612579346, "learning_rate": 9.94154487115636e-06, "loss": 2.8347, "step": 259800 }, { "epoch": 0.06779841341282089, "grad_norm": 6.152980327606201, "learning_rate": 9.941417352602429e-06, "loss": 2.795, "step": 260000 }, { "epoch": 0.06785056603852306, "grad_norm": 6.715810775756836, "learning_rate": 9.941289695930188e-06, "loss": 2.8301, "step": 260200 }, { "epoch": 0.06790271866422523, "grad_norm": 6.222926139831543, "learning_rate": 9.941161901143203e-06, "loss": 2.8402, "step": 260400 }, { "epoch": 0.0679548712899274, "grad_norm": 5.573058128356934, "learning_rate": 9.941033968245045e-06, "loss": 2.8325, "step": 260600 }, { "epoch": 0.06800702391562957, "grad_norm": 5.889303207397461, "learning_rate": 9.940905897239289e-06, "loss": 2.8836, "step": 260800 }, { "epoch": 0.06805917654133174, "grad_norm": 6.204052448272705, "learning_rate": 9.940777688129517e-06, "loss": 2.8051, "step": 261000 }, { "epoch": 0.06811132916703391, "grad_norm": 6.2043585777282715, "learning_rate": 9.940649340919313e-06, "loss": 2.8156, "step": 261200 }, { "epoch": 0.06816348179273608, "grad_norm": 6.628237724304199, "learning_rate": 9.940520855612262e-06, "loss": 2.8152, "step": 261400 }, { "epoch": 0.06821563441843825, "grad_norm": 5.52294397354126, "learning_rate": 9.940392232211955e-06, "loss": 2.8127, "step": 261600 }, { "epoch": 0.06826778704414042, "grad_norm": 6.279348373413086, "learning_rate": 9.94026347072199e-06, "loss": 2.8359, "step": 261800 }, { "epoch": 0.0683199396698426, "grad_norm": 5.843539237976074, "learning_rate": 9.940134571145966e-06, "loss": 2.8432, "step": 262000 }, { "epoch": 0.06837209229554475, "grad_norm": 6.166245460510254, "learning_rate": 9.940005533487483e-06, "loss": 2.8402, "step": 262200 }, { "epoch": 0.06842424492124692, "grad_norm": 5.798083305358887, "learning_rate": 9.93987635775015e-06, "loss": 2.8465, "step": 262400 }, { "epoch": 0.06847639754694909, "grad_norm": 6.206599712371826, "learning_rate": 9.939747043937575e-06, "loss": 2.8503, "step": 262600 }, { "epoch": 0.06852855017265126, "grad_norm": 6.811930179595947, "learning_rate": 9.939617592053376e-06, "loss": 2.7846, "step": 262800 }, { "epoch": 0.06858070279835343, "grad_norm": 6.215068340301514, "learning_rate": 9.93948800210117e-06, "loss": 2.8258, "step": 263000 }, { "epoch": 0.0686328554240556, "grad_norm": 6.74024772644043, "learning_rate": 9.939358274084578e-06, "loss": 2.8517, "step": 263200 }, { "epoch": 0.06868500804975777, "grad_norm": 6.108590602874756, "learning_rate": 9.939228408007227e-06, "loss": 2.8154, "step": 263400 }, { "epoch": 0.06873716067545994, "grad_norm": 7.009735584259033, "learning_rate": 9.939098403872747e-06, "loss": 2.8149, "step": 263600 }, { "epoch": 0.06878931330116211, "grad_norm": 6.192783832550049, "learning_rate": 9.938968261684771e-06, "loss": 2.8519, "step": 263800 }, { "epoch": 0.06884146592686428, "grad_norm": 6.660505771636963, "learning_rate": 9.938837981446939e-06, "loss": 2.8706, "step": 264000 }, { "epoch": 0.06889361855256645, "grad_norm": 6.753753185272217, "learning_rate": 9.938707563162888e-06, "loss": 2.7983, "step": 264200 }, { "epoch": 0.06894577117826863, "grad_norm": 5.999804973602295, "learning_rate": 9.938577006836268e-06, "loss": 2.8527, "step": 264400 }, { "epoch": 0.0689979238039708, "grad_norm": 6.876380443572998, "learning_rate": 9.938446312470726e-06, "loss": 2.8108, "step": 264600 }, { "epoch": 0.06905007642967297, "grad_norm": 6.110062122344971, "learning_rate": 9.938315480069916e-06, "loss": 2.8565, "step": 264800 }, { "epoch": 0.06910222905537514, "grad_norm": 6.41756534576416, "learning_rate": 9.938184509637494e-06, "loss": 2.8097, "step": 265000 }, { "epoch": 0.0691543816810773, "grad_norm": 6.181519985198975, "learning_rate": 9.93805340117712e-06, "loss": 2.7903, "step": 265200 }, { "epoch": 0.06920653430677948, "grad_norm": 6.470280170440674, "learning_rate": 9.93792215469246e-06, "loss": 2.7889, "step": 265400 }, { "epoch": 0.06925868693248165, "grad_norm": 6.139248371124268, "learning_rate": 9.937790770187182e-06, "loss": 2.8096, "step": 265600 }, { "epoch": 0.06931083955818382, "grad_norm": 5.582498073577881, "learning_rate": 9.937659247664959e-06, "loss": 2.8197, "step": 265800 }, { "epoch": 0.06936299218388599, "grad_norm": 6.460460186004639, "learning_rate": 9.937527587129468e-06, "loss": 2.78, "step": 266000 }, { "epoch": 0.06941514480958816, "grad_norm": 6.965539455413818, "learning_rate": 9.937395788584384e-06, "loss": 2.8352, "step": 266200 }, { "epoch": 0.06946729743529033, "grad_norm": 6.817525386810303, "learning_rate": 9.937263852033399e-06, "loss": 2.8182, "step": 266400 }, { "epoch": 0.0695194500609925, "grad_norm": 5.603514671325684, "learning_rate": 9.937131777480194e-06, "loss": 2.8105, "step": 266600 }, { "epoch": 0.06957160268669467, "grad_norm": 6.385352611541748, "learning_rate": 9.936999564928462e-06, "loss": 2.8152, "step": 266800 }, { "epoch": 0.06962375531239684, "grad_norm": 5.736618518829346, "learning_rate": 9.936867214381902e-06, "loss": 2.785, "step": 267000 }, { "epoch": 0.06967590793809901, "grad_norm": 7.078679084777832, "learning_rate": 9.93673472584421e-06, "loss": 2.8278, "step": 267200 }, { "epoch": 0.06972806056380118, "grad_norm": 6.587845802307129, "learning_rate": 9.93660209931909e-06, "loss": 2.7866, "step": 267400 }, { "epoch": 0.06978021318950335, "grad_norm": 6.5862717628479, "learning_rate": 9.93646933481025e-06, "loss": 2.7899, "step": 267600 }, { "epoch": 0.06983236581520552, "grad_norm": 6.024425506591797, "learning_rate": 9.936336432321398e-06, "loss": 2.8223, "step": 267800 }, { "epoch": 0.06988451844090769, "grad_norm": 6.144232749938965, "learning_rate": 9.936203391856254e-06, "loss": 2.8156, "step": 268000 }, { "epoch": 0.06993667106660985, "grad_norm": 6.591801166534424, "learning_rate": 9.936070213418532e-06, "loss": 2.7847, "step": 268200 }, { "epoch": 0.06998882369231202, "grad_norm": 6.024001121520996, "learning_rate": 9.935936897011955e-06, "loss": 2.808, "step": 268400 }, { "epoch": 0.07004097631801419, "grad_norm": 6.559907913208008, "learning_rate": 9.93580344264025e-06, "loss": 2.8023, "step": 268600 }, { "epoch": 0.07009312894371636, "grad_norm": 6.3227033615112305, "learning_rate": 9.93566985030715e-06, "loss": 2.7682, "step": 268800 }, { "epoch": 0.07014528156941853, "grad_norm": 5.811648368835449, "learning_rate": 9.935536120016386e-06, "loss": 2.8045, "step": 269000 }, { "epoch": 0.0701974341951207, "grad_norm": 6.381039619445801, "learning_rate": 9.935402251771696e-06, "loss": 2.822, "step": 269200 }, { "epoch": 0.07024958682082287, "grad_norm": 6.522758960723877, "learning_rate": 9.935268245576822e-06, "loss": 2.8439, "step": 269400 }, { "epoch": 0.07030173944652504, "grad_norm": 6.2131428718566895, "learning_rate": 9.935134101435511e-06, "loss": 2.8601, "step": 269600 }, { "epoch": 0.07035389207222721, "grad_norm": 6.588796615600586, "learning_rate": 9.93499981935151e-06, "loss": 2.8367, "step": 269800 }, { "epoch": 0.07040604469792938, "grad_norm": 6.30739688873291, "learning_rate": 9.934865399328575e-06, "loss": 2.8663, "step": 270000 }, { "epoch": 0.07045819732363155, "grad_norm": 6.022554397583008, "learning_rate": 9.934730841370461e-06, "loss": 2.8205, "step": 270200 }, { "epoch": 0.07051034994933372, "grad_norm": 6.7225341796875, "learning_rate": 9.93459614548093e-06, "loss": 2.8732, "step": 270400 }, { "epoch": 0.0705625025750359, "grad_norm": 5.783280849456787, "learning_rate": 9.934461311663748e-06, "loss": 2.7827, "step": 270600 }, { "epoch": 0.07061465520073806, "grad_norm": 6.880871772766113, "learning_rate": 9.934326339922682e-06, "loss": 2.7892, "step": 270800 }, { "epoch": 0.07066680782644023, "grad_norm": 7.110273361206055, "learning_rate": 9.934191230261506e-06, "loss": 2.8391, "step": 271000 }, { "epoch": 0.0707189604521424, "grad_norm": 6.837869167327881, "learning_rate": 9.934055982683995e-06, "loss": 2.824, "step": 271200 }, { "epoch": 0.07077111307784457, "grad_norm": 5.97743558883667, "learning_rate": 9.933920597193932e-06, "loss": 2.805, "step": 271400 }, { "epoch": 0.07082326570354674, "grad_norm": 6.262125015258789, "learning_rate": 9.933785073795096e-06, "loss": 2.7591, "step": 271600 }, { "epoch": 0.07087541832924892, "grad_norm": 6.377274036407471, "learning_rate": 9.93364941249128e-06, "loss": 2.8213, "step": 271800 }, { "epoch": 0.07092757095495109, "grad_norm": 6.591363906860352, "learning_rate": 9.933513613286273e-06, "loss": 2.8096, "step": 272000 }, { "epoch": 0.07097972358065326, "grad_norm": 6.756320476531982, "learning_rate": 9.933377676183875e-06, "loss": 2.7908, "step": 272200 }, { "epoch": 0.07103187620635543, "grad_norm": 5.893064975738525, "learning_rate": 9.93324160118788e-06, "loss": 2.8085, "step": 272400 }, { "epoch": 0.0710840288320576, "grad_norm": 6.648096561431885, "learning_rate": 9.933105388302094e-06, "loss": 2.8344, "step": 272600 }, { "epoch": 0.07113618145775977, "grad_norm": 5.9668049812316895, "learning_rate": 9.932969037530325e-06, "loss": 2.8419, "step": 272800 }, { "epoch": 0.07118833408346194, "grad_norm": 7.359618186950684, "learning_rate": 9.932832548876384e-06, "loss": 2.7993, "step": 273000 }, { "epoch": 0.07124048670916411, "grad_norm": 7.143940448760986, "learning_rate": 9.932695922344085e-06, "loss": 2.8044, "step": 273200 }, { "epoch": 0.07129263933486628, "grad_norm": 7.557311534881592, "learning_rate": 9.932559157937247e-06, "loss": 2.7901, "step": 273400 }, { "epoch": 0.07134479196056845, "grad_norm": 6.371750354766846, "learning_rate": 9.932422255659692e-06, "loss": 2.8085, "step": 273600 }, { "epoch": 0.07139694458627062, "grad_norm": 6.6176838874816895, "learning_rate": 9.93228521551525e-06, "loss": 2.8022, "step": 273800 }, { "epoch": 0.07144909721197278, "grad_norm": 6.3366851806640625, "learning_rate": 9.93214803750775e-06, "loss": 2.7898, "step": 274000 }, { "epoch": 0.07150124983767495, "grad_norm": 5.762211799621582, "learning_rate": 9.932010721641022e-06, "loss": 2.8627, "step": 274200 }, { "epoch": 0.07155340246337712, "grad_norm": 6.653818607330322, "learning_rate": 9.931873267918908e-06, "loss": 2.7655, "step": 274400 }, { "epoch": 0.07160555508907929, "grad_norm": 6.697924613952637, "learning_rate": 9.931735676345252e-06, "loss": 2.7799, "step": 274600 }, { "epoch": 0.07165770771478146, "grad_norm": 6.904690742492676, "learning_rate": 9.931597946923895e-06, "loss": 2.8291, "step": 274800 }, { "epoch": 0.07170986034048363, "grad_norm": 6.095543384552002, "learning_rate": 9.93146007965869e-06, "loss": 2.8048, "step": 275000 }, { "epoch": 0.0717620129661858, "grad_norm": 6.26823616027832, "learning_rate": 9.931322074553488e-06, "loss": 2.7887, "step": 275200 }, { "epoch": 0.07181416559188797, "grad_norm": 5.967849254608154, "learning_rate": 9.931183931612151e-06, "loss": 2.7847, "step": 275400 }, { "epoch": 0.07186631821759014, "grad_norm": 6.218311786651611, "learning_rate": 9.931045650838536e-06, "loss": 2.7812, "step": 275600 }, { "epoch": 0.07191847084329231, "grad_norm": 5.455589771270752, "learning_rate": 9.930907232236508e-06, "loss": 2.8283, "step": 275800 }, { "epoch": 0.07197062346899448, "grad_norm": 5.432662010192871, "learning_rate": 9.930768675809939e-06, "loss": 2.8221, "step": 276000 }, { "epoch": 0.07202277609469665, "grad_norm": 6.456852436065674, "learning_rate": 9.930629981562698e-06, "loss": 2.8253, "step": 276200 }, { "epoch": 0.07207492872039882, "grad_norm": 6.429563999176025, "learning_rate": 9.930491149498667e-06, "loss": 2.831, "step": 276400 }, { "epoch": 0.07212708134610099, "grad_norm": 6.186648368835449, "learning_rate": 9.930352179621721e-06, "loss": 2.7932, "step": 276600 }, { "epoch": 0.07217923397180316, "grad_norm": 6.750385761260986, "learning_rate": 9.930213071935746e-06, "loss": 2.748, "step": 276800 }, { "epoch": 0.07223138659750533, "grad_norm": 6.330704689025879, "learning_rate": 9.930073826444634e-06, "loss": 2.8346, "step": 277000 }, { "epoch": 0.0722835392232075, "grad_norm": 6.233454704284668, "learning_rate": 9.929934443152272e-06, "loss": 2.7588, "step": 277200 }, { "epoch": 0.07233569184890967, "grad_norm": 6.775033473968506, "learning_rate": 9.929794922062556e-06, "loss": 2.8368, "step": 277400 }, { "epoch": 0.07238784447461184, "grad_norm": 6.234418869018555, "learning_rate": 9.929655263179389e-06, "loss": 2.7867, "step": 277600 }, { "epoch": 0.07243999710031401, "grad_norm": 7.00774621963501, "learning_rate": 9.929515466506675e-06, "loss": 2.8419, "step": 277800 }, { "epoch": 0.07249214972601618, "grad_norm": 6.623013019561768, "learning_rate": 9.929375532048318e-06, "loss": 2.7692, "step": 278000 }, { "epoch": 0.07254430235171835, "grad_norm": 5.8589582443237305, "learning_rate": 9.929235459808233e-06, "loss": 2.8091, "step": 278200 }, { "epoch": 0.07259645497742052, "grad_norm": 5.785161018371582, "learning_rate": 9.92909524979033e-06, "loss": 2.8113, "step": 278400 }, { "epoch": 0.0726486076031227, "grad_norm": 6.4824018478393555, "learning_rate": 9.928954901998535e-06, "loss": 2.7695, "step": 278600 }, { "epoch": 0.07270076022882486, "grad_norm": 5.669132232666016, "learning_rate": 9.928814416436764e-06, "loss": 2.8168, "step": 278800 }, { "epoch": 0.07275291285452704, "grad_norm": 5.815093040466309, "learning_rate": 9.92867379310895e-06, "loss": 2.8186, "step": 279000 }, { "epoch": 0.0728050654802292, "grad_norm": 6.025607585906982, "learning_rate": 9.928533032019018e-06, "loss": 2.7955, "step": 279200 }, { "epoch": 0.07285721810593138, "grad_norm": 6.561540126800537, "learning_rate": 9.928392133170906e-06, "loss": 2.8035, "step": 279400 }, { "epoch": 0.07290937073163355, "grad_norm": 6.505258560180664, "learning_rate": 9.928251096568551e-06, "loss": 2.8136, "step": 279600 }, { "epoch": 0.07296152335733572, "grad_norm": 6.027585506439209, "learning_rate": 9.928109922215895e-06, "loss": 2.819, "step": 279800 }, { "epoch": 0.07301367598303787, "grad_norm": 6.295257091522217, "learning_rate": 9.927968610116885e-06, "loss": 2.7694, "step": 280000 }, { "epoch": 0.07306582860874004, "grad_norm": 5.988527297973633, "learning_rate": 9.92782716027547e-06, "loss": 2.8237, "step": 280200 }, { "epoch": 0.07311798123444221, "grad_norm": 6.000006198883057, "learning_rate": 9.927685572695602e-06, "loss": 2.7769, "step": 280400 }, { "epoch": 0.07317013386014438, "grad_norm": 5.901209831237793, "learning_rate": 9.927543847381242e-06, "loss": 2.8113, "step": 280600 }, { "epoch": 0.07322228648584655, "grad_norm": 6.257493019104004, "learning_rate": 9.927401984336351e-06, "loss": 2.803, "step": 280800 }, { "epoch": 0.07327443911154873, "grad_norm": 6.857747554779053, "learning_rate": 9.927259983564892e-06, "loss": 2.7732, "step": 281000 }, { "epoch": 0.0733265917372509, "grad_norm": 6.075799465179443, "learning_rate": 9.927117845070834e-06, "loss": 2.8093, "step": 281200 }, { "epoch": 0.07337874436295307, "grad_norm": 6.6486663818359375, "learning_rate": 9.926975568858152e-06, "loss": 2.7942, "step": 281400 }, { "epoch": 0.07343089698865524, "grad_norm": 6.643108367919922, "learning_rate": 9.926833154930823e-06, "loss": 2.7644, "step": 281600 }, { "epoch": 0.0734830496143574, "grad_norm": 6.577000141143799, "learning_rate": 9.926690603292825e-06, "loss": 2.8179, "step": 281800 }, { "epoch": 0.07353520224005958, "grad_norm": 6.4486083984375, "learning_rate": 9.926547913948146e-06, "loss": 2.8009, "step": 282000 }, { "epoch": 0.07358735486576175, "grad_norm": 6.277480602264404, "learning_rate": 9.92640508690077e-06, "loss": 2.8281, "step": 282200 }, { "epoch": 0.07363950749146392, "grad_norm": 5.925256729125977, "learning_rate": 9.926262122154692e-06, "loss": 2.7898, "step": 282400 }, { "epoch": 0.07369166011716609, "grad_norm": 6.8221049308776855, "learning_rate": 9.926119019713908e-06, "loss": 2.7954, "step": 282600 }, { "epoch": 0.07374381274286826, "grad_norm": 6.1247429847717285, "learning_rate": 9.925975779582417e-06, "loss": 2.7843, "step": 282800 }, { "epoch": 0.07379596536857043, "grad_norm": 6.930478572845459, "learning_rate": 9.925832401764222e-06, "loss": 2.7843, "step": 283000 }, { "epoch": 0.0738481179942726, "grad_norm": 6.8114190101623535, "learning_rate": 9.925688886263333e-06, "loss": 2.7745, "step": 283200 }, { "epoch": 0.07390027061997477, "grad_norm": 6.889013767242432, "learning_rate": 9.92554523308376e-06, "loss": 2.8041, "step": 283400 }, { "epoch": 0.07395242324567694, "grad_norm": 5.969966411590576, "learning_rate": 9.925401442229518e-06, "loss": 2.7588, "step": 283600 }, { "epoch": 0.07400457587137911, "grad_norm": 6.67039155960083, "learning_rate": 9.925257513704627e-06, "loss": 2.7643, "step": 283800 }, { "epoch": 0.07405672849708128, "grad_norm": 6.659237861633301, "learning_rate": 9.925113447513108e-06, "loss": 2.7798, "step": 284000 }, { "epoch": 0.07410888112278345, "grad_norm": 5.949248790740967, "learning_rate": 9.924969243658991e-06, "loss": 2.7853, "step": 284200 }, { "epoch": 0.07416103374848562, "grad_norm": 6.595699310302734, "learning_rate": 9.924824902146304e-06, "loss": 2.8293, "step": 284400 }, { "epoch": 0.07421318637418779, "grad_norm": 6.128589630126953, "learning_rate": 9.924680422979082e-06, "loss": 2.8168, "step": 284600 }, { "epoch": 0.07426533899988996, "grad_norm": 6.020960330963135, "learning_rate": 9.924535806161367e-06, "loss": 2.7953, "step": 284800 }, { "epoch": 0.07431749162559213, "grad_norm": 6.3884358406066895, "learning_rate": 9.924391051697194e-06, "loss": 2.8002, "step": 285000 }, { "epoch": 0.0743696442512943, "grad_norm": 6.152309417724609, "learning_rate": 9.924246159590614e-06, "loss": 2.7912, "step": 285200 }, { "epoch": 0.07442179687699647, "grad_norm": 6.479878902435303, "learning_rate": 9.924101129845678e-06, "loss": 2.7759, "step": 285400 }, { "epoch": 0.07447394950269864, "grad_norm": 6.872642517089844, "learning_rate": 9.923955962466437e-06, "loss": 2.801, "step": 285600 }, { "epoch": 0.0745261021284008, "grad_norm": 6.354683876037598, "learning_rate": 9.923810657456947e-06, "loss": 2.7839, "step": 285800 }, { "epoch": 0.07457825475410297, "grad_norm": 6.509720325469971, "learning_rate": 9.923665214821274e-06, "loss": 2.7437, "step": 286000 }, { "epoch": 0.07463040737980514, "grad_norm": 7.049443244934082, "learning_rate": 9.92351963456348e-06, "loss": 2.8347, "step": 286200 }, { "epoch": 0.07468256000550731, "grad_norm": 5.861264705657959, "learning_rate": 9.923373916687634e-06, "loss": 2.8464, "step": 286400 }, { "epoch": 0.07473471263120948, "grad_norm": 6.284032821655273, "learning_rate": 9.923228061197814e-06, "loss": 2.8014, "step": 286600 }, { "epoch": 0.07478686525691165, "grad_norm": 7.213873386383057, "learning_rate": 9.92308206809809e-06, "loss": 2.8036, "step": 286800 }, { "epoch": 0.07483901788261382, "grad_norm": 5.673182010650635, "learning_rate": 9.922935937392545e-06, "loss": 2.7957, "step": 287000 }, { "epoch": 0.074891170508316, "grad_norm": 6.418299198150635, "learning_rate": 9.922789669085266e-06, "loss": 2.7754, "step": 287200 }, { "epoch": 0.07494332313401816, "grad_norm": 6.251279830932617, "learning_rate": 9.92264326318034e-06, "loss": 2.8254, "step": 287400 }, { "epoch": 0.07499547575972033, "grad_norm": 11.405594825744629, "learning_rate": 9.922496719681858e-06, "loss": 2.8577, "step": 287600 }, { "epoch": 0.0750476283854225, "grad_norm": 5.6194963455200195, "learning_rate": 9.922350038593918e-06, "loss": 2.7584, "step": 287800 }, { "epoch": 0.07509978101112467, "grad_norm": 6.875600814819336, "learning_rate": 9.922203219920617e-06, "loss": 2.7921, "step": 288000 }, { "epoch": 0.07515193363682685, "grad_norm": 5.844818592071533, "learning_rate": 9.92205626366606e-06, "loss": 2.8321, "step": 288200 }, { "epoch": 0.07520408626252902, "grad_norm": 6.084321975708008, "learning_rate": 9.921909169834357e-06, "loss": 2.7855, "step": 288400 }, { "epoch": 0.07525623888823119, "grad_norm": 6.810995101928711, "learning_rate": 9.921761938429615e-06, "loss": 2.7948, "step": 288600 }, { "epoch": 0.07530839151393336, "grad_norm": 6.287647724151611, "learning_rate": 9.921614569455956e-06, "loss": 2.815, "step": 288800 }, { "epoch": 0.07536054413963553, "grad_norm": 6.69083833694458, "learning_rate": 9.921467062917492e-06, "loss": 2.7916, "step": 289000 }, { "epoch": 0.0754126967653377, "grad_norm": 6.045042514801025, "learning_rate": 9.921319418818347e-06, "loss": 2.7645, "step": 289200 }, { "epoch": 0.07546484939103987, "grad_norm": 6.019128799438477, "learning_rate": 9.921171637162654e-06, "loss": 2.813, "step": 289400 }, { "epoch": 0.07551700201674204, "grad_norm": 6.279255390167236, "learning_rate": 9.921023717954537e-06, "loss": 2.8184, "step": 289600 }, { "epoch": 0.07556915464244421, "grad_norm": 5.646428108215332, "learning_rate": 9.920875661198132e-06, "loss": 2.7849, "step": 289800 }, { "epoch": 0.07562130726814638, "grad_norm": 6.301240921020508, "learning_rate": 9.92072746689758e-06, "loss": 2.7739, "step": 290000 }, { "epoch": 0.07567345989384855, "grad_norm": 6.263399600982666, "learning_rate": 9.920579135057022e-06, "loss": 2.8212, "step": 290200 }, { "epoch": 0.07572561251955072, "grad_norm": 6.300200939178467, "learning_rate": 9.920430665680602e-06, "loss": 2.8088, "step": 290400 }, { "epoch": 0.07577776514525289, "grad_norm": 6.656348705291748, "learning_rate": 9.920282058772472e-06, "loss": 2.8058, "step": 290600 }, { "epoch": 0.07582991777095506, "grad_norm": 6.327916622161865, "learning_rate": 9.920133314336785e-06, "loss": 2.8079, "step": 290800 }, { "epoch": 0.07588207039665723, "grad_norm": 6.084270477294922, "learning_rate": 9.919984432377698e-06, "loss": 2.789, "step": 291000 }, { "epoch": 0.0759342230223594, "grad_norm": 6.9517388343811035, "learning_rate": 9.919835412899375e-06, "loss": 2.7942, "step": 291200 }, { "epoch": 0.07598637564806157, "grad_norm": 6.387750148773193, "learning_rate": 9.919686255905978e-06, "loss": 2.7857, "step": 291400 }, { "epoch": 0.07603852827376374, "grad_norm": 6.805675506591797, "learning_rate": 9.91953696140168e-06, "loss": 2.8233, "step": 291600 }, { "epoch": 0.0760906808994659, "grad_norm": 6.1650214195251465, "learning_rate": 9.919387529390648e-06, "loss": 2.8102, "step": 291800 }, { "epoch": 0.07614283352516807, "grad_norm": 6.393313407897949, "learning_rate": 9.919237959877064e-06, "loss": 2.8349, "step": 292000 }, { "epoch": 0.07619498615087024, "grad_norm": 6.139355659484863, "learning_rate": 9.919088252865107e-06, "loss": 2.7698, "step": 292200 }, { "epoch": 0.07624713877657241, "grad_norm": 6.505699157714844, "learning_rate": 9.918938408358961e-06, "loss": 2.8034, "step": 292400 }, { "epoch": 0.07629929140227458, "grad_norm": 6.276296138763428, "learning_rate": 9.918788426362816e-06, "loss": 2.7709, "step": 292600 }, { "epoch": 0.07635144402797675, "grad_norm": 5.510497570037842, "learning_rate": 9.918638306880861e-06, "loss": 2.7805, "step": 292800 }, { "epoch": 0.07640359665367892, "grad_norm": 6.827136993408203, "learning_rate": 9.918488049917294e-06, "loss": 2.7736, "step": 293000 }, { "epoch": 0.07645574927938109, "grad_norm": 6.214416980743408, "learning_rate": 9.918337655476315e-06, "loss": 2.8018, "step": 293200 }, { "epoch": 0.07650790190508326, "grad_norm": 6.037956237792969, "learning_rate": 9.91818712356213e-06, "loss": 2.7831, "step": 293400 }, { "epoch": 0.07656005453078543, "grad_norm": 7.491139888763428, "learning_rate": 9.918036454178942e-06, "loss": 2.8054, "step": 293600 }, { "epoch": 0.0766122071564876, "grad_norm": 7.301893711090088, "learning_rate": 9.917885647330963e-06, "loss": 2.8469, "step": 293800 }, { "epoch": 0.07666435978218977, "grad_norm": 6.116480350494385, "learning_rate": 9.917734703022411e-06, "loss": 2.7881, "step": 294000 }, { "epoch": 0.07671651240789194, "grad_norm": 6.379162788391113, "learning_rate": 9.917583621257504e-06, "loss": 2.7695, "step": 294200 }, { "epoch": 0.07676866503359411, "grad_norm": 7.193239212036133, "learning_rate": 9.917432402040463e-06, "loss": 2.7596, "step": 294400 }, { "epoch": 0.07682081765929628, "grad_norm": 6.380529880523682, "learning_rate": 9.917281045375518e-06, "loss": 2.8354, "step": 294600 }, { "epoch": 0.07687297028499845, "grad_norm": 6.858937740325928, "learning_rate": 9.917129551266897e-06, "loss": 2.7872, "step": 294800 }, { "epoch": 0.07692512291070062, "grad_norm": 6.391117572784424, "learning_rate": 9.916977919718837e-06, "loss": 2.8193, "step": 295000 }, { "epoch": 0.0769772755364028, "grad_norm": 6.219594478607178, "learning_rate": 9.916826150735573e-06, "loss": 2.8042, "step": 295200 }, { "epoch": 0.07702942816210497, "grad_norm": 6.027685642242432, "learning_rate": 9.91667424432135e-06, "loss": 2.8282, "step": 295400 }, { "epoch": 0.07708158078780714, "grad_norm": 5.730934143066406, "learning_rate": 9.916522200480412e-06, "loss": 2.766, "step": 295600 }, { "epoch": 0.0771337334135093, "grad_norm": 6.417144298553467, "learning_rate": 9.91637001921701e-06, "loss": 2.8094, "step": 295800 }, { "epoch": 0.07718588603921148, "grad_norm": 6.070188522338867, "learning_rate": 9.916217700535395e-06, "loss": 2.7917, "step": 296000 }, { "epoch": 0.07723803866491365, "grad_norm": 5.724254131317139, "learning_rate": 9.91606524443983e-06, "loss": 2.8104, "step": 296200 }, { "epoch": 0.07729019129061582, "grad_norm": 5.841590404510498, "learning_rate": 9.91591265093457e-06, "loss": 2.7847, "step": 296400 }, { "epoch": 0.07734234391631799, "grad_norm": 6.103632926940918, "learning_rate": 9.915759920023886e-06, "loss": 2.7978, "step": 296600 }, { "epoch": 0.07739449654202016, "grad_norm": 6.138303756713867, "learning_rate": 9.915607051712042e-06, "loss": 2.7617, "step": 296800 }, { "epoch": 0.07744664916772233, "grad_norm": 6.288359642028809, "learning_rate": 9.915454046003312e-06, "loss": 2.7719, "step": 297000 }, { "epoch": 0.0774988017934245, "grad_norm": 7.5223002433776855, "learning_rate": 9.915300902901976e-06, "loss": 2.8224, "step": 297200 }, { "epoch": 0.07755095441912667, "grad_norm": 6.539451599121094, "learning_rate": 9.915147622412311e-06, "loss": 2.7655, "step": 297400 }, { "epoch": 0.07760310704482883, "grad_norm": 6.718863487243652, "learning_rate": 9.914994204538603e-06, "loss": 2.8393, "step": 297600 }, { "epoch": 0.077655259670531, "grad_norm": 6.5553669929504395, "learning_rate": 9.914840649285142e-06, "loss": 2.7846, "step": 297800 }, { "epoch": 0.07770741229623317, "grad_norm": 5.977219104766846, "learning_rate": 9.914686956656214e-06, "loss": 2.7882, "step": 298000 }, { "epoch": 0.07775956492193534, "grad_norm": 6.6102752685546875, "learning_rate": 9.91453312665612e-06, "loss": 2.7676, "step": 298200 }, { "epoch": 0.07781171754763751, "grad_norm": 6.522745609283447, "learning_rate": 9.91437915928916e-06, "loss": 2.7544, "step": 298400 }, { "epoch": 0.07786387017333968, "grad_norm": 6.287838935852051, "learning_rate": 9.914225054559636e-06, "loss": 2.8284, "step": 298600 }, { "epoch": 0.07791602279904185, "grad_norm": 6.7383270263671875, "learning_rate": 9.914070812471853e-06, "loss": 2.7425, "step": 298800 }, { "epoch": 0.07796817542474402, "grad_norm": 6.654935359954834, "learning_rate": 9.913916433030126e-06, "loss": 2.7467, "step": 299000 }, { "epoch": 0.07802032805044619, "grad_norm": 5.797543525695801, "learning_rate": 9.91376191623877e-06, "loss": 2.789, "step": 299200 }, { "epoch": 0.07807248067614836, "grad_norm": 8.258265495300293, "learning_rate": 9.913607262102101e-06, "loss": 2.7822, "step": 299400 }, { "epoch": 0.07812463330185053, "grad_norm": 5.814871788024902, "learning_rate": 9.913452470624443e-06, "loss": 2.763, "step": 299600 }, { "epoch": 0.0781767859275527, "grad_norm": 6.059165954589844, "learning_rate": 9.913297541810123e-06, "loss": 2.7726, "step": 299800 }, { "epoch": 0.07822893855325487, "grad_norm": 6.355923175811768, "learning_rate": 9.913142475663472e-06, "loss": 2.8118, "step": 300000 }, { "epoch": 0.07828109117895704, "grad_norm": 6.16987419128418, "learning_rate": 9.912987272188826e-06, "loss": 2.7481, "step": 300200 }, { "epoch": 0.07833324380465921, "grad_norm": 6.023130416870117, "learning_rate": 9.912831931390518e-06, "loss": 2.7463, "step": 300400 }, { "epoch": 0.07838539643036138, "grad_norm": 5.977993011474609, "learning_rate": 9.912676453272894e-06, "loss": 2.845, "step": 300600 }, { "epoch": 0.07843754905606355, "grad_norm": 6.411383152008057, "learning_rate": 9.912520837840297e-06, "loss": 2.7646, "step": 300800 }, { "epoch": 0.07848970168176572, "grad_norm": 6.466489791870117, "learning_rate": 9.91236508509708e-06, "loss": 2.7944, "step": 301000 }, { "epoch": 0.07854185430746789, "grad_norm": 6.878299713134766, "learning_rate": 9.912209195047594e-06, "loss": 2.798, "step": 301200 }, { "epoch": 0.07859400693317006, "grad_norm": 6.08614444732666, "learning_rate": 9.912053167696195e-06, "loss": 2.7649, "step": 301400 }, { "epoch": 0.07864615955887223, "grad_norm": 6.1333909034729, "learning_rate": 9.911897003047251e-06, "loss": 2.7795, "step": 301600 }, { "epoch": 0.0786983121845744, "grad_norm": 6.349955081939697, "learning_rate": 9.91174070110512e-06, "loss": 2.7147, "step": 301800 }, { "epoch": 0.07875046481027657, "grad_norm": 6.769674301147461, "learning_rate": 9.91158426187417e-06, "loss": 2.8079, "step": 302000 }, { "epoch": 0.07880261743597874, "grad_norm": 6.519297122955322, "learning_rate": 9.91142768535878e-06, "loss": 2.7955, "step": 302200 }, { "epoch": 0.07885477006168091, "grad_norm": 7.310003757476807, "learning_rate": 9.911270971563322e-06, "loss": 2.7827, "step": 302400 }, { "epoch": 0.07890692268738309, "grad_norm": 5.958581924438477, "learning_rate": 9.911114120492177e-06, "loss": 2.7466, "step": 302600 }, { "epoch": 0.07895907531308526, "grad_norm": 5.792890548706055, "learning_rate": 9.91095713214973e-06, "loss": 2.7629, "step": 302800 }, { "epoch": 0.07901122793878743, "grad_norm": 6.76243782043457, "learning_rate": 9.910800006540368e-06, "loss": 2.8411, "step": 303000 }, { "epoch": 0.0790633805644896, "grad_norm": 6.586005687713623, "learning_rate": 9.910642743668486e-06, "loss": 2.7527, "step": 303200 }, { "epoch": 0.07911553319019177, "grad_norm": 6.768370628356934, "learning_rate": 9.910485343538474e-06, "loss": 2.7507, "step": 303400 }, { "epoch": 0.07916768581589392, "grad_norm": 6.578641891479492, "learning_rate": 9.910327806154735e-06, "loss": 2.8037, "step": 303600 }, { "epoch": 0.0792198384415961, "grad_norm": 6.524317741394043, "learning_rate": 9.910170131521675e-06, "loss": 2.7402, "step": 303800 }, { "epoch": 0.07927199106729826, "grad_norm": 6.295195579528809, "learning_rate": 9.910012319643696e-06, "loss": 2.8275, "step": 304000 }, { "epoch": 0.07932414369300043, "grad_norm": 7.033209800720215, "learning_rate": 9.909854370525212e-06, "loss": 2.7591, "step": 304200 }, { "epoch": 0.0793762963187026, "grad_norm": 6.22572660446167, "learning_rate": 9.909696284170636e-06, "loss": 2.7604, "step": 304400 }, { "epoch": 0.07942844894440478, "grad_norm": 6.447390556335449, "learning_rate": 9.909538060584387e-06, "loss": 2.799, "step": 304600 }, { "epoch": 0.07948060157010695, "grad_norm": 6.915390491485596, "learning_rate": 9.909379699770889e-06, "loss": 2.792, "step": 304800 }, { "epoch": 0.07953275419580912, "grad_norm": 6.417814254760742, "learning_rate": 9.909221201734568e-06, "loss": 2.7755, "step": 305000 }, { "epoch": 0.07958490682151129, "grad_norm": 6.955142974853516, "learning_rate": 9.909062566479854e-06, "loss": 2.7464, "step": 305200 }, { "epoch": 0.07963705944721346, "grad_norm": 6.3183393478393555, "learning_rate": 9.908903794011183e-06, "loss": 2.7509, "step": 305400 }, { "epoch": 0.07968921207291563, "grad_norm": 6.242965221405029, "learning_rate": 9.908744884332988e-06, "loss": 2.8254, "step": 305600 }, { "epoch": 0.0797413646986178, "grad_norm": 6.4110941886901855, "learning_rate": 9.908585837449714e-06, "loss": 2.7494, "step": 305800 }, { "epoch": 0.07979351732431997, "grad_norm": 6.298842430114746, "learning_rate": 9.908426653365805e-06, "loss": 2.7913, "step": 306000 }, { "epoch": 0.07984566995002214, "grad_norm": 6.478790760040283, "learning_rate": 9.908267332085712e-06, "loss": 2.7974, "step": 306200 }, { "epoch": 0.07989782257572431, "grad_norm": 6.1008453369140625, "learning_rate": 9.908107873613888e-06, "loss": 2.79, "step": 306400 }, { "epoch": 0.07994997520142648, "grad_norm": 6.282758712768555, "learning_rate": 9.90794827795479e-06, "loss": 2.7768, "step": 306600 }, { "epoch": 0.08000212782712865, "grad_norm": 6.637455463409424, "learning_rate": 9.907788545112879e-06, "loss": 2.7501, "step": 306800 }, { "epoch": 0.08005428045283082, "grad_norm": 6.408087730407715, "learning_rate": 9.907628675092618e-06, "loss": 2.8239, "step": 307000 }, { "epoch": 0.08010643307853299, "grad_norm": 5.750133991241455, "learning_rate": 9.907468667898478e-06, "loss": 2.7664, "step": 307200 }, { "epoch": 0.08015858570423516, "grad_norm": 6.643690586090088, "learning_rate": 9.90730852353493e-06, "loss": 2.7875, "step": 307400 }, { "epoch": 0.08021073832993733, "grad_norm": 6.663003921508789, "learning_rate": 9.907148242006451e-06, "loss": 2.758, "step": 307600 }, { "epoch": 0.0802628909556395, "grad_norm": 7.2608113288879395, "learning_rate": 9.906987823317517e-06, "loss": 2.7628, "step": 307800 }, { "epoch": 0.08031504358134167, "grad_norm": 5.9124274253845215, "learning_rate": 9.906827267472619e-06, "loss": 2.766, "step": 308000 }, { "epoch": 0.08036719620704384, "grad_norm": 6.0799241065979, "learning_rate": 9.90666657447624e-06, "loss": 2.7949, "step": 308200 }, { "epoch": 0.08041934883274601, "grad_norm": 6.578120231628418, "learning_rate": 9.906505744332873e-06, "loss": 2.7463, "step": 308400 }, { "epoch": 0.08047150145844818, "grad_norm": 6.545755386352539, "learning_rate": 9.906344777047012e-06, "loss": 2.771, "step": 308600 }, { "epoch": 0.08052365408415035, "grad_norm": 6.288052082061768, "learning_rate": 9.906183672623158e-06, "loss": 2.787, "step": 308800 }, { "epoch": 0.08057580670985252, "grad_norm": 7.15657377243042, "learning_rate": 9.906022431065814e-06, "loss": 2.7635, "step": 309000 }, { "epoch": 0.0806279593355547, "grad_norm": 6.247620582580566, "learning_rate": 9.905861052379484e-06, "loss": 2.8279, "step": 309200 }, { "epoch": 0.08068011196125685, "grad_norm": 6.499161243438721, "learning_rate": 9.905699536568682e-06, "loss": 2.8063, "step": 309400 }, { "epoch": 0.08073226458695902, "grad_norm": 5.619341850280762, "learning_rate": 9.905537883637923e-06, "loss": 2.776, "step": 309600 }, { "epoch": 0.08078441721266119, "grad_norm": 6.434546947479248, "learning_rate": 9.905376093591722e-06, "loss": 2.8069, "step": 309800 }, { "epoch": 0.08083656983836336, "grad_norm": 5.9819207191467285, "learning_rate": 9.905214166434605e-06, "loss": 2.7757, "step": 310000 }, { "epoch": 0.08088872246406553, "grad_norm": 6.195422649383545, "learning_rate": 9.905052102171093e-06, "loss": 2.7516, "step": 310200 }, { "epoch": 0.0809408750897677, "grad_norm": 6.815351486206055, "learning_rate": 9.904889900805721e-06, "loss": 2.7657, "step": 310400 }, { "epoch": 0.08099302771546987, "grad_norm": 6.130044937133789, "learning_rate": 9.904727562343021e-06, "loss": 2.8082, "step": 310600 }, { "epoch": 0.08104518034117204, "grad_norm": 6.268271446228027, "learning_rate": 9.90456508678753e-06, "loss": 2.7433, "step": 310800 }, { "epoch": 0.08109733296687421, "grad_norm": 6.54405403137207, "learning_rate": 9.904402474143789e-06, "loss": 2.7833, "step": 311000 }, { "epoch": 0.08114948559257638, "grad_norm": 6.206225872039795, "learning_rate": 9.904239724416345e-06, "loss": 2.7515, "step": 311200 }, { "epoch": 0.08120163821827855, "grad_norm": 6.3456645011901855, "learning_rate": 9.904076837609745e-06, "loss": 2.8183, "step": 311400 }, { "epoch": 0.08125379084398072, "grad_norm": 6.351884841918945, "learning_rate": 9.903913813728543e-06, "loss": 2.7917, "step": 311600 }, { "epoch": 0.0813059434696829, "grad_norm": 6.585855960845947, "learning_rate": 9.903750652777296e-06, "loss": 2.7465, "step": 311800 }, { "epoch": 0.08135809609538507, "grad_norm": 6.618515491485596, "learning_rate": 9.903587354760562e-06, "loss": 2.787, "step": 312000 }, { "epoch": 0.08141024872108724, "grad_norm": 6.154926300048828, "learning_rate": 9.90342391968291e-06, "loss": 2.7838, "step": 312200 }, { "epoch": 0.0814624013467894, "grad_norm": 6.921688079833984, "learning_rate": 9.903260347548904e-06, "loss": 2.7479, "step": 312400 }, { "epoch": 0.08151455397249158, "grad_norm": 7.639387130737305, "learning_rate": 9.903096638363119e-06, "loss": 2.752, "step": 312600 }, { "epoch": 0.08156670659819375, "grad_norm": 6.138737201690674, "learning_rate": 9.902932792130127e-06, "loss": 2.8019, "step": 312800 }, { "epoch": 0.08161885922389592, "grad_norm": 6.133929252624512, "learning_rate": 9.902768808854513e-06, "loss": 2.7514, "step": 313000 }, { "epoch": 0.08167101184959809, "grad_norm": 6.8112359046936035, "learning_rate": 9.902604688540855e-06, "loss": 2.8003, "step": 313200 }, { "epoch": 0.08172316447530026, "grad_norm": 6.365881443023682, "learning_rate": 9.902440431193744e-06, "loss": 2.7725, "step": 313400 }, { "epoch": 0.08177531710100243, "grad_norm": 6.9227447509765625, "learning_rate": 9.902276036817772e-06, "loss": 2.7692, "step": 313600 }, { "epoch": 0.0818274697267046, "grad_norm": 7.009827136993408, "learning_rate": 9.90211150541753e-06, "loss": 2.7507, "step": 313800 }, { "epoch": 0.08187962235240677, "grad_norm": 6.5645976066589355, "learning_rate": 9.90194683699762e-06, "loss": 2.7632, "step": 314000 }, { "epoch": 0.08193177497810894, "grad_norm": 6.148155689239502, "learning_rate": 9.901782031562643e-06, "loss": 2.7872, "step": 314200 }, { "epoch": 0.08198392760381111, "grad_norm": 6.268352031707764, "learning_rate": 9.901617089117209e-06, "loss": 2.7974, "step": 314400 }, { "epoch": 0.08203608022951328, "grad_norm": 5.926452159881592, "learning_rate": 9.901452009665922e-06, "loss": 2.7486, "step": 314600 }, { "epoch": 0.08208823285521545, "grad_norm": 6.640249729156494, "learning_rate": 9.901286793213402e-06, "loss": 2.7851, "step": 314800 }, { "epoch": 0.08214038548091762, "grad_norm": 7.127112865447998, "learning_rate": 9.901121439764263e-06, "loss": 2.7037, "step": 315000 }, { "epoch": 0.08219253810661979, "grad_norm": 7.389196872711182, "learning_rate": 9.90095594932313e-06, "loss": 2.7886, "step": 315200 }, { "epoch": 0.08224469073232195, "grad_norm": 6.33438777923584, "learning_rate": 9.900790321894627e-06, "loss": 2.7621, "step": 315400 }, { "epoch": 0.08229684335802412, "grad_norm": 6.59637451171875, "learning_rate": 9.900624557483383e-06, "loss": 2.7757, "step": 315600 }, { "epoch": 0.08234899598372629, "grad_norm": 6.34528923034668, "learning_rate": 9.900458656094031e-06, "loss": 2.7377, "step": 315800 }, { "epoch": 0.08240114860942846, "grad_norm": 6.219143390655518, "learning_rate": 9.90029261773121e-06, "loss": 2.7665, "step": 316000 }, { "epoch": 0.08245330123513063, "grad_norm": 6.358184337615967, "learning_rate": 9.900126442399562e-06, "loss": 2.7857, "step": 316200 }, { "epoch": 0.0825054538608328, "grad_norm": 6.92657470703125, "learning_rate": 9.899960130103728e-06, "loss": 2.7823, "step": 316400 }, { "epoch": 0.08255760648653497, "grad_norm": 6.66539192199707, "learning_rate": 9.89979368084836e-06, "loss": 2.7912, "step": 316600 }, { "epoch": 0.08260975911223714, "grad_norm": 6.198975563049316, "learning_rate": 9.899627094638107e-06, "loss": 2.7537, "step": 316800 }, { "epoch": 0.08266191173793931, "grad_norm": 5.979285717010498, "learning_rate": 9.899460371477629e-06, "loss": 2.7273, "step": 317000 }, { "epoch": 0.08271406436364148, "grad_norm": 6.5081071853637695, "learning_rate": 9.899293511371582e-06, "loss": 2.7514, "step": 317200 }, { "epoch": 0.08276621698934365, "grad_norm": 7.048851490020752, "learning_rate": 9.899126514324636e-06, "loss": 2.7624, "step": 317400 }, { "epoch": 0.08281836961504582, "grad_norm": 6.467654228210449, "learning_rate": 9.89895938034145e-06, "loss": 2.7895, "step": 317600 }, { "epoch": 0.08287052224074799, "grad_norm": 6.6041646003723145, "learning_rate": 9.898792109426705e-06, "loss": 2.7708, "step": 317800 }, { "epoch": 0.08292267486645016, "grad_norm": 6.265130996704102, "learning_rate": 9.898624701585069e-06, "loss": 2.7361, "step": 318000 }, { "epoch": 0.08297482749215233, "grad_norm": 6.6074724197387695, "learning_rate": 9.898457156821226e-06, "loss": 2.7643, "step": 318200 }, { "epoch": 0.0830269801178545, "grad_norm": 6.601407527923584, "learning_rate": 9.898289475139857e-06, "loss": 2.7805, "step": 318400 }, { "epoch": 0.08307913274355667, "grad_norm": 6.4573845863342285, "learning_rate": 9.89812165654565e-06, "loss": 2.8072, "step": 318600 }, { "epoch": 0.08313128536925884, "grad_norm": 6.106098175048828, "learning_rate": 9.897953701043292e-06, "loss": 2.7642, "step": 318800 }, { "epoch": 0.08318343799496102, "grad_norm": 6.303261756896973, "learning_rate": 9.897785608637484e-06, "loss": 2.7252, "step": 319000 }, { "epoch": 0.08323559062066319, "grad_norm": 6.720864772796631, "learning_rate": 9.897617379332919e-06, "loss": 2.8183, "step": 319200 }, { "epoch": 0.08328774324636536, "grad_norm": 6.235776424407959, "learning_rate": 9.897449013134301e-06, "loss": 2.7476, "step": 319400 }, { "epoch": 0.08333989587206753, "grad_norm": 7.237074851989746, "learning_rate": 9.897280510046337e-06, "loss": 2.7326, "step": 319600 }, { "epoch": 0.0833920484977697, "grad_norm": 6.430893421173096, "learning_rate": 9.897111870073736e-06, "loss": 2.7449, "step": 319800 }, { "epoch": 0.08344420112347187, "grad_norm": 6.649140357971191, "learning_rate": 9.896943093221212e-06, "loss": 2.778, "step": 320000 }, { "epoch": 0.08349635374917404, "grad_norm": 6.112555980682373, "learning_rate": 9.89677417949348e-06, "loss": 2.7041, "step": 320200 }, { "epoch": 0.08354850637487621, "grad_norm": 6.915252685546875, "learning_rate": 9.896605128895265e-06, "loss": 2.7855, "step": 320400 }, { "epoch": 0.08360065900057838, "grad_norm": 6.36874532699585, "learning_rate": 9.896435941431291e-06, "loss": 2.7738, "step": 320600 }, { "epoch": 0.08365281162628055, "grad_norm": 7.028889179229736, "learning_rate": 9.896266617106285e-06, "loss": 2.8031, "step": 320800 }, { "epoch": 0.08370496425198272, "grad_norm": 6.048758029937744, "learning_rate": 9.896097155924983e-06, "loss": 2.7842, "step": 321000 }, { "epoch": 0.08375711687768488, "grad_norm": 6.261166095733643, "learning_rate": 9.89592755789212e-06, "loss": 2.7553, "step": 321200 }, { "epoch": 0.08380926950338705, "grad_norm": 6.835334300994873, "learning_rate": 9.895757823012435e-06, "loss": 2.7636, "step": 321400 }, { "epoch": 0.08386142212908922, "grad_norm": 6.209692478179932, "learning_rate": 9.895587951290675e-06, "loss": 2.763, "step": 321600 }, { "epoch": 0.08391357475479139, "grad_norm": 6.277283191680908, "learning_rate": 9.895417942731589e-06, "loss": 2.8042, "step": 321800 }, { "epoch": 0.08396572738049356, "grad_norm": 6.796102523803711, "learning_rate": 9.895247797339925e-06, "loss": 2.7538, "step": 322000 }, { "epoch": 0.08401788000619573, "grad_norm": 6.786470890045166, "learning_rate": 9.895077515120439e-06, "loss": 2.7547, "step": 322200 }, { "epoch": 0.0840700326318979, "grad_norm": 6.2995429039001465, "learning_rate": 9.894907096077893e-06, "loss": 2.7658, "step": 322400 }, { "epoch": 0.08412218525760007, "grad_norm": 7.4601521492004395, "learning_rate": 9.894736540217052e-06, "loss": 2.7518, "step": 322600 }, { "epoch": 0.08417433788330224, "grad_norm": 7.032712459564209, "learning_rate": 9.89456584754268e-06, "loss": 2.7879, "step": 322800 }, { "epoch": 0.08422649050900441, "grad_norm": 6.522708892822266, "learning_rate": 9.894395018059546e-06, "loss": 2.782, "step": 323000 }, { "epoch": 0.08427864313470658, "grad_norm": 6.497341632843018, "learning_rate": 9.894224051772429e-06, "loss": 2.7811, "step": 323200 }, { "epoch": 0.08433079576040875, "grad_norm": 5.980485916137695, "learning_rate": 9.894052948686108e-06, "loss": 2.7278, "step": 323400 }, { "epoch": 0.08438294838611092, "grad_norm": 6.1288628578186035, "learning_rate": 9.893881708805364e-06, "loss": 2.7643, "step": 323600 }, { "epoch": 0.08443510101181309, "grad_norm": 6.619753360748291, "learning_rate": 9.893710332134982e-06, "loss": 2.7793, "step": 323800 }, { "epoch": 0.08448725363751526, "grad_norm": 6.239343166351318, "learning_rate": 9.893538818679754e-06, "loss": 2.7257, "step": 324000 }, { "epoch": 0.08453940626321743, "grad_norm": 6.4836883544921875, "learning_rate": 9.893367168444474e-06, "loss": 2.7462, "step": 324200 }, { "epoch": 0.0845915588889196, "grad_norm": 6.484433650970459, "learning_rate": 9.89319538143394e-06, "loss": 2.7384, "step": 324400 }, { "epoch": 0.08464371151462177, "grad_norm": 6.930042743682861, "learning_rate": 9.893023457652951e-06, "loss": 2.7905, "step": 324600 }, { "epoch": 0.08469586414032394, "grad_norm": 5.899946212768555, "learning_rate": 9.892851397106316e-06, "loss": 2.7162, "step": 324800 }, { "epoch": 0.08474801676602611, "grad_norm": 5.355585098266602, "learning_rate": 9.892679199798843e-06, "loss": 2.7872, "step": 325000 }, { "epoch": 0.08480016939172828, "grad_norm": 7.755842685699463, "learning_rate": 9.892506865735344e-06, "loss": 2.7645, "step": 325200 }, { "epoch": 0.08485232201743045, "grad_norm": 6.277362823486328, "learning_rate": 9.892334394920638e-06, "loss": 2.7905, "step": 325400 }, { "epoch": 0.08490447464313262, "grad_norm": 6.218322277069092, "learning_rate": 9.892161787359544e-06, "loss": 2.7879, "step": 325600 }, { "epoch": 0.0849566272688348, "grad_norm": 6.325297832489014, "learning_rate": 9.891989043056886e-06, "loss": 2.7732, "step": 325800 }, { "epoch": 0.08500877989453696, "grad_norm": 6.6276445388793945, "learning_rate": 9.891816162017495e-06, "loss": 2.7643, "step": 326000 }, { "epoch": 0.08506093252023914, "grad_norm": 6.802475929260254, "learning_rate": 9.891643144246202e-06, "loss": 2.7389, "step": 326200 }, { "epoch": 0.0851130851459413, "grad_norm": 6.825811862945557, "learning_rate": 9.89146998974784e-06, "loss": 2.7736, "step": 326400 }, { "epoch": 0.08516523777164348, "grad_norm": 6.184617519378662, "learning_rate": 9.891296698527255e-06, "loss": 2.7717, "step": 326600 }, { "epoch": 0.08521739039734565, "grad_norm": 7.5419158935546875, "learning_rate": 9.891123270589285e-06, "loss": 2.7393, "step": 326800 }, { "epoch": 0.0852695430230478, "grad_norm": 7.02357292175293, "learning_rate": 9.89094970593878e-06, "loss": 2.7573, "step": 327000 }, { "epoch": 0.08532169564874997, "grad_norm": 6.308924674987793, "learning_rate": 9.890776004580595e-06, "loss": 2.7554, "step": 327200 }, { "epoch": 0.08537384827445214, "grad_norm": 6.975244522094727, "learning_rate": 9.890602166519578e-06, "loss": 2.7893, "step": 327400 }, { "epoch": 0.08542600090015431, "grad_norm": 6.773929119110107, "learning_rate": 9.890428191760593e-06, "loss": 2.7138, "step": 327600 }, { "epoch": 0.08547815352585648, "grad_norm": 5.785812854766846, "learning_rate": 9.890254080308498e-06, "loss": 2.7643, "step": 327800 }, { "epoch": 0.08553030615155865, "grad_norm": 6.477972507476807, "learning_rate": 9.890079832168167e-06, "loss": 2.7599, "step": 328000 }, { "epoch": 0.08558245877726083, "grad_norm": 7.792716026306152, "learning_rate": 9.889905447344463e-06, "loss": 2.7773, "step": 328200 }, { "epoch": 0.085634611402963, "grad_norm": 5.729294300079346, "learning_rate": 9.889730925842266e-06, "loss": 2.7661, "step": 328400 }, { "epoch": 0.08568676402866517, "grad_norm": 5.776681900024414, "learning_rate": 9.889556267666449e-06, "loss": 2.714, "step": 328600 }, { "epoch": 0.08573891665436734, "grad_norm": 5.912201404571533, "learning_rate": 9.8893814728219e-06, "loss": 2.781, "step": 328800 }, { "epoch": 0.0857910692800695, "grad_norm": 6.655936241149902, "learning_rate": 9.889206541313498e-06, "loss": 2.752, "step": 329000 }, { "epoch": 0.08584322190577168, "grad_norm": 6.418868064880371, "learning_rate": 9.889031473146136e-06, "loss": 2.8002, "step": 329200 }, { "epoch": 0.08589537453147385, "grad_norm": 6.111396312713623, "learning_rate": 9.888856268324707e-06, "loss": 2.7168, "step": 329400 }, { "epoch": 0.08594752715717602, "grad_norm": 6.363344669342041, "learning_rate": 9.88868092685411e-06, "loss": 2.7737, "step": 329600 }, { "epoch": 0.08599967978287819, "grad_norm": 6.287228107452393, "learning_rate": 9.888505448739243e-06, "loss": 2.7443, "step": 329800 }, { "epoch": 0.08605183240858036, "grad_norm": 7.1249542236328125, "learning_rate": 9.888329833985012e-06, "loss": 2.7436, "step": 330000 }, { "epoch": 0.08610398503428253, "grad_norm": 6.433495998382568, "learning_rate": 9.888154082596326e-06, "loss": 2.7575, "step": 330200 }, { "epoch": 0.0861561376599847, "grad_norm": 5.9914937019348145, "learning_rate": 9.887978194578097e-06, "loss": 2.7368, "step": 330400 }, { "epoch": 0.08620829028568687, "grad_norm": 6.580966472625732, "learning_rate": 9.88780216993524e-06, "loss": 2.7948, "step": 330600 }, { "epoch": 0.08626044291138904, "grad_norm": 6.364790439605713, "learning_rate": 9.88762600867268e-06, "loss": 2.7685, "step": 330800 }, { "epoch": 0.08631259553709121, "grad_norm": 6.688601016998291, "learning_rate": 9.887449710795333e-06, "loss": 2.7504, "step": 331000 }, { "epoch": 0.08636474816279338, "grad_norm": 6.046875953674316, "learning_rate": 9.887273276308135e-06, "loss": 2.7278, "step": 331200 }, { "epoch": 0.08641690078849555, "grad_norm": 6.681288719177246, "learning_rate": 9.887096705216011e-06, "loss": 2.7646, "step": 331400 }, { "epoch": 0.08646905341419772, "grad_norm": 6.149299621582031, "learning_rate": 9.8869199975239e-06, "loss": 2.727, "step": 331600 }, { "epoch": 0.08652120603989989, "grad_norm": 6.459449291229248, "learning_rate": 9.886743153236741e-06, "loss": 2.7715, "step": 331800 }, { "epoch": 0.08657335866560206, "grad_norm": 7.189176082611084, "learning_rate": 9.886566172359475e-06, "loss": 2.7691, "step": 332000 }, { "epoch": 0.08662551129130423, "grad_norm": 6.436927318572998, "learning_rate": 9.886389054897051e-06, "loss": 2.7188, "step": 332200 }, { "epoch": 0.0866776639170064, "grad_norm": 6.325084686279297, "learning_rate": 9.886211800854419e-06, "loss": 2.7552, "step": 332400 }, { "epoch": 0.08672981654270857, "grad_norm": 5.9876251220703125, "learning_rate": 9.886034410236533e-06, "loss": 2.7334, "step": 332600 }, { "epoch": 0.08678196916841074, "grad_norm": 6.643549919128418, "learning_rate": 9.88585688304835e-06, "loss": 2.7637, "step": 332800 }, { "epoch": 0.0868341217941129, "grad_norm": 6.4676971435546875, "learning_rate": 9.885679219294835e-06, "loss": 2.7169, "step": 333000 }, { "epoch": 0.08688627441981507, "grad_norm": 6.548349380493164, "learning_rate": 9.885501418980953e-06, "loss": 2.7825, "step": 333200 }, { "epoch": 0.08693842704551724, "grad_norm": 5.312566757202148, "learning_rate": 9.885323482111671e-06, "loss": 2.7209, "step": 333400 }, { "epoch": 0.08699057967121941, "grad_norm": 6.16527795791626, "learning_rate": 9.885145408691965e-06, "loss": 2.713, "step": 333600 }, { "epoch": 0.08704273229692158, "grad_norm": 6.8437652587890625, "learning_rate": 9.884967198726814e-06, "loss": 2.7806, "step": 333800 }, { "epoch": 0.08709488492262375, "grad_norm": 6.265858173370361, "learning_rate": 9.884788852221195e-06, "loss": 2.789, "step": 334000 }, { "epoch": 0.08714703754832592, "grad_norm": 6.236710548400879, "learning_rate": 9.884610369180097e-06, "loss": 2.7499, "step": 334200 }, { "epoch": 0.0871991901740281, "grad_norm": 6.732831001281738, "learning_rate": 9.884431749608505e-06, "loss": 2.7575, "step": 334400 }, { "epoch": 0.08725134279973026, "grad_norm": 6.163575649261475, "learning_rate": 9.884252993511415e-06, "loss": 2.7529, "step": 334600 }, { "epoch": 0.08730349542543243, "grad_norm": 6.205536365509033, "learning_rate": 9.884074100893821e-06, "loss": 2.7299, "step": 334800 }, { "epoch": 0.0873556480511346, "grad_norm": 6.635568618774414, "learning_rate": 9.883895071760726e-06, "loss": 2.7082, "step": 335000 }, { "epoch": 0.08740780067683677, "grad_norm": 7.192404270172119, "learning_rate": 9.883715906117132e-06, "loss": 2.7596, "step": 335200 }, { "epoch": 0.08745995330253895, "grad_norm": 6.824044704437256, "learning_rate": 9.883536603968047e-06, "loss": 2.7477, "step": 335400 }, { "epoch": 0.08751210592824112, "grad_norm": 7.502521991729736, "learning_rate": 9.883357165318483e-06, "loss": 2.7955, "step": 335600 }, { "epoch": 0.08756425855394329, "grad_norm": 6.514331817626953, "learning_rate": 9.883177590173454e-06, "loss": 2.7597, "step": 335800 }, { "epoch": 0.08761641117964546, "grad_norm": 5.833806037902832, "learning_rate": 9.882997878537984e-06, "loss": 2.7714, "step": 336000 }, { "epoch": 0.08766856380534763, "grad_norm": 6.365836143493652, "learning_rate": 9.88281803041709e-06, "loss": 2.7577, "step": 336200 }, { "epoch": 0.0877207164310498, "grad_norm": 6.60760498046875, "learning_rate": 9.882638045815804e-06, "loss": 2.7664, "step": 336400 }, { "epoch": 0.08777286905675197, "grad_norm": 6.648801326751709, "learning_rate": 9.882457924739153e-06, "loss": 2.7739, "step": 336600 }, { "epoch": 0.08782502168245414, "grad_norm": 6.417782306671143, "learning_rate": 9.882277667192175e-06, "loss": 2.777, "step": 336800 }, { "epoch": 0.08787717430815631, "grad_norm": 7.36497163772583, "learning_rate": 9.882097273179904e-06, "loss": 2.7481, "step": 337000 }, { "epoch": 0.08792932693385848, "grad_norm": 6.485381603240967, "learning_rate": 9.881916742707389e-06, "loss": 2.7754, "step": 337200 }, { "epoch": 0.08798147955956065, "grad_norm": 6.6873369216918945, "learning_rate": 9.88173607577967e-06, "loss": 2.7517, "step": 337400 }, { "epoch": 0.08803363218526282, "grad_norm": 7.222953796386719, "learning_rate": 9.881555272401797e-06, "loss": 2.7745, "step": 337600 }, { "epoch": 0.08808578481096499, "grad_norm": 6.487031936645508, "learning_rate": 9.881374332578829e-06, "loss": 2.762, "step": 337800 }, { "epoch": 0.08813793743666716, "grad_norm": 6.065067768096924, "learning_rate": 9.881193256315816e-06, "loss": 2.7632, "step": 338000 }, { "epoch": 0.08819009006236933, "grad_norm": 6.604552745819092, "learning_rate": 9.881012043617826e-06, "loss": 2.7412, "step": 338200 }, { "epoch": 0.0882422426880715, "grad_norm": 6.458118915557861, "learning_rate": 9.88083069448992e-06, "loss": 2.7364, "step": 338400 }, { "epoch": 0.08829439531377367, "grad_norm": 7.036708354949951, "learning_rate": 9.88064920893717e-06, "loss": 2.779, "step": 338600 }, { "epoch": 0.08834654793947583, "grad_norm": 6.828887462615967, "learning_rate": 9.880467586964646e-06, "loss": 2.7671, "step": 338800 }, { "epoch": 0.088398700565178, "grad_norm": 7.044036388397217, "learning_rate": 9.880285828577426e-06, "loss": 2.7701, "step": 339000 }, { "epoch": 0.08845085319088017, "grad_norm": 6.224187850952148, "learning_rate": 9.880103933780589e-06, "loss": 2.7674, "step": 339200 }, { "epoch": 0.08850300581658234, "grad_norm": 5.749971389770508, "learning_rate": 9.879921902579219e-06, "loss": 2.7177, "step": 339400 }, { "epoch": 0.08855515844228451, "grad_norm": 6.637483596801758, "learning_rate": 9.879739734978408e-06, "loss": 2.7148, "step": 339600 }, { "epoch": 0.08860731106798668, "grad_norm": 6.52347469329834, "learning_rate": 9.879557430983242e-06, "loss": 2.7768, "step": 339800 }, { "epoch": 0.08865946369368885, "grad_norm": 7.416553497314453, "learning_rate": 9.879374990598821e-06, "loss": 2.7519, "step": 340000 }, { "epoch": 0.08871161631939102, "grad_norm": 7.152632713317871, "learning_rate": 9.879192413830244e-06, "loss": 2.761, "step": 340200 }, { "epoch": 0.08876376894509319, "grad_norm": 6.972984313964844, "learning_rate": 9.879009700682611e-06, "loss": 2.7335, "step": 340400 }, { "epoch": 0.08881592157079536, "grad_norm": 6.66843318939209, "learning_rate": 9.878826851161032e-06, "loss": 2.7011, "step": 340600 }, { "epoch": 0.08886807419649753, "grad_norm": 6.625106334686279, "learning_rate": 9.878643865270617e-06, "loss": 2.7638, "step": 340800 }, { "epoch": 0.0889202268221997, "grad_norm": 5.736109733581543, "learning_rate": 9.87846074301648e-06, "loss": 2.6837, "step": 341000 }, { "epoch": 0.08897237944790187, "grad_norm": 6.541588306427002, "learning_rate": 9.878277484403742e-06, "loss": 2.785, "step": 341200 }, { "epoch": 0.08902453207360404, "grad_norm": 6.787119388580322, "learning_rate": 9.878094089437523e-06, "loss": 2.768, "step": 341400 }, { "epoch": 0.08907668469930621, "grad_norm": 6.477860927581787, "learning_rate": 9.877910558122948e-06, "loss": 2.7734, "step": 341600 }, { "epoch": 0.08912883732500838, "grad_norm": 6.601665496826172, "learning_rate": 9.87772689046515e-06, "loss": 2.7677, "step": 341800 }, { "epoch": 0.08918098995071055, "grad_norm": 6.276432514190674, "learning_rate": 9.87754308646926e-06, "loss": 2.7615, "step": 342000 }, { "epoch": 0.08923314257641272, "grad_norm": 6.968433380126953, "learning_rate": 9.877359146140416e-06, "loss": 2.7536, "step": 342200 }, { "epoch": 0.0892852952021149, "grad_norm": 6.095193862915039, "learning_rate": 9.877175069483762e-06, "loss": 2.727, "step": 342400 }, { "epoch": 0.08933744782781707, "grad_norm": 6.783011436462402, "learning_rate": 9.87699085650444e-06, "loss": 2.7556, "step": 342600 }, { "epoch": 0.08938960045351924, "grad_norm": 6.720550537109375, "learning_rate": 9.876806507207601e-06, "loss": 2.7567, "step": 342800 }, { "epoch": 0.0894417530792214, "grad_norm": 7.076850891113281, "learning_rate": 9.876622021598396e-06, "loss": 2.735, "step": 343000 }, { "epoch": 0.08949390570492358, "grad_norm": 6.559168815612793, "learning_rate": 9.876437399681983e-06, "loss": 2.7888, "step": 343200 }, { "epoch": 0.08954605833062575, "grad_norm": 6.786516189575195, "learning_rate": 9.876252641463522e-06, "loss": 2.7834, "step": 343400 }, { "epoch": 0.08959821095632792, "grad_norm": 7.153230667114258, "learning_rate": 9.876067746948176e-06, "loss": 2.724, "step": 343600 }, { "epoch": 0.08965036358203009, "grad_norm": 6.643190860748291, "learning_rate": 9.875882716141116e-06, "loss": 2.7588, "step": 343800 }, { "epoch": 0.08970251620773226, "grad_norm": 5.990670680999756, "learning_rate": 9.875697549047511e-06, "loss": 2.7544, "step": 344000 }, { "epoch": 0.08975466883343443, "grad_norm": 6.413429260253906, "learning_rate": 9.875512245672538e-06, "loss": 2.755, "step": 344200 }, { "epoch": 0.0898068214591366, "grad_norm": 6.6495361328125, "learning_rate": 9.875326806021377e-06, "loss": 2.7291, "step": 344400 }, { "epoch": 0.08985897408483877, "grad_norm": 6.507997512817383, "learning_rate": 9.875141230099209e-06, "loss": 2.7421, "step": 344600 }, { "epoch": 0.08991112671054093, "grad_norm": 6.5377726554870605, "learning_rate": 9.874955517911223e-06, "loss": 2.7329, "step": 344800 }, { "epoch": 0.0899632793362431, "grad_norm": 7.305025577545166, "learning_rate": 9.874769669462608e-06, "loss": 2.7645, "step": 345000 }, { "epoch": 0.09001543196194527, "grad_norm": 6.375731945037842, "learning_rate": 9.87458368475856e-06, "loss": 2.7613, "step": 345200 }, { "epoch": 0.09006758458764744, "grad_norm": 6.055731773376465, "learning_rate": 9.87439756380428e-06, "loss": 2.7406, "step": 345400 }, { "epoch": 0.09011973721334961, "grad_norm": 6.731203556060791, "learning_rate": 9.874211306604966e-06, "loss": 2.7252, "step": 345600 }, { "epoch": 0.09017188983905178, "grad_norm": 7.642002582550049, "learning_rate": 9.874024913165825e-06, "loss": 2.7897, "step": 345800 }, { "epoch": 0.09022404246475395, "grad_norm": 6.946300983428955, "learning_rate": 9.873838383492069e-06, "loss": 2.7631, "step": 346000 }, { "epoch": 0.09027619509045612, "grad_norm": 6.039514064788818, "learning_rate": 9.873651717588909e-06, "loss": 2.7506, "step": 346200 }, { "epoch": 0.09032834771615829, "grad_norm": 6.728977203369141, "learning_rate": 9.873464915461566e-06, "loss": 2.747, "step": 346400 }, { "epoch": 0.09038050034186046, "grad_norm": 5.914807319641113, "learning_rate": 9.873277977115256e-06, "loss": 2.7535, "step": 346600 }, { "epoch": 0.09043265296756263, "grad_norm": 7.127989768981934, "learning_rate": 9.87309090255521e-06, "loss": 2.7418, "step": 346800 }, { "epoch": 0.0904848055932648, "grad_norm": 6.722864151000977, "learning_rate": 9.872903691786655e-06, "loss": 2.753, "step": 347000 }, { "epoch": 0.09053695821896697, "grad_norm": 7.177712440490723, "learning_rate": 9.87271634481482e-06, "loss": 2.7418, "step": 347200 }, { "epoch": 0.09058911084466914, "grad_norm": 6.4624552726745605, "learning_rate": 9.872528861644947e-06, "loss": 2.79, "step": 347400 }, { "epoch": 0.09064126347037131, "grad_norm": 6.420626163482666, "learning_rate": 9.872341242282274e-06, "loss": 2.7962, "step": 347600 }, { "epoch": 0.09069341609607348, "grad_norm": 5.890540599822998, "learning_rate": 9.872153486732045e-06, "loss": 2.7281, "step": 347800 }, { "epoch": 0.09074556872177565, "grad_norm": 6.1183013916015625, "learning_rate": 9.871965594999509e-06, "loss": 2.7133, "step": 348000 }, { "epoch": 0.09079772134747782, "grad_norm": 6.170734405517578, "learning_rate": 9.871777567089916e-06, "loss": 2.7438, "step": 348200 }, { "epoch": 0.09084987397317999, "grad_norm": 6.432772636413574, "learning_rate": 9.871589403008524e-06, "loss": 2.7667, "step": 348400 }, { "epoch": 0.09090202659888216, "grad_norm": 6.09767484664917, "learning_rate": 9.87140110276059e-06, "loss": 2.7265, "step": 348600 }, { "epoch": 0.09095417922458433, "grad_norm": 6.650676727294922, "learning_rate": 9.871212666351378e-06, "loss": 2.7454, "step": 348800 }, { "epoch": 0.0910063318502865, "grad_norm": 6.796266078948975, "learning_rate": 9.871024093786154e-06, "loss": 2.7593, "step": 349000 }, { "epoch": 0.09105848447598867, "grad_norm": 6.901579856872559, "learning_rate": 9.870835385070191e-06, "loss": 2.7642, "step": 349200 }, { "epoch": 0.09111063710169084, "grad_norm": 7.005277633666992, "learning_rate": 9.870646540208763e-06, "loss": 2.6938, "step": 349400 }, { "epoch": 0.09116278972739301, "grad_norm": 7.363898277282715, "learning_rate": 9.87045755920715e-06, "loss": 2.7215, "step": 349600 }, { "epoch": 0.09121494235309519, "grad_norm": 6.584136009216309, "learning_rate": 9.870268442070629e-06, "loss": 2.7236, "step": 349800 }, { "epoch": 0.09126709497879736, "grad_norm": 6.563084602355957, "learning_rate": 9.870079188804492e-06, "loss": 2.7463, "step": 350000 }, { "epoch": 0.09131924760449953, "grad_norm": 6.536829471588135, "learning_rate": 9.869889799414026e-06, "loss": 2.814, "step": 350200 }, { "epoch": 0.0913714002302017, "grad_norm": 6.699076175689697, "learning_rate": 9.869700273904524e-06, "loss": 2.7248, "step": 350400 }, { "epoch": 0.09142355285590385, "grad_norm": 6.6044111251831055, "learning_rate": 9.869510612281284e-06, "loss": 2.7594, "step": 350600 }, { "epoch": 0.09147570548160602, "grad_norm": 6.523010730743408, "learning_rate": 9.869320814549608e-06, "loss": 2.7255, "step": 350800 }, { "epoch": 0.0915278581073082, "grad_norm": 6.2166218757629395, "learning_rate": 9.869130880714801e-06, "loss": 2.7329, "step": 351000 }, { "epoch": 0.09158001073301036, "grad_norm": 7.409599304199219, "learning_rate": 9.868940810782172e-06, "loss": 2.7557, "step": 351200 }, { "epoch": 0.09163216335871253, "grad_norm": 6.580782890319824, "learning_rate": 9.868750604757034e-06, "loss": 2.7538, "step": 351400 }, { "epoch": 0.0916843159844147, "grad_norm": 6.399933815002441, "learning_rate": 9.868560262644701e-06, "loss": 2.7467, "step": 351600 }, { "epoch": 0.09173646861011688, "grad_norm": 6.934614181518555, "learning_rate": 9.868369784450496e-06, "loss": 2.7431, "step": 351800 }, { "epoch": 0.09178862123581905, "grad_norm": 6.793469429016113, "learning_rate": 9.868179170179742e-06, "loss": 2.7429, "step": 352000 }, { "epoch": 0.09184077386152122, "grad_norm": 6.359063148498535, "learning_rate": 9.867988419837765e-06, "loss": 2.7431, "step": 352200 }, { "epoch": 0.09189292648722339, "grad_norm": 6.626612663269043, "learning_rate": 9.8677975334299e-06, "loss": 2.7322, "step": 352400 }, { "epoch": 0.09194507911292556, "grad_norm": 7.432502746582031, "learning_rate": 9.867606510961482e-06, "loss": 2.7438, "step": 352600 }, { "epoch": 0.09199723173862773, "grad_norm": 6.605191707611084, "learning_rate": 9.867415352437849e-06, "loss": 2.7487, "step": 352800 }, { "epoch": 0.0920493843643299, "grad_norm": 7.211688995361328, "learning_rate": 9.867224057864344e-06, "loss": 2.721, "step": 353000 }, { "epoch": 0.09210153699003207, "grad_norm": 6.4896135330200195, "learning_rate": 9.867032627246315e-06, "loss": 2.7345, "step": 353200 }, { "epoch": 0.09215368961573424, "grad_norm": 7.31119966506958, "learning_rate": 9.866841060589113e-06, "loss": 2.7576, "step": 353400 }, { "epoch": 0.09220584224143641, "grad_norm": 7.012179851531982, "learning_rate": 9.866649357898089e-06, "loss": 2.72, "step": 353600 }, { "epoch": 0.09225799486713858, "grad_norm": 6.721981048583984, "learning_rate": 9.866457519178605e-06, "loss": 2.7204, "step": 353800 }, { "epoch": 0.09231014749284075, "grad_norm": 6.36168098449707, "learning_rate": 9.866265544436024e-06, "loss": 2.7532, "step": 354000 }, { "epoch": 0.09236230011854292, "grad_norm": 6.767820835113525, "learning_rate": 9.866073433675709e-06, "loss": 2.7872, "step": 354200 }, { "epoch": 0.09241445274424509, "grad_norm": 6.610678672790527, "learning_rate": 9.86588118690303e-06, "loss": 2.687, "step": 354400 }, { "epoch": 0.09246660536994726, "grad_norm": 6.138505458831787, "learning_rate": 9.865688804123361e-06, "loss": 2.7339, "step": 354600 }, { "epoch": 0.09251875799564943, "grad_norm": 6.75731897354126, "learning_rate": 9.865496285342079e-06, "loss": 2.7374, "step": 354800 }, { "epoch": 0.0925709106213516, "grad_norm": 6.869755268096924, "learning_rate": 9.865303630564569e-06, "loss": 2.7248, "step": 355000 }, { "epoch": 0.09262306324705377, "grad_norm": 6.32354211807251, "learning_rate": 9.86511083979621e-06, "loss": 2.7101, "step": 355200 }, { "epoch": 0.09267521587275594, "grad_norm": 6.488105297088623, "learning_rate": 9.864917913042393e-06, "loss": 2.7202, "step": 355400 }, { "epoch": 0.09272736849845811, "grad_norm": 6.858880043029785, "learning_rate": 9.864724850308513e-06, "loss": 2.7165, "step": 355600 }, { "epoch": 0.09277952112416028, "grad_norm": 7.065299034118652, "learning_rate": 9.864531651599963e-06, "loss": 2.759, "step": 355800 }, { "epoch": 0.09283167374986245, "grad_norm": 6.129859924316406, "learning_rate": 9.864338316922145e-06, "loss": 2.7342, "step": 356000 }, { "epoch": 0.09288382637556462, "grad_norm": 6.4933648109436035, "learning_rate": 9.864144846280461e-06, "loss": 2.7194, "step": 356200 }, { "epoch": 0.0929359790012668, "grad_norm": 6.755699634552002, "learning_rate": 9.86395123968032e-06, "loss": 2.7097, "step": 356400 }, { "epoch": 0.09298813162696895, "grad_norm": 6.979990005493164, "learning_rate": 9.863757497127134e-06, "loss": 2.6846, "step": 356600 }, { "epoch": 0.09304028425267112, "grad_norm": 7.256637096405029, "learning_rate": 9.863563618626317e-06, "loss": 2.7368, "step": 356800 }, { "epoch": 0.09309243687837329, "grad_norm": 6.8452253341674805, "learning_rate": 9.86336960418329e-06, "loss": 2.75, "step": 357000 }, { "epoch": 0.09314458950407546, "grad_norm": 5.944540977478027, "learning_rate": 9.863175453803476e-06, "loss": 2.7395, "step": 357200 }, { "epoch": 0.09319674212977763, "grad_norm": 7.049804210662842, "learning_rate": 9.862981167492298e-06, "loss": 2.7871, "step": 357400 }, { "epoch": 0.0932488947554798, "grad_norm": 6.192124843597412, "learning_rate": 9.862786745255191e-06, "loss": 2.7268, "step": 357600 }, { "epoch": 0.09330104738118197, "grad_norm": 6.69340181350708, "learning_rate": 9.862592187097587e-06, "loss": 2.7712, "step": 357800 }, { "epoch": 0.09335320000688414, "grad_norm": 6.953091144561768, "learning_rate": 9.862397493024925e-06, "loss": 2.7733, "step": 358000 }, { "epoch": 0.09340535263258631, "grad_norm": 7.205249309539795, "learning_rate": 9.862202663042647e-06, "loss": 2.691, "step": 358200 }, { "epoch": 0.09345750525828848, "grad_norm": 6.435111999511719, "learning_rate": 9.862007697156195e-06, "loss": 2.7516, "step": 358400 }, { "epoch": 0.09350965788399065, "grad_norm": 6.6230669021606445, "learning_rate": 9.861812595371026e-06, "loss": 2.7293, "step": 358600 }, { "epoch": 0.09356181050969282, "grad_norm": 6.619906425476074, "learning_rate": 9.861617357692585e-06, "loss": 2.7295, "step": 358800 }, { "epoch": 0.093613963135395, "grad_norm": 6.5545268058776855, "learning_rate": 9.861421984126335e-06, "loss": 2.72, "step": 359000 }, { "epoch": 0.09366611576109717, "grad_norm": 7.116846561431885, "learning_rate": 9.861226474677737e-06, "loss": 2.7458, "step": 359200 }, { "epoch": 0.09371826838679934, "grad_norm": 7.412746429443359, "learning_rate": 9.861030829352252e-06, "loss": 2.7375, "step": 359400 }, { "epoch": 0.0937704210125015, "grad_norm": 7.558204174041748, "learning_rate": 9.86083504815535e-06, "loss": 2.7256, "step": 359600 }, { "epoch": 0.09382257363820368, "grad_norm": 6.926628589630127, "learning_rate": 9.860639131092504e-06, "loss": 2.7327, "step": 359800 }, { "epoch": 0.09387472626390585, "grad_norm": 6.14035177230835, "learning_rate": 9.86044307816919e-06, "loss": 2.7614, "step": 360000 }, { "epoch": 0.09392687888960802, "grad_norm": 7.323028087615967, "learning_rate": 9.860246889390888e-06, "loss": 2.7235, "step": 360200 }, { "epoch": 0.09397903151531019, "grad_norm": 6.862823009490967, "learning_rate": 9.86005056476308e-06, "loss": 2.75, "step": 360400 }, { "epoch": 0.09403118414101236, "grad_norm": 6.618449687957764, "learning_rate": 9.859854104291256e-06, "loss": 2.7222, "step": 360600 }, { "epoch": 0.09408333676671453, "grad_norm": 5.8654608726501465, "learning_rate": 9.859657507980907e-06, "loss": 2.7302, "step": 360800 }, { "epoch": 0.0941354893924167, "grad_norm": 6.324068546295166, "learning_rate": 9.859460775837526e-06, "loss": 2.7301, "step": 361000 }, { "epoch": 0.09418764201811887, "grad_norm": 6.8306732177734375, "learning_rate": 9.859263907866612e-06, "loss": 2.7369, "step": 361200 }, { "epoch": 0.09423979464382104, "grad_norm": 6.69620943069458, "learning_rate": 9.859066904073671e-06, "loss": 2.7301, "step": 361400 }, { "epoch": 0.09429194726952321, "grad_norm": 6.917806148529053, "learning_rate": 9.858869764464208e-06, "loss": 2.7438, "step": 361600 }, { "epoch": 0.09434409989522538, "grad_norm": 7.454828262329102, "learning_rate": 9.85867248904373e-06, "loss": 2.729, "step": 361800 }, { "epoch": 0.09439625252092755, "grad_norm": 6.281285762786865, "learning_rate": 9.858475077817756e-06, "loss": 2.7598, "step": 362000 }, { "epoch": 0.09444840514662972, "grad_norm": 6.297204971313477, "learning_rate": 9.8582775307918e-06, "loss": 2.741, "step": 362200 }, { "epoch": 0.09450055777233188, "grad_norm": 6.6822590827941895, "learning_rate": 9.858079847971384e-06, "loss": 2.715, "step": 362400 }, { "epoch": 0.09455271039803405, "grad_norm": 6.762353897094727, "learning_rate": 9.857882029362036e-06, "loss": 2.727, "step": 362600 }, { "epoch": 0.09460486302373622, "grad_norm": 7.140170574188232, "learning_rate": 9.857684074969285e-06, "loss": 2.714, "step": 362800 }, { "epoch": 0.09465701564943839, "grad_norm": 6.909539222717285, "learning_rate": 9.85748598479866e-06, "loss": 2.7019, "step": 363000 }, { "epoch": 0.09470916827514056, "grad_norm": 7.183372497558594, "learning_rate": 9.857287758855705e-06, "loss": 2.7792, "step": 363200 }, { "epoch": 0.09476132090084273, "grad_norm": 6.247964859008789, "learning_rate": 9.857089397145954e-06, "loss": 2.7433, "step": 363400 }, { "epoch": 0.0948134735265449, "grad_norm": 7.346426010131836, "learning_rate": 9.856890899674954e-06, "loss": 2.6835, "step": 363600 }, { "epoch": 0.09486562615224707, "grad_norm": 7.062637805938721, "learning_rate": 9.856692266448254e-06, "loss": 2.7761, "step": 363800 }, { "epoch": 0.09491777877794924, "grad_norm": 7.02108907699585, "learning_rate": 9.856493497471405e-06, "loss": 2.7348, "step": 364000 }, { "epoch": 0.09496993140365141, "grad_norm": 6.802475929260254, "learning_rate": 9.856294592749963e-06, "loss": 2.7111, "step": 364200 }, { "epoch": 0.09502208402935358, "grad_norm": 6.6680402755737305, "learning_rate": 9.856095552289487e-06, "loss": 2.7501, "step": 364400 }, { "epoch": 0.09507423665505575, "grad_norm": 6.280881404876709, "learning_rate": 9.85589637609554e-06, "loss": 2.7415, "step": 364600 }, { "epoch": 0.09512638928075792, "grad_norm": 6.65505313873291, "learning_rate": 9.85569706417369e-06, "loss": 2.7116, "step": 364800 }, { "epoch": 0.09517854190646009, "grad_norm": 6.576626777648926, "learning_rate": 9.855497616529511e-06, "loss": 2.7489, "step": 365000 }, { "epoch": 0.09523069453216226, "grad_norm": 7.567196369171143, "learning_rate": 9.855298033168575e-06, "loss": 2.7427, "step": 365200 }, { "epoch": 0.09528284715786443, "grad_norm": 6.262023448944092, "learning_rate": 9.855098314096459e-06, "loss": 2.7202, "step": 365400 }, { "epoch": 0.0953349997835666, "grad_norm": 6.940713405609131, "learning_rate": 9.854898459318748e-06, "loss": 2.7366, "step": 365600 }, { "epoch": 0.09538715240926877, "grad_norm": 7.290978908538818, "learning_rate": 9.854698468841024e-06, "loss": 2.732, "step": 365800 }, { "epoch": 0.09543930503497094, "grad_norm": 6.973400592803955, "learning_rate": 9.854498342668883e-06, "loss": 2.7675, "step": 366000 }, { "epoch": 0.09549145766067312, "grad_norm": 6.667946815490723, "learning_rate": 9.854298080807917e-06, "loss": 2.7709, "step": 366200 }, { "epoch": 0.09554361028637529, "grad_norm": 6.699540615081787, "learning_rate": 9.854097683263719e-06, "loss": 2.7282, "step": 366400 }, { "epoch": 0.09559576291207746, "grad_norm": 6.533851146697998, "learning_rate": 9.853897150041896e-06, "loss": 2.6941, "step": 366600 }, { "epoch": 0.09564791553777963, "grad_norm": 6.288170337677002, "learning_rate": 9.85369648114805e-06, "loss": 2.7054, "step": 366800 }, { "epoch": 0.0957000681634818, "grad_norm": 6.440084457397461, "learning_rate": 9.853495676587791e-06, "loss": 2.7668, "step": 367000 }, { "epoch": 0.09575222078918397, "grad_norm": 6.943708896636963, "learning_rate": 9.853294736366732e-06, "loss": 2.758, "step": 367200 }, { "epoch": 0.09580437341488614, "grad_norm": 7.354572296142578, "learning_rate": 9.85309366049049e-06, "loss": 2.7425, "step": 367400 }, { "epoch": 0.09585652604058831, "grad_norm": 6.695876121520996, "learning_rate": 9.852892448964682e-06, "loss": 2.7463, "step": 367600 }, { "epoch": 0.09590867866629048, "grad_norm": 6.828666687011719, "learning_rate": 9.852691101794937e-06, "loss": 2.74, "step": 367800 }, { "epoch": 0.09596083129199265, "grad_norm": 6.269225120544434, "learning_rate": 9.852489618986878e-06, "loss": 2.718, "step": 368000 }, { "epoch": 0.09601298391769482, "grad_norm": 6.452040672302246, "learning_rate": 9.85228800054614e-06, "loss": 2.7166, "step": 368200 }, { "epoch": 0.09606513654339698, "grad_norm": 6.910484790802002, "learning_rate": 9.852086246478358e-06, "loss": 2.744, "step": 368400 }, { "epoch": 0.09611728916909915, "grad_norm": 8.016570091247559, "learning_rate": 9.85188435678917e-06, "loss": 2.7105, "step": 368600 }, { "epoch": 0.09616944179480132, "grad_norm": 6.72928524017334, "learning_rate": 9.85168233148422e-06, "loss": 2.7281, "step": 368800 }, { "epoch": 0.09622159442050349, "grad_norm": 7.141217231750488, "learning_rate": 9.851480170569155e-06, "loss": 2.7017, "step": 369000 }, { "epoch": 0.09627374704620566, "grad_norm": 7.320204734802246, "learning_rate": 9.851277874049624e-06, "loss": 2.7614, "step": 369200 }, { "epoch": 0.09632589967190783, "grad_norm": 6.801264762878418, "learning_rate": 9.851075441931285e-06, "loss": 2.7137, "step": 369400 }, { "epoch": 0.09637805229761, "grad_norm": 6.665674686431885, "learning_rate": 9.850872874219792e-06, "loss": 2.7348, "step": 369600 }, { "epoch": 0.09643020492331217, "grad_norm": 7.685469627380371, "learning_rate": 9.85067017092081e-06, "loss": 2.7101, "step": 369800 }, { "epoch": 0.09648235754901434, "grad_norm": 7.03206729888916, "learning_rate": 9.850467332040003e-06, "loss": 2.7283, "step": 370000 }, { "epoch": 0.09653451017471651, "grad_norm": 6.763091087341309, "learning_rate": 9.850264357583042e-06, "loss": 2.721, "step": 370200 }, { "epoch": 0.09658666280041868, "grad_norm": 7.308859348297119, "learning_rate": 9.850061247555598e-06, "loss": 2.7411, "step": 370400 }, { "epoch": 0.09663881542612085, "grad_norm": 6.72197961807251, "learning_rate": 9.84985800196335e-06, "loss": 2.7493, "step": 370600 }, { "epoch": 0.09669096805182302, "grad_norm": 7.104571342468262, "learning_rate": 9.849654620811981e-06, "loss": 2.7556, "step": 370800 }, { "epoch": 0.09674312067752519, "grad_norm": 6.912766456604004, "learning_rate": 9.849451104107172e-06, "loss": 2.7397, "step": 371000 }, { "epoch": 0.09679527330322736, "grad_norm": 7.259765148162842, "learning_rate": 9.849247451854614e-06, "loss": 2.6929, "step": 371200 }, { "epoch": 0.09684742592892953, "grad_norm": 7.524007320404053, "learning_rate": 9.849043664059996e-06, "loss": 2.7068, "step": 371400 }, { "epoch": 0.0968995785546317, "grad_norm": 6.4871392250061035, "learning_rate": 9.848839740729018e-06, "loss": 2.752, "step": 371600 }, { "epoch": 0.09695173118033387, "grad_norm": 6.942018985748291, "learning_rate": 9.848635681867377e-06, "loss": 2.7455, "step": 371800 }, { "epoch": 0.09700388380603604, "grad_norm": 6.313770294189453, "learning_rate": 9.84843148748078e-06, "loss": 2.7225, "step": 372000 }, { "epoch": 0.09705603643173821, "grad_norm": 7.180691242218018, "learning_rate": 9.848227157574932e-06, "loss": 2.7707, "step": 372200 }, { "epoch": 0.09710818905744038, "grad_norm": 6.999627113342285, "learning_rate": 9.848022692155544e-06, "loss": 2.7113, "step": 372400 }, { "epoch": 0.09716034168314255, "grad_norm": 6.8867950439453125, "learning_rate": 9.847818091228332e-06, "loss": 2.7619, "step": 372600 }, { "epoch": 0.09721249430884472, "grad_norm": 6.820121765136719, "learning_rate": 9.847613354799014e-06, "loss": 2.7334, "step": 372800 }, { "epoch": 0.0972646469345469, "grad_norm": 6.528743267059326, "learning_rate": 9.847408482873316e-06, "loss": 2.7372, "step": 373000 }, { "epoch": 0.09731679956024906, "grad_norm": 7.362827301025391, "learning_rate": 9.847203475456959e-06, "loss": 2.7489, "step": 373200 }, { "epoch": 0.09736895218595124, "grad_norm": 7.119494438171387, "learning_rate": 9.846998332555676e-06, "loss": 2.7094, "step": 373400 }, { "epoch": 0.0974211048116534, "grad_norm": 7.4878387451171875, "learning_rate": 9.8467930541752e-06, "loss": 2.7318, "step": 373600 }, { "epoch": 0.09747325743735558, "grad_norm": 7.716200828552246, "learning_rate": 9.846587640321273e-06, "loss": 2.8075, "step": 373800 }, { "epoch": 0.09752541006305775, "grad_norm": 6.292218208312988, "learning_rate": 9.84638209099963e-06, "loss": 2.7333, "step": 374000 }, { "epoch": 0.0975775626887599, "grad_norm": 6.929902076721191, "learning_rate": 9.846176406216019e-06, "loss": 2.7248, "step": 374200 }, { "epoch": 0.09762971531446207, "grad_norm": 7.24159049987793, "learning_rate": 9.845970585976192e-06, "loss": 2.749, "step": 374400 }, { "epoch": 0.09768186794016424, "grad_norm": 7.5973663330078125, "learning_rate": 9.8457646302859e-06, "loss": 2.7228, "step": 374600 }, { "epoch": 0.09773402056586641, "grad_norm": 7.047082424163818, "learning_rate": 9.845558539150895e-06, "loss": 2.7224, "step": 374800 }, { "epoch": 0.09778617319156858, "grad_norm": 6.793696880340576, "learning_rate": 9.845352312576946e-06, "loss": 2.7347, "step": 375000 }, { "epoch": 0.09783832581727075, "grad_norm": 7.282424449920654, "learning_rate": 9.84514595056981e-06, "loss": 2.7181, "step": 375200 }, { "epoch": 0.09789047844297293, "grad_norm": 7.171825885772705, "learning_rate": 9.84493945313526e-06, "loss": 2.7174, "step": 375400 }, { "epoch": 0.0979426310686751, "grad_norm": 7.00319766998291, "learning_rate": 9.844732820279064e-06, "loss": 2.7221, "step": 375600 }, { "epoch": 0.09799478369437727, "grad_norm": 7.138320446014404, "learning_rate": 9.844526052007001e-06, "loss": 2.7306, "step": 375800 }, { "epoch": 0.09804693632007944, "grad_norm": 6.744442939758301, "learning_rate": 9.84431914832485e-06, "loss": 2.6985, "step": 376000 }, { "epoch": 0.0980990889457816, "grad_norm": 7.503383636474609, "learning_rate": 9.84411210923839e-06, "loss": 2.7451, "step": 376200 }, { "epoch": 0.09815124157148378, "grad_norm": 6.678633689880371, "learning_rate": 9.843904934753414e-06, "loss": 2.7316, "step": 376400 }, { "epoch": 0.09820339419718595, "grad_norm": 6.815631866455078, "learning_rate": 9.843697624875708e-06, "loss": 2.7003, "step": 376600 }, { "epoch": 0.09825554682288812, "grad_norm": 6.820425987243652, "learning_rate": 9.843490179611069e-06, "loss": 2.7462, "step": 376800 }, { "epoch": 0.09830769944859029, "grad_norm": 7.071511268615723, "learning_rate": 9.843282598965293e-06, "loss": 2.7266, "step": 377000 }, { "epoch": 0.09835985207429246, "grad_norm": 6.579829692840576, "learning_rate": 9.843074882944188e-06, "loss": 2.7225, "step": 377200 }, { "epoch": 0.09841200469999463, "grad_norm": 7.302144527435303, "learning_rate": 9.842867031553551e-06, "loss": 2.7498, "step": 377400 }, { "epoch": 0.0984641573256968, "grad_norm": 7.175357341766357, "learning_rate": 9.8426590447992e-06, "loss": 2.7374, "step": 377600 }, { "epoch": 0.09851630995139897, "grad_norm": 7.382828712463379, "learning_rate": 9.842450922686944e-06, "loss": 2.6983, "step": 377800 }, { "epoch": 0.09856846257710114, "grad_norm": 6.3378190994262695, "learning_rate": 9.8422426652226e-06, "loss": 2.7448, "step": 378000 }, { "epoch": 0.09862061520280331, "grad_norm": 5.941269397735596, "learning_rate": 9.84203427241199e-06, "loss": 2.7119, "step": 378200 }, { "epoch": 0.09867276782850548, "grad_norm": 7.9518842697143555, "learning_rate": 9.84182574426094e-06, "loss": 2.7104, "step": 378400 }, { "epoch": 0.09872492045420765, "grad_norm": 7.776278018951416, "learning_rate": 9.841617080775278e-06, "loss": 2.7247, "step": 378600 }, { "epoch": 0.09877707307990982, "grad_norm": 7.218269348144531, "learning_rate": 9.841408281960836e-06, "loss": 2.7109, "step": 378800 }, { "epoch": 0.09882922570561199, "grad_norm": 6.931689739227295, "learning_rate": 9.841199347823448e-06, "loss": 2.7071, "step": 379000 }, { "epoch": 0.09888137833131416, "grad_norm": 6.874160289764404, "learning_rate": 9.840990278368957e-06, "loss": 2.7048, "step": 379200 }, { "epoch": 0.09893353095701633, "grad_norm": 7.4862823486328125, "learning_rate": 9.840781073603208e-06, "loss": 2.7754, "step": 379400 }, { "epoch": 0.0989856835827185, "grad_norm": 6.68509578704834, "learning_rate": 9.840571733532044e-06, "loss": 2.7133, "step": 379600 }, { "epoch": 0.09903783620842067, "grad_norm": 7.173377990722656, "learning_rate": 9.840362258161322e-06, "loss": 2.7296, "step": 379800 }, { "epoch": 0.09908998883412284, "grad_norm": 6.899176120758057, "learning_rate": 9.84015264749689e-06, "loss": 2.7376, "step": 380000 }, { "epoch": 0.099142141459825, "grad_norm": 7.660719394683838, "learning_rate": 9.839942901544612e-06, "loss": 2.6995, "step": 380200 }, { "epoch": 0.09919429408552717, "grad_norm": 6.3884687423706055, "learning_rate": 9.83973302031035e-06, "loss": 2.7238, "step": 380400 }, { "epoch": 0.09924644671122934, "grad_norm": 7.147850513458252, "learning_rate": 9.839523003799969e-06, "loss": 2.7093, "step": 380600 }, { "epoch": 0.09929859933693151, "grad_norm": 7.06928014755249, "learning_rate": 9.839312852019337e-06, "loss": 2.6814, "step": 380800 }, { "epoch": 0.09935075196263368, "grad_norm": 6.980913162231445, "learning_rate": 9.839102564974336e-06, "loss": 2.6875, "step": 381000 }, { "epoch": 0.09940290458833585, "grad_norm": 7.352754592895508, "learning_rate": 9.838892142670834e-06, "loss": 2.6892, "step": 381200 }, { "epoch": 0.09945505721403802, "grad_norm": 7.018017768859863, "learning_rate": 9.838681585114721e-06, "loss": 2.7277, "step": 381400 }, { "epoch": 0.0995072098397402, "grad_norm": 7.365713119506836, "learning_rate": 9.838470892311876e-06, "loss": 2.7038, "step": 381600 }, { "epoch": 0.09955936246544236, "grad_norm": 6.4748711585998535, "learning_rate": 9.838260064268192e-06, "loss": 2.727, "step": 381800 }, { "epoch": 0.09961151509114453, "grad_norm": 7.302340507507324, "learning_rate": 9.83804910098956e-06, "loss": 2.7436, "step": 382000 }, { "epoch": 0.0996636677168467, "grad_norm": 6.783388614654541, "learning_rate": 9.837838002481876e-06, "loss": 2.6953, "step": 382200 }, { "epoch": 0.09971582034254887, "grad_norm": 7.748034477233887, "learning_rate": 9.837626768751043e-06, "loss": 2.737, "step": 382400 }, { "epoch": 0.09976797296825105, "grad_norm": 7.11197566986084, "learning_rate": 9.837415399802962e-06, "loss": 2.7801, "step": 382600 }, { "epoch": 0.09982012559395322, "grad_norm": 7.256601333618164, "learning_rate": 9.837203895643546e-06, "loss": 2.7174, "step": 382800 }, { "epoch": 0.09987227821965539, "grad_norm": 7.089999675750732, "learning_rate": 9.8369922562787e-06, "loss": 2.7441, "step": 383000 }, { "epoch": 0.09992443084535756, "grad_norm": 6.694625377655029, "learning_rate": 9.836780481714348e-06, "loss": 2.6984, "step": 383200 }, { "epoch": 0.09997658347105973, "grad_norm": 7.070290565490723, "learning_rate": 9.8365685719564e-06, "loss": 2.7062, "step": 383400 }, { "epoch": 0.1000287360967619, "grad_norm": 7.1637959480285645, "learning_rate": 9.836356527010785e-06, "loss": 2.7344, "step": 383600 }, { "epoch": 0.10008088872246407, "grad_norm": 6.960715293884277, "learning_rate": 9.83614434688343e-06, "loss": 2.7294, "step": 383800 }, { "epoch": 0.10013304134816624, "grad_norm": 6.766301155090332, "learning_rate": 9.835932031580262e-06, "loss": 2.7184, "step": 384000 }, { "epoch": 0.10018519397386841, "grad_norm": 6.3087358474731445, "learning_rate": 9.83571958110722e-06, "loss": 2.6987, "step": 384200 }, { "epoch": 0.10023734659957058, "grad_norm": 7.121251583099365, "learning_rate": 9.835506995470237e-06, "loss": 2.7211, "step": 384400 }, { "epoch": 0.10028949922527275, "grad_norm": 6.787637710571289, "learning_rate": 9.83529427467526e-06, "loss": 2.7046, "step": 384600 }, { "epoch": 0.10034165185097492, "grad_norm": 6.736263751983643, "learning_rate": 9.835081418728232e-06, "loss": 2.7612, "step": 384800 }, { "epoch": 0.10039380447667709, "grad_norm": 7.012972831726074, "learning_rate": 9.834868427635102e-06, "loss": 2.6529, "step": 385000 }, { "epoch": 0.10044595710237926, "grad_norm": 7.4089531898498535, "learning_rate": 9.834655301401827e-06, "loss": 2.7274, "step": 385200 }, { "epoch": 0.10049810972808143, "grad_norm": 6.628983497619629, "learning_rate": 9.83444204003436e-06, "loss": 2.7027, "step": 385400 }, { "epoch": 0.1005502623537836, "grad_norm": 6.684551239013672, "learning_rate": 9.834228643538665e-06, "loss": 2.72, "step": 385600 }, { "epoch": 0.10060241497948577, "grad_norm": 7.784319877624512, "learning_rate": 9.834015111920705e-06, "loss": 2.7083, "step": 385800 }, { "epoch": 0.10065456760518793, "grad_norm": 6.074433326721191, "learning_rate": 9.833801445186448e-06, "loss": 2.7258, "step": 386000 }, { "epoch": 0.1007067202308901, "grad_norm": 7.0298171043396, "learning_rate": 9.833587643341869e-06, "loss": 2.7326, "step": 386200 }, { "epoch": 0.10075887285659227, "grad_norm": 6.243155002593994, "learning_rate": 9.83337370639294e-06, "loss": 2.6969, "step": 386400 }, { "epoch": 0.10081102548229444, "grad_norm": 6.764981269836426, "learning_rate": 9.833159634345643e-06, "loss": 2.709, "step": 386600 }, { "epoch": 0.10086317810799661, "grad_norm": 6.844011306762695, "learning_rate": 9.832945427205963e-06, "loss": 2.738, "step": 386800 }, { "epoch": 0.10091533073369878, "grad_norm": 7.266912460327148, "learning_rate": 9.832731084979883e-06, "loss": 2.7677, "step": 387000 }, { "epoch": 0.10096748335940095, "grad_norm": 7.799858093261719, "learning_rate": 9.8325166076734e-06, "loss": 2.7008, "step": 387200 }, { "epoch": 0.10101963598510312, "grad_norm": 6.45143461227417, "learning_rate": 9.832301995292502e-06, "loss": 2.7465, "step": 387400 }, { "epoch": 0.10107178861080529, "grad_norm": 6.146852970123291, "learning_rate": 9.832087247843194e-06, "loss": 2.6801, "step": 387600 }, { "epoch": 0.10112394123650746, "grad_norm": 6.76859188079834, "learning_rate": 9.831872365331475e-06, "loss": 2.7365, "step": 387800 }, { "epoch": 0.10117609386220963, "grad_norm": 6.941719055175781, "learning_rate": 9.831657347763353e-06, "loss": 2.7285, "step": 388000 }, { "epoch": 0.1012282464879118, "grad_norm": 7.166417121887207, "learning_rate": 9.831442195144836e-06, "loss": 2.7176, "step": 388200 }, { "epoch": 0.10128039911361397, "grad_norm": 8.05933952331543, "learning_rate": 9.83122690748194e-06, "loss": 2.6899, "step": 388400 }, { "epoch": 0.10133255173931614, "grad_norm": 6.222696781158447, "learning_rate": 9.831011484780679e-06, "loss": 2.7254, "step": 388600 }, { "epoch": 0.10138470436501831, "grad_norm": 7.399394512176514, "learning_rate": 9.830795927047078e-06, "loss": 2.7333, "step": 388800 }, { "epoch": 0.10143685699072048, "grad_norm": 7.602478504180908, "learning_rate": 9.83058023428716e-06, "loss": 2.7245, "step": 389000 }, { "epoch": 0.10148900961642265, "grad_norm": 6.894057273864746, "learning_rate": 9.830364406506956e-06, "loss": 2.7472, "step": 389200 }, { "epoch": 0.10154116224212482, "grad_norm": 6.566694259643555, "learning_rate": 9.830148443712498e-06, "loss": 2.6904, "step": 389400 }, { "epoch": 0.101593314867827, "grad_norm": 6.913676738739014, "learning_rate": 9.829932345909819e-06, "loss": 2.7406, "step": 389600 }, { "epoch": 0.10164546749352917, "grad_norm": 6.623051166534424, "learning_rate": 9.829716113104964e-06, "loss": 2.71, "step": 389800 }, { "epoch": 0.10169762011923134, "grad_norm": 6.746899127960205, "learning_rate": 9.829499745303972e-06, "loss": 2.7284, "step": 390000 }, { "epoch": 0.1017497727449335, "grad_norm": 7.177034854888916, "learning_rate": 9.829283242512896e-06, "loss": 2.7186, "step": 390200 }, { "epoch": 0.10180192537063568, "grad_norm": 7.524350166320801, "learning_rate": 9.829066604737784e-06, "loss": 2.692, "step": 390400 }, { "epoch": 0.10185407799633785, "grad_norm": 6.514491558074951, "learning_rate": 9.828849831984693e-06, "loss": 2.7025, "step": 390600 }, { "epoch": 0.10190623062204002, "grad_norm": 7.224747180938721, "learning_rate": 9.82863292425968e-06, "loss": 2.7793, "step": 390800 }, { "epoch": 0.10195838324774219, "grad_norm": 7.076272010803223, "learning_rate": 9.82841588156881e-06, "loss": 2.7168, "step": 391000 }, { "epoch": 0.10201053587344436, "grad_norm": 7.147103786468506, "learning_rate": 9.828198703918148e-06, "loss": 2.7419, "step": 391200 }, { "epoch": 0.10206268849914653, "grad_norm": 6.700717926025391, "learning_rate": 9.827981391313766e-06, "loss": 2.668, "step": 391400 }, { "epoch": 0.1021148411248487, "grad_norm": 6.936060428619385, "learning_rate": 9.827763943761736e-06, "loss": 2.709, "step": 391600 }, { "epoch": 0.10216699375055086, "grad_norm": 6.975337982177734, "learning_rate": 9.827546361268139e-06, "loss": 2.7271, "step": 391800 }, { "epoch": 0.10221914637625303, "grad_norm": 7.4715118408203125, "learning_rate": 9.827328643839054e-06, "loss": 2.7067, "step": 392000 }, { "epoch": 0.1022712990019552, "grad_norm": 8.031476974487305, "learning_rate": 9.827110791480568e-06, "loss": 2.7094, "step": 392200 }, { "epoch": 0.10232345162765737, "grad_norm": 6.826619625091553, "learning_rate": 9.82689280419877e-06, "loss": 2.6987, "step": 392400 }, { "epoch": 0.10237560425335954, "grad_norm": 7.765749931335449, "learning_rate": 9.826674681999749e-06, "loss": 2.7286, "step": 392600 }, { "epoch": 0.10242775687906171, "grad_norm": 7.1541666984558105, "learning_rate": 9.826456424889608e-06, "loss": 2.7061, "step": 392800 }, { "epoch": 0.10247990950476388, "grad_norm": 6.534503936767578, "learning_rate": 9.826238032874445e-06, "loss": 2.7177, "step": 393000 }, { "epoch": 0.10253206213046605, "grad_norm": 7.043641090393066, "learning_rate": 9.826019505960363e-06, "loss": 2.7428, "step": 393200 }, { "epoch": 0.10258421475616822, "grad_norm": 6.870479583740234, "learning_rate": 9.82580084415347e-06, "loss": 2.73, "step": 393400 }, { "epoch": 0.10263636738187039, "grad_norm": 7.458539962768555, "learning_rate": 9.825582047459883e-06, "loss": 2.6956, "step": 393600 }, { "epoch": 0.10268852000757256, "grad_norm": 7.4572038650512695, "learning_rate": 9.825363115885711e-06, "loss": 2.7238, "step": 393800 }, { "epoch": 0.10274067263327473, "grad_norm": 6.63360595703125, "learning_rate": 9.825144049437077e-06, "loss": 2.752, "step": 394000 }, { "epoch": 0.1027928252589769, "grad_norm": 6.744916915893555, "learning_rate": 9.824924848120101e-06, "loss": 2.694, "step": 394200 }, { "epoch": 0.10284497788467907, "grad_norm": 7.818175315856934, "learning_rate": 9.824705511940914e-06, "loss": 2.7334, "step": 394400 }, { "epoch": 0.10289713051038124, "grad_norm": 6.705904483795166, "learning_rate": 9.824486040905646e-06, "loss": 2.7112, "step": 394600 }, { "epoch": 0.10294928313608341, "grad_norm": 6.82873010635376, "learning_rate": 9.824266435020428e-06, "loss": 2.6945, "step": 394800 }, { "epoch": 0.10300143576178558, "grad_norm": 7.144585132598877, "learning_rate": 9.824046694291401e-06, "loss": 2.7079, "step": 395000 }, { "epoch": 0.10305358838748775, "grad_norm": 7.308895587921143, "learning_rate": 9.823826818724707e-06, "loss": 2.6953, "step": 395200 }, { "epoch": 0.10310574101318992, "grad_norm": 6.582967281341553, "learning_rate": 9.823606808326491e-06, "loss": 2.7148, "step": 395400 }, { "epoch": 0.10315789363889209, "grad_norm": 7.389861106872559, "learning_rate": 9.823386663102902e-06, "loss": 2.6945, "step": 395600 }, { "epoch": 0.10321004626459426, "grad_norm": 7.0465006828308105, "learning_rate": 9.823166383060096e-06, "loss": 2.7263, "step": 395800 }, { "epoch": 0.10326219889029643, "grad_norm": 7.7955322265625, "learning_rate": 9.82294596820423e-06, "loss": 2.7256, "step": 396000 }, { "epoch": 0.1033143515159986, "grad_norm": 7.1410322189331055, "learning_rate": 9.82272541854146e-06, "loss": 2.6904, "step": 396200 }, { "epoch": 0.10336650414170077, "grad_norm": 7.031514644622803, "learning_rate": 9.822504734077953e-06, "loss": 2.6924, "step": 396400 }, { "epoch": 0.10341865676740294, "grad_norm": 7.500546932220459, "learning_rate": 9.822283914819882e-06, "loss": 2.6723, "step": 396600 }, { "epoch": 0.10347080939310511, "grad_norm": 7.630676746368408, "learning_rate": 9.82206296077341e-06, "loss": 2.7074, "step": 396800 }, { "epoch": 0.10352296201880729, "grad_norm": 6.640892028808594, "learning_rate": 9.821841871944723e-06, "loss": 2.7259, "step": 397000 }, { "epoch": 0.10357511464450946, "grad_norm": 7.342281818389893, "learning_rate": 9.821620648339993e-06, "loss": 2.6999, "step": 397200 }, { "epoch": 0.10362726727021163, "grad_norm": 7.83634614944458, "learning_rate": 9.821399289965408e-06, "loss": 2.7423, "step": 397400 }, { "epoch": 0.1036794198959138, "grad_norm": 7.7452826499938965, "learning_rate": 9.821177796827152e-06, "loss": 2.7492, "step": 397600 }, { "epoch": 0.10373157252161595, "grad_norm": 7.283862113952637, "learning_rate": 9.820956168931418e-06, "loss": 2.6983, "step": 397800 }, { "epoch": 0.10378372514731812, "grad_norm": 7.292904376983643, "learning_rate": 9.820734406284402e-06, "loss": 2.7275, "step": 398000 }, { "epoch": 0.1038358777730203, "grad_norm": 7.42738151550293, "learning_rate": 9.820512508892299e-06, "loss": 2.673, "step": 398200 }, { "epoch": 0.10388803039872246, "grad_norm": 7.42814302444458, "learning_rate": 9.820290476761314e-06, "loss": 2.7334, "step": 398400 }, { "epoch": 0.10394018302442463, "grad_norm": 7.708849906921387, "learning_rate": 9.820068309897653e-06, "loss": 2.6999, "step": 398600 }, { "epoch": 0.1039923356501268, "grad_norm": 7.1372551918029785, "learning_rate": 9.819846008307523e-06, "loss": 2.6921, "step": 398800 }, { "epoch": 0.10404448827582898, "grad_norm": 6.49362850189209, "learning_rate": 9.81962357199714e-06, "loss": 2.7561, "step": 399000 }, { "epoch": 0.10409664090153115, "grad_norm": 7.146295547485352, "learning_rate": 9.819401000972721e-06, "loss": 2.7148, "step": 399200 }, { "epoch": 0.10414879352723332, "grad_norm": 7.127407550811768, "learning_rate": 9.819178295240488e-06, "loss": 2.7187, "step": 399400 }, { "epoch": 0.10420094615293549, "grad_norm": 7.735341548919678, "learning_rate": 9.818955454806664e-06, "loss": 2.7221, "step": 399600 }, { "epoch": 0.10425309877863766, "grad_norm": 7.073373317718506, "learning_rate": 9.81873247967748e-06, "loss": 2.7069, "step": 399800 }, { "epoch": 0.10430525140433983, "grad_norm": 6.615175247192383, "learning_rate": 9.818509369859165e-06, "loss": 2.6807, "step": 400000 }, { "epoch": 0.104357404030042, "grad_norm": 7.4061174392700195, "learning_rate": 9.818286125357958e-06, "loss": 2.671, "step": 400200 }, { "epoch": 0.10440955665574417, "grad_norm": 7.3244524002075195, "learning_rate": 9.818062746180098e-06, "loss": 2.7053, "step": 400400 }, { "epoch": 0.10446170928144634, "grad_norm": 7.333294868469238, "learning_rate": 9.81783923233183e-06, "loss": 2.7226, "step": 400600 }, { "epoch": 0.10451386190714851, "grad_norm": 8.426340103149414, "learning_rate": 9.817615583819397e-06, "loss": 2.7258, "step": 400800 }, { "epoch": 0.10456601453285068, "grad_norm": 7.6054558753967285, "learning_rate": 9.817391800649056e-06, "loss": 2.6957, "step": 401000 }, { "epoch": 0.10461816715855285, "grad_norm": 7.174556732177734, "learning_rate": 9.81716788282706e-06, "loss": 2.6637, "step": 401200 }, { "epoch": 0.10467031978425502, "grad_norm": 7.382963180541992, "learning_rate": 9.816943830359666e-06, "loss": 2.6776, "step": 401400 }, { "epoch": 0.10472247240995719, "grad_norm": 7.8001203536987305, "learning_rate": 9.816719643253138e-06, "loss": 2.7124, "step": 401600 }, { "epoch": 0.10477462503565936, "grad_norm": 7.5196733474731445, "learning_rate": 9.816495321513744e-06, "loss": 2.6645, "step": 401800 }, { "epoch": 0.10482677766136153, "grad_norm": 6.8788628578186035, "learning_rate": 9.81627086514775e-06, "loss": 2.7302, "step": 402000 }, { "epoch": 0.1048789302870637, "grad_norm": 7.129185199737549, "learning_rate": 9.816046274161432e-06, "loss": 2.6782, "step": 402200 }, { "epoch": 0.10493108291276587, "grad_norm": 7.342111587524414, "learning_rate": 9.815821548561069e-06, "loss": 2.711, "step": 402400 }, { "epoch": 0.10498323553846804, "grad_norm": 5.888463020324707, "learning_rate": 9.81559668835294e-06, "loss": 2.6887, "step": 402600 }, { "epoch": 0.10503538816417021, "grad_norm": 7.24282169342041, "learning_rate": 9.81537169354333e-06, "loss": 2.746, "step": 402800 }, { "epoch": 0.10508754078987238, "grad_norm": 7.5223822593688965, "learning_rate": 9.81514656413853e-06, "loss": 2.7138, "step": 403000 }, { "epoch": 0.10513969341557455, "grad_norm": 6.787247180938721, "learning_rate": 9.814921300144831e-06, "loss": 2.7083, "step": 403200 }, { "epoch": 0.10519184604127672, "grad_norm": 8.027321815490723, "learning_rate": 9.814695901568529e-06, "loss": 2.7397, "step": 403400 }, { "epoch": 0.10524399866697888, "grad_norm": 7.712162017822266, "learning_rate": 9.814470368415925e-06, "loss": 2.6962, "step": 403600 }, { "epoch": 0.10529615129268105, "grad_norm": 7.541025638580322, "learning_rate": 9.814244700693325e-06, "loss": 2.717, "step": 403800 }, { "epoch": 0.10534830391838322, "grad_norm": 7.47944974899292, "learning_rate": 9.814018898407034e-06, "loss": 2.6941, "step": 404000 }, { "epoch": 0.10540045654408539, "grad_norm": 7.1561737060546875, "learning_rate": 9.813792961563363e-06, "loss": 2.6746, "step": 404200 }, { "epoch": 0.10545260916978756, "grad_norm": 7.5194220542907715, "learning_rate": 9.81356689016863e-06, "loss": 2.6951, "step": 404400 }, { "epoch": 0.10550476179548973, "grad_norm": 7.529834747314453, "learning_rate": 9.813340684229148e-06, "loss": 2.6781, "step": 404600 }, { "epoch": 0.1055569144211919, "grad_norm": 7.3788533210754395, "learning_rate": 9.813114343751248e-06, "loss": 2.6886, "step": 404800 }, { "epoch": 0.10560906704689407, "grad_norm": 7.26027250289917, "learning_rate": 9.81288786874125e-06, "loss": 2.7073, "step": 405000 }, { "epoch": 0.10566121967259624, "grad_norm": 7.290615081787109, "learning_rate": 9.812661259205489e-06, "loss": 2.72, "step": 405200 }, { "epoch": 0.10571337229829841, "grad_norm": 7.432085037231445, "learning_rate": 9.812434515150294e-06, "loss": 2.6673, "step": 405400 }, { "epoch": 0.10576552492400058, "grad_norm": 7.36521577835083, "learning_rate": 9.812207636582007e-06, "loss": 2.7188, "step": 405600 }, { "epoch": 0.10581767754970275, "grad_norm": 6.904064655303955, "learning_rate": 9.811980623506967e-06, "loss": 2.6936, "step": 405800 }, { "epoch": 0.10586983017540492, "grad_norm": 6.532309055328369, "learning_rate": 9.81175347593152e-06, "loss": 2.6753, "step": 406000 }, { "epoch": 0.1059219828011071, "grad_norm": 7.805009365081787, "learning_rate": 9.811526193862017e-06, "loss": 2.7279, "step": 406200 }, { "epoch": 0.10597413542680927, "grad_norm": 6.72572135925293, "learning_rate": 9.811298777304807e-06, "loss": 2.6603, "step": 406400 }, { "epoch": 0.10602628805251144, "grad_norm": 6.9229559898376465, "learning_rate": 9.811071226266248e-06, "loss": 2.6847, "step": 406600 }, { "epoch": 0.1060784406782136, "grad_norm": 7.181591033935547, "learning_rate": 9.810843540752703e-06, "loss": 2.7129, "step": 406800 }, { "epoch": 0.10613059330391578, "grad_norm": 7.120484828948975, "learning_rate": 9.810615720770533e-06, "loss": 2.7121, "step": 407000 }, { "epoch": 0.10618274592961795, "grad_norm": 8.234344482421875, "learning_rate": 9.810387766326108e-06, "loss": 2.6959, "step": 407200 }, { "epoch": 0.10623489855532012, "grad_norm": 7.5896100997924805, "learning_rate": 9.810159677425797e-06, "loss": 2.7096, "step": 407400 }, { "epoch": 0.10628705118102229, "grad_norm": 6.704971790313721, "learning_rate": 9.809931454075976e-06, "loss": 2.7057, "step": 407600 }, { "epoch": 0.10633920380672446, "grad_norm": 8.379301071166992, "learning_rate": 9.809703096283025e-06, "loss": 2.7059, "step": 407800 }, { "epoch": 0.10639135643242663, "grad_norm": 8.338081359863281, "learning_rate": 9.809474604053329e-06, "loss": 2.7231, "step": 408000 }, { "epoch": 0.1064435090581288, "grad_norm": 7.795104026794434, "learning_rate": 9.809245977393268e-06, "loss": 2.7019, "step": 408200 }, { "epoch": 0.10649566168383097, "grad_norm": 6.772091865539551, "learning_rate": 9.80901721630924e-06, "loss": 2.7151, "step": 408400 }, { "epoch": 0.10654781430953314, "grad_norm": 7.263775825500488, "learning_rate": 9.808788320807636e-06, "loss": 2.7085, "step": 408600 }, { "epoch": 0.10659996693523531, "grad_norm": 7.627434730529785, "learning_rate": 9.808559290894852e-06, "loss": 2.6663, "step": 408800 }, { "epoch": 0.10665211956093748, "grad_norm": 7.4458465576171875, "learning_rate": 9.808330126577293e-06, "loss": 2.6748, "step": 409000 }, { "epoch": 0.10670427218663965, "grad_norm": 7.223238468170166, "learning_rate": 9.808100827861361e-06, "loss": 2.6919, "step": 409200 }, { "epoch": 0.10675642481234182, "grad_norm": 7.894956111907959, "learning_rate": 9.807871394753468e-06, "loss": 2.6707, "step": 409400 }, { "epoch": 0.10680857743804398, "grad_norm": 7.548394203186035, "learning_rate": 9.807641827260027e-06, "loss": 2.6829, "step": 409600 }, { "epoch": 0.10686073006374615, "grad_norm": 6.605945587158203, "learning_rate": 9.807412125387449e-06, "loss": 2.7355, "step": 409800 }, { "epoch": 0.10691288268944832, "grad_norm": 6.510282516479492, "learning_rate": 9.807182289142163e-06, "loss": 2.726, "step": 410000 }, { "epoch": 0.10696503531515049, "grad_norm": 6.95159387588501, "learning_rate": 9.806952318530589e-06, "loss": 2.7377, "step": 410200 }, { "epoch": 0.10701718794085266, "grad_norm": 8.0206298828125, "learning_rate": 9.806722213559153e-06, "loss": 2.7229, "step": 410400 }, { "epoch": 0.10706934056655483, "grad_norm": 7.942624568939209, "learning_rate": 9.806491974234291e-06, "loss": 2.7275, "step": 410600 }, { "epoch": 0.107121493192257, "grad_norm": 8.461700439453125, "learning_rate": 9.806261600562434e-06, "loss": 2.6925, "step": 410800 }, { "epoch": 0.10717364581795917, "grad_norm": 7.115810871124268, "learning_rate": 9.806031092550025e-06, "loss": 2.6673, "step": 411000 }, { "epoch": 0.10722579844366134, "grad_norm": 7.023535251617432, "learning_rate": 9.805800450203504e-06, "loss": 2.768, "step": 411200 }, { "epoch": 0.10727795106936351, "grad_norm": 7.725059986114502, "learning_rate": 9.80556967352932e-06, "loss": 2.6898, "step": 411400 }, { "epoch": 0.10733010369506568, "grad_norm": 7.8010573387146, "learning_rate": 9.805338762533924e-06, "loss": 2.7011, "step": 411600 }, { "epoch": 0.10738225632076785, "grad_norm": 7.579238414764404, "learning_rate": 9.805107717223767e-06, "loss": 2.7297, "step": 411800 }, { "epoch": 0.10743440894647002, "grad_norm": 8.260748863220215, "learning_rate": 9.80487653760531e-06, "loss": 2.6825, "step": 412000 }, { "epoch": 0.10748656157217219, "grad_norm": 7.9606547355651855, "learning_rate": 9.804645223685012e-06, "loss": 2.7009, "step": 412200 }, { "epoch": 0.10753871419787436, "grad_norm": 8.684904098510742, "learning_rate": 9.804413775469342e-06, "loss": 2.7123, "step": 412400 }, { "epoch": 0.10759086682357653, "grad_norm": 7.265321731567383, "learning_rate": 9.804182192964765e-06, "loss": 2.664, "step": 412600 }, { "epoch": 0.1076430194492787, "grad_norm": 7.080298900604248, "learning_rate": 9.803950476177757e-06, "loss": 2.705, "step": 412800 }, { "epoch": 0.10769517207498087, "grad_norm": 7.625601768493652, "learning_rate": 9.803718625114796e-06, "loss": 2.7355, "step": 413000 }, { "epoch": 0.10774732470068304, "grad_norm": 7.863028526306152, "learning_rate": 9.803486639782357e-06, "loss": 2.7024, "step": 413200 }, { "epoch": 0.10779947732638522, "grad_norm": 7.483668327331543, "learning_rate": 9.803254520186932e-06, "loss": 2.7058, "step": 413400 }, { "epoch": 0.10785162995208739, "grad_norm": 7.60951042175293, "learning_rate": 9.803022266335e-06, "loss": 2.689, "step": 413600 }, { "epoch": 0.10790378257778956, "grad_norm": 7.47837495803833, "learning_rate": 9.80278987823306e-06, "loss": 2.6998, "step": 413800 }, { "epoch": 0.10795593520349173, "grad_norm": 7.093349933624268, "learning_rate": 9.802557355887607e-06, "loss": 2.7003, "step": 414000 }, { "epoch": 0.1080080878291939, "grad_norm": 7.799093723297119, "learning_rate": 9.802324699305136e-06, "loss": 2.7044, "step": 414200 }, { "epoch": 0.10806024045489607, "grad_norm": 7.693939208984375, "learning_rate": 9.802091908492153e-06, "loss": 2.6745, "step": 414400 }, { "epoch": 0.10811239308059824, "grad_norm": 7.931087493896484, "learning_rate": 9.801858983455164e-06, "loss": 2.703, "step": 414600 }, { "epoch": 0.10816454570630041, "grad_norm": 7.6536407470703125, "learning_rate": 9.801625924200679e-06, "loss": 2.6576, "step": 414800 }, { "epoch": 0.10821669833200258, "grad_norm": 7.151515483856201, "learning_rate": 9.801392730735214e-06, "loss": 2.7166, "step": 415000 }, { "epoch": 0.10826885095770475, "grad_norm": 8.142443656921387, "learning_rate": 9.801159403065286e-06, "loss": 2.6738, "step": 415200 }, { "epoch": 0.1083210035834069, "grad_norm": 7.3941121101379395, "learning_rate": 9.800925941197415e-06, "loss": 2.7265, "step": 415400 }, { "epoch": 0.10837315620910908, "grad_norm": 7.601657390594482, "learning_rate": 9.80069234513813e-06, "loss": 2.6959, "step": 415600 }, { "epoch": 0.10842530883481125, "grad_norm": 7.414285182952881, "learning_rate": 9.800458614893958e-06, "loss": 2.7261, "step": 415800 }, { "epoch": 0.10847746146051342, "grad_norm": 7.267521858215332, "learning_rate": 9.800224750471434e-06, "loss": 2.6731, "step": 416000 }, { "epoch": 0.10852961408621559, "grad_norm": 7.712509632110596, "learning_rate": 9.79999075187709e-06, "loss": 2.7288, "step": 416200 }, { "epoch": 0.10858176671191776, "grad_norm": 7.250224590301514, "learning_rate": 9.799756619117473e-06, "loss": 2.7043, "step": 416400 }, { "epoch": 0.10863391933761993, "grad_norm": 7.441985130310059, "learning_rate": 9.799522352199122e-06, "loss": 2.6993, "step": 416600 }, { "epoch": 0.1086860719633221, "grad_norm": 7.325314044952393, "learning_rate": 9.79928795112859e-06, "loss": 2.6846, "step": 416800 }, { "epoch": 0.10873822458902427, "grad_norm": 7.375991344451904, "learning_rate": 9.799053415912422e-06, "loss": 2.7052, "step": 417000 }, { "epoch": 0.10879037721472644, "grad_norm": 7.841715335845947, "learning_rate": 9.798818746557182e-06, "loss": 2.7222, "step": 417200 }, { "epoch": 0.10884252984042861, "grad_norm": 7.707851886749268, "learning_rate": 9.798583943069422e-06, "loss": 2.6917, "step": 417400 }, { "epoch": 0.10889468246613078, "grad_norm": 8.474435806274414, "learning_rate": 9.798349005455707e-06, "loss": 2.6629, "step": 417600 }, { "epoch": 0.10894683509183295, "grad_norm": 7.427988529205322, "learning_rate": 9.798113933722606e-06, "loss": 2.6719, "step": 417800 }, { "epoch": 0.10899898771753512, "grad_norm": 7.597637176513672, "learning_rate": 9.797878727876689e-06, "loss": 2.6991, "step": 418000 }, { "epoch": 0.10905114034323729, "grad_norm": 8.201911926269531, "learning_rate": 9.797643387924529e-06, "loss": 2.6592, "step": 418200 }, { "epoch": 0.10910329296893946, "grad_norm": 8.380842208862305, "learning_rate": 9.797407913872703e-06, "loss": 2.7308, "step": 418400 }, { "epoch": 0.10915544559464163, "grad_norm": 8.024592399597168, "learning_rate": 9.797172305727796e-06, "loss": 2.6984, "step": 418600 }, { "epoch": 0.1092075982203438, "grad_norm": 7.051395893096924, "learning_rate": 9.79693656349639e-06, "loss": 2.7273, "step": 418800 }, { "epoch": 0.10925975084604597, "grad_norm": 8.769367218017578, "learning_rate": 9.796700687185076e-06, "loss": 2.7199, "step": 419000 }, { "epoch": 0.10931190347174814, "grad_norm": 6.581594467163086, "learning_rate": 9.796464676800448e-06, "loss": 2.7053, "step": 419200 }, { "epoch": 0.10936405609745031, "grad_norm": 7.729722499847412, "learning_rate": 9.796228532349102e-06, "loss": 2.6976, "step": 419400 }, { "epoch": 0.10941620872315248, "grad_norm": 6.908459186553955, "learning_rate": 9.795992253837638e-06, "loss": 2.704, "step": 419600 }, { "epoch": 0.10946836134885465, "grad_norm": 7.443035125732422, "learning_rate": 9.79575584127266e-06, "loss": 2.6812, "step": 419800 }, { "epoch": 0.10952051397455682, "grad_norm": 7.436124801635742, "learning_rate": 9.795519294660775e-06, "loss": 2.6817, "step": 420000 }, { "epoch": 0.109572666600259, "grad_norm": 7.913905620574951, "learning_rate": 9.795282614008598e-06, "loss": 2.7261, "step": 420200 }, { "epoch": 0.10962481922596116, "grad_norm": 7.672698497772217, "learning_rate": 9.795045799322744e-06, "loss": 2.7001, "step": 420400 }, { "epoch": 0.10967697185166334, "grad_norm": 7.240036487579346, "learning_rate": 9.794808850609828e-06, "loss": 2.6813, "step": 420600 }, { "epoch": 0.1097291244773655, "grad_norm": 7.6813812255859375, "learning_rate": 9.79457176787648e-06, "loss": 2.6612, "step": 420800 }, { "epoch": 0.10978127710306768, "grad_norm": 7.365231513977051, "learning_rate": 9.79433455112932e-06, "loss": 2.7116, "step": 421000 }, { "epoch": 0.10983342972876985, "grad_norm": 8.09189510345459, "learning_rate": 9.794097200374981e-06, "loss": 2.6995, "step": 421200 }, { "epoch": 0.109885582354472, "grad_norm": 7.653886318206787, "learning_rate": 9.7938597156201e-06, "loss": 2.6798, "step": 421400 }, { "epoch": 0.10993773498017417, "grad_norm": 6.208761692047119, "learning_rate": 9.79362209687131e-06, "loss": 2.6639, "step": 421600 }, { "epoch": 0.10998988760587634, "grad_norm": 7.84056282043457, "learning_rate": 9.793384344135254e-06, "loss": 2.6726, "step": 421800 }, { "epoch": 0.11004204023157851, "grad_norm": 6.51863431930542, "learning_rate": 9.793146457418581e-06, "loss": 2.6799, "step": 422000 }, { "epoch": 0.11009419285728068, "grad_norm": 7.248006820678711, "learning_rate": 9.792908436727937e-06, "loss": 2.7188, "step": 422200 }, { "epoch": 0.11014634548298285, "grad_norm": 7.784631252288818, "learning_rate": 9.792670282069976e-06, "loss": 2.6932, "step": 422400 }, { "epoch": 0.11019849810868503, "grad_norm": 7.782918930053711, "learning_rate": 9.792431993451355e-06, "loss": 2.6632, "step": 422600 }, { "epoch": 0.1102506507343872, "grad_norm": 6.995285511016846, "learning_rate": 9.792193570878733e-06, "loss": 2.7118, "step": 422800 }, { "epoch": 0.11030280336008937, "grad_norm": 8.329195976257324, "learning_rate": 9.791955014358774e-06, "loss": 2.6919, "step": 423000 }, { "epoch": 0.11035495598579154, "grad_norm": 7.235878944396973, "learning_rate": 9.791716323898149e-06, "loss": 2.6916, "step": 423200 }, { "epoch": 0.1104071086114937, "grad_norm": 7.546281814575195, "learning_rate": 9.791477499503528e-06, "loss": 2.695, "step": 423400 }, { "epoch": 0.11045926123719588, "grad_norm": 7.888071537017822, "learning_rate": 9.791238541181584e-06, "loss": 2.708, "step": 423600 }, { "epoch": 0.11051141386289805, "grad_norm": 8.009730339050293, "learning_rate": 9.790999448939001e-06, "loss": 2.6739, "step": 423800 }, { "epoch": 0.11056356648860022, "grad_norm": 7.215407371520996, "learning_rate": 9.790760222782455e-06, "loss": 2.6846, "step": 424000 }, { "epoch": 0.11061571911430239, "grad_norm": 7.5460638999938965, "learning_rate": 9.79052086271864e-06, "loss": 2.6856, "step": 424200 }, { "epoch": 0.11066787174000456, "grad_norm": 7.906571865081787, "learning_rate": 9.790281368754245e-06, "loss": 2.6999, "step": 424400 }, { "epoch": 0.11072002436570673, "grad_norm": 7.218746662139893, "learning_rate": 9.790041740895958e-06, "loss": 2.7122, "step": 424600 }, { "epoch": 0.1107721769914089, "grad_norm": 9.075844764709473, "learning_rate": 9.789801979150483e-06, "loss": 2.6593, "step": 424800 }, { "epoch": 0.11082432961711107, "grad_norm": 8.792651176452637, "learning_rate": 9.78956208352452e-06, "loss": 2.7338, "step": 425000 }, { "epoch": 0.11087648224281324, "grad_norm": 7.27746057510376, "learning_rate": 9.789322054024774e-06, "loss": 2.6658, "step": 425200 }, { "epoch": 0.11092863486851541, "grad_norm": 7.638511657714844, "learning_rate": 9.789081890657955e-06, "loss": 2.7166, "step": 425400 }, { "epoch": 0.11098078749421758, "grad_norm": 7.376978874206543, "learning_rate": 9.788841593430775e-06, "loss": 2.6812, "step": 425600 }, { "epoch": 0.11103294011991975, "grad_norm": 7.535377025604248, "learning_rate": 9.788601162349949e-06, "loss": 2.6784, "step": 425800 }, { "epoch": 0.11108509274562192, "grad_norm": 8.068282127380371, "learning_rate": 9.7883605974222e-06, "loss": 2.7089, "step": 426000 }, { "epoch": 0.11113724537132409, "grad_norm": 8.093073844909668, "learning_rate": 9.788119898654253e-06, "loss": 2.6628, "step": 426200 }, { "epoch": 0.11118939799702626, "grad_norm": 7.169579029083252, "learning_rate": 9.787879066052833e-06, "loss": 2.7379, "step": 426400 }, { "epoch": 0.11124155062272843, "grad_norm": 7.598651885986328, "learning_rate": 9.78763809962467e-06, "loss": 2.6831, "step": 426600 }, { "epoch": 0.1112937032484306, "grad_norm": 8.083666801452637, "learning_rate": 9.787396999376503e-06, "loss": 2.7267, "step": 426800 }, { "epoch": 0.11134585587413277, "grad_norm": 8.373312950134277, "learning_rate": 9.787155765315071e-06, "loss": 2.6836, "step": 427000 }, { "epoch": 0.11139800849983493, "grad_norm": 8.161797523498535, "learning_rate": 9.786914397447116e-06, "loss": 2.6782, "step": 427200 }, { "epoch": 0.1114501611255371, "grad_norm": 8.155803680419922, "learning_rate": 9.786672895779382e-06, "loss": 2.661, "step": 427400 }, { "epoch": 0.11150231375123927, "grad_norm": 8.83053207397461, "learning_rate": 9.786431260318624e-06, "loss": 2.6674, "step": 427600 }, { "epoch": 0.11155446637694144, "grad_norm": 6.54095983505249, "learning_rate": 9.78618949107159e-06, "loss": 2.7123, "step": 427800 }, { "epoch": 0.11160661900264361, "grad_norm": 7.278541088104248, "learning_rate": 9.785947588045044e-06, "loss": 2.6894, "step": 428000 }, { "epoch": 0.11165877162834578, "grad_norm": 8.509937286376953, "learning_rate": 9.785705551245741e-06, "loss": 2.6652, "step": 428200 }, { "epoch": 0.11171092425404795, "grad_norm": 8.071086883544922, "learning_rate": 9.785463380680454e-06, "loss": 2.6859, "step": 428400 }, { "epoch": 0.11176307687975012, "grad_norm": 8.377106666564941, "learning_rate": 9.785221076355944e-06, "loss": 2.7175, "step": 428600 }, { "epoch": 0.1118152295054523, "grad_norm": 7.088006496429443, "learning_rate": 9.784978638278989e-06, "loss": 2.6663, "step": 428800 }, { "epoch": 0.11186738213115446, "grad_norm": 8.437390327453613, "learning_rate": 9.784736066456363e-06, "loss": 2.7336, "step": 429000 }, { "epoch": 0.11191953475685663, "grad_norm": 8.951874732971191, "learning_rate": 9.784493360894849e-06, "loss": 2.732, "step": 429200 }, { "epoch": 0.1119716873825588, "grad_norm": 7.750514030456543, "learning_rate": 9.784250521601226e-06, "loss": 2.6847, "step": 429400 }, { "epoch": 0.11202384000826097, "grad_norm": 8.56009292602539, "learning_rate": 9.784007548582287e-06, "loss": 2.6984, "step": 429600 }, { "epoch": 0.11207599263396315, "grad_norm": 7.063655376434326, "learning_rate": 9.783764441844819e-06, "loss": 2.6662, "step": 429800 }, { "epoch": 0.11212814525966532, "grad_norm": 7.597879886627197, "learning_rate": 9.78352120139562e-06, "loss": 2.6689, "step": 430000 }, { "epoch": 0.11218029788536749, "grad_norm": 7.35047721862793, "learning_rate": 9.783277827241486e-06, "loss": 2.7019, "step": 430200 }, { "epoch": 0.11223245051106966, "grad_norm": 7.905674457550049, "learning_rate": 9.783034319389223e-06, "loss": 2.6588, "step": 430400 }, { "epoch": 0.11228460313677183, "grad_norm": 8.24709415435791, "learning_rate": 9.782790677845638e-06, "loss": 2.6809, "step": 430600 }, { "epoch": 0.112336755762474, "grad_norm": 7.832298278808594, "learning_rate": 9.782546902617535e-06, "loss": 2.6982, "step": 430800 }, { "epoch": 0.11238890838817617, "grad_norm": 7.27885103225708, "learning_rate": 9.782302993711733e-06, "loss": 2.7151, "step": 431000 }, { "epoch": 0.11244106101387834, "grad_norm": 7.919535160064697, "learning_rate": 9.782058951135047e-06, "loss": 2.6966, "step": 431200 }, { "epoch": 0.11249321363958051, "grad_norm": 7.770106315612793, "learning_rate": 9.781814774894302e-06, "loss": 2.7011, "step": 431400 }, { "epoch": 0.11254536626528268, "grad_norm": 8.116305351257324, "learning_rate": 9.781570464996318e-06, "loss": 2.6831, "step": 431600 }, { "epoch": 0.11259751889098485, "grad_norm": 8.72941780090332, "learning_rate": 9.781326021447928e-06, "loss": 2.6948, "step": 431800 }, { "epoch": 0.11264967151668702, "grad_norm": 8.299556732177734, "learning_rate": 9.781081444255962e-06, "loss": 2.7151, "step": 432000 }, { "epoch": 0.11270182414238919, "grad_norm": 7.460847854614258, "learning_rate": 9.780836733427255e-06, "loss": 2.6989, "step": 432200 }, { "epoch": 0.11275397676809136, "grad_norm": 8.605817794799805, "learning_rate": 9.780591888968652e-06, "loss": 2.6516, "step": 432400 }, { "epoch": 0.11280612939379353, "grad_norm": 7.617114067077637, "learning_rate": 9.780346910886991e-06, "loss": 2.6607, "step": 432600 }, { "epoch": 0.1128582820194957, "grad_norm": 7.732776641845703, "learning_rate": 9.78010179918912e-06, "loss": 2.6997, "step": 432800 }, { "epoch": 0.11291043464519787, "grad_norm": 7.245545864105225, "learning_rate": 9.779856553881897e-06, "loss": 2.6959, "step": 433000 }, { "epoch": 0.11296258727090003, "grad_norm": 7.128195285797119, "learning_rate": 9.779611174972169e-06, "loss": 2.656, "step": 433200 }, { "epoch": 0.1130147398966022, "grad_norm": 7.312046051025391, "learning_rate": 9.779365662466798e-06, "loss": 2.6691, "step": 433400 }, { "epoch": 0.11306689252230437, "grad_norm": 8.948485374450684, "learning_rate": 9.779120016372646e-06, "loss": 2.7061, "step": 433600 }, { "epoch": 0.11311904514800654, "grad_norm": 7.606996059417725, "learning_rate": 9.77887423669658e-06, "loss": 2.6527, "step": 433800 }, { "epoch": 0.11317119777370871, "grad_norm": 8.001826286315918, "learning_rate": 9.778628323445467e-06, "loss": 2.663, "step": 434000 }, { "epoch": 0.11322335039941088, "grad_norm": 8.344403266906738, "learning_rate": 9.778382276626183e-06, "loss": 2.6678, "step": 434200 }, { "epoch": 0.11327550302511305, "grad_norm": 8.27188491821289, "learning_rate": 9.778136096245605e-06, "loss": 2.7192, "step": 434400 }, { "epoch": 0.11332765565081522, "grad_norm": 8.35887336730957, "learning_rate": 9.777889782310614e-06, "loss": 2.6627, "step": 434600 }, { "epoch": 0.11337980827651739, "grad_norm": 7.3902201652526855, "learning_rate": 9.777643334828094e-06, "loss": 2.7158, "step": 434800 }, { "epoch": 0.11343196090221956, "grad_norm": 8.090744972229004, "learning_rate": 9.777396753804935e-06, "loss": 2.7031, "step": 435000 }, { "epoch": 0.11348411352792173, "grad_norm": 7.8479084968566895, "learning_rate": 9.777150039248027e-06, "loss": 2.6811, "step": 435200 }, { "epoch": 0.1135362661536239, "grad_norm": 7.591457366943359, "learning_rate": 9.776903191164267e-06, "loss": 2.6928, "step": 435400 }, { "epoch": 0.11358841877932607, "grad_norm": 8.277278900146484, "learning_rate": 9.776656209560556e-06, "loss": 2.6798, "step": 435600 }, { "epoch": 0.11364057140502824, "grad_norm": 8.627765655517578, "learning_rate": 9.776409094443796e-06, "loss": 2.6743, "step": 435800 }, { "epoch": 0.11369272403073041, "grad_norm": 7.7793288230896, "learning_rate": 9.776161845820894e-06, "loss": 2.6513, "step": 436000 }, { "epoch": 0.11374487665643258, "grad_norm": 6.800634384155273, "learning_rate": 9.775914463698758e-06, "loss": 2.7216, "step": 436200 }, { "epoch": 0.11379702928213475, "grad_norm": 7.648603439331055, "learning_rate": 9.77566694808431e-06, "loss": 2.6822, "step": 436400 }, { "epoch": 0.11384918190783692, "grad_norm": 7.305148124694824, "learning_rate": 9.775419298984463e-06, "loss": 2.6863, "step": 436600 }, { "epoch": 0.1139013345335391, "grad_norm": 7.8893513679504395, "learning_rate": 9.775171516406142e-06, "loss": 2.6731, "step": 436800 }, { "epoch": 0.11395348715924127, "grad_norm": 7.8577775955200195, "learning_rate": 9.77492360035627e-06, "loss": 2.6812, "step": 437000 }, { "epoch": 0.11400563978494344, "grad_norm": 8.009392738342285, "learning_rate": 9.774675550841776e-06, "loss": 2.7047, "step": 437200 }, { "epoch": 0.1140577924106456, "grad_norm": 7.961512088775635, "learning_rate": 9.774427367869597e-06, "loss": 2.6934, "step": 437400 }, { "epoch": 0.11410994503634778, "grad_norm": 7.57539701461792, "learning_rate": 9.774179051446667e-06, "loss": 2.667, "step": 437600 }, { "epoch": 0.11416209766204995, "grad_norm": 7.794543266296387, "learning_rate": 9.773930601579928e-06, "loss": 2.6578, "step": 437800 }, { "epoch": 0.11421425028775212, "grad_norm": 8.647083282470703, "learning_rate": 9.773682018276325e-06, "loss": 2.7059, "step": 438000 }, { "epoch": 0.11426640291345429, "grad_norm": 6.988203048706055, "learning_rate": 9.773433301542805e-06, "loss": 2.6848, "step": 438200 }, { "epoch": 0.11431855553915646, "grad_norm": 7.052835941314697, "learning_rate": 9.77318445138632e-06, "loss": 2.696, "step": 438400 }, { "epoch": 0.11437070816485863, "grad_norm": 8.016982078552246, "learning_rate": 9.772935467813825e-06, "loss": 2.6304, "step": 438600 }, { "epoch": 0.1144228607905608, "grad_norm": 8.256826400756836, "learning_rate": 9.77268635083228e-06, "loss": 2.6369, "step": 438800 }, { "epoch": 0.11447501341626296, "grad_norm": 9.124797821044922, "learning_rate": 9.772437100448649e-06, "loss": 2.6915, "step": 439000 }, { "epoch": 0.11452716604196513, "grad_norm": 6.831801414489746, "learning_rate": 9.772187716669898e-06, "loss": 2.6886, "step": 439200 }, { "epoch": 0.1145793186676673, "grad_norm": 8.428510665893555, "learning_rate": 9.771938199503e-06, "loss": 2.7035, "step": 439400 }, { "epoch": 0.11463147129336947, "grad_norm": 7.8952717781066895, "learning_rate": 9.771688548954923e-06, "loss": 2.7098, "step": 439600 }, { "epoch": 0.11468362391907164, "grad_norm": 6.776541233062744, "learning_rate": 9.771438765032653e-06, "loss": 2.6655, "step": 439800 }, { "epoch": 0.11473577654477381, "grad_norm": 7.783594608306885, "learning_rate": 9.771188847743167e-06, "loss": 2.7002, "step": 440000 }, { "epoch": 0.11478792917047598, "grad_norm": 7.315393924713135, "learning_rate": 9.77093879709345e-06, "loss": 2.6525, "step": 440200 }, { "epoch": 0.11484008179617815, "grad_norm": 8.225870132446289, "learning_rate": 9.770688613090492e-06, "loss": 2.6535, "step": 440400 }, { "epoch": 0.11489223442188032, "grad_norm": 7.509810447692871, "learning_rate": 9.770438295741289e-06, "loss": 2.7409, "step": 440600 }, { "epoch": 0.11494438704758249, "grad_norm": 7.6700592041015625, "learning_rate": 9.770187845052833e-06, "loss": 2.6997, "step": 440800 }, { "epoch": 0.11499653967328466, "grad_norm": 6.8598527908325195, "learning_rate": 9.769937261032126e-06, "loss": 2.6779, "step": 441000 }, { "epoch": 0.11504869229898683, "grad_norm": 8.12748908996582, "learning_rate": 9.769686543686174e-06, "loss": 2.717, "step": 441200 }, { "epoch": 0.115100844924689, "grad_norm": 8.549765586853027, "learning_rate": 9.769435693021982e-06, "loss": 2.663, "step": 441400 }, { "epoch": 0.11515299755039117, "grad_norm": 6.6760783195495605, "learning_rate": 9.769184709046565e-06, "loss": 2.6786, "step": 441600 }, { "epoch": 0.11520515017609334, "grad_norm": 7.719881057739258, "learning_rate": 9.768933591766935e-06, "loss": 2.6848, "step": 441800 }, { "epoch": 0.11525730280179551, "grad_norm": 7.913510799407959, "learning_rate": 9.768682341190114e-06, "loss": 2.7145, "step": 442000 }, { "epoch": 0.11530945542749768, "grad_norm": 7.543449401855469, "learning_rate": 9.768430957323122e-06, "loss": 2.6923, "step": 442200 }, { "epoch": 0.11536160805319985, "grad_norm": 8.190464973449707, "learning_rate": 9.768179440172985e-06, "loss": 2.6409, "step": 442400 }, { "epoch": 0.11541376067890202, "grad_norm": 8.82253360748291, "learning_rate": 9.767927789746736e-06, "loss": 2.6654, "step": 442600 }, { "epoch": 0.11546591330460419, "grad_norm": 8.999987602233887, "learning_rate": 9.767676006051409e-06, "loss": 2.7038, "step": 442800 }, { "epoch": 0.11551806593030636, "grad_norm": 8.303157806396484, "learning_rate": 9.767424089094039e-06, "loss": 2.6908, "step": 443000 }, { "epoch": 0.11557021855600853, "grad_norm": 6.951584339141846, "learning_rate": 9.767172038881669e-06, "loss": 2.623, "step": 443200 }, { "epoch": 0.1156223711817107, "grad_norm": 8.04922866821289, "learning_rate": 9.766919855421343e-06, "loss": 2.6307, "step": 443400 }, { "epoch": 0.11567452380741287, "grad_norm": 7.755749225616455, "learning_rate": 9.76666753872011e-06, "loss": 2.662, "step": 443600 }, { "epoch": 0.11572667643311504, "grad_norm": 9.410798072814941, "learning_rate": 9.766415088785026e-06, "loss": 2.7075, "step": 443800 }, { "epoch": 0.11577882905881721, "grad_norm": 9.511571884155273, "learning_rate": 9.76616250562314e-06, "loss": 2.6914, "step": 444000 }, { "epoch": 0.11583098168451939, "grad_norm": 8.075037956237793, "learning_rate": 9.765909789241521e-06, "loss": 2.6813, "step": 444200 }, { "epoch": 0.11588313431022156, "grad_norm": 7.264787197113037, "learning_rate": 9.765656939647225e-06, "loss": 2.6665, "step": 444400 }, { "epoch": 0.11593528693592373, "grad_norm": 8.67489242553711, "learning_rate": 9.765403956847323e-06, "loss": 2.7131, "step": 444600 }, { "epoch": 0.1159874395616259, "grad_norm": 7.648482799530029, "learning_rate": 9.765150840848888e-06, "loss": 2.7141, "step": 444800 }, { "epoch": 0.11603959218732805, "grad_norm": 7.76444149017334, "learning_rate": 9.764897591658989e-06, "loss": 2.6504, "step": 445000 }, { "epoch": 0.11609174481303022, "grad_norm": 7.081927299499512, "learning_rate": 9.764644209284711e-06, "loss": 2.7151, "step": 445200 }, { "epoch": 0.1161438974387324, "grad_norm": 8.81956958770752, "learning_rate": 9.764390693733132e-06, "loss": 2.6997, "step": 445400 }, { "epoch": 0.11619605006443456, "grad_norm": 7.683972358703613, "learning_rate": 9.764137045011339e-06, "loss": 2.7042, "step": 445600 }, { "epoch": 0.11624820269013673, "grad_norm": 8.693424224853516, "learning_rate": 9.763883263126423e-06, "loss": 2.7078, "step": 445800 }, { "epoch": 0.1163003553158389, "grad_norm": 8.919655799865723, "learning_rate": 9.763629348085478e-06, "loss": 2.6838, "step": 446000 }, { "epoch": 0.11635250794154108, "grad_norm": 9.183130264282227, "learning_rate": 9.763375299895598e-06, "loss": 2.7268, "step": 446200 }, { "epoch": 0.11640466056724325, "grad_norm": 8.304872512817383, "learning_rate": 9.763121118563888e-06, "loss": 2.6786, "step": 446400 }, { "epoch": 0.11645681319294542, "grad_norm": 8.370039939880371, "learning_rate": 9.76286680409745e-06, "loss": 2.6958, "step": 446600 }, { "epoch": 0.11650896581864759, "grad_norm": 9.099044799804688, "learning_rate": 9.762612356503394e-06, "loss": 2.6626, "step": 446800 }, { "epoch": 0.11656111844434976, "grad_norm": 7.61244535446167, "learning_rate": 9.762357775788829e-06, "loss": 2.6916, "step": 447000 }, { "epoch": 0.11661327107005193, "grad_norm": 7.555819034576416, "learning_rate": 9.762103061960874e-06, "loss": 2.6282, "step": 447200 }, { "epoch": 0.1166654236957541, "grad_norm": 8.290244102478027, "learning_rate": 9.761848215026647e-06, "loss": 2.6645, "step": 447400 }, { "epoch": 0.11671757632145627, "grad_norm": 7.066608905792236, "learning_rate": 9.761593234993273e-06, "loss": 2.6977, "step": 447600 }, { "epoch": 0.11676972894715844, "grad_norm": 8.447099685668945, "learning_rate": 9.761338121867876e-06, "loss": 2.7276, "step": 447800 }, { "epoch": 0.11682188157286061, "grad_norm": 8.586709976196289, "learning_rate": 9.76108287565759e-06, "loss": 2.7122, "step": 448000 }, { "epoch": 0.11687403419856278, "grad_norm": 8.613016128540039, "learning_rate": 9.760827496369547e-06, "loss": 2.6843, "step": 448200 }, { "epoch": 0.11692618682426495, "grad_norm": 7.8855719566345215, "learning_rate": 9.760571984010887e-06, "loss": 2.6395, "step": 448400 }, { "epoch": 0.11697833944996712, "grad_norm": 9.135221481323242, "learning_rate": 9.76031633858875e-06, "loss": 2.667, "step": 448600 }, { "epoch": 0.11703049207566929, "grad_norm": 8.805586814880371, "learning_rate": 9.760060560110283e-06, "loss": 2.6881, "step": 448800 }, { "epoch": 0.11708264470137146, "grad_norm": 8.323440551757812, "learning_rate": 9.759804648582633e-06, "loss": 2.656, "step": 449000 }, { "epoch": 0.11713479732707363, "grad_norm": 7.569894790649414, "learning_rate": 9.759548604012959e-06, "loss": 2.6998, "step": 449200 }, { "epoch": 0.1171869499527758, "grad_norm": 8.15855598449707, "learning_rate": 9.759292426408408e-06, "loss": 2.6691, "step": 449400 }, { "epoch": 0.11723910257847797, "grad_norm": 7.520715713500977, "learning_rate": 9.75903611577615e-06, "loss": 2.6674, "step": 449600 }, { "epoch": 0.11729125520418014, "grad_norm": 8.77229118347168, "learning_rate": 9.758779672123344e-06, "loss": 2.6771, "step": 449800 }, { "epoch": 0.11734340782988231, "grad_norm": 8.861888885498047, "learning_rate": 9.75852309545716e-06, "loss": 2.7158, "step": 450000 }, { "epoch": 0.11739556045558448, "grad_norm": 7.774789810180664, "learning_rate": 9.758266385784767e-06, "loss": 2.6555, "step": 450200 }, { "epoch": 0.11744771308128665, "grad_norm": 7.58319091796875, "learning_rate": 9.758009543113343e-06, "loss": 2.6737, "step": 450400 }, { "epoch": 0.11749986570698882, "grad_norm": 8.081302642822266, "learning_rate": 9.757752567450065e-06, "loss": 2.6793, "step": 450600 }, { "epoch": 0.11755201833269098, "grad_norm": 7.114253044128418, "learning_rate": 9.757495458802119e-06, "loss": 2.694, "step": 450800 }, { "epoch": 0.11760417095839315, "grad_norm": 7.961178779602051, "learning_rate": 9.757238217176687e-06, "loss": 2.6656, "step": 451000 }, { "epoch": 0.11765632358409532, "grad_norm": 7.241010665893555, "learning_rate": 9.756980842580963e-06, "loss": 2.6901, "step": 451200 }, { "epoch": 0.11770847620979749, "grad_norm": 9.523578643798828, "learning_rate": 9.75672333502214e-06, "loss": 2.6705, "step": 451400 }, { "epoch": 0.11776062883549966, "grad_norm": 7.9366230964660645, "learning_rate": 9.756465694507413e-06, "loss": 2.6603, "step": 451600 }, { "epoch": 0.11781278146120183, "grad_norm": 8.917617797851562, "learning_rate": 9.756207921043988e-06, "loss": 2.7168, "step": 451800 }, { "epoch": 0.117864934086904, "grad_norm": 8.094945907592773, "learning_rate": 9.755950014639065e-06, "loss": 2.6525, "step": 452000 }, { "epoch": 0.11791708671260617, "grad_norm": 8.251530647277832, "learning_rate": 9.755691975299857e-06, "loss": 2.6882, "step": 452200 }, { "epoch": 0.11796923933830834, "grad_norm": 7.913506031036377, "learning_rate": 9.755433803033574e-06, "loss": 2.6975, "step": 452400 }, { "epoch": 0.11802139196401051, "grad_norm": 8.891002655029297, "learning_rate": 9.755175497847434e-06, "loss": 2.6518, "step": 452600 }, { "epoch": 0.11807354458971268, "grad_norm": 9.051965713500977, "learning_rate": 9.754917059748654e-06, "loss": 2.715, "step": 452800 }, { "epoch": 0.11812569721541485, "grad_norm": 8.167122840881348, "learning_rate": 9.75465848874446e-06, "loss": 2.7022, "step": 453000 }, { "epoch": 0.11817784984111702, "grad_norm": 7.507691860198975, "learning_rate": 9.754399784842079e-06, "loss": 2.6796, "step": 453200 }, { "epoch": 0.1182300024668192, "grad_norm": 7.737783908843994, "learning_rate": 9.754140948048743e-06, "loss": 2.6774, "step": 453400 }, { "epoch": 0.11828215509252137, "grad_norm": 8.166272163391113, "learning_rate": 9.753881978371685e-06, "loss": 2.7361, "step": 453600 }, { "epoch": 0.11833430771822354, "grad_norm": 7.494532585144043, "learning_rate": 9.753622875818145e-06, "loss": 2.6566, "step": 453800 }, { "epoch": 0.1183864603439257, "grad_norm": 8.62021255493164, "learning_rate": 9.753363640395364e-06, "loss": 2.6363, "step": 454000 }, { "epoch": 0.11843861296962788, "grad_norm": 8.136500358581543, "learning_rate": 9.753104272110587e-06, "loss": 2.7016, "step": 454200 }, { "epoch": 0.11849076559533005, "grad_norm": 6.9378662109375, "learning_rate": 9.752844770971067e-06, "loss": 2.6903, "step": 454400 }, { "epoch": 0.11854291822103222, "grad_norm": 9.27441692352295, "learning_rate": 9.752585136984055e-06, "loss": 2.6897, "step": 454600 }, { "epoch": 0.11859507084673439, "grad_norm": 8.395896911621094, "learning_rate": 9.752325370156808e-06, "loss": 2.6787, "step": 454800 }, { "epoch": 0.11864722347243656, "grad_norm": 6.82293701171875, "learning_rate": 9.752065470496587e-06, "loss": 2.6816, "step": 455000 }, { "epoch": 0.11869937609813873, "grad_norm": 9.240446090698242, "learning_rate": 9.751805438010658e-06, "loss": 2.7092, "step": 455200 }, { "epoch": 0.1187515287238409, "grad_norm": 7.735306262969971, "learning_rate": 9.751545272706287e-06, "loss": 2.6778, "step": 455400 }, { "epoch": 0.11880368134954307, "grad_norm": 7.703882694244385, "learning_rate": 9.751284974590746e-06, "loss": 2.6471, "step": 455600 }, { "epoch": 0.11885583397524524, "grad_norm": 7.228545665740967, "learning_rate": 9.751024543671315e-06, "loss": 2.617, "step": 455800 }, { "epoch": 0.11890798660094741, "grad_norm": 8.354397773742676, "learning_rate": 9.750763979955267e-06, "loss": 2.6919, "step": 456000 }, { "epoch": 0.11896013922664958, "grad_norm": 7.73373556137085, "learning_rate": 9.750503283449888e-06, "loss": 2.7015, "step": 456200 }, { "epoch": 0.11901229185235175, "grad_norm": 8.244440078735352, "learning_rate": 9.750242454162465e-06, "loss": 2.6825, "step": 456400 }, { "epoch": 0.11906444447805391, "grad_norm": 8.330254554748535, "learning_rate": 9.749981492100289e-06, "loss": 2.6659, "step": 456600 }, { "epoch": 0.11911659710375608, "grad_norm": 8.01742935180664, "learning_rate": 9.749720397270652e-06, "loss": 2.6779, "step": 456800 }, { "epoch": 0.11916874972945825, "grad_norm": 8.478322982788086, "learning_rate": 9.749459169680855e-06, "loss": 2.6841, "step": 457000 }, { "epoch": 0.11922090235516042, "grad_norm": 7.7415080070495605, "learning_rate": 9.749197809338195e-06, "loss": 2.6466, "step": 457200 }, { "epoch": 0.11927305498086259, "grad_norm": 8.464523315429688, "learning_rate": 9.748936316249982e-06, "loss": 2.6419, "step": 457400 }, { "epoch": 0.11932520760656476, "grad_norm": 8.651251792907715, "learning_rate": 9.748674690423524e-06, "loss": 2.684, "step": 457600 }, { "epoch": 0.11937736023226693, "grad_norm": 8.235745429992676, "learning_rate": 9.748412931866132e-06, "loss": 2.6694, "step": 457800 }, { "epoch": 0.1194295128579691, "grad_norm": 7.739285469055176, "learning_rate": 9.748151040585123e-06, "loss": 2.6602, "step": 458000 }, { "epoch": 0.11948166548367127, "grad_norm": 9.078447341918945, "learning_rate": 9.747889016587819e-06, "loss": 2.6301, "step": 458200 }, { "epoch": 0.11953381810937344, "grad_norm": 7.439968109130859, "learning_rate": 9.747626859881542e-06, "loss": 2.6789, "step": 458400 }, { "epoch": 0.11958597073507561, "grad_norm": 8.582900047302246, "learning_rate": 9.74736457047362e-06, "loss": 2.6473, "step": 458600 }, { "epoch": 0.11963812336077778, "grad_norm": 8.283160209655762, "learning_rate": 9.747102148371384e-06, "loss": 2.6383, "step": 458800 }, { "epoch": 0.11969027598647995, "grad_norm": 9.031500816345215, "learning_rate": 9.74683959358217e-06, "loss": 2.6908, "step": 459000 }, { "epoch": 0.11974242861218212, "grad_norm": 7.941998481750488, "learning_rate": 9.746576906113314e-06, "loss": 2.6518, "step": 459200 }, { "epoch": 0.11979458123788429, "grad_norm": 9.694351196289062, "learning_rate": 9.746314085972162e-06, "loss": 2.6758, "step": 459400 }, { "epoch": 0.11984673386358646, "grad_norm": 7.908174991607666, "learning_rate": 9.746051133166059e-06, "loss": 2.6652, "step": 459600 }, { "epoch": 0.11989888648928863, "grad_norm": 7.092363357543945, "learning_rate": 9.745788047702354e-06, "loss": 2.6604, "step": 459800 }, { "epoch": 0.1199510391149908, "grad_norm": 7.674038887023926, "learning_rate": 9.7455248295884e-06, "loss": 2.6792, "step": 460000 }, { "epoch": 0.12000319174069297, "grad_norm": 8.855584144592285, "learning_rate": 9.745261478831555e-06, "loss": 2.6561, "step": 460200 }, { "epoch": 0.12005534436639514, "grad_norm": 7.2290120124816895, "learning_rate": 9.744997995439184e-06, "loss": 2.6789, "step": 460400 }, { "epoch": 0.12010749699209732, "grad_norm": 8.269558906555176, "learning_rate": 9.744734379418644e-06, "loss": 2.6772, "step": 460600 }, { "epoch": 0.12015964961779949, "grad_norm": 9.019466400146484, "learning_rate": 9.744470630777307e-06, "loss": 2.685, "step": 460800 }, { "epoch": 0.12021180224350166, "grad_norm": 8.867697715759277, "learning_rate": 9.744206749522547e-06, "loss": 2.6937, "step": 461000 }, { "epoch": 0.12026395486920383, "grad_norm": 8.083369255065918, "learning_rate": 9.743942735661738e-06, "loss": 2.6659, "step": 461200 }, { "epoch": 0.120316107494906, "grad_norm": 8.023956298828125, "learning_rate": 9.743678589202258e-06, "loss": 2.6797, "step": 461400 }, { "epoch": 0.12036826012060817, "grad_norm": 8.839423179626465, "learning_rate": 9.743414310151494e-06, "loss": 2.7316, "step": 461600 }, { "epoch": 0.12042041274631034, "grad_norm": 8.470783233642578, "learning_rate": 9.74314989851683e-06, "loss": 2.6571, "step": 461800 }, { "epoch": 0.12047256537201251, "grad_norm": 8.4376802444458, "learning_rate": 9.742885354305657e-06, "loss": 2.649, "step": 462000 }, { "epoch": 0.12052471799771468, "grad_norm": 7.8062424659729, "learning_rate": 9.74262067752537e-06, "loss": 2.6703, "step": 462200 }, { "epoch": 0.12057687062341685, "grad_norm": 8.25600814819336, "learning_rate": 9.742355868183365e-06, "loss": 2.7073, "step": 462400 }, { "epoch": 0.120629023249119, "grad_norm": 8.327945709228516, "learning_rate": 9.742090926287047e-06, "loss": 2.6969, "step": 462600 }, { "epoch": 0.12068117587482118, "grad_norm": 8.068275451660156, "learning_rate": 9.741825851843819e-06, "loss": 2.7028, "step": 462800 }, { "epoch": 0.12073332850052335, "grad_norm": 10.301764488220215, "learning_rate": 9.741560644861091e-06, "loss": 2.6903, "step": 463000 }, { "epoch": 0.12078548112622552, "grad_norm": 7.792321681976318, "learning_rate": 9.741295305346276e-06, "loss": 2.6443, "step": 463200 }, { "epoch": 0.12083763375192769, "grad_norm": 8.240435600280762, "learning_rate": 9.74102983330679e-06, "loss": 2.6793, "step": 463400 }, { "epoch": 0.12088978637762986, "grad_norm": 8.414860725402832, "learning_rate": 9.740764228750053e-06, "loss": 2.6749, "step": 463600 }, { "epoch": 0.12094193900333203, "grad_norm": 8.185742378234863, "learning_rate": 9.74049849168349e-06, "loss": 2.6609, "step": 463800 }, { "epoch": 0.1209940916290342, "grad_norm": 8.727797508239746, "learning_rate": 9.74023262211453e-06, "loss": 2.6738, "step": 464000 }, { "epoch": 0.12104624425473637, "grad_norm": 8.264991760253906, "learning_rate": 9.739966620050601e-06, "loss": 2.6553, "step": 464200 }, { "epoch": 0.12109839688043854, "grad_norm": 8.572891235351562, "learning_rate": 9.739700485499139e-06, "loss": 2.689, "step": 464400 }, { "epoch": 0.12115054950614071, "grad_norm": 8.757530212402344, "learning_rate": 9.739434218467583e-06, "loss": 2.6752, "step": 464600 }, { "epoch": 0.12120270213184288, "grad_norm": 8.559809684753418, "learning_rate": 9.739167818963378e-06, "loss": 2.6809, "step": 464800 }, { "epoch": 0.12125485475754505, "grad_norm": 8.356423377990723, "learning_rate": 9.738901286993965e-06, "loss": 2.6699, "step": 465000 }, { "epoch": 0.12130700738324722, "grad_norm": 7.829847812652588, "learning_rate": 9.738634622566799e-06, "loss": 2.6739, "step": 465200 }, { "epoch": 0.12135916000894939, "grad_norm": 8.362701416015625, "learning_rate": 9.73836782568933e-06, "loss": 2.6874, "step": 465400 }, { "epoch": 0.12141131263465156, "grad_norm": 9.722343444824219, "learning_rate": 9.738100896369019e-06, "loss": 2.6827, "step": 465600 }, { "epoch": 0.12146346526035373, "grad_norm": 8.5645112991333, "learning_rate": 9.737833834613322e-06, "loss": 2.6557, "step": 465800 }, { "epoch": 0.1215156178860559, "grad_norm": 8.088398933410645, "learning_rate": 9.737566640429707e-06, "loss": 2.6685, "step": 466000 }, { "epoch": 0.12156777051175807, "grad_norm": 8.453975677490234, "learning_rate": 9.737299313825644e-06, "loss": 2.6338, "step": 466200 }, { "epoch": 0.12161992313746024, "grad_norm": 7.445858478546143, "learning_rate": 9.737031854808601e-06, "loss": 2.6752, "step": 466400 }, { "epoch": 0.12167207576316241, "grad_norm": 9.255156517028809, "learning_rate": 9.736764263386057e-06, "loss": 2.7129, "step": 466600 }, { "epoch": 0.12172422838886458, "grad_norm": 9.095346450805664, "learning_rate": 9.736496539565488e-06, "loss": 2.6596, "step": 466800 }, { "epoch": 0.12177638101456675, "grad_norm": 8.963985443115234, "learning_rate": 9.73622868335438e-06, "loss": 2.6857, "step": 467000 }, { "epoch": 0.12182853364026892, "grad_norm": 9.29616641998291, "learning_rate": 9.73596069476022e-06, "loss": 2.6297, "step": 467200 }, { "epoch": 0.1218806862659711, "grad_norm": 7.7633795738220215, "learning_rate": 9.735692573790498e-06, "loss": 2.6467, "step": 467400 }, { "epoch": 0.12193283889167326, "grad_norm": 9.138328552246094, "learning_rate": 9.735424320452707e-06, "loss": 2.7386, "step": 467600 }, { "epoch": 0.12198499151737544, "grad_norm": 7.824506759643555, "learning_rate": 9.735155934754347e-06, "loss": 2.6447, "step": 467800 }, { "epoch": 0.1220371441430776, "grad_norm": 9.13517951965332, "learning_rate": 9.734887416702919e-06, "loss": 2.6749, "step": 468000 }, { "epoch": 0.12208929676877978, "grad_norm": 8.389497756958008, "learning_rate": 9.734618766305928e-06, "loss": 2.6867, "step": 468200 }, { "epoch": 0.12214144939448193, "grad_norm": 8.484880447387695, "learning_rate": 9.734349983570882e-06, "loss": 2.6819, "step": 468400 }, { "epoch": 0.1221936020201841, "grad_norm": 8.580875396728516, "learning_rate": 9.734081068505296e-06, "loss": 2.6606, "step": 468600 }, { "epoch": 0.12224575464588627, "grad_norm": 8.64186954498291, "learning_rate": 9.733812021116687e-06, "loss": 2.711, "step": 468800 }, { "epoch": 0.12229790727158844, "grad_norm": 8.544197082519531, "learning_rate": 9.733542841412571e-06, "loss": 2.6684, "step": 469000 }, { "epoch": 0.12235005989729061, "grad_norm": 7.277874946594238, "learning_rate": 9.733273529400476e-06, "loss": 2.6726, "step": 469200 }, { "epoch": 0.12240221252299278, "grad_norm": 7.846140384674072, "learning_rate": 9.733004085087926e-06, "loss": 2.6603, "step": 469400 }, { "epoch": 0.12245436514869495, "grad_norm": 7.558496475219727, "learning_rate": 9.732734508482458e-06, "loss": 2.6833, "step": 469600 }, { "epoch": 0.12250651777439713, "grad_norm": 8.439860343933105, "learning_rate": 9.732464799591603e-06, "loss": 2.6507, "step": 469800 }, { "epoch": 0.1225586704000993, "grad_norm": 9.658734321594238, "learning_rate": 9.732194958422898e-06, "loss": 2.7056, "step": 470000 }, { "epoch": 0.12261082302580147, "grad_norm": 7.978820323944092, "learning_rate": 9.731924984983889e-06, "loss": 2.704, "step": 470200 }, { "epoch": 0.12266297565150364, "grad_norm": 7.804432392120361, "learning_rate": 9.73165487928212e-06, "loss": 2.6903, "step": 470400 }, { "epoch": 0.1227151282772058, "grad_norm": 8.81722640991211, "learning_rate": 9.73138464132514e-06, "loss": 2.7032, "step": 470600 }, { "epoch": 0.12276728090290798, "grad_norm": 9.00986385345459, "learning_rate": 9.731114271120505e-06, "loss": 2.6696, "step": 470800 }, { "epoch": 0.12281943352861015, "grad_norm": 8.249679565429688, "learning_rate": 9.730843768675772e-06, "loss": 2.6984, "step": 471000 }, { "epoch": 0.12287158615431232, "grad_norm": 8.557232856750488, "learning_rate": 9.7305731339985e-06, "loss": 2.6371, "step": 471200 }, { "epoch": 0.12292373878001449, "grad_norm": 8.401301383972168, "learning_rate": 9.730302367096254e-06, "loss": 2.6919, "step": 471400 }, { "epoch": 0.12297589140571666, "grad_norm": 7.651167392730713, "learning_rate": 9.730031467976601e-06, "loss": 2.7055, "step": 471600 }, { "epoch": 0.12302804403141883, "grad_norm": 8.762410163879395, "learning_rate": 9.729760436647118e-06, "loss": 2.6395, "step": 471800 }, { "epoch": 0.123080196657121, "grad_norm": 8.571608543395996, "learning_rate": 9.729489273115376e-06, "loss": 2.651, "step": 472000 }, { "epoch": 0.12313234928282317, "grad_norm": 8.2172269821167, "learning_rate": 9.729217977388954e-06, "loss": 2.6385, "step": 472200 }, { "epoch": 0.12318450190852534, "grad_norm": 9.237062454223633, "learning_rate": 9.728946549475438e-06, "loss": 2.718, "step": 472400 }, { "epoch": 0.12323665453422751, "grad_norm": 8.741727828979492, "learning_rate": 9.728674989382412e-06, "loss": 2.6775, "step": 472600 }, { "epoch": 0.12328880715992968, "grad_norm": 7.985013008117676, "learning_rate": 9.728403297117469e-06, "loss": 2.6703, "step": 472800 }, { "epoch": 0.12334095978563185, "grad_norm": 8.41588020324707, "learning_rate": 9.7281314726882e-06, "loss": 2.6602, "step": 473000 }, { "epoch": 0.12339311241133402, "grad_norm": 10.485689163208008, "learning_rate": 9.727859516102208e-06, "loss": 2.6371, "step": 473200 }, { "epoch": 0.12344526503703619, "grad_norm": 8.508200645446777, "learning_rate": 9.727587427367088e-06, "loss": 2.7213, "step": 473400 }, { "epoch": 0.12349741766273836, "grad_norm": 7.656301498413086, "learning_rate": 9.72731520649045e-06, "loss": 2.6225, "step": 473600 }, { "epoch": 0.12354957028844053, "grad_norm": 7.055719375610352, "learning_rate": 9.7270428534799e-06, "loss": 2.6489, "step": 473800 }, { "epoch": 0.1236017229141427, "grad_norm": 7.498048782348633, "learning_rate": 9.72677036834305e-06, "loss": 2.679, "step": 474000 }, { "epoch": 0.12365387553984487, "grad_norm": 8.528254508972168, "learning_rate": 9.726497751087522e-06, "loss": 2.6907, "step": 474200 }, { "epoch": 0.12370602816554703, "grad_norm": 8.69704818725586, "learning_rate": 9.72622500172093e-06, "loss": 2.666, "step": 474400 }, { "epoch": 0.1237581807912492, "grad_norm": 8.989028930664062, "learning_rate": 9.7259521202509e-06, "loss": 2.6891, "step": 474600 }, { "epoch": 0.12381033341695137, "grad_norm": 8.570834159851074, "learning_rate": 9.725679106685058e-06, "loss": 2.6665, "step": 474800 }, { "epoch": 0.12386248604265354, "grad_norm": 8.544633865356445, "learning_rate": 9.725405961031038e-06, "loss": 2.6161, "step": 475000 }, { "epoch": 0.12391463866835571, "grad_norm": 8.439756393432617, "learning_rate": 9.725132683296471e-06, "loss": 2.6956, "step": 475200 }, { "epoch": 0.12396679129405788, "grad_norm": 8.925586700439453, "learning_rate": 9.724859273488999e-06, "loss": 2.6371, "step": 475400 }, { "epoch": 0.12401894391976005, "grad_norm": 8.36025333404541, "learning_rate": 9.72458573161626e-06, "loss": 2.6451, "step": 475600 }, { "epoch": 0.12407109654546222, "grad_norm": 8.255962371826172, "learning_rate": 9.724312057685902e-06, "loss": 2.6251, "step": 475800 }, { "epoch": 0.1241232491711644, "grad_norm": 9.104443550109863, "learning_rate": 9.724038251705577e-06, "loss": 2.6458, "step": 476000 }, { "epoch": 0.12417540179686656, "grad_norm": 8.984846115112305, "learning_rate": 9.723764313682936e-06, "loss": 2.6515, "step": 476200 }, { "epoch": 0.12422755442256873, "grad_norm": 8.620625495910645, "learning_rate": 9.723490243625634e-06, "loss": 2.6752, "step": 476400 }, { "epoch": 0.1242797070482709, "grad_norm": 8.312873840332031, "learning_rate": 9.723216041541334e-06, "loss": 2.6998, "step": 476600 }, { "epoch": 0.12433185967397307, "grad_norm": 8.567025184631348, "learning_rate": 9.7229417074377e-06, "loss": 2.6366, "step": 476800 }, { "epoch": 0.12438401229967525, "grad_norm": 9.091941833496094, "learning_rate": 9.7226672413224e-06, "loss": 2.6761, "step": 477000 }, { "epoch": 0.12443616492537742, "grad_norm": 8.620004653930664, "learning_rate": 9.722392643203103e-06, "loss": 2.6256, "step": 477200 }, { "epoch": 0.12448831755107959, "grad_norm": 8.094307899475098, "learning_rate": 9.722117913087488e-06, "loss": 2.6586, "step": 477400 }, { "epoch": 0.12454047017678176, "grad_norm": 9.369007110595703, "learning_rate": 9.72184305098323e-06, "loss": 2.6223, "step": 477600 }, { "epoch": 0.12459262280248393, "grad_norm": 9.586237907409668, "learning_rate": 9.721568056898018e-06, "loss": 2.6983, "step": 477800 }, { "epoch": 0.1246447754281861, "grad_norm": 7.8790106773376465, "learning_rate": 9.721292930839532e-06, "loss": 2.6527, "step": 478000 }, { "epoch": 0.12469692805388827, "grad_norm": 9.248851776123047, "learning_rate": 9.721017672815467e-06, "loss": 2.6589, "step": 478200 }, { "epoch": 0.12474908067959044, "grad_norm": 9.875216484069824, "learning_rate": 9.720742282833513e-06, "loss": 2.6659, "step": 478400 }, { "epoch": 0.12480123330529261, "grad_norm": 9.6345796585083, "learning_rate": 9.720466760901368e-06, "loss": 2.676, "step": 478600 }, { "epoch": 0.12485338593099478, "grad_norm": 8.288617134094238, "learning_rate": 9.720191107026735e-06, "loss": 2.661, "step": 478800 }, { "epoch": 0.12490553855669695, "grad_norm": 9.437127113342285, "learning_rate": 9.719915321217319e-06, "loss": 2.6597, "step": 479000 }, { "epoch": 0.12495769118239912, "grad_norm": 8.890555381774902, "learning_rate": 9.719639403480827e-06, "loss": 2.6872, "step": 479200 }, { "epoch": 0.1250098438081013, "grad_norm": 8.575922966003418, "learning_rate": 9.71936335382497e-06, "loss": 2.6705, "step": 479400 }, { "epoch": 0.12506199643380345, "grad_norm": 8.43736457824707, "learning_rate": 9.719087172257468e-06, "loss": 2.6723, "step": 479600 }, { "epoch": 0.12511414905950563, "grad_norm": 7.534491539001465, "learning_rate": 9.71881085878604e-06, "loss": 2.6203, "step": 479800 }, { "epoch": 0.1251663016852078, "grad_norm": 9.604147911071777, "learning_rate": 9.718534413418404e-06, "loss": 2.6615, "step": 480000 }, { "epoch": 0.12521845431090997, "grad_norm": 8.846263885498047, "learning_rate": 9.71825783616229e-06, "loss": 2.6699, "step": 480200 }, { "epoch": 0.12527060693661213, "grad_norm": 8.537163734436035, "learning_rate": 9.71798112702543e-06, "loss": 2.6123, "step": 480400 }, { "epoch": 0.1253227595623143, "grad_norm": 9.015103340148926, "learning_rate": 9.71770428601556e-06, "loss": 2.6473, "step": 480600 }, { "epoch": 0.12537491218801647, "grad_norm": 8.286890029907227, "learning_rate": 9.717427313140412e-06, "loss": 2.6601, "step": 480800 }, { "epoch": 0.12542706481371865, "grad_norm": 8.386801719665527, "learning_rate": 9.717150208407733e-06, "loss": 2.6411, "step": 481000 }, { "epoch": 0.1254792174394208, "grad_norm": 9.405022621154785, "learning_rate": 9.716872971825265e-06, "loss": 2.6861, "step": 481200 }, { "epoch": 0.125531370065123, "grad_norm": 9.213386535644531, "learning_rate": 9.71659560340076e-06, "loss": 2.6431, "step": 481400 }, { "epoch": 0.12558352269082515, "grad_norm": 9.34145736694336, "learning_rate": 9.71631810314197e-06, "loss": 2.6649, "step": 481600 }, { "epoch": 0.12563567531652733, "grad_norm": 9.355376243591309, "learning_rate": 9.71604047105665e-06, "loss": 2.6769, "step": 481800 }, { "epoch": 0.1256878279422295, "grad_norm": 9.561544418334961, "learning_rate": 9.715762707152561e-06, "loss": 2.6069, "step": 482000 }, { "epoch": 0.12573998056793168, "grad_norm": 9.205405235290527, "learning_rate": 9.715484811437468e-06, "loss": 2.6307, "step": 482200 }, { "epoch": 0.12579213319363383, "grad_norm": 9.112932205200195, "learning_rate": 9.715206783919136e-06, "loss": 2.6701, "step": 482400 }, { "epoch": 0.12584428581933602, "grad_norm": 8.555209159851074, "learning_rate": 9.714928624605337e-06, "loss": 2.6885, "step": 482600 }, { "epoch": 0.12589643844503817, "grad_norm": 8.725874900817871, "learning_rate": 9.714650333503848e-06, "loss": 2.663, "step": 482800 }, { "epoch": 0.12594859107074036, "grad_norm": 8.564589500427246, "learning_rate": 9.714371910622445e-06, "loss": 2.6496, "step": 483000 }, { "epoch": 0.1260007436964425, "grad_norm": 9.859867095947266, "learning_rate": 9.714093355968913e-06, "loss": 2.6103, "step": 483200 }, { "epoch": 0.12605289632214467, "grad_norm": 8.561869621276855, "learning_rate": 9.713814669551034e-06, "loss": 2.6476, "step": 483400 }, { "epoch": 0.12610504894784685, "grad_norm": 9.05595874786377, "learning_rate": 9.7135358513766e-06, "loss": 2.6854, "step": 483600 }, { "epoch": 0.126157201573549, "grad_norm": 8.561729431152344, "learning_rate": 9.713256901453405e-06, "loss": 2.6784, "step": 483800 }, { "epoch": 0.1262093541992512, "grad_norm": 9.193467140197754, "learning_rate": 9.712977819789244e-06, "loss": 2.6623, "step": 484000 }, { "epoch": 0.12626150682495335, "grad_norm": 8.660527229309082, "learning_rate": 9.71269860639192e-06, "loss": 2.682, "step": 484200 }, { "epoch": 0.12631365945065554, "grad_norm": 8.973142623901367, "learning_rate": 9.712419261269234e-06, "loss": 2.6837, "step": 484400 }, { "epoch": 0.1263658120763577, "grad_norm": 9.378042221069336, "learning_rate": 9.712139784428998e-06, "loss": 2.6662, "step": 484600 }, { "epoch": 0.12641796470205988, "grad_norm": 7.6433424949646, "learning_rate": 9.71186017587902e-06, "loss": 2.6865, "step": 484800 }, { "epoch": 0.12647011732776203, "grad_norm": 8.966972351074219, "learning_rate": 9.711580435627118e-06, "loss": 2.671, "step": 485000 }, { "epoch": 0.12652226995346422, "grad_norm": 9.110841751098633, "learning_rate": 9.71130056368111e-06, "loss": 2.6932, "step": 485200 }, { "epoch": 0.12657442257916637, "grad_norm": 8.476582527160645, "learning_rate": 9.711020560048819e-06, "loss": 2.6634, "step": 485400 }, { "epoch": 0.12662657520486856, "grad_norm": 8.898265838623047, "learning_rate": 9.710740424738072e-06, "loss": 2.6732, "step": 485600 }, { "epoch": 0.12667872783057071, "grad_norm": 9.571182250976562, "learning_rate": 9.710460157756698e-06, "loss": 2.6777, "step": 485800 }, { "epoch": 0.1267308804562729, "grad_norm": 9.099579811096191, "learning_rate": 9.710179759112531e-06, "loss": 2.6837, "step": 486000 }, { "epoch": 0.12678303308197506, "grad_norm": 8.9638671875, "learning_rate": 9.709899228813408e-06, "loss": 2.6928, "step": 486200 }, { "epoch": 0.12683518570767724, "grad_norm": 9.293763160705566, "learning_rate": 9.709618566867173e-06, "loss": 2.6583, "step": 486400 }, { "epoch": 0.1268873383333794, "grad_norm": 7.878268241882324, "learning_rate": 9.709337773281666e-06, "loss": 2.6466, "step": 486600 }, { "epoch": 0.12693949095908158, "grad_norm": 8.64033317565918, "learning_rate": 9.70905684806474e-06, "loss": 2.6509, "step": 486800 }, { "epoch": 0.12699164358478374, "grad_norm": 9.131351470947266, "learning_rate": 9.708775791224246e-06, "loss": 2.6382, "step": 487000 }, { "epoch": 0.12704379621048592, "grad_norm": 8.987448692321777, "learning_rate": 9.708494602768038e-06, "loss": 2.6429, "step": 487200 }, { "epoch": 0.12709594883618808, "grad_norm": 9.501585960388184, "learning_rate": 9.708213282703975e-06, "loss": 2.6805, "step": 487400 }, { "epoch": 0.12714810146189026, "grad_norm": 9.123064994812012, "learning_rate": 9.707931831039926e-06, "loss": 2.6948, "step": 487600 }, { "epoch": 0.12720025408759242, "grad_norm": 9.532994270324707, "learning_rate": 9.70765024778375e-06, "loss": 2.6968, "step": 487800 }, { "epoch": 0.1272524067132946, "grad_norm": 9.438848495483398, "learning_rate": 9.707368532943324e-06, "loss": 2.669, "step": 488000 }, { "epoch": 0.12730455933899676, "grad_norm": 9.121577262878418, "learning_rate": 9.707086686526518e-06, "loss": 2.6328, "step": 488200 }, { "epoch": 0.12735671196469894, "grad_norm": 9.110394477844238, "learning_rate": 9.706804708541211e-06, "loss": 2.6712, "step": 488400 }, { "epoch": 0.1274088645904011, "grad_norm": 7.500752925872803, "learning_rate": 9.706522598995288e-06, "loss": 2.6645, "step": 488600 }, { "epoch": 0.12746101721610328, "grad_norm": 9.652029037475586, "learning_rate": 9.706240357896631e-06, "loss": 2.6274, "step": 488800 }, { "epoch": 0.12751316984180544, "grad_norm": 9.1790189743042, "learning_rate": 9.705957985253128e-06, "loss": 2.6722, "step": 489000 }, { "epoch": 0.1275653224675076, "grad_norm": 10.447507858276367, "learning_rate": 9.705675481072673e-06, "loss": 2.6685, "step": 489200 }, { "epoch": 0.12761747509320978, "grad_norm": 9.254826545715332, "learning_rate": 9.705392845363164e-06, "loss": 2.6453, "step": 489400 }, { "epoch": 0.12766962771891194, "grad_norm": 8.488991737365723, "learning_rate": 9.705110078132498e-06, "loss": 2.6604, "step": 489600 }, { "epoch": 0.12772178034461412, "grad_norm": 8.976917266845703, "learning_rate": 9.704827179388581e-06, "loss": 2.6436, "step": 489800 }, { "epoch": 0.12777393297031628, "grad_norm": 9.096659660339355, "learning_rate": 9.704544149139319e-06, "loss": 2.6794, "step": 490000 }, { "epoch": 0.12782608559601846, "grad_norm": 9.129677772521973, "learning_rate": 9.704260987392624e-06, "loss": 2.6928, "step": 490200 }, { "epoch": 0.12787823822172062, "grad_norm": 8.751590728759766, "learning_rate": 9.70397769415641e-06, "loss": 2.6381, "step": 490400 }, { "epoch": 0.1279303908474228, "grad_norm": 9.134084701538086, "learning_rate": 9.703694269438596e-06, "loss": 2.6475, "step": 490600 }, { "epoch": 0.12798254347312496, "grad_norm": 9.106485366821289, "learning_rate": 9.703410713247102e-06, "loss": 2.6657, "step": 490800 }, { "epoch": 0.12803469609882714, "grad_norm": 8.137367248535156, "learning_rate": 9.703127025589856e-06, "loss": 2.6392, "step": 491000 }, { "epoch": 0.1280868487245293, "grad_norm": 8.145878791809082, "learning_rate": 9.702843206474788e-06, "loss": 2.6477, "step": 491200 }, { "epoch": 0.12813900135023149, "grad_norm": 8.465982437133789, "learning_rate": 9.70255925590983e-06, "loss": 2.6539, "step": 491400 }, { "epoch": 0.12819115397593364, "grad_norm": 8.704390525817871, "learning_rate": 9.702275173902915e-06, "loss": 2.656, "step": 491600 }, { "epoch": 0.12824330660163583, "grad_norm": 9.234391212463379, "learning_rate": 9.70199096046199e-06, "loss": 2.6496, "step": 491800 }, { "epoch": 0.12829545922733798, "grad_norm": 8.896957397460938, "learning_rate": 9.701706615594996e-06, "loss": 2.5983, "step": 492000 }, { "epoch": 0.12834761185304017, "grad_norm": 8.498276710510254, "learning_rate": 9.70142213930988e-06, "loss": 2.6514, "step": 492200 }, { "epoch": 0.12839976447874232, "grad_norm": 8.999076843261719, "learning_rate": 9.701137531614595e-06, "loss": 2.7108, "step": 492400 }, { "epoch": 0.1284519171044445, "grad_norm": 8.710136413574219, "learning_rate": 9.700852792517095e-06, "loss": 2.6373, "step": 492600 }, { "epoch": 0.12850406973014666, "grad_norm": 8.627429008483887, "learning_rate": 9.70056792202534e-06, "loss": 2.6688, "step": 492800 }, { "epoch": 0.12855622235584885, "grad_norm": 8.266898155212402, "learning_rate": 9.700282920147288e-06, "loss": 2.6819, "step": 493000 }, { "epoch": 0.128608374981551, "grad_norm": 8.928200721740723, "learning_rate": 9.699997786890911e-06, "loss": 2.6685, "step": 493200 }, { "epoch": 0.1286605276072532, "grad_norm": 9.263498306274414, "learning_rate": 9.699712522264179e-06, "loss": 2.6548, "step": 493400 }, { "epoch": 0.12871268023295535, "grad_norm": 8.80411434173584, "learning_rate": 9.69942712627506e-06, "loss": 2.6194, "step": 493600 }, { "epoch": 0.12876483285865753, "grad_norm": 9.677428245544434, "learning_rate": 9.699141598931536e-06, "loss": 2.6491, "step": 493800 }, { "epoch": 0.1288169854843597, "grad_norm": 8.916584968566895, "learning_rate": 9.698855940241584e-06, "loss": 2.6393, "step": 494000 }, { "epoch": 0.12886913811006187, "grad_norm": 8.620122909545898, "learning_rate": 9.698570150213192e-06, "loss": 2.7015, "step": 494200 }, { "epoch": 0.12892129073576403, "grad_norm": 9.354145050048828, "learning_rate": 9.698284228854346e-06, "loss": 2.6735, "step": 494400 }, { "epoch": 0.1289734433614662, "grad_norm": 9.221833229064941, "learning_rate": 9.69799817617304e-06, "loss": 2.639, "step": 494600 }, { "epoch": 0.12902559598716837, "grad_norm": 8.699933052062988, "learning_rate": 9.697711992177266e-06, "loss": 2.6933, "step": 494800 }, { "epoch": 0.12907774861287052, "grad_norm": 8.266427993774414, "learning_rate": 9.697425676875026e-06, "loss": 2.6265, "step": 495000 }, { "epoch": 0.1291299012385727, "grad_norm": 9.16121768951416, "learning_rate": 9.697139230274322e-06, "loss": 2.6737, "step": 495200 }, { "epoch": 0.12918205386427487, "grad_norm": 9.327274322509766, "learning_rate": 9.69685265238316e-06, "loss": 2.6599, "step": 495400 }, { "epoch": 0.12923420648997705, "grad_norm": 8.239920616149902, "learning_rate": 9.696565943209551e-06, "loss": 2.6745, "step": 495600 }, { "epoch": 0.1292863591156792, "grad_norm": 10.874256134033203, "learning_rate": 9.696279102761508e-06, "loss": 2.639, "step": 495800 }, { "epoch": 0.1293385117413814, "grad_norm": 8.510387420654297, "learning_rate": 9.69599213104705e-06, "loss": 2.6401, "step": 496000 }, { "epoch": 0.12939066436708355, "grad_norm": 9.092486381530762, "learning_rate": 9.695705028074197e-06, "loss": 2.6394, "step": 496200 }, { "epoch": 0.12944281699278573, "grad_norm": 9.543620109558105, "learning_rate": 9.695417793850974e-06, "loss": 2.6777, "step": 496400 }, { "epoch": 0.1294949696184879, "grad_norm": 8.863883018493652, "learning_rate": 9.69513042838541e-06, "loss": 2.6614, "step": 496600 }, { "epoch": 0.12954712224419007, "grad_norm": 8.839035034179688, "learning_rate": 9.694842931685536e-06, "loss": 2.6575, "step": 496800 }, { "epoch": 0.12959927486989223, "grad_norm": 9.193402290344238, "learning_rate": 9.69455530375939e-06, "loss": 2.676, "step": 497000 }, { "epoch": 0.1296514274955944, "grad_norm": 8.699831008911133, "learning_rate": 9.69426754461501e-06, "loss": 2.654, "step": 497200 }, { "epoch": 0.12970358012129657, "grad_norm": 9.69882869720459, "learning_rate": 9.693979654260439e-06, "loss": 2.6663, "step": 497400 }, { "epoch": 0.12975573274699875, "grad_norm": 10.164560317993164, "learning_rate": 9.693691632703724e-06, "loss": 2.671, "step": 497600 }, { "epoch": 0.1298078853727009, "grad_norm": 9.261726379394531, "learning_rate": 9.693403479952914e-06, "loss": 2.6322, "step": 497800 }, { "epoch": 0.1298600379984031, "grad_norm": 8.697185516357422, "learning_rate": 9.693115196016068e-06, "loss": 2.6649, "step": 498000 }, { "epoch": 0.12991219062410525, "grad_norm": 9.236394882202148, "learning_rate": 9.69282678090124e-06, "loss": 2.6507, "step": 498200 }, { "epoch": 0.12996434324980743, "grad_norm": 9.209188461303711, "learning_rate": 9.692538234616491e-06, "loss": 2.6858, "step": 498400 }, { "epoch": 0.1300164958755096, "grad_norm": 7.591978073120117, "learning_rate": 9.69224955716989e-06, "loss": 2.6934, "step": 498600 }, { "epoch": 0.13006864850121178, "grad_norm": 9.55863094329834, "learning_rate": 9.691960748569503e-06, "loss": 2.6473, "step": 498800 }, { "epoch": 0.13012080112691393, "grad_norm": 9.636316299438477, "learning_rate": 9.691671808823403e-06, "loss": 2.7021, "step": 499000 }, { "epoch": 0.13017295375261612, "grad_norm": 9.680166244506836, "learning_rate": 9.691382737939665e-06, "loss": 2.6483, "step": 499200 }, { "epoch": 0.13022510637831827, "grad_norm": 8.427440643310547, "learning_rate": 9.69109353592637e-06, "loss": 2.6132, "step": 499400 }, { "epoch": 0.13027725900402046, "grad_norm": 9.360069274902344, "learning_rate": 9.690804202791603e-06, "loss": 2.6739, "step": 499600 }, { "epoch": 0.1303294116297226, "grad_norm": 8.880375862121582, "learning_rate": 9.69051473854345e-06, "loss": 2.6774, "step": 499800 }, { "epoch": 0.1303815642554248, "grad_norm": 8.440618515014648, "learning_rate": 9.69022514319e-06, "loss": 2.6508, "step": 500000 }, { "epoch": 0.13043371688112695, "grad_norm": 9.217138290405273, "learning_rate": 9.68993541673935e-06, "loss": 2.6629, "step": 500200 }, { "epoch": 0.13048586950682914, "grad_norm": 9.152853012084961, "learning_rate": 9.6896455591996e-06, "loss": 2.6395, "step": 500400 }, { "epoch": 0.1305380221325313, "grad_norm": 9.89096736907959, "learning_rate": 9.689355570578845e-06, "loss": 2.6171, "step": 500600 }, { "epoch": 0.13059017475823345, "grad_norm": 8.032015800476074, "learning_rate": 9.689065450885198e-06, "loss": 2.6471, "step": 500800 }, { "epoch": 0.13064232738393564, "grad_norm": 8.966907501220703, "learning_rate": 9.688775200126763e-06, "loss": 2.6885, "step": 501000 }, { "epoch": 0.1306944800096378, "grad_norm": 9.194870948791504, "learning_rate": 9.688484818311654e-06, "loss": 2.6357, "step": 501200 }, { "epoch": 0.13074663263533998, "grad_norm": 10.05204963684082, "learning_rate": 9.68819430544799e-06, "loss": 2.6813, "step": 501400 }, { "epoch": 0.13079878526104213, "grad_norm": 9.8402738571167, "learning_rate": 9.68790366154389e-06, "loss": 2.6826, "step": 501600 }, { "epoch": 0.13085093788674432, "grad_norm": 8.931715965270996, "learning_rate": 9.687612886607477e-06, "loss": 2.6638, "step": 501800 }, { "epoch": 0.13090309051244647, "grad_norm": 9.182944297790527, "learning_rate": 9.687321980646877e-06, "loss": 2.6094, "step": 502000 }, { "epoch": 0.13095524313814866, "grad_norm": 8.474428176879883, "learning_rate": 9.687030943670224e-06, "loss": 2.6104, "step": 502200 }, { "epoch": 0.13100739576385081, "grad_norm": 9.128947257995605, "learning_rate": 9.686739775685653e-06, "loss": 2.6591, "step": 502400 }, { "epoch": 0.131059548389553, "grad_norm": 8.498746871948242, "learning_rate": 9.6864484767013e-06, "loss": 2.6751, "step": 502600 }, { "epoch": 0.13111170101525516, "grad_norm": 9.038972854614258, "learning_rate": 9.686157046725308e-06, "loss": 2.6486, "step": 502800 }, { "epoch": 0.13116385364095734, "grad_norm": 7.748460292816162, "learning_rate": 9.685865485765824e-06, "loss": 2.6387, "step": 503000 }, { "epoch": 0.1312160062666595, "grad_norm": 9.406355857849121, "learning_rate": 9.685573793830996e-06, "loss": 2.66, "step": 503200 }, { "epoch": 0.13126815889236168, "grad_norm": 10.351799011230469, "learning_rate": 9.68528197092898e-06, "loss": 2.6431, "step": 503400 }, { "epoch": 0.13132031151806384, "grad_norm": 10.22693920135498, "learning_rate": 9.684990017067928e-06, "loss": 2.6585, "step": 503600 }, { "epoch": 0.13137246414376602, "grad_norm": 9.691548347473145, "learning_rate": 9.684697932256004e-06, "loss": 2.6583, "step": 503800 }, { "epoch": 0.13142461676946818, "grad_norm": 9.202975273132324, "learning_rate": 9.68440571650137e-06, "loss": 2.6798, "step": 504000 }, { "epoch": 0.13147676939517036, "grad_norm": 10.292789459228516, "learning_rate": 9.684113369812198e-06, "loss": 2.6643, "step": 504200 }, { "epoch": 0.13152892202087252, "grad_norm": 7.787657737731934, "learning_rate": 9.683820892196654e-06, "loss": 2.631, "step": 504400 }, { "epoch": 0.1315810746465747, "grad_norm": 8.93878173828125, "learning_rate": 9.683528283662916e-06, "loss": 2.6179, "step": 504600 }, { "epoch": 0.13163322727227686, "grad_norm": 7.960376739501953, "learning_rate": 9.683235544219161e-06, "loss": 2.6363, "step": 504800 }, { "epoch": 0.13168537989797904, "grad_norm": 8.953857421875, "learning_rate": 9.682942673873572e-06, "loss": 2.642, "step": 505000 }, { "epoch": 0.1317375325236812, "grad_norm": 8.703145027160645, "learning_rate": 9.682649672634337e-06, "loss": 2.639, "step": 505200 }, { "epoch": 0.13178968514938338, "grad_norm": 9.463846206665039, "learning_rate": 9.682356540509645e-06, "loss": 2.6506, "step": 505400 }, { "epoch": 0.13184183777508554, "grad_norm": 9.317432403564453, "learning_rate": 9.68206327750769e-06, "loss": 2.6546, "step": 505600 }, { "epoch": 0.13189399040078773, "grad_norm": 8.97231674194336, "learning_rate": 9.681769883636664e-06, "loss": 2.6196, "step": 505800 }, { "epoch": 0.13194614302648988, "grad_norm": 9.235013961791992, "learning_rate": 9.681476358904773e-06, "loss": 2.6777, "step": 506000 }, { "epoch": 0.13199829565219207, "grad_norm": 9.837597846984863, "learning_rate": 9.68118270332022e-06, "loss": 2.6295, "step": 506200 }, { "epoch": 0.13205044827789422, "grad_norm": 9.842741966247559, "learning_rate": 9.680888916891213e-06, "loss": 2.6498, "step": 506400 }, { "epoch": 0.1321026009035964, "grad_norm": 9.616024017333984, "learning_rate": 9.680594999625964e-06, "loss": 2.6751, "step": 506600 }, { "epoch": 0.13215475352929856, "grad_norm": 8.86656665802002, "learning_rate": 9.680300951532686e-06, "loss": 2.6171, "step": 506800 }, { "epoch": 0.13220690615500072, "grad_norm": 8.192331314086914, "learning_rate": 9.6800067726196e-06, "loss": 2.6294, "step": 507000 }, { "epoch": 0.1322590587807029, "grad_norm": 8.469772338867188, "learning_rate": 9.679712462894931e-06, "loss": 2.6976, "step": 507200 }, { "epoch": 0.13231121140640506, "grad_norm": 7.870084762573242, "learning_rate": 9.679418022366903e-06, "loss": 2.6391, "step": 507400 }, { "epoch": 0.13236336403210724, "grad_norm": 9.129673957824707, "learning_rate": 9.679123451043743e-06, "loss": 2.6854, "step": 507600 }, { "epoch": 0.1324155166578094, "grad_norm": 9.1187744140625, "learning_rate": 9.678828748933689e-06, "loss": 2.6234, "step": 507800 }, { "epoch": 0.13246766928351159, "grad_norm": 9.26366901397705, "learning_rate": 9.678533916044974e-06, "loss": 2.64, "step": 508000 }, { "epoch": 0.13251982190921374, "grad_norm": 8.143036842346191, "learning_rate": 9.678238952385844e-06, "loss": 2.6068, "step": 508200 }, { "epoch": 0.13257197453491593, "grad_norm": 9.583711624145508, "learning_rate": 9.67794385796454e-06, "loss": 2.6514, "step": 508400 }, { "epoch": 0.13262412716061808, "grad_norm": 8.947481155395508, "learning_rate": 9.67764863278931e-06, "loss": 2.6757, "step": 508600 }, { "epoch": 0.13267627978632027, "grad_norm": 9.801794052124023, "learning_rate": 9.67735327686841e-06, "loss": 2.6304, "step": 508800 }, { "epoch": 0.13272843241202242, "grad_norm": 8.388129234313965, "learning_rate": 9.677057790210093e-06, "loss": 2.6435, "step": 509000 }, { "epoch": 0.1327805850377246, "grad_norm": 9.845732688903809, "learning_rate": 9.676762172822615e-06, "loss": 2.6488, "step": 509200 }, { "epoch": 0.13283273766342676, "grad_norm": 8.10527515411377, "learning_rate": 9.676466424714244e-06, "loss": 2.6525, "step": 509400 }, { "epoch": 0.13288489028912895, "grad_norm": 9.184629440307617, "learning_rate": 9.676170545893245e-06, "loss": 2.62, "step": 509600 }, { "epoch": 0.1329370429148311, "grad_norm": 9.896688461303711, "learning_rate": 9.675874536367883e-06, "loss": 2.6222, "step": 509800 }, { "epoch": 0.1329891955405333, "grad_norm": 9.492822647094727, "learning_rate": 9.675578396146441e-06, "loss": 2.6601, "step": 510000 }, { "epoch": 0.13304134816623545, "grad_norm": 8.227307319641113, "learning_rate": 9.67528212523719e-06, "loss": 2.6555, "step": 510200 }, { "epoch": 0.13309350079193763, "grad_norm": 8.036877632141113, "learning_rate": 9.674985723648413e-06, "loss": 2.648, "step": 510400 }, { "epoch": 0.1331456534176398, "grad_norm": 10.020105361938477, "learning_rate": 9.674689191388393e-06, "loss": 2.6916, "step": 510600 }, { "epoch": 0.13319780604334197, "grad_norm": 9.248919486999512, "learning_rate": 9.674392528465421e-06, "loss": 2.621, "step": 510800 }, { "epoch": 0.13324995866904413, "grad_norm": 10.11646842956543, "learning_rate": 9.67409573488779e-06, "loss": 2.6674, "step": 511000 }, { "epoch": 0.1333021112947463, "grad_norm": 8.71399974822998, "learning_rate": 9.67379881066379e-06, "loss": 2.6399, "step": 511200 }, { "epoch": 0.13335426392044847, "grad_norm": 9.745195388793945, "learning_rate": 9.673501755801726e-06, "loss": 2.6376, "step": 511400 }, { "epoch": 0.13340641654615065, "grad_norm": 9.938071250915527, "learning_rate": 9.673204570309899e-06, "loss": 2.6562, "step": 511600 }, { "epoch": 0.1334585691718528, "grad_norm": 9.505668640136719, "learning_rate": 9.672907254196618e-06, "loss": 2.6752, "step": 511800 }, { "epoch": 0.133510721797555, "grad_norm": 9.43238353729248, "learning_rate": 9.672609807470187e-06, "loss": 2.6658, "step": 512000 }, { "epoch": 0.13356287442325715, "grad_norm": 9.957179069519043, "learning_rate": 9.672312230138926e-06, "loss": 2.6154, "step": 512200 }, { "epoch": 0.13361502704895933, "grad_norm": 10.3068265914917, "learning_rate": 9.672014522211153e-06, "loss": 2.6855, "step": 512400 }, { "epoch": 0.1336671796746615, "grad_norm": 10.027313232421875, "learning_rate": 9.671716683695184e-06, "loss": 2.6288, "step": 512600 }, { "epoch": 0.13371933230036365, "grad_norm": 8.177754402160645, "learning_rate": 9.67141871459935e-06, "loss": 2.6548, "step": 512800 }, { "epoch": 0.13377148492606583, "grad_norm": 7.8090500831604, "learning_rate": 9.671120614931975e-06, "loss": 2.6628, "step": 513000 }, { "epoch": 0.133823637551768, "grad_norm": 9.168340682983398, "learning_rate": 9.670822384701393e-06, "loss": 2.6837, "step": 513200 }, { "epoch": 0.13387579017747017, "grad_norm": 9.05853271484375, "learning_rate": 9.670524023915939e-06, "loss": 2.619, "step": 513400 }, { "epoch": 0.13392794280317233, "grad_norm": 9.077415466308594, "learning_rate": 9.670225532583954e-06, "loss": 2.6538, "step": 513600 }, { "epoch": 0.1339800954288745, "grad_norm": 10.212859153747559, "learning_rate": 9.66992691071378e-06, "loss": 2.6305, "step": 513800 }, { "epoch": 0.13403224805457667, "grad_norm": 8.626009941101074, "learning_rate": 9.669628158313764e-06, "loss": 2.6326, "step": 514000 }, { "epoch": 0.13408440068027885, "grad_norm": 9.030853271484375, "learning_rate": 9.669329275392256e-06, "loss": 2.6567, "step": 514200 }, { "epoch": 0.134136553305981, "grad_norm": 10.016151428222656, "learning_rate": 9.669030261957613e-06, "loss": 2.6399, "step": 514400 }, { "epoch": 0.1341887059316832, "grad_norm": 9.281685829162598, "learning_rate": 9.668731118018189e-06, "loss": 2.6532, "step": 514600 }, { "epoch": 0.13424085855738535, "grad_norm": 9.473543167114258, "learning_rate": 9.668431843582347e-06, "loss": 2.6171, "step": 514800 }, { "epoch": 0.13429301118308754, "grad_norm": 11.197412490844727, "learning_rate": 9.668132438658452e-06, "loss": 2.6532, "step": 515000 }, { "epoch": 0.1343451638087897, "grad_norm": 8.542428016662598, "learning_rate": 9.667832903254873e-06, "loss": 2.6227, "step": 515200 }, { "epoch": 0.13439731643449188, "grad_norm": 8.935531616210938, "learning_rate": 9.667533237379983e-06, "loss": 2.663, "step": 515400 }, { "epoch": 0.13444946906019403, "grad_norm": 8.45200252532959, "learning_rate": 9.667233441042156e-06, "loss": 2.6751, "step": 515600 }, { "epoch": 0.13450162168589622, "grad_norm": 10.14538288116455, "learning_rate": 9.666933514249773e-06, "loss": 2.6057, "step": 515800 }, { "epoch": 0.13455377431159837, "grad_norm": 9.2836332321167, "learning_rate": 9.666633457011216e-06, "loss": 2.6303, "step": 516000 }, { "epoch": 0.13460592693730056, "grad_norm": 10.366707801818848, "learning_rate": 9.666333269334876e-06, "loss": 2.693, "step": 516200 }, { "epoch": 0.1346580795630027, "grad_norm": 10.014763832092285, "learning_rate": 9.666032951229138e-06, "loss": 2.6208, "step": 516400 }, { "epoch": 0.1347102321887049, "grad_norm": 8.867531776428223, "learning_rate": 9.665732502702401e-06, "loss": 2.6653, "step": 516600 }, { "epoch": 0.13476238481440705, "grad_norm": 9.882331848144531, "learning_rate": 9.665431923763059e-06, "loss": 2.6914, "step": 516800 }, { "epoch": 0.13481453744010924, "grad_norm": 9.857499122619629, "learning_rate": 9.665131214419516e-06, "loss": 2.6641, "step": 517000 }, { "epoch": 0.1348666900658114, "grad_norm": 9.31207275390625, "learning_rate": 9.664830374680176e-06, "loss": 2.6546, "step": 517200 }, { "epoch": 0.13491884269151358, "grad_norm": 10.368709564208984, "learning_rate": 9.664529404553449e-06, "loss": 2.6633, "step": 517400 }, { "epoch": 0.13497099531721574, "grad_norm": 9.503002166748047, "learning_rate": 9.664228304047746e-06, "loss": 2.6746, "step": 517600 }, { "epoch": 0.13502314794291792, "grad_norm": 8.640365600585938, "learning_rate": 9.663927073171485e-06, "loss": 2.6339, "step": 517800 }, { "epoch": 0.13507530056862008, "grad_norm": 10.261897087097168, "learning_rate": 9.663625711933083e-06, "loss": 2.6787, "step": 518000 }, { "epoch": 0.13512745319432226, "grad_norm": 10.067601203918457, "learning_rate": 9.663324220340964e-06, "loss": 2.6697, "step": 518200 }, { "epoch": 0.13517960582002442, "grad_norm": 8.839470863342285, "learning_rate": 9.66302259840356e-06, "loss": 2.6629, "step": 518400 }, { "epoch": 0.13523175844572657, "grad_norm": 9.251420974731445, "learning_rate": 9.662720846129295e-06, "loss": 2.6574, "step": 518600 }, { "epoch": 0.13528391107142876, "grad_norm": 9.32863998413086, "learning_rate": 9.662418963526605e-06, "loss": 2.6484, "step": 518800 }, { "epoch": 0.13533606369713091, "grad_norm": 9.35538387298584, "learning_rate": 9.662116950603932e-06, "loss": 2.6542, "step": 519000 }, { "epoch": 0.1353882163228331, "grad_norm": 9.882723808288574, "learning_rate": 9.661814807369713e-06, "loss": 2.6853, "step": 519200 }, { "epoch": 0.13544036894853526, "grad_norm": 9.342474937438965, "learning_rate": 9.661512533832395e-06, "loss": 2.6694, "step": 519400 }, { "epoch": 0.13549252157423744, "grad_norm": 8.824580192565918, "learning_rate": 9.661210130000425e-06, "loss": 2.6505, "step": 519600 }, { "epoch": 0.1355446741999396, "grad_norm": 9.28695011138916, "learning_rate": 9.660907595882261e-06, "loss": 2.6744, "step": 519800 }, { "epoch": 0.13559682682564178, "grad_norm": 9.04250431060791, "learning_rate": 9.660604931486353e-06, "loss": 2.6515, "step": 520000 }, { "epoch": 0.13564897945134394, "grad_norm": 10.339582443237305, "learning_rate": 9.660302136821163e-06, "loss": 2.6614, "step": 520200 }, { "epoch": 0.13570113207704612, "grad_norm": 8.636717796325684, "learning_rate": 9.659999211895156e-06, "loss": 2.6249, "step": 520400 }, { "epoch": 0.13575328470274828, "grad_norm": 8.852993965148926, "learning_rate": 9.659696156716797e-06, "loss": 2.6207, "step": 520600 }, { "epoch": 0.13580543732845046, "grad_norm": 9.094849586486816, "learning_rate": 9.659392971294558e-06, "loss": 2.6388, "step": 520800 }, { "epoch": 0.13585758995415262, "grad_norm": 8.45062255859375, "learning_rate": 9.659089655636914e-06, "loss": 2.6394, "step": 521000 }, { "epoch": 0.1359097425798548, "grad_norm": 10.011489868164062, "learning_rate": 9.65878620975234e-06, "loss": 2.6376, "step": 521200 }, { "epoch": 0.13596189520555696, "grad_norm": 9.696266174316406, "learning_rate": 9.65848263364932e-06, "loss": 2.6553, "step": 521400 }, { "epoch": 0.13601404783125914, "grad_norm": 8.92795181274414, "learning_rate": 9.65817892733634e-06, "loss": 2.6496, "step": 521600 }, { "epoch": 0.1360662004569613, "grad_norm": 9.415807723999023, "learning_rate": 9.657875090821886e-06, "loss": 2.5864, "step": 521800 }, { "epoch": 0.13611835308266348, "grad_norm": 10.63681697845459, "learning_rate": 9.657571124114455e-06, "loss": 2.6961, "step": 522000 }, { "epoch": 0.13617050570836564, "grad_norm": 9.53947639465332, "learning_rate": 9.657267027222539e-06, "loss": 2.6501, "step": 522200 }, { "epoch": 0.13622265833406783, "grad_norm": 10.805290222167969, "learning_rate": 9.656962800154641e-06, "loss": 2.6548, "step": 522400 }, { "epoch": 0.13627481095976998, "grad_norm": 7.7652764320373535, "learning_rate": 9.656658442919261e-06, "loss": 2.6244, "step": 522600 }, { "epoch": 0.13632696358547217, "grad_norm": 9.094009399414062, "learning_rate": 9.65635395552491e-06, "loss": 2.6543, "step": 522800 }, { "epoch": 0.13637911621117432, "grad_norm": 9.510210990905762, "learning_rate": 9.656049337980096e-06, "loss": 2.6013, "step": 523000 }, { "epoch": 0.1364312688368765, "grad_norm": 9.347325325012207, "learning_rate": 9.655744590293334e-06, "loss": 2.617, "step": 523200 }, { "epoch": 0.13648342146257866, "grad_norm": 10.486848831176758, "learning_rate": 9.655439712473143e-06, "loss": 2.6461, "step": 523400 }, { "epoch": 0.13653557408828085, "grad_norm": 9.532522201538086, "learning_rate": 9.655134704528044e-06, "loss": 2.6238, "step": 523600 }, { "epoch": 0.136587726713983, "grad_norm": 9.079800605773926, "learning_rate": 9.654829566466564e-06, "loss": 2.6561, "step": 523800 }, { "epoch": 0.1366398793396852, "grad_norm": 9.900603294372559, "learning_rate": 9.654524298297228e-06, "loss": 2.6736, "step": 524000 }, { "epoch": 0.13669203196538735, "grad_norm": 10.008954048156738, "learning_rate": 9.65421890002857e-06, "loss": 2.5807, "step": 524200 }, { "epoch": 0.1367441845910895, "grad_norm": 9.736871719360352, "learning_rate": 9.653913371669131e-06, "loss": 2.6297, "step": 524400 }, { "epoch": 0.13679633721679169, "grad_norm": 9.787025451660156, "learning_rate": 9.653607713227445e-06, "loss": 2.6355, "step": 524600 }, { "epoch": 0.13684848984249384, "grad_norm": 9.512006759643555, "learning_rate": 9.65330192471206e-06, "loss": 2.6131, "step": 524800 }, { "epoch": 0.13690064246819603, "grad_norm": 9.525954246520996, "learning_rate": 9.652996006131517e-06, "loss": 2.6761, "step": 525000 }, { "epoch": 0.13695279509389818, "grad_norm": 9.713210105895996, "learning_rate": 9.652689957494371e-06, "loss": 2.6427, "step": 525200 }, { "epoch": 0.13700494771960037, "grad_norm": 7.939797878265381, "learning_rate": 9.652383778809178e-06, "loss": 2.6448, "step": 525400 }, { "epoch": 0.13705710034530252, "grad_norm": 9.904065132141113, "learning_rate": 9.652077470084492e-06, "loss": 2.616, "step": 525600 }, { "epoch": 0.1371092529710047, "grad_norm": 8.858099937438965, "learning_rate": 9.651771031328878e-06, "loss": 2.6253, "step": 525800 }, { "epoch": 0.13716140559670686, "grad_norm": 9.863373756408691, "learning_rate": 9.651464462550902e-06, "loss": 2.6251, "step": 526000 }, { "epoch": 0.13721355822240905, "grad_norm": 9.789137840270996, "learning_rate": 9.651157763759126e-06, "loss": 2.6234, "step": 526200 }, { "epoch": 0.1372657108481112, "grad_norm": 10.034318923950195, "learning_rate": 9.650850934962132e-06, "loss": 2.6572, "step": 526400 }, { "epoch": 0.1373178634738134, "grad_norm": 9.916723251342773, "learning_rate": 9.65054397616849e-06, "loss": 2.632, "step": 526600 }, { "epoch": 0.13737001609951555, "grad_norm": 8.940628051757812, "learning_rate": 9.65023688738678e-06, "loss": 2.6402, "step": 526800 }, { "epoch": 0.13742216872521773, "grad_norm": 9.220625877380371, "learning_rate": 9.649929668625589e-06, "loss": 2.6262, "step": 527000 }, { "epoch": 0.1374743213509199, "grad_norm": 8.476348876953125, "learning_rate": 9.649622319893502e-06, "loss": 2.6734, "step": 527200 }, { "epoch": 0.13752647397662207, "grad_norm": 8.9732084274292, "learning_rate": 9.649314841199109e-06, "loss": 2.6198, "step": 527400 }, { "epoch": 0.13757862660232423, "grad_norm": 9.643847465515137, "learning_rate": 9.649007232551006e-06, "loss": 2.672, "step": 527600 }, { "epoch": 0.1376307792280264, "grad_norm": 9.115861892700195, "learning_rate": 9.648699493957792e-06, "loss": 2.6423, "step": 527800 }, { "epoch": 0.13768293185372857, "grad_norm": 10.297475814819336, "learning_rate": 9.648391625428064e-06, "loss": 2.6552, "step": 528000 }, { "epoch": 0.13773508447943075, "grad_norm": 9.935859680175781, "learning_rate": 9.648083626970432e-06, "loss": 2.6381, "step": 528200 }, { "epoch": 0.1377872371051329, "grad_norm": 10.028658866882324, "learning_rate": 9.647775498593502e-06, "loss": 2.6879, "step": 528400 }, { "epoch": 0.1378393897308351, "grad_norm": 10.135675430297852, "learning_rate": 9.647467240305888e-06, "loss": 2.6395, "step": 528600 }, { "epoch": 0.13789154235653725, "grad_norm": 9.688112258911133, "learning_rate": 9.647158852116207e-06, "loss": 2.6373, "step": 528800 }, { "epoch": 0.13794369498223943, "grad_norm": 9.650997161865234, "learning_rate": 9.646850334033078e-06, "loss": 2.623, "step": 529000 }, { "epoch": 0.1379958476079416, "grad_norm": 8.232544898986816, "learning_rate": 9.646541686065122e-06, "loss": 2.6484, "step": 529200 }, { "epoch": 0.13804800023364378, "grad_norm": 9.324143409729004, "learning_rate": 9.64623290822097e-06, "loss": 2.6221, "step": 529400 }, { "epoch": 0.13810015285934593, "grad_norm": 9.4964599609375, "learning_rate": 9.64592400050925e-06, "loss": 2.6273, "step": 529600 }, { "epoch": 0.13815230548504812, "grad_norm": 10.16766357421875, "learning_rate": 9.6456149629386e-06, "loss": 2.6316, "step": 529800 }, { "epoch": 0.13820445811075027, "grad_norm": 9.12529468536377, "learning_rate": 9.64530579551765e-06, "loss": 2.6704, "step": 530000 }, { "epoch": 0.13825661073645246, "grad_norm": 10.48526382446289, "learning_rate": 9.64499649825505e-06, "loss": 2.6643, "step": 530200 }, { "epoch": 0.1383087633621546, "grad_norm": 8.55528736114502, "learning_rate": 9.644687071159442e-06, "loss": 2.6488, "step": 530400 }, { "epoch": 0.13836091598785677, "grad_norm": 9.063948631286621, "learning_rate": 9.644377514239473e-06, "loss": 2.6316, "step": 530600 }, { "epoch": 0.13841306861355895, "grad_norm": 8.189355850219727, "learning_rate": 9.6440678275038e-06, "loss": 2.6458, "step": 530800 }, { "epoch": 0.1384652212392611, "grad_norm": 10.547919273376465, "learning_rate": 9.643758010961075e-06, "loss": 2.6643, "step": 531000 }, { "epoch": 0.1385173738649633, "grad_norm": 8.875152587890625, "learning_rate": 9.643448064619958e-06, "loss": 2.6343, "step": 531200 }, { "epoch": 0.13856952649066545, "grad_norm": 10.155411720275879, "learning_rate": 9.643137988489115e-06, "loss": 2.6402, "step": 531400 }, { "epoch": 0.13862167911636764, "grad_norm": 9.877212524414062, "learning_rate": 9.64282778257721e-06, "loss": 2.6482, "step": 531600 }, { "epoch": 0.1386738317420698, "grad_norm": 8.81261157989502, "learning_rate": 9.642517446892919e-06, "loss": 2.6458, "step": 531800 }, { "epoch": 0.13872598436777198, "grad_norm": 9.765755653381348, "learning_rate": 9.642206981444908e-06, "loss": 2.6615, "step": 532000 }, { "epoch": 0.13877813699347413, "grad_norm": 9.739371299743652, "learning_rate": 9.641896386241861e-06, "loss": 2.6525, "step": 532200 }, { "epoch": 0.13883028961917632, "grad_norm": 10.315713882446289, "learning_rate": 9.641585661292457e-06, "loss": 2.659, "step": 532400 }, { "epoch": 0.13888244224487847, "grad_norm": 9.760064125061035, "learning_rate": 9.641274806605384e-06, "loss": 2.6282, "step": 532600 }, { "epoch": 0.13893459487058066, "grad_norm": 9.685067176818848, "learning_rate": 9.640963822189327e-06, "loss": 2.6185, "step": 532800 }, { "epoch": 0.13898674749628281, "grad_norm": 10.004737854003906, "learning_rate": 9.640652708052978e-06, "loss": 2.6327, "step": 533000 }, { "epoch": 0.139038900121985, "grad_norm": 8.516515731811523, "learning_rate": 9.640341464205037e-06, "loss": 2.6649, "step": 533200 }, { "epoch": 0.13909105274768715, "grad_norm": 10.124285697937012, "learning_rate": 9.640030090654202e-06, "loss": 2.6205, "step": 533400 }, { "epoch": 0.13914320537338934, "grad_norm": 10.336373329162598, "learning_rate": 9.639718587409174e-06, "loss": 2.6386, "step": 533600 }, { "epoch": 0.1391953579990915, "grad_norm": 10.257096290588379, "learning_rate": 9.639406954478663e-06, "loss": 2.643, "step": 533800 }, { "epoch": 0.13924751062479368, "grad_norm": 9.125519752502441, "learning_rate": 9.639095191871379e-06, "loss": 2.6576, "step": 534000 }, { "epoch": 0.13929966325049584, "grad_norm": 8.960975646972656, "learning_rate": 9.638783299596033e-06, "loss": 2.6339, "step": 534200 }, { "epoch": 0.13935181587619802, "grad_norm": 10.509865760803223, "learning_rate": 9.638471277661347e-06, "loss": 2.6326, "step": 534400 }, { "epoch": 0.13940396850190018, "grad_norm": 9.875240325927734, "learning_rate": 9.63815912607604e-06, "loss": 2.6501, "step": 534600 }, { "epoch": 0.13945612112760236, "grad_norm": 8.997678756713867, "learning_rate": 9.637846844848838e-06, "loss": 2.6376, "step": 534800 }, { "epoch": 0.13950827375330452, "grad_norm": 9.591438293457031, "learning_rate": 9.637534433988467e-06, "loss": 2.6116, "step": 535000 }, { "epoch": 0.1395604263790067, "grad_norm": 9.580354690551758, "learning_rate": 9.637221893503662e-06, "loss": 2.6207, "step": 535200 }, { "epoch": 0.13961257900470886, "grad_norm": 8.961016654968262, "learning_rate": 9.63690922340316e-06, "loss": 2.6337, "step": 535400 }, { "epoch": 0.13966473163041104, "grad_norm": 8.852940559387207, "learning_rate": 9.636596423695697e-06, "loss": 2.6134, "step": 535600 }, { "epoch": 0.1397168842561132, "grad_norm": 9.002432823181152, "learning_rate": 9.636283494390017e-06, "loss": 2.6261, "step": 535800 }, { "epoch": 0.13976903688181538, "grad_norm": 9.397136688232422, "learning_rate": 9.635970435494868e-06, "loss": 2.6458, "step": 536000 }, { "epoch": 0.13982118950751754, "grad_norm": 9.2616548538208, "learning_rate": 9.635657247019001e-06, "loss": 2.6269, "step": 536200 }, { "epoch": 0.1398733421332197, "grad_norm": 9.671730041503906, "learning_rate": 9.635343928971167e-06, "loss": 2.6507, "step": 536400 }, { "epoch": 0.13992549475892188, "grad_norm": 9.459322929382324, "learning_rate": 9.635030481360129e-06, "loss": 2.6104, "step": 536600 }, { "epoch": 0.13997764738462404, "grad_norm": 9.426004409790039, "learning_rate": 9.63471690419464e-06, "loss": 2.63, "step": 536800 }, { "epoch": 0.14002980001032622, "grad_norm": 9.91978931427002, "learning_rate": 9.634403197483472e-06, "loss": 2.6297, "step": 537000 }, { "epoch": 0.14008195263602838, "grad_norm": 10.36573600769043, "learning_rate": 9.634089361235391e-06, "loss": 2.6198, "step": 537200 }, { "epoch": 0.14013410526173056, "grad_norm": 12.126402854919434, "learning_rate": 9.633775395459169e-06, "loss": 2.6285, "step": 537400 }, { "epoch": 0.14018625788743272, "grad_norm": 8.916886329650879, "learning_rate": 9.633461300163582e-06, "loss": 2.6479, "step": 537600 }, { "epoch": 0.1402384105131349, "grad_norm": 10.035049438476562, "learning_rate": 9.633147075357409e-06, "loss": 2.6763, "step": 537800 }, { "epoch": 0.14029056313883706, "grad_norm": 9.93181324005127, "learning_rate": 9.632832721049432e-06, "loss": 2.6169, "step": 538000 }, { "epoch": 0.14034271576453924, "grad_norm": 10.294628143310547, "learning_rate": 9.632518237248439e-06, "loss": 2.6648, "step": 538200 }, { "epoch": 0.1403948683902414, "grad_norm": 9.200163841247559, "learning_rate": 9.63220362396322e-06, "loss": 2.6165, "step": 538400 }, { "epoch": 0.14044702101594359, "grad_norm": 9.44741153717041, "learning_rate": 9.631888881202569e-06, "loss": 2.6108, "step": 538600 }, { "epoch": 0.14049917364164574, "grad_norm": 9.307562828063965, "learning_rate": 9.631574008975281e-06, "loss": 2.6276, "step": 538800 }, { "epoch": 0.14055132626734793, "grad_norm": 8.72498893737793, "learning_rate": 9.631259007290162e-06, "loss": 2.6432, "step": 539000 }, { "epoch": 0.14060347889305008, "grad_norm": 10.48019027709961, "learning_rate": 9.63094387615601e-06, "loss": 2.6292, "step": 539200 }, { "epoch": 0.14065563151875227, "grad_norm": 9.463690757751465, "learning_rate": 9.63062861558164e-06, "loss": 2.6205, "step": 539400 }, { "epoch": 0.14070778414445442, "grad_norm": 9.219118118286133, "learning_rate": 9.630313225575862e-06, "loss": 2.6125, "step": 539600 }, { "epoch": 0.1407599367701566, "grad_norm": 11.975398063659668, "learning_rate": 9.629997706147488e-06, "loss": 2.6088, "step": 539800 }, { "epoch": 0.14081208939585876, "grad_norm": 8.592241287231445, "learning_rate": 9.629682057305341e-06, "loss": 2.626, "step": 540000 }, { "epoch": 0.14086424202156095, "grad_norm": 9.817209243774414, "learning_rate": 9.62936627905824e-06, "loss": 2.647, "step": 540200 }, { "epoch": 0.1409163946472631, "grad_norm": 10.493924140930176, "learning_rate": 9.629050371415015e-06, "loss": 2.622, "step": 540400 }, { "epoch": 0.1409685472729653, "grad_norm": 9.412971496582031, "learning_rate": 9.628734334384495e-06, "loss": 2.6524, "step": 540600 }, { "epoch": 0.14102069989866745, "grad_norm": 10.187376976013184, "learning_rate": 9.628418167975512e-06, "loss": 2.6041, "step": 540800 }, { "epoch": 0.14107285252436963, "grad_norm": 9.966575622558594, "learning_rate": 9.628101872196907e-06, "loss": 2.664, "step": 541000 }, { "epoch": 0.1411250051500718, "grad_norm": 9.64553451538086, "learning_rate": 9.627785447057517e-06, "loss": 2.686, "step": 541200 }, { "epoch": 0.14117715777577397, "grad_norm": 11.089734077453613, "learning_rate": 9.627468892566186e-06, "loss": 2.6556, "step": 541400 }, { "epoch": 0.14122931040147613, "grad_norm": 9.867591857910156, "learning_rate": 9.627152208731765e-06, "loss": 2.6004, "step": 541600 }, { "epoch": 0.1412814630271783, "grad_norm": 9.480960845947266, "learning_rate": 9.626835395563104e-06, "loss": 2.6388, "step": 541800 }, { "epoch": 0.14133361565288047, "grad_norm": 9.904638290405273, "learning_rate": 9.62651845306906e-06, "loss": 2.6437, "step": 542000 }, { "epoch": 0.14138576827858262, "grad_norm": 10.75538158416748, "learning_rate": 9.626201381258488e-06, "loss": 2.6249, "step": 542200 }, { "epoch": 0.1414379209042848, "grad_norm": 9.948746681213379, "learning_rate": 9.625884180140255e-06, "loss": 2.6459, "step": 542400 }, { "epoch": 0.14149007352998696, "grad_norm": 8.870070457458496, "learning_rate": 9.625566849723227e-06, "loss": 2.6629, "step": 542600 }, { "epoch": 0.14154222615568915, "grad_norm": 10.196480751037598, "learning_rate": 9.62524939001627e-06, "loss": 2.6402, "step": 542800 }, { "epoch": 0.1415943787813913, "grad_norm": 9.863924026489258, "learning_rate": 9.624931801028258e-06, "loss": 2.6385, "step": 543000 }, { "epoch": 0.1416465314070935, "grad_norm": 9.387092590332031, "learning_rate": 9.624614082768073e-06, "loss": 2.6503, "step": 543200 }, { "epoch": 0.14169868403279565, "grad_norm": 11.089527130126953, "learning_rate": 9.62429623524459e-06, "loss": 2.6537, "step": 543400 }, { "epoch": 0.14175083665849783, "grad_norm": 10.329263687133789, "learning_rate": 9.623978258466696e-06, "loss": 2.6778, "step": 543600 }, { "epoch": 0.1418029892842, "grad_norm": 10.49349308013916, "learning_rate": 9.623660152443277e-06, "loss": 2.6065, "step": 543800 }, { "epoch": 0.14185514190990217, "grad_norm": 9.694085121154785, "learning_rate": 9.623341917183227e-06, "loss": 2.627, "step": 544000 }, { "epoch": 0.14190729453560433, "grad_norm": 10.875962257385254, "learning_rate": 9.623023552695438e-06, "loss": 2.653, "step": 544200 }, { "epoch": 0.1419594471613065, "grad_norm": 8.842427253723145, "learning_rate": 9.62270505898881e-06, "loss": 2.6317, "step": 544400 }, { "epoch": 0.14201159978700867, "grad_norm": 9.090118408203125, "learning_rate": 9.622386436072246e-06, "loss": 2.6197, "step": 544600 }, { "epoch": 0.14206375241271085, "grad_norm": 10.754172325134277, "learning_rate": 9.622067683954651e-06, "loss": 2.5924, "step": 544800 }, { "epoch": 0.142115905038413, "grad_norm": 9.412361145019531, "learning_rate": 9.621748802644934e-06, "loss": 2.6484, "step": 545000 }, { "epoch": 0.1421680576641152, "grad_norm": 10.36867618560791, "learning_rate": 9.62142979215201e-06, "loss": 2.6376, "step": 545200 }, { "epoch": 0.14222021028981735, "grad_norm": 9.003300666809082, "learning_rate": 9.621110652484794e-06, "loss": 2.6281, "step": 545400 }, { "epoch": 0.14227236291551953, "grad_norm": 11.816666603088379, "learning_rate": 9.62079138365221e-06, "loss": 2.6324, "step": 545600 }, { "epoch": 0.1423245155412217, "grad_norm": 9.261345863342285, "learning_rate": 9.620471985663175e-06, "loss": 2.6267, "step": 545800 }, { "epoch": 0.14237666816692388, "grad_norm": 8.67002010345459, "learning_rate": 9.620152458526622e-06, "loss": 2.6205, "step": 546000 }, { "epoch": 0.14242882079262603, "grad_norm": 10.905217170715332, "learning_rate": 9.61983280225148e-06, "loss": 2.6096, "step": 546200 }, { "epoch": 0.14248097341832822, "grad_norm": 10.862191200256348, "learning_rate": 9.619513016846685e-06, "loss": 2.6368, "step": 546400 }, { "epoch": 0.14253312604403037, "grad_norm": 9.508679389953613, "learning_rate": 9.619193102321174e-06, "loss": 2.6533, "step": 546600 }, { "epoch": 0.14258527866973256, "grad_norm": 9.74131965637207, "learning_rate": 9.61887305868389e-06, "loss": 2.6502, "step": 546800 }, { "epoch": 0.1426374312954347, "grad_norm": 9.636917114257812, "learning_rate": 9.618552885943777e-06, "loss": 2.6608, "step": 547000 }, { "epoch": 0.1426895839211369, "grad_norm": 9.495331764221191, "learning_rate": 9.618232584109788e-06, "loss": 2.6519, "step": 547200 }, { "epoch": 0.14274173654683905, "grad_norm": 9.559065818786621, "learning_rate": 9.617912153190871e-06, "loss": 2.6379, "step": 547400 }, { "epoch": 0.14279388917254124, "grad_norm": 9.602763175964355, "learning_rate": 9.617591593195987e-06, "loss": 2.6304, "step": 547600 }, { "epoch": 0.1428460417982434, "grad_norm": 9.828039169311523, "learning_rate": 9.617270904134091e-06, "loss": 2.6251, "step": 547800 }, { "epoch": 0.14289819442394555, "grad_norm": 9.119592666625977, "learning_rate": 9.616950086014152e-06, "loss": 2.6231, "step": 548000 }, { "epoch": 0.14295034704964774, "grad_norm": 10.890464782714844, "learning_rate": 9.616629138845132e-06, "loss": 2.649, "step": 548200 }, { "epoch": 0.1430024996753499, "grad_norm": 8.780187606811523, "learning_rate": 9.616308062636006e-06, "loss": 2.6794, "step": 548400 }, { "epoch": 0.14305465230105208, "grad_norm": 9.78468132019043, "learning_rate": 9.615986857395746e-06, "loss": 2.6462, "step": 548600 }, { "epoch": 0.14310680492675423, "grad_norm": 8.558454513549805, "learning_rate": 9.615665523133331e-06, "loss": 2.6206, "step": 548800 }, { "epoch": 0.14315895755245642, "grad_norm": 8.706876754760742, "learning_rate": 9.615344059857743e-06, "loss": 2.6301, "step": 549000 }, { "epoch": 0.14321111017815857, "grad_norm": 9.68352222442627, "learning_rate": 9.615022467577966e-06, "loss": 2.6818, "step": 549200 }, { "epoch": 0.14326326280386076, "grad_norm": 11.24920654296875, "learning_rate": 9.614700746302991e-06, "loss": 2.6596, "step": 549400 }, { "epoch": 0.14331541542956291, "grad_norm": 9.429580688476562, "learning_rate": 9.614378896041808e-06, "loss": 2.6525, "step": 549600 }, { "epoch": 0.1433675680552651, "grad_norm": 10.127577781677246, "learning_rate": 9.614056916803415e-06, "loss": 2.6228, "step": 549800 }, { "epoch": 0.14341972068096726, "grad_norm": 10.32852840423584, "learning_rate": 9.61373480859681e-06, "loss": 2.6263, "step": 550000 }, { "epoch": 0.14347187330666944, "grad_norm": 10.721578598022461, "learning_rate": 9.613412571430998e-06, "loss": 2.6296, "step": 550200 }, { "epoch": 0.1435240259323716, "grad_norm": 8.454923629760742, "learning_rate": 9.613090205314985e-06, "loss": 2.656, "step": 550400 }, { "epoch": 0.14357617855807378, "grad_norm": 9.832836151123047, "learning_rate": 9.612767710257782e-06, "loss": 2.6442, "step": 550600 }, { "epoch": 0.14362833118377594, "grad_norm": 9.232677459716797, "learning_rate": 9.612445086268403e-06, "loss": 2.6341, "step": 550800 }, { "epoch": 0.14368048380947812, "grad_norm": 11.069082260131836, "learning_rate": 9.612122333355865e-06, "loss": 2.6251, "step": 551000 }, { "epoch": 0.14373263643518028, "grad_norm": 10.895528793334961, "learning_rate": 9.611799451529189e-06, "loss": 2.6328, "step": 551200 }, { "epoch": 0.14378478906088246, "grad_norm": 9.884492874145508, "learning_rate": 9.611476440797403e-06, "loss": 2.5978, "step": 551400 }, { "epoch": 0.14383694168658462, "grad_norm": 9.460986137390137, "learning_rate": 9.61115330116953e-06, "loss": 2.6392, "step": 551600 }, { "epoch": 0.1438890943122868, "grad_norm": 9.917821884155273, "learning_rate": 9.610830032654607e-06, "loss": 2.6414, "step": 551800 }, { "epoch": 0.14394124693798896, "grad_norm": 8.92539119720459, "learning_rate": 9.61050663526167e-06, "loss": 2.606, "step": 552000 }, { "epoch": 0.14399339956369114, "grad_norm": 10.357989311218262, "learning_rate": 9.610183108999754e-06, "loss": 2.5919, "step": 552200 }, { "epoch": 0.1440455521893933, "grad_norm": 11.38748550415039, "learning_rate": 9.609859453877906e-06, "loss": 2.6614, "step": 552400 }, { "epoch": 0.14409770481509548, "grad_norm": 8.538469314575195, "learning_rate": 9.60953566990517e-06, "loss": 2.6751, "step": 552600 }, { "epoch": 0.14414985744079764, "grad_norm": 10.609665870666504, "learning_rate": 9.609211757090596e-06, "loss": 2.6102, "step": 552800 }, { "epoch": 0.14420201006649983, "grad_norm": 9.466772079467773, "learning_rate": 9.60888771544324e-06, "loss": 2.6074, "step": 553000 }, { "epoch": 0.14425416269220198, "grad_norm": 10.574810981750488, "learning_rate": 9.608563544972159e-06, "loss": 2.6195, "step": 553200 }, { "epoch": 0.14430631531790417, "grad_norm": 8.984042167663574, "learning_rate": 9.60823924568641e-06, "loss": 2.6586, "step": 553400 }, { "epoch": 0.14435846794360632, "grad_norm": 9.159278869628906, "learning_rate": 9.607914817595062e-06, "loss": 2.5936, "step": 553600 }, { "epoch": 0.1444106205693085, "grad_norm": 10.08705997467041, "learning_rate": 9.607590260707182e-06, "loss": 2.6402, "step": 553800 }, { "epoch": 0.14446277319501066, "grad_norm": 9.373217582702637, "learning_rate": 9.607265575031843e-06, "loss": 2.6356, "step": 554000 }, { "epoch": 0.14451492582071282, "grad_norm": 9.303369522094727, "learning_rate": 9.606940760578118e-06, "loss": 2.6004, "step": 554200 }, { "epoch": 0.144567078446415, "grad_norm": 10.792057991027832, "learning_rate": 9.606615817355084e-06, "loss": 2.619, "step": 554400 }, { "epoch": 0.14461923107211716, "grad_norm": 9.816146850585938, "learning_rate": 9.60629074537183e-06, "loss": 2.5744, "step": 554600 }, { "epoch": 0.14467138369781934, "grad_norm": 9.5195894241333, "learning_rate": 9.605965544637437e-06, "loss": 2.6646, "step": 554800 }, { "epoch": 0.1447235363235215, "grad_norm": 9.027356147766113, "learning_rate": 9.605640215160996e-06, "loss": 2.5848, "step": 555000 }, { "epoch": 0.14477568894922369, "grad_norm": 9.522998809814453, "learning_rate": 9.6053147569516e-06, "loss": 2.631, "step": 555200 }, { "epoch": 0.14482784157492584, "grad_norm": 10.631654739379883, "learning_rate": 9.604989170018347e-06, "loss": 2.6315, "step": 555400 }, { "epoch": 0.14487999420062803, "grad_norm": 9.056488037109375, "learning_rate": 9.604663454370338e-06, "loss": 2.5954, "step": 555600 }, { "epoch": 0.14493214682633018, "grad_norm": 8.656085014343262, "learning_rate": 9.604337610016674e-06, "loss": 2.619, "step": 555800 }, { "epoch": 0.14498429945203237, "grad_norm": 10.284260749816895, "learning_rate": 9.604011636966466e-06, "loss": 2.6313, "step": 556000 }, { "epoch": 0.14503645207773452, "grad_norm": 8.677928924560547, "learning_rate": 9.603685535228823e-06, "loss": 2.6198, "step": 556200 }, { "epoch": 0.1450886047034367, "grad_norm": 9.978887557983398, "learning_rate": 9.603359304812863e-06, "loss": 2.5997, "step": 556400 }, { "epoch": 0.14514075732913886, "grad_norm": 10.44409465789795, "learning_rate": 9.6030329457277e-06, "loss": 2.6256, "step": 556600 }, { "epoch": 0.14519290995484105, "grad_norm": 10.068913459777832, "learning_rate": 9.60270645798246e-06, "loss": 2.5958, "step": 556800 }, { "epoch": 0.1452450625805432, "grad_norm": 9.706381797790527, "learning_rate": 9.602379841586269e-06, "loss": 2.5839, "step": 557000 }, { "epoch": 0.1452972152062454, "grad_norm": 8.250496864318848, "learning_rate": 9.602053096548251e-06, "loss": 2.6462, "step": 557200 }, { "epoch": 0.14534936783194755, "grad_norm": 10.653825759887695, "learning_rate": 9.601726222877546e-06, "loss": 2.6536, "step": 557400 }, { "epoch": 0.14540152045764973, "grad_norm": 9.490010261535645, "learning_rate": 9.601399220583285e-06, "loss": 2.6482, "step": 557600 }, { "epoch": 0.1454536730833519, "grad_norm": 9.792756080627441, "learning_rate": 9.601072089674613e-06, "loss": 2.6215, "step": 557800 }, { "epoch": 0.14550582570905407, "grad_norm": 11.322357177734375, "learning_rate": 9.600744830160667e-06, "loss": 2.6084, "step": 558000 }, { "epoch": 0.14555797833475623, "grad_norm": 10.1467924118042, "learning_rate": 9.6004174420506e-06, "loss": 2.6443, "step": 558200 }, { "epoch": 0.1456101309604584, "grad_norm": 10.210265159606934, "learning_rate": 9.600089925353562e-06, "loss": 2.6163, "step": 558400 }, { "epoch": 0.14566228358616057, "grad_norm": 12.064213752746582, "learning_rate": 9.599762280078705e-06, "loss": 2.6455, "step": 558600 }, { "epoch": 0.14571443621186275, "grad_norm": 8.971247673034668, "learning_rate": 9.599434506235188e-06, "loss": 2.6323, "step": 558800 }, { "epoch": 0.1457665888375649, "grad_norm": 9.186264991760254, "learning_rate": 9.599106603832176e-06, "loss": 2.6271, "step": 559000 }, { "epoch": 0.1458187414632671, "grad_norm": 10.96983814239502, "learning_rate": 9.598778572878828e-06, "loss": 2.6766, "step": 559200 }, { "epoch": 0.14587089408896925, "grad_norm": 9.236897468566895, "learning_rate": 9.59845041338432e-06, "loss": 2.6698, "step": 559400 }, { "epoch": 0.14592304671467143, "grad_norm": 9.946492195129395, "learning_rate": 9.598122125357817e-06, "loss": 2.6006, "step": 559600 }, { "epoch": 0.1459751993403736, "grad_norm": 10.890714645385742, "learning_rate": 9.597793708808501e-06, "loss": 2.6319, "step": 559800 }, { "epoch": 0.14602735196607575, "grad_norm": 10.893404006958008, "learning_rate": 9.597465163745548e-06, "loss": 2.6361, "step": 560000 }, { "epoch": 0.14607950459177793, "grad_norm": 10.29994010925293, "learning_rate": 9.597136490178145e-06, "loss": 2.6443, "step": 560200 }, { "epoch": 0.1461316572174801, "grad_norm": 10.262697219848633, "learning_rate": 9.596807688115474e-06, "loss": 2.6291, "step": 560400 }, { "epoch": 0.14618380984318227, "grad_norm": 10.052336692810059, "learning_rate": 9.596478757566729e-06, "loss": 2.6116, "step": 560600 }, { "epoch": 0.14623596246888443, "grad_norm": 9.586968421936035, "learning_rate": 9.596149698541102e-06, "loss": 2.6361, "step": 560800 }, { "epoch": 0.1462881150945866, "grad_norm": 10.506963729858398, "learning_rate": 9.595820511047791e-06, "loss": 2.6558, "step": 561000 }, { "epoch": 0.14634026772028877, "grad_norm": 11.302355766296387, "learning_rate": 9.595491195095998e-06, "loss": 2.6385, "step": 561200 }, { "epoch": 0.14639242034599095, "grad_norm": 10.142350196838379, "learning_rate": 9.595161750694927e-06, "loss": 2.6049, "step": 561400 }, { "epoch": 0.1464445729716931, "grad_norm": 8.8736572265625, "learning_rate": 9.594832177853787e-06, "loss": 2.606, "step": 561600 }, { "epoch": 0.1464967255973953, "grad_norm": 10.957189559936523, "learning_rate": 9.59450247658179e-06, "loss": 2.6365, "step": 561800 }, { "epoch": 0.14654887822309745, "grad_norm": 10.375482559204102, "learning_rate": 9.594172646888151e-06, "loss": 2.6283, "step": 562000 }, { "epoch": 0.14660103084879963, "grad_norm": 10.932716369628906, "learning_rate": 9.59384268878209e-06, "loss": 2.658, "step": 562200 }, { "epoch": 0.1466531834745018, "grad_norm": 9.20926284790039, "learning_rate": 9.593512602272828e-06, "loss": 2.6566, "step": 562400 }, { "epoch": 0.14670533610020398, "grad_norm": 10.561077117919922, "learning_rate": 9.593182387369592e-06, "loss": 2.6381, "step": 562600 }, { "epoch": 0.14675748872590613, "grad_norm": 9.518685340881348, "learning_rate": 9.592852044081614e-06, "loss": 2.5938, "step": 562800 }, { "epoch": 0.14680964135160832, "grad_norm": 10.072381973266602, "learning_rate": 9.592521572418123e-06, "loss": 2.665, "step": 563000 }, { "epoch": 0.14686179397731047, "grad_norm": 10.22624683380127, "learning_rate": 9.59219097238836e-06, "loss": 2.563, "step": 563200 }, { "epoch": 0.14691394660301266, "grad_norm": 10.61490249633789, "learning_rate": 9.591860244001563e-06, "loss": 2.6146, "step": 563400 }, { "epoch": 0.1469660992287148, "grad_norm": 9.314760208129883, "learning_rate": 9.59152938726698e-06, "loss": 2.6327, "step": 563600 }, { "epoch": 0.147018251854417, "grad_norm": 10.206314086914062, "learning_rate": 9.591198402193854e-06, "loss": 2.6513, "step": 563800 }, { "epoch": 0.14707040448011915, "grad_norm": 11.540358543395996, "learning_rate": 9.59086728879144e-06, "loss": 2.6168, "step": 564000 }, { "epoch": 0.14712255710582134, "grad_norm": 10.943085670471191, "learning_rate": 9.590536047068992e-06, "loss": 2.6184, "step": 564200 }, { "epoch": 0.1471747097315235, "grad_norm": 11.087271690368652, "learning_rate": 9.590204677035769e-06, "loss": 2.6272, "step": 564400 }, { "epoch": 0.14722686235722568, "grad_norm": 10.729355812072754, "learning_rate": 9.58987317870103e-06, "loss": 2.6478, "step": 564600 }, { "epoch": 0.14727901498292784, "grad_norm": 10.2618408203125, "learning_rate": 9.589541552074044e-06, "loss": 2.6489, "step": 564800 }, { "epoch": 0.14733116760863002, "grad_norm": 8.719612121582031, "learning_rate": 9.589209797164082e-06, "loss": 2.6471, "step": 565000 }, { "epoch": 0.14738332023433218, "grad_norm": 9.179375648498535, "learning_rate": 9.588877913980411e-06, "loss": 2.6204, "step": 565200 }, { "epoch": 0.14743547286003436, "grad_norm": 8.887311935424805, "learning_rate": 9.588545902532316e-06, "loss": 2.627, "step": 565400 }, { "epoch": 0.14748762548573652, "grad_norm": 10.409414291381836, "learning_rate": 9.588213762829068e-06, "loss": 2.6492, "step": 565600 }, { "epoch": 0.14753977811143867, "grad_norm": 9.475335121154785, "learning_rate": 9.587881494879956e-06, "loss": 2.6261, "step": 565800 }, { "epoch": 0.14759193073714086, "grad_norm": 10.67691707611084, "learning_rate": 9.587549098694268e-06, "loss": 2.6279, "step": 566000 }, { "epoch": 0.14764408336284301, "grad_norm": 9.022562980651855, "learning_rate": 9.58721657428129e-06, "loss": 2.6222, "step": 566200 }, { "epoch": 0.1476962359885452, "grad_norm": 9.80457878112793, "learning_rate": 9.586883921650322e-06, "loss": 2.6257, "step": 566400 }, { "epoch": 0.14774838861424736, "grad_norm": 10.465949058532715, "learning_rate": 9.586551140810659e-06, "loss": 2.6321, "step": 566600 }, { "epoch": 0.14780054123994954, "grad_norm": 8.894414901733398, "learning_rate": 9.586218231771602e-06, "loss": 2.6289, "step": 566800 }, { "epoch": 0.1478526938656517, "grad_norm": 9.935043334960938, "learning_rate": 9.58588519454246e-06, "loss": 2.6112, "step": 567000 }, { "epoch": 0.14790484649135388, "grad_norm": 10.814425468444824, "learning_rate": 9.585552029132537e-06, "loss": 2.6257, "step": 567200 }, { "epoch": 0.14795699911705604, "grad_norm": 10.175069808959961, "learning_rate": 9.585218735551147e-06, "loss": 2.6417, "step": 567400 }, { "epoch": 0.14800915174275822, "grad_norm": 9.798264503479004, "learning_rate": 9.584885313807607e-06, "loss": 2.6016, "step": 567600 }, { "epoch": 0.14806130436846038, "grad_norm": 9.588288307189941, "learning_rate": 9.584551763911236e-06, "loss": 2.6012, "step": 567800 }, { "epoch": 0.14811345699416256, "grad_norm": 9.886007308959961, "learning_rate": 9.584218085871358e-06, "loss": 2.6357, "step": 568000 }, { "epoch": 0.14816560961986472, "grad_norm": 9.629053115844727, "learning_rate": 9.583884279697297e-06, "loss": 2.6257, "step": 568200 }, { "epoch": 0.1482177622455669, "grad_norm": 9.915539741516113, "learning_rate": 9.583550345398385e-06, "loss": 2.6651, "step": 568400 }, { "epoch": 0.14826991487126906, "grad_norm": 10.119706153869629, "learning_rate": 9.583216282983955e-06, "loss": 2.6624, "step": 568600 }, { "epoch": 0.14832206749697124, "grad_norm": 10.616450309753418, "learning_rate": 9.582882092463348e-06, "loss": 2.6166, "step": 568800 }, { "epoch": 0.1483742201226734, "grad_norm": 10.009668350219727, "learning_rate": 9.582547773845901e-06, "loss": 2.6372, "step": 569000 }, { "epoch": 0.14842637274837558, "grad_norm": 9.56704330444336, "learning_rate": 9.582213327140958e-06, "loss": 2.648, "step": 569200 }, { "epoch": 0.14847852537407774, "grad_norm": 12.643216133117676, "learning_rate": 9.581878752357871e-06, "loss": 2.6362, "step": 569400 }, { "epoch": 0.14853067799977993, "grad_norm": 9.96658992767334, "learning_rate": 9.581544049505991e-06, "loss": 2.6044, "step": 569600 }, { "epoch": 0.14858283062548208, "grad_norm": 9.937406539916992, "learning_rate": 9.58120921859467e-06, "loss": 2.6604, "step": 569800 }, { "epoch": 0.14863498325118427, "grad_norm": 10.475345611572266, "learning_rate": 9.58087425963327e-06, "loss": 2.6185, "step": 570000 }, { "epoch": 0.14868713587688642, "grad_norm": 9.268296241760254, "learning_rate": 9.580539172631152e-06, "loss": 2.6081, "step": 570200 }, { "epoch": 0.1487392885025886, "grad_norm": 9.733953475952148, "learning_rate": 9.580203957597683e-06, "loss": 2.6293, "step": 570400 }, { "epoch": 0.14879144112829076, "grad_norm": 10.396161079406738, "learning_rate": 9.579868614542231e-06, "loss": 2.6485, "step": 570600 }, { "epoch": 0.14884359375399295, "grad_norm": 10.291255950927734, "learning_rate": 9.579533143474172e-06, "loss": 2.6246, "step": 570800 }, { "epoch": 0.1488957463796951, "grad_norm": 10.212169647216797, "learning_rate": 9.579197544402881e-06, "loss": 2.5966, "step": 571000 }, { "epoch": 0.1489478990053973, "grad_norm": 9.417984962463379, "learning_rate": 9.578861817337736e-06, "loss": 2.628, "step": 571200 }, { "epoch": 0.14900005163109944, "grad_norm": 11.286150932312012, "learning_rate": 9.578525962288127e-06, "loss": 2.6472, "step": 571400 }, { "epoch": 0.1490522042568016, "grad_norm": 9.67978286743164, "learning_rate": 9.578189979263438e-06, "loss": 2.6192, "step": 571600 }, { "epoch": 0.14910435688250379, "grad_norm": 10.592140197753906, "learning_rate": 9.577853868273057e-06, "loss": 2.6086, "step": 571800 }, { "epoch": 0.14915650950820594, "grad_norm": 10.959556579589844, "learning_rate": 9.577517629326385e-06, "loss": 2.5899, "step": 572000 }, { "epoch": 0.14920866213390813, "grad_norm": 11.056251525878906, "learning_rate": 9.577181262432815e-06, "loss": 2.655, "step": 572200 }, { "epoch": 0.14926081475961028, "grad_norm": 10.780267715454102, "learning_rate": 9.576844767601753e-06, "loss": 2.6094, "step": 572400 }, { "epoch": 0.14931296738531247, "grad_norm": 10.068269729614258, "learning_rate": 9.576508144842603e-06, "loss": 2.6275, "step": 572600 }, { "epoch": 0.14936512001101462, "grad_norm": 10.287019729614258, "learning_rate": 9.57617139416477e-06, "loss": 2.6343, "step": 572800 }, { "epoch": 0.1494172726367168, "grad_norm": 8.428248405456543, "learning_rate": 9.575834515577673e-06, "loss": 2.597, "step": 573000 }, { "epoch": 0.14946942526241896, "grad_norm": 10.998594284057617, "learning_rate": 9.575497509090723e-06, "loss": 2.6543, "step": 573200 }, { "epoch": 0.14952157788812115, "grad_norm": 9.181807518005371, "learning_rate": 9.575160374713344e-06, "loss": 2.6463, "step": 573400 }, { "epoch": 0.1495737305138233, "grad_norm": 10.688831329345703, "learning_rate": 9.574823112454957e-06, "loss": 2.6277, "step": 573600 }, { "epoch": 0.1496258831395255, "grad_norm": 9.806432723999023, "learning_rate": 9.574485722324986e-06, "loss": 2.6082, "step": 573800 }, { "epoch": 0.14967803576522765, "grad_norm": 10.59438419342041, "learning_rate": 9.574148204332868e-06, "loss": 2.6137, "step": 574000 }, { "epoch": 0.14973018839092983, "grad_norm": 11.886221885681152, "learning_rate": 9.573810558488032e-06, "loss": 2.6198, "step": 574200 }, { "epoch": 0.149782341016632, "grad_norm": 9.97330379486084, "learning_rate": 9.573472784799918e-06, "loss": 2.6259, "step": 574400 }, { "epoch": 0.14983449364233417, "grad_norm": 10.082623481750488, "learning_rate": 9.573134883277966e-06, "loss": 2.6735, "step": 574600 }, { "epoch": 0.14988664626803633, "grad_norm": 10.873509407043457, "learning_rate": 9.572796853931619e-06, "loss": 2.6497, "step": 574800 }, { "epoch": 0.1499387988937385, "grad_norm": 10.863423347473145, "learning_rate": 9.57245869677033e-06, "loss": 2.6382, "step": 575000 }, { "epoch": 0.14999095151944067, "grad_norm": 9.81516170501709, "learning_rate": 9.572120411803547e-06, "loss": 2.5861, "step": 575200 }, { "epoch": 0.15004310414514285, "grad_norm": 10.294231414794922, "learning_rate": 9.571781999040726e-06, "loss": 2.5945, "step": 575400 }, { "epoch": 0.150095256770845, "grad_norm": 10.843868255615234, "learning_rate": 9.571443458491327e-06, "loss": 2.6269, "step": 575600 }, { "epoch": 0.1501474093965472, "grad_norm": 8.99116039276123, "learning_rate": 9.571104790164814e-06, "loss": 2.6215, "step": 575800 }, { "epoch": 0.15019956202224935, "grad_norm": 10.161559104919434, "learning_rate": 9.57076599407065e-06, "loss": 2.6052, "step": 576000 }, { "epoch": 0.15025171464795153, "grad_norm": 9.422714233398438, "learning_rate": 9.570427070218306e-06, "loss": 2.6268, "step": 576200 }, { "epoch": 0.1503038672736537, "grad_norm": 9.956077575683594, "learning_rate": 9.570088018617255e-06, "loss": 2.6241, "step": 576400 }, { "epoch": 0.15035601989935587, "grad_norm": 10.455558776855469, "learning_rate": 9.569748839276975e-06, "loss": 2.6146, "step": 576600 }, { "epoch": 0.15040817252505803, "grad_norm": 9.470061302185059, "learning_rate": 9.569409532206945e-06, "loss": 2.6122, "step": 576800 }, { "epoch": 0.15046032515076022, "grad_norm": 9.468082427978516, "learning_rate": 9.56907009741665e-06, "loss": 2.6527, "step": 577000 }, { "epoch": 0.15051247777646237, "grad_norm": 10.123863220214844, "learning_rate": 9.56873053491558e-06, "loss": 2.6252, "step": 577200 }, { "epoch": 0.15056463040216453, "grad_norm": 9.473311424255371, "learning_rate": 9.568390844713221e-06, "loss": 2.622, "step": 577400 }, { "epoch": 0.1506167830278667, "grad_norm": 10.261393547058105, "learning_rate": 9.568051026819072e-06, "loss": 2.6369, "step": 577600 }, { "epoch": 0.15066893565356887, "grad_norm": 7.631354331970215, "learning_rate": 9.567711081242628e-06, "loss": 2.6039, "step": 577800 }, { "epoch": 0.15072108827927105, "grad_norm": 11.26151180267334, "learning_rate": 9.567371007993394e-06, "loss": 2.6304, "step": 578000 }, { "epoch": 0.1507732409049732, "grad_norm": 10.592456817626953, "learning_rate": 9.567030807080872e-06, "loss": 2.6539, "step": 578200 }, { "epoch": 0.1508253935306754, "grad_norm": 9.679903030395508, "learning_rate": 9.566690478514574e-06, "loss": 2.621, "step": 578400 }, { "epoch": 0.15087754615637755, "grad_norm": 9.416767120361328, "learning_rate": 9.566350022304013e-06, "loss": 2.6417, "step": 578600 }, { "epoch": 0.15092969878207974, "grad_norm": 11.1919527053833, "learning_rate": 9.566009438458701e-06, "loss": 2.5929, "step": 578800 }, { "epoch": 0.1509818514077819, "grad_norm": 10.748730659484863, "learning_rate": 9.565668726988161e-06, "loss": 2.6213, "step": 579000 }, { "epoch": 0.15103400403348408, "grad_norm": 11.362808227539062, "learning_rate": 9.565327887901918e-06, "loss": 2.65, "step": 579200 }, { "epoch": 0.15108615665918623, "grad_norm": 10.134300231933594, "learning_rate": 9.564986921209493e-06, "loss": 2.5866, "step": 579400 }, { "epoch": 0.15113830928488842, "grad_norm": 10.209643363952637, "learning_rate": 9.564645826920422e-06, "loss": 2.5839, "step": 579600 }, { "epoch": 0.15119046191059057, "grad_norm": 11.057905197143555, "learning_rate": 9.564304605044239e-06, "loss": 2.5859, "step": 579800 }, { "epoch": 0.15124261453629276, "grad_norm": 11.382614135742188, "learning_rate": 9.563963255590476e-06, "loss": 2.5781, "step": 580000 }, { "epoch": 0.15129476716199491, "grad_norm": 9.950427055358887, "learning_rate": 9.563621778568679e-06, "loss": 2.6076, "step": 580200 }, { "epoch": 0.1513469197876971, "grad_norm": 9.879427909851074, "learning_rate": 9.56328017398839e-06, "loss": 2.5791, "step": 580400 }, { "epoch": 0.15139907241339925, "grad_norm": 9.203298568725586, "learning_rate": 9.56293844185916e-06, "loss": 2.6405, "step": 580600 }, { "epoch": 0.15145122503910144, "grad_norm": 10.701813697814941, "learning_rate": 9.562596582190539e-06, "loss": 2.6244, "step": 580800 }, { "epoch": 0.1515033776648036, "grad_norm": 10.396395683288574, "learning_rate": 9.562254594992082e-06, "loss": 2.5892, "step": 581000 }, { "epoch": 0.15155553029050578, "grad_norm": 8.901702880859375, "learning_rate": 9.561912480273348e-06, "loss": 2.631, "step": 581200 }, { "epoch": 0.15160768291620794, "grad_norm": 10.96087646484375, "learning_rate": 9.561570238043902e-06, "loss": 2.6193, "step": 581400 }, { "epoch": 0.15165983554191012, "grad_norm": 10.27599048614502, "learning_rate": 9.561227868313306e-06, "loss": 2.6072, "step": 581600 }, { "epoch": 0.15171198816761228, "grad_norm": 9.896045684814453, "learning_rate": 9.560885371091134e-06, "loss": 2.6083, "step": 581800 }, { "epoch": 0.15176414079331446, "grad_norm": 10.777628898620605, "learning_rate": 9.560542746386955e-06, "loss": 2.5971, "step": 582000 }, { "epoch": 0.15181629341901662, "grad_norm": 9.133131980895996, "learning_rate": 9.560199994210349e-06, "loss": 2.6302, "step": 582200 }, { "epoch": 0.1518684460447188, "grad_norm": 11.197638511657715, "learning_rate": 9.559857114570893e-06, "loss": 2.6054, "step": 582400 }, { "epoch": 0.15192059867042096, "grad_norm": 10.832380294799805, "learning_rate": 9.559514107478177e-06, "loss": 2.5988, "step": 582600 }, { "epoch": 0.15197275129612314, "grad_norm": 10.314582824707031, "learning_rate": 9.559170972941782e-06, "loss": 2.5965, "step": 582800 }, { "epoch": 0.1520249039218253, "grad_norm": 11.280247688293457, "learning_rate": 9.558827710971302e-06, "loss": 2.631, "step": 583000 }, { "epoch": 0.15207705654752748, "grad_norm": 10.295195579528809, "learning_rate": 9.558484321576329e-06, "loss": 2.6085, "step": 583200 }, { "epoch": 0.15212920917322964, "grad_norm": 9.563957214355469, "learning_rate": 9.558140804766464e-06, "loss": 2.6147, "step": 583400 }, { "epoch": 0.1521813617989318, "grad_norm": 10.042048454284668, "learning_rate": 9.55779716055131e-06, "loss": 2.6046, "step": 583600 }, { "epoch": 0.15223351442463398, "grad_norm": 11.11385440826416, "learning_rate": 9.557453388940468e-06, "loss": 2.6468, "step": 583800 }, { "epoch": 0.15228566705033614, "grad_norm": 10.20233154296875, "learning_rate": 9.557109489943548e-06, "loss": 2.6675, "step": 584000 }, { "epoch": 0.15233781967603832, "grad_norm": 10.255865097045898, "learning_rate": 9.556765463570164e-06, "loss": 2.6439, "step": 584200 }, { "epoch": 0.15238997230174048, "grad_norm": 9.48961067199707, "learning_rate": 9.55642130982993e-06, "loss": 2.6361, "step": 584400 }, { "epoch": 0.15244212492744266, "grad_norm": 9.803009986877441, "learning_rate": 9.55607702873247e-06, "loss": 2.6297, "step": 584600 }, { "epoch": 0.15249427755314482, "grad_norm": 10.121756553649902, "learning_rate": 9.5557326202874e-06, "loss": 2.61, "step": 584800 }, { "epoch": 0.152546430178847, "grad_norm": 9.789412498474121, "learning_rate": 9.555388084504353e-06, "loss": 2.6089, "step": 585000 }, { "epoch": 0.15259858280454916, "grad_norm": 10.218076705932617, "learning_rate": 9.555043421392955e-06, "loss": 2.5853, "step": 585200 }, { "epoch": 0.15265073543025134, "grad_norm": 10.865063667297363, "learning_rate": 9.554698630962841e-06, "loss": 2.6124, "step": 585400 }, { "epoch": 0.1527028880559535, "grad_norm": 10.640392303466797, "learning_rate": 9.55435371322365e-06, "loss": 2.6186, "step": 585600 }, { "epoch": 0.15275504068165568, "grad_norm": 11.789724349975586, "learning_rate": 9.55400866818502e-06, "loss": 2.6226, "step": 585800 }, { "epoch": 0.15280719330735784, "grad_norm": 8.585991859436035, "learning_rate": 9.553663495856598e-06, "loss": 2.6153, "step": 586000 }, { "epoch": 0.15285934593306003, "grad_norm": 12.74191665649414, "learning_rate": 9.55331819624803e-06, "loss": 2.6273, "step": 586200 }, { "epoch": 0.15291149855876218, "grad_norm": 11.765472412109375, "learning_rate": 9.552972769368969e-06, "loss": 2.6034, "step": 586400 }, { "epoch": 0.15296365118446437, "grad_norm": 8.79220962524414, "learning_rate": 9.552627215229067e-06, "loss": 2.624, "step": 586600 }, { "epoch": 0.15301580381016652, "grad_norm": 9.72270679473877, "learning_rate": 9.552281533837988e-06, "loss": 2.6228, "step": 586800 }, { "epoch": 0.1530679564358687, "grad_norm": 11.258645057678223, "learning_rate": 9.55193572520539e-06, "loss": 2.6312, "step": 587000 }, { "epoch": 0.15312010906157086, "grad_norm": 10.41986083984375, "learning_rate": 9.551589789340939e-06, "loss": 2.5924, "step": 587200 }, { "epoch": 0.15317226168727305, "grad_norm": 10.305185317993164, "learning_rate": 9.551243726254304e-06, "loss": 2.6179, "step": 587400 }, { "epoch": 0.1532244143129752, "grad_norm": 10.521088600158691, "learning_rate": 9.550897535955161e-06, "loss": 2.6227, "step": 587600 }, { "epoch": 0.1532765669386774, "grad_norm": 10.343436241149902, "learning_rate": 9.550551218453186e-06, "loss": 2.6194, "step": 587800 }, { "epoch": 0.15332871956437955, "grad_norm": 9.396683692932129, "learning_rate": 9.550204773758055e-06, "loss": 2.6268, "step": 588000 }, { "epoch": 0.15338087219008173, "grad_norm": 11.622981071472168, "learning_rate": 9.549858201879456e-06, "loss": 2.6188, "step": 588200 }, { "epoch": 0.15343302481578389, "grad_norm": 9.679468154907227, "learning_rate": 9.549511502827071e-06, "loss": 2.5841, "step": 588400 }, { "epoch": 0.15348517744148607, "grad_norm": 10.439282417297363, "learning_rate": 9.549164676610596e-06, "loss": 2.626, "step": 588600 }, { "epoch": 0.15353733006718823, "grad_norm": 10.602144241333008, "learning_rate": 9.548817723239723e-06, "loss": 2.6081, "step": 588800 }, { "epoch": 0.1535894826928904, "grad_norm": 9.781011581420898, "learning_rate": 9.548470642724148e-06, "loss": 2.5835, "step": 589000 }, { "epoch": 0.15364163531859257, "grad_norm": 10.308581352233887, "learning_rate": 9.548123435073575e-06, "loss": 2.6292, "step": 589200 }, { "epoch": 0.15369378794429472, "grad_norm": 11.796847343444824, "learning_rate": 9.547776100297708e-06, "loss": 2.6315, "step": 589400 }, { "epoch": 0.1537459405699969, "grad_norm": 10.120135307312012, "learning_rate": 9.547428638406255e-06, "loss": 2.6352, "step": 589600 }, { "epoch": 0.15379809319569906, "grad_norm": 10.831207275390625, "learning_rate": 9.547081049408928e-06, "loss": 2.6174, "step": 589800 }, { "epoch": 0.15385024582140125, "grad_norm": 9.786165237426758, "learning_rate": 9.546733333315444e-06, "loss": 2.6019, "step": 590000 }, { "epoch": 0.1539023984471034, "grad_norm": 10.431953430175781, "learning_rate": 9.54638549013552e-06, "loss": 2.5921, "step": 590200 }, { "epoch": 0.1539545510728056, "grad_norm": 10.351963996887207, "learning_rate": 9.546037519878878e-06, "loss": 2.6084, "step": 590400 }, { "epoch": 0.15400670369850775, "grad_norm": 13.948197364807129, "learning_rate": 9.545689422555246e-06, "loss": 2.6002, "step": 590600 }, { "epoch": 0.15405885632420993, "grad_norm": 9.773366928100586, "learning_rate": 9.545341198174355e-06, "loss": 2.6003, "step": 590800 }, { "epoch": 0.1541110089499121, "grad_norm": 9.792441368103027, "learning_rate": 9.544992846745936e-06, "loss": 2.6275, "step": 591000 }, { "epoch": 0.15416316157561427, "grad_norm": 10.63184642791748, "learning_rate": 9.544644368279724e-06, "loss": 2.6105, "step": 591200 }, { "epoch": 0.15421531420131643, "grad_norm": 11.19427490234375, "learning_rate": 9.544295762785464e-06, "loss": 2.6435, "step": 591400 }, { "epoch": 0.1542674668270186, "grad_norm": 9.695633888244629, "learning_rate": 9.543947030272897e-06, "loss": 2.6084, "step": 591600 }, { "epoch": 0.15431961945272077, "grad_norm": 10.136941909790039, "learning_rate": 9.543598170751772e-06, "loss": 2.6144, "step": 591800 }, { "epoch": 0.15437177207842295, "grad_norm": 10.13197135925293, "learning_rate": 9.543249184231837e-06, "loss": 2.6069, "step": 592000 }, { "epoch": 0.1544239247041251, "grad_norm": 10.096322059631348, "learning_rate": 9.54290007072285e-06, "loss": 2.6032, "step": 592200 }, { "epoch": 0.1544760773298273, "grad_norm": 8.74548053741455, "learning_rate": 9.542550830234568e-06, "loss": 2.6174, "step": 592400 }, { "epoch": 0.15452822995552945, "grad_norm": 13.123319625854492, "learning_rate": 9.542201462776752e-06, "loss": 2.6112, "step": 592600 }, { "epoch": 0.15458038258123163, "grad_norm": 11.316987037658691, "learning_rate": 9.541851968359169e-06, "loss": 2.6042, "step": 592800 }, { "epoch": 0.1546325352069338, "grad_norm": 9.933026313781738, "learning_rate": 9.541502346991586e-06, "loss": 2.6165, "step": 593000 }, { "epoch": 0.15468468783263598, "grad_norm": 9.549524307250977, "learning_rate": 9.541152598683776e-06, "loss": 2.5907, "step": 593200 }, { "epoch": 0.15473684045833813, "grad_norm": 10.845429420471191, "learning_rate": 9.540802723445513e-06, "loss": 2.6287, "step": 593400 }, { "epoch": 0.15478899308404032, "grad_norm": 9.898306846618652, "learning_rate": 9.54045272128658e-06, "loss": 2.6095, "step": 593600 }, { "epoch": 0.15484114570974247, "grad_norm": 11.204236030578613, "learning_rate": 9.540102592216757e-06, "loss": 2.6506, "step": 593800 }, { "epoch": 0.15489329833544466, "grad_norm": 9.741256713867188, "learning_rate": 9.539752336245834e-06, "loss": 2.5979, "step": 594000 }, { "epoch": 0.1549454509611468, "grad_norm": 9.863458633422852, "learning_rate": 9.539401953383595e-06, "loss": 2.559, "step": 594200 }, { "epoch": 0.154997603586849, "grad_norm": 11.014803886413574, "learning_rate": 9.53905144363984e-06, "loss": 2.647, "step": 594400 }, { "epoch": 0.15504975621255115, "grad_norm": 10.70952033996582, "learning_rate": 9.538700807024363e-06, "loss": 2.6143, "step": 594600 }, { "epoch": 0.15510190883825334, "grad_norm": 10.817914962768555, "learning_rate": 9.538350043546965e-06, "loss": 2.5906, "step": 594800 }, { "epoch": 0.1551540614639555, "grad_norm": 10.918020248413086, "learning_rate": 9.53799915321745e-06, "loss": 2.6202, "step": 595000 }, { "epoch": 0.15520621408965765, "grad_norm": 9.967550277709961, "learning_rate": 9.537648136045626e-06, "loss": 2.6368, "step": 595200 }, { "epoch": 0.15525836671535984, "grad_norm": 10.808151245117188, "learning_rate": 9.537296992041303e-06, "loss": 2.6267, "step": 595400 }, { "epoch": 0.155310519341062, "grad_norm": 10.046856880187988, "learning_rate": 9.536945721214299e-06, "loss": 2.6376, "step": 595600 }, { "epoch": 0.15536267196676418, "grad_norm": 10.47192668914795, "learning_rate": 9.536594323574432e-06, "loss": 2.5965, "step": 595800 }, { "epoch": 0.15541482459246633, "grad_norm": 10.471516609191895, "learning_rate": 9.536242799131522e-06, "loss": 2.6248, "step": 596000 }, { "epoch": 0.15546697721816852, "grad_norm": 9.41662311553955, "learning_rate": 9.535891147895395e-06, "loss": 2.568, "step": 596200 }, { "epoch": 0.15551912984387067, "grad_norm": 10.728153228759766, "learning_rate": 9.53553936987588e-06, "loss": 2.615, "step": 596400 }, { "epoch": 0.15557128246957286, "grad_norm": 11.101585388183594, "learning_rate": 9.535187465082809e-06, "loss": 2.6044, "step": 596600 }, { "epoch": 0.15562343509527501, "grad_norm": 10.208956718444824, "learning_rate": 9.534835433526021e-06, "loss": 2.5869, "step": 596800 }, { "epoch": 0.1556755877209772, "grad_norm": 9.980835914611816, "learning_rate": 9.534483275215353e-06, "loss": 2.6418, "step": 597000 }, { "epoch": 0.15572774034667936, "grad_norm": 10.444533348083496, "learning_rate": 9.534130990160649e-06, "loss": 2.6149, "step": 597200 }, { "epoch": 0.15577989297238154, "grad_norm": 9.33448600769043, "learning_rate": 9.533778578371755e-06, "loss": 2.6071, "step": 597400 }, { "epoch": 0.1558320455980837, "grad_norm": 11.563689231872559, "learning_rate": 9.533426039858523e-06, "loss": 2.5906, "step": 597600 }, { "epoch": 0.15588419822378588, "grad_norm": 10.888169288635254, "learning_rate": 9.533073374630805e-06, "loss": 2.6418, "step": 597800 }, { "epoch": 0.15593635084948804, "grad_norm": 10.408832550048828, "learning_rate": 9.53272058269846e-06, "loss": 2.5685, "step": 598000 }, { "epoch": 0.15598850347519022, "grad_norm": 11.165427207946777, "learning_rate": 9.532367664071349e-06, "loss": 2.6015, "step": 598200 }, { "epoch": 0.15604065610089238, "grad_norm": 11.39417552947998, "learning_rate": 9.532014618759336e-06, "loss": 2.5874, "step": 598400 }, { "epoch": 0.15609280872659456, "grad_norm": 10.001264572143555, "learning_rate": 9.531661446772287e-06, "loss": 2.5927, "step": 598600 }, { "epoch": 0.15614496135229672, "grad_norm": 10.457130432128906, "learning_rate": 9.531308148120077e-06, "loss": 2.5764, "step": 598800 }, { "epoch": 0.1561971139779989, "grad_norm": 8.923357963562012, "learning_rate": 9.530954722812577e-06, "loss": 2.6262, "step": 599000 }, { "epoch": 0.15624926660370106, "grad_norm": 11.424574851989746, "learning_rate": 9.530601170859672e-06, "loss": 2.6137, "step": 599200 }, { "epoch": 0.15630141922940324, "grad_norm": 11.1590576171875, "learning_rate": 9.530247492271237e-06, "loss": 2.6372, "step": 599400 }, { "epoch": 0.1563535718551054, "grad_norm": 10.053206443786621, "learning_rate": 9.529893687057163e-06, "loss": 2.6275, "step": 599600 }, { "epoch": 0.15640572448080758, "grad_norm": 10.665011405944824, "learning_rate": 9.529539755227336e-06, "loss": 2.5987, "step": 599800 }, { "epoch": 0.15645787710650974, "grad_norm": 10.702035903930664, "learning_rate": 9.529185696791651e-06, "loss": 2.5779, "step": 600000 }, { "epoch": 0.15651002973221192, "grad_norm": 9.53780746459961, "learning_rate": 9.528831511760003e-06, "loss": 2.6361, "step": 600200 }, { "epoch": 0.15656218235791408, "grad_norm": 11.620895385742188, "learning_rate": 9.528477200142292e-06, "loss": 2.6155, "step": 600400 }, { "epoch": 0.15661433498361627, "grad_norm": 9.885120391845703, "learning_rate": 9.528122761948421e-06, "loss": 2.6552, "step": 600600 }, { "epoch": 0.15666648760931842, "grad_norm": 11.59696102142334, "learning_rate": 9.527768197188299e-06, "loss": 2.6134, "step": 600800 }, { "epoch": 0.15671864023502058, "grad_norm": 10.598244667053223, "learning_rate": 9.527413505871836e-06, "loss": 2.5894, "step": 601000 }, { "epoch": 0.15677079286072276, "grad_norm": 11.047648429870605, "learning_rate": 9.527058688008943e-06, "loss": 2.6413, "step": 601200 }, { "epoch": 0.15682294548642492, "grad_norm": 10.149199485778809, "learning_rate": 9.526703743609543e-06, "loss": 2.6164, "step": 601400 }, { "epoch": 0.1568750981121271, "grad_norm": 10.866960525512695, "learning_rate": 9.52634867268355e-06, "loss": 2.5793, "step": 601600 }, { "epoch": 0.15692725073782926, "grad_norm": 9.808792114257812, "learning_rate": 9.525993475240894e-06, "loss": 2.6329, "step": 601800 }, { "epoch": 0.15697940336353144, "grad_norm": 9.316394805908203, "learning_rate": 9.525638151291504e-06, "loss": 2.6382, "step": 602000 }, { "epoch": 0.1570315559892336, "grad_norm": 9.754195213317871, "learning_rate": 9.525282700845305e-06, "loss": 2.6143, "step": 602200 }, { "epoch": 0.15708370861493579, "grad_norm": 11.504837036132812, "learning_rate": 9.52492712391224e-06, "loss": 2.6187, "step": 602400 }, { "epoch": 0.15713586124063794, "grad_norm": 9.503823280334473, "learning_rate": 9.524571420502243e-06, "loss": 2.638, "step": 602600 }, { "epoch": 0.15718801386634013, "grad_norm": 11.267765998840332, "learning_rate": 9.524215590625258e-06, "loss": 2.6037, "step": 602800 }, { "epoch": 0.15724016649204228, "grad_norm": 12.685362815856934, "learning_rate": 9.52385963429123e-06, "loss": 2.5829, "step": 603000 }, { "epoch": 0.15729231911774447, "grad_norm": 9.462133407592773, "learning_rate": 9.523503551510112e-06, "loss": 2.6734, "step": 603200 }, { "epoch": 0.15734447174344662, "grad_norm": 10.139527320861816, "learning_rate": 9.52314734229185e-06, "loss": 2.6098, "step": 603400 }, { "epoch": 0.1573966243691488, "grad_norm": 10.855724334716797, "learning_rate": 9.522791006646407e-06, "loss": 2.5865, "step": 603600 }, { "epoch": 0.15744877699485096, "grad_norm": 10.368106842041016, "learning_rate": 9.522434544583738e-06, "loss": 2.621, "step": 603800 }, { "epoch": 0.15750092962055315, "grad_norm": 11.346565246582031, "learning_rate": 9.522077956113812e-06, "loss": 2.6346, "step": 604000 }, { "epoch": 0.1575530822462553, "grad_norm": 11.265953063964844, "learning_rate": 9.521721241246591e-06, "loss": 2.6319, "step": 604200 }, { "epoch": 0.1576052348719575, "grad_norm": 10.973451614379883, "learning_rate": 9.521364399992048e-06, "loss": 2.6154, "step": 604400 }, { "epoch": 0.15765738749765965, "grad_norm": 10.001646995544434, "learning_rate": 9.521007432360157e-06, "loss": 2.5999, "step": 604600 }, { "epoch": 0.15770954012336183, "grad_norm": 9.617718696594238, "learning_rate": 9.520650338360896e-06, "loss": 2.6654, "step": 604800 }, { "epoch": 0.157761692749064, "grad_norm": 9.889549255371094, "learning_rate": 9.520293118004243e-06, "loss": 2.6504, "step": 605000 }, { "epoch": 0.15781384537476617, "grad_norm": 12.019047737121582, "learning_rate": 9.519935771300188e-06, "loss": 2.619, "step": 605200 }, { "epoch": 0.15786599800046833, "grad_norm": 9.76209831237793, "learning_rate": 9.519578298258714e-06, "loss": 2.5828, "step": 605400 }, { "epoch": 0.1579181506261705, "grad_norm": 11.635560989379883, "learning_rate": 9.519220698889817e-06, "loss": 2.5842, "step": 605600 }, { "epoch": 0.15797030325187267, "grad_norm": 11.385933876037598, "learning_rate": 9.518862973203491e-06, "loss": 2.57, "step": 605800 }, { "epoch": 0.15802245587757485, "grad_norm": 10.346911430358887, "learning_rate": 9.518505121209733e-06, "loss": 2.6275, "step": 606000 }, { "epoch": 0.158074608503277, "grad_norm": 11.468281745910645, "learning_rate": 9.518147142918548e-06, "loss": 2.5972, "step": 606200 }, { "epoch": 0.1581267611289792, "grad_norm": 10.361491203308105, "learning_rate": 9.517789038339939e-06, "loss": 2.6139, "step": 606400 }, { "epoch": 0.15817891375468135, "grad_norm": 10.77645206451416, "learning_rate": 9.517430807483919e-06, "loss": 2.6024, "step": 606600 }, { "epoch": 0.15823106638038353, "grad_norm": 10.53165054321289, "learning_rate": 9.517072450360499e-06, "loss": 2.5576, "step": 606800 }, { "epoch": 0.1582832190060857, "grad_norm": 10.261857986450195, "learning_rate": 9.516713966979694e-06, "loss": 2.6202, "step": 607000 }, { "epoch": 0.15833537163178785, "grad_norm": 10.302119255065918, "learning_rate": 9.516355357351528e-06, "loss": 2.617, "step": 607200 }, { "epoch": 0.15838752425749003, "grad_norm": 10.450231552124023, "learning_rate": 9.515996621486021e-06, "loss": 2.6169, "step": 607400 }, { "epoch": 0.1584396768831922, "grad_norm": 11.078191757202148, "learning_rate": 9.515637759393201e-06, "loss": 2.6044, "step": 607600 }, { "epoch": 0.15849182950889437, "grad_norm": 11.261504173278809, "learning_rate": 9.515278771083098e-06, "loss": 2.6508, "step": 607800 }, { "epoch": 0.15854398213459653, "grad_norm": 11.22635555267334, "learning_rate": 9.514919656565749e-06, "loss": 2.595, "step": 608000 }, { "epoch": 0.1585961347602987, "grad_norm": 9.812448501586914, "learning_rate": 9.514560415851189e-06, "loss": 2.6281, "step": 608200 }, { "epoch": 0.15864828738600087, "grad_norm": 11.062207221984863, "learning_rate": 9.51420104894946e-06, "loss": 2.628, "step": 608400 }, { "epoch": 0.15870044001170305, "grad_norm": 11.523585319519043, "learning_rate": 9.513841555870607e-06, "loss": 2.6367, "step": 608600 }, { "epoch": 0.1587525926374052, "grad_norm": 9.39417839050293, "learning_rate": 9.513481936624677e-06, "loss": 2.63, "step": 608800 }, { "epoch": 0.1588047452631074, "grad_norm": 10.997912406921387, "learning_rate": 9.513122191221722e-06, "loss": 2.5634, "step": 609000 }, { "epoch": 0.15885689788880955, "grad_norm": 10.309552192687988, "learning_rate": 9.5127623196718e-06, "loss": 2.6526, "step": 609200 }, { "epoch": 0.15890905051451173, "grad_norm": 10.835392951965332, "learning_rate": 9.512402321984966e-06, "loss": 2.6135, "step": 609400 }, { "epoch": 0.1589612031402139, "grad_norm": 10.294236183166504, "learning_rate": 9.512042198171286e-06, "loss": 2.6536, "step": 609600 }, { "epoch": 0.15901335576591608, "grad_norm": 11.246005058288574, "learning_rate": 9.51168194824082e-06, "loss": 2.6102, "step": 609800 }, { "epoch": 0.15906550839161823, "grad_norm": 11.818621635437012, "learning_rate": 9.511321572203645e-06, "loss": 2.6259, "step": 610000 }, { "epoch": 0.15911766101732042, "grad_norm": 9.672310829162598, "learning_rate": 9.510961070069829e-06, "loss": 2.5676, "step": 610200 }, { "epoch": 0.15916981364302257, "grad_norm": 11.772770881652832, "learning_rate": 9.510600441849451e-06, "loss": 2.5989, "step": 610400 }, { "epoch": 0.15922196626872476, "grad_norm": 10.861141204833984, "learning_rate": 9.510239687552589e-06, "loss": 2.5797, "step": 610600 }, { "epoch": 0.1592741188944269, "grad_norm": 9.267687797546387, "learning_rate": 9.509878807189327e-06, "loss": 2.5731, "step": 610800 }, { "epoch": 0.1593262715201291, "grad_norm": 9.538235664367676, "learning_rate": 9.509517800769752e-06, "loss": 2.6158, "step": 611000 }, { "epoch": 0.15937842414583125, "grad_norm": 11.026472091674805, "learning_rate": 9.509156668303956e-06, "loss": 2.6308, "step": 611200 }, { "epoch": 0.15943057677153344, "grad_norm": 10.019219398498535, "learning_rate": 9.50879540980203e-06, "loss": 2.6177, "step": 611400 }, { "epoch": 0.1594827293972356, "grad_norm": 9.39755916595459, "learning_rate": 9.508434025274074e-06, "loss": 2.5825, "step": 611600 }, { "epoch": 0.15953488202293778, "grad_norm": 9.156355857849121, "learning_rate": 9.508072514730189e-06, "loss": 2.6348, "step": 611800 }, { "epoch": 0.15958703464863994, "grad_norm": 10.577462196350098, "learning_rate": 9.50771087818048e-06, "loss": 2.627, "step": 612000 }, { "epoch": 0.15963918727434212, "grad_norm": 10.779411315917969, "learning_rate": 9.507349115635053e-06, "loss": 2.6185, "step": 612200 }, { "epoch": 0.15969133990004428, "grad_norm": 12.37381362915039, "learning_rate": 9.506987227104021e-06, "loss": 2.6284, "step": 612400 }, { "epoch": 0.15974349252574646, "grad_norm": 9.256758689880371, "learning_rate": 9.5066252125975e-06, "loss": 2.6302, "step": 612600 }, { "epoch": 0.15979564515144862, "grad_norm": 10.987409591674805, "learning_rate": 9.506263072125607e-06, "loss": 2.5929, "step": 612800 }, { "epoch": 0.15984779777715077, "grad_norm": 9.691329002380371, "learning_rate": 9.505900805698468e-06, "loss": 2.6097, "step": 613000 }, { "epoch": 0.15989995040285296, "grad_norm": 10.040337562561035, "learning_rate": 9.505538413326203e-06, "loss": 2.6048, "step": 613200 }, { "epoch": 0.15995210302855511, "grad_norm": 8.556117057800293, "learning_rate": 9.505175895018946e-06, "loss": 2.5706, "step": 613400 }, { "epoch": 0.1600042556542573, "grad_norm": 11.223811149597168, "learning_rate": 9.504813250786826e-06, "loss": 2.5724, "step": 613600 }, { "epoch": 0.16005640827995946, "grad_norm": 9.269017219543457, "learning_rate": 9.504450480639984e-06, "loss": 2.6118, "step": 613800 }, { "epoch": 0.16010856090566164, "grad_norm": 10.954751968383789, "learning_rate": 9.504087584588554e-06, "loss": 2.622, "step": 614000 }, { "epoch": 0.1601607135313638, "grad_norm": 10.984915733337402, "learning_rate": 9.503724562642686e-06, "loss": 2.5626, "step": 614200 }, { "epoch": 0.16021286615706598, "grad_norm": 10.41535758972168, "learning_rate": 9.503361414812522e-06, "loss": 2.6036, "step": 614400 }, { "epoch": 0.16026501878276814, "grad_norm": 9.983470916748047, "learning_rate": 9.502998141108215e-06, "loss": 2.6439, "step": 614600 }, { "epoch": 0.16031717140847032, "grad_norm": 9.90550708770752, "learning_rate": 9.502634741539916e-06, "loss": 2.5825, "step": 614800 }, { "epoch": 0.16036932403417248, "grad_norm": 10.278912544250488, "learning_rate": 9.502271216117784e-06, "loss": 2.5804, "step": 615000 }, { "epoch": 0.16042147665987466, "grad_norm": 10.6985445022583, "learning_rate": 9.501907564851982e-06, "loss": 2.6131, "step": 615200 }, { "epoch": 0.16047362928557682, "grad_norm": 10.952655792236328, "learning_rate": 9.501543787752672e-06, "loss": 2.6165, "step": 615400 }, { "epoch": 0.160525781911279, "grad_norm": 10.143887519836426, "learning_rate": 9.501179884830021e-06, "loss": 2.6062, "step": 615600 }, { "epoch": 0.16057793453698116, "grad_norm": 11.375021934509277, "learning_rate": 9.500815856094204e-06, "loss": 2.6002, "step": 615800 }, { "epoch": 0.16063008716268334, "grad_norm": 10.76820182800293, "learning_rate": 9.500451701555394e-06, "loss": 2.5903, "step": 616000 }, { "epoch": 0.1606822397883855, "grad_norm": 9.969457626342773, "learning_rate": 9.500087421223769e-06, "loss": 2.6216, "step": 616200 }, { "epoch": 0.16073439241408768, "grad_norm": 11.117074012756348, "learning_rate": 9.499723015109514e-06, "loss": 2.6253, "step": 616400 }, { "epoch": 0.16078654503978984, "grad_norm": 10.098209381103516, "learning_rate": 9.499358483222809e-06, "loss": 2.6172, "step": 616600 }, { "epoch": 0.16083869766549203, "grad_norm": 10.3887939453125, "learning_rate": 9.49899382557385e-06, "loss": 2.619, "step": 616800 }, { "epoch": 0.16089085029119418, "grad_norm": 11.24229621887207, "learning_rate": 9.498629042172824e-06, "loss": 2.6362, "step": 617000 }, { "epoch": 0.16094300291689637, "grad_norm": 11.60924243927002, "learning_rate": 9.498264133029928e-06, "loss": 2.6756, "step": 617200 }, { "epoch": 0.16099515554259852, "grad_norm": 11.145500183105469, "learning_rate": 9.497899098155364e-06, "loss": 2.6094, "step": 617400 }, { "epoch": 0.1610473081683007, "grad_norm": 11.344988822937012, "learning_rate": 9.497533937559335e-06, "loss": 2.5854, "step": 617600 }, { "epoch": 0.16109946079400286, "grad_norm": 11.063643455505371, "learning_rate": 9.497168651252044e-06, "loss": 2.6057, "step": 617800 }, { "epoch": 0.16115161341970505, "grad_norm": 10.891105651855469, "learning_rate": 9.496803239243706e-06, "loss": 2.5996, "step": 618000 }, { "epoch": 0.1612037660454072, "grad_norm": 10.276857376098633, "learning_rate": 9.496437701544533e-06, "loss": 2.6236, "step": 618200 }, { "epoch": 0.1612559186711094, "grad_norm": 10.791783332824707, "learning_rate": 9.49607203816474e-06, "loss": 2.6193, "step": 618400 }, { "epoch": 0.16130807129681154, "grad_norm": 9.532054901123047, "learning_rate": 9.495706249114549e-06, "loss": 2.5948, "step": 618600 }, { "epoch": 0.1613602239225137, "grad_norm": 9.186354637145996, "learning_rate": 9.495340334404186e-06, "loss": 2.595, "step": 618800 }, { "epoch": 0.16141237654821589, "grad_norm": 9.525355339050293, "learning_rate": 9.494974294043878e-06, "loss": 2.551, "step": 619000 }, { "epoch": 0.16146452917391804, "grad_norm": 9.88534164428711, "learning_rate": 9.494608128043852e-06, "loss": 2.5548, "step": 619200 }, { "epoch": 0.16151668179962023, "grad_norm": 11.86941146850586, "learning_rate": 9.49424183641435e-06, "loss": 2.6126, "step": 619400 }, { "epoch": 0.16156883442532238, "grad_norm": 9.518510818481445, "learning_rate": 9.493875419165605e-06, "loss": 2.6217, "step": 619600 }, { "epoch": 0.16162098705102457, "grad_norm": 11.503427505493164, "learning_rate": 9.49350887630786e-06, "loss": 2.6139, "step": 619800 }, { "epoch": 0.16167313967672672, "grad_norm": 11.120110511779785, "learning_rate": 9.493142207851362e-06, "loss": 2.6086, "step": 620000 }, { "epoch": 0.1617252923024289, "grad_norm": 9.80033016204834, "learning_rate": 9.492775413806357e-06, "loss": 2.5888, "step": 620200 }, { "epoch": 0.16177744492813106, "grad_norm": 9.526128768920898, "learning_rate": 9.4924084941831e-06, "loss": 2.5898, "step": 620400 }, { "epoch": 0.16182959755383325, "grad_norm": 9.477727890014648, "learning_rate": 9.492041448991845e-06, "loss": 2.5997, "step": 620600 }, { "epoch": 0.1618817501795354, "grad_norm": 11.579855918884277, "learning_rate": 9.491674278242852e-06, "loss": 2.6183, "step": 620800 }, { "epoch": 0.1619339028052376, "grad_norm": 10.910771369934082, "learning_rate": 9.491306981946385e-06, "loss": 2.5857, "step": 621000 }, { "epoch": 0.16198605543093975, "grad_norm": 10.065752983093262, "learning_rate": 9.490939560112709e-06, "loss": 2.6177, "step": 621200 }, { "epoch": 0.16203820805664193, "grad_norm": 11.171086311340332, "learning_rate": 9.490572012752093e-06, "loss": 2.5936, "step": 621400 }, { "epoch": 0.1620903606823441, "grad_norm": 11.971680641174316, "learning_rate": 9.490204339874811e-06, "loss": 2.6105, "step": 621600 }, { "epoch": 0.16214251330804627, "grad_norm": 11.462523460388184, "learning_rate": 9.489836541491142e-06, "loss": 2.571, "step": 621800 }, { "epoch": 0.16219466593374843, "grad_norm": 10.30375862121582, "learning_rate": 9.489468617611363e-06, "loss": 2.6248, "step": 622000 }, { "epoch": 0.1622468185594506, "grad_norm": 12.754924774169922, "learning_rate": 9.48910056824576e-06, "loss": 2.626, "step": 622200 }, { "epoch": 0.16229897118515277, "grad_norm": 10.41318416595459, "learning_rate": 9.48873239340462e-06, "loss": 2.6197, "step": 622400 }, { "epoch": 0.16235112381085495, "grad_norm": 10.452898025512695, "learning_rate": 9.488364093098235e-06, "loss": 2.6034, "step": 622600 }, { "epoch": 0.1624032764365571, "grad_norm": 10.983317375183105, "learning_rate": 9.487995667336897e-06, "loss": 2.6084, "step": 622800 }, { "epoch": 0.1624554290622593, "grad_norm": 12.018777847290039, "learning_rate": 9.487627116130907e-06, "loss": 2.5987, "step": 623000 }, { "epoch": 0.16250758168796145, "grad_norm": 10.180075645446777, "learning_rate": 9.487258439490563e-06, "loss": 2.578, "step": 623200 }, { "epoch": 0.16255973431366363, "grad_norm": 10.785030364990234, "learning_rate": 9.486889637426171e-06, "loss": 2.5835, "step": 623400 }, { "epoch": 0.1626118869393658, "grad_norm": 12.28320026397705, "learning_rate": 9.486520709948042e-06, "loss": 2.5512, "step": 623600 }, { "epoch": 0.16266403956506797, "grad_norm": 10.83224868774414, "learning_rate": 9.486151657066485e-06, "loss": 2.621, "step": 623800 }, { "epoch": 0.16271619219077013, "grad_norm": 11.643034934997559, "learning_rate": 9.485782478791817e-06, "loss": 2.6352, "step": 624000 }, { "epoch": 0.16276834481647232, "grad_norm": 11.283828735351562, "learning_rate": 9.485413175134355e-06, "loss": 2.6173, "step": 624200 }, { "epoch": 0.16282049744217447, "grad_norm": 11.758881568908691, "learning_rate": 9.485043746104424e-06, "loss": 2.6289, "step": 624400 }, { "epoch": 0.16287265006787663, "grad_norm": 10.56377124786377, "learning_rate": 9.484674191712348e-06, "loss": 2.5545, "step": 624600 }, { "epoch": 0.1629248026935788, "grad_norm": 10.32989501953125, "learning_rate": 9.48430451196846e-06, "loss": 2.5994, "step": 624800 }, { "epoch": 0.16297695531928097, "grad_norm": 10.324919700622559, "learning_rate": 9.483934706883086e-06, "loss": 2.6167, "step": 625000 }, { "epoch": 0.16302910794498315, "grad_norm": 10.184969902038574, "learning_rate": 9.48356477646657e-06, "loss": 2.5771, "step": 625200 }, { "epoch": 0.1630812605706853, "grad_norm": 10.589273452758789, "learning_rate": 9.483194720729247e-06, "loss": 2.6033, "step": 625400 }, { "epoch": 0.1631334131963875, "grad_norm": 10.217822074890137, "learning_rate": 9.482824539681463e-06, "loss": 2.6012, "step": 625600 }, { "epoch": 0.16318556582208965, "grad_norm": 10.657920837402344, "learning_rate": 9.482454233333564e-06, "loss": 2.6132, "step": 625800 }, { "epoch": 0.16323771844779184, "grad_norm": 12.07504653930664, "learning_rate": 9.482083801695902e-06, "loss": 2.6208, "step": 626000 }, { "epoch": 0.163289871073494, "grad_norm": 10.118162155151367, "learning_rate": 9.481713244778829e-06, "loss": 2.6053, "step": 626200 }, { "epoch": 0.16334202369919618, "grad_norm": 10.543754577636719, "learning_rate": 9.481342562592702e-06, "loss": 2.6122, "step": 626400 }, { "epoch": 0.16339417632489833, "grad_norm": 11.980351448059082, "learning_rate": 9.480971755147884e-06, "loss": 2.5896, "step": 626600 }, { "epoch": 0.16344632895060052, "grad_norm": 9.149388313293457, "learning_rate": 9.48060082245474e-06, "loss": 2.62, "step": 626800 }, { "epoch": 0.16349848157630267, "grad_norm": 10.801803588867188, "learning_rate": 9.480229764523634e-06, "loss": 2.5989, "step": 627000 }, { "epoch": 0.16355063420200486, "grad_norm": 9.936863899230957, "learning_rate": 9.479858581364942e-06, "loss": 2.5892, "step": 627200 }, { "epoch": 0.16360278682770701, "grad_norm": 12.720370292663574, "learning_rate": 9.479487272989036e-06, "loss": 2.6159, "step": 627400 }, { "epoch": 0.1636549394534092, "grad_norm": 8.66869831085205, "learning_rate": 9.479115839406296e-06, "loss": 2.6255, "step": 627600 }, { "epoch": 0.16370709207911135, "grad_norm": 11.05100154876709, "learning_rate": 9.478744280627106e-06, "loss": 2.6127, "step": 627800 }, { "epoch": 0.16375924470481354, "grad_norm": 11.327431678771973, "learning_rate": 9.478372596661847e-06, "loss": 2.6414, "step": 628000 }, { "epoch": 0.1638113973305157, "grad_norm": 10.757742881774902, "learning_rate": 9.47800078752091e-06, "loss": 2.6034, "step": 628200 }, { "epoch": 0.16386354995621788, "grad_norm": 9.70084285736084, "learning_rate": 9.477628853214689e-06, "loss": 2.582, "step": 628400 }, { "epoch": 0.16391570258192004, "grad_norm": 11.229771614074707, "learning_rate": 9.477256793753578e-06, "loss": 2.6069, "step": 628600 }, { "epoch": 0.16396785520762222, "grad_norm": 10.731244087219238, "learning_rate": 9.476884609147976e-06, "loss": 2.5613, "step": 628800 }, { "epoch": 0.16402000783332438, "grad_norm": 9.405749320983887, "learning_rate": 9.476512299408287e-06, "loss": 2.5655, "step": 629000 }, { "epoch": 0.16407216045902656, "grad_norm": 12.480544090270996, "learning_rate": 9.47613986454492e-06, "loss": 2.6047, "step": 629200 }, { "epoch": 0.16412431308472872, "grad_norm": 10.722823143005371, "learning_rate": 9.47576730456828e-06, "loss": 2.5775, "step": 629400 }, { "epoch": 0.1641764657104309, "grad_norm": 10.385933876037598, "learning_rate": 9.475394619488785e-06, "loss": 2.6002, "step": 629600 }, { "epoch": 0.16422861833613306, "grad_norm": 8.670275688171387, "learning_rate": 9.47502180931685e-06, "loss": 2.552, "step": 629800 }, { "epoch": 0.16428077096183524, "grad_norm": 11.322480201721191, "learning_rate": 9.474648874062894e-06, "loss": 2.6183, "step": 630000 }, { "epoch": 0.1643329235875374, "grad_norm": 10.096075057983398, "learning_rate": 9.474275813737344e-06, "loss": 2.582, "step": 630200 }, { "epoch": 0.16438507621323958, "grad_norm": 10.714835166931152, "learning_rate": 9.473902628350624e-06, "loss": 2.6248, "step": 630400 }, { "epoch": 0.16443722883894174, "grad_norm": 9.770901679992676, "learning_rate": 9.473529317913169e-06, "loss": 2.6214, "step": 630600 }, { "epoch": 0.1644893814646439, "grad_norm": 11.455342292785645, "learning_rate": 9.47315588243541e-06, "loss": 2.5787, "step": 630800 }, { "epoch": 0.16454153409034608, "grad_norm": 11.288113594055176, "learning_rate": 9.472782321927786e-06, "loss": 2.6009, "step": 631000 }, { "epoch": 0.16459368671604824, "grad_norm": 9.5037260055542, "learning_rate": 9.472408636400742e-06, "loss": 2.579, "step": 631200 }, { "epoch": 0.16464583934175042, "grad_norm": 11.143527030944824, "learning_rate": 9.472034825864718e-06, "loss": 2.585, "step": 631400 }, { "epoch": 0.16469799196745258, "grad_norm": 10.50413703918457, "learning_rate": 9.471660890330163e-06, "loss": 2.5971, "step": 631600 }, { "epoch": 0.16475014459315476, "grad_norm": 11.185114860534668, "learning_rate": 9.471286829807531e-06, "loss": 2.6326, "step": 631800 }, { "epoch": 0.16480229721885692, "grad_norm": 10.260226249694824, "learning_rate": 9.470912644307276e-06, "loss": 2.6244, "step": 632000 }, { "epoch": 0.1648544498445591, "grad_norm": 11.488462448120117, "learning_rate": 9.470538333839858e-06, "loss": 2.598, "step": 632200 }, { "epoch": 0.16490660247026126, "grad_norm": 11.961732864379883, "learning_rate": 9.470163898415738e-06, "loss": 2.5907, "step": 632400 }, { "epoch": 0.16495875509596344, "grad_norm": 12.416627883911133, "learning_rate": 9.469789338045383e-06, "loss": 2.5822, "step": 632600 }, { "epoch": 0.1650109077216656, "grad_norm": 10.282339096069336, "learning_rate": 9.469414652739262e-06, "loss": 2.5707, "step": 632800 }, { "epoch": 0.16506306034736778, "grad_norm": 10.362985610961914, "learning_rate": 9.46903984250785e-06, "loss": 2.6038, "step": 633000 }, { "epoch": 0.16511521297306994, "grad_norm": 11.942998886108398, "learning_rate": 9.468664907361618e-06, "loss": 2.6165, "step": 633200 }, { "epoch": 0.16516736559877213, "grad_norm": 11.504192352294922, "learning_rate": 9.468289847311052e-06, "loss": 2.6164, "step": 633400 }, { "epoch": 0.16521951822447428, "grad_norm": 11.131896018981934, "learning_rate": 9.467914662366632e-06, "loss": 2.5774, "step": 633600 }, { "epoch": 0.16527167085017647, "grad_norm": 10.091717720031738, "learning_rate": 9.467539352538845e-06, "loss": 2.6165, "step": 633800 }, { "epoch": 0.16532382347587862, "grad_norm": 9.936517715454102, "learning_rate": 9.467163917838183e-06, "loss": 2.5712, "step": 634000 }, { "epoch": 0.1653759761015808, "grad_norm": 11.358585357666016, "learning_rate": 9.466788358275136e-06, "loss": 2.5891, "step": 634200 }, { "epoch": 0.16542812872728296, "grad_norm": 10.875038146972656, "learning_rate": 9.466412673860206e-06, "loss": 2.6178, "step": 634400 }, { "epoch": 0.16548028135298515, "grad_norm": 10.750335693359375, "learning_rate": 9.466036864603893e-06, "loss": 2.6026, "step": 634600 }, { "epoch": 0.1655324339786873, "grad_norm": 9.322654724121094, "learning_rate": 9.465660930516698e-06, "loss": 2.5806, "step": 634800 }, { "epoch": 0.1655845866043895, "grad_norm": 11.006738662719727, "learning_rate": 9.465284871609132e-06, "loss": 2.5799, "step": 635000 }, { "epoch": 0.16563673923009165, "grad_norm": 11.173583030700684, "learning_rate": 9.464908687891704e-06, "loss": 2.6159, "step": 635200 }, { "epoch": 0.16568889185579383, "grad_norm": 9.691333770751953, "learning_rate": 9.46453237937493e-06, "loss": 2.5984, "step": 635400 }, { "epoch": 0.16574104448149599, "grad_norm": 10.239128112792969, "learning_rate": 9.464155946069329e-06, "loss": 2.6037, "step": 635600 }, { "epoch": 0.16579319710719817, "grad_norm": 11.106518745422363, "learning_rate": 9.463779387985422e-06, "loss": 2.5693, "step": 635800 }, { "epoch": 0.16584534973290033, "grad_norm": 11.097664833068848, "learning_rate": 9.463402705133735e-06, "loss": 2.567, "step": 636000 }, { "epoch": 0.1658975023586025, "grad_norm": 10.165889739990234, "learning_rate": 9.463025897524794e-06, "loss": 2.6039, "step": 636200 }, { "epoch": 0.16594965498430467, "grad_norm": 10.918819427490234, "learning_rate": 9.462648965169133e-06, "loss": 2.6199, "step": 636400 }, { "epoch": 0.16600180761000682, "grad_norm": 10.222068786621094, "learning_rate": 9.46227190807729e-06, "loss": 2.5954, "step": 636600 }, { "epoch": 0.166053960235709, "grad_norm": 10.973430633544922, "learning_rate": 9.4618947262598e-06, "loss": 2.5924, "step": 636800 }, { "epoch": 0.16610611286141116, "grad_norm": 12.050614356994629, "learning_rate": 9.461517419727208e-06, "loss": 2.5885, "step": 637000 }, { "epoch": 0.16615826548711335, "grad_norm": 10.913813591003418, "learning_rate": 9.46113998849006e-06, "loss": 2.5929, "step": 637200 }, { "epoch": 0.1662104181128155, "grad_norm": 11.964225769042969, "learning_rate": 9.460762432558905e-06, "loss": 2.5462, "step": 637400 }, { "epoch": 0.1662625707385177, "grad_norm": 12.226649284362793, "learning_rate": 9.460384751944298e-06, "loss": 2.5944, "step": 637600 }, { "epoch": 0.16631472336421985, "grad_norm": 11.865588188171387, "learning_rate": 9.460006946656794e-06, "loss": 2.5778, "step": 637800 }, { "epoch": 0.16636687598992203, "grad_norm": 10.3606538772583, "learning_rate": 9.459629016706952e-06, "loss": 2.5963, "step": 638000 }, { "epoch": 0.1664190286156242, "grad_norm": 9.922835350036621, "learning_rate": 9.459250962105337e-06, "loss": 2.606, "step": 638200 }, { "epoch": 0.16647118124132637, "grad_norm": 11.583536148071289, "learning_rate": 9.458872782862516e-06, "loss": 2.5747, "step": 638400 }, { "epoch": 0.16652333386702853, "grad_norm": 9.941807746887207, "learning_rate": 9.458494478989059e-06, "loss": 2.5838, "step": 638600 }, { "epoch": 0.1665754864927307, "grad_norm": 11.269411087036133, "learning_rate": 9.458116050495543e-06, "loss": 2.6033, "step": 638800 }, { "epoch": 0.16662763911843287, "grad_norm": 11.465824127197266, "learning_rate": 9.457737497392541e-06, "loss": 2.5794, "step": 639000 }, { "epoch": 0.16667979174413505, "grad_norm": 11.089844703674316, "learning_rate": 9.457358819690636e-06, "loss": 2.591, "step": 639200 }, { "epoch": 0.1667319443698372, "grad_norm": 11.80525016784668, "learning_rate": 9.456980017400413e-06, "loss": 2.5967, "step": 639400 }, { "epoch": 0.1667840969955394, "grad_norm": 11.84639835357666, "learning_rate": 9.456601090532458e-06, "loss": 2.5895, "step": 639600 }, { "epoch": 0.16683624962124155, "grad_norm": 10.74103832244873, "learning_rate": 9.456222039097365e-06, "loss": 2.5974, "step": 639800 }, { "epoch": 0.16688840224694373, "grad_norm": 11.63347053527832, "learning_rate": 9.455842863105728e-06, "loss": 2.6094, "step": 640000 }, { "epoch": 0.1669405548726459, "grad_norm": 10.795598983764648, "learning_rate": 9.455463562568143e-06, "loss": 2.585, "step": 640200 }, { "epoch": 0.16699270749834808, "grad_norm": 10.217510223388672, "learning_rate": 9.455084137495216e-06, "loss": 2.6029, "step": 640400 }, { "epoch": 0.16704486012405023, "grad_norm": 11.189576148986816, "learning_rate": 9.454704587897553e-06, "loss": 2.6158, "step": 640600 }, { "epoch": 0.16709701274975242, "grad_norm": 12.054612159729004, "learning_rate": 9.454324913785756e-06, "loss": 2.5981, "step": 640800 }, { "epoch": 0.16714916537545457, "grad_norm": 10.436782836914062, "learning_rate": 9.453945115170444e-06, "loss": 2.5662, "step": 641000 }, { "epoch": 0.16720131800115676, "grad_norm": 8.611639022827148, "learning_rate": 9.453565192062228e-06, "loss": 2.5866, "step": 641200 }, { "epoch": 0.1672534706268589, "grad_norm": 12.085579872131348, "learning_rate": 9.453185144471732e-06, "loss": 2.5935, "step": 641400 }, { "epoch": 0.1673056232525611, "grad_norm": 12.087554931640625, "learning_rate": 9.452804972409576e-06, "loss": 2.6145, "step": 641600 }, { "epoch": 0.16735777587826325, "grad_norm": 11.656718254089355, "learning_rate": 9.452424675886386e-06, "loss": 2.6006, "step": 641800 }, { "epoch": 0.16740992850396544, "grad_norm": 9.5364351272583, "learning_rate": 9.452044254912794e-06, "loss": 2.638, "step": 642000 }, { "epoch": 0.1674620811296676, "grad_norm": 9.897908210754395, "learning_rate": 9.45166370949943e-06, "loss": 2.5881, "step": 642200 }, { "epoch": 0.16751423375536975, "grad_norm": 10.919352531433105, "learning_rate": 9.451283039656933e-06, "loss": 2.5761, "step": 642400 }, { "epoch": 0.16756638638107194, "grad_norm": 11.361388206481934, "learning_rate": 9.450902245395943e-06, "loss": 2.6329, "step": 642600 }, { "epoch": 0.1676185390067741, "grad_norm": 11.634418487548828, "learning_rate": 9.450521326727104e-06, "loss": 2.5698, "step": 642800 }, { "epoch": 0.16767069163247628, "grad_norm": 11.556270599365234, "learning_rate": 9.45014028366106e-06, "loss": 2.5883, "step": 643000 }, { "epoch": 0.16772284425817843, "grad_norm": 11.184852600097656, "learning_rate": 9.449759116208465e-06, "loss": 2.6441, "step": 643200 }, { "epoch": 0.16777499688388062, "grad_norm": 10.851284980773926, "learning_rate": 9.449377824379973e-06, "loss": 2.5469, "step": 643400 }, { "epoch": 0.16782714950958277, "grad_norm": 11.684367179870605, "learning_rate": 9.448996408186239e-06, "loss": 2.6165, "step": 643600 }, { "epoch": 0.16787930213528496, "grad_norm": 11.91071891784668, "learning_rate": 9.448614867637926e-06, "loss": 2.6167, "step": 643800 }, { "epoch": 0.16793145476098711, "grad_norm": 10.489933013916016, "learning_rate": 9.448233202745699e-06, "loss": 2.5944, "step": 644000 }, { "epoch": 0.1679836073866893, "grad_norm": 9.974836349487305, "learning_rate": 9.447851413520222e-06, "loss": 2.5532, "step": 644200 }, { "epoch": 0.16803576001239146, "grad_norm": 12.725610733032227, "learning_rate": 9.447469499972172e-06, "loss": 2.5883, "step": 644400 }, { "epoch": 0.16808791263809364, "grad_norm": 10.390010833740234, "learning_rate": 9.447087462112222e-06, "loss": 2.6363, "step": 644600 }, { "epoch": 0.1681400652637958, "grad_norm": 11.040779113769531, "learning_rate": 9.446705299951047e-06, "loss": 2.5922, "step": 644800 }, { "epoch": 0.16819221788949798, "grad_norm": 10.456809997558594, "learning_rate": 9.446323013499333e-06, "loss": 2.587, "step": 645000 }, { "epoch": 0.16824437051520014, "grad_norm": 12.05203628540039, "learning_rate": 9.445940602767764e-06, "loss": 2.6597, "step": 645200 }, { "epoch": 0.16829652314090232, "grad_norm": 12.002357482910156, "learning_rate": 9.445558067767028e-06, "loss": 2.6082, "step": 645400 }, { "epoch": 0.16834867576660448, "grad_norm": 11.387803077697754, "learning_rate": 9.445175408507818e-06, "loss": 2.6126, "step": 645600 }, { "epoch": 0.16840082839230666, "grad_norm": 11.261042594909668, "learning_rate": 9.444792625000832e-06, "loss": 2.6011, "step": 645800 }, { "epoch": 0.16845298101800882, "grad_norm": 10.792635917663574, "learning_rate": 9.444409717256766e-06, "loss": 2.6017, "step": 646000 }, { "epoch": 0.168505133643711, "grad_norm": 10.310630798339844, "learning_rate": 9.444026685286323e-06, "loss": 2.6158, "step": 646200 }, { "epoch": 0.16855728626941316, "grad_norm": 11.336012840270996, "learning_rate": 9.443643529100211e-06, "loss": 2.5637, "step": 646400 }, { "epoch": 0.16860943889511534, "grad_norm": 10.27746295928955, "learning_rate": 9.443260248709138e-06, "loss": 2.5942, "step": 646600 }, { "epoch": 0.1686615915208175, "grad_norm": 11.652708053588867, "learning_rate": 9.442876844123818e-06, "loss": 2.5802, "step": 646800 }, { "epoch": 0.16871374414651968, "grad_norm": 10.635597229003906, "learning_rate": 9.442493315354966e-06, "loss": 2.6022, "step": 647000 }, { "epoch": 0.16876589677222184, "grad_norm": 12.215426445007324, "learning_rate": 9.442109662413306e-06, "loss": 2.603, "step": 647200 }, { "epoch": 0.16881804939792402, "grad_norm": 10.632146835327148, "learning_rate": 9.441725885309558e-06, "loss": 2.5967, "step": 647400 }, { "epoch": 0.16887020202362618, "grad_norm": 12.226157188415527, "learning_rate": 9.441341984054448e-06, "loss": 2.6191, "step": 647600 }, { "epoch": 0.16892235464932837, "grad_norm": 11.939155578613281, "learning_rate": 9.440957958658712e-06, "loss": 2.5977, "step": 647800 }, { "epoch": 0.16897450727503052, "grad_norm": 11.38477611541748, "learning_rate": 9.440573809133078e-06, "loss": 2.6372, "step": 648000 }, { "epoch": 0.16902665990073268, "grad_norm": 10.353914260864258, "learning_rate": 9.440189535488286e-06, "loss": 2.6133, "step": 648200 }, { "epoch": 0.16907881252643486, "grad_norm": 10.853445053100586, "learning_rate": 9.439805137735078e-06, "loss": 2.5439, "step": 648400 }, { "epoch": 0.16913096515213702, "grad_norm": 10.450331687927246, "learning_rate": 9.439420615884196e-06, "loss": 2.6195, "step": 648600 }, { "epoch": 0.1691831177778392, "grad_norm": 11.067255973815918, "learning_rate": 9.43903596994639e-06, "loss": 2.5792, "step": 648800 }, { "epoch": 0.16923527040354136, "grad_norm": 10.674095153808594, "learning_rate": 9.43865119993241e-06, "loss": 2.5886, "step": 649000 }, { "epoch": 0.16928742302924354, "grad_norm": 11.1282377243042, "learning_rate": 9.43826630585301e-06, "loss": 2.5493, "step": 649200 }, { "epoch": 0.1693395756549457, "grad_norm": 11.285887718200684, "learning_rate": 9.43788128771895e-06, "loss": 2.5827, "step": 649400 }, { "epoch": 0.16939172828064789, "grad_norm": 12.13648509979248, "learning_rate": 9.43749614554099e-06, "loss": 2.589, "step": 649600 }, { "epoch": 0.16944388090635004, "grad_norm": 11.782530784606934, "learning_rate": 9.437110879329897e-06, "loss": 2.5266, "step": 649800 }, { "epoch": 0.16949603353205223, "grad_norm": 10.601338386535645, "learning_rate": 9.436725489096438e-06, "loss": 2.6385, "step": 650000 }, { "epoch": 0.16954818615775438, "grad_norm": 10.485753059387207, "learning_rate": 9.436339974851388e-06, "loss": 2.5673, "step": 650200 }, { "epoch": 0.16960033878345657, "grad_norm": 10.410168647766113, "learning_rate": 9.435954336605518e-06, "loss": 2.6, "step": 650400 }, { "epoch": 0.16965249140915872, "grad_norm": 11.205649375915527, "learning_rate": 9.43556857436961e-06, "loss": 2.5582, "step": 650600 }, { "epoch": 0.1697046440348609, "grad_norm": 11.990690231323242, "learning_rate": 9.435182688154447e-06, "loss": 2.6067, "step": 650800 }, { "epoch": 0.16975679666056306, "grad_norm": 12.044350624084473, "learning_rate": 9.434796677970813e-06, "loss": 2.5565, "step": 651000 }, { "epoch": 0.16980894928626525, "grad_norm": 12.324272155761719, "learning_rate": 9.434410543829497e-06, "loss": 2.5784, "step": 651200 }, { "epoch": 0.1698611019119674, "grad_norm": 10.339320182800293, "learning_rate": 9.434024285741295e-06, "loss": 2.5863, "step": 651400 }, { "epoch": 0.1699132545376696, "grad_norm": 10.739986419677734, "learning_rate": 9.433637903717e-06, "loss": 2.6013, "step": 651600 }, { "epoch": 0.16996540716337175, "grad_norm": 11.532021522521973, "learning_rate": 9.433251397767415e-06, "loss": 2.5864, "step": 651800 }, { "epoch": 0.17001755978907393, "grad_norm": 11.322754859924316, "learning_rate": 9.432864767903342e-06, "loss": 2.5692, "step": 652000 }, { "epoch": 0.1700697124147761, "grad_norm": 10.201240539550781, "learning_rate": 9.432478014135587e-06, "loss": 2.579, "step": 652200 }, { "epoch": 0.17012186504047827, "grad_norm": 10.831350326538086, "learning_rate": 9.432091136474961e-06, "loss": 2.5776, "step": 652400 }, { "epoch": 0.17017401766618043, "grad_norm": 10.44613265991211, "learning_rate": 9.431704134932276e-06, "loss": 2.5915, "step": 652600 }, { "epoch": 0.1702261702918826, "grad_norm": 10.422142028808594, "learning_rate": 9.431317009518352e-06, "loss": 2.5365, "step": 652800 }, { "epoch": 0.17027832291758477, "grad_norm": 12.878448486328125, "learning_rate": 9.430929760244007e-06, "loss": 2.6059, "step": 653000 }, { "epoch": 0.17033047554328695, "grad_norm": 10.190254211425781, "learning_rate": 9.430542387120068e-06, "loss": 2.5466, "step": 653200 }, { "epoch": 0.1703826281689891, "grad_norm": 11.413830757141113, "learning_rate": 9.430154890157359e-06, "loss": 2.5667, "step": 653400 }, { "epoch": 0.1704347807946913, "grad_norm": 9.806634902954102, "learning_rate": 9.429767269366712e-06, "loss": 2.5951, "step": 653600 }, { "epoch": 0.17048693342039345, "grad_norm": 11.987215042114258, "learning_rate": 9.429379524758963e-06, "loss": 2.5993, "step": 653800 }, { "epoch": 0.1705390860460956, "grad_norm": 11.944783210754395, "learning_rate": 9.42899165634495e-06, "loss": 2.6648, "step": 654000 }, { "epoch": 0.1705912386717978, "grad_norm": 10.974967956542969, "learning_rate": 9.428603664135511e-06, "loss": 2.5808, "step": 654200 }, { "epoch": 0.17064339129749995, "grad_norm": 11.658656120300293, "learning_rate": 9.428215548141495e-06, "loss": 2.5701, "step": 654400 }, { "epoch": 0.17069554392320213, "grad_norm": 10.598328590393066, "learning_rate": 9.427827308373747e-06, "loss": 2.6166, "step": 654600 }, { "epoch": 0.1707476965489043, "grad_norm": 11.660055160522461, "learning_rate": 9.427438944843121e-06, "loss": 2.6142, "step": 654800 }, { "epoch": 0.17079984917460647, "grad_norm": 11.008819580078125, "learning_rate": 9.427050457560472e-06, "loss": 2.6348, "step": 655000 }, { "epoch": 0.17085200180030863, "grad_norm": 10.61733341217041, "learning_rate": 9.42666184653666e-06, "loss": 2.6307, "step": 655200 }, { "epoch": 0.1709041544260108, "grad_norm": 10.868151664733887, "learning_rate": 9.426273111782543e-06, "loss": 2.5962, "step": 655400 }, { "epoch": 0.17095630705171297, "grad_norm": 11.051012992858887, "learning_rate": 9.425884253308988e-06, "loss": 2.625, "step": 655600 }, { "epoch": 0.17100845967741515, "grad_norm": 11.177716255187988, "learning_rate": 9.425495271126865e-06, "loss": 2.5808, "step": 655800 }, { "epoch": 0.1710606123031173, "grad_norm": 10.845020294189453, "learning_rate": 9.425106165247048e-06, "loss": 2.5558, "step": 656000 }, { "epoch": 0.1711127649288195, "grad_norm": 10.903834342956543, "learning_rate": 9.42471693568041e-06, "loss": 2.6147, "step": 656200 }, { "epoch": 0.17116491755452165, "grad_norm": 9.43630599975586, "learning_rate": 9.424327582437833e-06, "loss": 2.6235, "step": 656400 }, { "epoch": 0.17121707018022383, "grad_norm": 10.715693473815918, "learning_rate": 9.423938105530197e-06, "loss": 2.6456, "step": 656600 }, { "epoch": 0.171269222805926, "grad_norm": 10.508771896362305, "learning_rate": 9.423548504968392e-06, "loss": 2.578, "step": 656800 }, { "epoch": 0.17132137543162818, "grad_norm": 10.303911209106445, "learning_rate": 9.423158780763304e-06, "loss": 2.5929, "step": 657000 }, { "epoch": 0.17137352805733033, "grad_norm": 11.929397583007812, "learning_rate": 9.422768932925828e-06, "loss": 2.5516, "step": 657200 }, { "epoch": 0.17142568068303252, "grad_norm": 11.60670280456543, "learning_rate": 9.422378961466863e-06, "loss": 2.6122, "step": 657400 }, { "epoch": 0.17147783330873467, "grad_norm": 10.905845642089844, "learning_rate": 9.421988866397304e-06, "loss": 2.5991, "step": 657600 }, { "epoch": 0.17152998593443686, "grad_norm": 9.888165473937988, "learning_rate": 9.421598647728059e-06, "loss": 2.5957, "step": 657800 }, { "epoch": 0.171582138560139, "grad_norm": 10.218853950500488, "learning_rate": 9.421208305470033e-06, "loss": 2.5762, "step": 658000 }, { "epoch": 0.1716342911858412, "grad_norm": 10.51092529296875, "learning_rate": 9.420817839634138e-06, "loss": 2.6286, "step": 658200 }, { "epoch": 0.17168644381154335, "grad_norm": 12.242321968078613, "learning_rate": 9.420427250231286e-06, "loss": 2.5824, "step": 658400 }, { "epoch": 0.17173859643724554, "grad_norm": 9.587675094604492, "learning_rate": 9.420036537272395e-06, "loss": 2.6159, "step": 658600 }, { "epoch": 0.1717907490629477, "grad_norm": 12.193526268005371, "learning_rate": 9.419645700768386e-06, "loss": 2.5814, "step": 658800 }, { "epoch": 0.17184290168864988, "grad_norm": 13.005501747131348, "learning_rate": 9.419254740730184e-06, "loss": 2.6249, "step": 659000 }, { "epoch": 0.17189505431435204, "grad_norm": 12.042132377624512, "learning_rate": 9.418863657168717e-06, "loss": 2.592, "step": 659200 }, { "epoch": 0.17194720694005422, "grad_norm": 11.057788848876953, "learning_rate": 9.418472450094915e-06, "loss": 2.6023, "step": 659400 }, { "epoch": 0.17199935956575638, "grad_norm": 11.42861557006836, "learning_rate": 9.418081119519715e-06, "loss": 2.6074, "step": 659600 }, { "epoch": 0.17205151219145856, "grad_norm": 10.534041404724121, "learning_rate": 9.417689665454052e-06, "loss": 2.6056, "step": 659800 }, { "epoch": 0.17210366481716072, "grad_norm": 10.17191219329834, "learning_rate": 9.417298087908869e-06, "loss": 2.5732, "step": 660000 }, { "epoch": 0.17215581744286287, "grad_norm": 10.42569351196289, "learning_rate": 9.41690638689511e-06, "loss": 2.5551, "step": 660200 }, { "epoch": 0.17220797006856506, "grad_norm": 10.62281608581543, "learning_rate": 9.416514562423726e-06, "loss": 2.5706, "step": 660400 }, { "epoch": 0.17226012269426721, "grad_norm": 10.042311668395996, "learning_rate": 9.416122614505669e-06, "loss": 2.5726, "step": 660600 }, { "epoch": 0.1723122753199694, "grad_norm": 11.639966011047363, "learning_rate": 9.41573054315189e-06, "loss": 2.6004, "step": 660800 }, { "epoch": 0.17236442794567156, "grad_norm": 12.120760917663574, "learning_rate": 9.415338348373354e-06, "loss": 2.5984, "step": 661000 }, { "epoch": 0.17241658057137374, "grad_norm": 10.926827430725098, "learning_rate": 9.414946030181018e-06, "loss": 2.5645, "step": 661200 }, { "epoch": 0.1724687331970759, "grad_norm": 12.61771011352539, "learning_rate": 9.414553588585849e-06, "loss": 2.6064, "step": 661400 }, { "epoch": 0.17252088582277808, "grad_norm": 11.024273872375488, "learning_rate": 9.41416102359882e-06, "loss": 2.6001, "step": 661600 }, { "epoch": 0.17257303844848024, "grad_norm": 12.414682388305664, "learning_rate": 9.413768335230897e-06, "loss": 2.5993, "step": 661800 }, { "epoch": 0.17262519107418242, "grad_norm": 12.029796600341797, "learning_rate": 9.413375523493062e-06, "loss": 2.542, "step": 662000 }, { "epoch": 0.17267734369988458, "grad_norm": 10.11413288116455, "learning_rate": 9.412982588396292e-06, "loss": 2.5833, "step": 662200 }, { "epoch": 0.17272949632558676, "grad_norm": 11.750227928161621, "learning_rate": 9.41258952995157e-06, "loss": 2.5748, "step": 662400 }, { "epoch": 0.17278164895128892, "grad_norm": 10.92453670501709, "learning_rate": 9.412196348169883e-06, "loss": 2.5841, "step": 662600 }, { "epoch": 0.1728338015769911, "grad_norm": 11.723861694335938, "learning_rate": 9.411803043062222e-06, "loss": 2.5925, "step": 662800 }, { "epoch": 0.17288595420269326, "grad_norm": 10.404121398925781, "learning_rate": 9.411409614639576e-06, "loss": 2.5609, "step": 663000 }, { "epoch": 0.17293810682839544, "grad_norm": 10.455796241760254, "learning_rate": 9.411016062912946e-06, "loss": 2.6097, "step": 663200 }, { "epoch": 0.1729902594540976, "grad_norm": 10.422048568725586, "learning_rate": 9.410622387893332e-06, "loss": 2.6332, "step": 663400 }, { "epoch": 0.17304241207979978, "grad_norm": 10.72389030456543, "learning_rate": 9.410228589591735e-06, "loss": 2.5613, "step": 663600 }, { "epoch": 0.17309456470550194, "grad_norm": 10.777589797973633, "learning_rate": 9.409834668019165e-06, "loss": 2.56, "step": 663800 }, { "epoch": 0.17314671733120413, "grad_norm": 11.299818992614746, "learning_rate": 9.40944062318663e-06, "loss": 2.6022, "step": 664000 }, { "epoch": 0.17319886995690628, "grad_norm": 12.202249526977539, "learning_rate": 9.409046455105146e-06, "loss": 2.6085, "step": 664200 }, { "epoch": 0.17325102258260847, "grad_norm": 10.913101196289062, "learning_rate": 9.40865216378573e-06, "loss": 2.5667, "step": 664400 }, { "epoch": 0.17330317520831062, "grad_norm": 12.865901947021484, "learning_rate": 9.408257749239402e-06, "loss": 2.6148, "step": 664600 }, { "epoch": 0.1733553278340128, "grad_norm": 11.728925704956055, "learning_rate": 9.407863211477189e-06, "loss": 2.5964, "step": 664800 }, { "epoch": 0.17340748045971496, "grad_norm": 10.57736587524414, "learning_rate": 9.407468550510114e-06, "loss": 2.5577, "step": 665000 }, { "epoch": 0.17345963308541715, "grad_norm": 11.458696365356445, "learning_rate": 9.407073766349213e-06, "loss": 2.5685, "step": 665200 }, { "epoch": 0.1735117857111193, "grad_norm": 10.13565444946289, "learning_rate": 9.406678859005518e-06, "loss": 2.5543, "step": 665400 }, { "epoch": 0.1735639383368215, "grad_norm": 11.961712837219238, "learning_rate": 9.406283828490068e-06, "loss": 2.5873, "step": 665600 }, { "epoch": 0.17361609096252364, "grad_norm": 12.306097030639648, "learning_rate": 9.405888674813903e-06, "loss": 2.6193, "step": 665800 }, { "epoch": 0.1736682435882258, "grad_norm": 11.791540145874023, "learning_rate": 9.40549339798807e-06, "loss": 2.575, "step": 666000 }, { "epoch": 0.17372039621392799, "grad_norm": 12.87740421295166, "learning_rate": 9.405097998023618e-06, "loss": 2.5365, "step": 666200 }, { "epoch": 0.17377254883963014, "grad_norm": 13.530356407165527, "learning_rate": 9.404702474931598e-06, "loss": 2.5866, "step": 666400 }, { "epoch": 0.17382470146533233, "grad_norm": 10.974345207214355, "learning_rate": 9.404306828723063e-06, "loss": 2.6068, "step": 666600 }, { "epoch": 0.17387685409103448, "grad_norm": 12.054535865783691, "learning_rate": 9.403911059409075e-06, "loss": 2.6092, "step": 666800 }, { "epoch": 0.17392900671673667, "grad_norm": 10.9446382522583, "learning_rate": 9.403515167000694e-06, "loss": 2.5992, "step": 667000 }, { "epoch": 0.17398115934243882, "grad_norm": 10.59375286102295, "learning_rate": 9.403119151508988e-06, "loss": 2.5542, "step": 667200 }, { "epoch": 0.174033311968141, "grad_norm": 10.471702575683594, "learning_rate": 9.402723012945023e-06, "loss": 2.6148, "step": 667400 }, { "epoch": 0.17408546459384316, "grad_norm": 11.591514587402344, "learning_rate": 9.402326751319875e-06, "loss": 2.5909, "step": 667600 }, { "epoch": 0.17413761721954535, "grad_norm": 11.10065746307373, "learning_rate": 9.401930366644617e-06, "loss": 2.5856, "step": 667800 }, { "epoch": 0.1741897698452475, "grad_norm": 11.09948444366455, "learning_rate": 9.401533858930331e-06, "loss": 2.6074, "step": 668000 }, { "epoch": 0.1742419224709497, "grad_norm": 10.385692596435547, "learning_rate": 9.401137228188098e-06, "loss": 2.5725, "step": 668200 }, { "epoch": 0.17429407509665185, "grad_norm": 8.567261695861816, "learning_rate": 9.400740474429004e-06, "loss": 2.5714, "step": 668400 }, { "epoch": 0.17434622772235403, "grad_norm": 10.014311790466309, "learning_rate": 9.40034359766414e-06, "loss": 2.5659, "step": 668600 }, { "epoch": 0.1743983803480562, "grad_norm": 11.485774040222168, "learning_rate": 9.3999465979046e-06, "loss": 2.5965, "step": 668800 }, { "epoch": 0.17445053297375837, "grad_norm": 10.644266128540039, "learning_rate": 9.399549475161476e-06, "loss": 2.61, "step": 669000 }, { "epoch": 0.17450268559946053, "grad_norm": 12.157289505004883, "learning_rate": 9.399152229445874e-06, "loss": 2.5701, "step": 669200 }, { "epoch": 0.1745548382251627, "grad_norm": 9.978353500366211, "learning_rate": 9.398754860768892e-06, "loss": 2.5829, "step": 669400 }, { "epoch": 0.17460699085086487, "grad_norm": 11.341612815856934, "learning_rate": 9.398357369141641e-06, "loss": 2.6211, "step": 669600 }, { "epoch": 0.17465914347656705, "grad_norm": 10.040153503417969, "learning_rate": 9.397959754575232e-06, "loss": 2.6005, "step": 669800 }, { "epoch": 0.1747112961022692, "grad_norm": 12.061646461486816, "learning_rate": 9.397562017080774e-06, "loss": 2.608, "step": 670000 }, { "epoch": 0.1747634487279714, "grad_norm": 11.82841968536377, "learning_rate": 9.39716415666939e-06, "loss": 2.5988, "step": 670200 }, { "epoch": 0.17481560135367355, "grad_norm": 9.31937313079834, "learning_rate": 9.396766173352196e-06, "loss": 2.585, "step": 670400 }, { "epoch": 0.17486775397937573, "grad_norm": 11.591330528259277, "learning_rate": 9.396368067140318e-06, "loss": 2.5517, "step": 670600 }, { "epoch": 0.1749199066050779, "grad_norm": 10.716523170471191, "learning_rate": 9.395969838044882e-06, "loss": 2.5751, "step": 670800 }, { "epoch": 0.17497205923078007, "grad_norm": 9.377893447875977, "learning_rate": 9.395571486077024e-06, "loss": 2.5578, "step": 671000 }, { "epoch": 0.17502421185648223, "grad_norm": 10.563931465148926, "learning_rate": 9.395173011247871e-06, "loss": 2.5933, "step": 671200 }, { "epoch": 0.17507636448218442, "grad_norm": 12.169573783874512, "learning_rate": 9.394774413568565e-06, "loss": 2.5995, "step": 671400 }, { "epoch": 0.17512851710788657, "grad_norm": 10.579012870788574, "learning_rate": 9.394375693050248e-06, "loss": 2.6174, "step": 671600 }, { "epoch": 0.17518066973358873, "grad_norm": 11.727015495300293, "learning_rate": 9.393976849704063e-06, "loss": 2.5779, "step": 671800 }, { "epoch": 0.1752328223592909, "grad_norm": 11.265336036682129, "learning_rate": 9.393577883541158e-06, "loss": 2.6134, "step": 672000 }, { "epoch": 0.17528497498499307, "grad_norm": 10.374798774719238, "learning_rate": 9.393178794572687e-06, "loss": 2.6363, "step": 672200 }, { "epoch": 0.17533712761069525, "grad_norm": 12.196139335632324, "learning_rate": 9.392779582809802e-06, "loss": 2.599, "step": 672400 }, { "epoch": 0.1753892802363974, "grad_norm": 10.827107429504395, "learning_rate": 9.392380248263661e-06, "loss": 2.5711, "step": 672600 }, { "epoch": 0.1754414328620996, "grad_norm": 11.606658935546875, "learning_rate": 9.39198079094543e-06, "loss": 2.5836, "step": 672800 }, { "epoch": 0.17549358548780175, "grad_norm": 11.592090606689453, "learning_rate": 9.39158121086627e-06, "loss": 2.5763, "step": 673000 }, { "epoch": 0.17554573811350394, "grad_norm": 9.465235710144043, "learning_rate": 9.391181508037352e-06, "loss": 2.5332, "step": 673200 }, { "epoch": 0.1755978907392061, "grad_norm": 11.990010261535645, "learning_rate": 9.390781682469848e-06, "loss": 2.5785, "step": 673400 }, { "epoch": 0.17565004336490828, "grad_norm": 11.225507736206055, "learning_rate": 9.390381734174933e-06, "loss": 2.5475, "step": 673600 }, { "epoch": 0.17570219599061043, "grad_norm": 12.462555885314941, "learning_rate": 9.389981663163786e-06, "loss": 2.628, "step": 673800 }, { "epoch": 0.17575434861631262, "grad_norm": 12.385912895202637, "learning_rate": 9.389581469447591e-06, "loss": 2.5793, "step": 674000 }, { "epoch": 0.17580650124201477, "grad_norm": 10.774480819702148, "learning_rate": 9.38918115303753e-06, "loss": 2.5479, "step": 674200 }, { "epoch": 0.17585865386771696, "grad_norm": 11.743460655212402, "learning_rate": 9.388780713944795e-06, "loss": 2.5841, "step": 674400 }, { "epoch": 0.17591080649341911, "grad_norm": 10.81772232055664, "learning_rate": 9.388380152180582e-06, "loss": 2.568, "step": 674600 }, { "epoch": 0.1759629591191213, "grad_norm": 11.486669540405273, "learning_rate": 9.387979467756081e-06, "loss": 2.5902, "step": 674800 }, { "epoch": 0.17601511174482345, "grad_norm": 10.133848190307617, "learning_rate": 9.387578660682495e-06, "loss": 2.5696, "step": 675000 }, { "epoch": 0.17606726437052564, "grad_norm": 11.145345687866211, "learning_rate": 9.387177730971027e-06, "loss": 2.5567, "step": 675200 }, { "epoch": 0.1761194169962278, "grad_norm": 11.252696990966797, "learning_rate": 9.386776678632881e-06, "loss": 2.5772, "step": 675400 }, { "epoch": 0.17617156962192998, "grad_norm": 10.816537857055664, "learning_rate": 9.38637550367927e-06, "loss": 2.6356, "step": 675600 }, { "epoch": 0.17622372224763214, "grad_norm": 11.040006637573242, "learning_rate": 9.385974206121407e-06, "loss": 2.558, "step": 675800 }, { "epoch": 0.17627587487333432, "grad_norm": 10.698840141296387, "learning_rate": 9.385572785970505e-06, "loss": 2.5948, "step": 676000 }, { "epoch": 0.17632802749903648, "grad_norm": 11.69105339050293, "learning_rate": 9.38517124323779e-06, "loss": 2.6081, "step": 676200 }, { "epoch": 0.17638018012473866, "grad_norm": 11.279681205749512, "learning_rate": 9.38476957793448e-06, "loss": 2.6172, "step": 676400 }, { "epoch": 0.17643233275044082, "grad_norm": 10.574265480041504, "learning_rate": 9.384367790071805e-06, "loss": 2.603, "step": 676600 }, { "epoch": 0.176484485376143, "grad_norm": 13.000247955322266, "learning_rate": 9.383965879660995e-06, "loss": 2.5997, "step": 676800 }, { "epoch": 0.17653663800184516, "grad_norm": 10.329712867736816, "learning_rate": 9.383563846713284e-06, "loss": 2.5801, "step": 677000 }, { "epoch": 0.17658879062754734, "grad_norm": 10.054405212402344, "learning_rate": 9.383161691239909e-06, "loss": 2.5559, "step": 677200 }, { "epoch": 0.1766409432532495, "grad_norm": 10.512011528015137, "learning_rate": 9.382759413252112e-06, "loss": 2.5955, "step": 677400 }, { "epoch": 0.17669309587895166, "grad_norm": 11.089954376220703, "learning_rate": 9.382357012761133e-06, "loss": 2.565, "step": 677600 }, { "epoch": 0.17674524850465384, "grad_norm": 9.8038330078125, "learning_rate": 9.381954489778225e-06, "loss": 2.5907, "step": 677800 }, { "epoch": 0.176797401130356, "grad_norm": 15.173497200012207, "learning_rate": 9.381551844314637e-06, "loss": 2.6021, "step": 678000 }, { "epoch": 0.17684955375605818, "grad_norm": 12.605514526367188, "learning_rate": 9.38114907638162e-06, "loss": 2.6105, "step": 678200 }, { "epoch": 0.17690170638176034, "grad_norm": 10.773606300354004, "learning_rate": 9.380746185990435e-06, "loss": 2.5503, "step": 678400 }, { "epoch": 0.17695385900746252, "grad_norm": 12.675154685974121, "learning_rate": 9.380343173152344e-06, "loss": 2.5884, "step": 678600 }, { "epoch": 0.17700601163316468, "grad_norm": 11.189080238342285, "learning_rate": 9.379940037878611e-06, "loss": 2.5901, "step": 678800 }, { "epoch": 0.17705816425886686, "grad_norm": 11.658578872680664, "learning_rate": 9.379536780180503e-06, "loss": 2.6359, "step": 679000 }, { "epoch": 0.17711031688456902, "grad_norm": 11.764540672302246, "learning_rate": 9.379133400069292e-06, "loss": 2.572, "step": 679200 }, { "epoch": 0.1771624695102712, "grad_norm": 12.119824409484863, "learning_rate": 9.378729897556253e-06, "loss": 2.581, "step": 679400 }, { "epoch": 0.17721462213597336, "grad_norm": 12.850314140319824, "learning_rate": 9.378326272652666e-06, "loss": 2.6052, "step": 679600 }, { "epoch": 0.17726677476167554, "grad_norm": 11.843114852905273, "learning_rate": 9.37792252536981e-06, "loss": 2.6066, "step": 679800 }, { "epoch": 0.1773189273873777, "grad_norm": 11.768165588378906, "learning_rate": 9.377518655718971e-06, "loss": 2.5708, "step": 680000 }, { "epoch": 0.17737108001307988, "grad_norm": 11.470100402832031, "learning_rate": 9.37711466371144e-06, "loss": 2.5771, "step": 680200 }, { "epoch": 0.17742323263878204, "grad_norm": 11.421998023986816, "learning_rate": 9.376710549358505e-06, "loss": 2.602, "step": 680400 }, { "epoch": 0.17747538526448423, "grad_norm": 11.222713470458984, "learning_rate": 9.376306312671465e-06, "loss": 2.6073, "step": 680600 }, { "epoch": 0.17752753789018638, "grad_norm": 11.578697204589844, "learning_rate": 9.375901953661618e-06, "loss": 2.6128, "step": 680800 }, { "epoch": 0.17757969051588857, "grad_norm": 11.527877807617188, "learning_rate": 9.375497472340263e-06, "loss": 2.6079, "step": 681000 }, { "epoch": 0.17763184314159072, "grad_norm": 11.338236808776855, "learning_rate": 9.375092868718711e-06, "loss": 2.5406, "step": 681200 }, { "epoch": 0.1776839957672929, "grad_norm": 10.98487377166748, "learning_rate": 9.374688142808268e-06, "loss": 2.5617, "step": 681400 }, { "epoch": 0.17773614839299506, "grad_norm": 10.685301780700684, "learning_rate": 9.374283294620247e-06, "loss": 2.5682, "step": 681600 }, { "epoch": 0.17778830101869725, "grad_norm": 12.549797058105469, "learning_rate": 9.373878324165965e-06, "loss": 2.6306, "step": 681800 }, { "epoch": 0.1778404536443994, "grad_norm": 11.423171997070312, "learning_rate": 9.37347323145674e-06, "loss": 2.5989, "step": 682000 }, { "epoch": 0.1778926062701016, "grad_norm": 11.801868438720703, "learning_rate": 9.373068016503894e-06, "loss": 2.5902, "step": 682200 }, { "epoch": 0.17794475889580375, "grad_norm": 10.22305965423584, "learning_rate": 9.372662679318755e-06, "loss": 2.5873, "step": 682400 }, { "epoch": 0.17799691152150593, "grad_norm": 10.769112586975098, "learning_rate": 9.372257219912654e-06, "loss": 2.5549, "step": 682600 }, { "epoch": 0.17804906414720809, "grad_norm": 11.621337890625, "learning_rate": 9.371851638296922e-06, "loss": 2.6047, "step": 682800 }, { "epoch": 0.17810121677291027, "grad_norm": 10.884603500366211, "learning_rate": 9.371445934482895e-06, "loss": 2.6138, "step": 683000 }, { "epoch": 0.17815336939861243, "grad_norm": 11.253877639770508, "learning_rate": 9.371040108481915e-06, "loss": 2.5755, "step": 683200 }, { "epoch": 0.1782055220243146, "grad_norm": 13.072035789489746, "learning_rate": 9.370634160305321e-06, "loss": 2.5701, "step": 683400 }, { "epoch": 0.17825767465001677, "grad_norm": 13.245817184448242, "learning_rate": 9.370228089964466e-06, "loss": 2.5832, "step": 683600 }, { "epoch": 0.17830982727571892, "grad_norm": 11.859313011169434, "learning_rate": 9.369821897470695e-06, "loss": 2.5743, "step": 683800 }, { "epoch": 0.1783619799014211, "grad_norm": 10.999593734741211, "learning_rate": 9.369415582835365e-06, "loss": 2.6368, "step": 684000 }, { "epoch": 0.17841413252712326, "grad_norm": 12.858357429504395, "learning_rate": 9.369009146069828e-06, "loss": 2.5957, "step": 684200 }, { "epoch": 0.17846628515282545, "grad_norm": 12.619375228881836, "learning_rate": 9.36860258718545e-06, "loss": 2.55, "step": 684400 }, { "epoch": 0.1785184377785276, "grad_norm": 11.915067672729492, "learning_rate": 9.368195906193593e-06, "loss": 2.5971, "step": 684600 }, { "epoch": 0.1785705904042298, "grad_norm": 15.2025146484375, "learning_rate": 9.367789103105623e-06, "loss": 2.5893, "step": 684800 }, { "epoch": 0.17862274302993195, "grad_norm": 11.45402717590332, "learning_rate": 9.367382177932911e-06, "loss": 2.5893, "step": 685000 }, { "epoch": 0.17867489565563413, "grad_norm": 12.089704513549805, "learning_rate": 9.36697513068683e-06, "loss": 2.5795, "step": 685200 }, { "epoch": 0.1787270482813363, "grad_norm": 12.791685104370117, "learning_rate": 9.366567961378762e-06, "loss": 2.5793, "step": 685400 }, { "epoch": 0.17877920090703847, "grad_norm": 11.997780799865723, "learning_rate": 9.366160670020084e-06, "loss": 2.5665, "step": 685600 }, { "epoch": 0.17883135353274063, "grad_norm": 10.478496551513672, "learning_rate": 9.365753256622178e-06, "loss": 2.5858, "step": 685800 }, { "epoch": 0.1788835061584428, "grad_norm": 9.763985633850098, "learning_rate": 9.365345721196436e-06, "loss": 2.5637, "step": 686000 }, { "epoch": 0.17893565878414497, "grad_norm": 12.446106910705566, "learning_rate": 9.36493806375425e-06, "loss": 2.5734, "step": 686200 }, { "epoch": 0.17898781140984715, "grad_norm": 12.356990814208984, "learning_rate": 9.364530284307009e-06, "loss": 2.5836, "step": 686400 }, { "epoch": 0.1790399640355493, "grad_norm": 12.088711738586426, "learning_rate": 9.364122382866117e-06, "loss": 2.5795, "step": 686600 }, { "epoch": 0.1790921166612515, "grad_norm": 10.435306549072266, "learning_rate": 9.363714359442971e-06, "loss": 2.6026, "step": 686800 }, { "epoch": 0.17914426928695365, "grad_norm": 11.215888023376465, "learning_rate": 9.363306214048975e-06, "loss": 2.577, "step": 687000 }, { "epoch": 0.17919642191265583, "grad_norm": 13.190821647644043, "learning_rate": 9.362897946695542e-06, "loss": 2.6001, "step": 687200 }, { "epoch": 0.179248574538358, "grad_norm": 11.123869895935059, "learning_rate": 9.362489557394079e-06, "loss": 2.6028, "step": 687400 }, { "epoch": 0.17930072716406018, "grad_norm": 11.40549373626709, "learning_rate": 9.362081046156004e-06, "loss": 2.6268, "step": 687600 }, { "epoch": 0.17935287978976233, "grad_norm": 11.619917869567871, "learning_rate": 9.361672412992734e-06, "loss": 2.6188, "step": 687800 }, { "epoch": 0.17940503241546452, "grad_norm": 11.15652084350586, "learning_rate": 9.36126365791569e-06, "loss": 2.5824, "step": 688000 }, { "epoch": 0.17945718504116667, "grad_norm": 11.156540870666504, "learning_rate": 9.360854780936298e-06, "loss": 2.581, "step": 688200 }, { "epoch": 0.17950933766686886, "grad_norm": 11.437117576599121, "learning_rate": 9.360445782065989e-06, "loss": 2.5932, "step": 688400 }, { "epoch": 0.179561490292571, "grad_norm": 11.62105655670166, "learning_rate": 9.36003666131619e-06, "loss": 2.5851, "step": 688600 }, { "epoch": 0.1796136429182732, "grad_norm": 14.763154029846191, "learning_rate": 9.359627418698338e-06, "loss": 2.6017, "step": 688800 }, { "epoch": 0.17966579554397535, "grad_norm": 12.010329246520996, "learning_rate": 9.359218054223876e-06, "loss": 2.5934, "step": 689000 }, { "epoch": 0.17971794816967754, "grad_norm": 12.975419998168945, "learning_rate": 9.35880856790424e-06, "loss": 2.5678, "step": 689200 }, { "epoch": 0.1797701007953797, "grad_norm": 11.763967514038086, "learning_rate": 9.35839895975088e-06, "loss": 2.5383, "step": 689400 }, { "epoch": 0.17982225342108185, "grad_norm": 11.407500267028809, "learning_rate": 9.357989229775245e-06, "loss": 2.5591, "step": 689600 }, { "epoch": 0.17987440604678404, "grad_norm": 10.644564628601074, "learning_rate": 9.357579377988786e-06, "loss": 2.5843, "step": 689800 }, { "epoch": 0.1799265586724862, "grad_norm": 11.088133811950684, "learning_rate": 9.357169404402958e-06, "loss": 2.592, "step": 690000 }, { "epoch": 0.17997871129818838, "grad_norm": 11.872809410095215, "learning_rate": 9.35675930902922e-06, "loss": 2.5783, "step": 690200 }, { "epoch": 0.18003086392389053, "grad_norm": 12.999768257141113, "learning_rate": 9.356349091879037e-06, "loss": 2.6072, "step": 690400 }, { "epoch": 0.18008301654959272, "grad_norm": 13.067625045776367, "learning_rate": 9.355938752963874e-06, "loss": 2.5654, "step": 690600 }, { "epoch": 0.18013516917529487, "grad_norm": 11.797418594360352, "learning_rate": 9.355528292295199e-06, "loss": 2.5624, "step": 690800 }, { "epoch": 0.18018732180099706, "grad_norm": 10.904340744018555, "learning_rate": 9.355117709884487e-06, "loss": 2.5769, "step": 691000 }, { "epoch": 0.18023947442669921, "grad_norm": 10.31658935546875, "learning_rate": 9.354707005743213e-06, "loss": 2.5788, "step": 691200 }, { "epoch": 0.1802916270524014, "grad_norm": 12.880640029907227, "learning_rate": 9.354296179882858e-06, "loss": 2.588, "step": 691400 }, { "epoch": 0.18034377967810356, "grad_norm": 12.985132217407227, "learning_rate": 9.353885232314902e-06, "loss": 2.599, "step": 691600 }, { "epoch": 0.18039593230380574, "grad_norm": 9.16226863861084, "learning_rate": 9.353474163050835e-06, "loss": 2.5426, "step": 691800 }, { "epoch": 0.1804480849295079, "grad_norm": 12.11599349975586, "learning_rate": 9.353062972102146e-06, "loss": 2.5754, "step": 692000 }, { "epoch": 0.18050023755521008, "grad_norm": 12.336219787597656, "learning_rate": 9.352651659480327e-06, "loss": 2.5988, "step": 692200 }, { "epoch": 0.18055239018091224, "grad_norm": 10.14101505279541, "learning_rate": 9.352240225196876e-06, "loss": 2.5971, "step": 692400 }, { "epoch": 0.18060454280661442, "grad_norm": 12.834318161010742, "learning_rate": 9.35182866926329e-06, "loss": 2.5767, "step": 692600 }, { "epoch": 0.18065669543231658, "grad_norm": 10.38381576538086, "learning_rate": 9.351416991691078e-06, "loss": 2.5789, "step": 692800 }, { "epoch": 0.18070884805801876, "grad_norm": 11.648655891418457, "learning_rate": 9.351005192491742e-06, "loss": 2.5979, "step": 693000 }, { "epoch": 0.18076100068372092, "grad_norm": 11.521854400634766, "learning_rate": 9.350593271676793e-06, "loss": 2.5649, "step": 693200 }, { "epoch": 0.1808131533094231, "grad_norm": 12.451175689697266, "learning_rate": 9.350181229257748e-06, "loss": 2.5619, "step": 693400 }, { "epoch": 0.18086530593512526, "grad_norm": 11.495298385620117, "learning_rate": 9.34976906524612e-06, "loss": 2.5701, "step": 693600 }, { "epoch": 0.18091745856082744, "grad_norm": 12.428627967834473, "learning_rate": 9.349356779653432e-06, "loss": 2.5957, "step": 693800 }, { "epoch": 0.1809696111865296, "grad_norm": 12.761686325073242, "learning_rate": 9.348944372491207e-06, "loss": 2.577, "step": 694000 }, { "epoch": 0.18102176381223178, "grad_norm": 13.41728401184082, "learning_rate": 9.348531843770971e-06, "loss": 2.5966, "step": 694200 }, { "epoch": 0.18107391643793394, "grad_norm": 12.403990745544434, "learning_rate": 9.348119193504256e-06, "loss": 2.5791, "step": 694400 }, { "epoch": 0.18112606906363612, "grad_norm": 11.769331932067871, "learning_rate": 9.347706421702598e-06, "loss": 2.5963, "step": 694600 }, { "epoch": 0.18117822168933828, "grad_norm": 12.589640617370605, "learning_rate": 9.347293528377532e-06, "loss": 2.6107, "step": 694800 }, { "epoch": 0.18123037431504047, "grad_norm": 13.722719192504883, "learning_rate": 9.346880513540598e-06, "loss": 2.5917, "step": 695000 }, { "epoch": 0.18128252694074262, "grad_norm": 9.757765769958496, "learning_rate": 9.34646737720334e-06, "loss": 2.585, "step": 695200 }, { "epoch": 0.18133467956644478, "grad_norm": 11.891173362731934, "learning_rate": 9.34605411937731e-06, "loss": 2.5579, "step": 695400 }, { "epoch": 0.18138683219214696, "grad_norm": 10.937880516052246, "learning_rate": 9.345640740074055e-06, "loss": 2.5802, "step": 695600 }, { "epoch": 0.18143898481784912, "grad_norm": 11.980300903320312, "learning_rate": 9.345227239305133e-06, "loss": 2.5721, "step": 695800 }, { "epoch": 0.1814911374435513, "grad_norm": 10.967789649963379, "learning_rate": 9.344813617082096e-06, "loss": 2.6024, "step": 696000 }, { "epoch": 0.18154329006925346, "grad_norm": 12.861534118652344, "learning_rate": 9.34439987341651e-06, "loss": 2.619, "step": 696200 }, { "epoch": 0.18159544269495564, "grad_norm": 12.319485664367676, "learning_rate": 9.343986008319938e-06, "loss": 2.5858, "step": 696400 }, { "epoch": 0.1816475953206578, "grad_norm": 11.069070816040039, "learning_rate": 9.343572021803948e-06, "loss": 2.6071, "step": 696600 }, { "epoch": 0.18169974794635999, "grad_norm": 12.012398719787598, "learning_rate": 9.343157913880113e-06, "loss": 2.6151, "step": 696800 }, { "epoch": 0.18175190057206214, "grad_norm": 9.027801513671875, "learning_rate": 9.342743684560006e-06, "loss": 2.6, "step": 697000 }, { "epoch": 0.18180405319776433, "grad_norm": 13.315918922424316, "learning_rate": 9.342329333855205e-06, "loss": 2.5678, "step": 697200 }, { "epoch": 0.18185620582346648, "grad_norm": 11.573213577270508, "learning_rate": 9.341914861777293e-06, "loss": 2.5673, "step": 697400 }, { "epoch": 0.18190835844916867, "grad_norm": 10.803704261779785, "learning_rate": 9.341500268337853e-06, "loss": 2.5895, "step": 697600 }, { "epoch": 0.18196051107487082, "grad_norm": 11.29845142364502, "learning_rate": 9.341085553548474e-06, "loss": 2.5807, "step": 697800 }, { "epoch": 0.182012663700573, "grad_norm": 12.04796314239502, "learning_rate": 9.34067071742075e-06, "loss": 2.5675, "step": 698000 }, { "epoch": 0.18206481632627516, "grad_norm": 12.755510330200195, "learning_rate": 9.340255759966274e-06, "loss": 2.5761, "step": 698200 }, { "epoch": 0.18211696895197735, "grad_norm": 11.835643768310547, "learning_rate": 9.339840681196645e-06, "loss": 2.5841, "step": 698400 }, { "epoch": 0.1821691215776795, "grad_norm": 11.568483352661133, "learning_rate": 9.339425481123465e-06, "loss": 2.5586, "step": 698600 }, { "epoch": 0.1822212742033817, "grad_norm": 13.110424041748047, "learning_rate": 9.33901015975834e-06, "loss": 2.6143, "step": 698800 }, { "epoch": 0.18227342682908385, "grad_norm": 11.408985137939453, "learning_rate": 9.338594717112877e-06, "loss": 2.5753, "step": 699000 }, { "epoch": 0.18232557945478603, "grad_norm": 10.737070083618164, "learning_rate": 9.33817915319869e-06, "loss": 2.5876, "step": 699200 }, { "epoch": 0.1823777320804882, "grad_norm": 12.560068130493164, "learning_rate": 9.337763468027392e-06, "loss": 2.5513, "step": 699400 }, { "epoch": 0.18242988470619037, "grad_norm": 11.872684478759766, "learning_rate": 9.337347661610604e-06, "loss": 2.5624, "step": 699600 }, { "epoch": 0.18248203733189253, "grad_norm": 11.913339614868164, "learning_rate": 9.336931733959948e-06, "loss": 2.538, "step": 699800 }, { "epoch": 0.1825341899575947, "grad_norm": 10.39101791381836, "learning_rate": 9.336515685087048e-06, "loss": 2.5836, "step": 700000 }, { "epoch": 0.18258634258329687, "grad_norm": 8.983271598815918, "learning_rate": 9.336099515003538e-06, "loss": 2.5803, "step": 700200 }, { "epoch": 0.18263849520899905, "grad_norm": 12.488344192504883, "learning_rate": 9.335683223721044e-06, "loss": 2.5908, "step": 700400 }, { "epoch": 0.1826906478347012, "grad_norm": 10.796935081481934, "learning_rate": 9.335266811251205e-06, "loss": 2.6309, "step": 700600 }, { "epoch": 0.1827428004604034, "grad_norm": 10.452614784240723, "learning_rate": 9.33485027760566e-06, "loss": 2.5893, "step": 700800 }, { "epoch": 0.18279495308610555, "grad_norm": 10.865897178649902, "learning_rate": 9.33443362279605e-06, "loss": 2.5761, "step": 701000 }, { "epoch": 0.1828471057118077, "grad_norm": 12.461289405822754, "learning_rate": 9.334016846834024e-06, "loss": 2.5851, "step": 701200 }, { "epoch": 0.1828992583375099, "grad_norm": 10.119328498840332, "learning_rate": 9.333599949731232e-06, "loss": 2.5364, "step": 701400 }, { "epoch": 0.18295141096321205, "grad_norm": 12.12794017791748, "learning_rate": 9.33318293149932e-06, "loss": 2.6064, "step": 701600 }, { "epoch": 0.18300356358891423, "grad_norm": 12.383764266967773, "learning_rate": 9.332765792149951e-06, "loss": 2.6052, "step": 701800 }, { "epoch": 0.1830557162146164, "grad_norm": 10.087251663208008, "learning_rate": 9.332348531694783e-06, "loss": 2.5666, "step": 702000 }, { "epoch": 0.18310786884031857, "grad_norm": 12.394096374511719, "learning_rate": 9.331931150145478e-06, "loss": 2.6195, "step": 702200 }, { "epoch": 0.18316002146602073, "grad_norm": 12.181896209716797, "learning_rate": 9.331513647513702e-06, "loss": 2.5802, "step": 702400 }, { "epoch": 0.1832121740917229, "grad_norm": 12.717833518981934, "learning_rate": 9.331096023811125e-06, "loss": 2.5753, "step": 702600 }, { "epoch": 0.18326432671742507, "grad_norm": 11.721474647521973, "learning_rate": 9.33067827904942e-06, "loss": 2.6129, "step": 702800 }, { "epoch": 0.18331647934312725, "grad_norm": 10.442642211914062, "learning_rate": 9.330260413240265e-06, "loss": 2.614, "step": 703000 }, { "epoch": 0.1833686319688294, "grad_norm": 10.827333450317383, "learning_rate": 9.329842426395335e-06, "loss": 2.5673, "step": 703200 }, { "epoch": 0.1834207845945316, "grad_norm": 11.326786994934082, "learning_rate": 9.32942431852632e-06, "loss": 2.5926, "step": 703400 }, { "epoch": 0.18347293722023375, "grad_norm": 11.326935768127441, "learning_rate": 9.329006089644902e-06, "loss": 2.6239, "step": 703600 }, { "epoch": 0.18352508984593593, "grad_norm": 13.342899322509766, "learning_rate": 9.328587739762774e-06, "loss": 2.5926, "step": 703800 }, { "epoch": 0.1835772424716381, "grad_norm": 10.269484519958496, "learning_rate": 9.328169268891624e-06, "loss": 2.5741, "step": 704000 }, { "epoch": 0.18362939509734028, "grad_norm": 12.175603866577148, "learning_rate": 9.327750677043156e-06, "loss": 2.5677, "step": 704200 }, { "epoch": 0.18368154772304243, "grad_norm": 11.766140937805176, "learning_rate": 9.327331964229066e-06, "loss": 2.5791, "step": 704400 }, { "epoch": 0.18373370034874462, "grad_norm": 10.756917953491211, "learning_rate": 9.326913130461056e-06, "loss": 2.5927, "step": 704600 }, { "epoch": 0.18378585297444677, "grad_norm": 11.94490909576416, "learning_rate": 9.326494175750836e-06, "loss": 2.6214, "step": 704800 }, { "epoch": 0.18383800560014896, "grad_norm": 12.475340843200684, "learning_rate": 9.326075100110115e-06, "loss": 2.5548, "step": 705000 }, { "epoch": 0.1838901582258511, "grad_norm": 10.366930961608887, "learning_rate": 9.325655903550605e-06, "loss": 2.5704, "step": 705200 }, { "epoch": 0.1839423108515533, "grad_norm": 13.210090637207031, "learning_rate": 9.325236586084028e-06, "loss": 2.5656, "step": 705400 }, { "epoch": 0.18399446347725545, "grad_norm": 12.236307144165039, "learning_rate": 9.3248171477221e-06, "loss": 2.549, "step": 705600 }, { "epoch": 0.18404661610295764, "grad_norm": 12.707765579223633, "learning_rate": 9.324397588476545e-06, "loss": 2.5777, "step": 705800 }, { "epoch": 0.1840987687286598, "grad_norm": 12.61831283569336, "learning_rate": 9.323977908359093e-06, "loss": 2.6, "step": 706000 }, { "epoch": 0.18415092135436198, "grad_norm": 11.035533905029297, "learning_rate": 9.323558107381472e-06, "loss": 2.5415, "step": 706200 }, { "epoch": 0.18420307398006414, "grad_norm": 10.974006652832031, "learning_rate": 9.323138185555416e-06, "loss": 2.6035, "step": 706400 }, { "epoch": 0.18425522660576632, "grad_norm": 11.0816650390625, "learning_rate": 9.322718142892663e-06, "loss": 2.5626, "step": 706600 }, { "epoch": 0.18430737923146848, "grad_norm": 11.282931327819824, "learning_rate": 9.322297979404955e-06, "loss": 2.5733, "step": 706800 }, { "epoch": 0.18435953185717063, "grad_norm": 11.803282737731934, "learning_rate": 9.321877695104034e-06, "loss": 2.5855, "step": 707000 }, { "epoch": 0.18441168448287282, "grad_norm": 11.176085472106934, "learning_rate": 9.321457290001646e-06, "loss": 2.5848, "step": 707200 }, { "epoch": 0.18446383710857497, "grad_norm": 10.337486267089844, "learning_rate": 9.321036764109546e-06, "loss": 2.5328, "step": 707400 }, { "epoch": 0.18451598973427716, "grad_norm": 11.199088096618652, "learning_rate": 9.320616117439486e-06, "loss": 2.5684, "step": 707600 }, { "epoch": 0.18456814235997931, "grad_norm": 11.944901466369629, "learning_rate": 9.320195350003223e-06, "loss": 2.5778, "step": 707800 }, { "epoch": 0.1846202949856815, "grad_norm": 12.067788124084473, "learning_rate": 9.31977446181252e-06, "loss": 2.6062, "step": 708000 }, { "epoch": 0.18467244761138366, "grad_norm": 11.054697036743164, "learning_rate": 9.319353452879139e-06, "loss": 2.5903, "step": 708200 }, { "epoch": 0.18472460023708584, "grad_norm": 12.969680786132812, "learning_rate": 9.31893232321485e-06, "loss": 2.5689, "step": 708400 }, { "epoch": 0.184776752862788, "grad_norm": 12.888757705688477, "learning_rate": 9.31851107283142e-06, "loss": 2.593, "step": 708600 }, { "epoch": 0.18482890548849018, "grad_norm": 11.72866153717041, "learning_rate": 9.318089701740627e-06, "loss": 2.5896, "step": 708800 }, { "epoch": 0.18488105811419234, "grad_norm": 12.54987621307373, "learning_rate": 9.317668209954248e-06, "loss": 2.5961, "step": 709000 }, { "epoch": 0.18493321073989452, "grad_norm": 12.830785751342773, "learning_rate": 9.317246597484065e-06, "loss": 2.5997, "step": 709200 }, { "epoch": 0.18498536336559668, "grad_norm": 10.779577255249023, "learning_rate": 9.31682486434186e-06, "loss": 2.608, "step": 709400 }, { "epoch": 0.18503751599129886, "grad_norm": 10.413871765136719, "learning_rate": 9.316403010539424e-06, "loss": 2.5584, "step": 709600 }, { "epoch": 0.18508966861700102, "grad_norm": 12.727044105529785, "learning_rate": 9.315981036088547e-06, "loss": 2.5805, "step": 709800 }, { "epoch": 0.1851418212427032, "grad_norm": 10.990044593811035, "learning_rate": 9.315558941001022e-06, "loss": 2.5659, "step": 710000 }, { "epoch": 0.18519397386840536, "grad_norm": 13.053669929504395, "learning_rate": 9.315136725288648e-06, "loss": 2.5645, "step": 710200 }, { "epoch": 0.18524612649410754, "grad_norm": 12.4849853515625, "learning_rate": 9.314714388963229e-06, "loss": 2.5712, "step": 710400 }, { "epoch": 0.1852982791198097, "grad_norm": 13.245108604431152, "learning_rate": 9.314291932036568e-06, "loss": 2.589, "step": 710600 }, { "epoch": 0.18535043174551188, "grad_norm": 11.096653938293457, "learning_rate": 9.31386935452047e-06, "loss": 2.5642, "step": 710800 }, { "epoch": 0.18540258437121404, "grad_norm": 12.3607177734375, "learning_rate": 9.313446656426752e-06, "loss": 2.5596, "step": 711000 }, { "epoch": 0.18545473699691623, "grad_norm": 11.200916290283203, "learning_rate": 9.313023837767225e-06, "loss": 2.5321, "step": 711200 }, { "epoch": 0.18550688962261838, "grad_norm": 12.221725463867188, "learning_rate": 9.31260089855371e-06, "loss": 2.5827, "step": 711400 }, { "epoch": 0.18555904224832057, "grad_norm": 11.39847469329834, "learning_rate": 9.312177838798028e-06, "loss": 2.581, "step": 711600 }, { "epoch": 0.18561119487402272, "grad_norm": 11.100894927978516, "learning_rate": 9.311754658512003e-06, "loss": 2.5598, "step": 711800 }, { "epoch": 0.1856633474997249, "grad_norm": 11.927329063415527, "learning_rate": 9.31133135770746e-06, "loss": 2.5918, "step": 712000 }, { "epoch": 0.18571550012542706, "grad_norm": 12.195674896240234, "learning_rate": 9.310907936396238e-06, "loss": 2.5352, "step": 712200 }, { "epoch": 0.18576765275112925, "grad_norm": 12.455114364624023, "learning_rate": 9.310484394590168e-06, "loss": 2.5571, "step": 712400 }, { "epoch": 0.1858198053768314, "grad_norm": 12.462471961975098, "learning_rate": 9.310060732301087e-06, "loss": 2.5686, "step": 712600 }, { "epoch": 0.1858719580025336, "grad_norm": 12.136844635009766, "learning_rate": 9.30963694954084e-06, "loss": 2.5705, "step": 712800 }, { "epoch": 0.18592411062823574, "grad_norm": 13.6740083694458, "learning_rate": 9.309213046321272e-06, "loss": 2.568, "step": 713000 }, { "epoch": 0.1859762632539379, "grad_norm": 10.739278793334961, "learning_rate": 9.308789022654233e-06, "loss": 2.576, "step": 713200 }, { "epoch": 0.18602841587964009, "grad_norm": 12.809683799743652, "learning_rate": 9.308364878551567e-06, "loss": 2.5984, "step": 713400 }, { "epoch": 0.18608056850534224, "grad_norm": 11.436633110046387, "learning_rate": 9.30794061402514e-06, "loss": 2.5605, "step": 713600 }, { "epoch": 0.18613272113104443, "grad_norm": 10.629294395446777, "learning_rate": 9.307516229086802e-06, "loss": 2.5838, "step": 713800 }, { "epoch": 0.18618487375674658, "grad_norm": 11.265149116516113, "learning_rate": 9.30709172374842e-06, "loss": 2.5753, "step": 714000 }, { "epoch": 0.18623702638244877, "grad_norm": 13.775673866271973, "learning_rate": 9.306667098021858e-06, "loss": 2.5764, "step": 714200 }, { "epoch": 0.18628917900815092, "grad_norm": 10.820959091186523, "learning_rate": 9.306242351918983e-06, "loss": 2.5516, "step": 714400 }, { "epoch": 0.1863413316338531, "grad_norm": 11.11262035369873, "learning_rate": 9.305817485451672e-06, "loss": 2.5498, "step": 714600 }, { "epoch": 0.18639348425955526, "grad_norm": 12.226655006408691, "learning_rate": 9.305392498631796e-06, "loss": 2.5372, "step": 714800 }, { "epoch": 0.18644563688525745, "grad_norm": 11.85600757598877, "learning_rate": 9.304967391471236e-06, "loss": 2.628, "step": 715000 }, { "epoch": 0.1864977895109596, "grad_norm": 10.384273529052734, "learning_rate": 9.304542163981874e-06, "loss": 2.5591, "step": 715200 }, { "epoch": 0.1865499421366618, "grad_norm": 10.744622230529785, "learning_rate": 9.304116816175593e-06, "loss": 2.5466, "step": 715400 }, { "epoch": 0.18660209476236395, "grad_norm": 14.192846298217773, "learning_rate": 9.303691348064285e-06, "loss": 2.6048, "step": 715600 }, { "epoch": 0.18665424738806613, "grad_norm": 10.374147415161133, "learning_rate": 9.303265759659842e-06, "loss": 2.5472, "step": 715800 }, { "epoch": 0.1867064000137683, "grad_norm": 12.727148056030273, "learning_rate": 9.30284005097416e-06, "loss": 2.5792, "step": 716000 }, { "epoch": 0.18675855263947047, "grad_norm": 14.218303680419922, "learning_rate": 9.302414222019136e-06, "loss": 2.5839, "step": 716200 }, { "epoch": 0.18681070526517263, "grad_norm": 11.11287784576416, "learning_rate": 9.301988272806675e-06, "loss": 2.5779, "step": 716400 }, { "epoch": 0.1868628578908748, "grad_norm": 9.908751487731934, "learning_rate": 9.30156220334868e-06, "loss": 2.562, "step": 716600 }, { "epoch": 0.18691501051657697, "grad_norm": 10.67510986328125, "learning_rate": 9.301136013657061e-06, "loss": 2.6033, "step": 716800 }, { "epoch": 0.18696716314227915, "grad_norm": 10.921405792236328, "learning_rate": 9.300709703743733e-06, "loss": 2.5775, "step": 717000 }, { "epoch": 0.1870193157679813, "grad_norm": 11.9124116897583, "learning_rate": 9.300283273620607e-06, "loss": 2.5652, "step": 717200 }, { "epoch": 0.1870714683936835, "grad_norm": 13.153666496276855, "learning_rate": 9.299856723299608e-06, "loss": 2.5931, "step": 717400 }, { "epoch": 0.18712362101938565, "grad_norm": 11.42650318145752, "learning_rate": 9.299430052792655e-06, "loss": 2.5649, "step": 717600 }, { "epoch": 0.18717577364508783, "grad_norm": 14.207989692687988, "learning_rate": 9.299003262111672e-06, "loss": 2.5943, "step": 717800 }, { "epoch": 0.18722792627079, "grad_norm": 12.080742835998535, "learning_rate": 9.298576351268594e-06, "loss": 2.5583, "step": 718000 }, { "epoch": 0.18728007889649217, "grad_norm": 10.130331039428711, "learning_rate": 9.298149320275349e-06, "loss": 2.5811, "step": 718200 }, { "epoch": 0.18733223152219433, "grad_norm": 11.960268020629883, "learning_rate": 9.297722169143875e-06, "loss": 2.5706, "step": 718400 }, { "epoch": 0.18738438414789652, "grad_norm": 12.298662185668945, "learning_rate": 9.29729489788611e-06, "loss": 2.5851, "step": 718600 }, { "epoch": 0.18743653677359867, "grad_norm": 12.417919158935547, "learning_rate": 9.296867506514e-06, "loss": 2.557, "step": 718800 }, { "epoch": 0.18748868939930083, "grad_norm": 11.724634170532227, "learning_rate": 9.296439995039488e-06, "loss": 2.571, "step": 719000 }, { "epoch": 0.187540842025003, "grad_norm": 13.457600593566895, "learning_rate": 9.296012363474523e-06, "loss": 2.6037, "step": 719200 }, { "epoch": 0.18759299465070517, "grad_norm": 15.612174987792969, "learning_rate": 9.295584611831059e-06, "loss": 2.5789, "step": 719400 }, { "epoch": 0.18764514727640735, "grad_norm": 11.381507873535156, "learning_rate": 9.295156740121052e-06, "loss": 2.5929, "step": 719600 }, { "epoch": 0.1876972999021095, "grad_norm": 11.643939971923828, "learning_rate": 9.294728748356463e-06, "loss": 2.5697, "step": 719800 }, { "epoch": 0.1877494525278117, "grad_norm": 12.142743110656738, "learning_rate": 9.294300636549251e-06, "loss": 2.5505, "step": 720000 }, { "epoch": 0.18780160515351385, "grad_norm": 12.137763023376465, "learning_rate": 9.293872404711388e-06, "loss": 2.6389, "step": 720200 }, { "epoch": 0.18785375777921604, "grad_norm": 12.277729988098145, "learning_rate": 9.29344405285484e-06, "loss": 2.5933, "step": 720400 }, { "epoch": 0.1879059104049182, "grad_norm": 11.714141845703125, "learning_rate": 9.293015580991577e-06, "loss": 2.5719, "step": 720600 }, { "epoch": 0.18795806303062038, "grad_norm": 11.93021297454834, "learning_rate": 9.292586989133581e-06, "loss": 2.5805, "step": 720800 }, { "epoch": 0.18801021565632253, "grad_norm": 12.212942123413086, "learning_rate": 9.292158277292828e-06, "loss": 2.5759, "step": 721000 }, { "epoch": 0.18806236828202472, "grad_norm": 10.348050117492676, "learning_rate": 9.2917294454813e-06, "loss": 2.5756, "step": 721200 }, { "epoch": 0.18811452090772687, "grad_norm": 11.41096305847168, "learning_rate": 9.29130049371099e-06, "loss": 2.589, "step": 721400 }, { "epoch": 0.18816667353342906, "grad_norm": 11.99845027923584, "learning_rate": 9.290871421993881e-06, "loss": 2.5932, "step": 721600 }, { "epoch": 0.18821882615913121, "grad_norm": 13.32406234741211, "learning_rate": 9.290442230341967e-06, "loss": 2.5926, "step": 721800 }, { "epoch": 0.1882709787848334, "grad_norm": 12.009814262390137, "learning_rate": 9.290012918767248e-06, "loss": 2.5701, "step": 722000 }, { "epoch": 0.18832313141053555, "grad_norm": 13.789295196533203, "learning_rate": 9.289583487281719e-06, "loss": 2.576, "step": 722200 }, { "epoch": 0.18837528403623774, "grad_norm": 10.698989868164062, "learning_rate": 9.289153935897387e-06, "loss": 2.5756, "step": 722400 }, { "epoch": 0.1884274366619399, "grad_norm": 12.086974143981934, "learning_rate": 9.288724264626256e-06, "loss": 2.5829, "step": 722600 }, { "epoch": 0.18847958928764208, "grad_norm": 11.715788841247559, "learning_rate": 9.288294473480337e-06, "loss": 2.5337, "step": 722800 }, { "epoch": 0.18853174191334424, "grad_norm": 10.705584526062012, "learning_rate": 9.287864562471644e-06, "loss": 2.5735, "step": 723000 }, { "epoch": 0.18858389453904642, "grad_norm": 12.674153327941895, "learning_rate": 9.287434531612192e-06, "loss": 2.5843, "step": 723200 }, { "epoch": 0.18863604716474858, "grad_norm": 12.425131797790527, "learning_rate": 9.287004380914e-06, "loss": 2.5627, "step": 723400 }, { "epoch": 0.18868819979045076, "grad_norm": 11.173230171203613, "learning_rate": 9.286574110389094e-06, "loss": 2.5609, "step": 723600 }, { "epoch": 0.18874035241615292, "grad_norm": 12.521120071411133, "learning_rate": 9.286143720049498e-06, "loss": 2.5708, "step": 723800 }, { "epoch": 0.1887925050418551, "grad_norm": 10.894947052001953, "learning_rate": 9.285713209907243e-06, "loss": 2.524, "step": 724000 }, { "epoch": 0.18884465766755726, "grad_norm": 11.677881240844727, "learning_rate": 9.285282579974362e-06, "loss": 2.6056, "step": 724200 }, { "epoch": 0.18889681029325944, "grad_norm": 12.158601760864258, "learning_rate": 9.284851830262892e-06, "loss": 2.6083, "step": 724400 }, { "epoch": 0.1889489629189616, "grad_norm": 13.806035995483398, "learning_rate": 9.284420960784873e-06, "loss": 2.6437, "step": 724600 }, { "epoch": 0.18900111554466376, "grad_norm": 12.63658332824707, "learning_rate": 9.283989971552348e-06, "loss": 2.5694, "step": 724800 }, { "epoch": 0.18905326817036594, "grad_norm": 13.176187515258789, "learning_rate": 9.283558862577363e-06, "loss": 2.5266, "step": 725000 }, { "epoch": 0.1891054207960681, "grad_norm": 11.850820541381836, "learning_rate": 9.28312763387197e-06, "loss": 2.5799, "step": 725200 }, { "epoch": 0.18915757342177028, "grad_norm": 12.633203506469727, "learning_rate": 9.282696285448219e-06, "loss": 2.5638, "step": 725400 }, { "epoch": 0.18920972604747244, "grad_norm": 11.37259292602539, "learning_rate": 9.28226481731817e-06, "loss": 2.5573, "step": 725600 }, { "epoch": 0.18926187867317462, "grad_norm": 12.123799324035645, "learning_rate": 9.28183322949388e-06, "loss": 2.5736, "step": 725800 }, { "epoch": 0.18931403129887678, "grad_norm": 11.90881061553955, "learning_rate": 9.281401521987416e-06, "loss": 2.5502, "step": 726000 }, { "epoch": 0.18936618392457896, "grad_norm": 12.91506576538086, "learning_rate": 9.280969694810844e-06, "loss": 2.5229, "step": 726200 }, { "epoch": 0.18941833655028112, "grad_norm": 11.650004386901855, "learning_rate": 9.28053774797623e-06, "loss": 2.5783, "step": 726400 }, { "epoch": 0.1894704891759833, "grad_norm": 12.156107902526855, "learning_rate": 9.280105681495652e-06, "loss": 2.604, "step": 726600 }, { "epoch": 0.18952264180168546, "grad_norm": 10.867297172546387, "learning_rate": 9.279673495381185e-06, "loss": 2.5855, "step": 726800 }, { "epoch": 0.18957479442738764, "grad_norm": 12.994755744934082, "learning_rate": 9.279241189644908e-06, "loss": 2.5467, "step": 727000 }, { "epoch": 0.1896269470530898, "grad_norm": 11.489134788513184, "learning_rate": 9.278808764298906e-06, "loss": 2.5654, "step": 727200 }, { "epoch": 0.18967909967879198, "grad_norm": 12.920225143432617, "learning_rate": 9.278376219355267e-06, "loss": 2.5749, "step": 727400 }, { "epoch": 0.18973125230449414, "grad_norm": 9.510746002197266, "learning_rate": 9.277943554826076e-06, "loss": 2.5537, "step": 727600 }, { "epoch": 0.18978340493019633, "grad_norm": 12.295012474060059, "learning_rate": 9.277510770723433e-06, "loss": 2.5912, "step": 727800 }, { "epoch": 0.18983555755589848, "grad_norm": 10.54262638092041, "learning_rate": 9.277077867059432e-06, "loss": 2.591, "step": 728000 }, { "epoch": 0.18988771018160067, "grad_norm": 11.955072402954102, "learning_rate": 9.276644843846171e-06, "loss": 2.5709, "step": 728200 }, { "epoch": 0.18993986280730282, "grad_norm": 10.959343910217285, "learning_rate": 9.276211701095756e-06, "loss": 2.5735, "step": 728400 }, { "epoch": 0.189992015433005, "grad_norm": 12.265323638916016, "learning_rate": 9.275778438820294e-06, "loss": 2.5402, "step": 728600 }, { "epoch": 0.19004416805870716, "grad_norm": 13.312240600585938, "learning_rate": 9.275345057031894e-06, "loss": 2.5894, "step": 728800 }, { "epoch": 0.19009632068440935, "grad_norm": 12.820699691772461, "learning_rate": 9.27491155574267e-06, "loss": 2.5495, "step": 729000 }, { "epoch": 0.1901484733101115, "grad_norm": 12.239838600158691, "learning_rate": 9.27447793496474e-06, "loss": 2.5772, "step": 729200 }, { "epoch": 0.1902006259358137, "grad_norm": 12.58304214477539, "learning_rate": 9.274044194710222e-06, "loss": 2.5978, "step": 729400 }, { "epoch": 0.19025277856151585, "grad_norm": 12.55810260772705, "learning_rate": 9.27361033499124e-06, "loss": 2.6003, "step": 729600 }, { "epoch": 0.19030493118721803, "grad_norm": 11.778428077697754, "learning_rate": 9.273176355819922e-06, "loss": 2.5742, "step": 729800 }, { "epoch": 0.19035708381292019, "grad_norm": 13.294259071350098, "learning_rate": 9.272742257208399e-06, "loss": 2.5477, "step": 730000 }, { "epoch": 0.19040923643862237, "grad_norm": 13.392877578735352, "learning_rate": 9.272308039168802e-06, "loss": 2.535, "step": 730200 }, { "epoch": 0.19046138906432453, "grad_norm": 11.833348274230957, "learning_rate": 9.271873701713269e-06, "loss": 2.5452, "step": 730400 }, { "epoch": 0.19051354169002668, "grad_norm": 12.308874130249023, "learning_rate": 9.27143924485394e-06, "loss": 2.5484, "step": 730600 }, { "epoch": 0.19056569431572887, "grad_norm": 10.478835105895996, "learning_rate": 9.27100466860296e-06, "loss": 2.5708, "step": 730800 }, { "epoch": 0.19061784694143102, "grad_norm": 11.751260757446289, "learning_rate": 9.270569972972475e-06, "loss": 2.5554, "step": 731000 }, { "epoch": 0.1906699995671332, "grad_norm": 13.028668403625488, "learning_rate": 9.270135157974633e-06, "loss": 2.5585, "step": 731200 }, { "epoch": 0.19072215219283536, "grad_norm": 11.351934432983398, "learning_rate": 9.269700223621591e-06, "loss": 2.5808, "step": 731400 }, { "epoch": 0.19077430481853755, "grad_norm": 11.560927391052246, "learning_rate": 9.269265169925506e-06, "loss": 2.5884, "step": 731600 }, { "epoch": 0.1908264574442397, "grad_norm": 11.362569808959961, "learning_rate": 9.268829996898537e-06, "loss": 2.6066, "step": 731800 }, { "epoch": 0.1908786100699419, "grad_norm": 12.66362476348877, "learning_rate": 9.268394704552845e-06, "loss": 2.5914, "step": 732000 }, { "epoch": 0.19093076269564405, "grad_norm": 10.702469825744629, "learning_rate": 9.2679592929006e-06, "loss": 2.5223, "step": 732200 }, { "epoch": 0.19098291532134623, "grad_norm": 12.114631652832031, "learning_rate": 9.267523761953973e-06, "loss": 2.5882, "step": 732400 }, { "epoch": 0.1910350679470484, "grad_norm": 10.952610969543457, "learning_rate": 9.267088111725135e-06, "loss": 2.5453, "step": 732600 }, { "epoch": 0.19108722057275057, "grad_norm": 11.303411483764648, "learning_rate": 9.266652342226264e-06, "loss": 2.5963, "step": 732800 }, { "epoch": 0.19113937319845273, "grad_norm": 11.67508602142334, "learning_rate": 9.26621645346954e-06, "loss": 2.5723, "step": 733000 }, { "epoch": 0.1911915258241549, "grad_norm": 11.651264190673828, "learning_rate": 9.265780445467148e-06, "loss": 2.5943, "step": 733200 }, { "epoch": 0.19124367844985707, "grad_norm": 12.28518009185791, "learning_rate": 9.265344318231274e-06, "loss": 2.5283, "step": 733400 }, { "epoch": 0.19129583107555925, "grad_norm": 13.750175476074219, "learning_rate": 9.264908071774108e-06, "loss": 2.5599, "step": 733600 }, { "epoch": 0.1913479837012614, "grad_norm": 12.860754013061523, "learning_rate": 9.264471706107846e-06, "loss": 2.5571, "step": 733800 }, { "epoch": 0.1914001363269636, "grad_norm": 11.767181396484375, "learning_rate": 9.264035221244678e-06, "loss": 2.5767, "step": 734000 }, { "epoch": 0.19145228895266575, "grad_norm": 13.27584171295166, "learning_rate": 9.263598617196812e-06, "loss": 2.5659, "step": 734200 }, { "epoch": 0.19150444157836793, "grad_norm": 11.297654151916504, "learning_rate": 9.263161893976446e-06, "loss": 2.5645, "step": 734400 }, { "epoch": 0.1915565942040701, "grad_norm": 13.385966300964355, "learning_rate": 9.262725051595793e-06, "loss": 2.5824, "step": 734600 }, { "epoch": 0.19160874682977228, "grad_norm": 12.745625495910645, "learning_rate": 9.262288090067058e-06, "loss": 2.5605, "step": 734800 }, { "epoch": 0.19166089945547443, "grad_norm": 13.604351043701172, "learning_rate": 9.261851009402457e-06, "loss": 2.5457, "step": 735000 }, { "epoch": 0.19171305208117662, "grad_norm": 14.262986183166504, "learning_rate": 9.261413809614204e-06, "loss": 2.5429, "step": 735200 }, { "epoch": 0.19176520470687877, "grad_norm": 13.41905689239502, "learning_rate": 9.260976490714524e-06, "loss": 2.5912, "step": 735400 }, { "epoch": 0.19181735733258096, "grad_norm": 11.548351287841797, "learning_rate": 9.260539052715636e-06, "loss": 2.55, "step": 735600 }, { "epoch": 0.1918695099582831, "grad_norm": 12.51290512084961, "learning_rate": 9.26010149562977e-06, "loss": 2.557, "step": 735800 }, { "epoch": 0.1919216625839853, "grad_norm": 11.825644493103027, "learning_rate": 9.259663819469154e-06, "loss": 2.5657, "step": 736000 }, { "epoch": 0.19197381520968745, "grad_norm": 11.373788833618164, "learning_rate": 9.259226024246024e-06, "loss": 2.5175, "step": 736200 }, { "epoch": 0.19202596783538964, "grad_norm": 13.40487003326416, "learning_rate": 9.258788109972616e-06, "loss": 2.5853, "step": 736400 }, { "epoch": 0.1920781204610918, "grad_norm": 11.846600532531738, "learning_rate": 9.258350076661168e-06, "loss": 2.5468, "step": 736600 }, { "epoch": 0.19213027308679395, "grad_norm": 11.58352279663086, "learning_rate": 9.257911924323926e-06, "loss": 2.5769, "step": 736800 }, { "epoch": 0.19218242571249614, "grad_norm": 11.748197555541992, "learning_rate": 9.257473652973137e-06, "loss": 2.5518, "step": 737000 }, { "epoch": 0.1922345783381983, "grad_norm": 10.648813247680664, "learning_rate": 9.25703526262105e-06, "loss": 2.5885, "step": 737200 }, { "epoch": 0.19228673096390048, "grad_norm": 13.590847969055176, "learning_rate": 9.256596753279918e-06, "loss": 2.5561, "step": 737400 }, { "epoch": 0.19233888358960263, "grad_norm": 11.725268363952637, "learning_rate": 9.256158124962e-06, "loss": 2.5282, "step": 737600 }, { "epoch": 0.19239103621530482, "grad_norm": 12.270450592041016, "learning_rate": 9.255719377679552e-06, "loss": 2.5635, "step": 737800 }, { "epoch": 0.19244318884100697, "grad_norm": 10.960904121398926, "learning_rate": 9.255280511444842e-06, "loss": 2.5165, "step": 738000 }, { "epoch": 0.19249534146670916, "grad_norm": 11.803460121154785, "learning_rate": 9.254841526270137e-06, "loss": 2.5727, "step": 738200 }, { "epoch": 0.19254749409241131, "grad_norm": 12.786358833312988, "learning_rate": 9.254402422167704e-06, "loss": 2.5412, "step": 738400 }, { "epoch": 0.1925996467181135, "grad_norm": 12.822945594787598, "learning_rate": 9.253963199149818e-06, "loss": 2.5498, "step": 738600 }, { "epoch": 0.19265179934381566, "grad_norm": 11.774388313293457, "learning_rate": 9.253523857228754e-06, "loss": 2.5893, "step": 738800 }, { "epoch": 0.19270395196951784, "grad_norm": 12.514403343200684, "learning_rate": 9.253084396416795e-06, "loss": 2.6186, "step": 739000 }, { "epoch": 0.19275610459522, "grad_norm": 11.048097610473633, "learning_rate": 9.252644816726224e-06, "loss": 2.5628, "step": 739200 }, { "epoch": 0.19280825722092218, "grad_norm": 11.978404998779297, "learning_rate": 9.252205118169326e-06, "loss": 2.592, "step": 739400 }, { "epoch": 0.19286040984662434, "grad_norm": 11.403801918029785, "learning_rate": 9.25176530075839e-06, "loss": 2.5574, "step": 739600 }, { "epoch": 0.19291256247232652, "grad_norm": 13.35159969329834, "learning_rate": 9.251325364505715e-06, "loss": 2.5446, "step": 739800 }, { "epoch": 0.19296471509802868, "grad_norm": 11.937582969665527, "learning_rate": 9.250885309423592e-06, "loss": 2.5404, "step": 740000 }, { "epoch": 0.19301686772373086, "grad_norm": 12.94690990447998, "learning_rate": 9.250445135524325e-06, "loss": 2.5505, "step": 740200 }, { "epoch": 0.19306902034943302, "grad_norm": 13.668988227844238, "learning_rate": 9.250004842820212e-06, "loss": 2.5818, "step": 740400 }, { "epoch": 0.1931211729751352, "grad_norm": 11.1817626953125, "learning_rate": 9.249564431323566e-06, "loss": 2.574, "step": 740600 }, { "epoch": 0.19317332560083736, "grad_norm": 12.859142303466797, "learning_rate": 9.249123901046694e-06, "loss": 2.5295, "step": 740800 }, { "epoch": 0.19322547822653954, "grad_norm": 10.10409164428711, "learning_rate": 9.248683252001909e-06, "loss": 2.557, "step": 741000 }, { "epoch": 0.1932776308522417, "grad_norm": 13.226909637451172, "learning_rate": 9.248242484201528e-06, "loss": 2.5851, "step": 741200 }, { "epoch": 0.19332978347794388, "grad_norm": 12.193153381347656, "learning_rate": 9.247801597657871e-06, "loss": 2.5712, "step": 741400 }, { "epoch": 0.19338193610364604, "grad_norm": 12.625770568847656, "learning_rate": 9.24736059238326e-06, "loss": 2.5571, "step": 741600 }, { "epoch": 0.19343408872934822, "grad_norm": 13.078922271728516, "learning_rate": 9.246919468390026e-06, "loss": 2.5776, "step": 741800 }, { "epoch": 0.19348624135505038, "grad_norm": 12.740152359008789, "learning_rate": 9.246478225690496e-06, "loss": 2.5985, "step": 742000 }, { "epoch": 0.19353839398075257, "grad_norm": 13.259086608886719, "learning_rate": 9.246036864297001e-06, "loss": 2.5446, "step": 742200 }, { "epoch": 0.19359054660645472, "grad_norm": 12.893516540527344, "learning_rate": 9.245595384221881e-06, "loss": 2.5904, "step": 742400 }, { "epoch": 0.19364269923215688, "grad_norm": 11.758481979370117, "learning_rate": 9.245153785477475e-06, "loss": 2.5576, "step": 742600 }, { "epoch": 0.19369485185785906, "grad_norm": 11.747676849365234, "learning_rate": 9.244712068076125e-06, "loss": 2.5636, "step": 742800 }, { "epoch": 0.19374700448356122, "grad_norm": 12.008854866027832, "learning_rate": 9.24427023203018e-06, "loss": 2.5726, "step": 743000 }, { "epoch": 0.1937991571092634, "grad_norm": 9.737689971923828, "learning_rate": 9.243828277351985e-06, "loss": 2.5412, "step": 743200 }, { "epoch": 0.19385130973496556, "grad_norm": 13.055411338806152, "learning_rate": 9.2433862040539e-06, "loss": 2.5482, "step": 743400 }, { "epoch": 0.19390346236066774, "grad_norm": 11.527270317077637, "learning_rate": 9.242944012148273e-06, "loss": 2.5932, "step": 743600 }, { "epoch": 0.1939556149863699, "grad_norm": 11.464157104492188, "learning_rate": 9.242501701647473e-06, "loss": 2.5869, "step": 743800 }, { "epoch": 0.19400776761207209, "grad_norm": 13.078601837158203, "learning_rate": 9.242059272563858e-06, "loss": 2.54, "step": 744000 }, { "epoch": 0.19405992023777424, "grad_norm": 10.983396530151367, "learning_rate": 9.241616724909794e-06, "loss": 2.5761, "step": 744200 }, { "epoch": 0.19411207286347643, "grad_norm": 10.754619598388672, "learning_rate": 9.241174058697651e-06, "loss": 2.5597, "step": 744400 }, { "epoch": 0.19416422548917858, "grad_norm": 13.653790473937988, "learning_rate": 9.240731273939804e-06, "loss": 2.5787, "step": 744600 }, { "epoch": 0.19421637811488077, "grad_norm": 11.377376556396484, "learning_rate": 9.240288370648628e-06, "loss": 2.5587, "step": 744800 }, { "epoch": 0.19426853074058292, "grad_norm": 10.749224662780762, "learning_rate": 9.239845348836503e-06, "loss": 2.5793, "step": 745000 }, { "epoch": 0.1943206833662851, "grad_norm": 13.052656173706055, "learning_rate": 9.23940220851581e-06, "loss": 2.5555, "step": 745200 }, { "epoch": 0.19437283599198726, "grad_norm": 12.965775489807129, "learning_rate": 9.23895894969894e-06, "loss": 2.5797, "step": 745400 }, { "epoch": 0.19442498861768945, "grad_norm": 13.597799301147461, "learning_rate": 9.238515572398278e-06, "loss": 2.5492, "step": 745600 }, { "epoch": 0.1944771412433916, "grad_norm": 11.634117126464844, "learning_rate": 9.23807207662622e-06, "loss": 2.5746, "step": 745800 }, { "epoch": 0.1945292938690938, "grad_norm": 12.693094253540039, "learning_rate": 9.237628462395158e-06, "loss": 2.6023, "step": 746000 }, { "epoch": 0.19458144649479595, "grad_norm": 15.489215850830078, "learning_rate": 9.237184729717496e-06, "loss": 2.5555, "step": 746200 }, { "epoch": 0.19463359912049813, "grad_norm": 13.176939010620117, "learning_rate": 9.236740878605635e-06, "loss": 2.5887, "step": 746400 }, { "epoch": 0.1946857517462003, "grad_norm": 12.691610336303711, "learning_rate": 9.23629690907198e-06, "loss": 2.5661, "step": 746600 }, { "epoch": 0.19473790437190247, "grad_norm": 11.665881156921387, "learning_rate": 9.235852821128943e-06, "loss": 2.5562, "step": 746800 }, { "epoch": 0.19479005699760463, "grad_norm": 13.195979118347168, "learning_rate": 9.235408614788937e-06, "loss": 2.5584, "step": 747000 }, { "epoch": 0.1948422096233068, "grad_norm": 11.392936706542969, "learning_rate": 9.234964290064375e-06, "loss": 2.5803, "step": 747200 }, { "epoch": 0.19489436224900897, "grad_norm": 11.263978004455566, "learning_rate": 9.234519846967678e-06, "loss": 2.6038, "step": 747400 }, { "epoch": 0.19494651487471115, "grad_norm": 10.900824546813965, "learning_rate": 9.234075285511268e-06, "loss": 2.5391, "step": 747600 }, { "epoch": 0.1949986675004133, "grad_norm": 11.745079040527344, "learning_rate": 9.233630605707573e-06, "loss": 2.6114, "step": 747800 }, { "epoch": 0.1950508201261155, "grad_norm": 11.821778297424316, "learning_rate": 9.23318580756902e-06, "loss": 2.6185, "step": 748000 }, { "epoch": 0.19510297275181765, "grad_norm": 11.111929893493652, "learning_rate": 9.232740891108042e-06, "loss": 2.6214, "step": 748200 }, { "epoch": 0.1951551253775198, "grad_norm": 11.941657066345215, "learning_rate": 9.232295856337078e-06, "loss": 2.5602, "step": 748400 }, { "epoch": 0.195207278003222, "grad_norm": 11.378263473510742, "learning_rate": 9.231850703268563e-06, "loss": 2.5749, "step": 748600 }, { "epoch": 0.19525943062892415, "grad_norm": 10.591818809509277, "learning_rate": 9.231405431914942e-06, "loss": 2.5177, "step": 748800 }, { "epoch": 0.19531158325462633, "grad_norm": 12.579631805419922, "learning_rate": 9.230960042288661e-06, "loss": 2.5455, "step": 749000 }, { "epoch": 0.1953637358803285, "grad_norm": 13.101773262023926, "learning_rate": 9.230514534402166e-06, "loss": 2.6177, "step": 749200 }, { "epoch": 0.19541588850603067, "grad_norm": 12.09244441986084, "learning_rate": 9.230068908267916e-06, "loss": 2.5752, "step": 749400 }, { "epoch": 0.19546804113173283, "grad_norm": 12.434764862060547, "learning_rate": 9.229623163898358e-06, "loss": 2.5857, "step": 749600 }, { "epoch": 0.195520193757435, "grad_norm": 11.809731483459473, "learning_rate": 9.229177301305957e-06, "loss": 2.5606, "step": 749800 }, { "epoch": 0.19557234638313717, "grad_norm": 12.460705757141113, "learning_rate": 9.228731320503174e-06, "loss": 2.574, "step": 750000 }, { "epoch": 0.19562449900883935, "grad_norm": 12.289457321166992, "learning_rate": 9.228285221502475e-06, "loss": 2.5422, "step": 750200 }, { "epoch": 0.1956766516345415, "grad_norm": 12.129727363586426, "learning_rate": 9.227839004316329e-06, "loss": 2.5749, "step": 750400 }, { "epoch": 0.1957288042602437, "grad_norm": 12.88148307800293, "learning_rate": 9.227392668957208e-06, "loss": 2.6006, "step": 750600 }, { "epoch": 0.19578095688594585, "grad_norm": 11.499628067016602, "learning_rate": 9.226946215437586e-06, "loss": 2.5805, "step": 750800 }, { "epoch": 0.19583310951164803, "grad_norm": 11.71347427368164, "learning_rate": 9.226499643769945e-06, "loss": 2.587, "step": 751000 }, { "epoch": 0.1958852621373502, "grad_norm": 11.851282119750977, "learning_rate": 9.226052953966766e-06, "loss": 2.5516, "step": 751200 }, { "epoch": 0.19593741476305238, "grad_norm": 14.087993621826172, "learning_rate": 9.225606146040533e-06, "loss": 2.5593, "step": 751400 }, { "epoch": 0.19598956738875453, "grad_norm": 13.594001770019531, "learning_rate": 9.225159220003737e-06, "loss": 2.5425, "step": 751600 }, { "epoch": 0.19604172001445672, "grad_norm": 10.394376754760742, "learning_rate": 9.224712175868866e-06, "loss": 2.5569, "step": 751800 }, { "epoch": 0.19609387264015887, "grad_norm": 13.085862159729004, "learning_rate": 9.224265013648421e-06, "loss": 2.5548, "step": 752000 }, { "epoch": 0.19614602526586106, "grad_norm": 12.25311279296875, "learning_rate": 9.223817733354898e-06, "loss": 2.5291, "step": 752200 }, { "epoch": 0.1961981778915632, "grad_norm": 10.312944412231445, "learning_rate": 9.223370335000799e-06, "loss": 2.5391, "step": 752400 }, { "epoch": 0.1962503305172654, "grad_norm": 13.032076835632324, "learning_rate": 9.22292281859863e-06, "loss": 2.5997, "step": 752600 }, { "epoch": 0.19630248314296755, "grad_norm": 11.651784896850586, "learning_rate": 9.222475184160897e-06, "loss": 2.5493, "step": 752800 }, { "epoch": 0.19635463576866974, "grad_norm": 12.205024719238281, "learning_rate": 9.222027431700116e-06, "loss": 2.5583, "step": 753000 }, { "epoch": 0.1964067883943719, "grad_norm": 12.745562553405762, "learning_rate": 9.221579561228799e-06, "loss": 2.5554, "step": 753200 }, { "epoch": 0.19645894102007408, "grad_norm": 13.312294006347656, "learning_rate": 9.221131572759464e-06, "loss": 2.5625, "step": 753400 }, { "epoch": 0.19651109364577624, "grad_norm": 11.942469596862793, "learning_rate": 9.220683466304638e-06, "loss": 2.5518, "step": 753600 }, { "epoch": 0.19656324627147842, "grad_norm": 12.910618782043457, "learning_rate": 9.22023524187684e-06, "loss": 2.554, "step": 753800 }, { "epoch": 0.19661539889718058, "grad_norm": 9.96152400970459, "learning_rate": 9.219786899488603e-06, "loss": 2.5309, "step": 754000 }, { "epoch": 0.19666755152288273, "grad_norm": 12.984268188476562, "learning_rate": 9.219338439152455e-06, "loss": 2.5723, "step": 754200 }, { "epoch": 0.19671970414858492, "grad_norm": 12.589516639709473, "learning_rate": 9.218889860880932e-06, "loss": 2.5655, "step": 754400 }, { "epoch": 0.19677185677428707, "grad_norm": 13.960628509521484, "learning_rate": 9.218441164686573e-06, "loss": 2.5815, "step": 754600 }, { "epoch": 0.19682400939998926, "grad_norm": 11.358234405517578, "learning_rate": 9.217992350581921e-06, "loss": 2.574, "step": 754800 }, { "epoch": 0.19687616202569141, "grad_norm": 12.214129447937012, "learning_rate": 9.217543418579517e-06, "loss": 2.5392, "step": 755000 }, { "epoch": 0.1969283146513936, "grad_norm": 14.097967147827148, "learning_rate": 9.217094368691913e-06, "loss": 2.6056, "step": 755200 }, { "epoch": 0.19698046727709576, "grad_norm": 12.386037826538086, "learning_rate": 9.216645200931657e-06, "loss": 2.5794, "step": 755400 }, { "epoch": 0.19703261990279794, "grad_norm": 12.34218692779541, "learning_rate": 9.216195915311307e-06, "loss": 2.5821, "step": 755600 }, { "epoch": 0.1970847725285001, "grad_norm": 13.724196434020996, "learning_rate": 9.215746511843422e-06, "loss": 2.5602, "step": 755800 }, { "epoch": 0.19713692515420228, "grad_norm": 12.456463813781738, "learning_rate": 9.21529699054056e-06, "loss": 2.5832, "step": 756000 }, { "epoch": 0.19718907777990444, "grad_norm": 12.305964469909668, "learning_rate": 9.214847351415283e-06, "loss": 2.5294, "step": 756200 }, { "epoch": 0.19724123040560662, "grad_norm": 11.164262771606445, "learning_rate": 9.214397594480165e-06, "loss": 2.5495, "step": 756400 }, { "epoch": 0.19729338303130878, "grad_norm": 13.309433937072754, "learning_rate": 9.213947719747775e-06, "loss": 2.5062, "step": 756600 }, { "epoch": 0.19734553565701096, "grad_norm": 12.617058753967285, "learning_rate": 9.213497727230688e-06, "loss": 2.5857, "step": 756800 }, { "epoch": 0.19739768828271312, "grad_norm": 11.997270584106445, "learning_rate": 9.21304761694148e-06, "loss": 2.5594, "step": 757000 }, { "epoch": 0.1974498409084153, "grad_norm": 11.326188087463379, "learning_rate": 9.212597388892734e-06, "loss": 2.5126, "step": 757200 }, { "epoch": 0.19750199353411746, "grad_norm": 13.245183944702148, "learning_rate": 9.212147043097034e-06, "loss": 2.5587, "step": 757400 }, { "epoch": 0.19755414615981964, "grad_norm": 12.528741836547852, "learning_rate": 9.211696579566967e-06, "loss": 2.5524, "step": 757600 }, { "epoch": 0.1976062987855218, "grad_norm": 12.96116828918457, "learning_rate": 9.211245998315124e-06, "loss": 2.5411, "step": 757800 }, { "epoch": 0.19765845141122398, "grad_norm": 14.029032707214355, "learning_rate": 9.210795299354101e-06, "loss": 2.5702, "step": 758000 }, { "epoch": 0.19771060403692614, "grad_norm": 12.810486793518066, "learning_rate": 9.210344482696491e-06, "loss": 2.5369, "step": 758200 }, { "epoch": 0.19776275666262833, "grad_norm": 11.713202476501465, "learning_rate": 9.2098935483549e-06, "loss": 2.5273, "step": 758400 }, { "epoch": 0.19781490928833048, "grad_norm": 11.515366554260254, "learning_rate": 9.209442496341932e-06, "loss": 2.5773, "step": 758600 }, { "epoch": 0.19786706191403267, "grad_norm": 12.124978065490723, "learning_rate": 9.208991326670189e-06, "loss": 2.5723, "step": 758800 }, { "epoch": 0.19791921453973482, "grad_norm": 11.972982406616211, "learning_rate": 9.208540039352288e-06, "loss": 2.5797, "step": 759000 }, { "epoch": 0.197971367165437, "grad_norm": 12.521751403808594, "learning_rate": 9.208088634400838e-06, "loss": 2.5721, "step": 759200 }, { "epoch": 0.19802351979113916, "grad_norm": 12.26280403137207, "learning_rate": 9.20763711182846e-06, "loss": 2.5352, "step": 759400 }, { "epoch": 0.19807567241684135, "grad_norm": 11.379363059997559, "learning_rate": 9.207185471647774e-06, "loss": 2.5372, "step": 759600 }, { "epoch": 0.1981278250425435, "grad_norm": 11.682640075683594, "learning_rate": 9.2067337138714e-06, "loss": 2.584, "step": 759800 }, { "epoch": 0.1981799776682457, "grad_norm": 10.798892974853516, "learning_rate": 9.206281838511972e-06, "loss": 2.5728, "step": 760000 }, { "epoch": 0.19823213029394784, "grad_norm": 13.537489891052246, "learning_rate": 9.205829845582114e-06, "loss": 2.5534, "step": 760200 }, { "epoch": 0.19828428291965, "grad_norm": 12.42953872680664, "learning_rate": 9.205377735094462e-06, "loss": 2.5662, "step": 760400 }, { "epoch": 0.19833643554535219, "grad_norm": 10.437345504760742, "learning_rate": 9.204925507061655e-06, "loss": 2.5577, "step": 760600 }, { "epoch": 0.19838858817105434, "grad_norm": 12.269465446472168, "learning_rate": 9.204473161496331e-06, "loss": 2.5762, "step": 760800 }, { "epoch": 0.19844074079675653, "grad_norm": 11.716769218444824, "learning_rate": 9.204020698411133e-06, "loss": 2.5548, "step": 761000 }, { "epoch": 0.19849289342245868, "grad_norm": 13.604849815368652, "learning_rate": 9.20356811781871e-06, "loss": 2.5544, "step": 761200 }, { "epoch": 0.19854504604816087, "grad_norm": 12.914355278015137, "learning_rate": 9.203115419731711e-06, "loss": 2.5554, "step": 761400 }, { "epoch": 0.19859719867386302, "grad_norm": 13.604276657104492, "learning_rate": 9.20266260416279e-06, "loss": 2.556, "step": 761600 }, { "epoch": 0.1986493512995652, "grad_norm": 12.59882640838623, "learning_rate": 9.202209671124605e-06, "loss": 2.5409, "step": 761800 }, { "epoch": 0.19870150392526736, "grad_norm": 12.737641334533691, "learning_rate": 9.20175662062981e-06, "loss": 2.5656, "step": 762000 }, { "epoch": 0.19875365655096955, "grad_norm": 11.206287384033203, "learning_rate": 9.201303452691078e-06, "loss": 2.5629, "step": 762200 }, { "epoch": 0.1988058091766717, "grad_norm": 12.095778465270996, "learning_rate": 9.200850167321067e-06, "loss": 2.5611, "step": 762400 }, { "epoch": 0.1988579618023739, "grad_norm": 11.354753494262695, "learning_rate": 9.20039676453245e-06, "loss": 2.5851, "step": 762600 }, { "epoch": 0.19891011442807605, "grad_norm": 10.592757225036621, "learning_rate": 9.199943244337901e-06, "loss": 2.5733, "step": 762800 }, { "epoch": 0.19896226705377823, "grad_norm": 12.773603439331055, "learning_rate": 9.199489606750095e-06, "loss": 2.578, "step": 763000 }, { "epoch": 0.1990144196794804, "grad_norm": 11.842935562133789, "learning_rate": 9.199035851781712e-06, "loss": 2.5338, "step": 763200 }, { "epoch": 0.19906657230518257, "grad_norm": 11.150446891784668, "learning_rate": 9.198581979445437e-06, "loss": 2.5977, "step": 763400 }, { "epoch": 0.19911872493088473, "grad_norm": 13.279424667358398, "learning_rate": 9.198127989753953e-06, "loss": 2.5638, "step": 763600 }, { "epoch": 0.1991708775565869, "grad_norm": 12.847058296203613, "learning_rate": 9.197673882719953e-06, "loss": 2.5607, "step": 763800 }, { "epoch": 0.19922303018228907, "grad_norm": 12.287805557250977, "learning_rate": 9.197219658356125e-06, "loss": 2.6154, "step": 764000 }, { "epoch": 0.19927518280799125, "grad_norm": 12.280625343322754, "learning_rate": 9.196765316675169e-06, "loss": 2.5747, "step": 764200 }, { "epoch": 0.1993273354336934, "grad_norm": 12.880599975585938, "learning_rate": 9.196310857689785e-06, "loss": 2.5721, "step": 764400 }, { "epoch": 0.1993794880593956, "grad_norm": 12.690503120422363, "learning_rate": 9.195856281412672e-06, "loss": 2.5614, "step": 764600 }, { "epoch": 0.19943164068509775, "grad_norm": 11.280840873718262, "learning_rate": 9.195401587856538e-06, "loss": 2.5797, "step": 764800 }, { "epoch": 0.19948379331079993, "grad_norm": 13.591432571411133, "learning_rate": 9.194946777034094e-06, "loss": 2.554, "step": 765000 }, { "epoch": 0.1995359459365021, "grad_norm": 13.786008834838867, "learning_rate": 9.19449184895805e-06, "loss": 2.5347, "step": 765200 }, { "epoch": 0.19958809856220427, "grad_norm": 11.823830604553223, "learning_rate": 9.19403680364112e-06, "loss": 2.551, "step": 765400 }, { "epoch": 0.19964025118790643, "grad_norm": 11.602143287658691, "learning_rate": 9.193581641096028e-06, "loss": 2.577, "step": 765600 }, { "epoch": 0.19969240381360862, "grad_norm": 14.088010787963867, "learning_rate": 9.193126361335491e-06, "loss": 2.5477, "step": 765800 }, { "epoch": 0.19974455643931077, "grad_norm": 13.316328048706055, "learning_rate": 9.19267096437224e-06, "loss": 2.5517, "step": 766000 }, { "epoch": 0.19979670906501293, "grad_norm": 11.534945487976074, "learning_rate": 9.192215450219e-06, "loss": 2.5514, "step": 766200 }, { "epoch": 0.1998488616907151, "grad_norm": 11.87644100189209, "learning_rate": 9.191759818888506e-06, "loss": 2.5328, "step": 766400 }, { "epoch": 0.19990101431641727, "grad_norm": 12.74470043182373, "learning_rate": 9.19130407039349e-06, "loss": 2.5838, "step": 766600 }, { "epoch": 0.19995316694211945, "grad_norm": 11.58600902557373, "learning_rate": 9.190848204746694e-06, "loss": 2.5657, "step": 766800 }, { "epoch": 0.2000053195678216, "grad_norm": 12.279742240905762, "learning_rate": 9.190392221960858e-06, "loss": 2.5543, "step": 767000 }, { "epoch": 0.2000574721935238, "grad_norm": 12.125825881958008, "learning_rate": 9.189936122048727e-06, "loss": 2.5282, "step": 767200 }, { "epoch": 0.20010962481922595, "grad_norm": 10.70932674407959, "learning_rate": 9.18947990502305e-06, "loss": 2.5886, "step": 767400 }, { "epoch": 0.20016177744492814, "grad_norm": 12.008572578430176, "learning_rate": 9.189023570896579e-06, "loss": 2.5466, "step": 767600 }, { "epoch": 0.2002139300706303, "grad_norm": 10.968779563903809, "learning_rate": 9.18856711968207e-06, "loss": 2.5674, "step": 767800 }, { "epoch": 0.20026608269633248, "grad_norm": 11.873457908630371, "learning_rate": 9.18811055139228e-06, "loss": 2.5667, "step": 768000 }, { "epoch": 0.20031823532203463, "grad_norm": 11.809953689575195, "learning_rate": 9.18765386603997e-06, "loss": 2.5602, "step": 768200 }, { "epoch": 0.20037038794773682, "grad_norm": 12.026460647583008, "learning_rate": 9.187197063637906e-06, "loss": 2.5693, "step": 768400 }, { "epoch": 0.20042254057343897, "grad_norm": 12.511942863464355, "learning_rate": 9.186740144198858e-06, "loss": 2.5677, "step": 768600 }, { "epoch": 0.20047469319914116, "grad_norm": 11.590380668640137, "learning_rate": 9.186283107735593e-06, "loss": 2.5472, "step": 768800 }, { "epoch": 0.20052684582484331, "grad_norm": 12.04472541809082, "learning_rate": 9.18582595426089e-06, "loss": 2.5422, "step": 769000 }, { "epoch": 0.2005789984505455, "grad_norm": 13.985250473022461, "learning_rate": 9.185368683787526e-06, "loss": 2.5152, "step": 769200 }, { "epoch": 0.20063115107624765, "grad_norm": 12.050628662109375, "learning_rate": 9.184911296328279e-06, "loss": 2.5715, "step": 769400 }, { "epoch": 0.20068330370194984, "grad_norm": 13.415891647338867, "learning_rate": 9.184453791895937e-06, "loss": 2.5828, "step": 769600 }, { "epoch": 0.200735456327652, "grad_norm": 13.432297706604004, "learning_rate": 9.183996170503287e-06, "loss": 2.5177, "step": 769800 }, { "epoch": 0.20078760895335418, "grad_norm": 12.210216522216797, "learning_rate": 9.18353843216312e-06, "loss": 2.5423, "step": 770000 }, { "epoch": 0.20083976157905634, "grad_norm": 11.473400115966797, "learning_rate": 9.183080576888228e-06, "loss": 2.5561, "step": 770200 }, { "epoch": 0.20089191420475852, "grad_norm": 12.12957763671875, "learning_rate": 9.182622604691411e-06, "loss": 2.5614, "step": 770400 }, { "epoch": 0.20094406683046068, "grad_norm": 11.806977272033691, "learning_rate": 9.182164515585472e-06, "loss": 2.5903, "step": 770600 }, { "epoch": 0.20099621945616286, "grad_norm": 12.727505683898926, "learning_rate": 9.181706309583209e-06, "loss": 2.577, "step": 770800 }, { "epoch": 0.20104837208186502, "grad_norm": 13.943395614624023, "learning_rate": 9.181247986697436e-06, "loss": 2.5928, "step": 771000 }, { "epoch": 0.2011005247075672, "grad_norm": 12.006414413452148, "learning_rate": 9.18078954694096e-06, "loss": 2.5669, "step": 771200 }, { "epoch": 0.20115267733326936, "grad_norm": 11.008611679077148, "learning_rate": 9.180330990326593e-06, "loss": 2.5689, "step": 771400 }, { "epoch": 0.20120482995897154, "grad_norm": 12.664291381835938, "learning_rate": 9.179872316867158e-06, "loss": 2.6105, "step": 771600 }, { "epoch": 0.2012569825846737, "grad_norm": 11.425148010253906, "learning_rate": 9.179413526575472e-06, "loss": 2.5416, "step": 771800 }, { "epoch": 0.20130913521037586, "grad_norm": 10.56281566619873, "learning_rate": 9.178954619464357e-06, "loss": 2.5577, "step": 772000 }, { "epoch": 0.20136128783607804, "grad_norm": 12.567456245422363, "learning_rate": 9.178495595546641e-06, "loss": 2.5976, "step": 772200 }, { "epoch": 0.2014134404617802, "grad_norm": 11.59072208404541, "learning_rate": 9.178036454835157e-06, "loss": 2.5353, "step": 772400 }, { "epoch": 0.20146559308748238, "grad_norm": 12.048215866088867, "learning_rate": 9.177577197342738e-06, "loss": 2.5556, "step": 772600 }, { "epoch": 0.20151774571318454, "grad_norm": 13.227574348449707, "learning_rate": 9.177117823082217e-06, "loss": 2.5473, "step": 772800 }, { "epoch": 0.20156989833888672, "grad_norm": 11.449342727661133, "learning_rate": 9.176658332066438e-06, "loss": 2.5772, "step": 773000 }, { "epoch": 0.20162205096458888, "grad_norm": 13.140538215637207, "learning_rate": 9.17619872430824e-06, "loss": 2.5734, "step": 773200 }, { "epoch": 0.20167420359029106, "grad_norm": 13.485116958618164, "learning_rate": 9.175738999820476e-06, "loss": 2.5461, "step": 773400 }, { "epoch": 0.20172635621599322, "grad_norm": 14.482010841369629, "learning_rate": 9.175279158615989e-06, "loss": 2.5771, "step": 773600 }, { "epoch": 0.2017785088416954, "grad_norm": 11.397805213928223, "learning_rate": 9.174819200707636e-06, "loss": 2.5517, "step": 773800 }, { "epoch": 0.20183066146739756, "grad_norm": 13.39069938659668, "learning_rate": 9.174359126108274e-06, "loss": 2.5457, "step": 774000 }, { "epoch": 0.20188281409309974, "grad_norm": 13.59749698638916, "learning_rate": 9.17389893483076e-06, "loss": 2.5409, "step": 774200 }, { "epoch": 0.2019349667188019, "grad_norm": 12.491495132446289, "learning_rate": 9.173438626887958e-06, "loss": 2.5758, "step": 774400 }, { "epoch": 0.20198711934450408, "grad_norm": 12.573298454284668, "learning_rate": 9.172978202292735e-06, "loss": 2.5665, "step": 774600 }, { "epoch": 0.20203927197020624, "grad_norm": 12.443755149841309, "learning_rate": 9.17251766105796e-06, "loss": 2.603, "step": 774800 }, { "epoch": 0.20209142459590843, "grad_norm": 11.910422325134277, "learning_rate": 9.172057003196505e-06, "loss": 2.5361, "step": 775000 }, { "epoch": 0.20214357722161058, "grad_norm": 12.631829261779785, "learning_rate": 9.171596228721245e-06, "loss": 2.5566, "step": 775200 }, { "epoch": 0.20219572984731277, "grad_norm": 13.858083724975586, "learning_rate": 9.171135337645061e-06, "loss": 2.5467, "step": 775400 }, { "epoch": 0.20224788247301492, "grad_norm": 11.458602905273438, "learning_rate": 9.170674329980835e-06, "loss": 2.5821, "step": 775600 }, { "epoch": 0.2023000350987171, "grad_norm": 12.20776081085205, "learning_rate": 9.170213205741453e-06, "loss": 2.5435, "step": 775800 }, { "epoch": 0.20235218772441926, "grad_norm": 12.886215209960938, "learning_rate": 9.169751964939802e-06, "loss": 2.6111, "step": 776000 }, { "epoch": 0.20240434035012145, "grad_norm": 13.515503883361816, "learning_rate": 9.169290607588776e-06, "loss": 2.5066, "step": 776200 }, { "epoch": 0.2024564929758236, "grad_norm": 12.592185974121094, "learning_rate": 9.168829133701273e-06, "loss": 2.5507, "step": 776400 }, { "epoch": 0.2025086456015258, "grad_norm": 13.320043563842773, "learning_rate": 9.168367543290187e-06, "loss": 2.5546, "step": 776600 }, { "epoch": 0.20256079822722795, "grad_norm": 12.519879341125488, "learning_rate": 9.16790583636842e-06, "loss": 2.5799, "step": 776800 }, { "epoch": 0.20261295085293013, "grad_norm": 12.188693046569824, "learning_rate": 9.16744401294888e-06, "loss": 2.5855, "step": 777000 }, { "epoch": 0.20266510347863229, "grad_norm": 14.931890487670898, "learning_rate": 9.166982073044475e-06, "loss": 2.5712, "step": 777200 }, { "epoch": 0.20271725610433447, "grad_norm": 13.907896995544434, "learning_rate": 9.166520016668117e-06, "loss": 2.5602, "step": 777400 }, { "epoch": 0.20276940873003663, "grad_norm": 12.67017936706543, "learning_rate": 9.16605784383272e-06, "loss": 2.5684, "step": 777600 }, { "epoch": 0.20282156135573878, "grad_norm": 12.602633476257324, "learning_rate": 9.165595554551204e-06, "loss": 2.5424, "step": 777800 }, { "epoch": 0.20287371398144097, "grad_norm": 11.999449729919434, "learning_rate": 9.165133148836487e-06, "loss": 2.5635, "step": 778000 }, { "epoch": 0.20292586660714312, "grad_norm": 15.479057312011719, "learning_rate": 9.164670626701495e-06, "loss": 2.5707, "step": 778200 }, { "epoch": 0.2029780192328453, "grad_norm": 11.809393882751465, "learning_rate": 9.16420798815916e-06, "loss": 2.537, "step": 778400 }, { "epoch": 0.20303017185854746, "grad_norm": 12.722270965576172, "learning_rate": 9.163745233222407e-06, "loss": 2.569, "step": 778600 }, { "epoch": 0.20308232448424965, "grad_norm": 12.335058212280273, "learning_rate": 9.163282361904176e-06, "loss": 2.5594, "step": 778800 }, { "epoch": 0.2031344771099518, "grad_norm": 11.937206268310547, "learning_rate": 9.162819374217403e-06, "loss": 2.5612, "step": 779000 }, { "epoch": 0.203186629735654, "grad_norm": 10.777091026306152, "learning_rate": 9.162356270175026e-06, "loss": 2.5397, "step": 779200 }, { "epoch": 0.20323878236135615, "grad_norm": 13.843822479248047, "learning_rate": 9.161893049789995e-06, "loss": 2.5632, "step": 779400 }, { "epoch": 0.20329093498705833, "grad_norm": 11.743480682373047, "learning_rate": 9.161429713075252e-06, "loss": 2.587, "step": 779600 }, { "epoch": 0.2033430876127605, "grad_norm": 10.867159843444824, "learning_rate": 9.160966260043751e-06, "loss": 2.5847, "step": 779800 }, { "epoch": 0.20339524023846267, "grad_norm": 12.39773941040039, "learning_rate": 9.160502690708447e-06, "loss": 2.5626, "step": 780000 }, { "epoch": 0.20344739286416483, "grad_norm": 13.51063060760498, "learning_rate": 9.160039005082291e-06, "loss": 2.5048, "step": 780200 }, { "epoch": 0.203499545489867, "grad_norm": 12.987715721130371, "learning_rate": 9.159575203178253e-06, "loss": 2.5538, "step": 780400 }, { "epoch": 0.20355169811556917, "grad_norm": 12.870197296142578, "learning_rate": 9.159111285009289e-06, "loss": 2.5578, "step": 780600 }, { "epoch": 0.20360385074127135, "grad_norm": 10.9700927734375, "learning_rate": 9.15864725058837e-06, "loss": 2.5964, "step": 780800 }, { "epoch": 0.2036560033669735, "grad_norm": 11.876527786254883, "learning_rate": 9.158183099928465e-06, "loss": 2.5587, "step": 781000 }, { "epoch": 0.2037081559926757, "grad_norm": 12.678916931152344, "learning_rate": 9.157718833042548e-06, "loss": 2.57, "step": 781200 }, { "epoch": 0.20376030861837785, "grad_norm": 11.316527366638184, "learning_rate": 9.157254449943594e-06, "loss": 2.531, "step": 781400 }, { "epoch": 0.20381246124408003, "grad_norm": 11.957809448242188, "learning_rate": 9.156789950644587e-06, "loss": 2.5381, "step": 781600 }, { "epoch": 0.2038646138697822, "grad_norm": 12.463024139404297, "learning_rate": 9.156325335158507e-06, "loss": 2.5681, "step": 781800 }, { "epoch": 0.20391676649548438, "grad_norm": 12.041062355041504, "learning_rate": 9.155860603498341e-06, "loss": 2.5684, "step": 782000 }, { "epoch": 0.20396891912118653, "grad_norm": 11.98395824432373, "learning_rate": 9.15539575567708e-06, "loss": 2.5603, "step": 782200 }, { "epoch": 0.20402107174688872, "grad_norm": 12.181907653808594, "learning_rate": 9.154930791707714e-06, "loss": 2.5406, "step": 782400 }, { "epoch": 0.20407322437259087, "grad_norm": 12.69912338256836, "learning_rate": 9.154465711603244e-06, "loss": 2.5441, "step": 782600 }, { "epoch": 0.20412537699829306, "grad_norm": 11.131607055664062, "learning_rate": 9.154000515376667e-06, "loss": 2.5484, "step": 782800 }, { "epoch": 0.2041775296239952, "grad_norm": 11.983840942382812, "learning_rate": 9.153535203040986e-06, "loss": 2.5317, "step": 783000 }, { "epoch": 0.2042296822496974, "grad_norm": 13.614217758178711, "learning_rate": 9.153069774609205e-06, "loss": 2.5382, "step": 783200 }, { "epoch": 0.20428183487539955, "grad_norm": 13.162174224853516, "learning_rate": 9.152604230094337e-06, "loss": 2.5865, "step": 783400 }, { "epoch": 0.2043339875011017, "grad_norm": 12.635634422302246, "learning_rate": 9.152138569509393e-06, "loss": 2.549, "step": 783600 }, { "epoch": 0.2043861401268039, "grad_norm": 9.673964500427246, "learning_rate": 9.151672792867387e-06, "loss": 2.5795, "step": 783800 }, { "epoch": 0.20443829275250605, "grad_norm": 12.545588493347168, "learning_rate": 9.15120690018134e-06, "loss": 2.546, "step": 784000 }, { "epoch": 0.20449044537820824, "grad_norm": 11.919151306152344, "learning_rate": 9.150740891464272e-06, "loss": 2.5573, "step": 784200 }, { "epoch": 0.2045425980039104, "grad_norm": 14.723599433898926, "learning_rate": 9.150274766729213e-06, "loss": 2.5681, "step": 784400 }, { "epoch": 0.20459475062961258, "grad_norm": 11.212860107421875, "learning_rate": 9.149808525989185e-06, "loss": 2.5524, "step": 784600 }, { "epoch": 0.20464690325531473, "grad_norm": 13.626235961914062, "learning_rate": 9.149342169257228e-06, "loss": 2.6008, "step": 784800 }, { "epoch": 0.20469905588101692, "grad_norm": 13.193553924560547, "learning_rate": 9.148875696546372e-06, "loss": 2.54, "step": 785000 }, { "epoch": 0.20475120850671907, "grad_norm": 12.401244163513184, "learning_rate": 9.148409107869654e-06, "loss": 2.5822, "step": 785200 }, { "epoch": 0.20480336113242126, "grad_norm": 13.341384887695312, "learning_rate": 9.14794240324012e-06, "loss": 2.581, "step": 785400 }, { "epoch": 0.20485551375812341, "grad_norm": 10.86294937133789, "learning_rate": 9.147475582670813e-06, "loss": 2.558, "step": 785600 }, { "epoch": 0.2049076663838256, "grad_norm": 11.97840690612793, "learning_rate": 9.14700864617478e-06, "loss": 2.569, "step": 785800 }, { "epoch": 0.20495981900952776, "grad_norm": 12.532195091247559, "learning_rate": 9.146541593765075e-06, "loss": 2.5214, "step": 786000 }, { "epoch": 0.20501197163522994, "grad_norm": 13.745171546936035, "learning_rate": 9.14607442545475e-06, "loss": 2.5511, "step": 786200 }, { "epoch": 0.2050641242609321, "grad_norm": 12.68714714050293, "learning_rate": 9.145607141256864e-06, "loss": 2.5863, "step": 786400 }, { "epoch": 0.20511627688663428, "grad_norm": 12.79076862335205, "learning_rate": 9.145139741184481e-06, "loss": 2.5629, "step": 786600 }, { "epoch": 0.20516842951233644, "grad_norm": 13.012794494628906, "learning_rate": 9.144672225250661e-06, "loss": 2.582, "step": 786800 }, { "epoch": 0.20522058213803862, "grad_norm": 12.885845184326172, "learning_rate": 9.144204593468473e-06, "loss": 2.5376, "step": 787000 }, { "epoch": 0.20527273476374078, "grad_norm": 11.415619850158691, "learning_rate": 9.14373684585099e-06, "loss": 2.5376, "step": 787200 }, { "epoch": 0.20532488738944296, "grad_norm": 10.882523536682129, "learning_rate": 9.143268982411282e-06, "loss": 2.5388, "step": 787400 }, { "epoch": 0.20537704001514512, "grad_norm": 13.310264587402344, "learning_rate": 9.142801003162429e-06, "loss": 2.5681, "step": 787600 }, { "epoch": 0.2054291926408473, "grad_norm": 10.162239074707031, "learning_rate": 9.142332908117512e-06, "loss": 2.5519, "step": 787800 }, { "epoch": 0.20548134526654946, "grad_norm": 10.431777000427246, "learning_rate": 9.141864697289612e-06, "loss": 2.5372, "step": 788000 }, { "epoch": 0.20553349789225164, "grad_norm": 11.729969024658203, "learning_rate": 9.141396370691822e-06, "loss": 2.5446, "step": 788200 }, { "epoch": 0.2055856505179538, "grad_norm": 13.751687049865723, "learning_rate": 9.140927928337224e-06, "loss": 2.5337, "step": 788400 }, { "epoch": 0.20563780314365598, "grad_norm": 11.931695938110352, "learning_rate": 9.140459370238919e-06, "loss": 2.5327, "step": 788600 }, { "epoch": 0.20568995576935814, "grad_norm": 12.925618171691895, "learning_rate": 9.139990696409998e-06, "loss": 2.5283, "step": 788800 }, { "epoch": 0.20574210839506032, "grad_norm": 11.938955307006836, "learning_rate": 9.139521906863564e-06, "loss": 2.545, "step": 789000 }, { "epoch": 0.20579426102076248, "grad_norm": 12.688498497009277, "learning_rate": 9.13905300161272e-06, "loss": 2.5699, "step": 789200 }, { "epoch": 0.20584641364646467, "grad_norm": 12.349008560180664, "learning_rate": 9.138583980670573e-06, "loss": 2.5775, "step": 789400 }, { "epoch": 0.20589856627216682, "grad_norm": 13.858742713928223, "learning_rate": 9.13811484405023e-06, "loss": 2.5536, "step": 789600 }, { "epoch": 0.20595071889786898, "grad_norm": 12.965885162353516, "learning_rate": 9.137645591764807e-06, "loss": 2.5364, "step": 789800 }, { "epoch": 0.20600287152357116, "grad_norm": 12.98017692565918, "learning_rate": 9.137176223827417e-06, "loss": 2.5487, "step": 790000 }, { "epoch": 0.20605502414927332, "grad_norm": 12.841651916503906, "learning_rate": 9.136706740251185e-06, "loss": 2.5751, "step": 790200 }, { "epoch": 0.2061071767749755, "grad_norm": 11.136397361755371, "learning_rate": 9.136237141049226e-06, "loss": 2.5773, "step": 790400 }, { "epoch": 0.20615932940067766, "grad_norm": 13.198196411132812, "learning_rate": 9.135767426234671e-06, "loss": 2.529, "step": 790600 }, { "epoch": 0.20621148202637984, "grad_norm": 13.684469223022461, "learning_rate": 9.135297595820646e-06, "loss": 2.5633, "step": 790800 }, { "epoch": 0.206263634652082, "grad_norm": 12.033394813537598, "learning_rate": 9.134827649820289e-06, "loss": 2.602, "step": 791000 }, { "epoch": 0.20631578727778419, "grad_norm": 10.937596321105957, "learning_rate": 9.134357588246729e-06, "loss": 2.5496, "step": 791200 }, { "epoch": 0.20636793990348634, "grad_norm": 11.299455642700195, "learning_rate": 9.133887411113108e-06, "loss": 2.5665, "step": 791400 }, { "epoch": 0.20642009252918853, "grad_norm": 13.532719612121582, "learning_rate": 9.133417118432566e-06, "loss": 2.5522, "step": 791600 }, { "epoch": 0.20647224515489068, "grad_norm": 12.678144454956055, "learning_rate": 9.13294671021825e-06, "loss": 2.5615, "step": 791800 }, { "epoch": 0.20652439778059287, "grad_norm": 12.046524047851562, "learning_rate": 9.13247618648331e-06, "loss": 2.562, "step": 792000 }, { "epoch": 0.20657655040629502, "grad_norm": 12.832523345947266, "learning_rate": 9.132005547240893e-06, "loss": 2.5695, "step": 792200 }, { "epoch": 0.2066287030319972, "grad_norm": 12.524669647216797, "learning_rate": 9.131534792504158e-06, "loss": 2.5554, "step": 792400 }, { "epoch": 0.20668085565769936, "grad_norm": 11.63914966583252, "learning_rate": 9.131063922286261e-06, "loss": 2.5182, "step": 792600 }, { "epoch": 0.20673300828340155, "grad_norm": 12.573324203491211, "learning_rate": 9.130592936600364e-06, "loss": 2.589, "step": 792800 }, { "epoch": 0.2067851609091037, "grad_norm": 11.700994491577148, "learning_rate": 9.130121835459633e-06, "loss": 2.6005, "step": 793000 }, { "epoch": 0.2068373135348059, "grad_norm": 10.683900833129883, "learning_rate": 9.129650618877233e-06, "loss": 2.5726, "step": 793200 }, { "epoch": 0.20688946616050805, "grad_norm": 12.077662467956543, "learning_rate": 9.129179286866337e-06, "loss": 2.5581, "step": 793400 }, { "epoch": 0.20694161878621023, "grad_norm": 12.618571281433105, "learning_rate": 9.128707839440119e-06, "loss": 2.5401, "step": 793600 }, { "epoch": 0.2069937714119124, "grad_norm": 13.423776626586914, "learning_rate": 9.128236276611757e-06, "loss": 2.5562, "step": 793800 }, { "epoch": 0.20704592403761457, "grad_norm": 11.433748245239258, "learning_rate": 9.127764598394429e-06, "loss": 2.5461, "step": 794000 }, { "epoch": 0.20709807666331673, "grad_norm": 13.008195877075195, "learning_rate": 9.127292804801322e-06, "loss": 2.5663, "step": 794200 }, { "epoch": 0.2071502292890189, "grad_norm": 12.282379150390625, "learning_rate": 9.126820895845623e-06, "loss": 2.5785, "step": 794400 }, { "epoch": 0.20720238191472107, "grad_norm": 12.038056373596191, "learning_rate": 9.12634887154052e-06, "loss": 2.5305, "step": 794600 }, { "epoch": 0.20725453454042325, "grad_norm": 10.847259521484375, "learning_rate": 9.125876731899209e-06, "loss": 2.5142, "step": 794800 }, { "epoch": 0.2073066871661254, "grad_norm": 12.055601119995117, "learning_rate": 9.125404476934888e-06, "loss": 2.556, "step": 795000 }, { "epoch": 0.2073588397918276, "grad_norm": 14.068562507629395, "learning_rate": 9.124932106660752e-06, "loss": 2.5441, "step": 795200 }, { "epoch": 0.20741099241752975, "grad_norm": 12.50983715057373, "learning_rate": 9.124459621090009e-06, "loss": 2.5705, "step": 795400 }, { "epoch": 0.2074631450432319, "grad_norm": 12.685457229614258, "learning_rate": 9.123987020235863e-06, "loss": 2.5693, "step": 795600 }, { "epoch": 0.2075152976689341, "grad_norm": 12.426636695861816, "learning_rate": 9.123514304111525e-06, "loss": 2.555, "step": 795800 }, { "epoch": 0.20756745029463625, "grad_norm": 11.003486633300781, "learning_rate": 9.123041472730207e-06, "loss": 2.5363, "step": 796000 }, { "epoch": 0.20761960292033843, "grad_norm": 12.062921524047852, "learning_rate": 9.122568526105127e-06, "loss": 2.5494, "step": 796200 }, { "epoch": 0.2076717555460406, "grad_norm": 9.995904922485352, "learning_rate": 9.122095464249504e-06, "loss": 2.5621, "step": 796400 }, { "epoch": 0.20772390817174277, "grad_norm": 12.198345184326172, "learning_rate": 9.121622287176557e-06, "loss": 2.5531, "step": 796600 }, { "epoch": 0.20777606079744493, "grad_norm": 11.961278915405273, "learning_rate": 9.121148994899517e-06, "loss": 2.5245, "step": 796800 }, { "epoch": 0.2078282134231471, "grad_norm": 11.721890449523926, "learning_rate": 9.12067558743161e-06, "loss": 2.5773, "step": 797000 }, { "epoch": 0.20788036604884927, "grad_norm": 11.148172378540039, "learning_rate": 9.120202064786067e-06, "loss": 2.5425, "step": 797200 }, { "epoch": 0.20793251867455145, "grad_norm": 12.131102561950684, "learning_rate": 9.119728426976129e-06, "loss": 2.5342, "step": 797400 }, { "epoch": 0.2079846713002536, "grad_norm": 12.33278751373291, "learning_rate": 9.11925467401503e-06, "loss": 2.5778, "step": 797600 }, { "epoch": 0.2080368239259558, "grad_norm": 13.5529203414917, "learning_rate": 9.118780805916011e-06, "loss": 2.5409, "step": 797800 }, { "epoch": 0.20808897655165795, "grad_norm": 13.351973533630371, "learning_rate": 9.118306822692322e-06, "loss": 2.5533, "step": 798000 }, { "epoch": 0.20814112917736013, "grad_norm": 11.997188568115234, "learning_rate": 9.117832724357208e-06, "loss": 2.5669, "step": 798200 }, { "epoch": 0.2081932818030623, "grad_norm": 12.028074264526367, "learning_rate": 9.117358510923921e-06, "loss": 2.5191, "step": 798400 }, { "epoch": 0.20824543442876448, "grad_norm": 10.646809577941895, "learning_rate": 9.116884182405718e-06, "loss": 2.5348, "step": 798600 }, { "epoch": 0.20829758705446663, "grad_norm": 10.890563011169434, "learning_rate": 9.116409738815853e-06, "loss": 2.5531, "step": 798800 }, { "epoch": 0.20834973968016882, "grad_norm": 11.447530746459961, "learning_rate": 9.11593518016759e-06, "loss": 2.5651, "step": 799000 }, { "epoch": 0.20840189230587097, "grad_norm": 13.491917610168457, "learning_rate": 9.115460506474193e-06, "loss": 2.5481, "step": 799200 }, { "epoch": 0.20845404493157316, "grad_norm": 12.284280776977539, "learning_rate": 9.114985717748928e-06, "loss": 2.5543, "step": 799400 }, { "epoch": 0.2085061975572753, "grad_norm": 11.280057907104492, "learning_rate": 9.11451081400507e-06, "loss": 2.5347, "step": 799600 }, { "epoch": 0.2085583501829775, "grad_norm": 13.034576416015625, "learning_rate": 9.114035795255888e-06, "loss": 2.5667, "step": 799800 }, { "epoch": 0.20861050280867965, "grad_norm": 10.916426658630371, "learning_rate": 9.113560661514664e-06, "loss": 2.5311, "step": 800000 }, { "epoch": 0.20866265543438184, "grad_norm": 12.369806289672852, "learning_rate": 9.113085412794676e-06, "loss": 2.5332, "step": 800200 }, { "epoch": 0.208714808060084, "grad_norm": 12.768467903137207, "learning_rate": 9.112610049109207e-06, "loss": 2.5693, "step": 800400 }, { "epoch": 0.20876696068578618, "grad_norm": 13.05500316619873, "learning_rate": 9.112134570471545e-06, "loss": 2.5134, "step": 800600 }, { "epoch": 0.20881911331148834, "grad_norm": 11.387818336486816, "learning_rate": 9.111658976894982e-06, "loss": 2.5834, "step": 800800 }, { "epoch": 0.20887126593719052, "grad_norm": 10.839855194091797, "learning_rate": 9.11118326839281e-06, "loss": 2.5247, "step": 801000 }, { "epoch": 0.20892341856289268, "grad_norm": 12.861638069152832, "learning_rate": 9.110707444978322e-06, "loss": 2.5669, "step": 801200 }, { "epoch": 0.20897557118859483, "grad_norm": 11.404274940490723, "learning_rate": 9.110231506664824e-06, "loss": 2.5437, "step": 801400 }, { "epoch": 0.20902772381429702, "grad_norm": 13.15756607055664, "learning_rate": 9.109755453465615e-06, "loss": 2.53, "step": 801600 }, { "epoch": 0.20907987643999917, "grad_norm": 13.401494026184082, "learning_rate": 9.109279285394003e-06, "loss": 2.5359, "step": 801800 }, { "epoch": 0.20913202906570136, "grad_norm": 13.968293190002441, "learning_rate": 9.108803002463295e-06, "loss": 2.5573, "step": 802000 }, { "epoch": 0.20918418169140351, "grad_norm": 13.148781776428223, "learning_rate": 9.108326604686807e-06, "loss": 2.56, "step": 802200 }, { "epoch": 0.2092363343171057, "grad_norm": 13.43651294708252, "learning_rate": 9.107850092077853e-06, "loss": 2.5353, "step": 802400 }, { "epoch": 0.20928848694280786, "grad_norm": 12.484440803527832, "learning_rate": 9.107373464649754e-06, "loss": 2.5772, "step": 802600 }, { "epoch": 0.20934063956851004, "grad_norm": 13.605812072753906, "learning_rate": 9.106896722415829e-06, "loss": 2.5525, "step": 802800 }, { "epoch": 0.2093927921942122, "grad_norm": 13.259238243103027, "learning_rate": 9.106419865389405e-06, "loss": 2.596, "step": 803000 }, { "epoch": 0.20944494481991438, "grad_norm": 11.530973434448242, "learning_rate": 9.10594289358381e-06, "loss": 2.5526, "step": 803200 }, { "epoch": 0.20949709744561654, "grad_norm": 12.25853157043457, "learning_rate": 9.105465807012379e-06, "loss": 2.5761, "step": 803400 }, { "epoch": 0.20954925007131872, "grad_norm": 10.755802154541016, "learning_rate": 9.104988605688445e-06, "loss": 2.5424, "step": 803600 }, { "epoch": 0.20960140269702088, "grad_norm": 13.130935668945312, "learning_rate": 9.104511289625345e-06, "loss": 2.5453, "step": 803800 }, { "epoch": 0.20965355532272306, "grad_norm": 13.585618019104004, "learning_rate": 9.10403385883642e-06, "loss": 2.4936, "step": 804000 }, { "epoch": 0.20970570794842522, "grad_norm": 13.895092964172363, "learning_rate": 9.103556313335019e-06, "loss": 2.5333, "step": 804200 }, { "epoch": 0.2097578605741274, "grad_norm": 12.45972728729248, "learning_rate": 9.103078653134487e-06, "loss": 2.5459, "step": 804400 }, { "epoch": 0.20981001319982956, "grad_norm": 13.445938110351562, "learning_rate": 9.102600878248176e-06, "loss": 2.5226, "step": 804600 }, { "epoch": 0.20986216582553174, "grad_norm": 12.260578155517578, "learning_rate": 9.102122988689439e-06, "loss": 2.5466, "step": 804800 }, { "epoch": 0.2099143184512339, "grad_norm": 13.503302574157715, "learning_rate": 9.101644984471636e-06, "loss": 2.5485, "step": 805000 }, { "epoch": 0.20996647107693608, "grad_norm": 11.830412864685059, "learning_rate": 9.101166865608125e-06, "loss": 2.5331, "step": 805200 }, { "epoch": 0.21001862370263824, "grad_norm": 11.57419204711914, "learning_rate": 9.100688632112272e-06, "loss": 2.5551, "step": 805400 }, { "epoch": 0.21007077632834043, "grad_norm": 13.763896942138672, "learning_rate": 9.100210283997442e-06, "loss": 2.5441, "step": 805600 }, { "epoch": 0.21012292895404258, "grad_norm": 11.554506301879883, "learning_rate": 9.099731821277008e-06, "loss": 2.571, "step": 805800 }, { "epoch": 0.21017508157974477, "grad_norm": 12.712745666503906, "learning_rate": 9.099253243964343e-06, "loss": 2.5449, "step": 806000 }, { "epoch": 0.21022723420544692, "grad_norm": 12.259404182434082, "learning_rate": 9.098774552072823e-06, "loss": 2.5364, "step": 806200 }, { "epoch": 0.2102793868311491, "grad_norm": 14.388871192932129, "learning_rate": 9.098295745615826e-06, "loss": 2.5435, "step": 806400 }, { "epoch": 0.21033153945685126, "grad_norm": 11.50523853302002, "learning_rate": 9.09781682460674e-06, "loss": 2.5728, "step": 806600 }, { "epoch": 0.21038369208255345, "grad_norm": 11.484689712524414, "learning_rate": 9.097337789058947e-06, "loss": 2.5825, "step": 806800 }, { "epoch": 0.2104358447082556, "grad_norm": 12.460712432861328, "learning_rate": 9.096858638985839e-06, "loss": 2.5243, "step": 807000 }, { "epoch": 0.21048799733395776, "grad_norm": 13.950214385986328, "learning_rate": 9.09637937440081e-06, "loss": 2.5351, "step": 807200 }, { "epoch": 0.21054014995965994, "grad_norm": 12.65377140045166, "learning_rate": 9.095899995317252e-06, "loss": 2.5472, "step": 807400 }, { "epoch": 0.2105923025853621, "grad_norm": 11.964714050292969, "learning_rate": 9.095420501748568e-06, "loss": 2.5647, "step": 807600 }, { "epoch": 0.21064445521106429, "grad_norm": 12.092817306518555, "learning_rate": 9.094940893708157e-06, "loss": 2.5691, "step": 807800 }, { "epoch": 0.21069660783676644, "grad_norm": 13.357704162597656, "learning_rate": 9.094461171209427e-06, "loss": 2.5075, "step": 808000 }, { "epoch": 0.21074876046246863, "grad_norm": 11.30611801147461, "learning_rate": 9.093981334265787e-06, "loss": 2.509, "step": 808200 }, { "epoch": 0.21080091308817078, "grad_norm": 12.069721221923828, "learning_rate": 9.093501382890647e-06, "loss": 2.5223, "step": 808400 }, { "epoch": 0.21085306571387297, "grad_norm": 11.740452766418457, "learning_rate": 9.093021317097424e-06, "loss": 2.5337, "step": 808600 }, { "epoch": 0.21090521833957512, "grad_norm": 12.168525695800781, "learning_rate": 9.092541136899535e-06, "loss": 2.5598, "step": 808800 }, { "epoch": 0.2109573709652773, "grad_norm": 13.350499153137207, "learning_rate": 9.092060842310404e-06, "loss": 2.4994, "step": 809000 }, { "epoch": 0.21100952359097946, "grad_norm": 12.407944679260254, "learning_rate": 9.091580433343452e-06, "loss": 2.4965, "step": 809200 }, { "epoch": 0.21106167621668165, "grad_norm": 14.529518127441406, "learning_rate": 9.09109991001211e-06, "loss": 2.5579, "step": 809400 }, { "epoch": 0.2111138288423838, "grad_norm": 12.541584968566895, "learning_rate": 9.09061927232981e-06, "loss": 2.5532, "step": 809600 }, { "epoch": 0.211165981468086, "grad_norm": 14.252954483032227, "learning_rate": 9.090138520309983e-06, "loss": 2.5472, "step": 809800 }, { "epoch": 0.21121813409378815, "grad_norm": 13.405433654785156, "learning_rate": 9.089657653966069e-06, "loss": 2.5333, "step": 810000 }, { "epoch": 0.21127028671949033, "grad_norm": 10.493156433105469, "learning_rate": 9.089176673311506e-06, "loss": 2.5312, "step": 810200 }, { "epoch": 0.2113224393451925, "grad_norm": 12.578083992004395, "learning_rate": 9.088695578359742e-06, "loss": 2.5336, "step": 810400 }, { "epoch": 0.21137459197089467, "grad_norm": 12.994580268859863, "learning_rate": 9.088214369124221e-06, "loss": 2.5489, "step": 810600 }, { "epoch": 0.21142674459659683, "grad_norm": 11.961739540100098, "learning_rate": 9.087733045618396e-06, "loss": 2.5717, "step": 810800 }, { "epoch": 0.211478897222299, "grad_norm": 12.124736785888672, "learning_rate": 9.087251607855718e-06, "loss": 2.5273, "step": 811000 }, { "epoch": 0.21153104984800117, "grad_norm": 14.014177322387695, "learning_rate": 9.086770055849645e-06, "loss": 2.5209, "step": 811200 }, { "epoch": 0.21158320247370335, "grad_norm": 12.617166519165039, "learning_rate": 9.086288389613636e-06, "loss": 2.5425, "step": 811400 }, { "epoch": 0.2116353550994055, "grad_norm": 12.897732734680176, "learning_rate": 9.085806609161156e-06, "loss": 2.5501, "step": 811600 }, { "epoch": 0.2116875077251077, "grad_norm": 14.856685638427734, "learning_rate": 9.085324714505669e-06, "loss": 2.5208, "step": 811800 }, { "epoch": 0.21173966035080985, "grad_norm": 11.253473281860352, "learning_rate": 9.084842705660646e-06, "loss": 2.5396, "step": 812000 }, { "epoch": 0.21179181297651203, "grad_norm": 13.86609172821045, "learning_rate": 9.08436058263956e-06, "loss": 2.5441, "step": 812200 }, { "epoch": 0.2118439656022142, "grad_norm": 11.815281867980957, "learning_rate": 9.083878345455885e-06, "loss": 2.543, "step": 812400 }, { "epoch": 0.21189611822791637, "grad_norm": 12.452479362487793, "learning_rate": 9.083395994123103e-06, "loss": 2.5331, "step": 812600 }, { "epoch": 0.21194827085361853, "grad_norm": 13.19066333770752, "learning_rate": 9.082913528654694e-06, "loss": 2.5344, "step": 812800 }, { "epoch": 0.21200042347932072, "grad_norm": 11.936997413635254, "learning_rate": 9.082430949064144e-06, "loss": 2.5393, "step": 813000 }, { "epoch": 0.21205257610502287, "grad_norm": 13.574031829833984, "learning_rate": 9.081948255364941e-06, "loss": 2.5316, "step": 813200 }, { "epoch": 0.21210472873072503, "grad_norm": 11.525445938110352, "learning_rate": 9.081465447570578e-06, "loss": 2.5151, "step": 813400 }, { "epoch": 0.2121568813564272, "grad_norm": 11.842822074890137, "learning_rate": 9.08098252569455e-06, "loss": 2.6006, "step": 813600 }, { "epoch": 0.21220903398212937, "grad_norm": 12.733033180236816, "learning_rate": 9.080499489750357e-06, "loss": 2.569, "step": 813800 }, { "epoch": 0.21226118660783155, "grad_norm": 13.042905807495117, "learning_rate": 9.080016339751495e-06, "loss": 2.5706, "step": 814000 }, { "epoch": 0.2123133392335337, "grad_norm": 12.87771224975586, "learning_rate": 9.079533075711473e-06, "loss": 2.5289, "step": 814200 }, { "epoch": 0.2123654918592359, "grad_norm": 12.961420059204102, "learning_rate": 9.079049697643798e-06, "loss": 2.58, "step": 814400 }, { "epoch": 0.21241764448493805, "grad_norm": 12.723822593688965, "learning_rate": 9.078566205561981e-06, "loss": 2.542, "step": 814600 }, { "epoch": 0.21246979711064023, "grad_norm": 12.822134971618652, "learning_rate": 9.078082599479535e-06, "loss": 2.5328, "step": 814800 }, { "epoch": 0.2125219497363424, "grad_norm": 10.794196128845215, "learning_rate": 9.077598879409977e-06, "loss": 2.5756, "step": 815000 }, { "epoch": 0.21257410236204458, "grad_norm": 11.451395034790039, "learning_rate": 9.07711504536683e-06, "loss": 2.5266, "step": 815200 }, { "epoch": 0.21262625498774673, "grad_norm": 13.746084213256836, "learning_rate": 9.076631097363615e-06, "loss": 2.53, "step": 815400 }, { "epoch": 0.21267840761344892, "grad_norm": 13.913445472717285, "learning_rate": 9.076147035413863e-06, "loss": 2.5575, "step": 815600 }, { "epoch": 0.21273056023915107, "grad_norm": 12.547298431396484, "learning_rate": 9.0756628595311e-06, "loss": 2.5415, "step": 815800 }, { "epoch": 0.21278271286485326, "grad_norm": 13.744832038879395, "learning_rate": 9.07517856972886e-06, "loss": 2.5073, "step": 816000 }, { "epoch": 0.2128348654905554, "grad_norm": 12.159852027893066, "learning_rate": 9.074694166020682e-06, "loss": 2.5024, "step": 816200 }, { "epoch": 0.2128870181162576, "grad_norm": 12.20659065246582, "learning_rate": 9.074209648420101e-06, "loss": 2.518, "step": 816400 }, { "epoch": 0.21293917074195975, "grad_norm": 12.317503929138184, "learning_rate": 9.073725016940665e-06, "loss": 2.5395, "step": 816600 }, { "epoch": 0.21299132336766194, "grad_norm": 12.387846946716309, "learning_rate": 9.073240271595916e-06, "loss": 2.5347, "step": 816800 }, { "epoch": 0.2130434759933641, "grad_norm": 13.619342803955078, "learning_rate": 9.072755412399406e-06, "loss": 2.5815, "step": 817000 }, { "epoch": 0.21309562861906628, "grad_norm": 12.313323020935059, "learning_rate": 9.072270439364684e-06, "loss": 2.5288, "step": 817200 }, { "epoch": 0.21314778124476844, "grad_norm": 15.066256523132324, "learning_rate": 9.07178535250531e-06, "loss": 2.5235, "step": 817400 }, { "epoch": 0.21319993387047062, "grad_norm": 13.39647388458252, "learning_rate": 9.07130015183484e-06, "loss": 2.5484, "step": 817600 }, { "epoch": 0.21325208649617278, "grad_norm": 11.415046691894531, "learning_rate": 9.070814837366836e-06, "loss": 2.5879, "step": 817800 }, { "epoch": 0.21330423912187496, "grad_norm": 10.62917709350586, "learning_rate": 9.070329409114862e-06, "loss": 2.5464, "step": 818000 }, { "epoch": 0.21335639174757712, "grad_norm": 13.691715240478516, "learning_rate": 9.06984386709249e-06, "loss": 2.5395, "step": 818200 }, { "epoch": 0.2134085443732793, "grad_norm": 10.864567756652832, "learning_rate": 9.069358211313289e-06, "loss": 2.5282, "step": 818400 }, { "epoch": 0.21346069699898146, "grad_norm": 11.341833114624023, "learning_rate": 9.068872441790832e-06, "loss": 2.5543, "step": 818600 }, { "epoch": 0.21351284962468364, "grad_norm": 11.319380760192871, "learning_rate": 9.0683865585387e-06, "loss": 2.5383, "step": 818800 }, { "epoch": 0.2135650022503858, "grad_norm": 11.097443580627441, "learning_rate": 9.067900561570472e-06, "loss": 2.5757, "step": 819000 }, { "epoch": 0.21361715487608796, "grad_norm": 11.91701889038086, "learning_rate": 9.067414450899735e-06, "loss": 2.5255, "step": 819200 }, { "epoch": 0.21366930750179014, "grad_norm": 11.580026626586914, "learning_rate": 9.066928226540073e-06, "loss": 2.5549, "step": 819400 }, { "epoch": 0.2137214601274923, "grad_norm": 11.118182182312012, "learning_rate": 9.066441888505077e-06, "loss": 2.5484, "step": 819600 }, { "epoch": 0.21377361275319448, "grad_norm": 11.571749687194824, "learning_rate": 9.065955436808343e-06, "loss": 2.5065, "step": 819800 }, { "epoch": 0.21382576537889664, "grad_norm": 12.736671447753906, "learning_rate": 9.065468871463464e-06, "loss": 2.5625, "step": 820000 }, { "epoch": 0.21387791800459882, "grad_norm": 13.102340698242188, "learning_rate": 9.064982192484042e-06, "loss": 2.5556, "step": 820200 }, { "epoch": 0.21393007063030098, "grad_norm": 13.578348159790039, "learning_rate": 9.064495399883684e-06, "loss": 2.5439, "step": 820400 }, { "epoch": 0.21398222325600316, "grad_norm": 13.68409538269043, "learning_rate": 9.064008493675991e-06, "loss": 2.5887, "step": 820600 }, { "epoch": 0.21403437588170532, "grad_norm": 13.699695587158203, "learning_rate": 9.063521473874574e-06, "loss": 2.5437, "step": 820800 }, { "epoch": 0.2140865285074075, "grad_norm": 12.38679313659668, "learning_rate": 9.063034340493048e-06, "loss": 2.5402, "step": 821000 }, { "epoch": 0.21413868113310966, "grad_norm": 11.524242401123047, "learning_rate": 9.062547093545025e-06, "loss": 2.5281, "step": 821200 }, { "epoch": 0.21419083375881184, "grad_norm": 11.350960731506348, "learning_rate": 9.062059733044129e-06, "loss": 2.5657, "step": 821400 }, { "epoch": 0.214242986384514, "grad_norm": 14.124442100524902, "learning_rate": 9.06157225900398e-06, "loss": 2.543, "step": 821600 }, { "epoch": 0.21429513901021618, "grad_norm": 13.106842041015625, "learning_rate": 9.061084671438201e-06, "loss": 2.5266, "step": 821800 }, { "epoch": 0.21434729163591834, "grad_norm": 12.565150260925293, "learning_rate": 9.060596970360423e-06, "loss": 2.5241, "step": 822000 }, { "epoch": 0.21439944426162053, "grad_norm": 12.959101676940918, "learning_rate": 9.060109155784278e-06, "loss": 2.5496, "step": 822200 }, { "epoch": 0.21445159688732268, "grad_norm": 12.728083610534668, "learning_rate": 9.059621227723401e-06, "loss": 2.5343, "step": 822400 }, { "epoch": 0.21450374951302487, "grad_norm": 13.199159622192383, "learning_rate": 9.05913318619143e-06, "loss": 2.5516, "step": 822600 }, { "epoch": 0.21455590213872702, "grad_norm": 11.12812614440918, "learning_rate": 9.058645031202005e-06, "loss": 2.5033, "step": 822800 }, { "epoch": 0.2146080547644292, "grad_norm": 13.735895156860352, "learning_rate": 9.058156762768773e-06, "loss": 2.5754, "step": 823000 }, { "epoch": 0.21466020739013136, "grad_norm": 14.441383361816406, "learning_rate": 9.057668380905379e-06, "loss": 2.5403, "step": 823200 }, { "epoch": 0.21471236001583355, "grad_norm": 14.57379150390625, "learning_rate": 9.057179885625475e-06, "loss": 2.5133, "step": 823400 }, { "epoch": 0.2147645126415357, "grad_norm": 12.929058074951172, "learning_rate": 9.056691276942717e-06, "loss": 2.5448, "step": 823600 }, { "epoch": 0.2148166652672379, "grad_norm": 12.269718170166016, "learning_rate": 9.056202554870759e-06, "loss": 2.5746, "step": 823800 }, { "epoch": 0.21486881789294004, "grad_norm": 13.69266414642334, "learning_rate": 9.055713719423262e-06, "loss": 2.5415, "step": 824000 }, { "epoch": 0.21492097051864223, "grad_norm": 10.19147777557373, "learning_rate": 9.05522477061389e-06, "loss": 2.6066, "step": 824200 }, { "epoch": 0.21497312314434439, "grad_norm": 13.49588680267334, "learning_rate": 9.054735708456311e-06, "loss": 2.5503, "step": 824400 }, { "epoch": 0.21502527577004657, "grad_norm": 13.206926345825195, "learning_rate": 9.054246532964194e-06, "loss": 2.582, "step": 824600 }, { "epoch": 0.21507742839574873, "grad_norm": 14.236640930175781, "learning_rate": 9.053757244151211e-06, "loss": 2.563, "step": 824800 }, { "epoch": 0.21512958102145088, "grad_norm": 12.087728500366211, "learning_rate": 9.053267842031038e-06, "loss": 2.5586, "step": 825000 }, { "epoch": 0.21518173364715307, "grad_norm": 13.20797348022461, "learning_rate": 9.052778326617357e-06, "loss": 2.5417, "step": 825200 }, { "epoch": 0.21523388627285522, "grad_norm": 13.239849090576172, "learning_rate": 9.052288697923848e-06, "loss": 2.5569, "step": 825400 }, { "epoch": 0.2152860388985574, "grad_norm": 13.600337028503418, "learning_rate": 9.051798955964196e-06, "loss": 2.5431, "step": 825600 }, { "epoch": 0.21533819152425956, "grad_norm": 14.598825454711914, "learning_rate": 9.051309100752093e-06, "loss": 2.5203, "step": 825800 }, { "epoch": 0.21539034414996175, "grad_norm": 11.750934600830078, "learning_rate": 9.050819132301227e-06, "loss": 2.5027, "step": 826000 }, { "epoch": 0.2154424967756639, "grad_norm": 13.683958053588867, "learning_rate": 9.050329050625299e-06, "loss": 2.5162, "step": 826200 }, { "epoch": 0.2154946494013661, "grad_norm": 12.847051620483398, "learning_rate": 9.049838855738001e-06, "loss": 2.5338, "step": 826400 }, { "epoch": 0.21554680202706825, "grad_norm": 12.681694984436035, "learning_rate": 9.049348547653038e-06, "loss": 2.532, "step": 826600 }, { "epoch": 0.21559895465277043, "grad_norm": 13.66247272491455, "learning_rate": 9.048858126384115e-06, "loss": 2.5436, "step": 826800 }, { "epoch": 0.2156511072784726, "grad_norm": 14.794609069824219, "learning_rate": 9.048367591944938e-06, "loss": 2.57, "step": 827000 }, { "epoch": 0.21570325990417477, "grad_norm": 13.683991432189941, "learning_rate": 9.04787694434922e-06, "loss": 2.5269, "step": 827200 }, { "epoch": 0.21575541252987693, "grad_norm": 11.677818298339844, "learning_rate": 9.047386183610672e-06, "loss": 2.5464, "step": 827400 }, { "epoch": 0.2158075651555791, "grad_norm": 12.510682106018066, "learning_rate": 9.046895309743016e-06, "loss": 2.5522, "step": 827600 }, { "epoch": 0.21585971778128127, "grad_norm": 14.272283554077148, "learning_rate": 9.046404322759968e-06, "loss": 2.5624, "step": 827800 }, { "epoch": 0.21591187040698345, "grad_norm": 13.069134712219238, "learning_rate": 9.045913222675253e-06, "loss": 2.576, "step": 828000 }, { "epoch": 0.2159640230326856, "grad_norm": 12.253870964050293, "learning_rate": 9.045422009502598e-06, "loss": 2.5634, "step": 828200 }, { "epoch": 0.2160161756583878, "grad_norm": 13.879683494567871, "learning_rate": 9.044930683255735e-06, "loss": 2.5178, "step": 828400 }, { "epoch": 0.21606832828408995, "grad_norm": 11.61858081817627, "learning_rate": 9.044439243948395e-06, "loss": 2.5837, "step": 828600 }, { "epoch": 0.21612048090979213, "grad_norm": 14.2604398727417, "learning_rate": 9.043947691594314e-06, "loss": 2.5271, "step": 828800 }, { "epoch": 0.2161726335354943, "grad_norm": 13.9703950881958, "learning_rate": 9.043456026207234e-06, "loss": 2.5254, "step": 829000 }, { "epoch": 0.21622478616119647, "grad_norm": 12.149286270141602, "learning_rate": 9.042964247800894e-06, "loss": 2.5174, "step": 829200 }, { "epoch": 0.21627693878689863, "grad_norm": 11.892560958862305, "learning_rate": 9.042472356389043e-06, "loss": 2.5527, "step": 829400 }, { "epoch": 0.21632909141260082, "grad_norm": 12.29517650604248, "learning_rate": 9.041980351985427e-06, "loss": 2.5139, "step": 829600 }, { "epoch": 0.21638124403830297, "grad_norm": 12.279248237609863, "learning_rate": 9.0414882346038e-06, "loss": 2.5625, "step": 829800 }, { "epoch": 0.21643339666400516, "grad_norm": 13.284655570983887, "learning_rate": 9.040996004257915e-06, "loss": 2.5293, "step": 830000 }, { "epoch": 0.2164855492897073, "grad_norm": 14.251969337463379, "learning_rate": 9.040503660961536e-06, "loss": 2.5693, "step": 830200 }, { "epoch": 0.2165377019154095, "grad_norm": 11.924370765686035, "learning_rate": 9.040011204728419e-06, "loss": 2.5473, "step": 830400 }, { "epoch": 0.21658985454111165, "grad_norm": 10.449265480041504, "learning_rate": 9.03951863557233e-06, "loss": 2.5117, "step": 830600 }, { "epoch": 0.2166420071668138, "grad_norm": 12.4495210647583, "learning_rate": 9.039025953507038e-06, "loss": 2.5552, "step": 830800 }, { "epoch": 0.216694159792516, "grad_norm": 13.751075744628906, "learning_rate": 9.038533158546313e-06, "loss": 2.568, "step": 831000 }, { "epoch": 0.21674631241821815, "grad_norm": 12.695106506347656, "learning_rate": 9.03804025070393e-06, "loss": 2.5351, "step": 831200 }, { "epoch": 0.21679846504392034, "grad_norm": 11.535404205322266, "learning_rate": 9.037547229993667e-06, "loss": 2.5723, "step": 831400 }, { "epoch": 0.2168506176696225, "grad_norm": 12.56066608428955, "learning_rate": 9.037054096429301e-06, "loss": 2.54, "step": 831600 }, { "epoch": 0.21690277029532468, "grad_norm": 12.696258544921875, "learning_rate": 9.03656085002462e-06, "loss": 2.5412, "step": 831800 }, { "epoch": 0.21695492292102683, "grad_norm": 12.870928764343262, "learning_rate": 9.036067490793408e-06, "loss": 2.5416, "step": 832000 }, { "epoch": 0.21700707554672902, "grad_norm": 11.494585037231445, "learning_rate": 9.035574018749456e-06, "loss": 2.5161, "step": 832200 }, { "epoch": 0.21705922817243117, "grad_norm": 13.567378997802734, "learning_rate": 9.035080433906557e-06, "loss": 2.5638, "step": 832400 }, { "epoch": 0.21711138079813336, "grad_norm": 11.019129753112793, "learning_rate": 9.034586736278507e-06, "loss": 2.5199, "step": 832600 }, { "epoch": 0.21716353342383551, "grad_norm": 11.188690185546875, "learning_rate": 9.034092925879106e-06, "loss": 2.5601, "step": 832800 }, { "epoch": 0.2172156860495377, "grad_norm": 13.095340728759766, "learning_rate": 9.033599002722157e-06, "loss": 2.5373, "step": 833000 }, { "epoch": 0.21726783867523985, "grad_norm": 13.44362735748291, "learning_rate": 9.033104966821464e-06, "loss": 2.5655, "step": 833200 }, { "epoch": 0.21731999130094204, "grad_norm": 13.946337699890137, "learning_rate": 9.032610818190836e-06, "loss": 2.5533, "step": 833400 }, { "epoch": 0.2173721439266442, "grad_norm": 13.125496864318848, "learning_rate": 9.032116556844088e-06, "loss": 2.5252, "step": 833600 }, { "epoch": 0.21742429655234638, "grad_norm": 14.219965934753418, "learning_rate": 9.031622182795033e-06, "loss": 2.5037, "step": 833800 }, { "epoch": 0.21747644917804854, "grad_norm": 12.486796379089355, "learning_rate": 9.031127696057488e-06, "loss": 2.5568, "step": 834000 }, { "epoch": 0.21752860180375072, "grad_norm": 14.136662483215332, "learning_rate": 9.030633096645277e-06, "loss": 2.5376, "step": 834200 }, { "epoch": 0.21758075442945288, "grad_norm": 13.452658653259277, "learning_rate": 9.030138384572222e-06, "loss": 2.5766, "step": 834400 }, { "epoch": 0.21763290705515506, "grad_norm": 11.165623664855957, "learning_rate": 9.029643559852152e-06, "loss": 2.527, "step": 834600 }, { "epoch": 0.21768505968085722, "grad_norm": 11.985844612121582, "learning_rate": 9.029148622498898e-06, "loss": 2.5726, "step": 834800 }, { "epoch": 0.2177372123065594, "grad_norm": 12.202817916870117, "learning_rate": 9.028653572526296e-06, "loss": 2.5832, "step": 835000 }, { "epoch": 0.21778936493226156, "grad_norm": 11.637069702148438, "learning_rate": 9.028158409948181e-06, "loss": 2.5166, "step": 835200 }, { "epoch": 0.21784151755796374, "grad_norm": 12.84749698638916, "learning_rate": 9.027663134778394e-06, "loss": 2.5217, "step": 835400 }, { "epoch": 0.2178936701836659, "grad_norm": 12.990008354187012, "learning_rate": 9.027167747030776e-06, "loss": 2.5318, "step": 835600 }, { "epoch": 0.21794582280936808, "grad_norm": 12.376798629760742, "learning_rate": 9.026672246719179e-06, "loss": 2.5525, "step": 835800 }, { "epoch": 0.21799797543507024, "grad_norm": 11.98593521118164, "learning_rate": 9.02617663385745e-06, "loss": 2.492, "step": 836000 }, { "epoch": 0.21805012806077242, "grad_norm": 12.605941772460938, "learning_rate": 9.025680908459437e-06, "loss": 2.5542, "step": 836200 }, { "epoch": 0.21810228068647458, "grad_norm": 12.421950340270996, "learning_rate": 9.025185070539005e-06, "loss": 2.5238, "step": 836400 }, { "epoch": 0.21815443331217677, "grad_norm": 11.057263374328613, "learning_rate": 9.024689120110009e-06, "loss": 2.5182, "step": 836600 }, { "epoch": 0.21820658593787892, "grad_norm": 13.192597389221191, "learning_rate": 9.02419305718631e-06, "loss": 2.5215, "step": 836800 }, { "epoch": 0.21825873856358108, "grad_norm": 13.819595336914062, "learning_rate": 9.023696881781776e-06, "loss": 2.5339, "step": 837000 }, { "epoch": 0.21831089118928326, "grad_norm": 12.131237983703613, "learning_rate": 9.023200593910273e-06, "loss": 2.5688, "step": 837200 }, { "epoch": 0.21836304381498542, "grad_norm": 12.664222717285156, "learning_rate": 9.022704193585677e-06, "loss": 2.5514, "step": 837400 }, { "epoch": 0.2184151964406876, "grad_norm": 13.142849922180176, "learning_rate": 9.022207680821858e-06, "loss": 2.5142, "step": 837600 }, { "epoch": 0.21846734906638976, "grad_norm": 13.29996109008789, "learning_rate": 9.021711055632698e-06, "loss": 2.5353, "step": 837800 }, { "epoch": 0.21851950169209194, "grad_norm": 12.690460205078125, "learning_rate": 9.021214318032077e-06, "loss": 2.5696, "step": 838000 }, { "epoch": 0.2185716543177941, "grad_norm": 13.326699256896973, "learning_rate": 9.020717468033877e-06, "loss": 2.5998, "step": 838200 }, { "epoch": 0.21862380694349628, "grad_norm": 11.772669792175293, "learning_rate": 9.02022050565199e-06, "loss": 2.5221, "step": 838400 }, { "epoch": 0.21867595956919844, "grad_norm": 12.478630065917969, "learning_rate": 9.019723430900304e-06, "loss": 2.5427, "step": 838600 }, { "epoch": 0.21872811219490063, "grad_norm": 11.8933687210083, "learning_rate": 9.019226243792712e-06, "loss": 2.5576, "step": 838800 }, { "epoch": 0.21878026482060278, "grad_norm": 11.510830879211426, "learning_rate": 9.018728944343113e-06, "loss": 2.5357, "step": 839000 }, { "epoch": 0.21883241744630497, "grad_norm": 12.072577476501465, "learning_rate": 9.018231532565407e-06, "loss": 2.5547, "step": 839200 }, { "epoch": 0.21888457007200712, "grad_norm": 12.852787017822266, "learning_rate": 9.017734008473495e-06, "loss": 2.5539, "step": 839400 }, { "epoch": 0.2189367226977093, "grad_norm": 15.137070655822754, "learning_rate": 9.017236372081286e-06, "loss": 2.5924, "step": 839600 }, { "epoch": 0.21898887532341146, "grad_norm": 13.725959777832031, "learning_rate": 9.016738623402688e-06, "loss": 2.5264, "step": 839800 }, { "epoch": 0.21904102794911365, "grad_norm": 12.524657249450684, "learning_rate": 9.016240762451613e-06, "loss": 2.5328, "step": 840000 }, { "epoch": 0.2190931805748158, "grad_norm": 12.656744956970215, "learning_rate": 9.015742789241979e-06, "loss": 2.5303, "step": 840200 }, { "epoch": 0.219145333200518, "grad_norm": 14.41982364654541, "learning_rate": 9.015244703787704e-06, "loss": 2.5192, "step": 840400 }, { "epoch": 0.21919748582622015, "grad_norm": 13.495166778564453, "learning_rate": 9.014746506102709e-06, "loss": 2.5097, "step": 840600 }, { "epoch": 0.21924963845192233, "grad_norm": 12.303117752075195, "learning_rate": 9.01424819620092e-06, "loss": 2.5406, "step": 840800 }, { "epoch": 0.21930179107762449, "grad_norm": 11.897546768188477, "learning_rate": 9.013749774096265e-06, "loss": 2.5776, "step": 841000 }, { "epoch": 0.21935394370332667, "grad_norm": 11.718711853027344, "learning_rate": 9.013251239802676e-06, "loss": 2.5363, "step": 841200 }, { "epoch": 0.21940609632902883, "grad_norm": 14.32603931427002, "learning_rate": 9.012752593334087e-06, "loss": 2.528, "step": 841400 }, { "epoch": 0.219458248954731, "grad_norm": 11.693556785583496, "learning_rate": 9.012253834704438e-06, "loss": 2.5022, "step": 841600 }, { "epoch": 0.21951040158043317, "grad_norm": 10.804730415344238, "learning_rate": 9.011754963927666e-06, "loss": 2.5521, "step": 841800 }, { "epoch": 0.21956255420613535, "grad_norm": 12.166260719299316, "learning_rate": 9.011255981017718e-06, "loss": 2.5571, "step": 842000 }, { "epoch": 0.2196147068318375, "grad_norm": 9.505474090576172, "learning_rate": 9.010756885988541e-06, "loss": 2.5141, "step": 842200 }, { "epoch": 0.2196668594575397, "grad_norm": 12.927766799926758, "learning_rate": 9.010257678854081e-06, "loss": 2.5813, "step": 842400 }, { "epoch": 0.21971901208324185, "grad_norm": 12.975953102111816, "learning_rate": 9.009758359628298e-06, "loss": 2.5434, "step": 842600 }, { "epoch": 0.219771164708944, "grad_norm": 12.598480224609375, "learning_rate": 9.009258928325146e-06, "loss": 2.52, "step": 842800 }, { "epoch": 0.2198233173346462, "grad_norm": 11.917780876159668, "learning_rate": 9.008759384958582e-06, "loss": 2.5169, "step": 843000 }, { "epoch": 0.21987546996034835, "grad_norm": 12.304062843322754, "learning_rate": 9.008259729542572e-06, "loss": 2.5698, "step": 843200 }, { "epoch": 0.21992762258605053, "grad_norm": 15.084339141845703, "learning_rate": 9.00775996209108e-06, "loss": 2.5361, "step": 843400 }, { "epoch": 0.2199797752117527, "grad_norm": 13.05875301361084, "learning_rate": 9.007260082618077e-06, "loss": 2.5248, "step": 843600 }, { "epoch": 0.22003192783745487, "grad_norm": 13.704729080200195, "learning_rate": 9.006760091137534e-06, "loss": 2.5512, "step": 843800 }, { "epoch": 0.22008408046315703, "grad_norm": 13.089705467224121, "learning_rate": 9.006259987663425e-06, "loss": 2.526, "step": 844000 }, { "epoch": 0.2201362330888592, "grad_norm": 12.926041603088379, "learning_rate": 9.005759772209732e-06, "loss": 2.5207, "step": 844200 }, { "epoch": 0.22018838571456137, "grad_norm": 11.693181991577148, "learning_rate": 9.005259444790432e-06, "loss": 2.492, "step": 844400 }, { "epoch": 0.22024053834026355, "grad_norm": 12.62060260772705, "learning_rate": 9.004759005419515e-06, "loss": 2.5323, "step": 844600 }, { "epoch": 0.2202926909659657, "grad_norm": 12.25759506225586, "learning_rate": 9.004258454110966e-06, "loss": 2.4841, "step": 844800 }, { "epoch": 0.2203448435916679, "grad_norm": 13.82259750366211, "learning_rate": 9.003757790878775e-06, "loss": 2.5533, "step": 845000 }, { "epoch": 0.22039699621737005, "grad_norm": 12.706326484680176, "learning_rate": 9.003257015736937e-06, "loss": 2.5247, "step": 845200 }, { "epoch": 0.22044914884307223, "grad_norm": 12.647273063659668, "learning_rate": 9.00275612869945e-06, "loss": 2.5762, "step": 845400 }, { "epoch": 0.2205013014687744, "grad_norm": 12.341408729553223, "learning_rate": 9.002255129780313e-06, "loss": 2.5289, "step": 845600 }, { "epoch": 0.22055345409447658, "grad_norm": 13.406020164489746, "learning_rate": 9.001754018993531e-06, "loss": 2.4797, "step": 845800 }, { "epoch": 0.22060560672017873, "grad_norm": 13.798885345458984, "learning_rate": 9.001252796353111e-06, "loss": 2.5031, "step": 846000 }, { "epoch": 0.22065775934588092, "grad_norm": 13.883713722229004, "learning_rate": 9.000751461873061e-06, "loss": 2.5013, "step": 846200 }, { "epoch": 0.22070991197158307, "grad_norm": 12.606523513793945, "learning_rate": 9.000250015567394e-06, "loss": 2.5392, "step": 846400 }, { "epoch": 0.22076206459728526, "grad_norm": 13.218819618225098, "learning_rate": 8.999748457450125e-06, "loss": 2.5223, "step": 846600 }, { "epoch": 0.2208142172229874, "grad_norm": 14.202933311462402, "learning_rate": 8.999246787535279e-06, "loss": 2.5431, "step": 846800 }, { "epoch": 0.2208663698486896, "grad_norm": 10.781413078308105, "learning_rate": 8.99874500583687e-06, "loss": 2.5516, "step": 847000 }, { "epoch": 0.22091852247439175, "grad_norm": 12.983638763427734, "learning_rate": 8.99824311236893e-06, "loss": 2.5079, "step": 847200 }, { "epoch": 0.22097067510009394, "grad_norm": 12.726056098937988, "learning_rate": 8.997741107145484e-06, "loss": 2.4876, "step": 847400 }, { "epoch": 0.2210228277257961, "grad_norm": 13.305671691894531, "learning_rate": 8.997238990180563e-06, "loss": 2.5322, "step": 847600 }, { "epoch": 0.22107498035149828, "grad_norm": 12.6900634765625, "learning_rate": 8.996736761488205e-06, "loss": 2.539, "step": 847800 }, { "epoch": 0.22112713297720044, "grad_norm": 12.353796005249023, "learning_rate": 8.996234421082447e-06, "loss": 2.6022, "step": 848000 }, { "epoch": 0.22117928560290262, "grad_norm": 13.8502836227417, "learning_rate": 8.995731968977327e-06, "loss": 2.5678, "step": 848200 }, { "epoch": 0.22123143822860478, "grad_norm": 13.232843399047852, "learning_rate": 8.995229405186892e-06, "loss": 2.5363, "step": 848400 }, { "epoch": 0.22128359085430693, "grad_norm": 13.014341354370117, "learning_rate": 8.994726729725188e-06, "loss": 2.5494, "step": 848600 }, { "epoch": 0.22133574348000912, "grad_norm": 13.17010498046875, "learning_rate": 8.994223942606266e-06, "loss": 2.5208, "step": 848800 }, { "epoch": 0.22138789610571127, "grad_norm": 13.344563484191895, "learning_rate": 8.99372104384418e-06, "loss": 2.5356, "step": 849000 }, { "epoch": 0.22144004873141346, "grad_norm": 11.80360221862793, "learning_rate": 8.993218033452985e-06, "loss": 2.5652, "step": 849200 }, { "epoch": 0.22149220135711561, "grad_norm": 12.011212348937988, "learning_rate": 8.992714911446743e-06, "loss": 2.5629, "step": 849400 }, { "epoch": 0.2215443539828178, "grad_norm": 13.381622314453125, "learning_rate": 8.992211677839514e-06, "loss": 2.542, "step": 849600 }, { "epoch": 0.22159650660851996, "grad_norm": 12.223089218139648, "learning_rate": 8.991708332645365e-06, "loss": 2.545, "step": 849800 }, { "epoch": 0.22164865923422214, "grad_norm": 12.903460502624512, "learning_rate": 8.991204875878369e-06, "loss": 2.5258, "step": 850000 }, { "epoch": 0.2217008118599243, "grad_norm": 13.3897123336792, "learning_rate": 8.990701307552589e-06, "loss": 2.4829, "step": 850200 }, { "epoch": 0.22175296448562648, "grad_norm": 14.366217613220215, "learning_rate": 8.99019762768211e-06, "loss": 2.5757, "step": 850400 }, { "epoch": 0.22180511711132864, "grad_norm": 12.164314270019531, "learning_rate": 8.989693836281006e-06, "loss": 2.5282, "step": 850600 }, { "epoch": 0.22185726973703082, "grad_norm": 12.974170684814453, "learning_rate": 8.989189933363359e-06, "loss": 2.5623, "step": 850800 }, { "epoch": 0.22190942236273298, "grad_norm": 12.635279655456543, "learning_rate": 8.988685918943252e-06, "loss": 2.4967, "step": 851000 }, { "epoch": 0.22196157498843516, "grad_norm": 11.550530433654785, "learning_rate": 8.988181793034776e-06, "loss": 2.5069, "step": 851200 }, { "epoch": 0.22201372761413732, "grad_norm": 12.456903457641602, "learning_rate": 8.987677555652018e-06, "loss": 2.5032, "step": 851400 }, { "epoch": 0.2220658802398395, "grad_norm": 14.79409122467041, "learning_rate": 8.987173206809078e-06, "loss": 2.5223, "step": 851600 }, { "epoch": 0.22211803286554166, "grad_norm": 12.690301895141602, "learning_rate": 8.986668746520048e-06, "loss": 2.5552, "step": 851800 }, { "epoch": 0.22217018549124384, "grad_norm": 14.763097763061523, "learning_rate": 8.986164174799029e-06, "loss": 2.5392, "step": 852000 }, { "epoch": 0.222222338116946, "grad_norm": 13.585575103759766, "learning_rate": 8.985659491660126e-06, "loss": 2.5645, "step": 852200 }, { "epoch": 0.22227449074264818, "grad_norm": 13.037672996520996, "learning_rate": 8.985154697117444e-06, "loss": 2.545, "step": 852400 }, { "epoch": 0.22232664336835034, "grad_norm": 13.282525062561035, "learning_rate": 8.984649791185093e-06, "loss": 2.5653, "step": 852600 }, { "epoch": 0.22237879599405252, "grad_norm": 13.44238567352295, "learning_rate": 8.984144773877186e-06, "loss": 2.5334, "step": 852800 }, { "epoch": 0.22243094861975468, "grad_norm": 14.811910629272461, "learning_rate": 8.983639645207839e-06, "loss": 2.5435, "step": 853000 }, { "epoch": 0.22248310124545687, "grad_norm": 11.7992582321167, "learning_rate": 8.98313440519117e-06, "loss": 2.5333, "step": 853200 }, { "epoch": 0.22253525387115902, "grad_norm": 12.599976539611816, "learning_rate": 8.982629053841302e-06, "loss": 2.5414, "step": 853400 }, { "epoch": 0.2225874064968612, "grad_norm": 12.891295433044434, "learning_rate": 8.98212359117236e-06, "loss": 2.5178, "step": 853600 }, { "epoch": 0.22263955912256336, "grad_norm": 10.120680809020996, "learning_rate": 8.981618017198472e-06, "loss": 2.523, "step": 853800 }, { "epoch": 0.22269171174826555, "grad_norm": 14.803202629089355, "learning_rate": 8.98111233193377e-06, "loss": 2.5104, "step": 854000 }, { "epoch": 0.2227438643739677, "grad_norm": 11.992095947265625, "learning_rate": 8.980606535392387e-06, "loss": 2.5419, "step": 854200 }, { "epoch": 0.22279601699966986, "grad_norm": 13.348030090332031, "learning_rate": 8.980100627588461e-06, "loss": 2.5249, "step": 854400 }, { "epoch": 0.22284816962537204, "grad_norm": 12.748968124389648, "learning_rate": 8.979594608536135e-06, "loss": 2.5046, "step": 854600 }, { "epoch": 0.2229003222510742, "grad_norm": 12.485724449157715, "learning_rate": 8.97908847824955e-06, "loss": 2.5059, "step": 854800 }, { "epoch": 0.22295247487677639, "grad_norm": 13.133146286010742, "learning_rate": 8.978582236742854e-06, "loss": 2.5285, "step": 855000 }, { "epoch": 0.22300462750247854, "grad_norm": 11.857640266418457, "learning_rate": 8.978075884030197e-06, "loss": 2.5358, "step": 855200 }, { "epoch": 0.22305678012818073, "grad_norm": 12.643730163574219, "learning_rate": 8.977569420125732e-06, "loss": 2.4963, "step": 855400 }, { "epoch": 0.22310893275388288, "grad_norm": 11.763866424560547, "learning_rate": 8.977062845043616e-06, "loss": 2.5002, "step": 855600 }, { "epoch": 0.22316108537958507, "grad_norm": 11.065905570983887, "learning_rate": 8.976556158798006e-06, "loss": 2.5613, "step": 855800 }, { "epoch": 0.22321323800528722, "grad_norm": 12.88422679901123, "learning_rate": 8.976049361403067e-06, "loss": 2.5092, "step": 856000 }, { "epoch": 0.2232653906309894, "grad_norm": 12.495038986206055, "learning_rate": 8.975542452872966e-06, "loss": 2.5522, "step": 856200 }, { "epoch": 0.22331754325669156, "grad_norm": 10.122501373291016, "learning_rate": 8.975035433221867e-06, "loss": 2.5214, "step": 856400 }, { "epoch": 0.22336969588239375, "grad_norm": 10.929478645324707, "learning_rate": 8.974528302463946e-06, "loss": 2.5244, "step": 856600 }, { "epoch": 0.2234218485080959, "grad_norm": 13.326924324035645, "learning_rate": 8.974021060613374e-06, "loss": 2.499, "step": 856800 }, { "epoch": 0.2234740011337981, "grad_norm": 11.079444885253906, "learning_rate": 8.973513707684332e-06, "loss": 2.5378, "step": 857000 }, { "epoch": 0.22352615375950025, "grad_norm": 12.476800918579102, "learning_rate": 8.973006243691001e-06, "loss": 2.5788, "step": 857200 }, { "epoch": 0.22357830638520243, "grad_norm": 14.203133583068848, "learning_rate": 8.972498668647565e-06, "loss": 2.5313, "step": 857400 }, { "epoch": 0.2236304590109046, "grad_norm": 12.900059700012207, "learning_rate": 8.971990982568208e-06, "loss": 2.5252, "step": 857600 }, { "epoch": 0.22368261163660677, "grad_norm": 10.50400447845459, "learning_rate": 8.971483185467126e-06, "loss": 2.506, "step": 857800 }, { "epoch": 0.22373476426230893, "grad_norm": 12.504179954528809, "learning_rate": 8.970975277358509e-06, "loss": 2.5408, "step": 858000 }, { "epoch": 0.2237869168880111, "grad_norm": 11.387737274169922, "learning_rate": 8.970467258256552e-06, "loss": 2.4938, "step": 858200 }, { "epoch": 0.22383906951371327, "grad_norm": 12.079946517944336, "learning_rate": 8.96995912817546e-06, "loss": 2.5486, "step": 858400 }, { "epoch": 0.22389122213941545, "grad_norm": 11.162164688110352, "learning_rate": 8.969450887129431e-06, "loss": 2.5334, "step": 858600 }, { "epoch": 0.2239433747651176, "grad_norm": 15.253430366516113, "learning_rate": 8.968942535132675e-06, "loss": 2.5128, "step": 858800 }, { "epoch": 0.2239955273908198, "grad_norm": 12.579545974731445, "learning_rate": 8.968434072199396e-06, "loss": 2.59, "step": 859000 }, { "epoch": 0.22404768001652195, "grad_norm": 11.06598949432373, "learning_rate": 8.96792549834381e-06, "loss": 2.5401, "step": 859200 }, { "epoch": 0.22409983264222413, "grad_norm": 12.26706600189209, "learning_rate": 8.967416813580132e-06, "loss": 2.5401, "step": 859400 }, { "epoch": 0.2241519852679263, "grad_norm": 12.439864158630371, "learning_rate": 8.966908017922578e-06, "loss": 2.5566, "step": 859600 }, { "epoch": 0.22420413789362847, "grad_norm": 12.08425521850586, "learning_rate": 8.966399111385371e-06, "loss": 2.5422, "step": 859800 }, { "epoch": 0.22425629051933063, "grad_norm": 11.494114875793457, "learning_rate": 8.965890093982736e-06, "loss": 2.5269, "step": 860000 }, { "epoch": 0.2243084431450328, "grad_norm": 13.456847190856934, "learning_rate": 8.9653809657289e-06, "loss": 2.5331, "step": 860200 }, { "epoch": 0.22436059577073497, "grad_norm": 12.735716819763184, "learning_rate": 8.964871726638093e-06, "loss": 2.5312, "step": 860400 }, { "epoch": 0.22441274839643713, "grad_norm": 12.311686515808105, "learning_rate": 8.964362376724552e-06, "loss": 2.5054, "step": 860600 }, { "epoch": 0.2244649010221393, "grad_norm": 14.03046989440918, "learning_rate": 8.96385291600251e-06, "loss": 2.5587, "step": 860800 }, { "epoch": 0.22451705364784147, "grad_norm": 12.448440551757812, "learning_rate": 8.963343344486208e-06, "loss": 2.5317, "step": 861000 }, { "epoch": 0.22456920627354365, "grad_norm": 12.499717712402344, "learning_rate": 8.962833662189889e-06, "loss": 2.5408, "step": 861200 }, { "epoch": 0.2246213588992458, "grad_norm": 12.628153800964355, "learning_rate": 8.962323869127802e-06, "loss": 2.5539, "step": 861400 }, { "epoch": 0.224673511524948, "grad_norm": 12.310242652893066, "learning_rate": 8.961813965314192e-06, "loss": 2.4965, "step": 861600 }, { "epoch": 0.22472566415065015, "grad_norm": 12.289976119995117, "learning_rate": 8.961303950763314e-06, "loss": 2.5299, "step": 861800 }, { "epoch": 0.22477781677635233, "grad_norm": 14.414912223815918, "learning_rate": 8.960793825489425e-06, "loss": 2.6051, "step": 862000 }, { "epoch": 0.2248299694020545, "grad_norm": 12.091730117797852, "learning_rate": 8.960283589506779e-06, "loss": 2.5215, "step": 862200 }, { "epoch": 0.22488212202775668, "grad_norm": 13.612672805786133, "learning_rate": 8.959773242829641e-06, "loss": 2.5501, "step": 862400 }, { "epoch": 0.22493427465345883, "grad_norm": 13.881776809692383, "learning_rate": 8.959262785472275e-06, "loss": 2.5188, "step": 862600 }, { "epoch": 0.22498642727916102, "grad_norm": 14.095483779907227, "learning_rate": 8.95875221744895e-06, "loss": 2.5083, "step": 862800 }, { "epoch": 0.22503857990486317, "grad_norm": 12.817255020141602, "learning_rate": 8.958241538773935e-06, "loss": 2.5343, "step": 863000 }, { "epoch": 0.22509073253056536, "grad_norm": 12.889725685119629, "learning_rate": 8.957730749461505e-06, "loss": 2.5256, "step": 863200 }, { "epoch": 0.2251428851562675, "grad_norm": 13.904434204101562, "learning_rate": 8.957219849525938e-06, "loss": 2.5147, "step": 863400 }, { "epoch": 0.2251950377819697, "grad_norm": 12.321836471557617, "learning_rate": 8.956708838981512e-06, "loss": 2.5378, "step": 863600 }, { "epoch": 0.22524719040767185, "grad_norm": 12.931632041931152, "learning_rate": 8.956197717842512e-06, "loss": 2.4962, "step": 863800 }, { "epoch": 0.22529934303337404, "grad_norm": 13.46435260772705, "learning_rate": 8.955686486123225e-06, "loss": 2.5106, "step": 864000 }, { "epoch": 0.2253514956590762, "grad_norm": 13.250704765319824, "learning_rate": 8.95517514383794e-06, "loss": 2.5317, "step": 864200 }, { "epoch": 0.22540364828477838, "grad_norm": 12.657617568969727, "learning_rate": 8.954663691000947e-06, "loss": 2.5211, "step": 864400 }, { "epoch": 0.22545580091048054, "grad_norm": 13.068683624267578, "learning_rate": 8.954152127626545e-06, "loss": 2.4836, "step": 864600 }, { "epoch": 0.22550795353618272, "grad_norm": 12.12160873413086, "learning_rate": 8.953640453729034e-06, "loss": 2.548, "step": 864800 }, { "epoch": 0.22556010616188488, "grad_norm": 11.243968963623047, "learning_rate": 8.95312866932271e-06, "loss": 2.5234, "step": 865000 }, { "epoch": 0.22561225878758706, "grad_norm": 12.38250732421875, "learning_rate": 8.952616774421883e-06, "loss": 2.573, "step": 865200 }, { "epoch": 0.22566441141328922, "grad_norm": 10.652915954589844, "learning_rate": 8.95210476904086e-06, "loss": 2.5371, "step": 865400 }, { "epoch": 0.2257165640389914, "grad_norm": 13.076416015625, "learning_rate": 8.95159265319395e-06, "loss": 2.5046, "step": 865600 }, { "epoch": 0.22576871666469356, "grad_norm": 12.925193786621094, "learning_rate": 8.951080426895473e-06, "loss": 2.5507, "step": 865800 }, { "epoch": 0.22582086929039574, "grad_norm": 14.921239852905273, "learning_rate": 8.95056809015974e-06, "loss": 2.5211, "step": 866000 }, { "epoch": 0.2258730219160979, "grad_norm": 12.77281665802002, "learning_rate": 8.950055643001072e-06, "loss": 2.548, "step": 866200 }, { "epoch": 0.22592517454180006, "grad_norm": 12.110180854797363, "learning_rate": 8.949543085433797e-06, "loss": 2.5187, "step": 866400 }, { "epoch": 0.22597732716750224, "grad_norm": 12.40743637084961, "learning_rate": 8.949030417472235e-06, "loss": 2.507, "step": 866600 }, { "epoch": 0.2260294797932044, "grad_norm": 12.984556198120117, "learning_rate": 8.948517639130722e-06, "loss": 2.5171, "step": 866800 }, { "epoch": 0.22608163241890658, "grad_norm": 12.792410850524902, "learning_rate": 8.948004750423589e-06, "loss": 2.5155, "step": 867000 }, { "epoch": 0.22613378504460874, "grad_norm": 13.460736274719238, "learning_rate": 8.94749175136517e-06, "loss": 2.5426, "step": 867200 }, { "epoch": 0.22618593767031092, "grad_norm": 10.04277229309082, "learning_rate": 8.946978641969805e-06, "loss": 2.5214, "step": 867400 }, { "epoch": 0.22623809029601308, "grad_norm": 12.798548698425293, "learning_rate": 8.946465422251836e-06, "loss": 2.585, "step": 867600 }, { "epoch": 0.22629024292171526, "grad_norm": 12.13387680053711, "learning_rate": 8.945952092225607e-06, "loss": 2.4898, "step": 867800 }, { "epoch": 0.22634239554741742, "grad_norm": 13.749103546142578, "learning_rate": 8.945438651905469e-06, "loss": 2.4992, "step": 868000 }, { "epoch": 0.2263945481731196, "grad_norm": 12.682474136352539, "learning_rate": 8.94492510130577e-06, "loss": 2.534, "step": 868200 }, { "epoch": 0.22644670079882176, "grad_norm": 14.173575401306152, "learning_rate": 8.944411440440865e-06, "loss": 2.5106, "step": 868400 }, { "epoch": 0.22649885342452394, "grad_norm": 14.226604461669922, "learning_rate": 8.943897669325114e-06, "loss": 2.538, "step": 868600 }, { "epoch": 0.2265510060502261, "grad_norm": 11.75302791595459, "learning_rate": 8.943383787972875e-06, "loss": 2.5191, "step": 868800 }, { "epoch": 0.22660315867592828, "grad_norm": 14.185355186462402, "learning_rate": 8.942869796398513e-06, "loss": 2.5775, "step": 869000 }, { "epoch": 0.22665531130163044, "grad_norm": 13.081635475158691, "learning_rate": 8.942355694616393e-06, "loss": 2.5, "step": 869200 }, { "epoch": 0.22670746392733263, "grad_norm": 13.637446403503418, "learning_rate": 8.941841482640885e-06, "loss": 2.5123, "step": 869400 }, { "epoch": 0.22675961655303478, "grad_norm": 12.775493621826172, "learning_rate": 8.941327160486364e-06, "loss": 2.5291, "step": 869600 }, { "epoch": 0.22681176917873697, "grad_norm": 12.630292892456055, "learning_rate": 8.940812728167203e-06, "loss": 2.5195, "step": 869800 }, { "epoch": 0.22686392180443912, "grad_norm": 12.8863525390625, "learning_rate": 8.940298185697783e-06, "loss": 2.5228, "step": 870000 } ], "logging_steps": 200, "max_steps": 3834898, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 7, "trial_name": null, "trial_params": null }