{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999997680336444, "eval_steps": 5000, "global_step": 53887, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018557308447982704, "grad_norm": 28.265625, "learning_rate": 9.999971004213958e-06, "loss": 80.2344, "step": 10 }, { "epoch": 0.0003711461689596541, "grad_norm": 27.53125, "learning_rate": 9.999942008427916e-06, "loss": 73.1461, "step": 20 }, { "epoch": 0.0005567192534394811, "grad_norm": 24.703125, "learning_rate": 9.999913012641873e-06, "loss": 70.992, "step": 30 }, { "epoch": 0.0007422923379193082, "grad_norm": 24.046875, "learning_rate": 9.99988401685583e-06, "loss": 69.4784, "step": 40 }, { "epoch": 0.0009278654223991352, "grad_norm": 23.203125, "learning_rate": 9.999855021069788e-06, "loss": 67.9841, "step": 50 }, { "epoch": 0.0011134385068789622, "grad_norm": 25.015625, "learning_rate": 9.999826025283747e-06, "loss": 66.8785, "step": 60 }, { "epoch": 0.0012990115913587893, "grad_norm": 28.875, "learning_rate": 9.999797029497704e-06, "loss": 65.5765, "step": 70 }, { "epoch": 0.0014845846758386164, "grad_norm": 34.375, "learning_rate": 9.999768033711662e-06, "loss": 64.6408, "step": 80 }, { "epoch": 0.0016701577603184434, "grad_norm": 30.875, "learning_rate": 9.99973903792562e-06, "loss": 63.1597, "step": 90 }, { "epoch": 0.0018557308447982705, "grad_norm": 25.90625, "learning_rate": 9.999710042139577e-06, "loss": 61.7359, "step": 100 }, { "epoch": 0.0020413039292780976, "grad_norm": 27.625, "learning_rate": 9.999681046353534e-06, "loss": 61.0914, "step": 110 }, { "epoch": 0.0022268770137579244, "grad_norm": 26.453125, "learning_rate": 9.999652050567491e-06, "loss": 59.8145, "step": 120 }, { "epoch": 0.0024124500982377517, "grad_norm": 33.40625, "learning_rate": 9.99962305478145e-06, "loss": 58.7817, "step": 130 }, { "epoch": 0.0025980231827175786, "grad_norm": 27.578125, "learning_rate": 9.999594058995406e-06, "loss": 57.699, "step": 140 }, { "epoch": 0.002783596267197406, "grad_norm": 25.59375, "learning_rate": 9.999565063209364e-06, "loss": 56.9819, "step": 150 }, { "epoch": 0.0029691693516772327, "grad_norm": 26.84375, "learning_rate": 9.999536067423323e-06, "loss": 55.928, "step": 160 }, { "epoch": 0.0031547424361570596, "grad_norm": 26.734375, "learning_rate": 9.99950707163728e-06, "loss": 54.613, "step": 170 }, { "epoch": 0.003340315520636887, "grad_norm": 28.75, "learning_rate": 9.999478075851238e-06, "loss": 53.661, "step": 180 }, { "epoch": 0.0035258886051167137, "grad_norm": 30.421875, "learning_rate": 9.999449080065195e-06, "loss": 51.9486, "step": 190 }, { "epoch": 0.003711461689596541, "grad_norm": 29.140625, "learning_rate": 9.999420084279152e-06, "loss": 51.4946, "step": 200 }, { "epoch": 0.003897034774076368, "grad_norm": 37.1875, "learning_rate": 9.99939108849311e-06, "loss": 50.4218, "step": 210 }, { "epoch": 0.004082607858556195, "grad_norm": 32.0, "learning_rate": 9.999362092707067e-06, "loss": 49.2055, "step": 220 }, { "epoch": 0.004268180943036022, "grad_norm": 28.40625, "learning_rate": 9.999333096921026e-06, "loss": 48.1393, "step": 230 }, { "epoch": 0.004453754027515849, "grad_norm": 28.90625, "learning_rate": 9.999304101134984e-06, "loss": 46.6941, "step": 240 }, { "epoch": 0.004639327111995676, "grad_norm": 30.421875, "learning_rate": 9.99927510534894e-06, "loss": 46.6683, "step": 250 }, { "epoch": 0.004824900196475503, "grad_norm": 29.6875, "learning_rate": 9.999246109562898e-06, "loss": 46.1998, "step": 260 }, { "epoch": 0.00501047328095533, "grad_norm": 27.90625, "learning_rate": 9.999217113776856e-06, "loss": 45.5885, "step": 270 }, { "epoch": 0.005196046365435157, "grad_norm": 31.09375, "learning_rate": 9.999188117990813e-06, "loss": 44.4987, "step": 280 }, { "epoch": 0.005381619449914984, "grad_norm": 32.53125, "learning_rate": 9.99915912220477e-06, "loss": 44.1291, "step": 290 }, { "epoch": 0.005567192534394812, "grad_norm": 27.390625, "learning_rate": 9.999130126418728e-06, "loss": 43.4694, "step": 300 }, { "epoch": 0.005752765618874638, "grad_norm": 29.796875, "learning_rate": 9.999101130632686e-06, "loss": 42.7829, "step": 310 }, { "epoch": 0.005938338703354465, "grad_norm": 33.5625, "learning_rate": 9.999072134846643e-06, "loss": 43.0898, "step": 320 }, { "epoch": 0.006123911787834293, "grad_norm": 28.9375, "learning_rate": 9.999043139060602e-06, "loss": 41.7324, "step": 330 }, { "epoch": 0.006309484872314119, "grad_norm": 29.890625, "learning_rate": 9.99901414327456e-06, "loss": 41.9896, "step": 340 }, { "epoch": 0.006495057956793946, "grad_norm": 35.65625, "learning_rate": 9.998985147488515e-06, "loss": 41.4999, "step": 350 }, { "epoch": 0.006680631041273774, "grad_norm": 28.296875, "learning_rate": 9.998956151702474e-06, "loss": 41.3521, "step": 360 }, { "epoch": 0.006866204125753601, "grad_norm": 27.921875, "learning_rate": 9.998927155916432e-06, "loss": 41.2431, "step": 370 }, { "epoch": 0.007051777210233427, "grad_norm": 34.28125, "learning_rate": 9.998898160130389e-06, "loss": 40.6052, "step": 380 }, { "epoch": 0.007237350294713255, "grad_norm": 27.5625, "learning_rate": 9.998869164344346e-06, "loss": 40.2853, "step": 390 }, { "epoch": 0.007422923379193082, "grad_norm": 30.5, "learning_rate": 9.998840168558306e-06, "loss": 40.2161, "step": 400 }, { "epoch": 0.007608496463672909, "grad_norm": 28.078125, "learning_rate": 9.998811172772261e-06, "loss": 40.0517, "step": 410 }, { "epoch": 0.007794069548152736, "grad_norm": 31.015625, "learning_rate": 9.998782176986219e-06, "loss": 40.1129, "step": 420 }, { "epoch": 0.007979642632632563, "grad_norm": 29.421875, "learning_rate": 9.998753181200178e-06, "loss": 39.3547, "step": 430 }, { "epoch": 0.00816521571711239, "grad_norm": 30.109375, "learning_rate": 9.998724185414135e-06, "loss": 39.3066, "step": 440 }, { "epoch": 0.008350788801592218, "grad_norm": 31.03125, "learning_rate": 9.998695189628093e-06, "loss": 38.9512, "step": 450 }, { "epoch": 0.008536361886072045, "grad_norm": 28.984375, "learning_rate": 9.99866619384205e-06, "loss": 38.7517, "step": 460 }, { "epoch": 0.00872193497055187, "grad_norm": 31.140625, "learning_rate": 9.998637198056007e-06, "loss": 38.4473, "step": 470 }, { "epoch": 0.008907508055031698, "grad_norm": 31.5, "learning_rate": 9.998608202269965e-06, "loss": 38.784, "step": 480 }, { "epoch": 0.009093081139511525, "grad_norm": 29.125, "learning_rate": 9.998579206483922e-06, "loss": 38.3422, "step": 490 }, { "epoch": 0.009278654223991352, "grad_norm": 29.234375, "learning_rate": 9.99855021069788e-06, "loss": 38.4766, "step": 500 }, { "epoch": 0.00946422730847118, "grad_norm": 30.078125, "learning_rate": 9.998521214911837e-06, "loss": 38.1635, "step": 510 }, { "epoch": 0.009649800392951007, "grad_norm": 29.109375, "learning_rate": 9.998492219125794e-06, "loss": 38.1851, "step": 520 }, { "epoch": 0.009835373477430834, "grad_norm": 29.765625, "learning_rate": 9.998463223339754e-06, "loss": 37.7056, "step": 530 }, { "epoch": 0.01002094656191066, "grad_norm": 30.0, "learning_rate": 9.998434227553711e-06, "loss": 38.0842, "step": 540 }, { "epoch": 0.010206519646390487, "grad_norm": 28.625, "learning_rate": 9.998405231767668e-06, "loss": 37.9516, "step": 550 }, { "epoch": 0.010392092730870314, "grad_norm": 31.1875, "learning_rate": 9.998376235981626e-06, "loss": 37.4985, "step": 560 }, { "epoch": 0.010577665815350142, "grad_norm": 29.65625, "learning_rate": 9.998347240195583e-06, "loss": 37.3159, "step": 570 }, { "epoch": 0.010763238899829969, "grad_norm": 29.578125, "learning_rate": 9.99831824440954e-06, "loss": 36.7818, "step": 580 }, { "epoch": 0.010948811984309796, "grad_norm": 29.90625, "learning_rate": 9.998289248623498e-06, "loss": 37.5521, "step": 590 }, { "epoch": 0.011134385068789623, "grad_norm": 27.546875, "learning_rate": 9.998260252837455e-06, "loss": 36.2803, "step": 600 }, { "epoch": 0.011319958153269449, "grad_norm": 28.84375, "learning_rate": 9.998231257051415e-06, "loss": 36.9916, "step": 610 }, { "epoch": 0.011505531237749276, "grad_norm": 29.546875, "learning_rate": 9.99820226126537e-06, "loss": 36.3332, "step": 620 }, { "epoch": 0.011691104322229104, "grad_norm": 28.390625, "learning_rate": 9.998173265479328e-06, "loss": 36.4675, "step": 630 }, { "epoch": 0.01187667740670893, "grad_norm": 29.8125, "learning_rate": 9.998144269693287e-06, "loss": 36.7096, "step": 640 }, { "epoch": 0.012062250491188758, "grad_norm": 29.53125, "learning_rate": 9.998115273907244e-06, "loss": 36.3192, "step": 650 }, { "epoch": 0.012247823575668585, "grad_norm": 29.671875, "learning_rate": 9.998086278121202e-06, "loss": 35.995, "step": 660 }, { "epoch": 0.012433396660148413, "grad_norm": 29.0625, "learning_rate": 9.998057282335159e-06, "loss": 36.1418, "step": 670 }, { "epoch": 0.012618969744628238, "grad_norm": 31.671875, "learning_rate": 9.998028286549116e-06, "loss": 35.9698, "step": 680 }, { "epoch": 0.012804542829108066, "grad_norm": 30.78125, "learning_rate": 9.997999290763074e-06, "loss": 35.9465, "step": 690 }, { "epoch": 0.012990115913587893, "grad_norm": 30.125, "learning_rate": 9.997970294977031e-06, "loss": 36.0424, "step": 700 }, { "epoch": 0.01317568899806772, "grad_norm": 30.359375, "learning_rate": 9.99794129919099e-06, "loss": 35.7538, "step": 710 }, { "epoch": 0.013361262082547547, "grad_norm": 31.40625, "learning_rate": 9.997912303404948e-06, "loss": 35.7376, "step": 720 }, { "epoch": 0.013546835167027375, "grad_norm": 31.890625, "learning_rate": 9.997883307618903e-06, "loss": 35.5567, "step": 730 }, { "epoch": 0.013732408251507202, "grad_norm": 29.75, "learning_rate": 9.997854311832862e-06, "loss": 35.4276, "step": 740 }, { "epoch": 0.01391798133598703, "grad_norm": 30.453125, "learning_rate": 9.99782531604682e-06, "loss": 35.4647, "step": 750 }, { "epoch": 0.014103554420466855, "grad_norm": 31.796875, "learning_rate": 9.997796320260777e-06, "loss": 35.292, "step": 760 }, { "epoch": 0.014289127504946682, "grad_norm": 28.6875, "learning_rate": 9.997767324474735e-06, "loss": 35.523, "step": 770 }, { "epoch": 0.01447470058942651, "grad_norm": 31.734375, "learning_rate": 9.997738328688692e-06, "loss": 34.9746, "step": 780 }, { "epoch": 0.014660273673906337, "grad_norm": 30.0, "learning_rate": 9.99770933290265e-06, "loss": 35.2019, "step": 790 }, { "epoch": 0.014845846758386164, "grad_norm": 29.859375, "learning_rate": 9.997680337116607e-06, "loss": 35.1678, "step": 800 }, { "epoch": 0.015031419842865991, "grad_norm": 30.796875, "learning_rate": 9.997651341330566e-06, "loss": 35.1566, "step": 810 }, { "epoch": 0.015216992927345819, "grad_norm": 31.046875, "learning_rate": 9.997622345544523e-06, "loss": 34.756, "step": 820 }, { "epoch": 0.015402566011825644, "grad_norm": 31.9375, "learning_rate": 9.997593349758481e-06, "loss": 35.1637, "step": 830 }, { "epoch": 0.015588139096305471, "grad_norm": 30.640625, "learning_rate": 9.997564353972438e-06, "loss": 35.125, "step": 840 }, { "epoch": 0.0157737121807853, "grad_norm": 30.0625, "learning_rate": 9.997535358186396e-06, "loss": 34.0675, "step": 850 }, { "epoch": 0.015959285265265126, "grad_norm": 31.1875, "learning_rate": 9.997506362400353e-06, "loss": 34.6025, "step": 860 }, { "epoch": 0.016144858349744953, "grad_norm": 32.59375, "learning_rate": 9.99747736661431e-06, "loss": 34.8561, "step": 870 }, { "epoch": 0.01633043143422478, "grad_norm": 28.703125, "learning_rate": 9.99744837082827e-06, "loss": 34.5315, "step": 880 }, { "epoch": 0.016516004518704608, "grad_norm": 32.125, "learning_rate": 9.997419375042225e-06, "loss": 34.2966, "step": 890 }, { "epoch": 0.016701577603184435, "grad_norm": 31.96875, "learning_rate": 9.997390379256183e-06, "loss": 34.5866, "step": 900 }, { "epoch": 0.016887150687664262, "grad_norm": 30.515625, "learning_rate": 9.997361383470142e-06, "loss": 34.3122, "step": 910 }, { "epoch": 0.01707272377214409, "grad_norm": 30.078125, "learning_rate": 9.9973323876841e-06, "loss": 34.4973, "step": 920 }, { "epoch": 0.017258296856623917, "grad_norm": 31.359375, "learning_rate": 9.997303391898057e-06, "loss": 34.3231, "step": 930 }, { "epoch": 0.01744386994110374, "grad_norm": 31.046875, "learning_rate": 9.997274396112014e-06, "loss": 34.2656, "step": 940 }, { "epoch": 0.017629443025583568, "grad_norm": 30.140625, "learning_rate": 9.997245400325971e-06, "loss": 34.1969, "step": 950 }, { "epoch": 0.017815016110063395, "grad_norm": 31.96875, "learning_rate": 9.997216404539929e-06, "loss": 34.1862, "step": 960 }, { "epoch": 0.018000589194543223, "grad_norm": 30.59375, "learning_rate": 9.997187408753886e-06, "loss": 33.5469, "step": 970 }, { "epoch": 0.01818616227902305, "grad_norm": 31.65625, "learning_rate": 9.997158412967845e-06, "loss": 34.473, "step": 980 }, { "epoch": 0.018371735363502877, "grad_norm": 31.328125, "learning_rate": 9.997129417181803e-06, "loss": 33.9839, "step": 990 }, { "epoch": 0.018557308447982705, "grad_norm": 30.546875, "learning_rate": 9.997100421395758e-06, "loss": 33.7282, "step": 1000 }, { "epoch": 0.018742881532462532, "grad_norm": 32.90625, "learning_rate": 9.997071425609718e-06, "loss": 33.5576, "step": 1010 }, { "epoch": 0.01892845461694236, "grad_norm": 30.8125, "learning_rate": 9.997042429823675e-06, "loss": 33.5456, "step": 1020 }, { "epoch": 0.019114027701422186, "grad_norm": 30.09375, "learning_rate": 9.997013434037632e-06, "loss": 33.7308, "step": 1030 }, { "epoch": 0.019299600785902014, "grad_norm": 30.75, "learning_rate": 9.99698443825159e-06, "loss": 33.1215, "step": 1040 }, { "epoch": 0.01948517387038184, "grad_norm": 32.46875, "learning_rate": 9.996955442465547e-06, "loss": 33.6073, "step": 1050 }, { "epoch": 0.01967074695486167, "grad_norm": 33.28125, "learning_rate": 9.996926446679505e-06, "loss": 33.6201, "step": 1060 }, { "epoch": 0.019856320039341496, "grad_norm": 31.265625, "learning_rate": 9.996897450893462e-06, "loss": 33.4164, "step": 1070 }, { "epoch": 0.02004189312382132, "grad_norm": 30.5625, "learning_rate": 9.99686845510742e-06, "loss": 33.5612, "step": 1080 }, { "epoch": 0.020227466208301147, "grad_norm": 30.9375, "learning_rate": 9.996839459321379e-06, "loss": 33.128, "step": 1090 }, { "epoch": 0.020413039292780974, "grad_norm": 30.390625, "learning_rate": 9.996810463535334e-06, "loss": 33.3032, "step": 1100 }, { "epoch": 0.0205986123772608, "grad_norm": 31.8125, "learning_rate": 9.996781467749292e-06, "loss": 32.6613, "step": 1110 }, { "epoch": 0.02078418546174063, "grad_norm": 31.5, "learning_rate": 9.99675247196325e-06, "loss": 32.9665, "step": 1120 }, { "epoch": 0.020969758546220456, "grad_norm": 30.828125, "learning_rate": 9.996723476177208e-06, "loss": 33.3112, "step": 1130 }, { "epoch": 0.021155331630700283, "grad_norm": 33.0, "learning_rate": 9.996694480391166e-06, "loss": 33.3407, "step": 1140 }, { "epoch": 0.02134090471518011, "grad_norm": 32.34375, "learning_rate": 9.996665484605123e-06, "loss": 33.1562, "step": 1150 }, { "epoch": 0.021526477799659938, "grad_norm": 31.96875, "learning_rate": 9.99663648881908e-06, "loss": 32.8546, "step": 1160 }, { "epoch": 0.021712050884139765, "grad_norm": 32.28125, "learning_rate": 9.996607493033038e-06, "loss": 32.8359, "step": 1170 }, { "epoch": 0.021897623968619592, "grad_norm": 31.484375, "learning_rate": 9.996578497246995e-06, "loss": 32.6904, "step": 1180 }, { "epoch": 0.02208319705309942, "grad_norm": 33.9375, "learning_rate": 9.996549501460954e-06, "loss": 33.2044, "step": 1190 }, { "epoch": 0.022268770137579247, "grad_norm": 33.03125, "learning_rate": 9.996520505674912e-06, "loss": 33.0732, "step": 1200 }, { "epoch": 0.022454343222059074, "grad_norm": 30.65625, "learning_rate": 9.996491509888867e-06, "loss": 32.7397, "step": 1210 }, { "epoch": 0.022639916306538898, "grad_norm": 32.5625, "learning_rate": 9.996462514102827e-06, "loss": 32.2421, "step": 1220 }, { "epoch": 0.022825489391018725, "grad_norm": 31.390625, "learning_rate": 9.996433518316784e-06, "loss": 32.6799, "step": 1230 }, { "epoch": 0.023011062475498553, "grad_norm": 32.625, "learning_rate": 9.996404522530741e-06, "loss": 32.6819, "step": 1240 }, { "epoch": 0.02319663555997838, "grad_norm": 31.046875, "learning_rate": 9.996375526744699e-06, "loss": 32.3593, "step": 1250 }, { "epoch": 0.023382208644458207, "grad_norm": 31.96875, "learning_rate": 9.996346530958658e-06, "loss": 32.3033, "step": 1260 }, { "epoch": 0.023567781728938034, "grad_norm": 31.71875, "learning_rate": 9.996317535172614e-06, "loss": 32.0812, "step": 1270 }, { "epoch": 0.02375335481341786, "grad_norm": 32.75, "learning_rate": 9.996288539386571e-06, "loss": 32.636, "step": 1280 }, { "epoch": 0.02393892789789769, "grad_norm": 30.765625, "learning_rate": 9.99625954360053e-06, "loss": 32.2014, "step": 1290 }, { "epoch": 0.024124500982377516, "grad_norm": 32.625, "learning_rate": 9.996230547814487e-06, "loss": 32.5447, "step": 1300 }, { "epoch": 0.024310074066857344, "grad_norm": 32.5, "learning_rate": 9.996201552028445e-06, "loss": 32.4539, "step": 1310 }, { "epoch": 0.02449564715133717, "grad_norm": 30.59375, "learning_rate": 9.996172556242402e-06, "loss": 32.3299, "step": 1320 }, { "epoch": 0.024681220235816998, "grad_norm": 31.46875, "learning_rate": 9.99614356045636e-06, "loss": 32.2265, "step": 1330 }, { "epoch": 0.024866793320296825, "grad_norm": 32.65625, "learning_rate": 9.996114564670317e-06, "loss": 32.1521, "step": 1340 }, { "epoch": 0.025052366404776653, "grad_norm": 32.0625, "learning_rate": 9.996085568884274e-06, "loss": 32.5649, "step": 1350 }, { "epoch": 0.025237939489256477, "grad_norm": 32.6875, "learning_rate": 9.996056573098234e-06, "loss": 32.2027, "step": 1360 }, { "epoch": 0.025423512573736304, "grad_norm": 32.28125, "learning_rate": 9.99602757731219e-06, "loss": 32.4088, "step": 1370 }, { "epoch": 0.02560908565821613, "grad_norm": 31.21875, "learning_rate": 9.995998581526147e-06, "loss": 32.554, "step": 1380 }, { "epoch": 0.02579465874269596, "grad_norm": 33.5, "learning_rate": 9.995969585740106e-06, "loss": 32.3441, "step": 1390 }, { "epoch": 0.025980231827175786, "grad_norm": 31.484375, "learning_rate": 9.995940589954063e-06, "loss": 31.8443, "step": 1400 }, { "epoch": 0.026165804911655613, "grad_norm": 32.25, "learning_rate": 9.99591159416802e-06, "loss": 32.0743, "step": 1410 }, { "epoch": 0.02635137799613544, "grad_norm": 32.03125, "learning_rate": 9.995882598381978e-06, "loss": 31.8419, "step": 1420 }, { "epoch": 0.026536951080615268, "grad_norm": 33.625, "learning_rate": 9.995853602595935e-06, "loss": 31.8767, "step": 1430 }, { "epoch": 0.026722524165095095, "grad_norm": 30.9375, "learning_rate": 9.995824606809893e-06, "loss": 31.7176, "step": 1440 }, { "epoch": 0.026908097249574922, "grad_norm": 31.234375, "learning_rate": 9.99579561102385e-06, "loss": 32.3033, "step": 1450 }, { "epoch": 0.02709367033405475, "grad_norm": 30.828125, "learning_rate": 9.99576661523781e-06, "loss": 31.5171, "step": 1460 }, { "epoch": 0.027279243418534577, "grad_norm": 30.0625, "learning_rate": 9.995737619451767e-06, "loss": 31.8232, "step": 1470 }, { "epoch": 0.027464816503014404, "grad_norm": 33.34375, "learning_rate": 9.995708623665722e-06, "loss": 31.6173, "step": 1480 }, { "epoch": 0.02765038958749423, "grad_norm": 32.09375, "learning_rate": 9.995679627879682e-06, "loss": 31.9755, "step": 1490 }, { "epoch": 0.02783596267197406, "grad_norm": 33.03125, "learning_rate": 9.995650632093639e-06, "loss": 32.1813, "step": 1500 }, { "epoch": 0.028021535756453882, "grad_norm": 30.984375, "learning_rate": 9.995621636307596e-06, "loss": 31.64, "step": 1510 }, { "epoch": 0.02820710884093371, "grad_norm": 31.828125, "learning_rate": 9.995592640521554e-06, "loss": 31.9134, "step": 1520 }, { "epoch": 0.028392681925413537, "grad_norm": 33.28125, "learning_rate": 9.995563644735511e-06, "loss": 31.2862, "step": 1530 }, { "epoch": 0.028578255009893364, "grad_norm": 31.984375, "learning_rate": 9.995534648949469e-06, "loss": 31.092, "step": 1540 }, { "epoch": 0.02876382809437319, "grad_norm": 31.015625, "learning_rate": 9.995505653163426e-06, "loss": 31.5553, "step": 1550 }, { "epoch": 0.02894940117885302, "grad_norm": 32.1875, "learning_rate": 9.995476657377383e-06, "loss": 31.8129, "step": 1560 }, { "epoch": 0.029134974263332846, "grad_norm": 31.859375, "learning_rate": 9.995447661591343e-06, "loss": 31.3235, "step": 1570 }, { "epoch": 0.029320547347812673, "grad_norm": 31.984375, "learning_rate": 9.9954186658053e-06, "loss": 31.2428, "step": 1580 }, { "epoch": 0.0295061204322925, "grad_norm": 32.8125, "learning_rate": 9.995389670019257e-06, "loss": 31.4959, "step": 1590 }, { "epoch": 0.029691693516772328, "grad_norm": 32.65625, "learning_rate": 9.995360674233215e-06, "loss": 31.7104, "step": 1600 }, { "epoch": 0.029877266601252155, "grad_norm": 31.828125, "learning_rate": 9.995331678447172e-06, "loss": 31.4195, "step": 1610 }, { "epoch": 0.030062839685731983, "grad_norm": 31.4375, "learning_rate": 9.99530268266113e-06, "loss": 31.5362, "step": 1620 }, { "epoch": 0.03024841277021181, "grad_norm": 30.09375, "learning_rate": 9.995273686875087e-06, "loss": 31.0253, "step": 1630 }, { "epoch": 0.030433985854691637, "grad_norm": 33.53125, "learning_rate": 9.995244691089044e-06, "loss": 31.1842, "step": 1640 }, { "epoch": 0.03061955893917146, "grad_norm": 30.609375, "learning_rate": 9.995215695303002e-06, "loss": 31.149, "step": 1650 }, { "epoch": 0.030805132023651288, "grad_norm": 31.71875, "learning_rate": 9.99518669951696e-06, "loss": 31.649, "step": 1660 }, { "epoch": 0.030990705108131116, "grad_norm": 31.53125, "learning_rate": 9.995157703730918e-06, "loss": 30.8072, "step": 1670 }, { "epoch": 0.031176278192610943, "grad_norm": 32.25, "learning_rate": 9.995128707944876e-06, "loss": 31.2502, "step": 1680 }, { "epoch": 0.03136185127709077, "grad_norm": 31.140625, "learning_rate": 9.995099712158831e-06, "loss": 31.1485, "step": 1690 }, { "epoch": 0.0315474243615706, "grad_norm": 33.125, "learning_rate": 9.99507071637279e-06, "loss": 31.5347, "step": 1700 }, { "epoch": 0.031732997446050425, "grad_norm": 30.890625, "learning_rate": 9.995041720586748e-06, "loss": 31.1346, "step": 1710 }, { "epoch": 0.03191857053053025, "grad_norm": 30.640625, "learning_rate": 9.995012724800705e-06, "loss": 31.0724, "step": 1720 }, { "epoch": 0.03210414361501008, "grad_norm": 29.265625, "learning_rate": 9.994983729014663e-06, "loss": 31.0588, "step": 1730 }, { "epoch": 0.032289716699489907, "grad_norm": 33.53125, "learning_rate": 9.994954733228622e-06, "loss": 31.6928, "step": 1740 }, { "epoch": 0.032475289783969734, "grad_norm": 33.75, "learning_rate": 9.994925737442578e-06, "loss": 31.1413, "step": 1750 }, { "epoch": 0.03266086286844956, "grad_norm": 33.125, "learning_rate": 9.994896741656535e-06, "loss": 30.7202, "step": 1760 }, { "epoch": 0.03284643595292939, "grad_norm": 33.78125, "learning_rate": 9.994867745870494e-06, "loss": 30.409, "step": 1770 }, { "epoch": 0.033032009037409216, "grad_norm": 33.15625, "learning_rate": 9.994838750084451e-06, "loss": 31.4688, "step": 1780 }, { "epoch": 0.03321758212188904, "grad_norm": 31.953125, "learning_rate": 9.994809754298409e-06, "loss": 30.6562, "step": 1790 }, { "epoch": 0.03340315520636887, "grad_norm": 33.25, "learning_rate": 9.994780758512366e-06, "loss": 30.7922, "step": 1800 }, { "epoch": 0.0335887282908487, "grad_norm": 32.96875, "learning_rate": 9.994751762726324e-06, "loss": 30.7261, "step": 1810 }, { "epoch": 0.033774301375328525, "grad_norm": 30.546875, "learning_rate": 9.994722766940281e-06, "loss": 30.6429, "step": 1820 }, { "epoch": 0.03395987445980835, "grad_norm": 31.796875, "learning_rate": 9.994693771154239e-06, "loss": 31.0909, "step": 1830 }, { "epoch": 0.03414544754428818, "grad_norm": 30.84375, "learning_rate": 9.994664775368198e-06, "loss": 30.5669, "step": 1840 }, { "epoch": 0.03433102062876801, "grad_norm": 31.65625, "learning_rate": 9.994635779582155e-06, "loss": 30.707, "step": 1850 }, { "epoch": 0.034516593713247834, "grad_norm": 31.1875, "learning_rate": 9.99460678379611e-06, "loss": 31.0501, "step": 1860 }, { "epoch": 0.034702166797727654, "grad_norm": 33.5, "learning_rate": 9.99457778801007e-06, "loss": 30.9713, "step": 1870 }, { "epoch": 0.03488773988220748, "grad_norm": 33.84375, "learning_rate": 9.994548792224027e-06, "loss": 30.8638, "step": 1880 }, { "epoch": 0.03507331296668731, "grad_norm": 32.5, "learning_rate": 9.994519796437985e-06, "loss": 31.0086, "step": 1890 }, { "epoch": 0.035258886051167136, "grad_norm": 33.8125, "learning_rate": 9.994490800651942e-06, "loss": 30.5325, "step": 1900 }, { "epoch": 0.035444459135646963, "grad_norm": 33.4375, "learning_rate": 9.9944618048659e-06, "loss": 30.611, "step": 1910 }, { "epoch": 0.03563003222012679, "grad_norm": 31.984375, "learning_rate": 9.994432809079857e-06, "loss": 30.3778, "step": 1920 }, { "epoch": 0.03581560530460662, "grad_norm": 32.40625, "learning_rate": 9.994403813293814e-06, "loss": 30.6509, "step": 1930 }, { "epoch": 0.036001178389086445, "grad_norm": 31.796875, "learning_rate": 9.994374817507773e-06, "loss": 30.8435, "step": 1940 }, { "epoch": 0.03618675147356627, "grad_norm": 32.75, "learning_rate": 9.99434582172173e-06, "loss": 30.1924, "step": 1950 }, { "epoch": 0.0363723245580461, "grad_norm": 31.984375, "learning_rate": 9.994316825935686e-06, "loss": 30.4882, "step": 1960 }, { "epoch": 0.03655789764252593, "grad_norm": 33.40625, "learning_rate": 9.994287830149646e-06, "loss": 30.4694, "step": 1970 }, { "epoch": 0.036743470727005755, "grad_norm": 33.96875, "learning_rate": 9.994258834363603e-06, "loss": 30.297, "step": 1980 }, { "epoch": 0.03692904381148558, "grad_norm": 32.71875, "learning_rate": 9.99422983857756e-06, "loss": 30.2815, "step": 1990 }, { "epoch": 0.03711461689596541, "grad_norm": 31.96875, "learning_rate": 9.994200842791518e-06, "loss": 30.7292, "step": 2000 }, { "epoch": 0.037300189980445236, "grad_norm": 32.96875, "learning_rate": 9.994171847005475e-06, "loss": 30.7787, "step": 2010 }, { "epoch": 0.037485763064925064, "grad_norm": 33.59375, "learning_rate": 9.994142851219433e-06, "loss": 30.5994, "step": 2020 }, { "epoch": 0.03767133614940489, "grad_norm": 32.5, "learning_rate": 9.99411385543339e-06, "loss": 30.2407, "step": 2030 }, { "epoch": 0.03785690923388472, "grad_norm": 30.28125, "learning_rate": 9.994084859647349e-06, "loss": 29.9962, "step": 2040 }, { "epoch": 0.038042482318364546, "grad_norm": 36.53125, "learning_rate": 9.994055863861307e-06, "loss": 30.1751, "step": 2050 }, { "epoch": 0.03822805540284437, "grad_norm": 34.3125, "learning_rate": 9.994026868075264e-06, "loss": 30.1296, "step": 2060 }, { "epoch": 0.0384136284873242, "grad_norm": 31.78125, "learning_rate": 9.993997872289221e-06, "loss": 29.8851, "step": 2070 }, { "epoch": 0.03859920157180403, "grad_norm": 34.3125, "learning_rate": 9.993968876503179e-06, "loss": 30.1799, "step": 2080 }, { "epoch": 0.038784774656283855, "grad_norm": 32.5625, "learning_rate": 9.993939880717136e-06, "loss": 30.1617, "step": 2090 }, { "epoch": 0.03897034774076368, "grad_norm": 31.671875, "learning_rate": 9.993910884931094e-06, "loss": 30.4521, "step": 2100 }, { "epoch": 0.03915592082524351, "grad_norm": 33.34375, "learning_rate": 9.993881889145051e-06, "loss": 30.1233, "step": 2110 }, { "epoch": 0.03934149390972334, "grad_norm": 32.65625, "learning_rate": 9.993852893359008e-06, "loss": 30.265, "step": 2120 }, { "epoch": 0.039527066994203164, "grad_norm": 31.859375, "learning_rate": 9.993823897572966e-06, "loss": 29.874, "step": 2130 }, { "epoch": 0.03971264007868299, "grad_norm": 32.8125, "learning_rate": 9.993794901786923e-06, "loss": 30.2309, "step": 2140 }, { "epoch": 0.03989821316316281, "grad_norm": 33.0, "learning_rate": 9.993765906000882e-06, "loss": 30.0726, "step": 2150 }, { "epoch": 0.04008378624764264, "grad_norm": 33.0, "learning_rate": 9.99373691021484e-06, "loss": 30.0015, "step": 2160 }, { "epoch": 0.040269359332122466, "grad_norm": 32.96875, "learning_rate": 9.993707914428797e-06, "loss": 30.4088, "step": 2170 }, { "epoch": 0.04045493241660229, "grad_norm": 32.21875, "learning_rate": 9.993678918642755e-06, "loss": 29.5758, "step": 2180 }, { "epoch": 0.04064050550108212, "grad_norm": 32.6875, "learning_rate": 9.993649922856712e-06, "loss": 30.0286, "step": 2190 }, { "epoch": 0.04082607858556195, "grad_norm": 31.515625, "learning_rate": 9.99362092707067e-06, "loss": 29.7122, "step": 2200 }, { "epoch": 0.041011651670041775, "grad_norm": 31.375, "learning_rate": 9.993591931284627e-06, "loss": 30.272, "step": 2210 }, { "epoch": 0.0411972247545216, "grad_norm": 32.84375, "learning_rate": 9.993562935498586e-06, "loss": 29.549, "step": 2220 }, { "epoch": 0.04138279783900143, "grad_norm": 32.5625, "learning_rate": 9.993533939712542e-06, "loss": 29.9526, "step": 2230 }, { "epoch": 0.04156837092348126, "grad_norm": 32.75, "learning_rate": 9.993504943926499e-06, "loss": 29.7783, "step": 2240 }, { "epoch": 0.041753944007961084, "grad_norm": 33.375, "learning_rate": 9.993475948140458e-06, "loss": 29.815, "step": 2250 }, { "epoch": 0.04193951709244091, "grad_norm": 31.921875, "learning_rate": 9.993446952354415e-06, "loss": 30.2447, "step": 2260 }, { "epoch": 0.04212509017692074, "grad_norm": 33.71875, "learning_rate": 9.993417956568373e-06, "loss": 30.0316, "step": 2270 }, { "epoch": 0.042310663261400566, "grad_norm": 31.609375, "learning_rate": 9.99338896078233e-06, "loss": 29.8423, "step": 2280 }, { "epoch": 0.042496236345880394, "grad_norm": 30.828125, "learning_rate": 9.993359964996288e-06, "loss": 29.8542, "step": 2290 }, { "epoch": 0.04268180943036022, "grad_norm": 32.15625, "learning_rate": 9.993330969210245e-06, "loss": 29.8611, "step": 2300 }, { "epoch": 0.04286738251484005, "grad_norm": 33.03125, "learning_rate": 9.993301973424203e-06, "loss": 29.6654, "step": 2310 }, { "epoch": 0.043052955599319875, "grad_norm": 33.6875, "learning_rate": 9.993272977638162e-06, "loss": 29.5965, "step": 2320 }, { "epoch": 0.0432385286837997, "grad_norm": 33.5625, "learning_rate": 9.993243981852119e-06, "loss": 29.4433, "step": 2330 }, { "epoch": 0.04342410176827953, "grad_norm": 32.71875, "learning_rate": 9.993214986066075e-06, "loss": 29.2134, "step": 2340 }, { "epoch": 0.04360967485275936, "grad_norm": 33.09375, "learning_rate": 9.993185990280034e-06, "loss": 29.5822, "step": 2350 }, { "epoch": 0.043795247937239185, "grad_norm": 34.5625, "learning_rate": 9.993156994493991e-06, "loss": 29.5265, "step": 2360 }, { "epoch": 0.04398082102171901, "grad_norm": 32.8125, "learning_rate": 9.993127998707949e-06, "loss": 29.9218, "step": 2370 }, { "epoch": 0.04416639410619884, "grad_norm": 34.25, "learning_rate": 9.993099002921906e-06, "loss": 29.5988, "step": 2380 }, { "epoch": 0.044351967190678666, "grad_norm": 32.84375, "learning_rate": 9.993070007135863e-06, "loss": 29.2194, "step": 2390 }, { "epoch": 0.044537540275158494, "grad_norm": 33.25, "learning_rate": 9.993041011349821e-06, "loss": 29.3336, "step": 2400 }, { "epoch": 0.04472311335963832, "grad_norm": 32.625, "learning_rate": 9.993012015563778e-06, "loss": 29.1395, "step": 2410 }, { "epoch": 0.04490868644411815, "grad_norm": 31.890625, "learning_rate": 9.992983019777737e-06, "loss": 29.6973, "step": 2420 }, { "epoch": 0.045094259528597976, "grad_norm": 35.0625, "learning_rate": 9.992954023991695e-06, "loss": 30.2094, "step": 2430 }, { "epoch": 0.045279832613077796, "grad_norm": 30.609375, "learning_rate": 9.992925028205652e-06, "loss": 29.4222, "step": 2440 }, { "epoch": 0.04546540569755762, "grad_norm": 34.1875, "learning_rate": 9.99289603241961e-06, "loss": 29.5045, "step": 2450 }, { "epoch": 0.04565097878203745, "grad_norm": 33.15625, "learning_rate": 9.992867036633567e-06, "loss": 29.2573, "step": 2460 }, { "epoch": 0.04583655186651728, "grad_norm": 33.34375, "learning_rate": 9.992838040847524e-06, "loss": 29.4728, "step": 2470 }, { "epoch": 0.046022124950997105, "grad_norm": 36.40625, "learning_rate": 9.992809045061482e-06, "loss": 29.705, "step": 2480 }, { "epoch": 0.04620769803547693, "grad_norm": 32.65625, "learning_rate": 9.992780049275441e-06, "loss": 29.8484, "step": 2490 }, { "epoch": 0.04639327111995676, "grad_norm": 34.75, "learning_rate": 9.992751053489397e-06, "loss": 29.621, "step": 2500 }, { "epoch": 0.04657884420443659, "grad_norm": 33.71875, "learning_rate": 9.992722057703354e-06, "loss": 29.9103, "step": 2510 }, { "epoch": 0.046764417288916414, "grad_norm": 32.53125, "learning_rate": 9.992693061917313e-06, "loss": 29.0736, "step": 2520 }, { "epoch": 0.04694999037339624, "grad_norm": 31.203125, "learning_rate": 9.99266406613127e-06, "loss": 29.5914, "step": 2530 }, { "epoch": 0.04713556345787607, "grad_norm": 32.125, "learning_rate": 9.992635070345228e-06, "loss": 28.9369, "step": 2540 }, { "epoch": 0.047321136542355896, "grad_norm": 32.28125, "learning_rate": 9.992606074559185e-06, "loss": 29.315, "step": 2550 }, { "epoch": 0.04750670962683572, "grad_norm": 31.65625, "learning_rate": 9.992577078773143e-06, "loss": 29.248, "step": 2560 }, { "epoch": 0.04769228271131555, "grad_norm": 35.625, "learning_rate": 9.9925480829871e-06, "loss": 29.8038, "step": 2570 }, { "epoch": 0.04787785579579538, "grad_norm": 33.625, "learning_rate": 9.992519087201058e-06, "loss": 29.1508, "step": 2580 }, { "epoch": 0.048063428880275205, "grad_norm": 32.34375, "learning_rate": 9.992490091415015e-06, "loss": 29.4222, "step": 2590 }, { "epoch": 0.04824900196475503, "grad_norm": 34.375, "learning_rate": 9.992461095628974e-06, "loss": 29.7663, "step": 2600 }, { "epoch": 0.04843457504923486, "grad_norm": 34.5, "learning_rate": 9.99243209984293e-06, "loss": 29.1559, "step": 2610 }, { "epoch": 0.04862014813371469, "grad_norm": 34.21875, "learning_rate": 9.992403104056887e-06, "loss": 29.0828, "step": 2620 }, { "epoch": 0.048805721218194514, "grad_norm": 32.625, "learning_rate": 9.992374108270846e-06, "loss": 28.9075, "step": 2630 }, { "epoch": 0.04899129430267434, "grad_norm": 33.46875, "learning_rate": 9.992345112484804e-06, "loss": 29.1731, "step": 2640 }, { "epoch": 0.04917686738715417, "grad_norm": 33.5625, "learning_rate": 9.992316116698761e-06, "loss": 28.5537, "step": 2650 }, { "epoch": 0.049362440471633996, "grad_norm": 32.59375, "learning_rate": 9.992287120912719e-06, "loss": 29.2992, "step": 2660 }, { "epoch": 0.049548013556113824, "grad_norm": 33.6875, "learning_rate": 9.992258125126676e-06, "loss": 29.2843, "step": 2670 }, { "epoch": 0.04973358664059365, "grad_norm": 35.5, "learning_rate": 9.992229129340633e-06, "loss": 29.039, "step": 2680 }, { "epoch": 0.04991915972507348, "grad_norm": 31.34375, "learning_rate": 9.99220013355459e-06, "loss": 28.7605, "step": 2690 }, { "epoch": 0.050104732809553305, "grad_norm": 32.65625, "learning_rate": 9.99217113776855e-06, "loss": 29.3325, "step": 2700 }, { "epoch": 0.05029030589403313, "grad_norm": 33.71875, "learning_rate": 9.992142141982506e-06, "loss": 28.8713, "step": 2710 }, { "epoch": 0.05047587897851295, "grad_norm": 31.328125, "learning_rate": 9.992113146196463e-06, "loss": 29.162, "step": 2720 }, { "epoch": 0.05066145206299278, "grad_norm": 32.1875, "learning_rate": 9.992084150410422e-06, "loss": 28.794, "step": 2730 }, { "epoch": 0.05084702514747261, "grad_norm": 33.78125, "learning_rate": 9.99205515462438e-06, "loss": 29.1353, "step": 2740 }, { "epoch": 0.051032598231952435, "grad_norm": 33.125, "learning_rate": 9.992026158838337e-06, "loss": 29.5384, "step": 2750 }, { "epoch": 0.05121817131643226, "grad_norm": 32.625, "learning_rate": 9.991997163052294e-06, "loss": 29.1446, "step": 2760 }, { "epoch": 0.05140374440091209, "grad_norm": 34.03125, "learning_rate": 9.991968167266252e-06, "loss": 28.8186, "step": 2770 }, { "epoch": 0.05158931748539192, "grad_norm": 31.96875, "learning_rate": 9.991939171480209e-06, "loss": 29.0019, "step": 2780 }, { "epoch": 0.051774890569871744, "grad_norm": 31.84375, "learning_rate": 9.991910175694167e-06, "loss": 28.832, "step": 2790 }, { "epoch": 0.05196046365435157, "grad_norm": 32.5625, "learning_rate": 9.991881179908126e-06, "loss": 28.9655, "step": 2800 }, { "epoch": 0.0521460367388314, "grad_norm": 34.84375, "learning_rate": 9.991852184122083e-06, "loss": 28.7527, "step": 2810 }, { "epoch": 0.052331609823311226, "grad_norm": 34.375, "learning_rate": 9.991823188336039e-06, "loss": 29.0628, "step": 2820 }, { "epoch": 0.05251718290779105, "grad_norm": 31.453125, "learning_rate": 9.991794192549998e-06, "loss": 28.9482, "step": 2830 }, { "epoch": 0.05270275599227088, "grad_norm": 35.15625, "learning_rate": 9.991765196763955e-06, "loss": 28.9719, "step": 2840 }, { "epoch": 0.05288832907675071, "grad_norm": 31.28125, "learning_rate": 9.991736200977913e-06, "loss": 28.7786, "step": 2850 }, { "epoch": 0.053073902161230535, "grad_norm": 33.15625, "learning_rate": 9.99170720519187e-06, "loss": 28.7459, "step": 2860 }, { "epoch": 0.05325947524571036, "grad_norm": 33.5625, "learning_rate": 9.991678209405827e-06, "loss": 29.0512, "step": 2870 }, { "epoch": 0.05344504833019019, "grad_norm": 34.75, "learning_rate": 9.991649213619785e-06, "loss": 28.7549, "step": 2880 }, { "epoch": 0.05363062141467002, "grad_norm": 34.8125, "learning_rate": 9.991620217833742e-06, "loss": 29.0014, "step": 2890 }, { "epoch": 0.053816194499149844, "grad_norm": 32.03125, "learning_rate": 9.991591222047701e-06, "loss": 28.7066, "step": 2900 }, { "epoch": 0.05400176758362967, "grad_norm": 33.4375, "learning_rate": 9.991562226261659e-06, "loss": 28.8083, "step": 2910 }, { "epoch": 0.0541873406681095, "grad_norm": 32.5, "learning_rate": 9.991533230475616e-06, "loss": 28.5716, "step": 2920 }, { "epoch": 0.054372913752589326, "grad_norm": 32.53125, "learning_rate": 9.991504234689574e-06, "loss": 28.7969, "step": 2930 }, { "epoch": 0.05455848683706915, "grad_norm": 33.09375, "learning_rate": 9.991475238903531e-06, "loss": 28.535, "step": 2940 }, { "epoch": 0.05474405992154898, "grad_norm": 32.5625, "learning_rate": 9.991446243117488e-06, "loss": 28.7149, "step": 2950 }, { "epoch": 0.05492963300602881, "grad_norm": 33.75, "learning_rate": 9.991417247331446e-06, "loss": 28.7103, "step": 2960 }, { "epoch": 0.055115206090508635, "grad_norm": 32.53125, "learning_rate": 9.991388251545405e-06, "loss": 28.6576, "step": 2970 }, { "epoch": 0.05530077917498846, "grad_norm": 35.375, "learning_rate": 9.99135925575936e-06, "loss": 28.6555, "step": 2980 }, { "epoch": 0.05548635225946829, "grad_norm": 33.78125, "learning_rate": 9.991330259973318e-06, "loss": 28.4429, "step": 2990 }, { "epoch": 0.05567192534394812, "grad_norm": 33.3125, "learning_rate": 9.991301264187277e-06, "loss": 28.3745, "step": 3000 }, { "epoch": 0.05585749842842794, "grad_norm": 33.5, "learning_rate": 9.991272268401235e-06, "loss": 28.3746, "step": 3010 }, { "epoch": 0.056043071512907765, "grad_norm": 31.96875, "learning_rate": 9.991243272615192e-06, "loss": 28.5534, "step": 3020 }, { "epoch": 0.05622864459738759, "grad_norm": 34.25, "learning_rate": 9.99121427682915e-06, "loss": 28.6145, "step": 3030 }, { "epoch": 0.05641421768186742, "grad_norm": 33.46875, "learning_rate": 9.991185281043107e-06, "loss": 28.6077, "step": 3040 }, { "epoch": 0.05659979076634725, "grad_norm": 33.375, "learning_rate": 9.991156285257064e-06, "loss": 28.967, "step": 3050 }, { "epoch": 0.056785363850827074, "grad_norm": 33.1875, "learning_rate": 9.991127289471022e-06, "loss": 28.4267, "step": 3060 }, { "epoch": 0.0569709369353069, "grad_norm": 33.53125, "learning_rate": 9.991098293684979e-06, "loss": 28.6182, "step": 3070 }, { "epoch": 0.05715651001978673, "grad_norm": 33.03125, "learning_rate": 9.991069297898938e-06, "loss": 28.5312, "step": 3080 }, { "epoch": 0.057342083104266556, "grad_norm": 33.28125, "learning_rate": 9.991040302112894e-06, "loss": 28.7762, "step": 3090 }, { "epoch": 0.05752765618874638, "grad_norm": 33.84375, "learning_rate": 9.991011306326853e-06, "loss": 28.4776, "step": 3100 }, { "epoch": 0.05771322927322621, "grad_norm": 31.390625, "learning_rate": 9.99098231054081e-06, "loss": 28.2534, "step": 3110 }, { "epoch": 0.05789880235770604, "grad_norm": 33.0, "learning_rate": 9.990953314754768e-06, "loss": 28.5445, "step": 3120 }, { "epoch": 0.058084375442185865, "grad_norm": 32.8125, "learning_rate": 9.990924318968725e-06, "loss": 28.4319, "step": 3130 }, { "epoch": 0.05826994852666569, "grad_norm": 32.9375, "learning_rate": 9.990895323182683e-06, "loss": 28.4076, "step": 3140 }, { "epoch": 0.05845552161114552, "grad_norm": 34.09375, "learning_rate": 9.99086632739664e-06, "loss": 28.4729, "step": 3150 }, { "epoch": 0.05864109469562535, "grad_norm": 34.09375, "learning_rate": 9.990837331610597e-06, "loss": 28.3709, "step": 3160 }, { "epoch": 0.058826667780105174, "grad_norm": 33.0, "learning_rate": 9.990808335824555e-06, "loss": 28.149, "step": 3170 }, { "epoch": 0.059012240864585, "grad_norm": 33.21875, "learning_rate": 9.990779340038514e-06, "loss": 28.3831, "step": 3180 }, { "epoch": 0.05919781394906483, "grad_norm": 33.5, "learning_rate": 9.990750344252471e-06, "loss": 28.5165, "step": 3190 }, { "epoch": 0.059383387033544656, "grad_norm": 32.71875, "learning_rate": 9.990721348466427e-06, "loss": 28.5913, "step": 3200 }, { "epoch": 0.05956896011802448, "grad_norm": 32.40625, "learning_rate": 9.990692352680386e-06, "loss": 27.9974, "step": 3210 }, { "epoch": 0.05975453320250431, "grad_norm": 35.0, "learning_rate": 9.990663356894344e-06, "loss": 28.6096, "step": 3220 }, { "epoch": 0.05994010628698414, "grad_norm": 33.6875, "learning_rate": 9.990634361108301e-06, "loss": 28.4732, "step": 3230 }, { "epoch": 0.060125679371463965, "grad_norm": 34.3125, "learning_rate": 9.990605365322258e-06, "loss": 28.7017, "step": 3240 }, { "epoch": 0.06031125245594379, "grad_norm": 34.09375, "learning_rate": 9.990576369536216e-06, "loss": 28.2992, "step": 3250 }, { "epoch": 0.06049682554042362, "grad_norm": 36.84375, "learning_rate": 9.990547373750173e-06, "loss": 28.4159, "step": 3260 }, { "epoch": 0.06068239862490345, "grad_norm": 33.4375, "learning_rate": 9.99051837796413e-06, "loss": 28.3044, "step": 3270 }, { "epoch": 0.060867971709383274, "grad_norm": 32.96875, "learning_rate": 9.99048938217809e-06, "loss": 28.8831, "step": 3280 }, { "epoch": 0.0610535447938631, "grad_norm": 34.0, "learning_rate": 9.990460386392047e-06, "loss": 28.2866, "step": 3290 }, { "epoch": 0.06123911787834292, "grad_norm": 32.59375, "learning_rate": 9.990431390606003e-06, "loss": 28.5798, "step": 3300 }, { "epoch": 0.06142469096282275, "grad_norm": 35.59375, "learning_rate": 9.990402394819962e-06, "loss": 28.379, "step": 3310 }, { "epoch": 0.061610264047302576, "grad_norm": 32.4375, "learning_rate": 9.99037339903392e-06, "loss": 27.8911, "step": 3320 }, { "epoch": 0.061795837131782404, "grad_norm": 31.421875, "learning_rate": 9.990344403247877e-06, "loss": 27.7105, "step": 3330 }, { "epoch": 0.06198141021626223, "grad_norm": 34.90625, "learning_rate": 9.990315407461834e-06, "loss": 27.8929, "step": 3340 }, { "epoch": 0.06216698330074206, "grad_norm": 31.84375, "learning_rate": 9.990286411675793e-06, "loss": 28.0276, "step": 3350 }, { "epoch": 0.062352556385221886, "grad_norm": 33.21875, "learning_rate": 9.990257415889749e-06, "loss": 28.489, "step": 3360 }, { "epoch": 0.06253812946970172, "grad_norm": 32.96875, "learning_rate": 9.990228420103706e-06, "loss": 28.0725, "step": 3370 }, { "epoch": 0.06272370255418154, "grad_norm": 32.90625, "learning_rate": 9.990199424317665e-06, "loss": 28.3279, "step": 3380 }, { "epoch": 0.06290927563866137, "grad_norm": 34.21875, "learning_rate": 9.990170428531623e-06, "loss": 28.3579, "step": 3390 }, { "epoch": 0.0630948487231412, "grad_norm": 35.1875, "learning_rate": 9.99014143274558e-06, "loss": 28.056, "step": 3400 }, { "epoch": 0.06328042180762103, "grad_norm": 34.25, "learning_rate": 9.990112436959538e-06, "loss": 28.0313, "step": 3410 }, { "epoch": 0.06346599489210085, "grad_norm": 33.28125, "learning_rate": 9.990083441173495e-06, "loss": 28.1424, "step": 3420 }, { "epoch": 0.06365156797658068, "grad_norm": 33.25, "learning_rate": 9.990054445387452e-06, "loss": 28.0296, "step": 3430 }, { "epoch": 0.0638371410610605, "grad_norm": 32.75, "learning_rate": 9.99002544960141e-06, "loss": 27.9722, "step": 3440 }, { "epoch": 0.06402271414554032, "grad_norm": 33.5625, "learning_rate": 9.989996453815369e-06, "loss": 27.9661, "step": 3450 }, { "epoch": 0.06420828723002016, "grad_norm": 34.0625, "learning_rate": 9.989967458029325e-06, "loss": 28.3075, "step": 3460 }, { "epoch": 0.06439386031449998, "grad_norm": 32.6875, "learning_rate": 9.989938462243282e-06, "loss": 27.7058, "step": 3470 }, { "epoch": 0.06457943339897981, "grad_norm": 33.90625, "learning_rate": 9.989909466457241e-06, "loss": 28.327, "step": 3480 }, { "epoch": 0.06476500648345963, "grad_norm": 35.5625, "learning_rate": 9.989880470671199e-06, "loss": 28.1936, "step": 3490 }, { "epoch": 0.06495057956793947, "grad_norm": 34.375, "learning_rate": 9.989851474885156e-06, "loss": 28.0495, "step": 3500 }, { "epoch": 0.06513615265241929, "grad_norm": 33.8125, "learning_rate": 9.989822479099113e-06, "loss": 27.782, "step": 3510 }, { "epoch": 0.06532172573689912, "grad_norm": 33.53125, "learning_rate": 9.98979348331307e-06, "loss": 27.8067, "step": 3520 }, { "epoch": 0.06550729882137894, "grad_norm": 32.71875, "learning_rate": 9.989764487527028e-06, "loss": 27.7428, "step": 3530 }, { "epoch": 0.06569287190585878, "grad_norm": 34.125, "learning_rate": 9.989735491740986e-06, "loss": 27.872, "step": 3540 }, { "epoch": 0.0658784449903386, "grad_norm": 34.875, "learning_rate": 9.989706495954945e-06, "loss": 27.917, "step": 3550 }, { "epoch": 0.06606401807481843, "grad_norm": 33.75, "learning_rate": 9.989677500168902e-06, "loss": 27.772, "step": 3560 }, { "epoch": 0.06624959115929825, "grad_norm": 34.125, "learning_rate": 9.989648504382858e-06, "loss": 27.3048, "step": 3570 }, { "epoch": 0.06643516424377809, "grad_norm": 35.59375, "learning_rate": 9.989619508596817e-06, "loss": 27.7842, "step": 3580 }, { "epoch": 0.0666207373282579, "grad_norm": 33.125, "learning_rate": 9.989590512810774e-06, "loss": 27.7142, "step": 3590 }, { "epoch": 0.06680631041273774, "grad_norm": 34.875, "learning_rate": 9.989561517024732e-06, "loss": 27.9764, "step": 3600 }, { "epoch": 0.06699188349721756, "grad_norm": 33.03125, "learning_rate": 9.98953252123869e-06, "loss": 27.9061, "step": 3610 }, { "epoch": 0.0671774565816974, "grad_norm": 34.09375, "learning_rate": 9.989503525452647e-06, "loss": 28.1396, "step": 3620 }, { "epoch": 0.06736302966617722, "grad_norm": 34.375, "learning_rate": 9.989474529666604e-06, "loss": 27.8155, "step": 3630 }, { "epoch": 0.06754860275065705, "grad_norm": 33.59375, "learning_rate": 9.989445533880561e-06, "loss": 28.192, "step": 3640 }, { "epoch": 0.06773417583513687, "grad_norm": 33.40625, "learning_rate": 9.989416538094519e-06, "loss": 27.6073, "step": 3650 }, { "epoch": 0.0679197489196167, "grad_norm": 33.65625, "learning_rate": 9.989387542308478e-06, "loss": 27.8798, "step": 3660 }, { "epoch": 0.06810532200409652, "grad_norm": 34.0, "learning_rate": 9.989358546522435e-06, "loss": 27.4086, "step": 3670 }, { "epoch": 0.06829089508857636, "grad_norm": 34.3125, "learning_rate": 9.989329550736393e-06, "loss": 28.0494, "step": 3680 }, { "epoch": 0.06847646817305618, "grad_norm": 33.71875, "learning_rate": 9.98930055495035e-06, "loss": 27.7582, "step": 3690 }, { "epoch": 0.06866204125753601, "grad_norm": 32.75, "learning_rate": 9.989271559164308e-06, "loss": 27.7977, "step": 3700 }, { "epoch": 0.06884761434201583, "grad_norm": 33.34375, "learning_rate": 9.989242563378265e-06, "loss": 28.2126, "step": 3710 }, { "epoch": 0.06903318742649567, "grad_norm": 31.671875, "learning_rate": 9.989213567592222e-06, "loss": 27.4855, "step": 3720 }, { "epoch": 0.06921876051097549, "grad_norm": 32.65625, "learning_rate": 9.98918457180618e-06, "loss": 27.8408, "step": 3730 }, { "epoch": 0.06940433359545531, "grad_norm": 31.484375, "learning_rate": 9.989155576020137e-06, "loss": 27.7657, "step": 3740 }, { "epoch": 0.06958990667993514, "grad_norm": 33.1875, "learning_rate": 9.989126580234095e-06, "loss": 27.8898, "step": 3750 }, { "epoch": 0.06977547976441496, "grad_norm": 32.90625, "learning_rate": 9.989097584448054e-06, "loss": 27.4146, "step": 3760 }, { "epoch": 0.0699610528488948, "grad_norm": 31.53125, "learning_rate": 9.989068588662011e-06, "loss": 27.6763, "step": 3770 }, { "epoch": 0.07014662593337462, "grad_norm": 33.125, "learning_rate": 9.989039592875968e-06, "loss": 27.6253, "step": 3780 }, { "epoch": 0.07033219901785445, "grad_norm": 35.0, "learning_rate": 9.989010597089926e-06, "loss": 27.7828, "step": 3790 }, { "epoch": 0.07051777210233427, "grad_norm": 35.84375, "learning_rate": 9.988981601303883e-06, "loss": 27.6479, "step": 3800 }, { "epoch": 0.0707033451868141, "grad_norm": 33.03125, "learning_rate": 9.98895260551784e-06, "loss": 27.5623, "step": 3810 }, { "epoch": 0.07088891827129393, "grad_norm": 32.84375, "learning_rate": 9.988923609731798e-06, "loss": 27.1696, "step": 3820 }, { "epoch": 0.07107449135577376, "grad_norm": 33.84375, "learning_rate": 9.988894613945757e-06, "loss": 27.4184, "step": 3830 }, { "epoch": 0.07126006444025358, "grad_norm": 34.5625, "learning_rate": 9.988865618159713e-06, "loss": 27.6026, "step": 3840 }, { "epoch": 0.07144563752473342, "grad_norm": 35.6875, "learning_rate": 9.98883662237367e-06, "loss": 27.5197, "step": 3850 }, { "epoch": 0.07163121060921324, "grad_norm": 33.15625, "learning_rate": 9.98880762658763e-06, "loss": 27.482, "step": 3860 }, { "epoch": 0.07181678369369307, "grad_norm": 33.90625, "learning_rate": 9.988778630801587e-06, "loss": 27.7027, "step": 3870 }, { "epoch": 0.07200235677817289, "grad_norm": 32.375, "learning_rate": 9.988749635015544e-06, "loss": 27.3702, "step": 3880 }, { "epoch": 0.07218792986265272, "grad_norm": 34.03125, "learning_rate": 9.988720639229502e-06, "loss": 26.946, "step": 3890 }, { "epoch": 0.07237350294713255, "grad_norm": 36.34375, "learning_rate": 9.988691643443459e-06, "loss": 27.8434, "step": 3900 }, { "epoch": 0.07255907603161238, "grad_norm": 33.8125, "learning_rate": 9.988662647657416e-06, "loss": 27.6265, "step": 3910 }, { "epoch": 0.0727446491160922, "grad_norm": 34.59375, "learning_rate": 9.988633651871374e-06, "loss": 27.367, "step": 3920 }, { "epoch": 0.07293022220057203, "grad_norm": 35.15625, "learning_rate": 9.988604656085333e-06, "loss": 27.686, "step": 3930 }, { "epoch": 0.07311579528505185, "grad_norm": 32.625, "learning_rate": 9.98857566029929e-06, "loss": 27.4221, "step": 3940 }, { "epoch": 0.07330136836953169, "grad_norm": 32.75, "learning_rate": 9.988546664513246e-06, "loss": 27.7722, "step": 3950 }, { "epoch": 0.07348694145401151, "grad_norm": 34.78125, "learning_rate": 9.988517668727205e-06, "loss": 27.3738, "step": 3960 }, { "epoch": 0.07367251453849134, "grad_norm": 33.9375, "learning_rate": 9.988488672941163e-06, "loss": 27.7157, "step": 3970 }, { "epoch": 0.07385808762297116, "grad_norm": 34.59375, "learning_rate": 9.98845967715512e-06, "loss": 27.328, "step": 3980 }, { "epoch": 0.074043660707451, "grad_norm": 34.34375, "learning_rate": 9.988430681369077e-06, "loss": 27.4072, "step": 3990 }, { "epoch": 0.07422923379193082, "grad_norm": 32.96875, "learning_rate": 9.988401685583035e-06, "loss": 27.6186, "step": 4000 }, { "epoch": 0.07441480687641064, "grad_norm": 33.84375, "learning_rate": 9.988372689796992e-06, "loss": 27.6838, "step": 4010 }, { "epoch": 0.07460037996089047, "grad_norm": 33.0625, "learning_rate": 9.98834369401095e-06, "loss": 27.4932, "step": 4020 }, { "epoch": 0.0747859530453703, "grad_norm": 34.375, "learning_rate": 9.988314698224909e-06, "loss": 27.4686, "step": 4030 }, { "epoch": 0.07497152612985013, "grad_norm": 34.8125, "learning_rate": 9.988285702438866e-06, "loss": 27.094, "step": 4040 }, { "epoch": 0.07515709921432995, "grad_norm": 32.25, "learning_rate": 9.988256706652822e-06, "loss": 27.2472, "step": 4050 }, { "epoch": 0.07534267229880978, "grad_norm": 33.0, "learning_rate": 9.988227710866781e-06, "loss": 27.2044, "step": 4060 }, { "epoch": 0.0755282453832896, "grad_norm": 32.84375, "learning_rate": 9.988198715080738e-06, "loss": 27.0425, "step": 4070 }, { "epoch": 0.07571381846776944, "grad_norm": 36.15625, "learning_rate": 9.988169719294696e-06, "loss": 27.5593, "step": 4080 }, { "epoch": 0.07589939155224926, "grad_norm": 32.96875, "learning_rate": 9.988140723508653e-06, "loss": 26.8557, "step": 4090 }, { "epoch": 0.07608496463672909, "grad_norm": 33.25, "learning_rate": 9.98811172772261e-06, "loss": 27.2413, "step": 4100 }, { "epoch": 0.07627053772120891, "grad_norm": 33.90625, "learning_rate": 9.988082731936568e-06, "loss": 27.032, "step": 4110 }, { "epoch": 0.07645611080568875, "grad_norm": 34.15625, "learning_rate": 9.988053736150525e-06, "loss": 27.1695, "step": 4120 }, { "epoch": 0.07664168389016857, "grad_norm": 33.59375, "learning_rate": 9.988024740364485e-06, "loss": 27.4066, "step": 4130 }, { "epoch": 0.0768272569746484, "grad_norm": 36.03125, "learning_rate": 9.987995744578442e-06, "loss": 27.7367, "step": 4140 }, { "epoch": 0.07701283005912822, "grad_norm": 35.375, "learning_rate": 9.9879667487924e-06, "loss": 27.0799, "step": 4150 }, { "epoch": 0.07719840314360805, "grad_norm": 34.21875, "learning_rate": 9.987937753006357e-06, "loss": 27.4982, "step": 4160 }, { "epoch": 0.07738397622808788, "grad_norm": 33.15625, "learning_rate": 9.987908757220314e-06, "loss": 26.987, "step": 4170 }, { "epoch": 0.07756954931256771, "grad_norm": 32.53125, "learning_rate": 9.987879761434272e-06, "loss": 27.2336, "step": 4180 }, { "epoch": 0.07775512239704753, "grad_norm": 34.96875, "learning_rate": 9.987850765648229e-06, "loss": 27.14, "step": 4190 }, { "epoch": 0.07794069548152736, "grad_norm": 34.8125, "learning_rate": 9.987821769862186e-06, "loss": 27.8387, "step": 4200 }, { "epoch": 0.07812626856600718, "grad_norm": 34.09375, "learning_rate": 9.987792774076144e-06, "loss": 27.4868, "step": 4210 }, { "epoch": 0.07831184165048702, "grad_norm": 33.875, "learning_rate": 9.987763778290101e-06, "loss": 27.4351, "step": 4220 }, { "epoch": 0.07849741473496684, "grad_norm": 35.21875, "learning_rate": 9.987734782504059e-06, "loss": 27.3241, "step": 4230 }, { "epoch": 0.07868298781944667, "grad_norm": 33.0, "learning_rate": 9.987705786718018e-06, "loss": 27.6498, "step": 4240 }, { "epoch": 0.0788685609039265, "grad_norm": 33.09375, "learning_rate": 9.987676790931975e-06, "loss": 27.1418, "step": 4250 }, { "epoch": 0.07905413398840633, "grad_norm": 32.8125, "learning_rate": 9.987647795145932e-06, "loss": 27.1666, "step": 4260 }, { "epoch": 0.07923970707288615, "grad_norm": 35.3125, "learning_rate": 9.98761879935989e-06, "loss": 26.8474, "step": 4270 }, { "epoch": 0.07942528015736598, "grad_norm": 34.65625, "learning_rate": 9.987589803573847e-06, "loss": 27.1325, "step": 4280 }, { "epoch": 0.0796108532418458, "grad_norm": 33.6875, "learning_rate": 9.987560807787805e-06, "loss": 27.2594, "step": 4290 }, { "epoch": 0.07979642632632562, "grad_norm": 33.1875, "learning_rate": 9.987531812001762e-06, "loss": 26.8724, "step": 4300 }, { "epoch": 0.07998199941080546, "grad_norm": 35.375, "learning_rate": 9.987502816215721e-06, "loss": 27.1834, "step": 4310 }, { "epoch": 0.08016757249528528, "grad_norm": 33.6875, "learning_rate": 9.987473820429677e-06, "loss": 26.8924, "step": 4320 }, { "epoch": 0.08035314557976511, "grad_norm": 33.90625, "learning_rate": 9.987444824643634e-06, "loss": 26.6646, "step": 4330 }, { "epoch": 0.08053871866424493, "grad_norm": 32.59375, "learning_rate": 9.987415828857593e-06, "loss": 26.7559, "step": 4340 }, { "epoch": 0.08072429174872477, "grad_norm": 34.90625, "learning_rate": 9.987386833071551e-06, "loss": 27.3586, "step": 4350 }, { "epoch": 0.08090986483320459, "grad_norm": 33.90625, "learning_rate": 9.987357837285508e-06, "loss": 26.9466, "step": 4360 }, { "epoch": 0.08109543791768442, "grad_norm": 35.4375, "learning_rate": 9.987328841499466e-06, "loss": 27.0701, "step": 4370 }, { "epoch": 0.08128101100216424, "grad_norm": 35.4375, "learning_rate": 9.987299845713423e-06, "loss": 26.6192, "step": 4380 }, { "epoch": 0.08146658408664408, "grad_norm": 35.34375, "learning_rate": 9.98727084992738e-06, "loss": 26.7397, "step": 4390 }, { "epoch": 0.0816521571711239, "grad_norm": 33.15625, "learning_rate": 9.987241854141338e-06, "loss": 27.2757, "step": 4400 }, { "epoch": 0.08183773025560373, "grad_norm": 34.21875, "learning_rate": 9.987212858355297e-06, "loss": 27.0461, "step": 4410 }, { "epoch": 0.08202330334008355, "grad_norm": 35.15625, "learning_rate": 9.987183862569254e-06, "loss": 26.959, "step": 4420 }, { "epoch": 0.08220887642456338, "grad_norm": 32.15625, "learning_rate": 9.98715486678321e-06, "loss": 26.739, "step": 4430 }, { "epoch": 0.0823944495090432, "grad_norm": 32.625, "learning_rate": 9.98712587099717e-06, "loss": 26.9358, "step": 4440 }, { "epoch": 0.08258002259352304, "grad_norm": 33.6875, "learning_rate": 9.987096875211127e-06, "loss": 26.798, "step": 4450 }, { "epoch": 0.08276559567800286, "grad_norm": 33.4375, "learning_rate": 9.987067879425084e-06, "loss": 27.0618, "step": 4460 }, { "epoch": 0.0829511687624827, "grad_norm": 33.15625, "learning_rate": 9.987038883639041e-06, "loss": 26.6068, "step": 4470 }, { "epoch": 0.08313674184696251, "grad_norm": 34.4375, "learning_rate": 9.987009887852999e-06, "loss": 26.5002, "step": 4480 }, { "epoch": 0.08332231493144235, "grad_norm": 32.96875, "learning_rate": 9.986980892066956e-06, "loss": 27.0752, "step": 4490 }, { "epoch": 0.08350788801592217, "grad_norm": 34.625, "learning_rate": 9.986951896280914e-06, "loss": 27.043, "step": 4500 }, { "epoch": 0.083693461100402, "grad_norm": 32.96875, "learning_rate": 9.986922900494873e-06, "loss": 26.7434, "step": 4510 }, { "epoch": 0.08387903418488182, "grad_norm": 33.96875, "learning_rate": 9.98689390470883e-06, "loss": 26.9011, "step": 4520 }, { "epoch": 0.08406460726936166, "grad_norm": 34.75, "learning_rate": 9.986864908922788e-06, "loss": 27.0535, "step": 4530 }, { "epoch": 0.08425018035384148, "grad_norm": 32.59375, "learning_rate": 9.986835913136745e-06, "loss": 26.9796, "step": 4540 }, { "epoch": 0.08443575343832131, "grad_norm": 32.71875, "learning_rate": 9.986806917350702e-06, "loss": 26.9757, "step": 4550 }, { "epoch": 0.08462132652280113, "grad_norm": 33.625, "learning_rate": 9.98677792156466e-06, "loss": 26.684, "step": 4560 }, { "epoch": 0.08480689960728097, "grad_norm": 33.90625, "learning_rate": 9.986748925778617e-06, "loss": 26.4476, "step": 4570 }, { "epoch": 0.08499247269176079, "grad_norm": 33.15625, "learning_rate": 9.986719929992575e-06, "loss": 26.6824, "step": 4580 }, { "epoch": 0.08517804577624061, "grad_norm": 34.78125, "learning_rate": 9.986690934206532e-06, "loss": 26.9472, "step": 4590 }, { "epoch": 0.08536361886072044, "grad_norm": 34.25, "learning_rate": 9.98666193842049e-06, "loss": 27.1505, "step": 4600 }, { "epoch": 0.08554919194520026, "grad_norm": 33.1875, "learning_rate": 9.986632942634449e-06, "loss": 26.9236, "step": 4610 }, { "epoch": 0.0857347650296801, "grad_norm": 34.71875, "learning_rate": 9.986603946848406e-06, "loss": 27.303, "step": 4620 }, { "epoch": 0.08592033811415992, "grad_norm": 34.09375, "learning_rate": 9.986574951062363e-06, "loss": 26.6816, "step": 4630 }, { "epoch": 0.08610591119863975, "grad_norm": 34.9375, "learning_rate": 9.98654595527632e-06, "loss": 26.5109, "step": 4640 }, { "epoch": 0.08629148428311957, "grad_norm": 32.59375, "learning_rate": 9.986516959490278e-06, "loss": 26.4418, "step": 4650 }, { "epoch": 0.0864770573675994, "grad_norm": 33.8125, "learning_rate": 9.986487963704236e-06, "loss": 26.6267, "step": 4660 }, { "epoch": 0.08666263045207923, "grad_norm": 33.9375, "learning_rate": 9.986458967918193e-06, "loss": 26.784, "step": 4670 }, { "epoch": 0.08684820353655906, "grad_norm": 33.78125, "learning_rate": 9.98642997213215e-06, "loss": 26.6958, "step": 4680 }, { "epoch": 0.08703377662103888, "grad_norm": 33.4375, "learning_rate": 9.98640097634611e-06, "loss": 26.3625, "step": 4690 }, { "epoch": 0.08721934970551871, "grad_norm": 31.953125, "learning_rate": 9.986371980560065e-06, "loss": 26.9694, "step": 4700 }, { "epoch": 0.08740492278999853, "grad_norm": 33.59375, "learning_rate": 9.986342984774023e-06, "loss": 26.4038, "step": 4710 }, { "epoch": 0.08759049587447837, "grad_norm": 32.75, "learning_rate": 9.986313988987982e-06, "loss": 26.7672, "step": 4720 }, { "epoch": 0.08777606895895819, "grad_norm": 34.375, "learning_rate": 9.986284993201939e-06, "loss": 26.8111, "step": 4730 }, { "epoch": 0.08796164204343802, "grad_norm": 34.0625, "learning_rate": 9.986255997415897e-06, "loss": 26.6389, "step": 4740 }, { "epoch": 0.08814721512791784, "grad_norm": 34.4375, "learning_rate": 9.986227001629854e-06, "loss": 26.8616, "step": 4750 }, { "epoch": 0.08833278821239768, "grad_norm": 33.34375, "learning_rate": 9.986198005843811e-06, "loss": 26.621, "step": 4760 }, { "epoch": 0.0885183612968775, "grad_norm": 31.96875, "learning_rate": 9.986169010057769e-06, "loss": 26.5185, "step": 4770 }, { "epoch": 0.08870393438135733, "grad_norm": 32.5625, "learning_rate": 9.986140014271726e-06, "loss": 26.8874, "step": 4780 }, { "epoch": 0.08888950746583715, "grad_norm": 34.03125, "learning_rate": 9.986111018485685e-06, "loss": 26.7579, "step": 4790 }, { "epoch": 0.08907508055031699, "grad_norm": 35.96875, "learning_rate": 9.986082022699641e-06, "loss": 26.8933, "step": 4800 }, { "epoch": 0.08926065363479681, "grad_norm": 35.03125, "learning_rate": 9.986053026913598e-06, "loss": 26.5203, "step": 4810 }, { "epoch": 0.08944622671927664, "grad_norm": 35.1875, "learning_rate": 9.986024031127557e-06, "loss": 26.714, "step": 4820 }, { "epoch": 0.08963179980375646, "grad_norm": 34.78125, "learning_rate": 9.985995035341515e-06, "loss": 26.6803, "step": 4830 }, { "epoch": 0.0898173728882363, "grad_norm": 37.03125, "learning_rate": 9.985966039555472e-06, "loss": 26.7259, "step": 4840 }, { "epoch": 0.09000294597271612, "grad_norm": 34.28125, "learning_rate": 9.98593704376943e-06, "loss": 27.119, "step": 4850 }, { "epoch": 0.09018851905719595, "grad_norm": 33.65625, "learning_rate": 9.985908047983387e-06, "loss": 26.4196, "step": 4860 }, { "epoch": 0.09037409214167577, "grad_norm": 33.84375, "learning_rate": 9.985879052197344e-06, "loss": 26.3284, "step": 4870 }, { "epoch": 0.09055966522615559, "grad_norm": 32.84375, "learning_rate": 9.985850056411302e-06, "loss": 26.8074, "step": 4880 }, { "epoch": 0.09074523831063543, "grad_norm": 34.15625, "learning_rate": 9.985821060625261e-06, "loss": 26.4722, "step": 4890 }, { "epoch": 0.09093081139511525, "grad_norm": 33.125, "learning_rate": 9.985792064839218e-06, "loss": 26.3422, "step": 4900 }, { "epoch": 0.09111638447959508, "grad_norm": 33.75, "learning_rate": 9.985763069053174e-06, "loss": 26.2426, "step": 4910 }, { "epoch": 0.0913019575640749, "grad_norm": 34.1875, "learning_rate": 9.985734073267133e-06, "loss": 26.4652, "step": 4920 }, { "epoch": 0.09148753064855474, "grad_norm": 34.125, "learning_rate": 9.98570507748109e-06, "loss": 26.6846, "step": 4930 }, { "epoch": 0.09167310373303456, "grad_norm": 32.15625, "learning_rate": 9.985676081695048e-06, "loss": 26.6221, "step": 4940 }, { "epoch": 0.09185867681751439, "grad_norm": 32.75, "learning_rate": 9.985647085909005e-06, "loss": 26.6039, "step": 4950 }, { "epoch": 0.09204424990199421, "grad_norm": 34.15625, "learning_rate": 9.985618090122965e-06, "loss": 26.6058, "step": 4960 }, { "epoch": 0.09222982298647404, "grad_norm": 34.75, "learning_rate": 9.98558909433692e-06, "loss": 26.4243, "step": 4970 }, { "epoch": 0.09241539607095386, "grad_norm": 35.875, "learning_rate": 9.985560098550878e-06, "loss": 27.0457, "step": 4980 }, { "epoch": 0.0926009691554337, "grad_norm": 34.75, "learning_rate": 9.985531102764837e-06, "loss": 26.3863, "step": 4990 }, { "epoch": 0.09278654223991352, "grad_norm": 35.3125, "learning_rate": 9.985502106978794e-06, "loss": 26.6178, "step": 5000 }, { "epoch": 0.09278654223991352, "eval_loss": 3.30993914604187, "eval_runtime": 453.9307, "eval_samples_per_second": 3198.984, "eval_steps_per_second": 49.986, "step": 5000 }, { "epoch": 0.09297211532439335, "grad_norm": 36.4375, "learning_rate": 9.985473111192752e-06, "loss": 26.5918, "step": 5010 }, { "epoch": 0.09315768840887317, "grad_norm": 33.40625, "learning_rate": 9.985444115406709e-06, "loss": 26.9783, "step": 5020 }, { "epoch": 0.09334326149335301, "grad_norm": 36.59375, "learning_rate": 9.985415119620666e-06, "loss": 25.9906, "step": 5030 }, { "epoch": 0.09352883457783283, "grad_norm": 31.4375, "learning_rate": 9.985386123834624e-06, "loss": 25.8641, "step": 5040 }, { "epoch": 0.09371440766231266, "grad_norm": 35.0625, "learning_rate": 9.985357128048581e-06, "loss": 26.4401, "step": 5050 }, { "epoch": 0.09389998074679248, "grad_norm": 34.28125, "learning_rate": 9.98532813226254e-06, "loss": 26.3189, "step": 5060 }, { "epoch": 0.09408555383127232, "grad_norm": 33.1875, "learning_rate": 9.985299136476496e-06, "loss": 26.5397, "step": 5070 }, { "epoch": 0.09427112691575214, "grad_norm": 34.46875, "learning_rate": 9.985270140690453e-06, "loss": 26.5433, "step": 5080 }, { "epoch": 0.09445670000023197, "grad_norm": 35.75, "learning_rate": 9.985241144904413e-06, "loss": 26.7638, "step": 5090 }, { "epoch": 0.09464227308471179, "grad_norm": 33.0625, "learning_rate": 9.98521214911837e-06, "loss": 26.7023, "step": 5100 }, { "epoch": 0.09482784616919163, "grad_norm": 33.40625, "learning_rate": 9.985183153332327e-06, "loss": 26.4865, "step": 5110 }, { "epoch": 0.09501341925367145, "grad_norm": 34.1875, "learning_rate": 9.985154157546285e-06, "loss": 26.0229, "step": 5120 }, { "epoch": 0.09519899233815128, "grad_norm": 35.0625, "learning_rate": 9.985125161760242e-06, "loss": 26.5095, "step": 5130 }, { "epoch": 0.0953845654226311, "grad_norm": 33.25, "learning_rate": 9.9850961659742e-06, "loss": 26.9945, "step": 5140 }, { "epoch": 0.09557013850711094, "grad_norm": 34.8125, "learning_rate": 9.985067170188157e-06, "loss": 26.3212, "step": 5150 }, { "epoch": 0.09575571159159076, "grad_norm": 35.53125, "learning_rate": 9.985038174402114e-06, "loss": 26.4003, "step": 5160 }, { "epoch": 0.09594128467607058, "grad_norm": 34.25, "learning_rate": 9.985009178616073e-06, "loss": 26.3438, "step": 5170 }, { "epoch": 0.09612685776055041, "grad_norm": 34.25, "learning_rate": 9.98498018283003e-06, "loss": 26.6637, "step": 5180 }, { "epoch": 0.09631243084503023, "grad_norm": 34.90625, "learning_rate": 9.984951187043988e-06, "loss": 26.7155, "step": 5190 }, { "epoch": 0.09649800392951007, "grad_norm": 35.0, "learning_rate": 9.984922191257946e-06, "loss": 26.299, "step": 5200 }, { "epoch": 0.09668357701398989, "grad_norm": 34.6875, "learning_rate": 9.984893195471903e-06, "loss": 26.5553, "step": 5210 }, { "epoch": 0.09686915009846972, "grad_norm": 35.125, "learning_rate": 9.98486419968586e-06, "loss": 26.0765, "step": 5220 }, { "epoch": 0.09705472318294954, "grad_norm": 35.25, "learning_rate": 9.984835203899818e-06, "loss": 26.0202, "step": 5230 }, { "epoch": 0.09724029626742937, "grad_norm": 33.59375, "learning_rate": 9.984806208113775e-06, "loss": 26.4129, "step": 5240 }, { "epoch": 0.0974258693519092, "grad_norm": 34.78125, "learning_rate": 9.984777212327733e-06, "loss": 25.8033, "step": 5250 }, { "epoch": 0.09761144243638903, "grad_norm": 33.90625, "learning_rate": 9.98474821654169e-06, "loss": 26.5802, "step": 5260 }, { "epoch": 0.09779701552086885, "grad_norm": 34.0, "learning_rate": 9.98471922075565e-06, "loss": 26.2729, "step": 5270 }, { "epoch": 0.09798258860534868, "grad_norm": 33.46875, "learning_rate": 9.984690224969607e-06, "loss": 26.3926, "step": 5280 }, { "epoch": 0.0981681616898285, "grad_norm": 33.3125, "learning_rate": 9.984661229183562e-06, "loss": 26.2984, "step": 5290 }, { "epoch": 0.09835373477430834, "grad_norm": 32.375, "learning_rate": 9.984632233397521e-06, "loss": 26.0385, "step": 5300 }, { "epoch": 0.09853930785878816, "grad_norm": 34.78125, "learning_rate": 9.984603237611479e-06, "loss": 26.2984, "step": 5310 }, { "epoch": 0.09872488094326799, "grad_norm": 34.65625, "learning_rate": 9.984574241825436e-06, "loss": 26.4943, "step": 5320 }, { "epoch": 0.09891045402774781, "grad_norm": 33.84375, "learning_rate": 9.984545246039394e-06, "loss": 26.3305, "step": 5330 }, { "epoch": 0.09909602711222765, "grad_norm": 32.9375, "learning_rate": 9.984516250253351e-06, "loss": 26.4073, "step": 5340 }, { "epoch": 0.09928160019670747, "grad_norm": 34.5, "learning_rate": 9.984487254467309e-06, "loss": 26.3598, "step": 5350 }, { "epoch": 0.0994671732811873, "grad_norm": 33.78125, "learning_rate": 9.984458258681266e-06, "loss": 26.3925, "step": 5360 }, { "epoch": 0.09965274636566712, "grad_norm": 32.75, "learning_rate": 9.984429262895225e-06, "loss": 26.1651, "step": 5370 }, { "epoch": 0.09983831945014696, "grad_norm": 34.84375, "learning_rate": 9.984400267109182e-06, "loss": 25.8864, "step": 5380 }, { "epoch": 0.10002389253462678, "grad_norm": 33.0, "learning_rate": 9.984371271323138e-06, "loss": 25.8829, "step": 5390 }, { "epoch": 0.10020946561910661, "grad_norm": 34.46875, "learning_rate": 9.984342275537097e-06, "loss": 26.1825, "step": 5400 }, { "epoch": 0.10039503870358643, "grad_norm": 33.71875, "learning_rate": 9.984313279751055e-06, "loss": 26.0618, "step": 5410 }, { "epoch": 0.10058061178806627, "grad_norm": 33.75, "learning_rate": 9.984284283965012e-06, "loss": 26.651, "step": 5420 }, { "epoch": 0.10076618487254609, "grad_norm": 34.53125, "learning_rate": 9.98425528817897e-06, "loss": 26.0505, "step": 5430 }, { "epoch": 0.1009517579570259, "grad_norm": 34.25, "learning_rate": 9.984226292392929e-06, "loss": 26.0324, "step": 5440 }, { "epoch": 0.10113733104150574, "grad_norm": 33.21875, "learning_rate": 9.984197296606884e-06, "loss": 26.7206, "step": 5450 }, { "epoch": 0.10132290412598556, "grad_norm": 32.25, "learning_rate": 9.984168300820842e-06, "loss": 26.1656, "step": 5460 }, { "epoch": 0.1015084772104654, "grad_norm": 33.25, "learning_rate": 9.9841393050348e-06, "loss": 26.2063, "step": 5470 }, { "epoch": 0.10169405029494522, "grad_norm": 35.71875, "learning_rate": 9.984110309248758e-06, "loss": 25.8801, "step": 5480 }, { "epoch": 0.10187962337942505, "grad_norm": 33.59375, "learning_rate": 9.984081313462716e-06, "loss": 25.962, "step": 5490 }, { "epoch": 0.10206519646390487, "grad_norm": 33.78125, "learning_rate": 9.984052317676673e-06, "loss": 25.9794, "step": 5500 }, { "epoch": 0.1022507695483847, "grad_norm": 35.0625, "learning_rate": 9.98402332189063e-06, "loss": 25.5394, "step": 5510 }, { "epoch": 0.10243634263286452, "grad_norm": 33.28125, "learning_rate": 9.983994326104588e-06, "loss": 26.8701, "step": 5520 }, { "epoch": 0.10262191571734436, "grad_norm": 35.28125, "learning_rate": 9.983965330318545e-06, "loss": 25.7336, "step": 5530 }, { "epoch": 0.10280748880182418, "grad_norm": 33.65625, "learning_rate": 9.983936334532504e-06, "loss": 26.2723, "step": 5540 }, { "epoch": 0.10299306188630401, "grad_norm": 34.25, "learning_rate": 9.983907338746462e-06, "loss": 25.902, "step": 5550 }, { "epoch": 0.10317863497078383, "grad_norm": 34.03125, "learning_rate": 9.983878342960417e-06, "loss": 26.0357, "step": 5560 }, { "epoch": 0.10336420805526367, "grad_norm": 34.9375, "learning_rate": 9.983849347174377e-06, "loss": 25.7288, "step": 5570 }, { "epoch": 0.10354978113974349, "grad_norm": 36.125, "learning_rate": 9.983820351388334e-06, "loss": 26.208, "step": 5580 }, { "epoch": 0.10373535422422332, "grad_norm": 33.21875, "learning_rate": 9.983791355602291e-06, "loss": 26.1542, "step": 5590 }, { "epoch": 0.10392092730870314, "grad_norm": 33.9375, "learning_rate": 9.983762359816249e-06, "loss": 25.7529, "step": 5600 }, { "epoch": 0.10410650039318298, "grad_norm": 33.59375, "learning_rate": 9.983733364030206e-06, "loss": 26.1341, "step": 5610 }, { "epoch": 0.1042920734776628, "grad_norm": 34.34375, "learning_rate": 9.983704368244164e-06, "loss": 25.8274, "step": 5620 }, { "epoch": 0.10447764656214263, "grad_norm": 34.28125, "learning_rate": 9.983675372458121e-06, "loss": 26.0646, "step": 5630 }, { "epoch": 0.10466321964662245, "grad_norm": 33.71875, "learning_rate": 9.98364637667208e-06, "loss": 25.8354, "step": 5640 }, { "epoch": 0.10484879273110229, "grad_norm": 34.4375, "learning_rate": 9.983617380886037e-06, "loss": 25.8377, "step": 5650 }, { "epoch": 0.1050343658155821, "grad_norm": 33.5625, "learning_rate": 9.983588385099993e-06, "loss": 25.8508, "step": 5660 }, { "epoch": 0.10521993890006194, "grad_norm": 34.84375, "learning_rate": 9.983559389313952e-06, "loss": 26.1233, "step": 5670 }, { "epoch": 0.10540551198454176, "grad_norm": 34.1875, "learning_rate": 9.98353039352791e-06, "loss": 25.8028, "step": 5680 }, { "epoch": 0.1055910850690216, "grad_norm": 34.5, "learning_rate": 9.983501397741867e-06, "loss": 25.8982, "step": 5690 }, { "epoch": 0.10577665815350142, "grad_norm": 34.78125, "learning_rate": 9.983472401955825e-06, "loss": 26.0658, "step": 5700 }, { "epoch": 0.10596223123798125, "grad_norm": 33.0625, "learning_rate": 9.983443406169782e-06, "loss": 25.9276, "step": 5710 }, { "epoch": 0.10614780432246107, "grad_norm": 34.5625, "learning_rate": 9.98341441038374e-06, "loss": 25.9212, "step": 5720 }, { "epoch": 0.10633337740694089, "grad_norm": 33.8125, "learning_rate": 9.983385414597697e-06, "loss": 26.3133, "step": 5730 }, { "epoch": 0.10651895049142072, "grad_norm": 33.84375, "learning_rate": 9.983356418811654e-06, "loss": 26.1589, "step": 5740 }, { "epoch": 0.10670452357590055, "grad_norm": 33.34375, "learning_rate": 9.983327423025613e-06, "loss": 25.4855, "step": 5750 }, { "epoch": 0.10689009666038038, "grad_norm": 35.09375, "learning_rate": 9.98329842723957e-06, "loss": 25.9951, "step": 5760 }, { "epoch": 0.1070756697448602, "grad_norm": 31.828125, "learning_rate": 9.983269431453526e-06, "loss": 25.3597, "step": 5770 }, { "epoch": 0.10726124282934003, "grad_norm": 34.96875, "learning_rate": 9.983240435667485e-06, "loss": 25.9633, "step": 5780 }, { "epoch": 0.10744681591381985, "grad_norm": 36.96875, "learning_rate": 9.983211439881443e-06, "loss": 25.3149, "step": 5790 }, { "epoch": 0.10763238899829969, "grad_norm": 31.65625, "learning_rate": 9.9831824440954e-06, "loss": 25.7088, "step": 5800 }, { "epoch": 0.10781796208277951, "grad_norm": 34.65625, "learning_rate": 9.983153448309358e-06, "loss": 25.7903, "step": 5810 }, { "epoch": 0.10800353516725934, "grad_norm": 34.09375, "learning_rate": 9.983124452523315e-06, "loss": 26.3276, "step": 5820 }, { "epoch": 0.10818910825173916, "grad_norm": 34.46875, "learning_rate": 9.983095456737273e-06, "loss": 26.1414, "step": 5830 }, { "epoch": 0.108374681336219, "grad_norm": 33.53125, "learning_rate": 9.98306646095123e-06, "loss": 26.2068, "step": 5840 }, { "epoch": 0.10856025442069882, "grad_norm": 33.1875, "learning_rate": 9.983037465165189e-06, "loss": 26.2536, "step": 5850 }, { "epoch": 0.10874582750517865, "grad_norm": 35.25, "learning_rate": 9.983008469379146e-06, "loss": 25.2238, "step": 5860 }, { "epoch": 0.10893140058965847, "grad_norm": 34.84375, "learning_rate": 9.982979473593104e-06, "loss": 25.7536, "step": 5870 }, { "epoch": 0.1091169736741383, "grad_norm": 34.125, "learning_rate": 9.982950477807061e-06, "loss": 25.6309, "step": 5880 }, { "epoch": 0.10930254675861813, "grad_norm": 34.09375, "learning_rate": 9.982921482021019e-06, "loss": 25.5945, "step": 5890 }, { "epoch": 0.10948811984309796, "grad_norm": 33.90625, "learning_rate": 9.982892486234976e-06, "loss": 26.1952, "step": 5900 }, { "epoch": 0.10967369292757778, "grad_norm": 34.0625, "learning_rate": 9.982863490448933e-06, "loss": 25.4504, "step": 5910 }, { "epoch": 0.10985926601205762, "grad_norm": 35.0, "learning_rate": 9.982834494662893e-06, "loss": 26.2064, "step": 5920 }, { "epoch": 0.11004483909653744, "grad_norm": 34.6875, "learning_rate": 9.982805498876848e-06, "loss": 25.6694, "step": 5930 }, { "epoch": 0.11023041218101727, "grad_norm": 35.78125, "learning_rate": 9.982776503090806e-06, "loss": 25.8267, "step": 5940 }, { "epoch": 0.11041598526549709, "grad_norm": 33.75, "learning_rate": 9.982747507304765e-06, "loss": 25.7375, "step": 5950 }, { "epoch": 0.11060155834997693, "grad_norm": 34.09375, "learning_rate": 9.982718511518722e-06, "loss": 25.8796, "step": 5960 }, { "epoch": 0.11078713143445675, "grad_norm": 33.75, "learning_rate": 9.98268951573268e-06, "loss": 25.7719, "step": 5970 }, { "epoch": 0.11097270451893658, "grad_norm": 34.0, "learning_rate": 9.982660519946637e-06, "loss": 25.3165, "step": 5980 }, { "epoch": 0.1111582776034164, "grad_norm": 34.0625, "learning_rate": 9.982631524160594e-06, "loss": 25.6139, "step": 5990 }, { "epoch": 0.11134385068789623, "grad_norm": 33.84375, "learning_rate": 9.982602528374552e-06, "loss": 25.5529, "step": 6000 }, { "epoch": 0.11152942377237605, "grad_norm": 33.125, "learning_rate": 9.98257353258851e-06, "loss": 25.9314, "step": 6010 }, { "epoch": 0.11171499685685587, "grad_norm": 35.84375, "learning_rate": 9.982544536802468e-06, "loss": 25.7233, "step": 6020 }, { "epoch": 0.11190056994133571, "grad_norm": 36.4375, "learning_rate": 9.982515541016426e-06, "loss": 26.4676, "step": 6030 }, { "epoch": 0.11208614302581553, "grad_norm": 33.78125, "learning_rate": 9.982486545230381e-06, "loss": 25.4413, "step": 6040 }, { "epoch": 0.11227171611029536, "grad_norm": 34.65625, "learning_rate": 9.98245754944434e-06, "loss": 25.6047, "step": 6050 }, { "epoch": 0.11245728919477518, "grad_norm": 33.8125, "learning_rate": 9.982428553658298e-06, "loss": 25.6781, "step": 6060 }, { "epoch": 0.11264286227925502, "grad_norm": 33.1875, "learning_rate": 9.982399557872255e-06, "loss": 25.7205, "step": 6070 }, { "epoch": 0.11282843536373484, "grad_norm": 35.84375, "learning_rate": 9.982370562086213e-06, "loss": 25.4099, "step": 6080 }, { "epoch": 0.11301400844821467, "grad_norm": 35.5625, "learning_rate": 9.98234156630017e-06, "loss": 25.7561, "step": 6090 }, { "epoch": 0.1131995815326945, "grad_norm": 35.25, "learning_rate": 9.982312570514128e-06, "loss": 25.8102, "step": 6100 }, { "epoch": 0.11338515461717433, "grad_norm": 34.375, "learning_rate": 9.982283574728085e-06, "loss": 26.0634, "step": 6110 }, { "epoch": 0.11357072770165415, "grad_norm": 33.71875, "learning_rate": 9.982254578942044e-06, "loss": 25.6976, "step": 6120 }, { "epoch": 0.11375630078613398, "grad_norm": 34.53125, "learning_rate": 9.982225583156002e-06, "loss": 25.4026, "step": 6130 }, { "epoch": 0.1139418738706138, "grad_norm": 34.21875, "learning_rate": 9.982196587369959e-06, "loss": 25.6856, "step": 6140 }, { "epoch": 0.11412744695509364, "grad_norm": 34.8125, "learning_rate": 9.982167591583916e-06, "loss": 25.3255, "step": 6150 }, { "epoch": 0.11431302003957346, "grad_norm": 33.40625, "learning_rate": 9.982138595797874e-06, "loss": 25.7649, "step": 6160 }, { "epoch": 0.11449859312405329, "grad_norm": 35.34375, "learning_rate": 9.982109600011831e-06, "loss": 25.6869, "step": 6170 }, { "epoch": 0.11468416620853311, "grad_norm": 33.625, "learning_rate": 9.982080604225789e-06, "loss": 25.7105, "step": 6180 }, { "epoch": 0.11486973929301295, "grad_norm": 35.0, "learning_rate": 9.982051608439746e-06, "loss": 25.6583, "step": 6190 }, { "epoch": 0.11505531237749277, "grad_norm": 37.5625, "learning_rate": 9.982022612653703e-06, "loss": 25.4228, "step": 6200 }, { "epoch": 0.1152408854619726, "grad_norm": 34.90625, "learning_rate": 9.98199361686766e-06, "loss": 25.4304, "step": 6210 }, { "epoch": 0.11542645854645242, "grad_norm": 33.65625, "learning_rate": 9.981964621081618e-06, "loss": 25.5321, "step": 6220 }, { "epoch": 0.11561203163093225, "grad_norm": 33.28125, "learning_rate": 9.981935625295577e-06, "loss": 25.7566, "step": 6230 }, { "epoch": 0.11579760471541208, "grad_norm": 34.09375, "learning_rate": 9.981906629509535e-06, "loss": 25.9302, "step": 6240 }, { "epoch": 0.11598317779989191, "grad_norm": 34.03125, "learning_rate": 9.981877633723492e-06, "loss": 25.7643, "step": 6250 }, { "epoch": 0.11616875088437173, "grad_norm": 35.25, "learning_rate": 9.98184863793745e-06, "loss": 26.0756, "step": 6260 }, { "epoch": 0.11635432396885156, "grad_norm": 34.125, "learning_rate": 9.981819642151407e-06, "loss": 25.436, "step": 6270 }, { "epoch": 0.11653989705333138, "grad_norm": 34.09375, "learning_rate": 9.981790646365364e-06, "loss": 25.6467, "step": 6280 }, { "epoch": 0.11672547013781122, "grad_norm": 35.78125, "learning_rate": 9.981761650579322e-06, "loss": 25.363, "step": 6290 }, { "epoch": 0.11691104322229104, "grad_norm": 33.6875, "learning_rate": 9.98173265479328e-06, "loss": 25.2482, "step": 6300 }, { "epoch": 0.11709661630677086, "grad_norm": 33.5625, "learning_rate": 9.981703659007237e-06, "loss": 25.3449, "step": 6310 }, { "epoch": 0.1172821893912507, "grad_norm": 36.0, "learning_rate": 9.981674663221194e-06, "loss": 25.2572, "step": 6320 }, { "epoch": 0.11746776247573051, "grad_norm": 37.09375, "learning_rate": 9.981645667435153e-06, "loss": 25.7097, "step": 6330 }, { "epoch": 0.11765333556021035, "grad_norm": 34.3125, "learning_rate": 9.98161667164911e-06, "loss": 25.2182, "step": 6340 }, { "epoch": 0.11783890864469017, "grad_norm": 34.6875, "learning_rate": 9.981587675863068e-06, "loss": 25.4479, "step": 6350 }, { "epoch": 0.11802448172917, "grad_norm": 35.46875, "learning_rate": 9.981558680077025e-06, "loss": 25.6011, "step": 6360 }, { "epoch": 0.11821005481364982, "grad_norm": 35.53125, "learning_rate": 9.981529684290983e-06, "loss": 25.4611, "step": 6370 }, { "epoch": 0.11839562789812966, "grad_norm": 35.0625, "learning_rate": 9.98150068850494e-06, "loss": 25.9445, "step": 6380 }, { "epoch": 0.11858120098260948, "grad_norm": 33.78125, "learning_rate": 9.981471692718897e-06, "loss": 25.1598, "step": 6390 }, { "epoch": 0.11876677406708931, "grad_norm": 33.75, "learning_rate": 9.981442696932857e-06, "loss": 25.446, "step": 6400 }, { "epoch": 0.11895234715156913, "grad_norm": 35.03125, "learning_rate": 9.981413701146812e-06, "loss": 26.0049, "step": 6410 }, { "epoch": 0.11913792023604897, "grad_norm": 33.40625, "learning_rate": 9.98138470536077e-06, "loss": 25.3657, "step": 6420 }, { "epoch": 0.11932349332052879, "grad_norm": 34.6875, "learning_rate": 9.981355709574729e-06, "loss": 24.9989, "step": 6430 }, { "epoch": 0.11950906640500862, "grad_norm": 35.21875, "learning_rate": 9.981326713788686e-06, "loss": 25.4673, "step": 6440 }, { "epoch": 0.11969463948948844, "grad_norm": 34.03125, "learning_rate": 9.981297718002644e-06, "loss": 25.665, "step": 6450 }, { "epoch": 0.11988021257396828, "grad_norm": 34.03125, "learning_rate": 9.981268722216601e-06, "loss": 25.6645, "step": 6460 }, { "epoch": 0.1200657856584481, "grad_norm": 32.5625, "learning_rate": 9.981239726430558e-06, "loss": 25.673, "step": 6470 }, { "epoch": 0.12025135874292793, "grad_norm": 34.53125, "learning_rate": 9.981210730644516e-06, "loss": 25.2111, "step": 6480 }, { "epoch": 0.12043693182740775, "grad_norm": 34.34375, "learning_rate": 9.981181734858473e-06, "loss": 25.3616, "step": 6490 }, { "epoch": 0.12062250491188758, "grad_norm": 33.9375, "learning_rate": 9.981152739072432e-06, "loss": 25.1297, "step": 6500 }, { "epoch": 0.1208080779963674, "grad_norm": 33.96875, "learning_rate": 9.98112374328639e-06, "loss": 25.5363, "step": 6510 }, { "epoch": 0.12099365108084724, "grad_norm": 33.65625, "learning_rate": 9.981094747500345e-06, "loss": 25.7005, "step": 6520 }, { "epoch": 0.12117922416532706, "grad_norm": 35.6875, "learning_rate": 9.981065751714305e-06, "loss": 25.1232, "step": 6530 }, { "epoch": 0.1213647972498069, "grad_norm": 33.75, "learning_rate": 9.981036755928262e-06, "loss": 25.5644, "step": 6540 }, { "epoch": 0.12155037033428671, "grad_norm": 33.0, "learning_rate": 9.98100776014222e-06, "loss": 25.1729, "step": 6550 }, { "epoch": 0.12173594341876655, "grad_norm": 35.0, "learning_rate": 9.980978764356177e-06, "loss": 25.5086, "step": 6560 }, { "epoch": 0.12192151650324637, "grad_norm": 35.9375, "learning_rate": 9.980949768570134e-06, "loss": 25.3133, "step": 6570 }, { "epoch": 0.1221070895877262, "grad_norm": 33.8125, "learning_rate": 9.980920772784092e-06, "loss": 25.1091, "step": 6580 }, { "epoch": 0.12229266267220602, "grad_norm": 34.21875, "learning_rate": 9.980891776998049e-06, "loss": 24.9665, "step": 6590 }, { "epoch": 0.12247823575668584, "grad_norm": 34.59375, "learning_rate": 9.980862781212008e-06, "loss": 25.2432, "step": 6600 }, { "epoch": 0.12266380884116568, "grad_norm": 33.03125, "learning_rate": 9.980833785425966e-06, "loss": 25.2702, "step": 6610 }, { "epoch": 0.1228493819256455, "grad_norm": 32.46875, "learning_rate": 9.980804789639923e-06, "loss": 25.2844, "step": 6620 }, { "epoch": 0.12303495501012533, "grad_norm": 33.65625, "learning_rate": 9.98077579385388e-06, "loss": 25.1786, "step": 6630 }, { "epoch": 0.12322052809460515, "grad_norm": 33.4375, "learning_rate": 9.980746798067838e-06, "loss": 25.7514, "step": 6640 }, { "epoch": 0.12340610117908499, "grad_norm": 35.34375, "learning_rate": 9.980717802281795e-06, "loss": 25.1487, "step": 6650 }, { "epoch": 0.12359167426356481, "grad_norm": 32.875, "learning_rate": 9.980688806495753e-06, "loss": 25.2307, "step": 6660 }, { "epoch": 0.12377724734804464, "grad_norm": 34.4375, "learning_rate": 9.98065981070971e-06, "loss": 25.7178, "step": 6670 }, { "epoch": 0.12396282043252446, "grad_norm": 34.8125, "learning_rate": 9.980630814923667e-06, "loss": 24.9469, "step": 6680 }, { "epoch": 0.1241483935170043, "grad_norm": 35.03125, "learning_rate": 9.980601819137625e-06, "loss": 25.533, "step": 6690 }, { "epoch": 0.12433396660148412, "grad_norm": 34.6875, "learning_rate": 9.980572823351584e-06, "loss": 25.157, "step": 6700 }, { "epoch": 0.12451953968596395, "grad_norm": 34.0, "learning_rate": 9.980543827565541e-06, "loss": 25.3136, "step": 6710 }, { "epoch": 0.12470511277044377, "grad_norm": 34.375, "learning_rate": 9.980514831779499e-06, "loss": 25.2316, "step": 6720 }, { "epoch": 0.1248906858549236, "grad_norm": 33.71875, "learning_rate": 9.980485835993456e-06, "loss": 25.6247, "step": 6730 }, { "epoch": 0.12507625893940344, "grad_norm": 33.15625, "learning_rate": 9.980456840207414e-06, "loss": 25.2158, "step": 6740 }, { "epoch": 0.12526183202388325, "grad_norm": 35.1875, "learning_rate": 9.980427844421371e-06, "loss": 25.0383, "step": 6750 }, { "epoch": 0.12544740510836308, "grad_norm": 34.25, "learning_rate": 9.980398848635328e-06, "loss": 25.5181, "step": 6760 }, { "epoch": 0.12563297819284291, "grad_norm": 33.21875, "learning_rate": 9.980369852849286e-06, "loss": 25.1087, "step": 6770 }, { "epoch": 0.12581855127732275, "grad_norm": 33.46875, "learning_rate": 9.980340857063245e-06, "loss": 25.3601, "step": 6780 }, { "epoch": 0.12600412436180256, "grad_norm": 37.03125, "learning_rate": 9.9803118612772e-06, "loss": 24.8354, "step": 6790 }, { "epoch": 0.1261896974462824, "grad_norm": 36.5625, "learning_rate": 9.980282865491158e-06, "loss": 25.282, "step": 6800 }, { "epoch": 0.12637527053076222, "grad_norm": 35.0625, "learning_rate": 9.980253869705117e-06, "loss": 25.3945, "step": 6810 }, { "epoch": 0.12656084361524206, "grad_norm": 35.96875, "learning_rate": 9.980224873919074e-06, "loss": 25.0788, "step": 6820 }, { "epoch": 0.12674641669972186, "grad_norm": 35.6875, "learning_rate": 9.980195878133032e-06, "loss": 26.2018, "step": 6830 }, { "epoch": 0.1269319897842017, "grad_norm": 32.65625, "learning_rate": 9.98016688234699e-06, "loss": 25.3735, "step": 6840 }, { "epoch": 0.12711756286868153, "grad_norm": 36.625, "learning_rate": 9.980137886560947e-06, "loss": 25.4337, "step": 6850 }, { "epoch": 0.12730313595316137, "grad_norm": 32.96875, "learning_rate": 9.980108890774904e-06, "loss": 25.6338, "step": 6860 }, { "epoch": 0.12748870903764117, "grad_norm": 34.53125, "learning_rate": 9.980079894988861e-06, "loss": 25.2205, "step": 6870 }, { "epoch": 0.127674282122121, "grad_norm": 34.59375, "learning_rate": 9.98005089920282e-06, "loss": 25.2487, "step": 6880 }, { "epoch": 0.12785985520660084, "grad_norm": 34.21875, "learning_rate": 9.980021903416778e-06, "loss": 25.2615, "step": 6890 }, { "epoch": 0.12804542829108065, "grad_norm": 33.6875, "learning_rate": 9.979992907630734e-06, "loss": 25.2916, "step": 6900 }, { "epoch": 0.12823100137556048, "grad_norm": 33.09375, "learning_rate": 9.979963911844693e-06, "loss": 25.4654, "step": 6910 }, { "epoch": 0.12841657446004032, "grad_norm": 33.96875, "learning_rate": 9.97993491605865e-06, "loss": 25.4176, "step": 6920 }, { "epoch": 0.12860214754452015, "grad_norm": 35.0625, "learning_rate": 9.979905920272608e-06, "loss": 25.1046, "step": 6930 }, { "epoch": 0.12878772062899996, "grad_norm": 36.375, "learning_rate": 9.979876924486565e-06, "loss": 25.1201, "step": 6940 }, { "epoch": 0.1289732937134798, "grad_norm": 34.875, "learning_rate": 9.979847928700522e-06, "loss": 25.3452, "step": 6950 }, { "epoch": 0.12915886679795963, "grad_norm": 35.25, "learning_rate": 9.97981893291448e-06, "loss": 25.0575, "step": 6960 }, { "epoch": 0.12934443988243946, "grad_norm": 35.125, "learning_rate": 9.979789937128437e-06, "loss": 25.0933, "step": 6970 }, { "epoch": 0.12953001296691927, "grad_norm": 32.625, "learning_rate": 9.979760941342396e-06, "loss": 24.9351, "step": 6980 }, { "epoch": 0.1297155860513991, "grad_norm": 35.15625, "learning_rate": 9.979731945556354e-06, "loss": 25.2236, "step": 6990 }, { "epoch": 0.12990115913587894, "grad_norm": 35.1875, "learning_rate": 9.97970294977031e-06, "loss": 25.3424, "step": 7000 }, { "epoch": 0.13008673222035877, "grad_norm": 35.1875, "learning_rate": 9.979673953984269e-06, "loss": 25.2847, "step": 7010 }, { "epoch": 0.13027230530483858, "grad_norm": 34.71875, "learning_rate": 9.979644958198226e-06, "loss": 25.0231, "step": 7020 }, { "epoch": 0.1304578783893184, "grad_norm": 34.375, "learning_rate": 9.979615962412183e-06, "loss": 24.5707, "step": 7030 }, { "epoch": 0.13064345147379824, "grad_norm": 35.3125, "learning_rate": 9.97958696662614e-06, "loss": 25.229, "step": 7040 }, { "epoch": 0.13082902455827808, "grad_norm": 35.75, "learning_rate": 9.9795579708401e-06, "loss": 25.3528, "step": 7050 }, { "epoch": 0.13101459764275789, "grad_norm": 35.71875, "learning_rate": 9.979528975054056e-06, "loss": 25.5605, "step": 7060 }, { "epoch": 0.13120017072723772, "grad_norm": 33.46875, "learning_rate": 9.979499979268013e-06, "loss": 25.0923, "step": 7070 }, { "epoch": 0.13138574381171755, "grad_norm": 33.1875, "learning_rate": 9.979470983481972e-06, "loss": 24.9616, "step": 7080 }, { "epoch": 0.1315713168961974, "grad_norm": 36.09375, "learning_rate": 9.97944198769593e-06, "loss": 24.8532, "step": 7090 }, { "epoch": 0.1317568899806772, "grad_norm": 33.875, "learning_rate": 9.979412991909887e-06, "loss": 25.032, "step": 7100 }, { "epoch": 0.13194246306515703, "grad_norm": 33.8125, "learning_rate": 9.979383996123844e-06, "loss": 25.1883, "step": 7110 }, { "epoch": 0.13212803614963686, "grad_norm": 33.75, "learning_rate": 9.979355000337802e-06, "loss": 25.6139, "step": 7120 }, { "epoch": 0.1323136092341167, "grad_norm": 36.46875, "learning_rate": 9.97932600455176e-06, "loss": 25.5543, "step": 7130 }, { "epoch": 0.1324991823185965, "grad_norm": 34.65625, "learning_rate": 9.979297008765717e-06, "loss": 24.9935, "step": 7140 }, { "epoch": 0.13268475540307634, "grad_norm": 31.921875, "learning_rate": 9.979268012979676e-06, "loss": 25.0828, "step": 7150 }, { "epoch": 0.13287032848755617, "grad_norm": 34.71875, "learning_rate": 9.979239017193631e-06, "loss": 24.9755, "step": 7160 }, { "epoch": 0.13305590157203598, "grad_norm": 33.46875, "learning_rate": 9.979210021407589e-06, "loss": 24.9258, "step": 7170 }, { "epoch": 0.1332414746565158, "grad_norm": 35.6875, "learning_rate": 9.979181025621548e-06, "loss": 24.8584, "step": 7180 }, { "epoch": 0.13342704774099565, "grad_norm": 34.46875, "learning_rate": 9.979152029835505e-06, "loss": 25.5047, "step": 7190 }, { "epoch": 0.13361262082547548, "grad_norm": 34.09375, "learning_rate": 9.979123034049463e-06, "loss": 24.9631, "step": 7200 }, { "epoch": 0.1337981939099553, "grad_norm": 34.40625, "learning_rate": 9.97909403826342e-06, "loss": 25.2121, "step": 7210 }, { "epoch": 0.13398376699443512, "grad_norm": 35.03125, "learning_rate": 9.979065042477378e-06, "loss": 25.1507, "step": 7220 }, { "epoch": 0.13416934007891496, "grad_norm": 36.09375, "learning_rate": 9.979036046691335e-06, "loss": 24.8446, "step": 7230 }, { "epoch": 0.1343549131633948, "grad_norm": 35.8125, "learning_rate": 9.979007050905292e-06, "loss": 24.8777, "step": 7240 }, { "epoch": 0.1345404862478746, "grad_norm": 34.09375, "learning_rate": 9.97897805511925e-06, "loss": 24.8601, "step": 7250 }, { "epoch": 0.13472605933235443, "grad_norm": 33.46875, "learning_rate": 9.978949059333209e-06, "loss": 25.2262, "step": 7260 }, { "epoch": 0.13491163241683427, "grad_norm": 33.65625, "learning_rate": 9.978920063547165e-06, "loss": 24.8236, "step": 7270 }, { "epoch": 0.1350972055013141, "grad_norm": 35.6875, "learning_rate": 9.978891067761124e-06, "loss": 25.0613, "step": 7280 }, { "epoch": 0.1352827785857939, "grad_norm": 35.0625, "learning_rate": 9.978862071975081e-06, "loss": 24.7087, "step": 7290 }, { "epoch": 0.13546835167027374, "grad_norm": 36.71875, "learning_rate": 9.978833076189038e-06, "loss": 24.6291, "step": 7300 }, { "epoch": 0.13565392475475357, "grad_norm": 33.71875, "learning_rate": 9.978804080402996e-06, "loss": 24.9982, "step": 7310 }, { "epoch": 0.1358394978392334, "grad_norm": 34.40625, "learning_rate": 9.978775084616953e-06, "loss": 24.1237, "step": 7320 }, { "epoch": 0.13602507092371321, "grad_norm": 34.125, "learning_rate": 9.97874608883091e-06, "loss": 24.5642, "step": 7330 }, { "epoch": 0.13621064400819305, "grad_norm": 34.15625, "learning_rate": 9.978717093044868e-06, "loss": 24.9478, "step": 7340 }, { "epoch": 0.13639621709267288, "grad_norm": 34.15625, "learning_rate": 9.978688097258826e-06, "loss": 24.5942, "step": 7350 }, { "epoch": 0.13658179017715272, "grad_norm": 35.6875, "learning_rate": 9.978659101472785e-06, "loss": 24.7719, "step": 7360 }, { "epoch": 0.13676736326163252, "grad_norm": 33.71875, "learning_rate": 9.978630105686742e-06, "loss": 24.4966, "step": 7370 }, { "epoch": 0.13695293634611236, "grad_norm": 36.28125, "learning_rate": 9.978601109900698e-06, "loss": 24.8462, "step": 7380 }, { "epoch": 0.1371385094305922, "grad_norm": 34.8125, "learning_rate": 9.978572114114657e-06, "loss": 25.045, "step": 7390 }, { "epoch": 0.13732408251507203, "grad_norm": 35.28125, "learning_rate": 9.978543118328614e-06, "loss": 24.7852, "step": 7400 }, { "epoch": 0.13750965559955183, "grad_norm": 35.40625, "learning_rate": 9.978514122542572e-06, "loss": 24.5402, "step": 7410 }, { "epoch": 0.13769522868403167, "grad_norm": 35.125, "learning_rate": 9.978485126756529e-06, "loss": 25.0199, "step": 7420 }, { "epoch": 0.1378808017685115, "grad_norm": 33.46875, "learning_rate": 9.978456130970486e-06, "loss": 24.5031, "step": 7430 }, { "epoch": 0.13806637485299134, "grad_norm": 33.4375, "learning_rate": 9.978427135184444e-06, "loss": 24.6868, "step": 7440 }, { "epoch": 0.13825194793747114, "grad_norm": 34.875, "learning_rate": 9.978398139398401e-06, "loss": 24.7943, "step": 7450 }, { "epoch": 0.13843752102195098, "grad_norm": 33.03125, "learning_rate": 9.97836914361236e-06, "loss": 24.6984, "step": 7460 }, { "epoch": 0.1386230941064308, "grad_norm": 34.28125, "learning_rate": 9.978340147826318e-06, "loss": 24.522, "step": 7470 }, { "epoch": 0.13880866719091062, "grad_norm": 33.25, "learning_rate": 9.978311152040275e-06, "loss": 24.6206, "step": 7480 }, { "epoch": 0.13899424027539045, "grad_norm": 36.03125, "learning_rate": 9.978282156254233e-06, "loss": 24.9333, "step": 7490 }, { "epoch": 0.13917981335987029, "grad_norm": 35.375, "learning_rate": 9.97825316046819e-06, "loss": 24.736, "step": 7500 }, { "epoch": 0.13936538644435012, "grad_norm": 34.5, "learning_rate": 9.978224164682147e-06, "loss": 25.2804, "step": 7510 }, { "epoch": 0.13955095952882993, "grad_norm": 34.84375, "learning_rate": 9.978195168896105e-06, "loss": 24.9313, "step": 7520 }, { "epoch": 0.13973653261330976, "grad_norm": 32.625, "learning_rate": 9.978166173110064e-06, "loss": 24.8967, "step": 7530 }, { "epoch": 0.1399221056977896, "grad_norm": 35.5625, "learning_rate": 9.97813717732402e-06, "loss": 24.9943, "step": 7540 }, { "epoch": 0.14010767878226943, "grad_norm": 34.59375, "learning_rate": 9.978108181537977e-06, "loss": 24.8322, "step": 7550 }, { "epoch": 0.14029325186674924, "grad_norm": 33.1875, "learning_rate": 9.978079185751936e-06, "loss": 24.8442, "step": 7560 }, { "epoch": 0.14047882495122907, "grad_norm": 35.6875, "learning_rate": 9.978050189965894e-06, "loss": 24.7716, "step": 7570 }, { "epoch": 0.1406643980357089, "grad_norm": 34.15625, "learning_rate": 9.978021194179851e-06, "loss": 24.6299, "step": 7580 }, { "epoch": 0.14084997112018874, "grad_norm": 35.46875, "learning_rate": 9.977992198393808e-06, "loss": 24.8454, "step": 7590 }, { "epoch": 0.14103554420466854, "grad_norm": 33.78125, "learning_rate": 9.977963202607766e-06, "loss": 24.6056, "step": 7600 }, { "epoch": 0.14122111728914838, "grad_norm": 34.21875, "learning_rate": 9.977934206821723e-06, "loss": 25.2306, "step": 7610 }, { "epoch": 0.1414066903736282, "grad_norm": 35.40625, "learning_rate": 9.97790521103568e-06, "loss": 24.108, "step": 7620 }, { "epoch": 0.14159226345810805, "grad_norm": 33.75, "learning_rate": 9.97787621524964e-06, "loss": 24.6856, "step": 7630 }, { "epoch": 0.14177783654258785, "grad_norm": 35.21875, "learning_rate": 9.977847219463597e-06, "loss": 24.942, "step": 7640 }, { "epoch": 0.1419634096270677, "grad_norm": 33.25, "learning_rate": 9.977818223677553e-06, "loss": 24.5791, "step": 7650 }, { "epoch": 0.14214898271154752, "grad_norm": 34.96875, "learning_rate": 9.977789227891512e-06, "loss": 24.3992, "step": 7660 }, { "epoch": 0.14233455579602736, "grad_norm": 37.21875, "learning_rate": 9.97776023210547e-06, "loss": 24.6844, "step": 7670 }, { "epoch": 0.14252012888050716, "grad_norm": 35.03125, "learning_rate": 9.977731236319427e-06, "loss": 24.8277, "step": 7680 }, { "epoch": 0.142705701964987, "grad_norm": 34.4375, "learning_rate": 9.977702240533384e-06, "loss": 24.5306, "step": 7690 }, { "epoch": 0.14289127504946683, "grad_norm": 35.03125, "learning_rate": 9.977673244747342e-06, "loss": 24.653, "step": 7700 }, { "epoch": 0.14307684813394667, "grad_norm": 33.40625, "learning_rate": 9.977644248961299e-06, "loss": 24.3045, "step": 7710 }, { "epoch": 0.14326242121842647, "grad_norm": 34.9375, "learning_rate": 9.977615253175256e-06, "loss": 24.6544, "step": 7720 }, { "epoch": 0.1434479943029063, "grad_norm": 35.09375, "learning_rate": 9.977586257389215e-06, "loss": 24.8687, "step": 7730 }, { "epoch": 0.14363356738738614, "grad_norm": 35.53125, "learning_rate": 9.977557261603173e-06, "loss": 24.806, "step": 7740 }, { "epoch": 0.14381914047186595, "grad_norm": 35.5, "learning_rate": 9.977528265817129e-06, "loss": 24.1327, "step": 7750 }, { "epoch": 0.14400471355634578, "grad_norm": 36.8125, "learning_rate": 9.977499270031088e-06, "loss": 24.7527, "step": 7760 }, { "epoch": 0.14419028664082562, "grad_norm": 37.5, "learning_rate": 9.977470274245045e-06, "loss": 24.7797, "step": 7770 }, { "epoch": 0.14437585972530545, "grad_norm": 35.125, "learning_rate": 9.977441278459002e-06, "loss": 24.4506, "step": 7780 }, { "epoch": 0.14456143280978526, "grad_norm": 33.78125, "learning_rate": 9.97741228267296e-06, "loss": 24.7787, "step": 7790 }, { "epoch": 0.1447470058942651, "grad_norm": 34.40625, "learning_rate": 9.977383286886917e-06, "loss": 24.4946, "step": 7800 }, { "epoch": 0.14493257897874492, "grad_norm": 33.5, "learning_rate": 9.977354291100875e-06, "loss": 24.6189, "step": 7810 }, { "epoch": 0.14511815206322476, "grad_norm": 35.46875, "learning_rate": 9.977325295314832e-06, "loss": 24.3999, "step": 7820 }, { "epoch": 0.14530372514770457, "grad_norm": 33.3125, "learning_rate": 9.97729629952879e-06, "loss": 24.735, "step": 7830 }, { "epoch": 0.1454892982321844, "grad_norm": 35.71875, "learning_rate": 9.977267303742749e-06, "loss": 24.5313, "step": 7840 }, { "epoch": 0.14567487131666423, "grad_norm": 35.15625, "learning_rate": 9.977238307956706e-06, "loss": 24.8674, "step": 7850 }, { "epoch": 0.14586044440114407, "grad_norm": 34.6875, "learning_rate": 9.977209312170662e-06, "loss": 24.5328, "step": 7860 }, { "epoch": 0.14604601748562387, "grad_norm": 34.78125, "learning_rate": 9.97718031638462e-06, "loss": 24.0395, "step": 7870 }, { "epoch": 0.1462315905701037, "grad_norm": 33.84375, "learning_rate": 9.977151320598578e-06, "loss": 24.5783, "step": 7880 }, { "epoch": 0.14641716365458354, "grad_norm": 38.125, "learning_rate": 9.977122324812536e-06, "loss": 24.9426, "step": 7890 }, { "epoch": 0.14660273673906338, "grad_norm": 34.75, "learning_rate": 9.977093329026493e-06, "loss": 24.2899, "step": 7900 }, { "epoch": 0.14678830982354318, "grad_norm": 34.0, "learning_rate": 9.97706433324045e-06, "loss": 24.7699, "step": 7910 }, { "epoch": 0.14697388290802302, "grad_norm": 34.21875, "learning_rate": 9.977035337454408e-06, "loss": 24.9012, "step": 7920 }, { "epoch": 0.14715945599250285, "grad_norm": 34.96875, "learning_rate": 9.977006341668365e-06, "loss": 24.2317, "step": 7930 }, { "epoch": 0.1473450290769827, "grad_norm": 34.59375, "learning_rate": 9.976977345882324e-06, "loss": 24.5784, "step": 7940 }, { "epoch": 0.1475306021614625, "grad_norm": 34.3125, "learning_rate": 9.976948350096282e-06, "loss": 24.7714, "step": 7950 }, { "epoch": 0.14771617524594233, "grad_norm": 35.90625, "learning_rate": 9.97691935431024e-06, "loss": 24.5918, "step": 7960 }, { "epoch": 0.14790174833042216, "grad_norm": 36.625, "learning_rate": 9.976890358524197e-06, "loss": 24.5701, "step": 7970 }, { "epoch": 0.148087321414902, "grad_norm": 34.875, "learning_rate": 9.976861362738154e-06, "loss": 25.2149, "step": 7980 }, { "epoch": 0.1482728944993818, "grad_norm": 33.96875, "learning_rate": 9.976832366952111e-06, "loss": 24.6543, "step": 7990 }, { "epoch": 0.14845846758386164, "grad_norm": 35.0625, "learning_rate": 9.976803371166069e-06, "loss": 24.5547, "step": 8000 }, { "epoch": 0.14864404066834147, "grad_norm": 34.09375, "learning_rate": 9.976774375380028e-06, "loss": 24.2946, "step": 8010 }, { "epoch": 0.14882961375282128, "grad_norm": 33.9375, "learning_rate": 9.976745379593984e-06, "loss": 24.4692, "step": 8020 }, { "epoch": 0.1490151868373011, "grad_norm": 34.1875, "learning_rate": 9.976716383807941e-06, "loss": 25.0741, "step": 8030 }, { "epoch": 0.14920075992178095, "grad_norm": 34.0625, "learning_rate": 9.9766873880219e-06, "loss": 24.3755, "step": 8040 }, { "epoch": 0.14938633300626078, "grad_norm": 35.46875, "learning_rate": 9.976658392235858e-06, "loss": 24.51, "step": 8050 }, { "epoch": 0.1495719060907406, "grad_norm": 35.25, "learning_rate": 9.976629396449815e-06, "loss": 24.3664, "step": 8060 }, { "epoch": 0.14975747917522042, "grad_norm": 34.40625, "learning_rate": 9.976600400663772e-06, "loss": 24.8537, "step": 8070 }, { "epoch": 0.14994305225970025, "grad_norm": 36.875, "learning_rate": 9.97657140487773e-06, "loss": 25.0744, "step": 8080 }, { "epoch": 0.1501286253441801, "grad_norm": 36.25, "learning_rate": 9.976542409091687e-06, "loss": 24.8269, "step": 8090 }, { "epoch": 0.1503141984286599, "grad_norm": 34.65625, "learning_rate": 9.976513413305645e-06, "loss": 24.6299, "step": 8100 }, { "epoch": 0.15049977151313973, "grad_norm": 35.09375, "learning_rate": 9.976484417519604e-06, "loss": 24.5561, "step": 8110 }, { "epoch": 0.15068534459761956, "grad_norm": 34.78125, "learning_rate": 9.976455421733561e-06, "loss": 24.682, "step": 8120 }, { "epoch": 0.1508709176820994, "grad_norm": 35.125, "learning_rate": 9.976426425947517e-06, "loss": 24.2381, "step": 8130 }, { "epoch": 0.1510564907665792, "grad_norm": 35.375, "learning_rate": 9.976397430161476e-06, "loss": 24.5888, "step": 8140 }, { "epoch": 0.15124206385105904, "grad_norm": 32.875, "learning_rate": 9.976368434375433e-06, "loss": 25.0338, "step": 8150 }, { "epoch": 0.15142763693553887, "grad_norm": 34.8125, "learning_rate": 9.97633943858939e-06, "loss": 24.2119, "step": 8160 }, { "epoch": 0.1516132100200187, "grad_norm": 36.375, "learning_rate": 9.976310442803348e-06, "loss": 24.741, "step": 8170 }, { "epoch": 0.1517987831044985, "grad_norm": 32.59375, "learning_rate": 9.976281447017306e-06, "loss": 24.5123, "step": 8180 }, { "epoch": 0.15198435618897835, "grad_norm": 33.3125, "learning_rate": 9.976252451231263e-06, "loss": 24.778, "step": 8190 }, { "epoch": 0.15216992927345818, "grad_norm": 34.21875, "learning_rate": 9.97622345544522e-06, "loss": 24.7603, "step": 8200 }, { "epoch": 0.15235550235793802, "grad_norm": 34.4375, "learning_rate": 9.97619445965918e-06, "loss": 24.8708, "step": 8210 }, { "epoch": 0.15254107544241782, "grad_norm": 34.375, "learning_rate": 9.976165463873137e-06, "loss": 24.4007, "step": 8220 }, { "epoch": 0.15272664852689766, "grad_norm": 35.03125, "learning_rate": 9.976136468087094e-06, "loss": 24.2673, "step": 8230 }, { "epoch": 0.1529122216113775, "grad_norm": 36.0625, "learning_rate": 9.976107472301052e-06, "loss": 24.4114, "step": 8240 }, { "epoch": 0.15309779469585733, "grad_norm": 34.03125, "learning_rate": 9.976078476515009e-06, "loss": 24.5365, "step": 8250 }, { "epoch": 0.15328336778033713, "grad_norm": 35.21875, "learning_rate": 9.976049480728966e-06, "loss": 24.5248, "step": 8260 }, { "epoch": 0.15346894086481697, "grad_norm": 34.28125, "learning_rate": 9.976020484942924e-06, "loss": 24.242, "step": 8270 }, { "epoch": 0.1536545139492968, "grad_norm": 33.78125, "learning_rate": 9.975991489156881e-06, "loss": 24.4549, "step": 8280 }, { "epoch": 0.15384008703377663, "grad_norm": 34.96875, "learning_rate": 9.975962493370839e-06, "loss": 24.7525, "step": 8290 }, { "epoch": 0.15402566011825644, "grad_norm": 36.65625, "learning_rate": 9.975933497584796e-06, "loss": 24.5545, "step": 8300 }, { "epoch": 0.15421123320273628, "grad_norm": 35.3125, "learning_rate": 9.975904501798754e-06, "loss": 23.9434, "step": 8310 }, { "epoch": 0.1543968062872161, "grad_norm": 34.53125, "learning_rate": 9.975875506012713e-06, "loss": 24.3157, "step": 8320 }, { "epoch": 0.15458237937169592, "grad_norm": 35.40625, "learning_rate": 9.97584651022667e-06, "loss": 24.1783, "step": 8330 }, { "epoch": 0.15476795245617575, "grad_norm": 33.84375, "learning_rate": 9.975817514440627e-06, "loss": 24.095, "step": 8340 }, { "epoch": 0.15495352554065558, "grad_norm": 33.5625, "learning_rate": 9.975788518654585e-06, "loss": 24.2725, "step": 8350 }, { "epoch": 0.15513909862513542, "grad_norm": 34.28125, "learning_rate": 9.975759522868542e-06, "loss": 24.1065, "step": 8360 }, { "epoch": 0.15532467170961523, "grad_norm": 36.34375, "learning_rate": 9.9757305270825e-06, "loss": 24.7401, "step": 8370 }, { "epoch": 0.15551024479409506, "grad_norm": 35.65625, "learning_rate": 9.975701531296457e-06, "loss": 24.155, "step": 8380 }, { "epoch": 0.1556958178785749, "grad_norm": 35.875, "learning_rate": 9.975672535510416e-06, "loss": 24.5152, "step": 8390 }, { "epoch": 0.15588139096305473, "grad_norm": 34.875, "learning_rate": 9.975643539724372e-06, "loss": 24.716, "step": 8400 }, { "epoch": 0.15606696404753453, "grad_norm": 34.5, "learning_rate": 9.97561454393833e-06, "loss": 24.8522, "step": 8410 }, { "epoch": 0.15625253713201437, "grad_norm": 33.0625, "learning_rate": 9.975585548152288e-06, "loss": 23.9283, "step": 8420 }, { "epoch": 0.1564381102164942, "grad_norm": 34.25, "learning_rate": 9.975556552366246e-06, "loss": 24.1551, "step": 8430 }, { "epoch": 0.15662368330097404, "grad_norm": 34.15625, "learning_rate": 9.975527556580203e-06, "loss": 24.12, "step": 8440 }, { "epoch": 0.15680925638545384, "grad_norm": 33.90625, "learning_rate": 9.97549856079416e-06, "loss": 25.0664, "step": 8450 }, { "epoch": 0.15699482946993368, "grad_norm": 34.96875, "learning_rate": 9.975469565008118e-06, "loss": 24.7384, "step": 8460 }, { "epoch": 0.1571804025544135, "grad_norm": 33.90625, "learning_rate": 9.975440569222075e-06, "loss": 24.209, "step": 8470 }, { "epoch": 0.15736597563889335, "grad_norm": 32.96875, "learning_rate": 9.975411573436033e-06, "loss": 24.478, "step": 8480 }, { "epoch": 0.15755154872337315, "grad_norm": 33.4375, "learning_rate": 9.975382577649992e-06, "loss": 24.594, "step": 8490 }, { "epoch": 0.157737121807853, "grad_norm": 33.21875, "learning_rate": 9.975353581863948e-06, "loss": 23.6009, "step": 8500 }, { "epoch": 0.15792269489233282, "grad_norm": 34.375, "learning_rate": 9.975324586077905e-06, "loss": 24.5622, "step": 8510 }, { "epoch": 0.15810826797681266, "grad_norm": 33.65625, "learning_rate": 9.975295590291864e-06, "loss": 24.7091, "step": 8520 }, { "epoch": 0.15829384106129246, "grad_norm": 35.28125, "learning_rate": 9.975266594505822e-06, "loss": 24.5497, "step": 8530 }, { "epoch": 0.1584794141457723, "grad_norm": 34.3125, "learning_rate": 9.975237598719779e-06, "loss": 24.4081, "step": 8540 }, { "epoch": 0.15866498723025213, "grad_norm": 33.09375, "learning_rate": 9.975208602933736e-06, "loss": 24.4936, "step": 8550 }, { "epoch": 0.15885056031473196, "grad_norm": 34.4375, "learning_rate": 9.975179607147694e-06, "loss": 24.1394, "step": 8560 }, { "epoch": 0.15903613339921177, "grad_norm": 32.78125, "learning_rate": 9.975150611361651e-06, "loss": 23.9652, "step": 8570 }, { "epoch": 0.1592217064836916, "grad_norm": 36.875, "learning_rate": 9.975121615575609e-06, "loss": 24.6149, "step": 8580 }, { "epoch": 0.15940727956817144, "grad_norm": 33.59375, "learning_rate": 9.975092619789568e-06, "loss": 24.1793, "step": 8590 }, { "epoch": 0.15959285265265125, "grad_norm": 35.28125, "learning_rate": 9.975063624003525e-06, "loss": 24.1854, "step": 8600 }, { "epoch": 0.15977842573713108, "grad_norm": 37.34375, "learning_rate": 9.97503462821748e-06, "loss": 24.2307, "step": 8610 }, { "epoch": 0.15996399882161091, "grad_norm": 35.53125, "learning_rate": 9.97500563243144e-06, "loss": 24.1832, "step": 8620 }, { "epoch": 0.16014957190609075, "grad_norm": 33.0625, "learning_rate": 9.974976636645397e-06, "loss": 24.4742, "step": 8630 }, { "epoch": 0.16033514499057056, "grad_norm": 34.09375, "learning_rate": 9.974947640859355e-06, "loss": 23.777, "step": 8640 }, { "epoch": 0.1605207180750504, "grad_norm": 33.8125, "learning_rate": 9.974918645073312e-06, "loss": 24.4657, "step": 8650 }, { "epoch": 0.16070629115953022, "grad_norm": 34.15625, "learning_rate": 9.974889649287271e-06, "loss": 24.1973, "step": 8660 }, { "epoch": 0.16089186424401006, "grad_norm": 35.1875, "learning_rate": 9.974860653501227e-06, "loss": 24.3273, "step": 8670 }, { "epoch": 0.16107743732848986, "grad_norm": 34.375, "learning_rate": 9.974831657715184e-06, "loss": 24.1247, "step": 8680 }, { "epoch": 0.1612630104129697, "grad_norm": 33.75, "learning_rate": 9.974802661929143e-06, "loss": 24.5723, "step": 8690 }, { "epoch": 0.16144858349744953, "grad_norm": 35.8125, "learning_rate": 9.974773666143101e-06, "loss": 24.7058, "step": 8700 }, { "epoch": 0.16163415658192937, "grad_norm": 33.5625, "learning_rate": 9.974744670357058e-06, "loss": 24.4619, "step": 8710 }, { "epoch": 0.16181972966640917, "grad_norm": 33.25, "learning_rate": 9.974715674571016e-06, "loss": 24.3738, "step": 8720 }, { "epoch": 0.162005302750889, "grad_norm": 34.375, "learning_rate": 9.974686678784973e-06, "loss": 23.93, "step": 8730 }, { "epoch": 0.16219087583536884, "grad_norm": 36.09375, "learning_rate": 9.97465768299893e-06, "loss": 24.3002, "step": 8740 }, { "epoch": 0.16237644891984868, "grad_norm": 36.84375, "learning_rate": 9.974628687212888e-06, "loss": 24.1912, "step": 8750 }, { "epoch": 0.16256202200432848, "grad_norm": 35.5625, "learning_rate": 9.974599691426845e-06, "loss": 24.037, "step": 8760 }, { "epoch": 0.16274759508880832, "grad_norm": 36.65625, "learning_rate": 9.974570695640803e-06, "loss": 24.0147, "step": 8770 }, { "epoch": 0.16293316817328815, "grad_norm": 34.6875, "learning_rate": 9.97454169985476e-06, "loss": 23.9071, "step": 8780 }, { "epoch": 0.16311874125776799, "grad_norm": 35.46875, "learning_rate": 9.97451270406872e-06, "loss": 24.5642, "step": 8790 }, { "epoch": 0.1633043143422478, "grad_norm": 34.1875, "learning_rate": 9.974483708282677e-06, "loss": 24.8586, "step": 8800 }, { "epoch": 0.16348988742672763, "grad_norm": 36.5625, "learning_rate": 9.974454712496634e-06, "loss": 24.0883, "step": 8810 }, { "epoch": 0.16367546051120746, "grad_norm": 35.3125, "learning_rate": 9.974425716710591e-06, "loss": 24.2121, "step": 8820 }, { "epoch": 0.1638610335956873, "grad_norm": 36.71875, "learning_rate": 9.974396720924549e-06, "loss": 23.8294, "step": 8830 }, { "epoch": 0.1640466066801671, "grad_norm": 35.875, "learning_rate": 9.974367725138506e-06, "loss": 24.1425, "step": 8840 }, { "epoch": 0.16423217976464694, "grad_norm": 33.625, "learning_rate": 9.974338729352464e-06, "loss": 24.2274, "step": 8850 }, { "epoch": 0.16441775284912677, "grad_norm": 33.8125, "learning_rate": 9.974309733566421e-06, "loss": 24.085, "step": 8860 }, { "epoch": 0.1646033259336066, "grad_norm": 34.5, "learning_rate": 9.97428073778038e-06, "loss": 23.9913, "step": 8870 }, { "epoch": 0.1647888990180864, "grad_norm": 35.65625, "learning_rate": 9.974251741994336e-06, "loss": 24.6299, "step": 8880 }, { "epoch": 0.16497447210256624, "grad_norm": 33.25, "learning_rate": 9.974222746208293e-06, "loss": 23.583, "step": 8890 }, { "epoch": 0.16516004518704608, "grad_norm": 33.71875, "learning_rate": 9.974193750422252e-06, "loss": 23.7087, "step": 8900 }, { "epoch": 0.16534561827152588, "grad_norm": 33.78125, "learning_rate": 9.97416475463621e-06, "loss": 24.3789, "step": 8910 }, { "epoch": 0.16553119135600572, "grad_norm": 35.34375, "learning_rate": 9.974135758850167e-06, "loss": 23.9777, "step": 8920 }, { "epoch": 0.16571676444048555, "grad_norm": 34.25, "learning_rate": 9.974106763064125e-06, "loss": 24.168, "step": 8930 }, { "epoch": 0.1659023375249654, "grad_norm": 36.375, "learning_rate": 9.974077767278082e-06, "loss": 24.3817, "step": 8940 }, { "epoch": 0.1660879106094452, "grad_norm": 35.3125, "learning_rate": 9.97404877149204e-06, "loss": 23.944, "step": 8950 }, { "epoch": 0.16627348369392503, "grad_norm": 35.9375, "learning_rate": 9.974019775705997e-06, "loss": 23.5806, "step": 8960 }, { "epoch": 0.16645905677840486, "grad_norm": 32.84375, "learning_rate": 9.973990779919956e-06, "loss": 23.9688, "step": 8970 }, { "epoch": 0.1666446298628847, "grad_norm": 34.40625, "learning_rate": 9.973961784133913e-06, "loss": 24.0298, "step": 8980 }, { "epoch": 0.1668302029473645, "grad_norm": 33.4375, "learning_rate": 9.973932788347869e-06, "loss": 24.3961, "step": 8990 }, { "epoch": 0.16701577603184434, "grad_norm": 36.78125, "learning_rate": 9.973903792561828e-06, "loss": 24.6019, "step": 9000 }, { "epoch": 0.16720134911632417, "grad_norm": 34.375, "learning_rate": 9.973874796775786e-06, "loss": 24.2553, "step": 9010 }, { "epoch": 0.167386922200804, "grad_norm": 33.34375, "learning_rate": 9.973845800989743e-06, "loss": 24.6558, "step": 9020 }, { "epoch": 0.1675724952852838, "grad_norm": 34.9375, "learning_rate": 9.9738168052037e-06, "loss": 24.3046, "step": 9030 }, { "epoch": 0.16775806836976365, "grad_norm": 34.28125, "learning_rate": 9.973787809417658e-06, "loss": 23.8573, "step": 9040 }, { "epoch": 0.16794364145424348, "grad_norm": 37.71875, "learning_rate": 9.973758813631615e-06, "loss": 23.9141, "step": 9050 }, { "epoch": 0.16812921453872332, "grad_norm": 34.96875, "learning_rate": 9.973729817845573e-06, "loss": 24.0484, "step": 9060 }, { "epoch": 0.16831478762320312, "grad_norm": 34.21875, "learning_rate": 9.973700822059532e-06, "loss": 24.2046, "step": 9070 }, { "epoch": 0.16850036070768296, "grad_norm": 33.96875, "learning_rate": 9.973671826273489e-06, "loss": 23.6338, "step": 9080 }, { "epoch": 0.1686859337921628, "grad_norm": 34.75, "learning_rate": 9.973642830487445e-06, "loss": 24.2761, "step": 9090 }, { "epoch": 0.16887150687664262, "grad_norm": 35.4375, "learning_rate": 9.973613834701404e-06, "loss": 24.3176, "step": 9100 }, { "epoch": 0.16905707996112243, "grad_norm": 35.875, "learning_rate": 9.973584838915361e-06, "loss": 23.9759, "step": 9110 }, { "epoch": 0.16924265304560226, "grad_norm": 35.65625, "learning_rate": 9.973555843129319e-06, "loss": 24.0246, "step": 9120 }, { "epoch": 0.1694282261300821, "grad_norm": 34.4375, "learning_rate": 9.973526847343276e-06, "loss": 24.1898, "step": 9130 }, { "epoch": 0.16961379921456193, "grad_norm": 34.53125, "learning_rate": 9.973497851557235e-06, "loss": 23.9952, "step": 9140 }, { "epoch": 0.16979937229904174, "grad_norm": 33.375, "learning_rate": 9.973468855771191e-06, "loss": 24.178, "step": 9150 }, { "epoch": 0.16998494538352157, "grad_norm": 36.71875, "learning_rate": 9.973439859985148e-06, "loss": 24.2636, "step": 9160 }, { "epoch": 0.1701705184680014, "grad_norm": 36.3125, "learning_rate": 9.973410864199107e-06, "loss": 23.7676, "step": 9170 }, { "epoch": 0.17035609155248121, "grad_norm": 34.125, "learning_rate": 9.973381868413065e-06, "loss": 24.3863, "step": 9180 }, { "epoch": 0.17054166463696105, "grad_norm": 34.09375, "learning_rate": 9.973352872627022e-06, "loss": 24.2944, "step": 9190 }, { "epoch": 0.17072723772144088, "grad_norm": 34.5625, "learning_rate": 9.97332387684098e-06, "loss": 23.8362, "step": 9200 }, { "epoch": 0.17091281080592072, "grad_norm": 35.65625, "learning_rate": 9.973294881054937e-06, "loss": 24.0994, "step": 9210 }, { "epoch": 0.17109838389040052, "grad_norm": 33.28125, "learning_rate": 9.973265885268895e-06, "loss": 23.8749, "step": 9220 }, { "epoch": 0.17128395697488036, "grad_norm": 34.03125, "learning_rate": 9.973236889482852e-06, "loss": 24.207, "step": 9230 }, { "epoch": 0.1714695300593602, "grad_norm": 32.28125, "learning_rate": 9.973207893696811e-06, "loss": 23.9993, "step": 9240 }, { "epoch": 0.17165510314384003, "grad_norm": 35.46875, "learning_rate": 9.973178897910768e-06, "loss": 23.9986, "step": 9250 }, { "epoch": 0.17184067622831983, "grad_norm": 35.21875, "learning_rate": 9.973149902124724e-06, "loss": 23.664, "step": 9260 }, { "epoch": 0.17202624931279967, "grad_norm": 35.4375, "learning_rate": 9.973120906338683e-06, "loss": 23.8326, "step": 9270 }, { "epoch": 0.1722118223972795, "grad_norm": 35.28125, "learning_rate": 9.97309191055264e-06, "loss": 23.7484, "step": 9280 }, { "epoch": 0.17239739548175934, "grad_norm": 35.375, "learning_rate": 9.973062914766598e-06, "loss": 24.1583, "step": 9290 }, { "epoch": 0.17258296856623914, "grad_norm": 34.5625, "learning_rate": 9.973033918980555e-06, "loss": 23.8869, "step": 9300 }, { "epoch": 0.17276854165071898, "grad_norm": 32.46875, "learning_rate": 9.973004923194513e-06, "loss": 24.2223, "step": 9310 }, { "epoch": 0.1729541147351988, "grad_norm": 33.875, "learning_rate": 9.97297592740847e-06, "loss": 24.0298, "step": 9320 }, { "epoch": 0.17313968781967864, "grad_norm": 35.1875, "learning_rate": 9.972946931622428e-06, "loss": 24.2025, "step": 9330 }, { "epoch": 0.17332526090415845, "grad_norm": 34.0625, "learning_rate": 9.972917935836385e-06, "loss": 24.0105, "step": 9340 }, { "epoch": 0.17351083398863829, "grad_norm": 34.375, "learning_rate": 9.972888940050344e-06, "loss": 24.093, "step": 9350 }, { "epoch": 0.17369640707311812, "grad_norm": 32.90625, "learning_rate": 9.9728599442643e-06, "loss": 23.8456, "step": 9360 }, { "epoch": 0.17388198015759795, "grad_norm": 35.9375, "learning_rate": 9.972830948478257e-06, "loss": 23.6813, "step": 9370 }, { "epoch": 0.17406755324207776, "grad_norm": 36.5, "learning_rate": 9.972801952692216e-06, "loss": 23.8987, "step": 9380 }, { "epoch": 0.1742531263265576, "grad_norm": 36.03125, "learning_rate": 9.972772956906174e-06, "loss": 23.849, "step": 9390 }, { "epoch": 0.17443869941103743, "grad_norm": 34.25, "learning_rate": 9.972743961120131e-06, "loss": 24.0155, "step": 9400 }, { "epoch": 0.17462427249551726, "grad_norm": 35.71875, "learning_rate": 9.972714965334089e-06, "loss": 23.8576, "step": 9410 }, { "epoch": 0.17480984557999707, "grad_norm": 34.59375, "learning_rate": 9.972685969548046e-06, "loss": 23.4787, "step": 9420 }, { "epoch": 0.1749954186644769, "grad_norm": 34.59375, "learning_rate": 9.972656973762003e-06, "loss": 23.8267, "step": 9430 }, { "epoch": 0.17518099174895674, "grad_norm": 33.78125, "learning_rate": 9.972627977975961e-06, "loss": 24.1303, "step": 9440 }, { "epoch": 0.17536656483343654, "grad_norm": 35.34375, "learning_rate": 9.97259898218992e-06, "loss": 23.9669, "step": 9450 }, { "epoch": 0.17555213791791638, "grad_norm": 36.96875, "learning_rate": 9.972569986403877e-06, "loss": 24.2281, "step": 9460 }, { "epoch": 0.1757377110023962, "grad_norm": 35.5625, "learning_rate": 9.972540990617833e-06, "loss": 23.9939, "step": 9470 }, { "epoch": 0.17592328408687605, "grad_norm": 33.59375, "learning_rate": 9.972511994831792e-06, "loss": 23.9549, "step": 9480 }, { "epoch": 0.17610885717135585, "grad_norm": 35.25, "learning_rate": 9.97248299904575e-06, "loss": 23.9289, "step": 9490 }, { "epoch": 0.1762944302558357, "grad_norm": 35.625, "learning_rate": 9.972454003259707e-06, "loss": 23.6615, "step": 9500 }, { "epoch": 0.17648000334031552, "grad_norm": 34.125, "learning_rate": 9.972425007473664e-06, "loss": 23.9728, "step": 9510 }, { "epoch": 0.17666557642479536, "grad_norm": 35.6875, "learning_rate": 9.972396011687622e-06, "loss": 23.744, "step": 9520 }, { "epoch": 0.17685114950927516, "grad_norm": 33.53125, "learning_rate": 9.97236701590158e-06, "loss": 24.1058, "step": 9530 }, { "epoch": 0.177036722593755, "grad_norm": 36.03125, "learning_rate": 9.972338020115537e-06, "loss": 23.5845, "step": 9540 }, { "epoch": 0.17722229567823483, "grad_norm": 36.0, "learning_rate": 9.972309024329496e-06, "loss": 23.9865, "step": 9550 }, { "epoch": 0.17740786876271467, "grad_norm": 32.96875, "learning_rate": 9.972280028543453e-06, "loss": 23.7648, "step": 9560 }, { "epoch": 0.17759344184719447, "grad_norm": 37.125, "learning_rate": 9.97225103275741e-06, "loss": 23.5891, "step": 9570 }, { "epoch": 0.1777790149316743, "grad_norm": 34.90625, "learning_rate": 9.972222036971368e-06, "loss": 23.9162, "step": 9580 }, { "epoch": 0.17796458801615414, "grad_norm": 35.53125, "learning_rate": 9.972193041185325e-06, "loss": 24.0844, "step": 9590 }, { "epoch": 0.17815016110063397, "grad_norm": 35.59375, "learning_rate": 9.972164045399283e-06, "loss": 23.6739, "step": 9600 }, { "epoch": 0.17833573418511378, "grad_norm": 34.5625, "learning_rate": 9.97213504961324e-06, "loss": 23.8791, "step": 9610 }, { "epoch": 0.17852130726959362, "grad_norm": 33.46875, "learning_rate": 9.9721060538272e-06, "loss": 24.0582, "step": 9620 }, { "epoch": 0.17870688035407345, "grad_norm": 35.21875, "learning_rate": 9.972077058041155e-06, "loss": 23.953, "step": 9630 }, { "epoch": 0.17889245343855328, "grad_norm": 34.625, "learning_rate": 9.972048062255112e-06, "loss": 23.7487, "step": 9640 }, { "epoch": 0.1790780265230331, "grad_norm": 33.5, "learning_rate": 9.972019066469072e-06, "loss": 23.899, "step": 9650 }, { "epoch": 0.17926359960751292, "grad_norm": 35.125, "learning_rate": 9.971990070683029e-06, "loss": 24.0826, "step": 9660 }, { "epoch": 0.17944917269199276, "grad_norm": 34.34375, "learning_rate": 9.971961074896986e-06, "loss": 24.2801, "step": 9670 }, { "epoch": 0.1796347457764726, "grad_norm": 34.65625, "learning_rate": 9.971932079110944e-06, "loss": 24.1542, "step": 9680 }, { "epoch": 0.1798203188609524, "grad_norm": 33.15625, "learning_rate": 9.971903083324901e-06, "loss": 24.0658, "step": 9690 }, { "epoch": 0.18000589194543223, "grad_norm": 34.375, "learning_rate": 9.971874087538859e-06, "loss": 23.7835, "step": 9700 }, { "epoch": 0.18019146502991207, "grad_norm": 33.34375, "learning_rate": 9.971845091752816e-06, "loss": 23.8875, "step": 9710 }, { "epoch": 0.1803770381143919, "grad_norm": 33.1875, "learning_rate": 9.971816095966775e-06, "loss": 24.2697, "step": 9720 }, { "epoch": 0.1805626111988717, "grad_norm": 34.6875, "learning_rate": 9.971787100180732e-06, "loss": 23.5675, "step": 9730 }, { "epoch": 0.18074818428335154, "grad_norm": 34.3125, "learning_rate": 9.971758104394688e-06, "loss": 23.596, "step": 9740 }, { "epoch": 0.18093375736783138, "grad_norm": 36.3125, "learning_rate": 9.971729108608647e-06, "loss": 23.282, "step": 9750 }, { "epoch": 0.18111933045231118, "grad_norm": 34.6875, "learning_rate": 9.971700112822605e-06, "loss": 23.5783, "step": 9760 }, { "epoch": 0.18130490353679102, "grad_norm": 35.15625, "learning_rate": 9.971671117036562e-06, "loss": 23.8098, "step": 9770 }, { "epoch": 0.18149047662127085, "grad_norm": 35.65625, "learning_rate": 9.97164212125052e-06, "loss": 23.2739, "step": 9780 }, { "epoch": 0.1816760497057507, "grad_norm": 34.46875, "learning_rate": 9.971613125464477e-06, "loss": 23.7858, "step": 9790 }, { "epoch": 0.1818616227902305, "grad_norm": 36.1875, "learning_rate": 9.971584129678434e-06, "loss": 23.978, "step": 9800 }, { "epoch": 0.18204719587471033, "grad_norm": 35.875, "learning_rate": 9.971555133892392e-06, "loss": 23.7035, "step": 9810 }, { "epoch": 0.18223276895919016, "grad_norm": 36.6875, "learning_rate": 9.971526138106349e-06, "loss": 23.8743, "step": 9820 }, { "epoch": 0.18241834204367, "grad_norm": 33.84375, "learning_rate": 9.971497142320308e-06, "loss": 23.9896, "step": 9830 }, { "epoch": 0.1826039151281498, "grad_norm": 33.78125, "learning_rate": 9.971468146534266e-06, "loss": 23.5099, "step": 9840 }, { "epoch": 0.18278948821262964, "grad_norm": 35.5625, "learning_rate": 9.971439150748223e-06, "loss": 24.0062, "step": 9850 }, { "epoch": 0.18297506129710947, "grad_norm": 33.71875, "learning_rate": 9.97141015496218e-06, "loss": 23.7121, "step": 9860 }, { "epoch": 0.1831606343815893, "grad_norm": 35.875, "learning_rate": 9.971381159176138e-06, "loss": 23.5131, "step": 9870 }, { "epoch": 0.1833462074660691, "grad_norm": 35.84375, "learning_rate": 9.971352163390095e-06, "loss": 23.5695, "step": 9880 }, { "epoch": 0.18353178055054895, "grad_norm": 36.84375, "learning_rate": 9.971323167604053e-06, "loss": 23.6997, "step": 9890 }, { "epoch": 0.18371735363502878, "grad_norm": 34.0, "learning_rate": 9.97129417181801e-06, "loss": 23.6952, "step": 9900 }, { "epoch": 0.1839029267195086, "grad_norm": 35.5, "learning_rate": 9.971265176031967e-06, "loss": 23.6715, "step": 9910 }, { "epoch": 0.18408849980398842, "grad_norm": 37.25, "learning_rate": 9.971236180245925e-06, "loss": 23.7741, "step": 9920 }, { "epoch": 0.18427407288846825, "grad_norm": 34.15625, "learning_rate": 9.971207184459884e-06, "loss": 23.7368, "step": 9930 }, { "epoch": 0.1844596459729481, "grad_norm": 35.25, "learning_rate": 9.971178188673841e-06, "loss": 23.8807, "step": 9940 }, { "epoch": 0.18464521905742792, "grad_norm": 35.5, "learning_rate": 9.971149192887797e-06, "loss": 23.679, "step": 9950 }, { "epoch": 0.18483079214190773, "grad_norm": 34.8125, "learning_rate": 9.971120197101756e-06, "loss": 23.6042, "step": 9960 }, { "epoch": 0.18501636522638756, "grad_norm": 34.8125, "learning_rate": 9.971091201315714e-06, "loss": 23.7759, "step": 9970 }, { "epoch": 0.1852019383108674, "grad_norm": 34.6875, "learning_rate": 9.971062205529671e-06, "loss": 23.8106, "step": 9980 }, { "epoch": 0.18538751139534723, "grad_norm": 35.75, "learning_rate": 9.971033209743628e-06, "loss": 23.7437, "step": 9990 }, { "epoch": 0.18557308447982704, "grad_norm": 35.6875, "learning_rate": 9.971004213957588e-06, "loss": 23.887, "step": 10000 }, { "epoch": 0.18557308447982704, "eval_loss": 2.9664595127105713, "eval_runtime": 454.8435, "eval_samples_per_second": 3192.564, "eval_steps_per_second": 49.885, "step": 10000 }, { "epoch": 0.18575865756430687, "grad_norm": 34.6875, "learning_rate": 9.970975218171543e-06, "loss": 24.2771, "step": 10010 }, { "epoch": 0.1859442306487867, "grad_norm": 35.9375, "learning_rate": 9.9709462223855e-06, "loss": 23.6828, "step": 10020 }, { "epoch": 0.1861298037332665, "grad_norm": 34.8125, "learning_rate": 9.97091722659946e-06, "loss": 23.3928, "step": 10030 }, { "epoch": 0.18631537681774635, "grad_norm": 35.125, "learning_rate": 9.970888230813417e-06, "loss": 23.5511, "step": 10040 }, { "epoch": 0.18650094990222618, "grad_norm": 35.46875, "learning_rate": 9.970859235027375e-06, "loss": 23.9068, "step": 10050 }, { "epoch": 0.18668652298670602, "grad_norm": 34.25, "learning_rate": 9.970830239241332e-06, "loss": 23.9531, "step": 10060 }, { "epoch": 0.18687209607118582, "grad_norm": 35.53125, "learning_rate": 9.97080124345529e-06, "loss": 23.9918, "step": 10070 }, { "epoch": 0.18705766915566566, "grad_norm": 35.03125, "learning_rate": 9.970772247669247e-06, "loss": 23.7854, "step": 10080 }, { "epoch": 0.1872432422401455, "grad_norm": 35.0625, "learning_rate": 9.970743251883204e-06, "loss": 23.7965, "step": 10090 }, { "epoch": 0.18742881532462533, "grad_norm": 35.25, "learning_rate": 9.970714256097163e-06, "loss": 23.72, "step": 10100 }, { "epoch": 0.18761438840910513, "grad_norm": 34.71875, "learning_rate": 9.970685260311119e-06, "loss": 23.7046, "step": 10110 }, { "epoch": 0.18779996149358497, "grad_norm": 34.75, "learning_rate": 9.970656264525076e-06, "loss": 24.0583, "step": 10120 }, { "epoch": 0.1879855345780648, "grad_norm": 35.21875, "learning_rate": 9.970627268739036e-06, "loss": 23.5673, "step": 10130 }, { "epoch": 0.18817110766254463, "grad_norm": 35.0625, "learning_rate": 9.970598272952993e-06, "loss": 23.5273, "step": 10140 }, { "epoch": 0.18835668074702444, "grad_norm": 35.625, "learning_rate": 9.97056927716695e-06, "loss": 23.8089, "step": 10150 }, { "epoch": 0.18854225383150428, "grad_norm": 35.0, "learning_rate": 9.970540281380908e-06, "loss": 23.5046, "step": 10160 }, { "epoch": 0.1887278269159841, "grad_norm": 35.75, "learning_rate": 9.970511285594865e-06, "loss": 23.674, "step": 10170 }, { "epoch": 0.18891340000046394, "grad_norm": 33.59375, "learning_rate": 9.970482289808823e-06, "loss": 23.2634, "step": 10180 }, { "epoch": 0.18909897308494375, "grad_norm": 36.53125, "learning_rate": 9.97045329402278e-06, "loss": 23.6953, "step": 10190 }, { "epoch": 0.18928454616942358, "grad_norm": 36.4375, "learning_rate": 9.970424298236739e-06, "loss": 23.4264, "step": 10200 }, { "epoch": 0.18947011925390342, "grad_norm": 35.3125, "learning_rate": 9.970395302450696e-06, "loss": 23.1557, "step": 10210 }, { "epoch": 0.18965569233838325, "grad_norm": 34.90625, "learning_rate": 9.970366306664652e-06, "loss": 23.539, "step": 10220 }, { "epoch": 0.18984126542286306, "grad_norm": 34.09375, "learning_rate": 9.970337310878611e-06, "loss": 23.8074, "step": 10230 }, { "epoch": 0.1900268385073429, "grad_norm": 37.5625, "learning_rate": 9.970308315092569e-06, "loss": 23.6492, "step": 10240 }, { "epoch": 0.19021241159182273, "grad_norm": 33.40625, "learning_rate": 9.970279319306526e-06, "loss": 23.5333, "step": 10250 }, { "epoch": 0.19039798467630256, "grad_norm": 35.15625, "learning_rate": 9.970250323520484e-06, "loss": 23.7081, "step": 10260 }, { "epoch": 0.19058355776078237, "grad_norm": 36.21875, "learning_rate": 9.970221327734441e-06, "loss": 23.0549, "step": 10270 }, { "epoch": 0.1907691308452622, "grad_norm": 37.1875, "learning_rate": 9.970192331948398e-06, "loss": 23.529, "step": 10280 }, { "epoch": 0.19095470392974204, "grad_norm": 35.6875, "learning_rate": 9.970163336162356e-06, "loss": 23.6286, "step": 10290 }, { "epoch": 0.19114027701422187, "grad_norm": 36.9375, "learning_rate": 9.970134340376315e-06, "loss": 23.9015, "step": 10300 }, { "epoch": 0.19132585009870168, "grad_norm": 35.1875, "learning_rate": 9.970105344590272e-06, "loss": 23.5723, "step": 10310 }, { "epoch": 0.1915114231831815, "grad_norm": 33.78125, "learning_rate": 9.97007634880423e-06, "loss": 23.5897, "step": 10320 }, { "epoch": 0.19169699626766135, "grad_norm": 33.90625, "learning_rate": 9.970047353018187e-06, "loss": 24.0946, "step": 10330 }, { "epoch": 0.19188256935214115, "grad_norm": 35.15625, "learning_rate": 9.970018357232144e-06, "loss": 23.9352, "step": 10340 }, { "epoch": 0.192068142436621, "grad_norm": 37.9375, "learning_rate": 9.969989361446102e-06, "loss": 23.9665, "step": 10350 }, { "epoch": 0.19225371552110082, "grad_norm": 33.53125, "learning_rate": 9.96996036566006e-06, "loss": 23.3916, "step": 10360 }, { "epoch": 0.19243928860558066, "grad_norm": 34.3125, "learning_rate": 9.969931369874017e-06, "loss": 23.5357, "step": 10370 }, { "epoch": 0.19262486169006046, "grad_norm": 32.90625, "learning_rate": 9.969902374087974e-06, "loss": 24.2098, "step": 10380 }, { "epoch": 0.1928104347745403, "grad_norm": 36.4375, "learning_rate": 9.969873378301931e-06, "loss": 23.3536, "step": 10390 }, { "epoch": 0.19299600785902013, "grad_norm": 34.96875, "learning_rate": 9.969844382515889e-06, "loss": 23.5672, "step": 10400 }, { "epoch": 0.19318158094349996, "grad_norm": 35.21875, "learning_rate": 9.969815386729848e-06, "loss": 23.262, "step": 10410 }, { "epoch": 0.19336715402797977, "grad_norm": 34.9375, "learning_rate": 9.969786390943805e-06, "loss": 23.7718, "step": 10420 }, { "epoch": 0.1935527271124596, "grad_norm": 35.1875, "learning_rate": 9.969757395157763e-06, "loss": 23.3401, "step": 10430 }, { "epoch": 0.19373830019693944, "grad_norm": 33.28125, "learning_rate": 9.96972839937172e-06, "loss": 23.7594, "step": 10440 }, { "epoch": 0.19392387328141927, "grad_norm": 35.8125, "learning_rate": 9.969699403585678e-06, "loss": 23.7115, "step": 10450 }, { "epoch": 0.19410944636589908, "grad_norm": 35.6875, "learning_rate": 9.969670407799635e-06, "loss": 23.6501, "step": 10460 }, { "epoch": 0.19429501945037891, "grad_norm": 34.15625, "learning_rate": 9.969641412013592e-06, "loss": 23.6335, "step": 10470 }, { "epoch": 0.19448059253485875, "grad_norm": 33.75, "learning_rate": 9.969612416227552e-06, "loss": 24.3264, "step": 10480 }, { "epoch": 0.19466616561933858, "grad_norm": 34.46875, "learning_rate": 9.969583420441507e-06, "loss": 23.7675, "step": 10490 }, { "epoch": 0.1948517387038184, "grad_norm": 35.3125, "learning_rate": 9.969554424655465e-06, "loss": 23.1986, "step": 10500 }, { "epoch": 0.19503731178829822, "grad_norm": 33.8125, "learning_rate": 9.969525428869424e-06, "loss": 23.6049, "step": 10510 }, { "epoch": 0.19522288487277806, "grad_norm": 34.34375, "learning_rate": 9.969496433083381e-06, "loss": 23.5584, "step": 10520 }, { "epoch": 0.1954084579572579, "grad_norm": 35.65625, "learning_rate": 9.969467437297339e-06, "loss": 23.4357, "step": 10530 }, { "epoch": 0.1955940310417377, "grad_norm": 32.65625, "learning_rate": 9.969438441511296e-06, "loss": 23.708, "step": 10540 }, { "epoch": 0.19577960412621753, "grad_norm": 34.25, "learning_rate": 9.969409445725253e-06, "loss": 23.0793, "step": 10550 }, { "epoch": 0.19596517721069737, "grad_norm": 35.15625, "learning_rate": 9.96938044993921e-06, "loss": 23.6577, "step": 10560 }, { "epoch": 0.1961507502951772, "grad_norm": 34.03125, "learning_rate": 9.969351454153168e-06, "loss": 23.9597, "step": 10570 }, { "epoch": 0.196336323379657, "grad_norm": 34.15625, "learning_rate": 9.969322458367127e-06, "loss": 23.5803, "step": 10580 }, { "epoch": 0.19652189646413684, "grad_norm": 31.96875, "learning_rate": 9.969293462581085e-06, "loss": 23.3744, "step": 10590 }, { "epoch": 0.19670746954861668, "grad_norm": 34.9375, "learning_rate": 9.96926446679504e-06, "loss": 23.2254, "step": 10600 }, { "epoch": 0.19689304263309648, "grad_norm": 35.9375, "learning_rate": 9.969235471009e-06, "loss": 23.7611, "step": 10610 }, { "epoch": 0.19707861571757632, "grad_norm": 36.28125, "learning_rate": 9.969206475222957e-06, "loss": 23.9757, "step": 10620 }, { "epoch": 0.19726418880205615, "grad_norm": 35.53125, "learning_rate": 9.969177479436914e-06, "loss": 23.0914, "step": 10630 }, { "epoch": 0.19744976188653598, "grad_norm": 37.25, "learning_rate": 9.969148483650872e-06, "loss": 23.5468, "step": 10640 }, { "epoch": 0.1976353349710158, "grad_norm": 33.375, "learning_rate": 9.96911948786483e-06, "loss": 23.446, "step": 10650 }, { "epoch": 0.19782090805549563, "grad_norm": 35.09375, "learning_rate": 9.969090492078787e-06, "loss": 23.2178, "step": 10660 }, { "epoch": 0.19800648113997546, "grad_norm": 36.5, "learning_rate": 9.969061496292744e-06, "loss": 23.965, "step": 10670 }, { "epoch": 0.1981920542244553, "grad_norm": 34.21875, "learning_rate": 9.969032500506703e-06, "loss": 23.7035, "step": 10680 }, { "epoch": 0.1983776273089351, "grad_norm": 33.21875, "learning_rate": 9.96900350472066e-06, "loss": 23.758, "step": 10690 }, { "epoch": 0.19856320039341493, "grad_norm": 34.84375, "learning_rate": 9.968974508934616e-06, "loss": 23.4742, "step": 10700 }, { "epoch": 0.19874877347789477, "grad_norm": 34.46875, "learning_rate": 9.968945513148575e-06, "loss": 23.8078, "step": 10710 }, { "epoch": 0.1989343465623746, "grad_norm": 34.4375, "learning_rate": 9.968916517362533e-06, "loss": 23.9313, "step": 10720 }, { "epoch": 0.1991199196468544, "grad_norm": 34.59375, "learning_rate": 9.96888752157649e-06, "loss": 24.0837, "step": 10730 }, { "epoch": 0.19930549273133424, "grad_norm": 35.1875, "learning_rate": 9.968858525790448e-06, "loss": 23.7037, "step": 10740 }, { "epoch": 0.19949106581581408, "grad_norm": 33.5625, "learning_rate": 9.968829530004407e-06, "loss": 23.7664, "step": 10750 }, { "epoch": 0.1996766389002939, "grad_norm": 33.9375, "learning_rate": 9.968800534218362e-06, "loss": 23.1647, "step": 10760 }, { "epoch": 0.19986221198477372, "grad_norm": 33.8125, "learning_rate": 9.96877153843232e-06, "loss": 23.4258, "step": 10770 }, { "epoch": 0.20004778506925355, "grad_norm": 34.09375, "learning_rate": 9.968742542646279e-06, "loss": 23.2949, "step": 10780 }, { "epoch": 0.2002333581537334, "grad_norm": 35.25, "learning_rate": 9.968713546860236e-06, "loss": 23.2747, "step": 10790 }, { "epoch": 0.20041893123821322, "grad_norm": 34.4375, "learning_rate": 9.968684551074194e-06, "loss": 23.3498, "step": 10800 }, { "epoch": 0.20060450432269303, "grad_norm": 34.6875, "learning_rate": 9.968655555288151e-06, "loss": 23.1578, "step": 10810 }, { "epoch": 0.20079007740717286, "grad_norm": 35.1875, "learning_rate": 9.968626559502108e-06, "loss": 23.6403, "step": 10820 }, { "epoch": 0.2009756504916527, "grad_norm": 38.40625, "learning_rate": 9.968597563716066e-06, "loss": 23.2563, "step": 10830 }, { "epoch": 0.20116122357613253, "grad_norm": 33.375, "learning_rate": 9.968568567930023e-06, "loss": 23.6925, "step": 10840 }, { "epoch": 0.20134679666061234, "grad_norm": 36.0, "learning_rate": 9.96853957214398e-06, "loss": 23.2151, "step": 10850 }, { "epoch": 0.20153236974509217, "grad_norm": 34.09375, "learning_rate": 9.968510576357938e-06, "loss": 23.1649, "step": 10860 }, { "epoch": 0.201717942829572, "grad_norm": 35.3125, "learning_rate": 9.968481580571896e-06, "loss": 23.6179, "step": 10870 }, { "epoch": 0.2019035159140518, "grad_norm": 32.59375, "learning_rate": 9.968452584785855e-06, "loss": 23.1651, "step": 10880 }, { "epoch": 0.20208908899853165, "grad_norm": 35.21875, "learning_rate": 9.968423588999812e-06, "loss": 23.2298, "step": 10890 }, { "epoch": 0.20227466208301148, "grad_norm": 36.4375, "learning_rate": 9.96839459321377e-06, "loss": 23.0713, "step": 10900 }, { "epoch": 0.20246023516749131, "grad_norm": 34.8125, "learning_rate": 9.968365597427727e-06, "loss": 23.2793, "step": 10910 }, { "epoch": 0.20264580825197112, "grad_norm": 34.53125, "learning_rate": 9.968336601641684e-06, "loss": 23.8128, "step": 10920 }, { "epoch": 0.20283138133645096, "grad_norm": 35.59375, "learning_rate": 9.968307605855642e-06, "loss": 23.302, "step": 10930 }, { "epoch": 0.2030169544209308, "grad_norm": 34.5, "learning_rate": 9.968278610069599e-06, "loss": 23.4782, "step": 10940 }, { "epoch": 0.20320252750541062, "grad_norm": 35.875, "learning_rate": 9.968249614283556e-06, "loss": 23.7409, "step": 10950 }, { "epoch": 0.20338810058989043, "grad_norm": 37.84375, "learning_rate": 9.968220618497516e-06, "loss": 23.5444, "step": 10960 }, { "epoch": 0.20357367367437026, "grad_norm": 31.90625, "learning_rate": 9.968191622711471e-06, "loss": 23.3963, "step": 10970 }, { "epoch": 0.2037592467588501, "grad_norm": 34.125, "learning_rate": 9.968162626925429e-06, "loss": 22.8245, "step": 10980 }, { "epoch": 0.20394481984332993, "grad_norm": 33.375, "learning_rate": 9.968133631139388e-06, "loss": 23.3189, "step": 10990 }, { "epoch": 0.20413039292780974, "grad_norm": 35.28125, "learning_rate": 9.968104635353345e-06, "loss": 23.5008, "step": 11000 }, { "epoch": 0.20431596601228957, "grad_norm": 35.75, "learning_rate": 9.968075639567303e-06, "loss": 23.534, "step": 11010 }, { "epoch": 0.2045015390967694, "grad_norm": 32.78125, "learning_rate": 9.96804664378126e-06, "loss": 23.4768, "step": 11020 }, { "epoch": 0.20468711218124924, "grad_norm": 36.46875, "learning_rate": 9.968017647995217e-06, "loss": 23.6789, "step": 11030 }, { "epoch": 0.20487268526572905, "grad_norm": 32.9375, "learning_rate": 9.967988652209175e-06, "loss": 23.4918, "step": 11040 }, { "epoch": 0.20505825835020888, "grad_norm": 35.03125, "learning_rate": 9.967959656423132e-06, "loss": 22.996, "step": 11050 }, { "epoch": 0.20524383143468872, "grad_norm": 34.53125, "learning_rate": 9.967930660637091e-06, "loss": 23.3215, "step": 11060 }, { "epoch": 0.20542940451916855, "grad_norm": 35.0, "learning_rate": 9.967901664851049e-06, "loss": 22.972, "step": 11070 }, { "epoch": 0.20561497760364836, "grad_norm": 36.0, "learning_rate": 9.967872669065004e-06, "loss": 22.7126, "step": 11080 }, { "epoch": 0.2058005506881282, "grad_norm": 35.875, "learning_rate": 9.967843673278964e-06, "loss": 23.8109, "step": 11090 }, { "epoch": 0.20598612377260803, "grad_norm": 32.96875, "learning_rate": 9.967814677492921e-06, "loss": 23.2342, "step": 11100 }, { "epoch": 0.20617169685708786, "grad_norm": 37.65625, "learning_rate": 9.967785681706878e-06, "loss": 23.8215, "step": 11110 }, { "epoch": 0.20635726994156767, "grad_norm": 37.1875, "learning_rate": 9.967756685920836e-06, "loss": 23.4363, "step": 11120 }, { "epoch": 0.2065428430260475, "grad_norm": 34.5, "learning_rate": 9.967727690134793e-06, "loss": 23.4552, "step": 11130 }, { "epoch": 0.20672841611052734, "grad_norm": 34.125, "learning_rate": 9.96769869434875e-06, "loss": 23.5466, "step": 11140 }, { "epoch": 0.20691398919500717, "grad_norm": 34.5625, "learning_rate": 9.967669698562708e-06, "loss": 23.4444, "step": 11150 }, { "epoch": 0.20709956227948698, "grad_norm": 35.28125, "learning_rate": 9.967640702776667e-06, "loss": 23.0774, "step": 11160 }, { "epoch": 0.2072851353639668, "grad_norm": 35.53125, "learning_rate": 9.967611706990624e-06, "loss": 23.1711, "step": 11170 }, { "epoch": 0.20747070844844664, "grad_norm": 33.75, "learning_rate": 9.967582711204582e-06, "loss": 23.2173, "step": 11180 }, { "epoch": 0.20765628153292645, "grad_norm": 35.84375, "learning_rate": 9.96755371541854e-06, "loss": 23.6226, "step": 11190 }, { "epoch": 0.20784185461740629, "grad_norm": 34.96875, "learning_rate": 9.967524719632497e-06, "loss": 22.9892, "step": 11200 }, { "epoch": 0.20802742770188612, "grad_norm": 34.0625, "learning_rate": 9.967495723846454e-06, "loss": 23.2622, "step": 11210 }, { "epoch": 0.20821300078636595, "grad_norm": 34.03125, "learning_rate": 9.967466728060412e-06, "loss": 22.9742, "step": 11220 }, { "epoch": 0.20839857387084576, "grad_norm": 34.84375, "learning_rate": 9.96743773227437e-06, "loss": 23.3746, "step": 11230 }, { "epoch": 0.2085841469553256, "grad_norm": 33.71875, "learning_rate": 9.967408736488326e-06, "loss": 23.0632, "step": 11240 }, { "epoch": 0.20876972003980543, "grad_norm": 36.875, "learning_rate": 9.967379740702284e-06, "loss": 23.3394, "step": 11250 }, { "epoch": 0.20895529312428526, "grad_norm": 34.65625, "learning_rate": 9.967350744916243e-06, "loss": 23.298, "step": 11260 }, { "epoch": 0.20914086620876507, "grad_norm": 36.0, "learning_rate": 9.9673217491302e-06, "loss": 23.015, "step": 11270 }, { "epoch": 0.2093264392932449, "grad_norm": 35.34375, "learning_rate": 9.967292753344158e-06, "loss": 23.2773, "step": 11280 }, { "epoch": 0.20951201237772474, "grad_norm": 34.65625, "learning_rate": 9.967263757558115e-06, "loss": 23.3175, "step": 11290 }, { "epoch": 0.20969758546220457, "grad_norm": 35.96875, "learning_rate": 9.967234761772072e-06, "loss": 23.3183, "step": 11300 }, { "epoch": 0.20988315854668438, "grad_norm": 33.53125, "learning_rate": 9.96720576598603e-06, "loss": 23.4582, "step": 11310 }, { "epoch": 0.2100687316311642, "grad_norm": 34.78125, "learning_rate": 9.967176770199987e-06, "loss": 23.4951, "step": 11320 }, { "epoch": 0.21025430471564405, "grad_norm": 35.09375, "learning_rate": 9.967147774413945e-06, "loss": 23.4455, "step": 11330 }, { "epoch": 0.21043987780012388, "grad_norm": 34.59375, "learning_rate": 9.967118778627904e-06, "loss": 22.9397, "step": 11340 }, { "epoch": 0.2106254508846037, "grad_norm": 37.4375, "learning_rate": 9.96708978284186e-06, "loss": 23.5124, "step": 11350 }, { "epoch": 0.21081102396908352, "grad_norm": 33.75, "learning_rate": 9.967060787055819e-06, "loss": 23.2777, "step": 11360 }, { "epoch": 0.21099659705356336, "grad_norm": 36.375, "learning_rate": 9.967031791269776e-06, "loss": 23.5096, "step": 11370 }, { "epoch": 0.2111821701380432, "grad_norm": 35.40625, "learning_rate": 9.967002795483733e-06, "loss": 23.5407, "step": 11380 }, { "epoch": 0.211367743222523, "grad_norm": 35.375, "learning_rate": 9.96697379969769e-06, "loss": 23.0631, "step": 11390 }, { "epoch": 0.21155331630700283, "grad_norm": 33.71875, "learning_rate": 9.966944803911648e-06, "loss": 23.4863, "step": 11400 }, { "epoch": 0.21173888939148267, "grad_norm": 33.65625, "learning_rate": 9.966915808125606e-06, "loss": 22.7694, "step": 11410 }, { "epoch": 0.2119244624759625, "grad_norm": 33.84375, "learning_rate": 9.966886812339563e-06, "loss": 23.4398, "step": 11420 }, { "epoch": 0.2121100355604423, "grad_norm": 36.125, "learning_rate": 9.96685781655352e-06, "loss": 23.2906, "step": 11430 }, { "epoch": 0.21229560864492214, "grad_norm": 33.3125, "learning_rate": 9.96682882076748e-06, "loss": 23.335, "step": 11440 }, { "epoch": 0.21248118172940197, "grad_norm": 33.4375, "learning_rate": 9.966799824981435e-06, "loss": 23.3474, "step": 11450 }, { "epoch": 0.21266675481388178, "grad_norm": 35.6875, "learning_rate": 9.966770829195393e-06, "loss": 23.5648, "step": 11460 }, { "epoch": 0.21285232789836162, "grad_norm": 36.21875, "learning_rate": 9.966741833409352e-06, "loss": 23.1292, "step": 11470 }, { "epoch": 0.21303790098284145, "grad_norm": 36.78125, "learning_rate": 9.96671283762331e-06, "loss": 23.2989, "step": 11480 }, { "epoch": 0.21322347406732128, "grad_norm": 35.59375, "learning_rate": 9.966683841837267e-06, "loss": 23.4273, "step": 11490 }, { "epoch": 0.2134090471518011, "grad_norm": 35.71875, "learning_rate": 9.966654846051224e-06, "loss": 23.0285, "step": 11500 }, { "epoch": 0.21359462023628092, "grad_norm": 34.375, "learning_rate": 9.966625850265181e-06, "loss": 23.4227, "step": 11510 }, { "epoch": 0.21378019332076076, "grad_norm": 33.59375, "learning_rate": 9.966596854479139e-06, "loss": 23.3722, "step": 11520 }, { "epoch": 0.2139657664052406, "grad_norm": 34.65625, "learning_rate": 9.966567858693096e-06, "loss": 23.19, "step": 11530 }, { "epoch": 0.2141513394897204, "grad_norm": 35.375, "learning_rate": 9.966538862907055e-06, "loss": 23.5036, "step": 11540 }, { "epoch": 0.21433691257420023, "grad_norm": 34.3125, "learning_rate": 9.966509867121013e-06, "loss": 23.3387, "step": 11550 }, { "epoch": 0.21452248565868007, "grad_norm": 34.0625, "learning_rate": 9.966480871334968e-06, "loss": 23.121, "step": 11560 }, { "epoch": 0.2147080587431599, "grad_norm": 35.71875, "learning_rate": 9.966451875548928e-06, "loss": 23.4267, "step": 11570 }, { "epoch": 0.2148936318276397, "grad_norm": 36.71875, "learning_rate": 9.966422879762885e-06, "loss": 23.2037, "step": 11580 }, { "epoch": 0.21507920491211954, "grad_norm": 34.3125, "learning_rate": 9.966393883976842e-06, "loss": 23.6055, "step": 11590 }, { "epoch": 0.21526477799659938, "grad_norm": 33.5, "learning_rate": 9.9663648881908e-06, "loss": 23.0454, "step": 11600 }, { "epoch": 0.2154503510810792, "grad_norm": 35.0625, "learning_rate": 9.966335892404759e-06, "loss": 22.7495, "step": 11610 }, { "epoch": 0.21563592416555902, "grad_norm": 34.9375, "learning_rate": 9.966306896618715e-06, "loss": 23.8042, "step": 11620 }, { "epoch": 0.21582149725003885, "grad_norm": 37.03125, "learning_rate": 9.966277900832672e-06, "loss": 23.4072, "step": 11630 }, { "epoch": 0.21600707033451869, "grad_norm": 37.09375, "learning_rate": 9.966248905046631e-06, "loss": 23.2861, "step": 11640 }, { "epoch": 0.21619264341899852, "grad_norm": 34.40625, "learning_rate": 9.966219909260589e-06, "loss": 23.3566, "step": 11650 }, { "epoch": 0.21637821650347833, "grad_norm": 33.875, "learning_rate": 9.966190913474546e-06, "loss": 22.9984, "step": 11660 }, { "epoch": 0.21656378958795816, "grad_norm": 34.09375, "learning_rate": 9.966161917688503e-06, "loss": 23.0596, "step": 11670 }, { "epoch": 0.216749362672438, "grad_norm": 35.46875, "learning_rate": 9.96613292190246e-06, "loss": 23.5079, "step": 11680 }, { "epoch": 0.21693493575691783, "grad_norm": 34.96875, "learning_rate": 9.966103926116418e-06, "loss": 22.9615, "step": 11690 }, { "epoch": 0.21712050884139764, "grad_norm": 35.65625, "learning_rate": 9.966074930330376e-06, "loss": 23.3229, "step": 11700 }, { "epoch": 0.21730608192587747, "grad_norm": 33.46875, "learning_rate": 9.966045934544335e-06, "loss": 23.2428, "step": 11710 }, { "epoch": 0.2174916550103573, "grad_norm": 33.625, "learning_rate": 9.96601693875829e-06, "loss": 23.3109, "step": 11720 }, { "epoch": 0.21767722809483714, "grad_norm": 35.34375, "learning_rate": 9.965987942972248e-06, "loss": 23.313, "step": 11730 }, { "epoch": 0.21786280117931694, "grad_norm": 34.8125, "learning_rate": 9.965958947186207e-06, "loss": 22.9677, "step": 11740 }, { "epoch": 0.21804837426379678, "grad_norm": 34.46875, "learning_rate": 9.965929951400164e-06, "loss": 23.4194, "step": 11750 }, { "epoch": 0.2182339473482766, "grad_norm": 35.875, "learning_rate": 9.965900955614122e-06, "loss": 23.2412, "step": 11760 }, { "epoch": 0.21841952043275642, "grad_norm": 34.96875, "learning_rate": 9.965871959828079e-06, "loss": 23.1838, "step": 11770 }, { "epoch": 0.21860509351723625, "grad_norm": 36.4375, "learning_rate": 9.965842964042036e-06, "loss": 23.4135, "step": 11780 }, { "epoch": 0.2187906666017161, "grad_norm": 35.21875, "learning_rate": 9.965813968255994e-06, "loss": 23.0506, "step": 11790 }, { "epoch": 0.21897623968619592, "grad_norm": 34.65625, "learning_rate": 9.965784972469951e-06, "loss": 23.2738, "step": 11800 }, { "epoch": 0.21916181277067573, "grad_norm": 36.21875, "learning_rate": 9.96575597668391e-06, "loss": 23.1185, "step": 11810 }, { "epoch": 0.21934738585515556, "grad_norm": 36.46875, "learning_rate": 9.965726980897868e-06, "loss": 23.2145, "step": 11820 }, { "epoch": 0.2195329589396354, "grad_norm": 33.96875, "learning_rate": 9.965697985111824e-06, "loss": 23.4867, "step": 11830 }, { "epoch": 0.21971853202411523, "grad_norm": 36.375, "learning_rate": 9.965668989325783e-06, "loss": 22.9346, "step": 11840 }, { "epoch": 0.21990410510859504, "grad_norm": 34.34375, "learning_rate": 9.96563999353974e-06, "loss": 23.1345, "step": 11850 }, { "epoch": 0.22008967819307487, "grad_norm": 36.34375, "learning_rate": 9.965610997753697e-06, "loss": 23.0301, "step": 11860 }, { "epoch": 0.2202752512775547, "grad_norm": 35.28125, "learning_rate": 9.965582001967655e-06, "loss": 22.9159, "step": 11870 }, { "epoch": 0.22046082436203454, "grad_norm": 33.9375, "learning_rate": 9.965553006181612e-06, "loss": 23.1306, "step": 11880 }, { "epoch": 0.22064639744651435, "grad_norm": 33.9375, "learning_rate": 9.96552401039557e-06, "loss": 23.0807, "step": 11890 }, { "epoch": 0.22083197053099418, "grad_norm": 37.5625, "learning_rate": 9.965495014609527e-06, "loss": 23.1038, "step": 11900 }, { "epoch": 0.22101754361547402, "grad_norm": 36.21875, "learning_rate": 9.965466018823484e-06, "loss": 23.5351, "step": 11910 }, { "epoch": 0.22120311669995385, "grad_norm": 35.59375, "learning_rate": 9.965437023037444e-06, "loss": 23.2319, "step": 11920 }, { "epoch": 0.22138868978443366, "grad_norm": 33.125, "learning_rate": 9.965408027251401e-06, "loss": 23.1569, "step": 11930 }, { "epoch": 0.2215742628689135, "grad_norm": 35.40625, "learning_rate": 9.965379031465358e-06, "loss": 23.5166, "step": 11940 }, { "epoch": 0.22175983595339332, "grad_norm": 35.96875, "learning_rate": 9.965350035679316e-06, "loss": 23.0607, "step": 11950 }, { "epoch": 0.22194540903787316, "grad_norm": 35.0, "learning_rate": 9.965321039893273e-06, "loss": 23.1148, "step": 11960 }, { "epoch": 0.22213098212235297, "grad_norm": 36.34375, "learning_rate": 9.96529204410723e-06, "loss": 22.8368, "step": 11970 }, { "epoch": 0.2223165552068328, "grad_norm": 35.40625, "learning_rate": 9.965263048321188e-06, "loss": 22.4958, "step": 11980 }, { "epoch": 0.22250212829131263, "grad_norm": 35.96875, "learning_rate": 9.965234052535145e-06, "loss": 22.882, "step": 11990 }, { "epoch": 0.22268770137579247, "grad_norm": 37.53125, "learning_rate": 9.965205056749103e-06, "loss": 23.2451, "step": 12000 }, { "epoch": 0.22287327446027227, "grad_norm": 36.03125, "learning_rate": 9.96517606096306e-06, "loss": 22.7075, "step": 12010 }, { "epoch": 0.2230588475447521, "grad_norm": 34.5, "learning_rate": 9.96514706517702e-06, "loss": 22.9387, "step": 12020 }, { "epoch": 0.22324442062923194, "grad_norm": 34.9375, "learning_rate": 9.965118069390977e-06, "loss": 23.5015, "step": 12030 }, { "epoch": 0.22342999371371175, "grad_norm": 36.375, "learning_rate": 9.965089073604932e-06, "loss": 23.2239, "step": 12040 }, { "epoch": 0.22361556679819158, "grad_norm": 33.78125, "learning_rate": 9.965060077818892e-06, "loss": 22.8462, "step": 12050 }, { "epoch": 0.22380113988267142, "grad_norm": 35.96875, "learning_rate": 9.965031082032849e-06, "loss": 23.1589, "step": 12060 }, { "epoch": 0.22398671296715125, "grad_norm": 33.875, "learning_rate": 9.965002086246806e-06, "loss": 23.3079, "step": 12070 }, { "epoch": 0.22417228605163106, "grad_norm": 35.625, "learning_rate": 9.964973090460764e-06, "loss": 22.9087, "step": 12080 }, { "epoch": 0.2243578591361109, "grad_norm": 33.1875, "learning_rate": 9.964944094674723e-06, "loss": 23.1929, "step": 12090 }, { "epoch": 0.22454343222059073, "grad_norm": 36.125, "learning_rate": 9.964915098888679e-06, "loss": 22.967, "step": 12100 }, { "epoch": 0.22472900530507056, "grad_norm": 35.03125, "learning_rate": 9.964886103102636e-06, "loss": 22.9228, "step": 12110 }, { "epoch": 0.22491457838955037, "grad_norm": 35.03125, "learning_rate": 9.964857107316595e-06, "loss": 22.9758, "step": 12120 }, { "epoch": 0.2251001514740302, "grad_norm": 34.0625, "learning_rate": 9.964828111530553e-06, "loss": 22.2338, "step": 12130 }, { "epoch": 0.22528572455851004, "grad_norm": 36.09375, "learning_rate": 9.96479911574451e-06, "loss": 23.1649, "step": 12140 }, { "epoch": 0.22547129764298987, "grad_norm": 35.5625, "learning_rate": 9.964770119958467e-06, "loss": 23.1733, "step": 12150 }, { "epoch": 0.22565687072746968, "grad_norm": 36.53125, "learning_rate": 9.964741124172425e-06, "loss": 22.8313, "step": 12160 }, { "epoch": 0.2258424438119495, "grad_norm": 36.40625, "learning_rate": 9.964712128386382e-06, "loss": 22.7984, "step": 12170 }, { "epoch": 0.22602801689642935, "grad_norm": 34.4375, "learning_rate": 9.96468313260034e-06, "loss": 23.1878, "step": 12180 }, { "epoch": 0.22621358998090918, "grad_norm": 36.25, "learning_rate": 9.964654136814299e-06, "loss": 23.0327, "step": 12190 }, { "epoch": 0.226399163065389, "grad_norm": 36.09375, "learning_rate": 9.964625141028256e-06, "loss": 22.7311, "step": 12200 }, { "epoch": 0.22658473614986882, "grad_norm": 34.15625, "learning_rate": 9.964596145242212e-06, "loss": 22.8199, "step": 12210 }, { "epoch": 0.22677030923434865, "grad_norm": 33.9375, "learning_rate": 9.964567149456171e-06, "loss": 22.6879, "step": 12220 }, { "epoch": 0.2269558823188285, "grad_norm": 35.3125, "learning_rate": 9.964538153670128e-06, "loss": 23.2458, "step": 12230 }, { "epoch": 0.2271414554033083, "grad_norm": 32.4375, "learning_rate": 9.964509157884086e-06, "loss": 22.7513, "step": 12240 }, { "epoch": 0.22732702848778813, "grad_norm": 34.5, "learning_rate": 9.964480162098043e-06, "loss": 23.2389, "step": 12250 }, { "epoch": 0.22751260157226796, "grad_norm": 37.0625, "learning_rate": 9.964451166312e-06, "loss": 23.358, "step": 12260 }, { "epoch": 0.2276981746567478, "grad_norm": 36.75, "learning_rate": 9.964422170525958e-06, "loss": 23.0215, "step": 12270 }, { "epoch": 0.2278837477412276, "grad_norm": 34.4375, "learning_rate": 9.964393174739915e-06, "loss": 22.7708, "step": 12280 }, { "epoch": 0.22806932082570744, "grad_norm": 35.46875, "learning_rate": 9.964364178953874e-06, "loss": 23.462, "step": 12290 }, { "epoch": 0.22825489391018727, "grad_norm": 35.28125, "learning_rate": 9.964335183167832e-06, "loss": 23.129, "step": 12300 }, { "epoch": 0.22844046699466708, "grad_norm": 34.65625, "learning_rate": 9.964306187381788e-06, "loss": 23.0191, "step": 12310 }, { "epoch": 0.2286260400791469, "grad_norm": 36.09375, "learning_rate": 9.964277191595747e-06, "loss": 23.0469, "step": 12320 }, { "epoch": 0.22881161316362675, "grad_norm": 33.53125, "learning_rate": 9.964248195809704e-06, "loss": 22.6963, "step": 12330 }, { "epoch": 0.22899718624810658, "grad_norm": 35.15625, "learning_rate": 9.964219200023661e-06, "loss": 22.5734, "step": 12340 }, { "epoch": 0.2291827593325864, "grad_norm": 35.0625, "learning_rate": 9.964190204237619e-06, "loss": 22.6649, "step": 12350 }, { "epoch": 0.22936833241706622, "grad_norm": 33.59375, "learning_rate": 9.964161208451576e-06, "loss": 23.0639, "step": 12360 }, { "epoch": 0.22955390550154606, "grad_norm": 34.8125, "learning_rate": 9.964132212665534e-06, "loss": 23.0662, "step": 12370 }, { "epoch": 0.2297394785860259, "grad_norm": 34.71875, "learning_rate": 9.964103216879491e-06, "loss": 22.8609, "step": 12380 }, { "epoch": 0.2299250516705057, "grad_norm": 36.8125, "learning_rate": 9.96407422109345e-06, "loss": 23.0976, "step": 12390 }, { "epoch": 0.23011062475498553, "grad_norm": 34.75, "learning_rate": 9.964045225307408e-06, "loss": 22.8197, "step": 12400 }, { "epoch": 0.23029619783946537, "grad_norm": 35.5625, "learning_rate": 9.964016229521365e-06, "loss": 23.1144, "step": 12410 }, { "epoch": 0.2304817709239452, "grad_norm": 33.1875, "learning_rate": 9.963987233735322e-06, "loss": 22.8939, "step": 12420 }, { "epoch": 0.230667344008425, "grad_norm": 37.0, "learning_rate": 9.96395823794928e-06, "loss": 23.3261, "step": 12430 }, { "epoch": 0.23085291709290484, "grad_norm": 35.0, "learning_rate": 9.963929242163237e-06, "loss": 22.6032, "step": 12440 }, { "epoch": 0.23103849017738468, "grad_norm": 36.28125, "learning_rate": 9.963900246377195e-06, "loss": 22.7235, "step": 12450 }, { "epoch": 0.2312240632618645, "grad_norm": 34.75, "learning_rate": 9.963871250591152e-06, "loss": 22.9642, "step": 12460 }, { "epoch": 0.23140963634634432, "grad_norm": 34.78125, "learning_rate": 9.96384225480511e-06, "loss": 22.6095, "step": 12470 }, { "epoch": 0.23159520943082415, "grad_norm": 35.3125, "learning_rate": 9.963813259019067e-06, "loss": 22.7798, "step": 12480 }, { "epoch": 0.23178078251530398, "grad_norm": 37.90625, "learning_rate": 9.963784263233024e-06, "loss": 22.798, "step": 12490 }, { "epoch": 0.23196635559978382, "grad_norm": 36.6875, "learning_rate": 9.963755267446983e-06, "loss": 23.0762, "step": 12500 }, { "epoch": 0.23215192868426363, "grad_norm": 32.65625, "learning_rate": 9.96372627166094e-06, "loss": 22.8446, "step": 12510 }, { "epoch": 0.23233750176874346, "grad_norm": 35.21875, "learning_rate": 9.963697275874898e-06, "loss": 22.7945, "step": 12520 }, { "epoch": 0.2325230748532233, "grad_norm": 34.40625, "learning_rate": 9.963668280088856e-06, "loss": 22.8364, "step": 12530 }, { "epoch": 0.23270864793770313, "grad_norm": 34.96875, "learning_rate": 9.963639284302813e-06, "loss": 23.0125, "step": 12540 }, { "epoch": 0.23289422102218293, "grad_norm": 33.53125, "learning_rate": 9.96361028851677e-06, "loss": 22.2002, "step": 12550 }, { "epoch": 0.23307979410666277, "grad_norm": 33.90625, "learning_rate": 9.963581292730728e-06, "loss": 23.0257, "step": 12560 }, { "epoch": 0.2332653671911426, "grad_norm": 34.6875, "learning_rate": 9.963552296944687e-06, "loss": 22.9528, "step": 12570 }, { "epoch": 0.23345094027562244, "grad_norm": 34.0, "learning_rate": 9.963523301158643e-06, "loss": 22.9254, "step": 12580 }, { "epoch": 0.23363651336010224, "grad_norm": 38.78125, "learning_rate": 9.9634943053726e-06, "loss": 23.0386, "step": 12590 }, { "epoch": 0.23382208644458208, "grad_norm": 35.5, "learning_rate": 9.963465309586559e-06, "loss": 22.6576, "step": 12600 }, { "epoch": 0.2340076595290619, "grad_norm": 33.375, "learning_rate": 9.963436313800517e-06, "loss": 22.8264, "step": 12610 }, { "epoch": 0.23419323261354172, "grad_norm": 34.40625, "learning_rate": 9.963407318014474e-06, "loss": 22.7533, "step": 12620 }, { "epoch": 0.23437880569802155, "grad_norm": 36.84375, "learning_rate": 9.963378322228431e-06, "loss": 23.3032, "step": 12630 }, { "epoch": 0.2345643787825014, "grad_norm": 32.5, "learning_rate": 9.963349326442389e-06, "loss": 23.052, "step": 12640 }, { "epoch": 0.23474995186698122, "grad_norm": 33.28125, "learning_rate": 9.963320330656346e-06, "loss": 22.4562, "step": 12650 }, { "epoch": 0.23493552495146103, "grad_norm": 35.65625, "learning_rate": 9.963291334870304e-06, "loss": 22.9898, "step": 12660 }, { "epoch": 0.23512109803594086, "grad_norm": 33.625, "learning_rate": 9.963262339084263e-06, "loss": 22.7312, "step": 12670 }, { "epoch": 0.2353066711204207, "grad_norm": 33.09375, "learning_rate": 9.96323334329822e-06, "loss": 22.2514, "step": 12680 }, { "epoch": 0.23549224420490053, "grad_norm": 35.40625, "learning_rate": 9.963204347512176e-06, "loss": 22.7423, "step": 12690 }, { "epoch": 0.23567781728938034, "grad_norm": 33.3125, "learning_rate": 9.963175351726135e-06, "loss": 22.7989, "step": 12700 }, { "epoch": 0.23586339037386017, "grad_norm": 34.15625, "learning_rate": 9.963146355940092e-06, "loss": 23.4889, "step": 12710 }, { "epoch": 0.23604896345834, "grad_norm": 33.84375, "learning_rate": 9.96311736015405e-06, "loss": 22.6084, "step": 12720 }, { "epoch": 0.23623453654281984, "grad_norm": 35.65625, "learning_rate": 9.963088364368007e-06, "loss": 22.9018, "step": 12730 }, { "epoch": 0.23642010962729965, "grad_norm": 35.53125, "learning_rate": 9.963059368581965e-06, "loss": 22.4362, "step": 12740 }, { "epoch": 0.23660568271177948, "grad_norm": 36.25, "learning_rate": 9.963030372795922e-06, "loss": 22.874, "step": 12750 }, { "epoch": 0.23679125579625931, "grad_norm": 33.5, "learning_rate": 9.96300137700988e-06, "loss": 22.7573, "step": 12760 }, { "epoch": 0.23697682888073915, "grad_norm": 36.46875, "learning_rate": 9.962972381223838e-06, "loss": 22.9307, "step": 12770 }, { "epoch": 0.23716240196521896, "grad_norm": 35.65625, "learning_rate": 9.962943385437796e-06, "loss": 22.9256, "step": 12780 }, { "epoch": 0.2373479750496988, "grad_norm": 33.46875, "learning_rate": 9.962914389651752e-06, "loss": 22.8426, "step": 12790 }, { "epoch": 0.23753354813417862, "grad_norm": 36.28125, "learning_rate": 9.96288539386571e-06, "loss": 23.1097, "step": 12800 }, { "epoch": 0.23771912121865846, "grad_norm": 33.25, "learning_rate": 9.962856398079668e-06, "loss": 23.2849, "step": 12810 }, { "epoch": 0.23790469430313826, "grad_norm": 33.65625, "learning_rate": 9.962827402293625e-06, "loss": 22.7846, "step": 12820 }, { "epoch": 0.2380902673876181, "grad_norm": 34.6875, "learning_rate": 9.962798406507583e-06, "loss": 22.387, "step": 12830 }, { "epoch": 0.23827584047209793, "grad_norm": 37.28125, "learning_rate": 9.962769410721542e-06, "loss": 22.7212, "step": 12840 }, { "epoch": 0.23846141355657777, "grad_norm": 37.75, "learning_rate": 9.962740414935498e-06, "loss": 22.644, "step": 12850 }, { "epoch": 0.23864698664105757, "grad_norm": 37.0625, "learning_rate": 9.962711419149455e-06, "loss": 22.7497, "step": 12860 }, { "epoch": 0.2388325597255374, "grad_norm": 36.5625, "learning_rate": 9.962682423363414e-06, "loss": 22.9721, "step": 12870 }, { "epoch": 0.23901813281001724, "grad_norm": 33.46875, "learning_rate": 9.962653427577372e-06, "loss": 22.7019, "step": 12880 }, { "epoch": 0.23920370589449705, "grad_norm": 34.75, "learning_rate": 9.962624431791329e-06, "loss": 22.5389, "step": 12890 }, { "epoch": 0.23938927897897688, "grad_norm": 35.09375, "learning_rate": 9.962595436005286e-06, "loss": 22.7755, "step": 12900 }, { "epoch": 0.23957485206345672, "grad_norm": 35.6875, "learning_rate": 9.962566440219244e-06, "loss": 22.3278, "step": 12910 }, { "epoch": 0.23976042514793655, "grad_norm": 34.75, "learning_rate": 9.962537444433201e-06, "loss": 22.8705, "step": 12920 }, { "epoch": 0.23994599823241636, "grad_norm": 35.09375, "learning_rate": 9.962508448647159e-06, "loss": 22.7376, "step": 12930 }, { "epoch": 0.2401315713168962, "grad_norm": 35.5, "learning_rate": 9.962479452861116e-06, "loss": 22.7123, "step": 12940 }, { "epoch": 0.24031714440137603, "grad_norm": 32.1875, "learning_rate": 9.962450457075075e-06, "loss": 22.9982, "step": 12950 }, { "epoch": 0.24050271748585586, "grad_norm": 33.9375, "learning_rate": 9.962421461289031e-06, "loss": 22.7566, "step": 12960 }, { "epoch": 0.24068829057033567, "grad_norm": 34.15625, "learning_rate": 9.962392465502988e-06, "loss": 22.563, "step": 12970 }, { "epoch": 0.2408738636548155, "grad_norm": 33.65625, "learning_rate": 9.962363469716947e-06, "loss": 22.8709, "step": 12980 }, { "epoch": 0.24105943673929534, "grad_norm": 34.5625, "learning_rate": 9.962334473930905e-06, "loss": 22.8837, "step": 12990 }, { "epoch": 0.24124500982377517, "grad_norm": 36.375, "learning_rate": 9.962305478144862e-06, "loss": 22.7508, "step": 13000 }, { "epoch": 0.24143058290825498, "grad_norm": 34.0, "learning_rate": 9.96227648235882e-06, "loss": 23.1134, "step": 13010 }, { "epoch": 0.2416161559927348, "grad_norm": 35.25, "learning_rate": 9.962247486572777e-06, "loss": 22.9102, "step": 13020 }, { "epoch": 0.24180172907721464, "grad_norm": 34.875, "learning_rate": 9.962218490786734e-06, "loss": 22.9471, "step": 13030 }, { "epoch": 0.24198730216169448, "grad_norm": 34.53125, "learning_rate": 9.962189495000692e-06, "loss": 22.7062, "step": 13040 }, { "epoch": 0.24217287524617429, "grad_norm": 37.625, "learning_rate": 9.962160499214651e-06, "loss": 22.6628, "step": 13050 }, { "epoch": 0.24235844833065412, "grad_norm": 36.15625, "learning_rate": 9.962131503428607e-06, "loss": 22.793, "step": 13060 }, { "epoch": 0.24254402141513395, "grad_norm": 34.8125, "learning_rate": 9.962102507642564e-06, "loss": 22.6144, "step": 13070 }, { "epoch": 0.2427295944996138, "grad_norm": 35.21875, "learning_rate": 9.962073511856523e-06, "loss": 22.596, "step": 13080 }, { "epoch": 0.2429151675840936, "grad_norm": 34.9375, "learning_rate": 9.96204451607048e-06, "loss": 22.6484, "step": 13090 }, { "epoch": 0.24310074066857343, "grad_norm": 36.0, "learning_rate": 9.962015520284438e-06, "loss": 22.9452, "step": 13100 }, { "epoch": 0.24328631375305326, "grad_norm": 35.53125, "learning_rate": 9.961986524498395e-06, "loss": 23.1317, "step": 13110 }, { "epoch": 0.2434718868375331, "grad_norm": 36.03125, "learning_rate": 9.961957528712353e-06, "loss": 22.8217, "step": 13120 }, { "epoch": 0.2436574599220129, "grad_norm": 34.9375, "learning_rate": 9.96192853292631e-06, "loss": 22.6672, "step": 13130 }, { "epoch": 0.24384303300649274, "grad_norm": 36.65625, "learning_rate": 9.961899537140268e-06, "loss": 22.8079, "step": 13140 }, { "epoch": 0.24402860609097257, "grad_norm": 35.65625, "learning_rate": 9.961870541354227e-06, "loss": 22.5951, "step": 13150 }, { "epoch": 0.2442141791754524, "grad_norm": 34.875, "learning_rate": 9.961841545568184e-06, "loss": 22.7453, "step": 13160 }, { "epoch": 0.2443997522599322, "grad_norm": 35.03125, "learning_rate": 9.96181254978214e-06, "loss": 22.7423, "step": 13170 }, { "epoch": 0.24458532534441205, "grad_norm": 35.71875, "learning_rate": 9.961783553996099e-06, "loss": 22.5886, "step": 13180 }, { "epoch": 0.24477089842889188, "grad_norm": 37.09375, "learning_rate": 9.961754558210056e-06, "loss": 22.8475, "step": 13190 }, { "epoch": 0.2449564715133717, "grad_norm": 34.03125, "learning_rate": 9.961725562424014e-06, "loss": 22.7532, "step": 13200 }, { "epoch": 0.24514204459785152, "grad_norm": 35.84375, "learning_rate": 9.961696566637971e-06, "loss": 23.1457, "step": 13210 }, { "epoch": 0.24532761768233136, "grad_norm": 35.78125, "learning_rate": 9.961667570851929e-06, "loss": 22.8149, "step": 13220 }, { "epoch": 0.2455131907668112, "grad_norm": 35.8125, "learning_rate": 9.961638575065886e-06, "loss": 22.6625, "step": 13230 }, { "epoch": 0.245698763851291, "grad_norm": 34.5625, "learning_rate": 9.961609579279843e-06, "loss": 22.3682, "step": 13240 }, { "epoch": 0.24588433693577083, "grad_norm": 35.03125, "learning_rate": 9.961580583493802e-06, "loss": 22.7562, "step": 13250 }, { "epoch": 0.24606991002025067, "grad_norm": 36.03125, "learning_rate": 9.96155158770776e-06, "loss": 23.0357, "step": 13260 }, { "epoch": 0.2462554831047305, "grad_norm": 35.5625, "learning_rate": 9.961522591921717e-06, "loss": 22.7249, "step": 13270 }, { "epoch": 0.2464410561892103, "grad_norm": 34.25, "learning_rate": 9.961493596135675e-06, "loss": 22.897, "step": 13280 }, { "epoch": 0.24662662927369014, "grad_norm": 34.0, "learning_rate": 9.961464600349632e-06, "loss": 22.0965, "step": 13290 }, { "epoch": 0.24681220235816997, "grad_norm": 35.21875, "learning_rate": 9.96143560456359e-06, "loss": 22.5676, "step": 13300 }, { "epoch": 0.2469977754426498, "grad_norm": 34.28125, "learning_rate": 9.961406608777547e-06, "loss": 23.0798, "step": 13310 }, { "epoch": 0.24718334852712961, "grad_norm": 33.34375, "learning_rate": 9.961377612991506e-06, "loss": 22.7305, "step": 13320 }, { "epoch": 0.24736892161160945, "grad_norm": 34.09375, "learning_rate": 9.961348617205462e-06, "loss": 22.8526, "step": 13330 }, { "epoch": 0.24755449469608928, "grad_norm": 35.9375, "learning_rate": 9.961319621419419e-06, "loss": 22.5833, "step": 13340 }, { "epoch": 0.24774006778056912, "grad_norm": 34.46875, "learning_rate": 9.961290625633378e-06, "loss": 22.9855, "step": 13350 }, { "epoch": 0.24792564086504892, "grad_norm": 35.375, "learning_rate": 9.961261629847336e-06, "loss": 22.8834, "step": 13360 }, { "epoch": 0.24811121394952876, "grad_norm": 34.59375, "learning_rate": 9.961232634061293e-06, "loss": 22.9293, "step": 13370 }, { "epoch": 0.2482967870340086, "grad_norm": 33.9375, "learning_rate": 9.96120363827525e-06, "loss": 22.2887, "step": 13380 }, { "epoch": 0.24848236011848843, "grad_norm": 33.5625, "learning_rate": 9.961174642489208e-06, "loss": 23.0661, "step": 13390 }, { "epoch": 0.24866793320296823, "grad_norm": 36.125, "learning_rate": 9.961145646703165e-06, "loss": 23.0035, "step": 13400 }, { "epoch": 0.24885350628744807, "grad_norm": 36.875, "learning_rate": 9.961116650917123e-06, "loss": 23.2064, "step": 13410 }, { "epoch": 0.2490390793719279, "grad_norm": 34.625, "learning_rate": 9.96108765513108e-06, "loss": 22.6221, "step": 13420 }, { "epoch": 0.24922465245640774, "grad_norm": 35.21875, "learning_rate": 9.96105865934504e-06, "loss": 22.5216, "step": 13430 }, { "epoch": 0.24941022554088754, "grad_norm": 35.34375, "learning_rate": 9.961029663558995e-06, "loss": 22.5853, "step": 13440 }, { "epoch": 0.24959579862536738, "grad_norm": 36.25, "learning_rate": 9.961000667772954e-06, "loss": 22.6705, "step": 13450 }, { "epoch": 0.2497813717098472, "grad_norm": 33.9375, "learning_rate": 9.960971671986911e-06, "loss": 22.7321, "step": 13460 }, { "epoch": 0.24996694479432702, "grad_norm": 35.4375, "learning_rate": 9.960942676200869e-06, "loss": 22.6743, "step": 13470 }, { "epoch": 0.2501525178788069, "grad_norm": 33.53125, "learning_rate": 9.960913680414826e-06, "loss": 22.8717, "step": 13480 }, { "epoch": 0.2503380909632867, "grad_norm": 35.59375, "learning_rate": 9.960884684628784e-06, "loss": 22.5434, "step": 13490 }, { "epoch": 0.2505236640477665, "grad_norm": 35.40625, "learning_rate": 9.960855688842741e-06, "loss": 22.4418, "step": 13500 }, { "epoch": 0.25070923713224635, "grad_norm": 34.375, "learning_rate": 9.960826693056698e-06, "loss": 22.7085, "step": 13510 }, { "epoch": 0.25089481021672616, "grad_norm": 34.71875, "learning_rate": 9.960797697270656e-06, "loss": 22.6424, "step": 13520 }, { "epoch": 0.25108038330120597, "grad_norm": 34.46875, "learning_rate": 9.960768701484615e-06, "loss": 22.6786, "step": 13530 }, { "epoch": 0.25126595638568583, "grad_norm": 35.21875, "learning_rate": 9.960739705698572e-06, "loss": 22.7157, "step": 13540 }, { "epoch": 0.25145152947016564, "grad_norm": 34.84375, "learning_rate": 9.960710709912528e-06, "loss": 22.7138, "step": 13550 }, { "epoch": 0.2516371025546455, "grad_norm": 34.625, "learning_rate": 9.960681714126487e-06, "loss": 22.8282, "step": 13560 }, { "epoch": 0.2518226756391253, "grad_norm": 33.28125, "learning_rate": 9.960652718340445e-06, "loss": 22.7318, "step": 13570 }, { "epoch": 0.2520082487236051, "grad_norm": 35.59375, "learning_rate": 9.960623722554402e-06, "loss": 22.5847, "step": 13580 }, { "epoch": 0.252193821808085, "grad_norm": 35.0, "learning_rate": 9.96059472676836e-06, "loss": 22.6681, "step": 13590 }, { "epoch": 0.2523793948925648, "grad_norm": 35.8125, "learning_rate": 9.960565730982317e-06, "loss": 22.6368, "step": 13600 }, { "epoch": 0.2525649679770446, "grad_norm": 36.3125, "learning_rate": 9.960536735196274e-06, "loss": 22.5425, "step": 13610 }, { "epoch": 0.25275054106152445, "grad_norm": 34.78125, "learning_rate": 9.960507739410232e-06, "loss": 23.2371, "step": 13620 }, { "epoch": 0.25293611414600425, "grad_norm": 33.96875, "learning_rate": 9.96047874362419e-06, "loss": 22.8813, "step": 13630 }, { "epoch": 0.2531216872304841, "grad_norm": 34.65625, "learning_rate": 9.960449747838148e-06, "loss": 22.4802, "step": 13640 }, { "epoch": 0.2533072603149639, "grad_norm": 36.0625, "learning_rate": 9.960420752052104e-06, "loss": 22.7154, "step": 13650 }, { "epoch": 0.25349283339944373, "grad_norm": 35.21875, "learning_rate": 9.960391756266063e-06, "loss": 22.3837, "step": 13660 }, { "epoch": 0.2536784064839236, "grad_norm": 35.625, "learning_rate": 9.96036276048002e-06, "loss": 22.4617, "step": 13670 }, { "epoch": 0.2538639795684034, "grad_norm": 34.0, "learning_rate": 9.960333764693978e-06, "loss": 22.7565, "step": 13680 }, { "epoch": 0.2540495526528832, "grad_norm": 34.90625, "learning_rate": 9.960304768907935e-06, "loss": 22.7332, "step": 13690 }, { "epoch": 0.25423512573736307, "grad_norm": 34.28125, "learning_rate": 9.960275773121894e-06, "loss": 22.6901, "step": 13700 }, { "epoch": 0.25442069882184287, "grad_norm": 33.8125, "learning_rate": 9.96024677733585e-06, "loss": 22.8362, "step": 13710 }, { "epoch": 0.25460627190632273, "grad_norm": 33.78125, "learning_rate": 9.960217781549807e-06, "loss": 22.5693, "step": 13720 }, { "epoch": 0.25479184499080254, "grad_norm": 34.3125, "learning_rate": 9.960188785763766e-06, "loss": 22.2942, "step": 13730 }, { "epoch": 0.25497741807528235, "grad_norm": 35.375, "learning_rate": 9.960159789977724e-06, "loss": 23.0086, "step": 13740 }, { "epoch": 0.2551629911597622, "grad_norm": 35.90625, "learning_rate": 9.960130794191681e-06, "loss": 22.6488, "step": 13750 }, { "epoch": 0.255348564244242, "grad_norm": 35.59375, "learning_rate": 9.960101798405639e-06, "loss": 23.0556, "step": 13760 }, { "epoch": 0.2555341373287218, "grad_norm": 34.8125, "learning_rate": 9.960072802619596e-06, "loss": 22.7591, "step": 13770 }, { "epoch": 0.2557197104132017, "grad_norm": 35.59375, "learning_rate": 9.960043806833553e-06, "loss": 22.4961, "step": 13780 }, { "epoch": 0.2559052834976815, "grad_norm": 35.0, "learning_rate": 9.960014811047511e-06, "loss": 22.5756, "step": 13790 }, { "epoch": 0.2560908565821613, "grad_norm": 35.3125, "learning_rate": 9.95998581526147e-06, "loss": 22.5104, "step": 13800 }, { "epoch": 0.25627642966664116, "grad_norm": 36.6875, "learning_rate": 9.959956819475426e-06, "loss": 23.0103, "step": 13810 }, { "epoch": 0.25646200275112097, "grad_norm": 36.125, "learning_rate": 9.959927823689383e-06, "loss": 22.4589, "step": 13820 }, { "epoch": 0.2566475758356008, "grad_norm": 38.625, "learning_rate": 9.959898827903342e-06, "loss": 22.8177, "step": 13830 }, { "epoch": 0.25683314892008063, "grad_norm": 35.15625, "learning_rate": 9.9598698321173e-06, "loss": 22.5105, "step": 13840 }, { "epoch": 0.25701872200456044, "grad_norm": 35.875, "learning_rate": 9.959840836331257e-06, "loss": 22.8462, "step": 13850 }, { "epoch": 0.2572042950890403, "grad_norm": 36.1875, "learning_rate": 9.959811840545214e-06, "loss": 22.693, "step": 13860 }, { "epoch": 0.2573898681735201, "grad_norm": 35.53125, "learning_rate": 9.959782844759172e-06, "loss": 22.4419, "step": 13870 }, { "epoch": 0.2575754412579999, "grad_norm": 35.84375, "learning_rate": 9.95975384897313e-06, "loss": 22.8257, "step": 13880 }, { "epoch": 0.2577610143424798, "grad_norm": 33.9375, "learning_rate": 9.959724853187087e-06, "loss": 22.5976, "step": 13890 }, { "epoch": 0.2579465874269596, "grad_norm": 32.59375, "learning_rate": 9.959695857401046e-06, "loss": 22.5116, "step": 13900 }, { "epoch": 0.25813216051143945, "grad_norm": 34.625, "learning_rate": 9.959666861615003e-06, "loss": 22.3501, "step": 13910 }, { "epoch": 0.25831773359591925, "grad_norm": 34.09375, "learning_rate": 9.959637865828959e-06, "loss": 22.4398, "step": 13920 }, { "epoch": 0.25850330668039906, "grad_norm": 36.21875, "learning_rate": 9.959608870042918e-06, "loss": 22.5873, "step": 13930 }, { "epoch": 0.2586888797648789, "grad_norm": 36.21875, "learning_rate": 9.959579874256875e-06, "loss": 22.4183, "step": 13940 }, { "epoch": 0.2588744528493587, "grad_norm": 35.40625, "learning_rate": 9.959550878470833e-06, "loss": 22.896, "step": 13950 }, { "epoch": 0.25906002593383853, "grad_norm": 34.5, "learning_rate": 9.95952188268479e-06, "loss": 22.2744, "step": 13960 }, { "epoch": 0.2592455990183184, "grad_norm": 33.25, "learning_rate": 9.959492886898748e-06, "loss": 22.7941, "step": 13970 }, { "epoch": 0.2594311721027982, "grad_norm": 34.65625, "learning_rate": 9.959463891112705e-06, "loss": 22.2191, "step": 13980 }, { "epoch": 0.25961674518727806, "grad_norm": 35.15625, "learning_rate": 9.959434895326662e-06, "loss": 22.6185, "step": 13990 }, { "epoch": 0.25980231827175787, "grad_norm": 34.4375, "learning_rate": 9.95940589954062e-06, "loss": 22.1749, "step": 14000 }, { "epoch": 0.2599878913562377, "grad_norm": 35.25, "learning_rate": 9.959376903754579e-06, "loss": 22.8769, "step": 14010 }, { "epoch": 0.26017346444071754, "grad_norm": 33.25, "learning_rate": 9.959347907968536e-06, "loss": 22.8237, "step": 14020 }, { "epoch": 0.26035903752519735, "grad_norm": 33.4375, "learning_rate": 9.959318912182494e-06, "loss": 22.6494, "step": 14030 }, { "epoch": 0.26054461060967715, "grad_norm": 35.9375, "learning_rate": 9.959289916396451e-06, "loss": 22.4676, "step": 14040 }, { "epoch": 0.260730183694157, "grad_norm": 35.84375, "learning_rate": 9.959260920610409e-06, "loss": 22.5966, "step": 14050 }, { "epoch": 0.2609157567786368, "grad_norm": 35.46875, "learning_rate": 9.959231924824366e-06, "loss": 22.6443, "step": 14060 }, { "epoch": 0.2611013298631166, "grad_norm": 35.28125, "learning_rate": 9.959202929038323e-06, "loss": 22.7265, "step": 14070 }, { "epoch": 0.2612869029475965, "grad_norm": 33.375, "learning_rate": 9.95917393325228e-06, "loss": 22.7487, "step": 14080 }, { "epoch": 0.2614724760320763, "grad_norm": 35.6875, "learning_rate": 9.959144937466238e-06, "loss": 22.6146, "step": 14090 }, { "epoch": 0.26165804911655616, "grad_norm": 32.78125, "learning_rate": 9.959115941680196e-06, "loss": 22.4781, "step": 14100 }, { "epoch": 0.26184362220103596, "grad_norm": 33.625, "learning_rate": 9.959086945894155e-06, "loss": 22.5717, "step": 14110 }, { "epoch": 0.26202919528551577, "grad_norm": 35.625, "learning_rate": 9.959057950108112e-06, "loss": 22.8922, "step": 14120 }, { "epoch": 0.26221476836999563, "grad_norm": 34.65625, "learning_rate": 9.95902895432207e-06, "loss": 22.4869, "step": 14130 }, { "epoch": 0.26240034145447544, "grad_norm": 34.90625, "learning_rate": 9.958999958536027e-06, "loss": 22.5448, "step": 14140 }, { "epoch": 0.26258591453895525, "grad_norm": 37.15625, "learning_rate": 9.958970962749984e-06, "loss": 22.8666, "step": 14150 }, { "epoch": 0.2627714876234351, "grad_norm": 34.15625, "learning_rate": 9.958941966963942e-06, "loss": 22.4539, "step": 14160 }, { "epoch": 0.2629570607079149, "grad_norm": 34.03125, "learning_rate": 9.958912971177899e-06, "loss": 22.6467, "step": 14170 }, { "epoch": 0.2631426337923948, "grad_norm": 35.5625, "learning_rate": 9.958883975391858e-06, "loss": 22.4136, "step": 14180 }, { "epoch": 0.2633282068768746, "grad_norm": 36.6875, "learning_rate": 9.958854979605814e-06, "loss": 22.759, "step": 14190 }, { "epoch": 0.2635137799613544, "grad_norm": 34.5625, "learning_rate": 9.958825983819771e-06, "loss": 22.6078, "step": 14200 }, { "epoch": 0.26369935304583425, "grad_norm": 34.96875, "learning_rate": 9.95879698803373e-06, "loss": 22.6325, "step": 14210 }, { "epoch": 0.26388492613031406, "grad_norm": 36.59375, "learning_rate": 9.958767992247688e-06, "loss": 22.7356, "step": 14220 }, { "epoch": 0.26407049921479386, "grad_norm": 36.1875, "learning_rate": 9.958738996461645e-06, "loss": 22.3037, "step": 14230 }, { "epoch": 0.2642560722992737, "grad_norm": 35.75, "learning_rate": 9.958710000675603e-06, "loss": 22.3392, "step": 14240 }, { "epoch": 0.26444164538375353, "grad_norm": 34.0, "learning_rate": 9.95868100488956e-06, "loss": 21.9539, "step": 14250 }, { "epoch": 0.2646272184682334, "grad_norm": 34.0, "learning_rate": 9.958652009103518e-06, "loss": 22.5607, "step": 14260 }, { "epoch": 0.2648127915527132, "grad_norm": 37.40625, "learning_rate": 9.958623013317475e-06, "loss": 22.2516, "step": 14270 }, { "epoch": 0.264998364637193, "grad_norm": 35.875, "learning_rate": 9.958594017531434e-06, "loss": 21.6971, "step": 14280 }, { "epoch": 0.26518393772167287, "grad_norm": 35.46875, "learning_rate": 9.958565021745391e-06, "loss": 22.7012, "step": 14290 }, { "epoch": 0.2653695108061527, "grad_norm": 36.3125, "learning_rate": 9.958536025959347e-06, "loss": 22.4616, "step": 14300 }, { "epoch": 0.2655550838906325, "grad_norm": 37.3125, "learning_rate": 9.958507030173306e-06, "loss": 22.4242, "step": 14310 }, { "epoch": 0.26574065697511234, "grad_norm": 34.5625, "learning_rate": 9.958478034387264e-06, "loss": 22.3209, "step": 14320 }, { "epoch": 0.26592623005959215, "grad_norm": 33.09375, "learning_rate": 9.958449038601221e-06, "loss": 22.8311, "step": 14330 }, { "epoch": 0.26611180314407196, "grad_norm": 34.75, "learning_rate": 9.958420042815178e-06, "loss": 22.636, "step": 14340 }, { "epoch": 0.2662973762285518, "grad_norm": 35.78125, "learning_rate": 9.958391047029136e-06, "loss": 22.0774, "step": 14350 }, { "epoch": 0.2664829493130316, "grad_norm": 35.71875, "learning_rate": 9.958362051243093e-06, "loss": 22.4367, "step": 14360 }, { "epoch": 0.2666685223975115, "grad_norm": 35.90625, "learning_rate": 9.95833305545705e-06, "loss": 22.3071, "step": 14370 }, { "epoch": 0.2668540954819913, "grad_norm": 33.96875, "learning_rate": 9.95830405967101e-06, "loss": 22.3167, "step": 14380 }, { "epoch": 0.2670396685664711, "grad_norm": 35.5, "learning_rate": 9.958275063884967e-06, "loss": 22.671, "step": 14390 }, { "epoch": 0.26722524165095096, "grad_norm": 35.1875, "learning_rate": 9.958246068098923e-06, "loss": 22.7599, "step": 14400 }, { "epoch": 0.26741081473543077, "grad_norm": 36.8125, "learning_rate": 9.958217072312882e-06, "loss": 22.1926, "step": 14410 }, { "epoch": 0.2675963878199106, "grad_norm": 36.1875, "learning_rate": 9.95818807652684e-06, "loss": 22.4728, "step": 14420 }, { "epoch": 0.26778196090439044, "grad_norm": 35.875, "learning_rate": 9.958159080740797e-06, "loss": 22.0861, "step": 14430 }, { "epoch": 0.26796753398887024, "grad_norm": 38.03125, "learning_rate": 9.958130084954754e-06, "loss": 22.6352, "step": 14440 }, { "epoch": 0.2681531070733501, "grad_norm": 34.21875, "learning_rate": 9.958101089168712e-06, "loss": 22.3403, "step": 14450 }, { "epoch": 0.2683386801578299, "grad_norm": 34.0, "learning_rate": 9.958072093382669e-06, "loss": 22.5642, "step": 14460 }, { "epoch": 0.2685242532423097, "grad_norm": 34.71875, "learning_rate": 9.958043097596626e-06, "loss": 22.4688, "step": 14470 }, { "epoch": 0.2687098263267896, "grad_norm": 33.0625, "learning_rate": 9.958014101810584e-06, "loss": 22.3117, "step": 14480 }, { "epoch": 0.2688953994112694, "grad_norm": 34.0625, "learning_rate": 9.957985106024543e-06, "loss": 22.4656, "step": 14490 }, { "epoch": 0.2690809724957492, "grad_norm": 33.90625, "learning_rate": 9.9579561102385e-06, "loss": 22.5187, "step": 14500 }, { "epoch": 0.26926654558022906, "grad_norm": 35.84375, "learning_rate": 9.957927114452458e-06, "loss": 22.3564, "step": 14510 }, { "epoch": 0.26945211866470886, "grad_norm": 34.34375, "learning_rate": 9.957898118666415e-06, "loss": 22.2059, "step": 14520 }, { "epoch": 0.2696376917491887, "grad_norm": 36.65625, "learning_rate": 9.957869122880373e-06, "loss": 22.3464, "step": 14530 }, { "epoch": 0.26982326483366853, "grad_norm": 34.71875, "learning_rate": 9.95784012709433e-06, "loss": 22.0195, "step": 14540 }, { "epoch": 0.27000883791814834, "grad_norm": 33.46875, "learning_rate": 9.957811131308287e-06, "loss": 22.4502, "step": 14550 }, { "epoch": 0.2701944110026282, "grad_norm": 35.25, "learning_rate": 9.957782135522245e-06, "loss": 22.4365, "step": 14560 }, { "epoch": 0.270379984087108, "grad_norm": 35.5, "learning_rate": 9.957753139736202e-06, "loss": 22.2782, "step": 14570 }, { "epoch": 0.2705655571715878, "grad_norm": 35.03125, "learning_rate": 9.95772414395016e-06, "loss": 22.4778, "step": 14580 }, { "epoch": 0.2707511302560677, "grad_norm": 35.25, "learning_rate": 9.957695148164119e-06, "loss": 22.5201, "step": 14590 }, { "epoch": 0.2709367033405475, "grad_norm": 37.84375, "learning_rate": 9.957666152378076e-06, "loss": 22.5597, "step": 14600 }, { "epoch": 0.2711222764250273, "grad_norm": 35.03125, "learning_rate": 9.957637156592034e-06, "loss": 22.3051, "step": 14610 }, { "epoch": 0.27130784950950715, "grad_norm": 35.0, "learning_rate": 9.957608160805991e-06, "loss": 22.3427, "step": 14620 }, { "epoch": 0.27149342259398695, "grad_norm": 36.65625, "learning_rate": 9.957579165019948e-06, "loss": 22.1068, "step": 14630 }, { "epoch": 0.2716789956784668, "grad_norm": 35.71875, "learning_rate": 9.957550169233906e-06, "loss": 22.4494, "step": 14640 }, { "epoch": 0.2718645687629466, "grad_norm": 32.84375, "learning_rate": 9.957521173447863e-06, "loss": 22.6318, "step": 14650 }, { "epoch": 0.27205014184742643, "grad_norm": 33.78125, "learning_rate": 9.957492177661822e-06, "loss": 22.7068, "step": 14660 }, { "epoch": 0.2722357149319063, "grad_norm": 35.09375, "learning_rate": 9.957463181875778e-06, "loss": 21.9763, "step": 14670 }, { "epoch": 0.2724212880163861, "grad_norm": 35.625, "learning_rate": 9.957434186089735e-06, "loss": 22.4956, "step": 14680 }, { "epoch": 0.2726068611008659, "grad_norm": 33.40625, "learning_rate": 9.957405190303694e-06, "loss": 22.6001, "step": 14690 }, { "epoch": 0.27279243418534577, "grad_norm": 35.375, "learning_rate": 9.957376194517652e-06, "loss": 22.5121, "step": 14700 }, { "epoch": 0.2729780072698256, "grad_norm": 34.34375, "learning_rate": 9.95734719873161e-06, "loss": 22.2686, "step": 14710 }, { "epoch": 0.27316358035430544, "grad_norm": 34.90625, "learning_rate": 9.957318202945567e-06, "loss": 22.4447, "step": 14720 }, { "epoch": 0.27334915343878524, "grad_norm": 37.34375, "learning_rate": 9.957289207159524e-06, "loss": 22.6572, "step": 14730 }, { "epoch": 0.27353472652326505, "grad_norm": 35.5, "learning_rate": 9.957260211373482e-06, "loss": 22.6281, "step": 14740 }, { "epoch": 0.2737202996077449, "grad_norm": 36.125, "learning_rate": 9.957231215587439e-06, "loss": 22.1592, "step": 14750 }, { "epoch": 0.2739058726922247, "grad_norm": 35.46875, "learning_rate": 9.957202219801398e-06, "loss": 22.5348, "step": 14760 }, { "epoch": 0.2740914457767045, "grad_norm": 36.3125, "learning_rate": 9.957173224015355e-06, "loss": 22.4853, "step": 14770 }, { "epoch": 0.2742770188611844, "grad_norm": 35.84375, "learning_rate": 9.957144228229311e-06, "loss": 22.3393, "step": 14780 }, { "epoch": 0.2744625919456642, "grad_norm": 34.65625, "learning_rate": 9.95711523244327e-06, "loss": 22.3089, "step": 14790 }, { "epoch": 0.27464816503014405, "grad_norm": 34.5625, "learning_rate": 9.957086236657228e-06, "loss": 22.4888, "step": 14800 }, { "epoch": 0.27483373811462386, "grad_norm": 36.34375, "learning_rate": 9.957057240871185e-06, "loss": 21.9885, "step": 14810 }, { "epoch": 0.27501931119910367, "grad_norm": 33.75, "learning_rate": 9.957028245085142e-06, "loss": 22.3091, "step": 14820 }, { "epoch": 0.27520488428358353, "grad_norm": 35.59375, "learning_rate": 9.9569992492991e-06, "loss": 22.2194, "step": 14830 }, { "epoch": 0.27539045736806333, "grad_norm": 36.96875, "learning_rate": 9.956970253513057e-06, "loss": 22.424, "step": 14840 }, { "epoch": 0.27557603045254314, "grad_norm": 35.875, "learning_rate": 9.956941257727015e-06, "loss": 22.548, "step": 14850 }, { "epoch": 0.275761603537023, "grad_norm": 33.6875, "learning_rate": 9.956912261940974e-06, "loss": 22.2094, "step": 14860 }, { "epoch": 0.2759471766215028, "grad_norm": 36.21875, "learning_rate": 9.956883266154931e-06, "loss": 22.644, "step": 14870 }, { "epoch": 0.27613274970598267, "grad_norm": 36.1875, "learning_rate": 9.956854270368889e-06, "loss": 22.7149, "step": 14880 }, { "epoch": 0.2763183227904625, "grad_norm": 34.9375, "learning_rate": 9.956825274582846e-06, "loss": 22.0443, "step": 14890 }, { "epoch": 0.2765038958749423, "grad_norm": 34.59375, "learning_rate": 9.956796278796803e-06, "loss": 22.2534, "step": 14900 }, { "epoch": 0.27668946895942215, "grad_norm": 34.875, "learning_rate": 9.95676728301076e-06, "loss": 22.5268, "step": 14910 }, { "epoch": 0.27687504204390195, "grad_norm": 36.3125, "learning_rate": 9.956738287224718e-06, "loss": 22.2169, "step": 14920 }, { "epoch": 0.27706061512838176, "grad_norm": 34.46875, "learning_rate": 9.956709291438676e-06, "loss": 22.2188, "step": 14930 }, { "epoch": 0.2772461882128616, "grad_norm": 33.625, "learning_rate": 9.956680295652633e-06, "loss": 22.4894, "step": 14940 }, { "epoch": 0.27743176129734143, "grad_norm": 34.53125, "learning_rate": 9.95665129986659e-06, "loss": 21.8868, "step": 14950 }, { "epoch": 0.27761733438182123, "grad_norm": 35.6875, "learning_rate": 9.95662230408055e-06, "loss": 22.3338, "step": 14960 }, { "epoch": 0.2778029074663011, "grad_norm": 34.4375, "learning_rate": 9.956593308294507e-06, "loss": 22.4738, "step": 14970 }, { "epoch": 0.2779884805507809, "grad_norm": 36.6875, "learning_rate": 9.956564312508464e-06, "loss": 22.3862, "step": 14980 }, { "epoch": 0.27817405363526077, "grad_norm": 33.96875, "learning_rate": 9.956535316722422e-06, "loss": 21.9725, "step": 14990 }, { "epoch": 0.27835962671974057, "grad_norm": 34.5, "learning_rate": 9.95650632093638e-06, "loss": 22.3186, "step": 15000 }, { "epoch": 0.27835962671974057, "eval_loss": 2.7910494804382324, "eval_runtime": 454.4569, "eval_samples_per_second": 3195.28, "eval_steps_per_second": 49.928, "step": 15000 }, { "epoch": 0.2785451998042204, "grad_norm": 34.0625, "learning_rate": 9.956477325150337e-06, "loss": 22.2863, "step": 15010 }, { "epoch": 0.27873077288870024, "grad_norm": 34.96875, "learning_rate": 9.956448329364294e-06, "loss": 22.5122, "step": 15020 }, { "epoch": 0.27891634597318005, "grad_norm": 33.625, "learning_rate": 9.956419333578251e-06, "loss": 22.3634, "step": 15030 }, { "epoch": 0.27910191905765985, "grad_norm": 35.5625, "learning_rate": 9.95639033779221e-06, "loss": 22.5542, "step": 15040 }, { "epoch": 0.2792874921421397, "grad_norm": 35.25, "learning_rate": 9.956361342006166e-06, "loss": 22.201, "step": 15050 }, { "epoch": 0.2794730652266195, "grad_norm": 34.59375, "learning_rate": 9.956332346220124e-06, "loss": 22.3541, "step": 15060 }, { "epoch": 0.2796586383110994, "grad_norm": 33.875, "learning_rate": 9.956303350434083e-06, "loss": 21.9755, "step": 15070 }, { "epoch": 0.2798442113955792, "grad_norm": 34.03125, "learning_rate": 9.95627435464804e-06, "loss": 22.5443, "step": 15080 }, { "epoch": 0.280029784480059, "grad_norm": 35.46875, "learning_rate": 9.956245358861998e-06, "loss": 22.1045, "step": 15090 }, { "epoch": 0.28021535756453886, "grad_norm": 34.125, "learning_rate": 9.956216363075955e-06, "loss": 22.3805, "step": 15100 }, { "epoch": 0.28040093064901866, "grad_norm": 34.875, "learning_rate": 9.956187367289912e-06, "loss": 22.2453, "step": 15110 }, { "epoch": 0.28058650373349847, "grad_norm": 35.75, "learning_rate": 9.95615837150387e-06, "loss": 22.4519, "step": 15120 }, { "epoch": 0.28077207681797833, "grad_norm": 35.0, "learning_rate": 9.956129375717827e-06, "loss": 22.2457, "step": 15130 }, { "epoch": 0.28095764990245814, "grad_norm": 33.90625, "learning_rate": 9.956100379931786e-06, "loss": 22.1788, "step": 15140 }, { "epoch": 0.281143222986938, "grad_norm": 36.21875, "learning_rate": 9.956071384145742e-06, "loss": 22.0945, "step": 15150 }, { "epoch": 0.2813287960714178, "grad_norm": 35.90625, "learning_rate": 9.9560423883597e-06, "loss": 22.1752, "step": 15160 }, { "epoch": 0.2815143691558976, "grad_norm": 36.15625, "learning_rate": 9.956013392573658e-06, "loss": 22.5829, "step": 15170 }, { "epoch": 0.2816999422403775, "grad_norm": 37.0625, "learning_rate": 9.955984396787616e-06, "loss": 22.6663, "step": 15180 }, { "epoch": 0.2818855153248573, "grad_norm": 35.5, "learning_rate": 9.955955401001573e-06, "loss": 22.5561, "step": 15190 }, { "epoch": 0.2820710884093371, "grad_norm": 36.0, "learning_rate": 9.95592640521553e-06, "loss": 22.5047, "step": 15200 }, { "epoch": 0.28225666149381695, "grad_norm": 36.28125, "learning_rate": 9.955897409429488e-06, "loss": 22.4836, "step": 15210 }, { "epoch": 0.28244223457829676, "grad_norm": 35.96875, "learning_rate": 9.955868413643446e-06, "loss": 22.1041, "step": 15220 }, { "epoch": 0.28262780766277656, "grad_norm": 36.0, "learning_rate": 9.955839417857403e-06, "loss": 22.592, "step": 15230 }, { "epoch": 0.2828133807472564, "grad_norm": 35.375, "learning_rate": 9.955810422071362e-06, "loss": 22.2083, "step": 15240 }, { "epoch": 0.28299895383173623, "grad_norm": 34.53125, "learning_rate": 9.95578142628532e-06, "loss": 22.371, "step": 15250 }, { "epoch": 0.2831845269162161, "grad_norm": 34.25, "learning_rate": 9.955752430499275e-06, "loss": 22.5102, "step": 15260 }, { "epoch": 0.2833701000006959, "grad_norm": 34.125, "learning_rate": 9.955723434713234e-06, "loss": 22.0664, "step": 15270 }, { "epoch": 0.2835556730851757, "grad_norm": 36.1875, "learning_rate": 9.955694438927192e-06, "loss": 22.2947, "step": 15280 }, { "epoch": 0.28374124616965557, "grad_norm": 34.96875, "learning_rate": 9.955665443141149e-06, "loss": 22.093, "step": 15290 }, { "epoch": 0.2839268192541354, "grad_norm": 36.375, "learning_rate": 9.955636447355106e-06, "loss": 22.4046, "step": 15300 }, { "epoch": 0.2841123923386152, "grad_norm": 36.4375, "learning_rate": 9.955607451569066e-06, "loss": 22.7986, "step": 15310 }, { "epoch": 0.28429796542309504, "grad_norm": 37.40625, "learning_rate": 9.955578455783021e-06, "loss": 22.5896, "step": 15320 }, { "epoch": 0.28448353850757485, "grad_norm": 37.375, "learning_rate": 9.955549459996979e-06, "loss": 22.2348, "step": 15330 }, { "epoch": 0.2846691115920547, "grad_norm": 35.96875, "learning_rate": 9.955520464210938e-06, "loss": 22.1714, "step": 15340 }, { "epoch": 0.2848546846765345, "grad_norm": 35.53125, "learning_rate": 9.955491468424895e-06, "loss": 22.097, "step": 15350 }, { "epoch": 0.2850402577610143, "grad_norm": 38.03125, "learning_rate": 9.955462472638853e-06, "loss": 22.6676, "step": 15360 }, { "epoch": 0.2852258308454942, "grad_norm": 36.0625, "learning_rate": 9.95543347685281e-06, "loss": 21.9045, "step": 15370 }, { "epoch": 0.285411403929974, "grad_norm": 34.59375, "learning_rate": 9.955404481066767e-06, "loss": 22.1789, "step": 15380 }, { "epoch": 0.2855969770144538, "grad_norm": 34.4375, "learning_rate": 9.955375485280725e-06, "loss": 22.5378, "step": 15390 }, { "epoch": 0.28578255009893366, "grad_norm": 33.96875, "learning_rate": 9.955346489494682e-06, "loss": 22.4062, "step": 15400 }, { "epoch": 0.28596812318341347, "grad_norm": 35.53125, "learning_rate": 9.955317493708641e-06, "loss": 22.1425, "step": 15410 }, { "epoch": 0.28615369626789333, "grad_norm": 35.25, "learning_rate": 9.955288497922597e-06, "loss": 22.2226, "step": 15420 }, { "epoch": 0.28633926935237314, "grad_norm": 36.03125, "learning_rate": 9.955259502136554e-06, "loss": 22.172, "step": 15430 }, { "epoch": 0.28652484243685294, "grad_norm": 33.59375, "learning_rate": 9.955230506350514e-06, "loss": 21.8491, "step": 15440 }, { "epoch": 0.2867104155213328, "grad_norm": 34.59375, "learning_rate": 9.955201510564471e-06, "loss": 22.2386, "step": 15450 }, { "epoch": 0.2868959886058126, "grad_norm": 34.71875, "learning_rate": 9.955172514778428e-06, "loss": 22.1716, "step": 15460 }, { "epoch": 0.2870815616902924, "grad_norm": 35.75, "learning_rate": 9.955143518992386e-06, "loss": 21.7604, "step": 15470 }, { "epoch": 0.2872671347747723, "grad_norm": 35.21875, "learning_rate": 9.955114523206343e-06, "loss": 21.8419, "step": 15480 }, { "epoch": 0.2874527078592521, "grad_norm": 35.96875, "learning_rate": 9.9550855274203e-06, "loss": 21.8912, "step": 15490 }, { "epoch": 0.2876382809437319, "grad_norm": 35.71875, "learning_rate": 9.955056531634258e-06, "loss": 22.5902, "step": 15500 }, { "epoch": 0.28782385402821176, "grad_norm": 36.15625, "learning_rate": 9.955027535848215e-06, "loss": 22.1192, "step": 15510 }, { "epoch": 0.28800942711269156, "grad_norm": 35.25, "learning_rate": 9.954998540062175e-06, "loss": 21.5665, "step": 15520 }, { "epoch": 0.2881950001971714, "grad_norm": 33.46875, "learning_rate": 9.95496954427613e-06, "loss": 21.8936, "step": 15530 }, { "epoch": 0.28838057328165123, "grad_norm": 34.5625, "learning_rate": 9.95494054849009e-06, "loss": 22.6283, "step": 15540 }, { "epoch": 0.28856614636613104, "grad_norm": 35.3125, "learning_rate": 9.954911552704047e-06, "loss": 22.3834, "step": 15550 }, { "epoch": 0.2887517194506109, "grad_norm": 37.84375, "learning_rate": 9.954882556918004e-06, "loss": 21.9524, "step": 15560 }, { "epoch": 0.2889372925350907, "grad_norm": 33.75, "learning_rate": 9.954853561131962e-06, "loss": 22.4104, "step": 15570 }, { "epoch": 0.2891228656195705, "grad_norm": 34.53125, "learning_rate": 9.954824565345919e-06, "loss": 22.7281, "step": 15580 }, { "epoch": 0.2893084387040504, "grad_norm": 34.9375, "learning_rate": 9.954795569559876e-06, "loss": 21.9499, "step": 15590 }, { "epoch": 0.2894940117885302, "grad_norm": 35.53125, "learning_rate": 9.954766573773834e-06, "loss": 21.9476, "step": 15600 }, { "epoch": 0.28967958487301004, "grad_norm": 33.3125, "learning_rate": 9.954737577987791e-06, "loss": 21.7338, "step": 15610 }, { "epoch": 0.28986515795748985, "grad_norm": 35.6875, "learning_rate": 9.95470858220175e-06, "loss": 21.9634, "step": 15620 }, { "epoch": 0.29005073104196966, "grad_norm": 35.875, "learning_rate": 9.954679586415708e-06, "loss": 21.9819, "step": 15630 }, { "epoch": 0.2902363041264495, "grad_norm": 36.6875, "learning_rate": 9.954650590629663e-06, "loss": 22.4904, "step": 15640 }, { "epoch": 0.2904218772109293, "grad_norm": 34.78125, "learning_rate": 9.954621594843623e-06, "loss": 22.7612, "step": 15650 }, { "epoch": 0.29060745029540913, "grad_norm": 35.3125, "learning_rate": 9.95459259905758e-06, "loss": 22.3808, "step": 15660 }, { "epoch": 0.290793023379889, "grad_norm": 36.09375, "learning_rate": 9.954563603271537e-06, "loss": 22.5321, "step": 15670 }, { "epoch": 0.2909785964643688, "grad_norm": 35.46875, "learning_rate": 9.954534607485495e-06, "loss": 22.4891, "step": 15680 }, { "epoch": 0.29116416954884866, "grad_norm": 34.5625, "learning_rate": 9.954505611699452e-06, "loss": 22.188, "step": 15690 }, { "epoch": 0.29134974263332847, "grad_norm": 36.5625, "learning_rate": 9.95447661591341e-06, "loss": 22.2892, "step": 15700 }, { "epoch": 0.2915353157178083, "grad_norm": 37.6875, "learning_rate": 9.954447620127367e-06, "loss": 22.1511, "step": 15710 }, { "epoch": 0.29172088880228814, "grad_norm": 36.15625, "learning_rate": 9.954418624341326e-06, "loss": 22.2926, "step": 15720 }, { "epoch": 0.29190646188676794, "grad_norm": 33.8125, "learning_rate": 9.954389628555283e-06, "loss": 22.241, "step": 15730 }, { "epoch": 0.29209203497124775, "grad_norm": 34.71875, "learning_rate": 9.95436063276924e-06, "loss": 22.334, "step": 15740 }, { "epoch": 0.2922776080557276, "grad_norm": 36.53125, "learning_rate": 9.954331636983198e-06, "loss": 22.4288, "step": 15750 }, { "epoch": 0.2924631811402074, "grad_norm": 35.25, "learning_rate": 9.954302641197156e-06, "loss": 22.411, "step": 15760 }, { "epoch": 0.2926487542246872, "grad_norm": 35.90625, "learning_rate": 9.954273645411113e-06, "loss": 21.8901, "step": 15770 }, { "epoch": 0.2928343273091671, "grad_norm": 36.59375, "learning_rate": 9.95424464962507e-06, "loss": 22.2214, "step": 15780 }, { "epoch": 0.2930199003936469, "grad_norm": 33.90625, "learning_rate": 9.95421565383903e-06, "loss": 21.8987, "step": 15790 }, { "epoch": 0.29320547347812675, "grad_norm": 35.90625, "learning_rate": 9.954186658052985e-06, "loss": 22.1996, "step": 15800 }, { "epoch": 0.29339104656260656, "grad_norm": 34.6875, "learning_rate": 9.954157662266943e-06, "loss": 22.1059, "step": 15810 }, { "epoch": 0.29357661964708637, "grad_norm": 35.3125, "learning_rate": 9.954128666480902e-06, "loss": 22.3283, "step": 15820 }, { "epoch": 0.29376219273156623, "grad_norm": 36.59375, "learning_rate": 9.95409967069486e-06, "loss": 22.438, "step": 15830 }, { "epoch": 0.29394776581604604, "grad_norm": 37.15625, "learning_rate": 9.954070674908817e-06, "loss": 22.409, "step": 15840 }, { "epoch": 0.29413333890052584, "grad_norm": 34.8125, "learning_rate": 9.954041679122774e-06, "loss": 22.524, "step": 15850 }, { "epoch": 0.2943189119850057, "grad_norm": 37.0625, "learning_rate": 9.954012683336731e-06, "loss": 22.6223, "step": 15860 }, { "epoch": 0.2945044850694855, "grad_norm": 35.0, "learning_rate": 9.953983687550689e-06, "loss": 22.2213, "step": 15870 }, { "epoch": 0.2946900581539654, "grad_norm": 33.8125, "learning_rate": 9.953954691764646e-06, "loss": 21.861, "step": 15880 }, { "epoch": 0.2948756312384452, "grad_norm": 33.59375, "learning_rate": 9.953925695978605e-06, "loss": 22.1122, "step": 15890 }, { "epoch": 0.295061204322925, "grad_norm": 35.90625, "learning_rate": 9.953896700192563e-06, "loss": 21.8675, "step": 15900 }, { "epoch": 0.29524677740740485, "grad_norm": 32.65625, "learning_rate": 9.953867704406518e-06, "loss": 22.0003, "step": 15910 }, { "epoch": 0.29543235049188465, "grad_norm": 33.84375, "learning_rate": 9.953838708620478e-06, "loss": 22.2325, "step": 15920 }, { "epoch": 0.29561792357636446, "grad_norm": 36.15625, "learning_rate": 9.953809712834435e-06, "loss": 22.2477, "step": 15930 }, { "epoch": 0.2958034966608443, "grad_norm": 35.28125, "learning_rate": 9.953780717048392e-06, "loss": 22.0163, "step": 15940 }, { "epoch": 0.29598906974532413, "grad_norm": 35.84375, "learning_rate": 9.95375172126235e-06, "loss": 21.9324, "step": 15950 }, { "epoch": 0.296174642829804, "grad_norm": 36.90625, "learning_rate": 9.953722725476307e-06, "loss": 22.0944, "step": 15960 }, { "epoch": 0.2963602159142838, "grad_norm": 36.9375, "learning_rate": 9.953693729690265e-06, "loss": 22.2074, "step": 15970 }, { "epoch": 0.2965457889987636, "grad_norm": 35.84375, "learning_rate": 9.953664733904222e-06, "loss": 22.2047, "step": 15980 }, { "epoch": 0.29673136208324347, "grad_norm": 36.09375, "learning_rate": 9.953635738118181e-06, "loss": 22.3672, "step": 15990 }, { "epoch": 0.2969169351677233, "grad_norm": 33.78125, "learning_rate": 9.953606742332139e-06, "loss": 22.2694, "step": 16000 }, { "epoch": 0.2971025082522031, "grad_norm": 36.46875, "learning_rate": 9.953577746546094e-06, "loss": 21.7142, "step": 16010 }, { "epoch": 0.29728808133668294, "grad_norm": 34.09375, "learning_rate": 9.953548750760053e-06, "loss": 21.9962, "step": 16020 }, { "epoch": 0.29747365442116275, "grad_norm": 36.59375, "learning_rate": 9.95351975497401e-06, "loss": 22.0277, "step": 16030 }, { "epoch": 0.29765922750564255, "grad_norm": 33.15625, "learning_rate": 9.953490759187968e-06, "loss": 22.0372, "step": 16040 }, { "epoch": 0.2978448005901224, "grad_norm": 34.8125, "learning_rate": 9.953461763401926e-06, "loss": 22.3242, "step": 16050 }, { "epoch": 0.2980303736746022, "grad_norm": 36.15625, "learning_rate": 9.953432767615883e-06, "loss": 21.9268, "step": 16060 }, { "epoch": 0.2982159467590821, "grad_norm": 34.8125, "learning_rate": 9.95340377182984e-06, "loss": 21.7896, "step": 16070 }, { "epoch": 0.2984015198435619, "grad_norm": 35.625, "learning_rate": 9.953374776043798e-06, "loss": 21.933, "step": 16080 }, { "epoch": 0.2985870929280417, "grad_norm": 34.5625, "learning_rate": 9.953345780257755e-06, "loss": 22.3957, "step": 16090 }, { "epoch": 0.29877266601252156, "grad_norm": 34.3125, "learning_rate": 9.953316784471714e-06, "loss": 22.267, "step": 16100 }, { "epoch": 0.29895823909700137, "grad_norm": 34.21875, "learning_rate": 9.953287788685672e-06, "loss": 22.1046, "step": 16110 }, { "epoch": 0.2991438121814812, "grad_norm": 32.5625, "learning_rate": 9.953258792899627e-06, "loss": 22.5077, "step": 16120 }, { "epoch": 0.29932938526596103, "grad_norm": 35.28125, "learning_rate": 9.953229797113587e-06, "loss": 22.0254, "step": 16130 }, { "epoch": 0.29951495835044084, "grad_norm": 35.53125, "learning_rate": 9.953200801327544e-06, "loss": 21.9783, "step": 16140 }, { "epoch": 0.2997005314349207, "grad_norm": 35.96875, "learning_rate": 9.953171805541501e-06, "loss": 22.4283, "step": 16150 }, { "epoch": 0.2998861045194005, "grad_norm": 35.34375, "learning_rate": 9.953142809755459e-06, "loss": 21.9178, "step": 16160 }, { "epoch": 0.3000716776038803, "grad_norm": 35.5625, "learning_rate": 9.953113813969416e-06, "loss": 21.8685, "step": 16170 }, { "epoch": 0.3002572506883602, "grad_norm": 33.65625, "learning_rate": 9.953084818183374e-06, "loss": 22.4529, "step": 16180 }, { "epoch": 0.30044282377284, "grad_norm": 32.5, "learning_rate": 9.953055822397331e-06, "loss": 21.9859, "step": 16190 }, { "epoch": 0.3006283968573198, "grad_norm": 35.65625, "learning_rate": 9.95302682661129e-06, "loss": 21.9968, "step": 16200 }, { "epoch": 0.30081396994179965, "grad_norm": 35.90625, "learning_rate": 9.952997830825247e-06, "loss": 22.1074, "step": 16210 }, { "epoch": 0.30099954302627946, "grad_norm": 37.0625, "learning_rate": 9.952968835039205e-06, "loss": 21.8784, "step": 16220 }, { "epoch": 0.3011851161107593, "grad_norm": 36.71875, "learning_rate": 9.952939839253162e-06, "loss": 22.3373, "step": 16230 }, { "epoch": 0.3013706891952391, "grad_norm": 35.8125, "learning_rate": 9.95291084346712e-06, "loss": 21.973, "step": 16240 }, { "epoch": 0.30155626227971893, "grad_norm": 32.625, "learning_rate": 9.952881847681077e-06, "loss": 22.0185, "step": 16250 }, { "epoch": 0.3017418353641988, "grad_norm": 37.875, "learning_rate": 9.952852851895035e-06, "loss": 21.9603, "step": 16260 }, { "epoch": 0.3019274084486786, "grad_norm": 37.28125, "learning_rate": 9.952823856108994e-06, "loss": 22.3666, "step": 16270 }, { "epoch": 0.3021129815331584, "grad_norm": 35.65625, "learning_rate": 9.95279486032295e-06, "loss": 21.8065, "step": 16280 }, { "epoch": 0.30229855461763827, "grad_norm": 35.90625, "learning_rate": 9.952765864536907e-06, "loss": 22.0894, "step": 16290 }, { "epoch": 0.3024841277021181, "grad_norm": 36.25, "learning_rate": 9.952736868750866e-06, "loss": 22.6013, "step": 16300 }, { "epoch": 0.30266970078659794, "grad_norm": 36.34375, "learning_rate": 9.952707872964823e-06, "loss": 22.0674, "step": 16310 }, { "epoch": 0.30285527387107775, "grad_norm": 35.28125, "learning_rate": 9.95267887717878e-06, "loss": 21.6102, "step": 16320 }, { "epoch": 0.30304084695555755, "grad_norm": 35.53125, "learning_rate": 9.952649881392738e-06, "loss": 22.01, "step": 16330 }, { "epoch": 0.3032264200400374, "grad_norm": 36.21875, "learning_rate": 9.952620885606695e-06, "loss": 22.1756, "step": 16340 }, { "epoch": 0.3034119931245172, "grad_norm": 36.0625, "learning_rate": 9.952591889820653e-06, "loss": 22.1455, "step": 16350 }, { "epoch": 0.303597566208997, "grad_norm": 34.65625, "learning_rate": 9.95256289403461e-06, "loss": 22.1752, "step": 16360 }, { "epoch": 0.3037831392934769, "grad_norm": 34.75, "learning_rate": 9.95253389824857e-06, "loss": 22.277, "step": 16370 }, { "epoch": 0.3039687123779567, "grad_norm": 34.15625, "learning_rate": 9.952504902462527e-06, "loss": 22.5551, "step": 16380 }, { "epoch": 0.3041542854624365, "grad_norm": 34.0, "learning_rate": 9.952475906676482e-06, "loss": 21.9491, "step": 16390 }, { "epoch": 0.30433985854691636, "grad_norm": 35.21875, "learning_rate": 9.952446910890442e-06, "loss": 21.8338, "step": 16400 }, { "epoch": 0.30452543163139617, "grad_norm": 33.625, "learning_rate": 9.952417915104399e-06, "loss": 21.8369, "step": 16410 }, { "epoch": 0.30471100471587603, "grad_norm": 35.21875, "learning_rate": 9.952388919318356e-06, "loss": 22.0681, "step": 16420 }, { "epoch": 0.30489657780035584, "grad_norm": 34.59375, "learning_rate": 9.952359923532314e-06, "loss": 21.8606, "step": 16430 }, { "epoch": 0.30508215088483565, "grad_norm": 35.34375, "learning_rate": 9.952330927746271e-06, "loss": 21.8315, "step": 16440 }, { "epoch": 0.3052677239693155, "grad_norm": 35.09375, "learning_rate": 9.952301931960229e-06, "loss": 22.2084, "step": 16450 }, { "epoch": 0.3054532970537953, "grad_norm": 37.125, "learning_rate": 9.952272936174186e-06, "loss": 21.4615, "step": 16460 }, { "epoch": 0.3056388701382751, "grad_norm": 35.28125, "learning_rate": 9.952243940388145e-06, "loss": 21.9661, "step": 16470 }, { "epoch": 0.305824443222755, "grad_norm": 37.3125, "learning_rate": 9.952214944602103e-06, "loss": 21.8837, "step": 16480 }, { "epoch": 0.3060100163072348, "grad_norm": 34.4375, "learning_rate": 9.95218594881606e-06, "loss": 22.3747, "step": 16490 }, { "epoch": 0.30619558939171465, "grad_norm": 34.125, "learning_rate": 9.952156953030017e-06, "loss": 21.9814, "step": 16500 }, { "epoch": 0.30638116247619446, "grad_norm": 34.78125, "learning_rate": 9.952127957243975e-06, "loss": 22.081, "step": 16510 }, { "epoch": 0.30656673556067426, "grad_norm": 34.0625, "learning_rate": 9.952098961457932e-06, "loss": 22.3669, "step": 16520 }, { "epoch": 0.3067523086451541, "grad_norm": 36.40625, "learning_rate": 9.95206996567189e-06, "loss": 21.9809, "step": 16530 }, { "epoch": 0.30693788172963393, "grad_norm": 34.09375, "learning_rate": 9.952040969885847e-06, "loss": 21.7875, "step": 16540 }, { "epoch": 0.30712345481411374, "grad_norm": 35.71875, "learning_rate": 9.952011974099804e-06, "loss": 21.9626, "step": 16550 }, { "epoch": 0.3073090278985936, "grad_norm": 38.78125, "learning_rate": 9.951982978313762e-06, "loss": 21.6948, "step": 16560 }, { "epoch": 0.3074946009830734, "grad_norm": 33.6875, "learning_rate": 9.95195398252772e-06, "loss": 21.1979, "step": 16570 }, { "epoch": 0.30768017406755327, "grad_norm": 33.4375, "learning_rate": 9.951924986741678e-06, "loss": 22.1714, "step": 16580 }, { "epoch": 0.3078657471520331, "grad_norm": 34.78125, "learning_rate": 9.951895990955636e-06, "loss": 21.804, "step": 16590 }, { "epoch": 0.3080513202365129, "grad_norm": 34.09375, "learning_rate": 9.951866995169593e-06, "loss": 21.6856, "step": 16600 }, { "epoch": 0.30823689332099274, "grad_norm": 33.34375, "learning_rate": 9.95183799938355e-06, "loss": 22.2583, "step": 16610 }, { "epoch": 0.30842246640547255, "grad_norm": 34.15625, "learning_rate": 9.951809003597508e-06, "loss": 22.07, "step": 16620 }, { "epoch": 0.30860803948995236, "grad_norm": 35.84375, "learning_rate": 9.951780007811465e-06, "loss": 21.9016, "step": 16630 }, { "epoch": 0.3087936125744322, "grad_norm": 33.75, "learning_rate": 9.951751012025423e-06, "loss": 21.72, "step": 16640 }, { "epoch": 0.308979185658912, "grad_norm": 35.28125, "learning_rate": 9.951722016239382e-06, "loss": 21.76, "step": 16650 }, { "epoch": 0.30916475874339183, "grad_norm": 33.96875, "learning_rate": 9.951693020453338e-06, "loss": 22.1734, "step": 16660 }, { "epoch": 0.3093503318278717, "grad_norm": 34.9375, "learning_rate": 9.951664024667295e-06, "loss": 21.7641, "step": 16670 }, { "epoch": 0.3095359049123515, "grad_norm": 33.5625, "learning_rate": 9.951635028881254e-06, "loss": 21.7486, "step": 16680 }, { "epoch": 0.30972147799683136, "grad_norm": 35.65625, "learning_rate": 9.951606033095211e-06, "loss": 22.0392, "step": 16690 }, { "epoch": 0.30990705108131117, "grad_norm": 35.84375, "learning_rate": 9.951577037309169e-06, "loss": 21.8191, "step": 16700 }, { "epoch": 0.310092624165791, "grad_norm": 32.84375, "learning_rate": 9.951548041523126e-06, "loss": 21.7801, "step": 16710 }, { "epoch": 0.31027819725027084, "grad_norm": 38.625, "learning_rate": 9.951519045737084e-06, "loss": 22.5178, "step": 16720 }, { "epoch": 0.31046377033475064, "grad_norm": 35.28125, "learning_rate": 9.951490049951041e-06, "loss": 21.7387, "step": 16730 }, { "epoch": 0.31064934341923045, "grad_norm": 34.9375, "learning_rate": 9.951461054164999e-06, "loss": 22.385, "step": 16740 }, { "epoch": 0.3108349165037103, "grad_norm": 35.0, "learning_rate": 9.951432058378958e-06, "loss": 22.0111, "step": 16750 }, { "epoch": 0.3110204895881901, "grad_norm": 34.25, "learning_rate": 9.951403062592913e-06, "loss": 21.9642, "step": 16760 }, { "epoch": 0.31120606267267, "grad_norm": 35.59375, "learning_rate": 9.95137406680687e-06, "loss": 21.8199, "step": 16770 }, { "epoch": 0.3113916357571498, "grad_norm": 36.8125, "learning_rate": 9.95134507102083e-06, "loss": 21.8454, "step": 16780 }, { "epoch": 0.3115772088416296, "grad_norm": 34.875, "learning_rate": 9.951316075234787e-06, "loss": 21.7867, "step": 16790 }, { "epoch": 0.31176278192610946, "grad_norm": 34.875, "learning_rate": 9.951287079448745e-06, "loss": 21.7425, "step": 16800 }, { "epoch": 0.31194835501058926, "grad_norm": 33.875, "learning_rate": 9.951258083662702e-06, "loss": 22.1539, "step": 16810 }, { "epoch": 0.31213392809506907, "grad_norm": 33.5625, "learning_rate": 9.95122908787666e-06, "loss": 22.1815, "step": 16820 }, { "epoch": 0.31231950117954893, "grad_norm": 34.25, "learning_rate": 9.951200092090617e-06, "loss": 21.9751, "step": 16830 }, { "epoch": 0.31250507426402874, "grad_norm": 35.46875, "learning_rate": 9.951171096304574e-06, "loss": 22.1181, "step": 16840 }, { "epoch": 0.3126906473485086, "grad_norm": 35.75, "learning_rate": 9.951142100518533e-06, "loss": 21.8169, "step": 16850 }, { "epoch": 0.3128762204329884, "grad_norm": 37.78125, "learning_rate": 9.95111310473249e-06, "loss": 22.324, "step": 16860 }, { "epoch": 0.3130617935174682, "grad_norm": 36.25, "learning_rate": 9.951084108946447e-06, "loss": 22.0339, "step": 16870 }, { "epoch": 0.3132473666019481, "grad_norm": 33.53125, "learning_rate": 9.951055113160406e-06, "loss": 22.3883, "step": 16880 }, { "epoch": 0.3134329396864279, "grad_norm": 35.71875, "learning_rate": 9.951026117374363e-06, "loss": 21.4248, "step": 16890 }, { "epoch": 0.3136185127709077, "grad_norm": 34.0625, "learning_rate": 9.95099712158832e-06, "loss": 21.7226, "step": 16900 }, { "epoch": 0.31380408585538755, "grad_norm": 34.71875, "learning_rate": 9.950968125802278e-06, "loss": 22.1848, "step": 16910 }, { "epoch": 0.31398965893986736, "grad_norm": 35.6875, "learning_rate": 9.950939130016235e-06, "loss": 22.076, "step": 16920 }, { "epoch": 0.31417523202434716, "grad_norm": 36.5625, "learning_rate": 9.950910134230193e-06, "loss": 22.3584, "step": 16930 }, { "epoch": 0.314360805108827, "grad_norm": 35.78125, "learning_rate": 9.95088113844415e-06, "loss": 22.2669, "step": 16940 }, { "epoch": 0.31454637819330683, "grad_norm": 33.875, "learning_rate": 9.95085214265811e-06, "loss": 22.3504, "step": 16950 }, { "epoch": 0.3147319512777867, "grad_norm": 35.03125, "learning_rate": 9.950823146872067e-06, "loss": 21.7265, "step": 16960 }, { "epoch": 0.3149175243622665, "grad_norm": 33.75, "learning_rate": 9.950794151086024e-06, "loss": 22.3763, "step": 16970 }, { "epoch": 0.3151030974467463, "grad_norm": 33.25, "learning_rate": 9.950765155299981e-06, "loss": 21.5872, "step": 16980 }, { "epoch": 0.31528867053122617, "grad_norm": 35.1875, "learning_rate": 9.950736159513939e-06, "loss": 22.0631, "step": 16990 }, { "epoch": 0.315474243615706, "grad_norm": 35.46875, "learning_rate": 9.950707163727896e-06, "loss": 22.1039, "step": 17000 }, { "epoch": 0.3156598167001858, "grad_norm": 35.5625, "learning_rate": 9.950678167941854e-06, "loss": 21.9955, "step": 17010 }, { "epoch": 0.31584538978466564, "grad_norm": 35.1875, "learning_rate": 9.950649172155811e-06, "loss": 21.5005, "step": 17020 }, { "epoch": 0.31603096286914545, "grad_norm": 36.375, "learning_rate": 9.950620176369768e-06, "loss": 21.925, "step": 17030 }, { "epoch": 0.3162165359536253, "grad_norm": 34.5, "learning_rate": 9.950591180583726e-06, "loss": 21.8868, "step": 17040 }, { "epoch": 0.3164021090381051, "grad_norm": 35.03125, "learning_rate": 9.950562184797685e-06, "loss": 22.0574, "step": 17050 }, { "epoch": 0.3165876821225849, "grad_norm": 36.71875, "learning_rate": 9.950533189011642e-06, "loss": 22.0364, "step": 17060 }, { "epoch": 0.3167732552070648, "grad_norm": 33.875, "learning_rate": 9.9505041932256e-06, "loss": 21.8544, "step": 17070 }, { "epoch": 0.3169588282915446, "grad_norm": 33.75, "learning_rate": 9.950475197439557e-06, "loss": 21.9953, "step": 17080 }, { "epoch": 0.3171444013760244, "grad_norm": 34.78125, "learning_rate": 9.950446201653515e-06, "loss": 22.0436, "step": 17090 }, { "epoch": 0.31732997446050426, "grad_norm": 36.59375, "learning_rate": 9.950417205867472e-06, "loss": 21.6638, "step": 17100 }, { "epoch": 0.31751554754498407, "grad_norm": 34.0625, "learning_rate": 9.95038821008143e-06, "loss": 21.9026, "step": 17110 }, { "epoch": 0.31770112062946393, "grad_norm": 35.375, "learning_rate": 9.950359214295387e-06, "loss": 21.7069, "step": 17120 }, { "epoch": 0.31788669371394374, "grad_norm": 34.84375, "learning_rate": 9.950330218509346e-06, "loss": 21.673, "step": 17130 }, { "epoch": 0.31807226679842354, "grad_norm": 35.78125, "learning_rate": 9.950301222723302e-06, "loss": 21.9466, "step": 17140 }, { "epoch": 0.3182578398829034, "grad_norm": 37.34375, "learning_rate": 9.950272226937259e-06, "loss": 21.7084, "step": 17150 }, { "epoch": 0.3184434129673832, "grad_norm": 34.03125, "learning_rate": 9.950243231151218e-06, "loss": 21.572, "step": 17160 }, { "epoch": 0.318628986051863, "grad_norm": 35.9375, "learning_rate": 9.950214235365176e-06, "loss": 22.2523, "step": 17170 }, { "epoch": 0.3188145591363429, "grad_norm": 34.34375, "learning_rate": 9.950185239579133e-06, "loss": 22.0218, "step": 17180 }, { "epoch": 0.3190001322208227, "grad_norm": 33.78125, "learning_rate": 9.95015624379309e-06, "loss": 21.2069, "step": 17190 }, { "epoch": 0.3191857053053025, "grad_norm": 34.90625, "learning_rate": 9.950127248007048e-06, "loss": 22.0038, "step": 17200 }, { "epoch": 0.31937127838978235, "grad_norm": 33.375, "learning_rate": 9.950098252221005e-06, "loss": 21.5058, "step": 17210 }, { "epoch": 0.31955685147426216, "grad_norm": 34.34375, "learning_rate": 9.950069256434963e-06, "loss": 21.8254, "step": 17220 }, { "epoch": 0.319742424558742, "grad_norm": 36.15625, "learning_rate": 9.950040260648922e-06, "loss": 21.5007, "step": 17230 }, { "epoch": 0.31992799764322183, "grad_norm": 33.84375, "learning_rate": 9.950011264862879e-06, "loss": 22.6218, "step": 17240 }, { "epoch": 0.32011357072770164, "grad_norm": 34.96875, "learning_rate": 9.949982269076835e-06, "loss": 21.892, "step": 17250 }, { "epoch": 0.3202991438121815, "grad_norm": 35.15625, "learning_rate": 9.949953273290794e-06, "loss": 21.8203, "step": 17260 }, { "epoch": 0.3204847168966613, "grad_norm": 32.78125, "learning_rate": 9.949924277504751e-06, "loss": 22.0928, "step": 17270 }, { "epoch": 0.3206702899811411, "grad_norm": 34.09375, "learning_rate": 9.949895281718709e-06, "loss": 22.1172, "step": 17280 }, { "epoch": 0.32085586306562097, "grad_norm": 37.1875, "learning_rate": 9.949866285932666e-06, "loss": 21.4919, "step": 17290 }, { "epoch": 0.3210414361501008, "grad_norm": 36.09375, "learning_rate": 9.949837290146623e-06, "loss": 21.8468, "step": 17300 }, { "epoch": 0.32122700923458064, "grad_norm": 36.21875, "learning_rate": 9.949808294360581e-06, "loss": 21.9655, "step": 17310 }, { "epoch": 0.32141258231906045, "grad_norm": 35.8125, "learning_rate": 9.949779298574538e-06, "loss": 21.8789, "step": 17320 }, { "epoch": 0.32159815540354025, "grad_norm": 35.9375, "learning_rate": 9.949750302788497e-06, "loss": 21.9964, "step": 17330 }, { "epoch": 0.3217837284880201, "grad_norm": 35.40625, "learning_rate": 9.949721307002455e-06, "loss": 22.188, "step": 17340 }, { "epoch": 0.3219693015724999, "grad_norm": 35.0, "learning_rate": 9.94969231121641e-06, "loss": 21.9757, "step": 17350 }, { "epoch": 0.32215487465697973, "grad_norm": 35.34375, "learning_rate": 9.94966331543037e-06, "loss": 21.7447, "step": 17360 }, { "epoch": 0.3223404477414596, "grad_norm": 36.0625, "learning_rate": 9.949634319644327e-06, "loss": 21.7478, "step": 17370 }, { "epoch": 0.3225260208259394, "grad_norm": 36.125, "learning_rate": 9.949605323858284e-06, "loss": 22.5053, "step": 17380 }, { "epoch": 0.32271159391041926, "grad_norm": 35.09375, "learning_rate": 9.949576328072242e-06, "loss": 21.8131, "step": 17390 }, { "epoch": 0.32289716699489907, "grad_norm": 36.75, "learning_rate": 9.949547332286201e-06, "loss": 22.0129, "step": 17400 }, { "epoch": 0.32308274007937887, "grad_norm": 33.84375, "learning_rate": 9.949518336500157e-06, "loss": 21.799, "step": 17410 }, { "epoch": 0.32326831316385873, "grad_norm": 34.875, "learning_rate": 9.949489340714114e-06, "loss": 21.6667, "step": 17420 }, { "epoch": 0.32345388624833854, "grad_norm": 34.375, "learning_rate": 9.949460344928073e-06, "loss": 22.099, "step": 17430 }, { "epoch": 0.32363945933281835, "grad_norm": 35.375, "learning_rate": 9.94943134914203e-06, "loss": 21.9377, "step": 17440 }, { "epoch": 0.3238250324172982, "grad_norm": 36.8125, "learning_rate": 9.949402353355988e-06, "loss": 22.0756, "step": 17450 }, { "epoch": 0.324010605501778, "grad_norm": 33.84375, "learning_rate": 9.949373357569945e-06, "loss": 21.7746, "step": 17460 }, { "epoch": 0.3241961785862578, "grad_norm": 36.78125, "learning_rate": 9.949344361783903e-06, "loss": 21.9471, "step": 17470 }, { "epoch": 0.3243817516707377, "grad_norm": 36.4375, "learning_rate": 9.94931536599786e-06, "loss": 21.7579, "step": 17480 }, { "epoch": 0.3245673247552175, "grad_norm": 34.96875, "learning_rate": 9.949286370211818e-06, "loss": 21.5875, "step": 17490 }, { "epoch": 0.32475289783969735, "grad_norm": 34.53125, "learning_rate": 9.949257374425777e-06, "loss": 21.6696, "step": 17500 }, { "epoch": 0.32493847092417716, "grad_norm": 34.6875, "learning_rate": 9.949228378639732e-06, "loss": 22.0292, "step": 17510 }, { "epoch": 0.32512404400865696, "grad_norm": 34.34375, "learning_rate": 9.94919938285369e-06, "loss": 21.6901, "step": 17520 }, { "epoch": 0.3253096170931368, "grad_norm": 34.71875, "learning_rate": 9.949170387067649e-06, "loss": 21.5895, "step": 17530 }, { "epoch": 0.32549519017761663, "grad_norm": 34.625, "learning_rate": 9.949141391281606e-06, "loss": 21.7416, "step": 17540 }, { "epoch": 0.32568076326209644, "grad_norm": 34.0, "learning_rate": 9.949112395495564e-06, "loss": 21.7771, "step": 17550 }, { "epoch": 0.3258663363465763, "grad_norm": 35.875, "learning_rate": 9.949083399709521e-06, "loss": 21.8724, "step": 17560 }, { "epoch": 0.3260519094310561, "grad_norm": 34.78125, "learning_rate": 9.949054403923479e-06, "loss": 21.6644, "step": 17570 }, { "epoch": 0.32623748251553597, "grad_norm": 35.0, "learning_rate": 9.949025408137436e-06, "loss": 22.1336, "step": 17580 }, { "epoch": 0.3264230556000158, "grad_norm": 34.46875, "learning_rate": 9.948996412351393e-06, "loss": 21.7064, "step": 17590 }, { "epoch": 0.3266086286844956, "grad_norm": 35.375, "learning_rate": 9.94896741656535e-06, "loss": 21.7408, "step": 17600 }, { "epoch": 0.32679420176897545, "grad_norm": 33.625, "learning_rate": 9.94893842077931e-06, "loss": 21.6225, "step": 17610 }, { "epoch": 0.32697977485345525, "grad_norm": 35.84375, "learning_rate": 9.948909424993266e-06, "loss": 21.4826, "step": 17620 }, { "epoch": 0.32716534793793506, "grad_norm": 33.90625, "learning_rate": 9.948880429207225e-06, "loss": 21.8825, "step": 17630 }, { "epoch": 0.3273509210224149, "grad_norm": 36.03125, "learning_rate": 9.948851433421182e-06, "loss": 21.4889, "step": 17640 }, { "epoch": 0.3275364941068947, "grad_norm": 32.84375, "learning_rate": 9.94882243763514e-06, "loss": 21.5165, "step": 17650 }, { "epoch": 0.3277220671913746, "grad_norm": 35.0625, "learning_rate": 9.948793441849097e-06, "loss": 21.6394, "step": 17660 }, { "epoch": 0.3279076402758544, "grad_norm": 35.0625, "learning_rate": 9.948764446063054e-06, "loss": 21.623, "step": 17670 }, { "epoch": 0.3280932133603342, "grad_norm": 34.125, "learning_rate": 9.948735450277012e-06, "loss": 22.1933, "step": 17680 }, { "epoch": 0.32827878644481406, "grad_norm": 36.65625, "learning_rate": 9.948706454490969e-06, "loss": 21.7992, "step": 17690 }, { "epoch": 0.32846435952929387, "grad_norm": 35.15625, "learning_rate": 9.948677458704927e-06, "loss": 21.9871, "step": 17700 }, { "epoch": 0.3286499326137737, "grad_norm": 38.0, "learning_rate": 9.948648462918886e-06, "loss": 22.3474, "step": 17710 }, { "epoch": 0.32883550569825354, "grad_norm": 36.0, "learning_rate": 9.948619467132843e-06, "loss": 22.0369, "step": 17720 }, { "epoch": 0.32902107878273334, "grad_norm": 35.3125, "learning_rate": 9.948590471346799e-06, "loss": 21.3599, "step": 17730 }, { "epoch": 0.3292066518672132, "grad_norm": 34.90625, "learning_rate": 9.948561475560758e-06, "loss": 21.8559, "step": 17740 }, { "epoch": 0.329392224951693, "grad_norm": 35.875, "learning_rate": 9.948532479774715e-06, "loss": 21.8818, "step": 17750 }, { "epoch": 0.3295777980361728, "grad_norm": 32.96875, "learning_rate": 9.948503483988673e-06, "loss": 21.4791, "step": 17760 }, { "epoch": 0.3297633711206527, "grad_norm": 33.21875, "learning_rate": 9.94847448820263e-06, "loss": 21.6707, "step": 17770 }, { "epoch": 0.3299489442051325, "grad_norm": 34.875, "learning_rate": 9.948445492416588e-06, "loss": 21.4599, "step": 17780 }, { "epoch": 0.3301345172896123, "grad_norm": 35.625, "learning_rate": 9.948416496630545e-06, "loss": 21.9671, "step": 17790 }, { "epoch": 0.33032009037409216, "grad_norm": 34.0625, "learning_rate": 9.948387500844502e-06, "loss": 21.7585, "step": 17800 }, { "epoch": 0.33050566345857196, "grad_norm": 36.21875, "learning_rate": 9.948358505058461e-06, "loss": 21.5565, "step": 17810 }, { "epoch": 0.33069123654305177, "grad_norm": 33.5625, "learning_rate": 9.948329509272419e-06, "loss": 21.3623, "step": 17820 }, { "epoch": 0.33087680962753163, "grad_norm": 35.21875, "learning_rate": 9.948300513486376e-06, "loss": 21.5242, "step": 17830 }, { "epoch": 0.33106238271201144, "grad_norm": 33.96875, "learning_rate": 9.948271517700334e-06, "loss": 22.0939, "step": 17840 }, { "epoch": 0.3312479557964913, "grad_norm": 33.96875, "learning_rate": 9.948242521914291e-06, "loss": 21.8529, "step": 17850 }, { "epoch": 0.3314335288809711, "grad_norm": 36.78125, "learning_rate": 9.948213526128248e-06, "loss": 21.7178, "step": 17860 }, { "epoch": 0.3316191019654509, "grad_norm": 36.84375, "learning_rate": 9.948184530342206e-06, "loss": 21.9362, "step": 17870 }, { "epoch": 0.3318046750499308, "grad_norm": 35.375, "learning_rate": 9.948155534556165e-06, "loss": 21.678, "step": 17880 }, { "epoch": 0.3319902481344106, "grad_norm": 35.78125, "learning_rate": 9.94812653877012e-06, "loss": 21.7038, "step": 17890 }, { "epoch": 0.3321758212188904, "grad_norm": 36.3125, "learning_rate": 9.948097542984078e-06, "loss": 21.7922, "step": 17900 }, { "epoch": 0.33236139430337025, "grad_norm": 35.65625, "learning_rate": 9.948068547198037e-06, "loss": 21.4636, "step": 17910 }, { "epoch": 0.33254696738785006, "grad_norm": 33.84375, "learning_rate": 9.948039551411995e-06, "loss": 21.4673, "step": 17920 }, { "epoch": 0.3327325404723299, "grad_norm": 34.625, "learning_rate": 9.948010555625952e-06, "loss": 21.6747, "step": 17930 }, { "epoch": 0.3329181135568097, "grad_norm": 34.46875, "learning_rate": 9.94798155983991e-06, "loss": 21.5015, "step": 17940 }, { "epoch": 0.33310368664128953, "grad_norm": 34.65625, "learning_rate": 9.947952564053867e-06, "loss": 22.1432, "step": 17950 }, { "epoch": 0.3332892597257694, "grad_norm": 33.96875, "learning_rate": 9.947923568267824e-06, "loss": 21.695, "step": 17960 }, { "epoch": 0.3334748328102492, "grad_norm": 37.53125, "learning_rate": 9.947894572481782e-06, "loss": 21.7699, "step": 17970 }, { "epoch": 0.333660405894729, "grad_norm": 33.53125, "learning_rate": 9.94786557669574e-06, "loss": 21.9693, "step": 17980 }, { "epoch": 0.33384597897920887, "grad_norm": 36.03125, "learning_rate": 9.947836580909698e-06, "loss": 21.9864, "step": 17990 }, { "epoch": 0.3340315520636887, "grad_norm": 35.40625, "learning_rate": 9.947807585123654e-06, "loss": 21.6196, "step": 18000 }, { "epoch": 0.33421712514816854, "grad_norm": 36.0625, "learning_rate": 9.947778589337613e-06, "loss": 21.6735, "step": 18010 }, { "epoch": 0.33440269823264834, "grad_norm": 33.78125, "learning_rate": 9.94774959355157e-06, "loss": 21.4556, "step": 18020 }, { "epoch": 0.33458827131712815, "grad_norm": 34.125, "learning_rate": 9.947720597765528e-06, "loss": 22.1459, "step": 18030 }, { "epoch": 0.334773844401608, "grad_norm": 34.65625, "learning_rate": 9.947691601979485e-06, "loss": 21.8858, "step": 18040 }, { "epoch": 0.3349594174860878, "grad_norm": 37.84375, "learning_rate": 9.947662606193443e-06, "loss": 21.5832, "step": 18050 }, { "epoch": 0.3351449905705676, "grad_norm": 34.6875, "learning_rate": 9.9476336104074e-06, "loss": 21.8271, "step": 18060 }, { "epoch": 0.3353305636550475, "grad_norm": 36.25, "learning_rate": 9.947604614621357e-06, "loss": 21.8723, "step": 18070 }, { "epoch": 0.3355161367395273, "grad_norm": 34.9375, "learning_rate": 9.947575618835315e-06, "loss": 21.7849, "step": 18080 }, { "epoch": 0.3357017098240071, "grad_norm": 35.8125, "learning_rate": 9.947546623049274e-06, "loss": 21.3291, "step": 18090 }, { "epoch": 0.33588728290848696, "grad_norm": 33.65625, "learning_rate": 9.94751762726323e-06, "loss": 21.2726, "step": 18100 }, { "epoch": 0.33607285599296677, "grad_norm": 34.09375, "learning_rate": 9.947488631477189e-06, "loss": 22.0293, "step": 18110 }, { "epoch": 0.33625842907744663, "grad_norm": 34.0, "learning_rate": 9.947459635691146e-06, "loss": 21.8386, "step": 18120 }, { "epoch": 0.33644400216192644, "grad_norm": 35.09375, "learning_rate": 9.947430639905104e-06, "loss": 21.636, "step": 18130 }, { "epoch": 0.33662957524640624, "grad_norm": 31.484375, "learning_rate": 9.947401644119061e-06, "loss": 21.6889, "step": 18140 }, { "epoch": 0.3368151483308861, "grad_norm": 36.78125, "learning_rate": 9.947372648333018e-06, "loss": 21.4289, "step": 18150 }, { "epoch": 0.3370007214153659, "grad_norm": 34.9375, "learning_rate": 9.947343652546976e-06, "loss": 21.5468, "step": 18160 }, { "epoch": 0.3371862944998457, "grad_norm": 34.25, "learning_rate": 9.947314656760933e-06, "loss": 21.5357, "step": 18170 }, { "epoch": 0.3373718675843256, "grad_norm": 33.6875, "learning_rate": 9.94728566097489e-06, "loss": 21.5039, "step": 18180 }, { "epoch": 0.3375574406688054, "grad_norm": 36.46875, "learning_rate": 9.94725666518885e-06, "loss": 21.9166, "step": 18190 }, { "epoch": 0.33774301375328525, "grad_norm": 35.09375, "learning_rate": 9.947227669402807e-06, "loss": 22.2752, "step": 18200 }, { "epoch": 0.33792858683776505, "grad_norm": 34.8125, "learning_rate": 9.947198673616763e-06, "loss": 21.7475, "step": 18210 }, { "epoch": 0.33811415992224486, "grad_norm": 36.3125, "learning_rate": 9.947169677830722e-06, "loss": 22.0181, "step": 18220 }, { "epoch": 0.3382997330067247, "grad_norm": 34.875, "learning_rate": 9.94714068204468e-06, "loss": 21.1761, "step": 18230 }, { "epoch": 0.33848530609120453, "grad_norm": 33.65625, "learning_rate": 9.947111686258637e-06, "loss": 21.6197, "step": 18240 }, { "epoch": 0.33867087917568434, "grad_norm": 34.1875, "learning_rate": 9.947082690472594e-06, "loss": 21.3926, "step": 18250 }, { "epoch": 0.3388564522601642, "grad_norm": 34.3125, "learning_rate": 9.947053694686552e-06, "loss": 21.5869, "step": 18260 }, { "epoch": 0.339042025344644, "grad_norm": 34.25, "learning_rate": 9.947024698900509e-06, "loss": 22.0923, "step": 18270 }, { "epoch": 0.33922759842912387, "grad_norm": 36.625, "learning_rate": 9.946995703114466e-06, "loss": 21.7863, "step": 18280 }, { "epoch": 0.3394131715136037, "grad_norm": 34.21875, "learning_rate": 9.946966707328425e-06, "loss": 21.7017, "step": 18290 }, { "epoch": 0.3395987445980835, "grad_norm": 35.03125, "learning_rate": 9.946937711542383e-06, "loss": 21.9386, "step": 18300 }, { "epoch": 0.33978431768256334, "grad_norm": 35.5, "learning_rate": 9.94690871575634e-06, "loss": 21.9028, "step": 18310 }, { "epoch": 0.33996989076704315, "grad_norm": 36.46875, "learning_rate": 9.946879719970298e-06, "loss": 21.3511, "step": 18320 }, { "epoch": 0.34015546385152295, "grad_norm": 35.1875, "learning_rate": 9.946850724184255e-06, "loss": 21.4002, "step": 18330 }, { "epoch": 0.3403410369360028, "grad_norm": 33.96875, "learning_rate": 9.946821728398212e-06, "loss": 21.6701, "step": 18340 }, { "epoch": 0.3405266100204826, "grad_norm": 34.4375, "learning_rate": 9.94679273261217e-06, "loss": 21.6811, "step": 18350 }, { "epoch": 0.34071218310496243, "grad_norm": 33.09375, "learning_rate": 9.946763736826129e-06, "loss": 21.7387, "step": 18360 }, { "epoch": 0.3408977561894423, "grad_norm": 35.0625, "learning_rate": 9.946734741040085e-06, "loss": 22.08, "step": 18370 }, { "epoch": 0.3410833292739221, "grad_norm": 35.90625, "learning_rate": 9.946705745254042e-06, "loss": 21.8435, "step": 18380 }, { "epoch": 0.34126890235840196, "grad_norm": 33.8125, "learning_rate": 9.946676749468001e-06, "loss": 21.7757, "step": 18390 }, { "epoch": 0.34145447544288177, "grad_norm": 35.625, "learning_rate": 9.946647753681959e-06, "loss": 21.6643, "step": 18400 }, { "epoch": 0.3416400485273616, "grad_norm": 36.875, "learning_rate": 9.946618757895916e-06, "loss": 21.495, "step": 18410 }, { "epoch": 0.34182562161184143, "grad_norm": 37.375, "learning_rate": 9.946589762109873e-06, "loss": 21.7553, "step": 18420 }, { "epoch": 0.34201119469632124, "grad_norm": 34.625, "learning_rate": 9.94656076632383e-06, "loss": 21.4652, "step": 18430 }, { "epoch": 0.34219676778080105, "grad_norm": 34.71875, "learning_rate": 9.946531770537788e-06, "loss": 21.7942, "step": 18440 }, { "epoch": 0.3423823408652809, "grad_norm": 35.21875, "learning_rate": 9.946502774751746e-06, "loss": 21.7887, "step": 18450 }, { "epoch": 0.3425679139497607, "grad_norm": 34.5, "learning_rate": 9.946473778965705e-06, "loss": 21.8405, "step": 18460 }, { "epoch": 0.3427534870342406, "grad_norm": 33.3125, "learning_rate": 9.946444783179662e-06, "loss": 21.3681, "step": 18470 }, { "epoch": 0.3429390601187204, "grad_norm": 34.90625, "learning_rate": 9.946415787393618e-06, "loss": 21.4823, "step": 18480 }, { "epoch": 0.3431246332032002, "grad_norm": 34.78125, "learning_rate": 9.946386791607577e-06, "loss": 21.7108, "step": 18490 }, { "epoch": 0.34331020628768005, "grad_norm": 33.875, "learning_rate": 9.946357795821534e-06, "loss": 21.6695, "step": 18500 }, { "epoch": 0.34349577937215986, "grad_norm": 33.8125, "learning_rate": 9.946328800035492e-06, "loss": 21.7427, "step": 18510 }, { "epoch": 0.34368135245663967, "grad_norm": 35.03125, "learning_rate": 9.94629980424945e-06, "loss": 21.5864, "step": 18520 }, { "epoch": 0.34386692554111953, "grad_norm": 33.1875, "learning_rate": 9.946270808463407e-06, "loss": 21.6827, "step": 18530 }, { "epoch": 0.34405249862559933, "grad_norm": 36.78125, "learning_rate": 9.946241812677364e-06, "loss": 22.1864, "step": 18540 }, { "epoch": 0.3442380717100792, "grad_norm": 34.53125, "learning_rate": 9.946212816891321e-06, "loss": 21.6761, "step": 18550 }, { "epoch": 0.344423644794559, "grad_norm": 35.75, "learning_rate": 9.94618382110528e-06, "loss": 21.5846, "step": 18560 }, { "epoch": 0.3446092178790388, "grad_norm": 34.0625, "learning_rate": 9.946154825319238e-06, "loss": 21.4387, "step": 18570 }, { "epoch": 0.34479479096351867, "grad_norm": 34.1875, "learning_rate": 9.946125829533195e-06, "loss": 21.8834, "step": 18580 }, { "epoch": 0.3449803640479985, "grad_norm": 32.5, "learning_rate": 9.946096833747153e-06, "loss": 21.6082, "step": 18590 }, { "epoch": 0.3451659371324783, "grad_norm": 34.71875, "learning_rate": 9.94606783796111e-06, "loss": 21.4526, "step": 18600 }, { "epoch": 0.34535151021695815, "grad_norm": 35.8125, "learning_rate": 9.946038842175068e-06, "loss": 21.8455, "step": 18610 }, { "epoch": 0.34553708330143795, "grad_norm": 34.78125, "learning_rate": 9.946009846389025e-06, "loss": 21.6449, "step": 18620 }, { "epoch": 0.34572265638591776, "grad_norm": 35.125, "learning_rate": 9.945980850602982e-06, "loss": 21.0863, "step": 18630 }, { "epoch": 0.3459082294703976, "grad_norm": 34.84375, "learning_rate": 9.94595185481694e-06, "loss": 21.0995, "step": 18640 }, { "epoch": 0.3460938025548774, "grad_norm": 37.25, "learning_rate": 9.945922859030897e-06, "loss": 21.4652, "step": 18650 }, { "epoch": 0.3462793756393573, "grad_norm": 33.875, "learning_rate": 9.945893863244855e-06, "loss": 21.5573, "step": 18660 }, { "epoch": 0.3464649487238371, "grad_norm": 32.21875, "learning_rate": 9.945864867458814e-06, "loss": 21.6797, "step": 18670 }, { "epoch": 0.3466505218083169, "grad_norm": 35.8125, "learning_rate": 9.945835871672771e-06, "loss": 21.5204, "step": 18680 }, { "epoch": 0.34683609489279676, "grad_norm": 34.625, "learning_rate": 9.945806875886728e-06, "loss": 21.4348, "step": 18690 }, { "epoch": 0.34702166797727657, "grad_norm": 35.25, "learning_rate": 9.945777880100686e-06, "loss": 21.6341, "step": 18700 }, { "epoch": 0.3472072410617564, "grad_norm": 36.0625, "learning_rate": 9.945748884314643e-06, "loss": 21.5154, "step": 18710 }, { "epoch": 0.34739281414623624, "grad_norm": 34.40625, "learning_rate": 9.9457198885286e-06, "loss": 21.8781, "step": 18720 }, { "epoch": 0.34757838723071605, "grad_norm": 36.40625, "learning_rate": 9.945690892742558e-06, "loss": 21.9311, "step": 18730 }, { "epoch": 0.3477639603151959, "grad_norm": 34.625, "learning_rate": 9.945661896956517e-06, "loss": 21.7879, "step": 18740 }, { "epoch": 0.3479495333996757, "grad_norm": 34.78125, "learning_rate": 9.945632901170473e-06, "loss": 21.7787, "step": 18750 }, { "epoch": 0.3481351064841555, "grad_norm": 36.125, "learning_rate": 9.94560390538443e-06, "loss": 21.6464, "step": 18760 }, { "epoch": 0.3483206795686354, "grad_norm": 34.5625, "learning_rate": 9.94557490959839e-06, "loss": 21.5736, "step": 18770 }, { "epoch": 0.3485062526531152, "grad_norm": 34.75, "learning_rate": 9.945545913812347e-06, "loss": 21.753, "step": 18780 }, { "epoch": 0.348691825737595, "grad_norm": 36.125, "learning_rate": 9.945516918026304e-06, "loss": 21.4789, "step": 18790 }, { "epoch": 0.34887739882207486, "grad_norm": 34.90625, "learning_rate": 9.945487922240262e-06, "loss": 21.8735, "step": 18800 }, { "epoch": 0.34906297190655466, "grad_norm": 35.21875, "learning_rate": 9.945458926454219e-06, "loss": 21.6979, "step": 18810 }, { "epoch": 0.3492485449910345, "grad_norm": 35.03125, "learning_rate": 9.945429930668176e-06, "loss": 21.2156, "step": 18820 }, { "epoch": 0.34943411807551433, "grad_norm": 35.125, "learning_rate": 9.945400934882134e-06, "loss": 22.11, "step": 18830 }, { "epoch": 0.34961969115999414, "grad_norm": 34.90625, "learning_rate": 9.945371939096093e-06, "loss": 21.4453, "step": 18840 }, { "epoch": 0.349805264244474, "grad_norm": 34.5, "learning_rate": 9.945342943310049e-06, "loss": 21.5445, "step": 18850 }, { "epoch": 0.3499908373289538, "grad_norm": 36.71875, "learning_rate": 9.945313947524006e-06, "loss": 21.6646, "step": 18860 }, { "epoch": 0.3501764104134336, "grad_norm": 33.53125, "learning_rate": 9.945284951737965e-06, "loss": 21.4822, "step": 18870 }, { "epoch": 0.3503619834979135, "grad_norm": 35.71875, "learning_rate": 9.945255955951923e-06, "loss": 21.2241, "step": 18880 }, { "epoch": 0.3505475565823933, "grad_norm": 33.78125, "learning_rate": 9.94522696016588e-06, "loss": 21.5542, "step": 18890 }, { "epoch": 0.3507331296668731, "grad_norm": 33.15625, "learning_rate": 9.945197964379837e-06, "loss": 21.23, "step": 18900 }, { "epoch": 0.35091870275135295, "grad_norm": 34.9375, "learning_rate": 9.945168968593795e-06, "loss": 21.4728, "step": 18910 }, { "epoch": 0.35110427583583276, "grad_norm": 35.84375, "learning_rate": 9.945139972807752e-06, "loss": 21.7163, "step": 18920 }, { "epoch": 0.3512898489203126, "grad_norm": 35.03125, "learning_rate": 9.94511097702171e-06, "loss": 21.5852, "step": 18930 }, { "epoch": 0.3514754220047924, "grad_norm": 36.84375, "learning_rate": 9.945081981235669e-06, "loss": 21.5413, "step": 18940 }, { "epoch": 0.35166099508927223, "grad_norm": 34.625, "learning_rate": 9.945052985449626e-06, "loss": 21.824, "step": 18950 }, { "epoch": 0.3518465681737521, "grad_norm": 36.28125, "learning_rate": 9.945023989663582e-06, "loss": 21.5073, "step": 18960 }, { "epoch": 0.3520321412582319, "grad_norm": 35.25, "learning_rate": 9.944994993877541e-06, "loss": 21.6476, "step": 18970 }, { "epoch": 0.3522177143427117, "grad_norm": 36.21875, "learning_rate": 9.944965998091498e-06, "loss": 21.9834, "step": 18980 }, { "epoch": 0.35240328742719157, "grad_norm": 35.75, "learning_rate": 9.944937002305456e-06, "loss": 21.3338, "step": 18990 }, { "epoch": 0.3525888605116714, "grad_norm": 32.65625, "learning_rate": 9.944908006519413e-06, "loss": 21.7214, "step": 19000 }, { "epoch": 0.35277443359615124, "grad_norm": 35.03125, "learning_rate": 9.944879010733372e-06, "loss": 21.3903, "step": 19010 }, { "epoch": 0.35296000668063104, "grad_norm": 36.28125, "learning_rate": 9.944850014947328e-06, "loss": 21.4519, "step": 19020 }, { "epoch": 0.35314557976511085, "grad_norm": 35.0625, "learning_rate": 9.944821019161285e-06, "loss": 21.318, "step": 19030 }, { "epoch": 0.3533311528495907, "grad_norm": 36.03125, "learning_rate": 9.944792023375245e-06, "loss": 21.5192, "step": 19040 }, { "epoch": 0.3535167259340705, "grad_norm": 36.65625, "learning_rate": 9.944763027589202e-06, "loss": 21.8759, "step": 19050 }, { "epoch": 0.3537022990185503, "grad_norm": 36.5625, "learning_rate": 9.94473403180316e-06, "loss": 21.4364, "step": 19060 }, { "epoch": 0.3538878721030302, "grad_norm": 33.71875, "learning_rate": 9.944705036017117e-06, "loss": 21.6865, "step": 19070 }, { "epoch": 0.35407344518751, "grad_norm": 33.5625, "learning_rate": 9.944676040231074e-06, "loss": 21.4553, "step": 19080 }, { "epoch": 0.35425901827198986, "grad_norm": 34.375, "learning_rate": 9.944647044445032e-06, "loss": 21.4714, "step": 19090 }, { "epoch": 0.35444459135646966, "grad_norm": 35.5, "learning_rate": 9.944618048658989e-06, "loss": 21.2766, "step": 19100 }, { "epoch": 0.35463016444094947, "grad_norm": 36.40625, "learning_rate": 9.944589052872946e-06, "loss": 21.7674, "step": 19110 }, { "epoch": 0.35481573752542933, "grad_norm": 33.59375, "learning_rate": 9.944560057086904e-06, "loss": 21.3237, "step": 19120 }, { "epoch": 0.35500131060990914, "grad_norm": 34.21875, "learning_rate": 9.944531061300861e-06, "loss": 21.6426, "step": 19130 }, { "epoch": 0.35518688369438894, "grad_norm": 38.28125, "learning_rate": 9.94450206551482e-06, "loss": 21.7993, "step": 19140 }, { "epoch": 0.3553724567788688, "grad_norm": 34.78125, "learning_rate": 9.944473069728778e-06, "loss": 21.6046, "step": 19150 }, { "epoch": 0.3555580298633486, "grad_norm": 34.625, "learning_rate": 9.944444073942735e-06, "loss": 21.9528, "step": 19160 }, { "epoch": 0.3557436029478285, "grad_norm": 33.4375, "learning_rate": 9.944415078156693e-06, "loss": 21.5006, "step": 19170 }, { "epoch": 0.3559291760323083, "grad_norm": 33.1875, "learning_rate": 9.94438608237065e-06, "loss": 21.2048, "step": 19180 }, { "epoch": 0.3561147491167881, "grad_norm": 34.15625, "learning_rate": 9.944357086584607e-06, "loss": 21.4316, "step": 19190 }, { "epoch": 0.35630032220126795, "grad_norm": 35.46875, "learning_rate": 9.944328090798565e-06, "loss": 21.9165, "step": 19200 }, { "epoch": 0.35648589528574776, "grad_norm": 36.6875, "learning_rate": 9.944299095012522e-06, "loss": 21.4528, "step": 19210 }, { "epoch": 0.35667146837022756, "grad_norm": 34.5625, "learning_rate": 9.944270099226481e-06, "loss": 21.9393, "step": 19220 }, { "epoch": 0.3568570414547074, "grad_norm": 36.8125, "learning_rate": 9.944241103440437e-06, "loss": 21.4406, "step": 19230 }, { "epoch": 0.35704261453918723, "grad_norm": 34.09375, "learning_rate": 9.944212107654394e-06, "loss": 21.279, "step": 19240 }, { "epoch": 0.35722818762366704, "grad_norm": 34.875, "learning_rate": 9.944183111868353e-06, "loss": 20.9569, "step": 19250 }, { "epoch": 0.3574137607081469, "grad_norm": 34.96875, "learning_rate": 9.944154116082311e-06, "loss": 21.32, "step": 19260 }, { "epoch": 0.3575993337926267, "grad_norm": 34.21875, "learning_rate": 9.944125120296268e-06, "loss": 21.2426, "step": 19270 }, { "epoch": 0.35778490687710657, "grad_norm": 34.53125, "learning_rate": 9.944096124510226e-06, "loss": 21.2983, "step": 19280 }, { "epoch": 0.3579704799615864, "grad_norm": 35.375, "learning_rate": 9.944067128724183e-06, "loss": 21.687, "step": 19290 }, { "epoch": 0.3581560530460662, "grad_norm": 35.5, "learning_rate": 9.94403813293814e-06, "loss": 21.6977, "step": 19300 }, { "epoch": 0.35834162613054604, "grad_norm": 35.3125, "learning_rate": 9.944009137152098e-06, "loss": 21.5511, "step": 19310 }, { "epoch": 0.35852719921502585, "grad_norm": 34.84375, "learning_rate": 9.943980141366057e-06, "loss": 21.4945, "step": 19320 }, { "epoch": 0.35871277229950566, "grad_norm": 33.125, "learning_rate": 9.943951145580014e-06, "loss": 21.4095, "step": 19330 }, { "epoch": 0.3588983453839855, "grad_norm": 35.4375, "learning_rate": 9.94392214979397e-06, "loss": 21.623, "step": 19340 }, { "epoch": 0.3590839184684653, "grad_norm": 35.96875, "learning_rate": 9.94389315400793e-06, "loss": 21.2826, "step": 19350 }, { "epoch": 0.3592694915529452, "grad_norm": 33.1875, "learning_rate": 9.943864158221887e-06, "loss": 21.2331, "step": 19360 }, { "epoch": 0.359455064637425, "grad_norm": 33.0, "learning_rate": 9.943835162435844e-06, "loss": 21.4414, "step": 19370 }, { "epoch": 0.3596406377219048, "grad_norm": 34.4375, "learning_rate": 9.943806166649801e-06, "loss": 21.6911, "step": 19380 }, { "epoch": 0.35982621080638466, "grad_norm": 36.46875, "learning_rate": 9.943777170863759e-06, "loss": 21.9565, "step": 19390 }, { "epoch": 0.36001178389086447, "grad_norm": 33.5625, "learning_rate": 9.943748175077716e-06, "loss": 21.1798, "step": 19400 }, { "epoch": 0.3601973569753443, "grad_norm": 35.0625, "learning_rate": 9.943719179291674e-06, "loss": 21.36, "step": 19410 }, { "epoch": 0.36038293005982414, "grad_norm": 35.15625, "learning_rate": 9.943690183505633e-06, "loss": 21.8302, "step": 19420 }, { "epoch": 0.36056850314430394, "grad_norm": 35.09375, "learning_rate": 9.94366118771959e-06, "loss": 21.5303, "step": 19430 }, { "epoch": 0.3607540762287838, "grad_norm": 34.625, "learning_rate": 9.943632191933546e-06, "loss": 21.3503, "step": 19440 }, { "epoch": 0.3609396493132636, "grad_norm": 35.5625, "learning_rate": 9.943603196147505e-06, "loss": 21.5557, "step": 19450 }, { "epoch": 0.3611252223977434, "grad_norm": 36.0, "learning_rate": 9.943574200361462e-06, "loss": 21.2134, "step": 19460 }, { "epoch": 0.3613107954822233, "grad_norm": 36.375, "learning_rate": 9.94354520457542e-06, "loss": 21.1214, "step": 19470 }, { "epoch": 0.3614963685667031, "grad_norm": 35.4375, "learning_rate": 9.943516208789377e-06, "loss": 21.4935, "step": 19480 }, { "epoch": 0.3616819416511829, "grad_norm": 34.65625, "learning_rate": 9.943487213003336e-06, "loss": 21.3517, "step": 19490 }, { "epoch": 0.36186751473566275, "grad_norm": 34.4375, "learning_rate": 9.943458217217292e-06, "loss": 21.4391, "step": 19500 }, { "epoch": 0.36205308782014256, "grad_norm": 37.65625, "learning_rate": 9.94342922143125e-06, "loss": 21.6687, "step": 19510 }, { "epoch": 0.36223866090462237, "grad_norm": 35.65625, "learning_rate": 9.943400225645209e-06, "loss": 20.8702, "step": 19520 }, { "epoch": 0.36242423398910223, "grad_norm": 36.90625, "learning_rate": 9.943371229859166e-06, "loss": 21.0082, "step": 19530 }, { "epoch": 0.36260980707358204, "grad_norm": 33.25, "learning_rate": 9.943342234073123e-06, "loss": 21.6859, "step": 19540 }, { "epoch": 0.3627953801580619, "grad_norm": 35.90625, "learning_rate": 9.94331323828708e-06, "loss": 21.2681, "step": 19550 }, { "epoch": 0.3629809532425417, "grad_norm": 35.75, "learning_rate": 9.943284242501038e-06, "loss": 21.4027, "step": 19560 }, { "epoch": 0.3631665263270215, "grad_norm": 35.0625, "learning_rate": 9.943255246714996e-06, "loss": 21.4005, "step": 19570 }, { "epoch": 0.3633520994115014, "grad_norm": 35.0, "learning_rate": 9.943226250928953e-06, "loss": 22.0243, "step": 19580 }, { "epoch": 0.3635376724959812, "grad_norm": 33.84375, "learning_rate": 9.943197255142912e-06, "loss": 21.1517, "step": 19590 }, { "epoch": 0.363723245580461, "grad_norm": 34.84375, "learning_rate": 9.94316825935687e-06, "loss": 21.5607, "step": 19600 }, { "epoch": 0.36390881866494085, "grad_norm": 34.40625, "learning_rate": 9.943139263570825e-06, "loss": 21.4868, "step": 19610 }, { "epoch": 0.36409439174942065, "grad_norm": 36.9375, "learning_rate": 9.943110267784784e-06, "loss": 21.6598, "step": 19620 }, { "epoch": 0.3642799648339005, "grad_norm": 34.0625, "learning_rate": 9.943081271998742e-06, "loss": 21.3738, "step": 19630 }, { "epoch": 0.3644655379183803, "grad_norm": 37.0, "learning_rate": 9.943052276212699e-06, "loss": 21.3922, "step": 19640 }, { "epoch": 0.36465111100286013, "grad_norm": 34.78125, "learning_rate": 9.943023280426657e-06, "loss": 21.692, "step": 19650 }, { "epoch": 0.36483668408734, "grad_norm": 34.9375, "learning_rate": 9.942994284640614e-06, "loss": 21.7272, "step": 19660 }, { "epoch": 0.3650222571718198, "grad_norm": 36.0, "learning_rate": 9.942965288854571e-06, "loss": 21.4188, "step": 19670 }, { "epoch": 0.3652078302562996, "grad_norm": 33.375, "learning_rate": 9.942936293068529e-06, "loss": 21.5631, "step": 19680 }, { "epoch": 0.36539340334077947, "grad_norm": 33.40625, "learning_rate": 9.942907297282486e-06, "loss": 21.3399, "step": 19690 }, { "epoch": 0.36557897642525927, "grad_norm": 32.84375, "learning_rate": 9.942878301496445e-06, "loss": 21.2938, "step": 19700 }, { "epoch": 0.36576454950973913, "grad_norm": 34.0625, "learning_rate": 9.942849305710401e-06, "loss": 21.2609, "step": 19710 }, { "epoch": 0.36595012259421894, "grad_norm": 35.21875, "learning_rate": 9.942820309924358e-06, "loss": 21.7159, "step": 19720 }, { "epoch": 0.36613569567869875, "grad_norm": 36.09375, "learning_rate": 9.942791314138317e-06, "loss": 21.7571, "step": 19730 }, { "epoch": 0.3663212687631786, "grad_norm": 37.9375, "learning_rate": 9.942762318352275e-06, "loss": 21.2937, "step": 19740 }, { "epoch": 0.3665068418476584, "grad_norm": 37.84375, "learning_rate": 9.942733322566232e-06, "loss": 21.4546, "step": 19750 }, { "epoch": 0.3666924149321382, "grad_norm": 35.71875, "learning_rate": 9.94270432678019e-06, "loss": 21.3717, "step": 19760 }, { "epoch": 0.3668779880166181, "grad_norm": 34.8125, "learning_rate": 9.942675330994147e-06, "loss": 22.14, "step": 19770 }, { "epoch": 0.3670635611010979, "grad_norm": 36.3125, "learning_rate": 9.942646335208105e-06, "loss": 21.1358, "step": 19780 }, { "epoch": 0.3672491341855777, "grad_norm": 34.84375, "learning_rate": 9.942617339422062e-06, "loss": 21.3856, "step": 19790 }, { "epoch": 0.36743470727005756, "grad_norm": 36.03125, "learning_rate": 9.942588343636021e-06, "loss": 21.5621, "step": 19800 }, { "epoch": 0.36762028035453737, "grad_norm": 34.15625, "learning_rate": 9.942559347849978e-06, "loss": 21.6981, "step": 19810 }, { "epoch": 0.3678058534390172, "grad_norm": 35.8125, "learning_rate": 9.942530352063934e-06, "loss": 21.2015, "step": 19820 }, { "epoch": 0.36799142652349703, "grad_norm": 37.4375, "learning_rate": 9.942501356277893e-06, "loss": 22.0159, "step": 19830 }, { "epoch": 0.36817699960797684, "grad_norm": 34.21875, "learning_rate": 9.94247236049185e-06, "loss": 21.571, "step": 19840 }, { "epoch": 0.3683625726924567, "grad_norm": 35.375, "learning_rate": 9.942443364705808e-06, "loss": 21.416, "step": 19850 }, { "epoch": 0.3685481457769365, "grad_norm": 34.625, "learning_rate": 9.942414368919765e-06, "loss": 20.9028, "step": 19860 }, { "epoch": 0.3687337188614163, "grad_norm": 35.4375, "learning_rate": 9.942385373133723e-06, "loss": 21.527, "step": 19870 }, { "epoch": 0.3689192919458962, "grad_norm": 34.375, "learning_rate": 9.94235637734768e-06, "loss": 21.1836, "step": 19880 }, { "epoch": 0.369104865030376, "grad_norm": 38.21875, "learning_rate": 9.942327381561638e-06, "loss": 21.2912, "step": 19890 }, { "epoch": 0.36929043811485585, "grad_norm": 35.875, "learning_rate": 9.942298385775597e-06, "loss": 21.7566, "step": 19900 }, { "epoch": 0.36947601119933565, "grad_norm": 37.375, "learning_rate": 9.942269389989554e-06, "loss": 21.5261, "step": 19910 }, { "epoch": 0.36966158428381546, "grad_norm": 35.96875, "learning_rate": 9.942240394203512e-06, "loss": 21.7249, "step": 19920 }, { "epoch": 0.3698471573682953, "grad_norm": 34.5625, "learning_rate": 9.942211398417469e-06, "loss": 21.9828, "step": 19930 }, { "epoch": 0.3700327304527751, "grad_norm": 35.96875, "learning_rate": 9.942182402631426e-06, "loss": 21.419, "step": 19940 }, { "epoch": 0.37021830353725493, "grad_norm": 35.59375, "learning_rate": 9.942153406845384e-06, "loss": 21.6967, "step": 19950 }, { "epoch": 0.3704038766217348, "grad_norm": 33.3125, "learning_rate": 9.942124411059341e-06, "loss": 21.5348, "step": 19960 }, { "epoch": 0.3705894497062146, "grad_norm": 35.34375, "learning_rate": 9.9420954152733e-06, "loss": 21.3894, "step": 19970 }, { "epoch": 0.37077502279069446, "grad_norm": 33.28125, "learning_rate": 9.942066419487256e-06, "loss": 21.2541, "step": 19980 }, { "epoch": 0.37096059587517427, "grad_norm": 36.15625, "learning_rate": 9.942037423701213e-06, "loss": 21.0304, "step": 19990 }, { "epoch": 0.3711461689596541, "grad_norm": 35.65625, "learning_rate": 9.942008427915173e-06, "loss": 21.6275, "step": 20000 }, { "epoch": 0.3711461689596541, "eval_loss": 2.6757473945617676, "eval_runtime": 453.3076, "eval_samples_per_second": 3203.381, "eval_steps_per_second": 50.054, "step": 20000 }, { "epoch": 0.37133174204413394, "grad_norm": 37.40625, "learning_rate": 9.94197943212913e-06, "loss": 21.1517, "step": 20010 }, { "epoch": 0.37151731512861375, "grad_norm": 35.5, "learning_rate": 9.941950436343087e-06, "loss": 21.8961, "step": 20020 }, { "epoch": 0.37170288821309355, "grad_norm": 33.875, "learning_rate": 9.941921440557045e-06, "loss": 21.297, "step": 20030 }, { "epoch": 0.3718884612975734, "grad_norm": 34.4375, "learning_rate": 9.941892444771002e-06, "loss": 21.2152, "step": 20040 }, { "epoch": 0.3720740343820532, "grad_norm": 33.75, "learning_rate": 9.94186344898496e-06, "loss": 21.5122, "step": 20050 }, { "epoch": 0.372259607466533, "grad_norm": 33.375, "learning_rate": 9.941834453198917e-06, "loss": 21.7115, "step": 20060 }, { "epoch": 0.3724451805510129, "grad_norm": 34.90625, "learning_rate": 9.941805457412876e-06, "loss": 21.4614, "step": 20070 }, { "epoch": 0.3726307536354927, "grad_norm": 34.28125, "learning_rate": 9.941776461626833e-06, "loss": 21.2661, "step": 20080 }, { "epoch": 0.37281632671997256, "grad_norm": 32.09375, "learning_rate": 9.94174746584079e-06, "loss": 21.056, "step": 20090 }, { "epoch": 0.37300189980445236, "grad_norm": 34.96875, "learning_rate": 9.941718470054748e-06, "loss": 21.249, "step": 20100 }, { "epoch": 0.37318747288893217, "grad_norm": 34.5, "learning_rate": 9.941689474268706e-06, "loss": 20.9543, "step": 20110 }, { "epoch": 0.37337304597341203, "grad_norm": 35.21875, "learning_rate": 9.941660478482663e-06, "loss": 21.4361, "step": 20120 }, { "epoch": 0.37355861905789184, "grad_norm": 34.875, "learning_rate": 9.94163148269662e-06, "loss": 21.5762, "step": 20130 }, { "epoch": 0.37374419214237165, "grad_norm": 37.21875, "learning_rate": 9.941602486910578e-06, "loss": 21.4474, "step": 20140 }, { "epoch": 0.3739297652268515, "grad_norm": 36.28125, "learning_rate": 9.941573491124535e-06, "loss": 21.4356, "step": 20150 }, { "epoch": 0.3741153383113313, "grad_norm": 33.8125, "learning_rate": 9.941544495338493e-06, "loss": 21.2281, "step": 20160 }, { "epoch": 0.3743009113958112, "grad_norm": 32.8125, "learning_rate": 9.94151549955245e-06, "loss": 21.3117, "step": 20170 }, { "epoch": 0.374486484480291, "grad_norm": 35.0, "learning_rate": 9.94148650376641e-06, "loss": 21.3198, "step": 20180 }, { "epoch": 0.3746720575647708, "grad_norm": 37.8125, "learning_rate": 9.941457507980367e-06, "loss": 21.4875, "step": 20190 }, { "epoch": 0.37485763064925065, "grad_norm": 35.03125, "learning_rate": 9.941428512194324e-06, "loss": 21.4072, "step": 20200 }, { "epoch": 0.37504320373373046, "grad_norm": 36.40625, "learning_rate": 9.941399516408281e-06, "loss": 21.2224, "step": 20210 }, { "epoch": 0.37522877681821026, "grad_norm": 36.03125, "learning_rate": 9.941370520622239e-06, "loss": 21.606, "step": 20220 }, { "epoch": 0.3754143499026901, "grad_norm": 35.28125, "learning_rate": 9.941341524836196e-06, "loss": 21.2329, "step": 20230 }, { "epoch": 0.37559992298716993, "grad_norm": 36.28125, "learning_rate": 9.941312529050154e-06, "loss": 20.9903, "step": 20240 }, { "epoch": 0.3757854960716498, "grad_norm": 34.59375, "learning_rate": 9.941283533264111e-06, "loss": 21.4073, "step": 20250 }, { "epoch": 0.3759710691561296, "grad_norm": 34.9375, "learning_rate": 9.941254537478069e-06, "loss": 21.637, "step": 20260 }, { "epoch": 0.3761566422406094, "grad_norm": 34.15625, "learning_rate": 9.941225541692026e-06, "loss": 21.1687, "step": 20270 }, { "epoch": 0.37634221532508927, "grad_norm": 36.84375, "learning_rate": 9.941196545905985e-06, "loss": 21.4842, "step": 20280 }, { "epoch": 0.3765277884095691, "grad_norm": 36.28125, "learning_rate": 9.941167550119942e-06, "loss": 21.6633, "step": 20290 }, { "epoch": 0.3767133614940489, "grad_norm": 37.9375, "learning_rate": 9.941138554333898e-06, "loss": 21.217, "step": 20300 }, { "epoch": 0.37689893457852874, "grad_norm": 34.75, "learning_rate": 9.941109558547857e-06, "loss": 21.5193, "step": 20310 }, { "epoch": 0.37708450766300855, "grad_norm": 33.09375, "learning_rate": 9.941080562761815e-06, "loss": 21.1617, "step": 20320 }, { "epoch": 0.37727008074748836, "grad_norm": 34.53125, "learning_rate": 9.941051566975772e-06, "loss": 21.3097, "step": 20330 }, { "epoch": 0.3774556538319682, "grad_norm": 33.90625, "learning_rate": 9.94102257118973e-06, "loss": 21.4043, "step": 20340 }, { "epoch": 0.377641226916448, "grad_norm": 35.625, "learning_rate": 9.940993575403689e-06, "loss": 21.6863, "step": 20350 }, { "epoch": 0.3778268000009279, "grad_norm": 33.78125, "learning_rate": 9.940964579617644e-06, "loss": 21.3448, "step": 20360 }, { "epoch": 0.3780123730854077, "grad_norm": 36.78125, "learning_rate": 9.940935583831602e-06, "loss": 21.0266, "step": 20370 }, { "epoch": 0.3781979461698875, "grad_norm": 34.84375, "learning_rate": 9.94090658804556e-06, "loss": 21.4672, "step": 20380 }, { "epoch": 0.37838351925436736, "grad_norm": 33.78125, "learning_rate": 9.940877592259518e-06, "loss": 21.7418, "step": 20390 }, { "epoch": 0.37856909233884717, "grad_norm": 35.65625, "learning_rate": 9.940848596473476e-06, "loss": 21.626, "step": 20400 }, { "epoch": 0.378754665423327, "grad_norm": 34.5, "learning_rate": 9.940819600687433e-06, "loss": 21.6884, "step": 20410 }, { "epoch": 0.37894023850780684, "grad_norm": 35.6875, "learning_rate": 9.94079060490139e-06, "loss": 21.1694, "step": 20420 }, { "epoch": 0.37912581159228664, "grad_norm": 35.28125, "learning_rate": 9.940761609115348e-06, "loss": 21.7092, "step": 20430 }, { "epoch": 0.3793113846767665, "grad_norm": 36.65625, "learning_rate": 9.940732613329305e-06, "loss": 21.0703, "step": 20440 }, { "epoch": 0.3794969577612463, "grad_norm": 36.6875, "learning_rate": 9.940703617543264e-06, "loss": 21.3644, "step": 20450 }, { "epoch": 0.3796825308457261, "grad_norm": 35.34375, "learning_rate": 9.94067462175722e-06, "loss": 21.7173, "step": 20460 }, { "epoch": 0.379868103930206, "grad_norm": 35.03125, "learning_rate": 9.940645625971177e-06, "loss": 21.8825, "step": 20470 }, { "epoch": 0.3800536770146858, "grad_norm": 35.3125, "learning_rate": 9.940616630185137e-06, "loss": 21.3377, "step": 20480 }, { "epoch": 0.3802392500991656, "grad_norm": 33.875, "learning_rate": 9.940587634399094e-06, "loss": 21.2556, "step": 20490 }, { "epoch": 0.38042482318364546, "grad_norm": 35.15625, "learning_rate": 9.940558638613051e-06, "loss": 21.1311, "step": 20500 }, { "epoch": 0.38061039626812526, "grad_norm": 34.78125, "learning_rate": 9.940529642827009e-06, "loss": 21.4698, "step": 20510 }, { "epoch": 0.3807959693526051, "grad_norm": 35.8125, "learning_rate": 9.940500647040966e-06, "loss": 21.5138, "step": 20520 }, { "epoch": 0.38098154243708493, "grad_norm": 36.46875, "learning_rate": 9.940471651254924e-06, "loss": 21.1583, "step": 20530 }, { "epoch": 0.38116711552156474, "grad_norm": 33.375, "learning_rate": 9.940442655468881e-06, "loss": 21.1859, "step": 20540 }, { "epoch": 0.3813526886060446, "grad_norm": 35.03125, "learning_rate": 9.94041365968284e-06, "loss": 20.9751, "step": 20550 }, { "epoch": 0.3815382616905244, "grad_norm": 34.96875, "learning_rate": 9.940384663896798e-06, "loss": 21.1197, "step": 20560 }, { "epoch": 0.3817238347750042, "grad_norm": 35.9375, "learning_rate": 9.940355668110753e-06, "loss": 21.0839, "step": 20570 }, { "epoch": 0.3819094078594841, "grad_norm": 36.96875, "learning_rate": 9.940326672324712e-06, "loss": 21.6738, "step": 20580 }, { "epoch": 0.3820949809439639, "grad_norm": 35.71875, "learning_rate": 9.94029767653867e-06, "loss": 21.366, "step": 20590 }, { "epoch": 0.38228055402844374, "grad_norm": 36.84375, "learning_rate": 9.940268680752627e-06, "loss": 21.195, "step": 20600 }, { "epoch": 0.38246612711292355, "grad_norm": 34.84375, "learning_rate": 9.940239684966585e-06, "loss": 21.5918, "step": 20610 }, { "epoch": 0.38265170019740335, "grad_norm": 36.1875, "learning_rate": 9.940210689180542e-06, "loss": 20.7986, "step": 20620 }, { "epoch": 0.3828372732818832, "grad_norm": 34.09375, "learning_rate": 9.9401816933945e-06, "loss": 21.4164, "step": 20630 }, { "epoch": 0.383022846366363, "grad_norm": 34.96875, "learning_rate": 9.940152697608457e-06, "loss": 21.2902, "step": 20640 }, { "epoch": 0.38320841945084283, "grad_norm": 32.65625, "learning_rate": 9.940123701822416e-06, "loss": 21.2691, "step": 20650 }, { "epoch": 0.3833939925353227, "grad_norm": 36.75, "learning_rate": 9.940094706036373e-06, "loss": 20.983, "step": 20660 }, { "epoch": 0.3835795656198025, "grad_norm": 35.15625, "learning_rate": 9.94006571025033e-06, "loss": 20.9418, "step": 20670 }, { "epoch": 0.3837651387042823, "grad_norm": 37.3125, "learning_rate": 9.940036714464288e-06, "loss": 21.948, "step": 20680 }, { "epoch": 0.38395071178876217, "grad_norm": 34.78125, "learning_rate": 9.940007718678245e-06, "loss": 22.0029, "step": 20690 }, { "epoch": 0.384136284873242, "grad_norm": 34.90625, "learning_rate": 9.939978722892203e-06, "loss": 21.1508, "step": 20700 }, { "epoch": 0.38432185795772184, "grad_norm": 33.90625, "learning_rate": 9.93994972710616e-06, "loss": 21.2856, "step": 20710 }, { "epoch": 0.38450743104220164, "grad_norm": 35.0, "learning_rate": 9.939920731320118e-06, "loss": 21.1426, "step": 20720 }, { "epoch": 0.38469300412668145, "grad_norm": 35.28125, "learning_rate": 9.939891735534075e-06, "loss": 21.1099, "step": 20730 }, { "epoch": 0.3848785772111613, "grad_norm": 35.375, "learning_rate": 9.939862739748033e-06, "loss": 21.4204, "step": 20740 }, { "epoch": 0.3850641502956411, "grad_norm": 34.28125, "learning_rate": 9.93983374396199e-06, "loss": 21.0373, "step": 20750 }, { "epoch": 0.3852497233801209, "grad_norm": 34.90625, "learning_rate": 9.939804748175949e-06, "loss": 22.1613, "step": 20760 }, { "epoch": 0.3854352964646008, "grad_norm": 35.53125, "learning_rate": 9.939775752389906e-06, "loss": 21.1425, "step": 20770 }, { "epoch": 0.3856208695490806, "grad_norm": 35.3125, "learning_rate": 9.939746756603864e-06, "loss": 21.195, "step": 20780 }, { "epoch": 0.38580644263356045, "grad_norm": 35.8125, "learning_rate": 9.939717760817821e-06, "loss": 21.2977, "step": 20790 }, { "epoch": 0.38599201571804026, "grad_norm": 36.46875, "learning_rate": 9.939688765031779e-06, "loss": 21.1699, "step": 20800 }, { "epoch": 0.38617758880252007, "grad_norm": 35.3125, "learning_rate": 9.939659769245736e-06, "loss": 20.9866, "step": 20810 }, { "epoch": 0.38636316188699993, "grad_norm": 36.65625, "learning_rate": 9.939630773459693e-06, "loss": 21.6496, "step": 20820 }, { "epoch": 0.38654873497147973, "grad_norm": 34.71875, "learning_rate": 9.939601777673653e-06, "loss": 21.3789, "step": 20830 }, { "epoch": 0.38673430805595954, "grad_norm": 33.28125, "learning_rate": 9.939572781887608e-06, "loss": 21.2779, "step": 20840 }, { "epoch": 0.3869198811404394, "grad_norm": 34.15625, "learning_rate": 9.939543786101566e-06, "loss": 21.5408, "step": 20850 }, { "epoch": 0.3871054542249192, "grad_norm": 35.0, "learning_rate": 9.939514790315525e-06, "loss": 21.4127, "step": 20860 }, { "epoch": 0.38729102730939907, "grad_norm": 34.46875, "learning_rate": 9.939485794529482e-06, "loss": 21.4833, "step": 20870 }, { "epoch": 0.3874766003938789, "grad_norm": 35.625, "learning_rate": 9.93945679874344e-06, "loss": 21.41, "step": 20880 }, { "epoch": 0.3876621734783587, "grad_norm": 33.6875, "learning_rate": 9.939427802957397e-06, "loss": 21.1335, "step": 20890 }, { "epoch": 0.38784774656283855, "grad_norm": 34.8125, "learning_rate": 9.939398807171354e-06, "loss": 21.3565, "step": 20900 }, { "epoch": 0.38803331964731835, "grad_norm": 35.40625, "learning_rate": 9.939369811385312e-06, "loss": 20.9849, "step": 20910 }, { "epoch": 0.38821889273179816, "grad_norm": 35.0, "learning_rate": 9.93934081559927e-06, "loss": 20.8741, "step": 20920 }, { "epoch": 0.388404465816278, "grad_norm": 33.625, "learning_rate": 9.939311819813228e-06, "loss": 20.8931, "step": 20930 }, { "epoch": 0.38859003890075783, "grad_norm": 36.5, "learning_rate": 9.939282824027186e-06, "loss": 21.2333, "step": 20940 }, { "epoch": 0.38877561198523763, "grad_norm": 35.96875, "learning_rate": 9.939253828241141e-06, "loss": 21.4989, "step": 20950 }, { "epoch": 0.3889611850697175, "grad_norm": 34.75, "learning_rate": 9.9392248324551e-06, "loss": 20.8834, "step": 20960 }, { "epoch": 0.3891467581541973, "grad_norm": 33.9375, "learning_rate": 9.939195836669058e-06, "loss": 21.2821, "step": 20970 }, { "epoch": 0.38933233123867717, "grad_norm": 35.03125, "learning_rate": 9.939166840883015e-06, "loss": 21.2434, "step": 20980 }, { "epoch": 0.38951790432315697, "grad_norm": 35.71875, "learning_rate": 9.939137845096973e-06, "loss": 21.0523, "step": 20990 }, { "epoch": 0.3897034774076368, "grad_norm": 35.25, "learning_rate": 9.93910884931093e-06, "loss": 21.281, "step": 21000 }, { "epoch": 0.38988905049211664, "grad_norm": 35.46875, "learning_rate": 9.939079853524888e-06, "loss": 21.435, "step": 21010 }, { "epoch": 0.39007462357659645, "grad_norm": 33.46875, "learning_rate": 9.939050857738845e-06, "loss": 21.3981, "step": 21020 }, { "epoch": 0.39026019666107625, "grad_norm": 34.6875, "learning_rate": 9.939021861952804e-06, "loss": 21.0117, "step": 21030 }, { "epoch": 0.3904457697455561, "grad_norm": 35.40625, "learning_rate": 9.938992866166762e-06, "loss": 21.1832, "step": 21040 }, { "epoch": 0.3906313428300359, "grad_norm": 34.8125, "learning_rate": 9.938963870380717e-06, "loss": 21.2241, "step": 21050 }, { "epoch": 0.3908169159145158, "grad_norm": 33.0, "learning_rate": 9.938934874594676e-06, "loss": 21.5346, "step": 21060 }, { "epoch": 0.3910024889989956, "grad_norm": 34.6875, "learning_rate": 9.938905878808634e-06, "loss": 21.1235, "step": 21070 }, { "epoch": 0.3911880620834754, "grad_norm": 34.0625, "learning_rate": 9.938876883022591e-06, "loss": 21.1906, "step": 21080 }, { "epoch": 0.39137363516795526, "grad_norm": 33.40625, "learning_rate": 9.938847887236549e-06, "loss": 21.4451, "step": 21090 }, { "epoch": 0.39155920825243506, "grad_norm": 34.875, "learning_rate": 9.938818891450508e-06, "loss": 20.9447, "step": 21100 }, { "epoch": 0.39174478133691487, "grad_norm": 35.71875, "learning_rate": 9.938789895664463e-06, "loss": 21.0956, "step": 21110 }, { "epoch": 0.39193035442139473, "grad_norm": 33.125, "learning_rate": 9.93876089987842e-06, "loss": 20.8059, "step": 21120 }, { "epoch": 0.39211592750587454, "grad_norm": 32.8125, "learning_rate": 9.93873190409238e-06, "loss": 21.3003, "step": 21130 }, { "epoch": 0.3923015005903544, "grad_norm": 34.53125, "learning_rate": 9.938702908306337e-06, "loss": 21.3167, "step": 21140 }, { "epoch": 0.3924870736748342, "grad_norm": 35.03125, "learning_rate": 9.938673912520295e-06, "loss": 21.307, "step": 21150 }, { "epoch": 0.392672646759314, "grad_norm": 35.71875, "learning_rate": 9.938644916734252e-06, "loss": 21.1579, "step": 21160 }, { "epoch": 0.3928582198437939, "grad_norm": 34.59375, "learning_rate": 9.93861592094821e-06, "loss": 21.4428, "step": 21170 }, { "epoch": 0.3930437929282737, "grad_norm": 33.59375, "learning_rate": 9.938586925162167e-06, "loss": 21.4122, "step": 21180 }, { "epoch": 0.3932293660127535, "grad_norm": 36.0625, "learning_rate": 9.938557929376124e-06, "loss": 21.1277, "step": 21190 }, { "epoch": 0.39341493909723335, "grad_norm": 36.03125, "learning_rate": 9.938528933590082e-06, "loss": 21.1074, "step": 21200 }, { "epoch": 0.39360051218171316, "grad_norm": 32.875, "learning_rate": 9.938499937804039e-06, "loss": 21.2574, "step": 21210 }, { "epoch": 0.39378608526619296, "grad_norm": 33.65625, "learning_rate": 9.938470942017997e-06, "loss": 21.3344, "step": 21220 }, { "epoch": 0.3939716583506728, "grad_norm": 33.46875, "learning_rate": 9.938441946231954e-06, "loss": 21.6166, "step": 21230 }, { "epoch": 0.39415723143515263, "grad_norm": 32.65625, "learning_rate": 9.938412950445913e-06, "loss": 21.6185, "step": 21240 }, { "epoch": 0.3943428045196325, "grad_norm": 36.875, "learning_rate": 9.93838395465987e-06, "loss": 21.2259, "step": 21250 }, { "epoch": 0.3945283776041123, "grad_norm": 34.5, "learning_rate": 9.938354958873828e-06, "loss": 21.6313, "step": 21260 }, { "epoch": 0.3947139506885921, "grad_norm": 32.90625, "learning_rate": 9.938325963087785e-06, "loss": 21.2726, "step": 21270 }, { "epoch": 0.39489952377307197, "grad_norm": 35.03125, "learning_rate": 9.938296967301743e-06, "loss": 21.2475, "step": 21280 }, { "epoch": 0.3950850968575518, "grad_norm": 37.65625, "learning_rate": 9.9382679715157e-06, "loss": 21.1077, "step": 21290 }, { "epoch": 0.3952706699420316, "grad_norm": 35.0, "learning_rate": 9.938238975729657e-06, "loss": 21.5344, "step": 21300 }, { "epoch": 0.39545624302651144, "grad_norm": 35.78125, "learning_rate": 9.938209979943617e-06, "loss": 21.3853, "step": 21310 }, { "epoch": 0.39564181611099125, "grad_norm": 34.96875, "learning_rate": 9.938180984157572e-06, "loss": 21.4214, "step": 21320 }, { "epoch": 0.3958273891954711, "grad_norm": 34.8125, "learning_rate": 9.93815198837153e-06, "loss": 21.111, "step": 21330 }, { "epoch": 0.3960129622799509, "grad_norm": 34.8125, "learning_rate": 9.938122992585489e-06, "loss": 21.2438, "step": 21340 }, { "epoch": 0.3961985353644307, "grad_norm": 34.84375, "learning_rate": 9.938093996799446e-06, "loss": 20.9801, "step": 21350 }, { "epoch": 0.3963841084489106, "grad_norm": 36.125, "learning_rate": 9.938065001013404e-06, "loss": 20.6129, "step": 21360 }, { "epoch": 0.3965696815333904, "grad_norm": 32.96875, "learning_rate": 9.938036005227361e-06, "loss": 21.0012, "step": 21370 }, { "epoch": 0.3967552546178702, "grad_norm": 33.9375, "learning_rate": 9.938007009441318e-06, "loss": 21.3073, "step": 21380 }, { "epoch": 0.39694082770235006, "grad_norm": 37.4375, "learning_rate": 9.937978013655276e-06, "loss": 21.4929, "step": 21390 }, { "epoch": 0.39712640078682987, "grad_norm": 33.4375, "learning_rate": 9.937949017869233e-06, "loss": 21.2244, "step": 21400 }, { "epoch": 0.39731197387130973, "grad_norm": 35.5625, "learning_rate": 9.937920022083192e-06, "loss": 21.2312, "step": 21410 }, { "epoch": 0.39749754695578954, "grad_norm": 35.375, "learning_rate": 9.93789102629715e-06, "loss": 21.3317, "step": 21420 }, { "epoch": 0.39768312004026934, "grad_norm": 35.375, "learning_rate": 9.937862030511105e-06, "loss": 21.4387, "step": 21430 }, { "epoch": 0.3978686931247492, "grad_norm": 35.59375, "learning_rate": 9.937833034725065e-06, "loss": 21.0185, "step": 21440 }, { "epoch": 0.398054266209229, "grad_norm": 34.5, "learning_rate": 9.937804038939022e-06, "loss": 21.4504, "step": 21450 }, { "epoch": 0.3982398392937088, "grad_norm": 34.53125, "learning_rate": 9.93777504315298e-06, "loss": 21.5877, "step": 21460 }, { "epoch": 0.3984254123781887, "grad_norm": 34.75, "learning_rate": 9.937746047366937e-06, "loss": 21.2731, "step": 21470 }, { "epoch": 0.3986109854626685, "grad_norm": 34.59375, "learning_rate": 9.937717051580894e-06, "loss": 21.3098, "step": 21480 }, { "epoch": 0.3987965585471483, "grad_norm": 36.78125, "learning_rate": 9.937688055794852e-06, "loss": 21.2444, "step": 21490 }, { "epoch": 0.39898213163162816, "grad_norm": 35.53125, "learning_rate": 9.937659060008809e-06, "loss": 21.0413, "step": 21500 }, { "epoch": 0.39916770471610796, "grad_norm": 35.75, "learning_rate": 9.937630064222768e-06, "loss": 21.0791, "step": 21510 }, { "epoch": 0.3993532778005878, "grad_norm": 34.1875, "learning_rate": 9.937601068436726e-06, "loss": 21.4088, "step": 21520 }, { "epoch": 0.39953885088506763, "grad_norm": 35.78125, "learning_rate": 9.937572072650683e-06, "loss": 21.4432, "step": 21530 }, { "epoch": 0.39972442396954744, "grad_norm": 34.53125, "learning_rate": 9.93754307686464e-06, "loss": 20.6209, "step": 21540 }, { "epoch": 0.3999099970540273, "grad_norm": 35.5625, "learning_rate": 9.937514081078598e-06, "loss": 20.9569, "step": 21550 }, { "epoch": 0.4000955701385071, "grad_norm": 35.59375, "learning_rate": 9.937485085292555e-06, "loss": 21.1618, "step": 21560 }, { "epoch": 0.4002811432229869, "grad_norm": 34.65625, "learning_rate": 9.937456089506513e-06, "loss": 21.1128, "step": 21570 }, { "epoch": 0.4004667163074668, "grad_norm": 34.28125, "learning_rate": 9.937427093720472e-06, "loss": 21.432, "step": 21580 }, { "epoch": 0.4006522893919466, "grad_norm": 33.9375, "learning_rate": 9.937398097934427e-06, "loss": 20.9126, "step": 21590 }, { "epoch": 0.40083786247642644, "grad_norm": 36.15625, "learning_rate": 9.937369102148385e-06, "loss": 21.2893, "step": 21600 }, { "epoch": 0.40102343556090625, "grad_norm": 36.09375, "learning_rate": 9.937340106362344e-06, "loss": 20.9199, "step": 21610 }, { "epoch": 0.40120900864538606, "grad_norm": 36.375, "learning_rate": 9.937311110576301e-06, "loss": 21.4188, "step": 21620 }, { "epoch": 0.4013945817298659, "grad_norm": 35.90625, "learning_rate": 9.937282114790259e-06, "loss": 21.0579, "step": 21630 }, { "epoch": 0.4015801548143457, "grad_norm": 37.46875, "learning_rate": 9.937253119004216e-06, "loss": 21.2522, "step": 21640 }, { "epoch": 0.40176572789882553, "grad_norm": 35.0625, "learning_rate": 9.937224123218174e-06, "loss": 21.4908, "step": 21650 }, { "epoch": 0.4019513009833054, "grad_norm": 33.1875, "learning_rate": 9.937195127432131e-06, "loss": 21.2079, "step": 21660 }, { "epoch": 0.4021368740677852, "grad_norm": 36.125, "learning_rate": 9.937166131646088e-06, "loss": 21.2321, "step": 21670 }, { "epoch": 0.40232244715226506, "grad_norm": 33.0, "learning_rate": 9.937137135860046e-06, "loss": 20.916, "step": 21680 }, { "epoch": 0.40250802023674487, "grad_norm": 34.125, "learning_rate": 9.937108140074005e-06, "loss": 21.2056, "step": 21690 }, { "epoch": 0.4026935933212247, "grad_norm": 34.78125, "learning_rate": 9.93707914428796e-06, "loss": 20.9596, "step": 21700 }, { "epoch": 0.40287916640570454, "grad_norm": 33.90625, "learning_rate": 9.93705014850192e-06, "loss": 20.9652, "step": 21710 }, { "epoch": 0.40306473949018434, "grad_norm": 35.9375, "learning_rate": 9.937021152715877e-06, "loss": 21.0897, "step": 21720 }, { "epoch": 0.40325031257466415, "grad_norm": 34.875, "learning_rate": 9.936992156929834e-06, "loss": 20.8345, "step": 21730 }, { "epoch": 0.403435885659144, "grad_norm": 35.9375, "learning_rate": 9.936963161143792e-06, "loss": 21.1519, "step": 21740 }, { "epoch": 0.4036214587436238, "grad_norm": 33.15625, "learning_rate": 9.93693416535775e-06, "loss": 20.8396, "step": 21750 }, { "epoch": 0.4038070318281036, "grad_norm": 34.3125, "learning_rate": 9.936905169571707e-06, "loss": 21.4036, "step": 21760 }, { "epoch": 0.4039926049125835, "grad_norm": 35.21875, "learning_rate": 9.936876173785664e-06, "loss": 21.1442, "step": 21770 }, { "epoch": 0.4041781779970633, "grad_norm": 35.40625, "learning_rate": 9.936847177999622e-06, "loss": 20.9068, "step": 21780 }, { "epoch": 0.40436375108154315, "grad_norm": 36.1875, "learning_rate": 9.93681818221358e-06, "loss": 21.2187, "step": 21790 }, { "epoch": 0.40454932416602296, "grad_norm": 35.9375, "learning_rate": 9.936789186427536e-06, "loss": 21.6709, "step": 21800 }, { "epoch": 0.40473489725050277, "grad_norm": 35.65625, "learning_rate": 9.936760190641494e-06, "loss": 20.8711, "step": 21810 }, { "epoch": 0.40492047033498263, "grad_norm": 33.875, "learning_rate": 9.936731194855453e-06, "loss": 21.3662, "step": 21820 }, { "epoch": 0.40510604341946244, "grad_norm": 36.71875, "learning_rate": 9.93670219906941e-06, "loss": 21.4065, "step": 21830 }, { "epoch": 0.40529161650394224, "grad_norm": 33.0, "learning_rate": 9.936673203283368e-06, "loss": 21.2187, "step": 21840 }, { "epoch": 0.4054771895884221, "grad_norm": 34.78125, "learning_rate": 9.936644207497325e-06, "loss": 21.1473, "step": 21850 }, { "epoch": 0.4056627626729019, "grad_norm": 34.78125, "learning_rate": 9.936615211711282e-06, "loss": 20.9902, "step": 21860 }, { "epoch": 0.4058483357573818, "grad_norm": 35.34375, "learning_rate": 9.93658621592524e-06, "loss": 21.4328, "step": 21870 }, { "epoch": 0.4060339088418616, "grad_norm": 35.25, "learning_rate": 9.936557220139197e-06, "loss": 21.0445, "step": 21880 }, { "epoch": 0.4062194819263414, "grad_norm": 35.375, "learning_rate": 9.936528224353156e-06, "loss": 21.3687, "step": 21890 }, { "epoch": 0.40640505501082125, "grad_norm": 35.8125, "learning_rate": 9.936499228567114e-06, "loss": 20.9341, "step": 21900 }, { "epoch": 0.40659062809530105, "grad_norm": 33.6875, "learning_rate": 9.93647023278107e-06, "loss": 21.1997, "step": 21910 }, { "epoch": 0.40677620117978086, "grad_norm": 33.6875, "learning_rate": 9.936441236995029e-06, "loss": 21.0207, "step": 21920 }, { "epoch": 0.4069617742642607, "grad_norm": 34.15625, "learning_rate": 9.936412241208986e-06, "loss": 21.2807, "step": 21930 }, { "epoch": 0.40714734734874053, "grad_norm": 36.78125, "learning_rate": 9.936383245422943e-06, "loss": 21.2499, "step": 21940 }, { "epoch": 0.4073329204332204, "grad_norm": 34.3125, "learning_rate": 9.9363542496369e-06, "loss": 21.1466, "step": 21950 }, { "epoch": 0.4075184935177002, "grad_norm": 35.125, "learning_rate": 9.93632525385086e-06, "loss": 20.8732, "step": 21960 }, { "epoch": 0.40770406660218, "grad_norm": 34.5, "learning_rate": 9.936296258064816e-06, "loss": 21.2597, "step": 21970 }, { "epoch": 0.40788963968665987, "grad_norm": 33.40625, "learning_rate": 9.936267262278773e-06, "loss": 21.0341, "step": 21980 }, { "epoch": 0.4080752127711397, "grad_norm": 34.90625, "learning_rate": 9.936238266492732e-06, "loss": 20.9766, "step": 21990 }, { "epoch": 0.4082607858556195, "grad_norm": 33.9375, "learning_rate": 9.93620927070669e-06, "loss": 21.0238, "step": 22000 }, { "epoch": 0.40844635894009934, "grad_norm": 36.1875, "learning_rate": 9.936180274920647e-06, "loss": 21.1887, "step": 22010 }, { "epoch": 0.40863193202457915, "grad_norm": 35.78125, "learning_rate": 9.936151279134604e-06, "loss": 21.0594, "step": 22020 }, { "epoch": 0.408817505109059, "grad_norm": 36.59375, "learning_rate": 9.936122283348562e-06, "loss": 21.4135, "step": 22030 }, { "epoch": 0.4090030781935388, "grad_norm": 35.03125, "learning_rate": 9.93609328756252e-06, "loss": 21.2114, "step": 22040 }, { "epoch": 0.4091886512780186, "grad_norm": 38.0, "learning_rate": 9.936064291776477e-06, "loss": 21.0613, "step": 22050 }, { "epoch": 0.4093742243624985, "grad_norm": 38.5625, "learning_rate": 9.936035295990436e-06, "loss": 21.36, "step": 22060 }, { "epoch": 0.4095597974469783, "grad_norm": 35.90625, "learning_rate": 9.936006300204391e-06, "loss": 21.3268, "step": 22070 }, { "epoch": 0.4097453705314581, "grad_norm": 37.0625, "learning_rate": 9.935977304418349e-06, "loss": 21.247, "step": 22080 }, { "epoch": 0.40993094361593796, "grad_norm": 35.8125, "learning_rate": 9.935948308632308e-06, "loss": 21.2327, "step": 22090 }, { "epoch": 0.41011651670041777, "grad_norm": 34.53125, "learning_rate": 9.935919312846265e-06, "loss": 21.3096, "step": 22100 }, { "epoch": 0.4103020897848976, "grad_norm": 35.0, "learning_rate": 9.935890317060223e-06, "loss": 21.2457, "step": 22110 }, { "epoch": 0.41048766286937743, "grad_norm": 34.65625, "learning_rate": 9.93586132127418e-06, "loss": 21.1049, "step": 22120 }, { "epoch": 0.41067323595385724, "grad_norm": 37.65625, "learning_rate": 9.935832325488138e-06, "loss": 21.487, "step": 22130 }, { "epoch": 0.4108588090383371, "grad_norm": 35.125, "learning_rate": 9.935803329702095e-06, "loss": 21.1487, "step": 22140 }, { "epoch": 0.4110443821228169, "grad_norm": 34.90625, "learning_rate": 9.935774333916052e-06, "loss": 20.6907, "step": 22150 }, { "epoch": 0.4112299552072967, "grad_norm": 34.1875, "learning_rate": 9.935745338130011e-06, "loss": 21.3479, "step": 22160 }, { "epoch": 0.4114155282917766, "grad_norm": 36.125, "learning_rate": 9.935716342343969e-06, "loss": 20.9197, "step": 22170 }, { "epoch": 0.4116011013762564, "grad_norm": 33.6875, "learning_rate": 9.935687346557925e-06, "loss": 21.1271, "step": 22180 }, { "epoch": 0.4117866744607362, "grad_norm": 34.4375, "learning_rate": 9.935658350771884e-06, "loss": 21.4446, "step": 22190 }, { "epoch": 0.41197224754521605, "grad_norm": 33.125, "learning_rate": 9.935629354985841e-06, "loss": 20.7713, "step": 22200 }, { "epoch": 0.41215782062969586, "grad_norm": 34.78125, "learning_rate": 9.935600359199798e-06, "loss": 21.1482, "step": 22210 }, { "epoch": 0.4123433937141757, "grad_norm": 35.09375, "learning_rate": 9.935571363413756e-06, "loss": 20.867, "step": 22220 }, { "epoch": 0.4125289667986555, "grad_norm": 34.1875, "learning_rate": 9.935542367627713e-06, "loss": 21.1484, "step": 22230 }, { "epoch": 0.41271453988313533, "grad_norm": 34.25, "learning_rate": 9.93551337184167e-06, "loss": 21.3381, "step": 22240 }, { "epoch": 0.4129001129676152, "grad_norm": 34.84375, "learning_rate": 9.935484376055628e-06, "loss": 21.6229, "step": 22250 }, { "epoch": 0.413085686052095, "grad_norm": 34.21875, "learning_rate": 9.935455380269586e-06, "loss": 20.8437, "step": 22260 }, { "epoch": 0.4132712591365748, "grad_norm": 32.3125, "learning_rate": 9.935426384483545e-06, "loss": 21.2779, "step": 22270 }, { "epoch": 0.41345683222105467, "grad_norm": 34.59375, "learning_rate": 9.935397388697502e-06, "loss": 21.1612, "step": 22280 }, { "epoch": 0.4136424053055345, "grad_norm": 33.78125, "learning_rate": 9.93536839291146e-06, "loss": 20.7929, "step": 22290 }, { "epoch": 0.41382797839001434, "grad_norm": 33.5625, "learning_rate": 9.935339397125417e-06, "loss": 21.1799, "step": 22300 }, { "epoch": 0.41401355147449415, "grad_norm": 34.75, "learning_rate": 9.935310401339374e-06, "loss": 20.9251, "step": 22310 }, { "epoch": 0.41419912455897395, "grad_norm": 36.5, "learning_rate": 9.935281405553332e-06, "loss": 20.9843, "step": 22320 }, { "epoch": 0.4143846976434538, "grad_norm": 33.1875, "learning_rate": 9.935252409767289e-06, "loss": 20.8824, "step": 22330 }, { "epoch": 0.4145702707279336, "grad_norm": 35.90625, "learning_rate": 9.935223413981246e-06, "loss": 21.3414, "step": 22340 }, { "epoch": 0.4147558438124134, "grad_norm": 36.375, "learning_rate": 9.935194418195204e-06, "loss": 20.9215, "step": 22350 }, { "epoch": 0.4149414168968933, "grad_norm": 35.6875, "learning_rate": 9.935165422409161e-06, "loss": 21.0936, "step": 22360 }, { "epoch": 0.4151269899813731, "grad_norm": 33.8125, "learning_rate": 9.93513642662312e-06, "loss": 20.9315, "step": 22370 }, { "epoch": 0.4153125630658529, "grad_norm": 34.4375, "learning_rate": 9.935107430837078e-06, "loss": 20.7623, "step": 22380 }, { "epoch": 0.41549813615033276, "grad_norm": 36.625, "learning_rate": 9.935078435051034e-06, "loss": 21.2462, "step": 22390 }, { "epoch": 0.41568370923481257, "grad_norm": 35.5, "learning_rate": 9.935049439264993e-06, "loss": 20.9974, "step": 22400 }, { "epoch": 0.41586928231929243, "grad_norm": 35.65625, "learning_rate": 9.93502044347895e-06, "loss": 21.6205, "step": 22410 }, { "epoch": 0.41605485540377224, "grad_norm": 33.625, "learning_rate": 9.934991447692907e-06, "loss": 20.8627, "step": 22420 }, { "epoch": 0.41624042848825205, "grad_norm": 35.125, "learning_rate": 9.934962451906865e-06, "loss": 21.0586, "step": 22430 }, { "epoch": 0.4164260015727319, "grad_norm": 36.3125, "learning_rate": 9.934933456120824e-06, "loss": 20.8756, "step": 22440 }, { "epoch": 0.4166115746572117, "grad_norm": 34.21875, "learning_rate": 9.93490446033478e-06, "loss": 21.1442, "step": 22450 }, { "epoch": 0.4167971477416915, "grad_norm": 34.8125, "learning_rate": 9.934875464548737e-06, "loss": 21.3179, "step": 22460 }, { "epoch": 0.4169827208261714, "grad_norm": 35.0625, "learning_rate": 9.934846468762696e-06, "loss": 21.1509, "step": 22470 }, { "epoch": 0.4171682939106512, "grad_norm": 35.09375, "learning_rate": 9.934817472976654e-06, "loss": 20.9087, "step": 22480 }, { "epoch": 0.41735386699513105, "grad_norm": 36.9375, "learning_rate": 9.934788477190611e-06, "loss": 21.0097, "step": 22490 }, { "epoch": 0.41753944007961086, "grad_norm": 37.125, "learning_rate": 9.934759481404568e-06, "loss": 21.1743, "step": 22500 }, { "epoch": 0.41772501316409066, "grad_norm": 34.4375, "learning_rate": 9.934730485618526e-06, "loss": 21.2852, "step": 22510 }, { "epoch": 0.4179105862485705, "grad_norm": 32.96875, "learning_rate": 9.934701489832483e-06, "loss": 20.5226, "step": 22520 }, { "epoch": 0.41809615933305033, "grad_norm": 33.6875, "learning_rate": 9.93467249404644e-06, "loss": 21.4159, "step": 22530 }, { "epoch": 0.41828173241753014, "grad_norm": 34.40625, "learning_rate": 9.9346434982604e-06, "loss": 20.9896, "step": 22540 }, { "epoch": 0.41846730550201, "grad_norm": 33.9375, "learning_rate": 9.934614502474355e-06, "loss": 21.1083, "step": 22550 }, { "epoch": 0.4186528785864898, "grad_norm": 34.34375, "learning_rate": 9.934585506688313e-06, "loss": 21.1427, "step": 22560 }, { "epoch": 0.41883845167096967, "grad_norm": 35.53125, "learning_rate": 9.934556510902272e-06, "loss": 21.1074, "step": 22570 }, { "epoch": 0.4190240247554495, "grad_norm": 36.4375, "learning_rate": 9.93452751511623e-06, "loss": 21.1957, "step": 22580 }, { "epoch": 0.4192095978399293, "grad_norm": 35.71875, "learning_rate": 9.934498519330187e-06, "loss": 21.3737, "step": 22590 }, { "epoch": 0.41939517092440914, "grad_norm": 35.28125, "learning_rate": 9.934469523544144e-06, "loss": 21.1098, "step": 22600 }, { "epoch": 0.41958074400888895, "grad_norm": 34.84375, "learning_rate": 9.934440527758102e-06, "loss": 21.206, "step": 22610 }, { "epoch": 0.41976631709336876, "grad_norm": 35.03125, "learning_rate": 9.934411531972059e-06, "loss": 21.6308, "step": 22620 }, { "epoch": 0.4199518901778486, "grad_norm": 34.8125, "learning_rate": 9.934382536186016e-06, "loss": 20.8924, "step": 22630 }, { "epoch": 0.4201374632623284, "grad_norm": 35.78125, "learning_rate": 9.934353540399975e-06, "loss": 20.9669, "step": 22640 }, { "epoch": 0.42032303634680823, "grad_norm": 35.46875, "learning_rate": 9.934324544613933e-06, "loss": 21.2526, "step": 22650 }, { "epoch": 0.4205086094312881, "grad_norm": 33.1875, "learning_rate": 9.934295548827889e-06, "loss": 20.8902, "step": 22660 }, { "epoch": 0.4206941825157679, "grad_norm": 35.8125, "learning_rate": 9.934266553041848e-06, "loss": 21.2633, "step": 22670 }, { "epoch": 0.42087975560024776, "grad_norm": 37.9375, "learning_rate": 9.934237557255805e-06, "loss": 21.1654, "step": 22680 }, { "epoch": 0.42106532868472757, "grad_norm": 34.78125, "learning_rate": 9.934208561469762e-06, "loss": 20.9407, "step": 22690 }, { "epoch": 0.4212509017692074, "grad_norm": 33.09375, "learning_rate": 9.93417956568372e-06, "loss": 21.1454, "step": 22700 }, { "epoch": 0.42143647485368724, "grad_norm": 34.03125, "learning_rate": 9.934150569897677e-06, "loss": 21.0258, "step": 22710 }, { "epoch": 0.42162204793816704, "grad_norm": 34.65625, "learning_rate": 9.934121574111635e-06, "loss": 21.0839, "step": 22720 }, { "epoch": 0.42180762102264685, "grad_norm": 34.09375, "learning_rate": 9.934092578325592e-06, "loss": 21.1634, "step": 22730 }, { "epoch": 0.4219931941071267, "grad_norm": 35.46875, "learning_rate": 9.934063582539551e-06, "loss": 21.1216, "step": 22740 }, { "epoch": 0.4221787671916065, "grad_norm": 34.40625, "learning_rate": 9.934034586753509e-06, "loss": 21.0727, "step": 22750 }, { "epoch": 0.4223643402760864, "grad_norm": 35.625, "learning_rate": 9.934005590967466e-06, "loss": 20.6274, "step": 22760 }, { "epoch": 0.4225499133605662, "grad_norm": 33.09375, "learning_rate": 9.933976595181423e-06, "loss": 21.0867, "step": 22770 }, { "epoch": 0.422735486445046, "grad_norm": 36.0625, "learning_rate": 9.933947599395381e-06, "loss": 21.0255, "step": 22780 }, { "epoch": 0.42292105952952586, "grad_norm": 34.84375, "learning_rate": 9.933918603609338e-06, "loss": 21.1949, "step": 22790 }, { "epoch": 0.42310663261400566, "grad_norm": 35.5625, "learning_rate": 9.933889607823296e-06, "loss": 20.9742, "step": 22800 }, { "epoch": 0.42329220569848547, "grad_norm": 34.5625, "learning_rate": 9.933860612037253e-06, "loss": 21.0635, "step": 22810 }, { "epoch": 0.42347777878296533, "grad_norm": 33.71875, "learning_rate": 9.93383161625121e-06, "loss": 20.6119, "step": 22820 }, { "epoch": 0.42366335186744514, "grad_norm": 36.5625, "learning_rate": 9.933802620465168e-06, "loss": 20.8544, "step": 22830 }, { "epoch": 0.423848924951925, "grad_norm": 35.28125, "learning_rate": 9.933773624679125e-06, "loss": 21.1555, "step": 22840 }, { "epoch": 0.4240344980364048, "grad_norm": 35.84375, "learning_rate": 9.933744628893084e-06, "loss": 20.7869, "step": 22850 }, { "epoch": 0.4242200711208846, "grad_norm": 33.5625, "learning_rate": 9.933715633107042e-06, "loss": 21.0017, "step": 22860 }, { "epoch": 0.4244056442053645, "grad_norm": 35.125, "learning_rate": 9.933686637321e-06, "loss": 21.0555, "step": 22870 }, { "epoch": 0.4245912172898443, "grad_norm": 33.90625, "learning_rate": 9.933657641534957e-06, "loss": 21.1488, "step": 22880 }, { "epoch": 0.4247767903743241, "grad_norm": 34.40625, "learning_rate": 9.933628645748914e-06, "loss": 20.8459, "step": 22890 }, { "epoch": 0.42496236345880395, "grad_norm": 33.96875, "learning_rate": 9.933599649962871e-06, "loss": 20.7651, "step": 22900 }, { "epoch": 0.42514793654328376, "grad_norm": 33.1875, "learning_rate": 9.933570654176829e-06, "loss": 20.9696, "step": 22910 }, { "epoch": 0.42533350962776356, "grad_norm": 35.96875, "learning_rate": 9.933541658390788e-06, "loss": 21.2031, "step": 22920 }, { "epoch": 0.4255190827122434, "grad_norm": 35.0, "learning_rate": 9.933512662604744e-06, "loss": 21.2462, "step": 22930 }, { "epoch": 0.42570465579672323, "grad_norm": 35.15625, "learning_rate": 9.933483666818701e-06, "loss": 20.9242, "step": 22940 }, { "epoch": 0.4258902288812031, "grad_norm": 35.84375, "learning_rate": 9.93345467103266e-06, "loss": 20.7291, "step": 22950 }, { "epoch": 0.4260758019656829, "grad_norm": 36.75, "learning_rate": 9.933425675246618e-06, "loss": 21.4801, "step": 22960 }, { "epoch": 0.4262613750501627, "grad_norm": 36.34375, "learning_rate": 9.933396679460575e-06, "loss": 21.3721, "step": 22970 }, { "epoch": 0.42644694813464257, "grad_norm": 35.1875, "learning_rate": 9.933367683674532e-06, "loss": 20.9337, "step": 22980 }, { "epoch": 0.4266325212191224, "grad_norm": 35.28125, "learning_rate": 9.93333868788849e-06, "loss": 21.2105, "step": 22990 }, { "epoch": 0.4268180943036022, "grad_norm": 33.78125, "learning_rate": 9.933309692102447e-06, "loss": 20.954, "step": 23000 }, { "epoch": 0.42700366738808204, "grad_norm": 37.15625, "learning_rate": 9.933280696316405e-06, "loss": 21.0472, "step": 23010 }, { "epoch": 0.42718924047256185, "grad_norm": 36.25, "learning_rate": 9.933251700530364e-06, "loss": 20.8671, "step": 23020 }, { "epoch": 0.4273748135570417, "grad_norm": 34.03125, "learning_rate": 9.933222704744321e-06, "loss": 20.8912, "step": 23030 }, { "epoch": 0.4275603866415215, "grad_norm": 33.90625, "learning_rate": 9.933193708958277e-06, "loss": 21.3617, "step": 23040 }, { "epoch": 0.4277459597260013, "grad_norm": 34.6875, "learning_rate": 9.933164713172236e-06, "loss": 21.0914, "step": 23050 }, { "epoch": 0.4279315328104812, "grad_norm": 35.65625, "learning_rate": 9.933135717386193e-06, "loss": 21.1626, "step": 23060 }, { "epoch": 0.428117105894961, "grad_norm": 33.53125, "learning_rate": 9.93310672160015e-06, "loss": 21.385, "step": 23070 }, { "epoch": 0.4283026789794408, "grad_norm": 34.90625, "learning_rate": 9.933077725814108e-06, "loss": 21.1424, "step": 23080 }, { "epoch": 0.42848825206392066, "grad_norm": 34.125, "learning_rate": 9.933048730028066e-06, "loss": 20.7616, "step": 23090 }, { "epoch": 0.42867382514840047, "grad_norm": 34.6875, "learning_rate": 9.933019734242023e-06, "loss": 20.5844, "step": 23100 }, { "epoch": 0.42885939823288033, "grad_norm": 35.0625, "learning_rate": 9.93299073845598e-06, "loss": 21.0437, "step": 23110 }, { "epoch": 0.42904497131736014, "grad_norm": 34.34375, "learning_rate": 9.93296174266994e-06, "loss": 20.9484, "step": 23120 }, { "epoch": 0.42923054440183994, "grad_norm": 33.125, "learning_rate": 9.932932746883897e-06, "loss": 21.0415, "step": 23130 }, { "epoch": 0.4294161174863198, "grad_norm": 34.15625, "learning_rate": 9.932903751097853e-06, "loss": 21.0687, "step": 23140 }, { "epoch": 0.4296016905707996, "grad_norm": 34.0625, "learning_rate": 9.932874755311812e-06, "loss": 20.5685, "step": 23150 }, { "epoch": 0.4297872636552794, "grad_norm": 36.09375, "learning_rate": 9.932845759525769e-06, "loss": 21.2205, "step": 23160 }, { "epoch": 0.4299728367397593, "grad_norm": 33.71875, "learning_rate": 9.932816763739727e-06, "loss": 21.0816, "step": 23170 }, { "epoch": 0.4301584098242391, "grad_norm": 36.15625, "learning_rate": 9.932787767953684e-06, "loss": 20.8513, "step": 23180 }, { "epoch": 0.4303439829087189, "grad_norm": 34.28125, "learning_rate": 9.932758772167643e-06, "loss": 20.9409, "step": 23190 }, { "epoch": 0.43052955599319875, "grad_norm": 34.1875, "learning_rate": 9.932729776381599e-06, "loss": 20.9464, "step": 23200 }, { "epoch": 0.43071512907767856, "grad_norm": 35.25, "learning_rate": 9.932700780595556e-06, "loss": 21.2037, "step": 23210 }, { "epoch": 0.4309007021621584, "grad_norm": 34.6875, "learning_rate": 9.932671784809515e-06, "loss": 20.9092, "step": 23220 }, { "epoch": 0.43108627524663823, "grad_norm": 36.53125, "learning_rate": 9.932642789023473e-06, "loss": 20.9398, "step": 23230 }, { "epoch": 0.43127184833111804, "grad_norm": 35.5625, "learning_rate": 9.93261379323743e-06, "loss": 20.7783, "step": 23240 }, { "epoch": 0.4314574214155979, "grad_norm": 35.34375, "learning_rate": 9.932584797451387e-06, "loss": 20.5817, "step": 23250 }, { "epoch": 0.4316429945000777, "grad_norm": 35.125, "learning_rate": 9.932555801665345e-06, "loss": 20.9545, "step": 23260 }, { "epoch": 0.4318285675845575, "grad_norm": 36.59375, "learning_rate": 9.932526805879302e-06, "loss": 20.4197, "step": 23270 }, { "epoch": 0.43201414066903737, "grad_norm": 34.59375, "learning_rate": 9.93249781009326e-06, "loss": 21.3306, "step": 23280 }, { "epoch": 0.4321997137535172, "grad_norm": 34.34375, "learning_rate": 9.932468814307217e-06, "loss": 20.4233, "step": 23290 }, { "epoch": 0.43238528683799704, "grad_norm": 35.59375, "learning_rate": 9.932439818521176e-06, "loss": 20.9082, "step": 23300 }, { "epoch": 0.43257085992247685, "grad_norm": 35.375, "learning_rate": 9.932410822735132e-06, "loss": 21.0376, "step": 23310 }, { "epoch": 0.43275643300695665, "grad_norm": 37.09375, "learning_rate": 9.93238182694909e-06, "loss": 20.735, "step": 23320 }, { "epoch": 0.4329420060914365, "grad_norm": 34.65625, "learning_rate": 9.932352831163048e-06, "loss": 20.7816, "step": 23330 }, { "epoch": 0.4331275791759163, "grad_norm": 35.03125, "learning_rate": 9.932323835377006e-06, "loss": 21.1626, "step": 23340 }, { "epoch": 0.43331315226039613, "grad_norm": 33.21875, "learning_rate": 9.932294839590963e-06, "loss": 21.2156, "step": 23350 }, { "epoch": 0.433498725344876, "grad_norm": 34.71875, "learning_rate": 9.93226584380492e-06, "loss": 20.5191, "step": 23360 }, { "epoch": 0.4336842984293558, "grad_norm": 34.0625, "learning_rate": 9.932236848018878e-06, "loss": 20.6801, "step": 23370 }, { "epoch": 0.43386987151383566, "grad_norm": 34.25, "learning_rate": 9.932207852232835e-06, "loss": 20.9476, "step": 23380 }, { "epoch": 0.43405544459831547, "grad_norm": 35.0625, "learning_rate": 9.932178856446793e-06, "loss": 21.1848, "step": 23390 }, { "epoch": 0.43424101768279527, "grad_norm": 34.125, "learning_rate": 9.932149860660752e-06, "loss": 20.944, "step": 23400 }, { "epoch": 0.43442659076727513, "grad_norm": 35.71875, "learning_rate": 9.932120864874708e-06, "loss": 20.6635, "step": 23410 }, { "epoch": 0.43461216385175494, "grad_norm": 36.25, "learning_rate": 9.932091869088665e-06, "loss": 20.9886, "step": 23420 }, { "epoch": 0.43479773693623475, "grad_norm": 34.28125, "learning_rate": 9.932062873302624e-06, "loss": 21.3632, "step": 23430 }, { "epoch": 0.4349833100207146, "grad_norm": 35.15625, "learning_rate": 9.932033877516582e-06, "loss": 21.2067, "step": 23440 }, { "epoch": 0.4351688831051944, "grad_norm": 34.84375, "learning_rate": 9.932004881730539e-06, "loss": 20.9518, "step": 23450 }, { "epoch": 0.4353544561896743, "grad_norm": 35.34375, "learning_rate": 9.931975885944496e-06, "loss": 20.9001, "step": 23460 }, { "epoch": 0.4355400292741541, "grad_norm": 34.0625, "learning_rate": 9.931946890158454e-06, "loss": 20.7958, "step": 23470 }, { "epoch": 0.4357256023586339, "grad_norm": 34.1875, "learning_rate": 9.931917894372411e-06, "loss": 20.5711, "step": 23480 }, { "epoch": 0.43591117544311375, "grad_norm": 36.40625, "learning_rate": 9.931888898586369e-06, "loss": 20.796, "step": 23490 }, { "epoch": 0.43609674852759356, "grad_norm": 34.25, "learning_rate": 9.931859902800328e-06, "loss": 20.722, "step": 23500 }, { "epoch": 0.43628232161207336, "grad_norm": 37.15625, "learning_rate": 9.931830907014285e-06, "loss": 20.9826, "step": 23510 }, { "epoch": 0.4364678946965532, "grad_norm": 34.375, "learning_rate": 9.93180191122824e-06, "loss": 21.0188, "step": 23520 }, { "epoch": 0.43665346778103303, "grad_norm": 37.125, "learning_rate": 9.9317729154422e-06, "loss": 21.2313, "step": 23530 }, { "epoch": 0.43683904086551284, "grad_norm": 34.71875, "learning_rate": 9.931743919656157e-06, "loss": 20.5476, "step": 23540 }, { "epoch": 0.4370246139499927, "grad_norm": 35.125, "learning_rate": 9.931714923870115e-06, "loss": 21.0134, "step": 23550 }, { "epoch": 0.4372101870344725, "grad_norm": 35.6875, "learning_rate": 9.931685928084072e-06, "loss": 20.7415, "step": 23560 }, { "epoch": 0.43739576011895237, "grad_norm": 36.125, "learning_rate": 9.93165693229803e-06, "loss": 21.2975, "step": 23570 }, { "epoch": 0.4375813332034322, "grad_norm": 35.34375, "learning_rate": 9.931627936511987e-06, "loss": 20.9121, "step": 23580 }, { "epoch": 0.437766906287912, "grad_norm": 36.78125, "learning_rate": 9.931598940725944e-06, "loss": 20.9767, "step": 23590 }, { "epoch": 0.43795247937239185, "grad_norm": 35.15625, "learning_rate": 9.931569944939903e-06, "loss": 21.0601, "step": 23600 }, { "epoch": 0.43813805245687165, "grad_norm": 36.625, "learning_rate": 9.931540949153861e-06, "loss": 21.2006, "step": 23610 }, { "epoch": 0.43832362554135146, "grad_norm": 33.21875, "learning_rate": 9.931511953367818e-06, "loss": 20.9193, "step": 23620 }, { "epoch": 0.4385091986258313, "grad_norm": 35.5625, "learning_rate": 9.931482957581776e-06, "loss": 21.2494, "step": 23630 }, { "epoch": 0.4386947717103111, "grad_norm": 35.5, "learning_rate": 9.931453961795733e-06, "loss": 20.8032, "step": 23640 }, { "epoch": 0.438880344794791, "grad_norm": 34.09375, "learning_rate": 9.93142496600969e-06, "loss": 20.7432, "step": 23650 }, { "epoch": 0.4390659178792708, "grad_norm": 34.53125, "learning_rate": 9.931395970223648e-06, "loss": 21.1021, "step": 23660 }, { "epoch": 0.4392514909637506, "grad_norm": 33.625, "learning_rate": 9.931366974437607e-06, "loss": 21.2628, "step": 23670 }, { "epoch": 0.43943706404823046, "grad_norm": 34.4375, "learning_rate": 9.931337978651563e-06, "loss": 20.8008, "step": 23680 }, { "epoch": 0.43962263713271027, "grad_norm": 35.875, "learning_rate": 9.93130898286552e-06, "loss": 20.8276, "step": 23690 }, { "epoch": 0.4398082102171901, "grad_norm": 33.90625, "learning_rate": 9.93127998707948e-06, "loss": 20.5879, "step": 23700 }, { "epoch": 0.43999378330166994, "grad_norm": 36.25, "learning_rate": 9.931250991293437e-06, "loss": 20.905, "step": 23710 }, { "epoch": 0.44017935638614974, "grad_norm": 35.8125, "learning_rate": 9.931221995507394e-06, "loss": 21.0795, "step": 23720 }, { "epoch": 0.4403649294706296, "grad_norm": 35.40625, "learning_rate": 9.931192999721351e-06, "loss": 20.6996, "step": 23730 }, { "epoch": 0.4405505025551094, "grad_norm": 33.625, "learning_rate": 9.931164003935309e-06, "loss": 20.894, "step": 23740 }, { "epoch": 0.4407360756395892, "grad_norm": 35.0625, "learning_rate": 9.931135008149266e-06, "loss": 20.8419, "step": 23750 }, { "epoch": 0.4409216487240691, "grad_norm": 34.9375, "learning_rate": 9.931106012363224e-06, "loss": 20.6036, "step": 23760 }, { "epoch": 0.4411072218085489, "grad_norm": 34.34375, "learning_rate": 9.931077016577181e-06, "loss": 20.7, "step": 23770 }, { "epoch": 0.4412927948930287, "grad_norm": 35.84375, "learning_rate": 9.93104802079114e-06, "loss": 21.0251, "step": 23780 }, { "epoch": 0.44147836797750856, "grad_norm": 35.4375, "learning_rate": 9.931019025005096e-06, "loss": 20.7861, "step": 23790 }, { "epoch": 0.44166394106198836, "grad_norm": 34.53125, "learning_rate": 9.930990029219055e-06, "loss": 21.0714, "step": 23800 }, { "epoch": 0.44184951414646817, "grad_norm": 35.1875, "learning_rate": 9.930961033433012e-06, "loss": 21.0033, "step": 23810 }, { "epoch": 0.44203508723094803, "grad_norm": 34.75, "learning_rate": 9.93093203764697e-06, "loss": 20.7072, "step": 23820 }, { "epoch": 0.44222066031542784, "grad_norm": 35.625, "learning_rate": 9.930903041860927e-06, "loss": 20.7695, "step": 23830 }, { "epoch": 0.4424062333999077, "grad_norm": 36.40625, "learning_rate": 9.930874046074885e-06, "loss": 20.7267, "step": 23840 }, { "epoch": 0.4425918064843875, "grad_norm": 36.03125, "learning_rate": 9.930845050288842e-06, "loss": 20.6114, "step": 23850 }, { "epoch": 0.4427773795688673, "grad_norm": 35.4375, "learning_rate": 9.9308160545028e-06, "loss": 20.9765, "step": 23860 }, { "epoch": 0.4429629526533472, "grad_norm": 34.625, "learning_rate": 9.930787058716757e-06, "loss": 20.9792, "step": 23870 }, { "epoch": 0.443148525737827, "grad_norm": 34.25, "learning_rate": 9.930758062930716e-06, "loss": 21.1687, "step": 23880 }, { "epoch": 0.4433340988223068, "grad_norm": 33.6875, "learning_rate": 9.930729067144673e-06, "loss": 20.8174, "step": 23890 }, { "epoch": 0.44351967190678665, "grad_norm": 33.03125, "learning_rate": 9.930700071358629e-06, "loss": 20.5752, "step": 23900 }, { "epoch": 0.44370524499126646, "grad_norm": 37.15625, "learning_rate": 9.930671075572588e-06, "loss": 20.5927, "step": 23910 }, { "epoch": 0.4438908180757463, "grad_norm": 36.375, "learning_rate": 9.930642079786546e-06, "loss": 21.0733, "step": 23920 }, { "epoch": 0.4440763911602261, "grad_norm": 35.375, "learning_rate": 9.930613084000503e-06, "loss": 20.7896, "step": 23930 }, { "epoch": 0.44426196424470593, "grad_norm": 33.34375, "learning_rate": 9.93058408821446e-06, "loss": 20.5514, "step": 23940 }, { "epoch": 0.4444475373291858, "grad_norm": 33.25, "learning_rate": 9.930555092428418e-06, "loss": 21.1296, "step": 23950 }, { "epoch": 0.4446331104136656, "grad_norm": 35.96875, "learning_rate": 9.930526096642375e-06, "loss": 20.7448, "step": 23960 }, { "epoch": 0.4448186834981454, "grad_norm": 34.59375, "learning_rate": 9.930497100856333e-06, "loss": 21.14, "step": 23970 }, { "epoch": 0.44500425658262527, "grad_norm": 34.28125, "learning_rate": 9.930468105070292e-06, "loss": 20.7303, "step": 23980 }, { "epoch": 0.4451898296671051, "grad_norm": 34.09375, "learning_rate": 9.930439109284249e-06, "loss": 21.1835, "step": 23990 }, { "epoch": 0.44537540275158494, "grad_norm": 35.4375, "learning_rate": 9.930410113498205e-06, "loss": 20.9196, "step": 24000 }, { "epoch": 0.44556097583606474, "grad_norm": 33.75, "learning_rate": 9.930381117712164e-06, "loss": 21.0292, "step": 24010 }, { "epoch": 0.44574654892054455, "grad_norm": 33.78125, "learning_rate": 9.930352121926121e-06, "loss": 20.8839, "step": 24020 }, { "epoch": 0.4459321220050244, "grad_norm": 36.25, "learning_rate": 9.930323126140079e-06, "loss": 21.0878, "step": 24030 }, { "epoch": 0.4461176950895042, "grad_norm": 34.84375, "learning_rate": 9.930294130354036e-06, "loss": 20.8566, "step": 24040 }, { "epoch": 0.446303268173984, "grad_norm": 34.8125, "learning_rate": 9.930265134567995e-06, "loss": 20.5984, "step": 24050 }, { "epoch": 0.4464888412584639, "grad_norm": 34.59375, "learning_rate": 9.930236138781951e-06, "loss": 20.4023, "step": 24060 }, { "epoch": 0.4466744143429437, "grad_norm": 33.5625, "learning_rate": 9.930207142995908e-06, "loss": 20.9537, "step": 24070 }, { "epoch": 0.4468599874274235, "grad_norm": 34.5625, "learning_rate": 9.930178147209868e-06, "loss": 20.6655, "step": 24080 }, { "epoch": 0.44704556051190336, "grad_norm": 34.53125, "learning_rate": 9.930149151423825e-06, "loss": 20.7582, "step": 24090 }, { "epoch": 0.44723113359638317, "grad_norm": 34.53125, "learning_rate": 9.930120155637782e-06, "loss": 20.7693, "step": 24100 }, { "epoch": 0.44741670668086303, "grad_norm": 34.28125, "learning_rate": 9.93009115985174e-06, "loss": 21.0091, "step": 24110 }, { "epoch": 0.44760227976534284, "grad_norm": 33.71875, "learning_rate": 9.930062164065697e-06, "loss": 20.9896, "step": 24120 }, { "epoch": 0.44778785284982264, "grad_norm": 36.53125, "learning_rate": 9.930033168279655e-06, "loss": 20.8672, "step": 24130 }, { "epoch": 0.4479734259343025, "grad_norm": 36.28125, "learning_rate": 9.930004172493612e-06, "loss": 20.6846, "step": 24140 }, { "epoch": 0.4481589990187823, "grad_norm": 35.71875, "learning_rate": 9.929975176707571e-06, "loss": 20.8901, "step": 24150 }, { "epoch": 0.4483445721032621, "grad_norm": 33.65625, "learning_rate": 9.929946180921527e-06, "loss": 21.0096, "step": 24160 }, { "epoch": 0.448530145187742, "grad_norm": 34.28125, "learning_rate": 9.929917185135484e-06, "loss": 20.8181, "step": 24170 }, { "epoch": 0.4487157182722218, "grad_norm": 35.3125, "learning_rate": 9.929888189349443e-06, "loss": 20.9108, "step": 24180 }, { "epoch": 0.44890129135670165, "grad_norm": 37.625, "learning_rate": 9.9298591935634e-06, "loss": 20.9324, "step": 24190 }, { "epoch": 0.44908686444118145, "grad_norm": 36.09375, "learning_rate": 9.929830197777358e-06, "loss": 20.987, "step": 24200 }, { "epoch": 0.44927243752566126, "grad_norm": 35.1875, "learning_rate": 9.929801201991315e-06, "loss": 20.7505, "step": 24210 }, { "epoch": 0.4494580106101411, "grad_norm": 36.125, "learning_rate": 9.929772206205273e-06, "loss": 20.9636, "step": 24220 }, { "epoch": 0.44964358369462093, "grad_norm": 35.28125, "learning_rate": 9.92974321041923e-06, "loss": 21.0674, "step": 24230 }, { "epoch": 0.44982915677910074, "grad_norm": 34.3125, "learning_rate": 9.929714214633188e-06, "loss": 20.6893, "step": 24240 }, { "epoch": 0.4500147298635806, "grad_norm": 36.625, "learning_rate": 9.929685218847147e-06, "loss": 20.9585, "step": 24250 }, { "epoch": 0.4502003029480604, "grad_norm": 35.5, "learning_rate": 9.929656223061104e-06, "loss": 20.3475, "step": 24260 }, { "epoch": 0.45038587603254027, "grad_norm": 34.6875, "learning_rate": 9.92962722727506e-06, "loss": 20.5898, "step": 24270 }, { "epoch": 0.4505714491170201, "grad_norm": 36.21875, "learning_rate": 9.929598231489019e-06, "loss": 20.5172, "step": 24280 }, { "epoch": 0.4507570222014999, "grad_norm": 34.53125, "learning_rate": 9.929569235702976e-06, "loss": 20.8779, "step": 24290 }, { "epoch": 0.45094259528597974, "grad_norm": 34.25, "learning_rate": 9.929540239916934e-06, "loss": 20.8615, "step": 24300 }, { "epoch": 0.45112816837045955, "grad_norm": 34.78125, "learning_rate": 9.929511244130891e-06, "loss": 21.23, "step": 24310 }, { "epoch": 0.45131374145493935, "grad_norm": 35.0625, "learning_rate": 9.929482248344849e-06, "loss": 21.1603, "step": 24320 }, { "epoch": 0.4514993145394192, "grad_norm": 34.875, "learning_rate": 9.929453252558806e-06, "loss": 21.007, "step": 24330 }, { "epoch": 0.451684887623899, "grad_norm": 33.5625, "learning_rate": 9.929424256772763e-06, "loss": 20.4047, "step": 24340 }, { "epoch": 0.45187046070837883, "grad_norm": 34.90625, "learning_rate": 9.929395260986721e-06, "loss": 21.1434, "step": 24350 }, { "epoch": 0.4520560337928587, "grad_norm": 34.8125, "learning_rate": 9.92936626520068e-06, "loss": 21.1945, "step": 24360 }, { "epoch": 0.4522416068773385, "grad_norm": 34.625, "learning_rate": 9.929337269414637e-06, "loss": 20.697, "step": 24370 }, { "epoch": 0.45242717996181836, "grad_norm": 34.96875, "learning_rate": 9.929308273628595e-06, "loss": 20.7853, "step": 24380 }, { "epoch": 0.45261275304629817, "grad_norm": 34.375, "learning_rate": 9.929279277842552e-06, "loss": 20.2789, "step": 24390 }, { "epoch": 0.452798326130778, "grad_norm": 35.90625, "learning_rate": 9.92925028205651e-06, "loss": 20.8062, "step": 24400 }, { "epoch": 0.45298389921525783, "grad_norm": 34.75, "learning_rate": 9.929221286270467e-06, "loss": 20.4341, "step": 24410 }, { "epoch": 0.45316947229973764, "grad_norm": 35.34375, "learning_rate": 9.929192290484424e-06, "loss": 20.4916, "step": 24420 }, { "epoch": 0.45335504538421745, "grad_norm": 35.53125, "learning_rate": 9.929163294698382e-06, "loss": 20.987, "step": 24430 }, { "epoch": 0.4535406184686973, "grad_norm": 34.03125, "learning_rate": 9.92913429891234e-06, "loss": 21.2552, "step": 24440 }, { "epoch": 0.4537261915531771, "grad_norm": 33.28125, "learning_rate": 9.929105303126297e-06, "loss": 20.5123, "step": 24450 }, { "epoch": 0.453911764637657, "grad_norm": 36.25, "learning_rate": 9.929076307340256e-06, "loss": 20.896, "step": 24460 }, { "epoch": 0.4540973377221368, "grad_norm": 34.5, "learning_rate": 9.929047311554213e-06, "loss": 20.9112, "step": 24470 }, { "epoch": 0.4542829108066166, "grad_norm": 36.375, "learning_rate": 9.92901831576817e-06, "loss": 20.9512, "step": 24480 }, { "epoch": 0.45446848389109645, "grad_norm": 35.21875, "learning_rate": 9.928989319982128e-06, "loss": 20.469, "step": 24490 }, { "epoch": 0.45465405697557626, "grad_norm": 37.09375, "learning_rate": 9.928960324196085e-06, "loss": 20.8936, "step": 24500 }, { "epoch": 0.45483963006005607, "grad_norm": 35.125, "learning_rate": 9.928931328410043e-06, "loss": 20.4707, "step": 24510 }, { "epoch": 0.45502520314453593, "grad_norm": 35.46875, "learning_rate": 9.928902332624e-06, "loss": 20.5323, "step": 24520 }, { "epoch": 0.45521077622901573, "grad_norm": 33.90625, "learning_rate": 9.92887333683796e-06, "loss": 21.0152, "step": 24530 }, { "epoch": 0.4553963493134956, "grad_norm": 34.65625, "learning_rate": 9.928844341051915e-06, "loss": 20.7327, "step": 24540 }, { "epoch": 0.4555819223979754, "grad_norm": 35.34375, "learning_rate": 9.928815345265872e-06, "loss": 20.7267, "step": 24550 }, { "epoch": 0.4557674954824552, "grad_norm": 33.34375, "learning_rate": 9.928786349479832e-06, "loss": 20.4997, "step": 24560 }, { "epoch": 0.45595306856693507, "grad_norm": 37.34375, "learning_rate": 9.928757353693789e-06, "loss": 20.8639, "step": 24570 }, { "epoch": 0.4561386416514149, "grad_norm": 34.15625, "learning_rate": 9.928728357907746e-06, "loss": 21.0693, "step": 24580 }, { "epoch": 0.4563242147358947, "grad_norm": 33.75, "learning_rate": 9.928699362121704e-06, "loss": 21.0186, "step": 24590 }, { "epoch": 0.45650978782037455, "grad_norm": 37.59375, "learning_rate": 9.928670366335661e-06, "loss": 21.1377, "step": 24600 }, { "epoch": 0.45669536090485435, "grad_norm": 35.125, "learning_rate": 9.928641370549619e-06, "loss": 20.7898, "step": 24610 }, { "epoch": 0.45688093398933416, "grad_norm": 35.0, "learning_rate": 9.928612374763576e-06, "loss": 20.8915, "step": 24620 }, { "epoch": 0.457066507073814, "grad_norm": 34.96875, "learning_rate": 9.928583378977535e-06, "loss": 20.677, "step": 24630 }, { "epoch": 0.4572520801582938, "grad_norm": 34.78125, "learning_rate": 9.928554383191492e-06, "loss": 20.5464, "step": 24640 }, { "epoch": 0.4574376532427737, "grad_norm": 35.0625, "learning_rate": 9.928525387405448e-06, "loss": 20.7197, "step": 24650 }, { "epoch": 0.4576232263272535, "grad_norm": 35.28125, "learning_rate": 9.928496391619407e-06, "loss": 21.186, "step": 24660 }, { "epoch": 0.4578087994117333, "grad_norm": 35.71875, "learning_rate": 9.928467395833365e-06, "loss": 21.1732, "step": 24670 }, { "epoch": 0.45799437249621316, "grad_norm": 34.3125, "learning_rate": 9.928438400047322e-06, "loss": 20.2372, "step": 24680 }, { "epoch": 0.45817994558069297, "grad_norm": 34.1875, "learning_rate": 9.92840940426128e-06, "loss": 20.5822, "step": 24690 }, { "epoch": 0.4583655186651728, "grad_norm": 34.34375, "learning_rate": 9.928380408475237e-06, "loss": 20.9295, "step": 24700 }, { "epoch": 0.45855109174965264, "grad_norm": 34.6875, "learning_rate": 9.928351412689194e-06, "loss": 20.7293, "step": 24710 }, { "epoch": 0.45873666483413245, "grad_norm": 34.40625, "learning_rate": 9.928322416903152e-06, "loss": 21.0629, "step": 24720 }, { "epoch": 0.4589222379186123, "grad_norm": 37.03125, "learning_rate": 9.92829342111711e-06, "loss": 20.6274, "step": 24730 }, { "epoch": 0.4591078110030921, "grad_norm": 34.625, "learning_rate": 9.928264425331068e-06, "loss": 20.7442, "step": 24740 }, { "epoch": 0.4592933840875719, "grad_norm": 34.28125, "learning_rate": 9.928235429545024e-06, "loss": 20.7463, "step": 24750 }, { "epoch": 0.4594789571720518, "grad_norm": 36.5625, "learning_rate": 9.928206433758983e-06, "loss": 21.1036, "step": 24760 }, { "epoch": 0.4596645302565316, "grad_norm": 35.25, "learning_rate": 9.92817743797294e-06, "loss": 20.6891, "step": 24770 }, { "epoch": 0.4598501033410114, "grad_norm": 33.6875, "learning_rate": 9.928148442186898e-06, "loss": 20.3999, "step": 24780 }, { "epoch": 0.46003567642549126, "grad_norm": 36.6875, "learning_rate": 9.928119446400855e-06, "loss": 21.0503, "step": 24790 }, { "epoch": 0.46022124950997106, "grad_norm": 36.15625, "learning_rate": 9.928090450614813e-06, "loss": 20.7396, "step": 24800 }, { "epoch": 0.4604068225944509, "grad_norm": 37.03125, "learning_rate": 9.92806145482877e-06, "loss": 20.8848, "step": 24810 }, { "epoch": 0.46059239567893073, "grad_norm": 33.375, "learning_rate": 9.928032459042727e-06, "loss": 20.9374, "step": 24820 }, { "epoch": 0.46077796876341054, "grad_norm": 35.75, "learning_rate": 9.928003463256685e-06, "loss": 20.8332, "step": 24830 }, { "epoch": 0.4609635418478904, "grad_norm": 35.09375, "learning_rate": 9.927974467470644e-06, "loss": 20.6518, "step": 24840 }, { "epoch": 0.4611491149323702, "grad_norm": 35.78125, "learning_rate": 9.927945471684601e-06, "loss": 20.633, "step": 24850 }, { "epoch": 0.46133468801685, "grad_norm": 34.3125, "learning_rate": 9.927916475898559e-06, "loss": 21.1518, "step": 24860 }, { "epoch": 0.4615202611013299, "grad_norm": 34.625, "learning_rate": 9.927887480112516e-06, "loss": 20.6709, "step": 24870 }, { "epoch": 0.4617058341858097, "grad_norm": 35.34375, "learning_rate": 9.927858484326474e-06, "loss": 20.86, "step": 24880 }, { "epoch": 0.46189140727028954, "grad_norm": 34.875, "learning_rate": 9.927829488540431e-06, "loss": 21.1381, "step": 24890 }, { "epoch": 0.46207698035476935, "grad_norm": 32.96875, "learning_rate": 9.927800492754388e-06, "loss": 20.7075, "step": 24900 }, { "epoch": 0.46226255343924916, "grad_norm": 35.59375, "learning_rate": 9.927771496968346e-06, "loss": 20.5517, "step": 24910 }, { "epoch": 0.462448126523729, "grad_norm": 36.0625, "learning_rate": 9.927742501182303e-06, "loss": 20.7381, "step": 24920 }, { "epoch": 0.4626336996082088, "grad_norm": 37.375, "learning_rate": 9.92771350539626e-06, "loss": 20.8368, "step": 24930 }, { "epoch": 0.46281927269268863, "grad_norm": 35.25, "learning_rate": 9.92768450961022e-06, "loss": 21.0123, "step": 24940 }, { "epoch": 0.4630048457771685, "grad_norm": 35.03125, "learning_rate": 9.927655513824177e-06, "loss": 20.7466, "step": 24950 }, { "epoch": 0.4631904188616483, "grad_norm": 35.90625, "learning_rate": 9.927626518038135e-06, "loss": 20.4304, "step": 24960 }, { "epoch": 0.4633759919461281, "grad_norm": 34.46875, "learning_rate": 9.927597522252092e-06, "loss": 20.482, "step": 24970 }, { "epoch": 0.46356156503060797, "grad_norm": 37.25, "learning_rate": 9.92756852646605e-06, "loss": 20.8669, "step": 24980 }, { "epoch": 0.4637471381150878, "grad_norm": 36.25, "learning_rate": 9.927539530680007e-06, "loss": 20.7218, "step": 24990 }, { "epoch": 0.46393271119956764, "grad_norm": 33.96875, "learning_rate": 9.927510534893964e-06, "loss": 20.7564, "step": 25000 }, { "epoch": 0.46393271119956764, "eval_loss": 2.5967071056365967, "eval_runtime": 453.4834, "eval_samples_per_second": 3202.139, "eval_steps_per_second": 50.035, "step": 25000 }, { "epoch": 0.46411828428404744, "grad_norm": 37.1875, "learning_rate": 9.927481539107923e-06, "loss": 21.032, "step": 25010 }, { "epoch": 0.46430385736852725, "grad_norm": 34.0, "learning_rate": 9.927452543321879e-06, "loss": 20.8725, "step": 25020 }, { "epoch": 0.4644894304530071, "grad_norm": 35.34375, "learning_rate": 9.927423547535836e-06, "loss": 20.9236, "step": 25030 }, { "epoch": 0.4646750035374869, "grad_norm": 35.90625, "learning_rate": 9.927394551749796e-06, "loss": 20.7448, "step": 25040 }, { "epoch": 0.4648605766219667, "grad_norm": 33.875, "learning_rate": 9.927365555963753e-06, "loss": 21.2387, "step": 25050 }, { "epoch": 0.4650461497064466, "grad_norm": 34.8125, "learning_rate": 9.92733656017771e-06, "loss": 20.801, "step": 25060 }, { "epoch": 0.4652317227909264, "grad_norm": 36.1875, "learning_rate": 9.927307564391668e-06, "loss": 20.7705, "step": 25070 }, { "epoch": 0.46541729587540626, "grad_norm": 34.53125, "learning_rate": 9.927278568605625e-06, "loss": 20.7148, "step": 25080 }, { "epoch": 0.46560286895988606, "grad_norm": 34.875, "learning_rate": 9.927249572819583e-06, "loss": 20.4595, "step": 25090 }, { "epoch": 0.46578844204436587, "grad_norm": 35.46875, "learning_rate": 9.92722057703354e-06, "loss": 20.9314, "step": 25100 }, { "epoch": 0.46597401512884573, "grad_norm": 33.90625, "learning_rate": 9.927191581247499e-06, "loss": 20.3269, "step": 25110 }, { "epoch": 0.46615958821332554, "grad_norm": 36.3125, "learning_rate": 9.927162585461456e-06, "loss": 21.2732, "step": 25120 }, { "epoch": 0.46634516129780534, "grad_norm": 33.46875, "learning_rate": 9.927133589675412e-06, "loss": 21.0565, "step": 25130 }, { "epoch": 0.4665307343822852, "grad_norm": 35.53125, "learning_rate": 9.927104593889371e-06, "loss": 21.2097, "step": 25140 }, { "epoch": 0.466716307466765, "grad_norm": 34.78125, "learning_rate": 9.927075598103329e-06, "loss": 20.6643, "step": 25150 }, { "epoch": 0.4669018805512449, "grad_norm": 33.71875, "learning_rate": 9.927046602317286e-06, "loss": 20.9921, "step": 25160 }, { "epoch": 0.4670874536357247, "grad_norm": 31.984375, "learning_rate": 9.927017606531244e-06, "loss": 20.5886, "step": 25170 }, { "epoch": 0.4672730267202045, "grad_norm": 33.90625, "learning_rate": 9.926988610745201e-06, "loss": 20.725, "step": 25180 }, { "epoch": 0.46745859980468435, "grad_norm": 35.1875, "learning_rate": 9.926959614959158e-06, "loss": 20.9366, "step": 25190 }, { "epoch": 0.46764417288916416, "grad_norm": 34.6875, "learning_rate": 9.926930619173116e-06, "loss": 21.0935, "step": 25200 }, { "epoch": 0.46782974597364396, "grad_norm": 34.6875, "learning_rate": 9.926901623387075e-06, "loss": 20.3594, "step": 25210 }, { "epoch": 0.4680153190581238, "grad_norm": 36.96875, "learning_rate": 9.926872627601032e-06, "loss": 20.7214, "step": 25220 }, { "epoch": 0.46820089214260363, "grad_norm": 37.375, "learning_rate": 9.92684363181499e-06, "loss": 20.9528, "step": 25230 }, { "epoch": 0.46838646522708344, "grad_norm": 34.1875, "learning_rate": 9.926814636028947e-06, "loss": 20.7266, "step": 25240 }, { "epoch": 0.4685720383115633, "grad_norm": 34.5625, "learning_rate": 9.926785640242904e-06, "loss": 20.522, "step": 25250 }, { "epoch": 0.4687576113960431, "grad_norm": 33.65625, "learning_rate": 9.926756644456862e-06, "loss": 20.7444, "step": 25260 }, { "epoch": 0.46894318448052297, "grad_norm": 32.21875, "learning_rate": 9.92672764867082e-06, "loss": 20.1346, "step": 25270 }, { "epoch": 0.4691287575650028, "grad_norm": 34.96875, "learning_rate": 9.926698652884777e-06, "loss": 20.6407, "step": 25280 }, { "epoch": 0.4693143306494826, "grad_norm": 33.875, "learning_rate": 9.926669657098734e-06, "loss": 20.6497, "step": 25290 }, { "epoch": 0.46949990373396244, "grad_norm": 34.75, "learning_rate": 9.926640661312692e-06, "loss": 20.4089, "step": 25300 }, { "epoch": 0.46968547681844225, "grad_norm": 34.8125, "learning_rate": 9.92661166552665e-06, "loss": 20.2013, "step": 25310 }, { "epoch": 0.46987104990292206, "grad_norm": 34.84375, "learning_rate": 9.926582669740608e-06, "loss": 20.8357, "step": 25320 }, { "epoch": 0.4700566229874019, "grad_norm": 34.375, "learning_rate": 9.926553673954565e-06, "loss": 20.3461, "step": 25330 }, { "epoch": 0.4702421960718817, "grad_norm": 33.78125, "learning_rate": 9.926524678168523e-06, "loss": 20.1499, "step": 25340 }, { "epoch": 0.4704277691563616, "grad_norm": 35.71875, "learning_rate": 9.92649568238248e-06, "loss": 20.4552, "step": 25350 }, { "epoch": 0.4706133422408414, "grad_norm": 34.28125, "learning_rate": 9.926466686596438e-06, "loss": 20.6206, "step": 25360 }, { "epoch": 0.4707989153253212, "grad_norm": 35.28125, "learning_rate": 9.926437690810395e-06, "loss": 20.7498, "step": 25370 }, { "epoch": 0.47098448840980106, "grad_norm": 34.1875, "learning_rate": 9.926408695024352e-06, "loss": 20.6467, "step": 25380 }, { "epoch": 0.47117006149428087, "grad_norm": 33.0625, "learning_rate": 9.926379699238312e-06, "loss": 20.624, "step": 25390 }, { "epoch": 0.4713556345787607, "grad_norm": 33.21875, "learning_rate": 9.926350703452267e-06, "loss": 20.5546, "step": 25400 }, { "epoch": 0.47154120766324054, "grad_norm": 35.875, "learning_rate": 9.926321707666225e-06, "loss": 20.8728, "step": 25410 }, { "epoch": 0.47172678074772034, "grad_norm": 35.03125, "learning_rate": 9.926292711880184e-06, "loss": 20.7638, "step": 25420 }, { "epoch": 0.4719123538322002, "grad_norm": 33.6875, "learning_rate": 9.926263716094141e-06, "loss": 20.8226, "step": 25430 }, { "epoch": 0.47209792691668, "grad_norm": 35.09375, "learning_rate": 9.926234720308099e-06, "loss": 20.8098, "step": 25440 }, { "epoch": 0.4722835000011598, "grad_norm": 35.90625, "learning_rate": 9.926205724522056e-06, "loss": 20.0728, "step": 25450 }, { "epoch": 0.4724690730856397, "grad_norm": 35.65625, "learning_rate": 9.926176728736013e-06, "loss": 20.8324, "step": 25460 }, { "epoch": 0.4726546461701195, "grad_norm": 36.15625, "learning_rate": 9.92614773294997e-06, "loss": 20.5694, "step": 25470 }, { "epoch": 0.4728402192545993, "grad_norm": 34.4375, "learning_rate": 9.926118737163928e-06, "loss": 21.1526, "step": 25480 }, { "epoch": 0.47302579233907915, "grad_norm": 35.875, "learning_rate": 9.926089741377887e-06, "loss": 21.0011, "step": 25490 }, { "epoch": 0.47321136542355896, "grad_norm": 36.5625, "learning_rate": 9.926060745591843e-06, "loss": 20.2465, "step": 25500 }, { "epoch": 0.47339693850803877, "grad_norm": 37.40625, "learning_rate": 9.9260317498058e-06, "loss": 20.8743, "step": 25510 }, { "epoch": 0.47358251159251863, "grad_norm": 35.4375, "learning_rate": 9.92600275401976e-06, "loss": 20.9921, "step": 25520 }, { "epoch": 0.47376808467699844, "grad_norm": 33.03125, "learning_rate": 9.925973758233717e-06, "loss": 20.6059, "step": 25530 }, { "epoch": 0.4739536577614783, "grad_norm": 35.09375, "learning_rate": 9.925944762447674e-06, "loss": 20.8631, "step": 25540 }, { "epoch": 0.4741392308459581, "grad_norm": 35.0, "learning_rate": 9.925915766661632e-06, "loss": 20.3443, "step": 25550 }, { "epoch": 0.4743248039304379, "grad_norm": 36.75, "learning_rate": 9.92588677087559e-06, "loss": 20.7723, "step": 25560 }, { "epoch": 0.4745103770149178, "grad_norm": 34.6875, "learning_rate": 9.925857775089547e-06, "loss": 20.4458, "step": 25570 }, { "epoch": 0.4746959500993976, "grad_norm": 35.96875, "learning_rate": 9.925828779303504e-06, "loss": 20.3749, "step": 25580 }, { "epoch": 0.4748815231838774, "grad_norm": 36.625, "learning_rate": 9.925799783517463e-06, "loss": 20.3563, "step": 25590 }, { "epoch": 0.47506709626835725, "grad_norm": 34.875, "learning_rate": 9.92577078773142e-06, "loss": 20.9151, "step": 25600 }, { "epoch": 0.47525266935283705, "grad_norm": 35.28125, "learning_rate": 9.925741791945376e-06, "loss": 20.473, "step": 25610 }, { "epoch": 0.4754382424373169, "grad_norm": 36.3125, "learning_rate": 9.925712796159335e-06, "loss": 20.9318, "step": 25620 }, { "epoch": 0.4756238155217967, "grad_norm": 36.28125, "learning_rate": 9.925683800373293e-06, "loss": 20.9008, "step": 25630 }, { "epoch": 0.47580938860627653, "grad_norm": 35.71875, "learning_rate": 9.92565480458725e-06, "loss": 20.6804, "step": 25640 }, { "epoch": 0.4759949616907564, "grad_norm": 36.625, "learning_rate": 9.925625808801208e-06, "loss": 20.6793, "step": 25650 }, { "epoch": 0.4761805347752362, "grad_norm": 36.40625, "learning_rate": 9.925596813015167e-06, "loss": 20.6986, "step": 25660 }, { "epoch": 0.476366107859716, "grad_norm": 36.0, "learning_rate": 9.925567817229122e-06, "loss": 20.4197, "step": 25670 }, { "epoch": 0.47655168094419587, "grad_norm": 34.21875, "learning_rate": 9.92553882144308e-06, "loss": 20.203, "step": 25680 }, { "epoch": 0.47673725402867567, "grad_norm": 37.375, "learning_rate": 9.925509825657039e-06, "loss": 21.2316, "step": 25690 }, { "epoch": 0.47692282711315553, "grad_norm": 36.03125, "learning_rate": 9.925480829870996e-06, "loss": 20.5119, "step": 25700 }, { "epoch": 0.47710840019763534, "grad_norm": 34.25, "learning_rate": 9.925451834084954e-06, "loss": 20.4633, "step": 25710 }, { "epoch": 0.47729397328211515, "grad_norm": 36.15625, "learning_rate": 9.925422838298911e-06, "loss": 20.5057, "step": 25720 }, { "epoch": 0.477479546366595, "grad_norm": 35.3125, "learning_rate": 9.925393842512868e-06, "loss": 20.8923, "step": 25730 }, { "epoch": 0.4776651194510748, "grad_norm": 33.59375, "learning_rate": 9.925364846726826e-06, "loss": 20.7508, "step": 25740 }, { "epoch": 0.4778506925355546, "grad_norm": 34.5625, "learning_rate": 9.925335850940783e-06, "loss": 20.3849, "step": 25750 }, { "epoch": 0.4780362656200345, "grad_norm": 34.875, "learning_rate": 9.925306855154742e-06, "loss": 21.1023, "step": 25760 }, { "epoch": 0.4782218387045143, "grad_norm": 35.25, "learning_rate": 9.925277859368698e-06, "loss": 20.5539, "step": 25770 }, { "epoch": 0.4784074117889941, "grad_norm": 35.75, "learning_rate": 9.925248863582656e-06, "loss": 20.4852, "step": 25780 }, { "epoch": 0.47859298487347396, "grad_norm": 33.40625, "learning_rate": 9.925219867796615e-06, "loss": 20.5779, "step": 25790 }, { "epoch": 0.47877855795795377, "grad_norm": 34.96875, "learning_rate": 9.925190872010572e-06, "loss": 20.5704, "step": 25800 }, { "epoch": 0.4789641310424336, "grad_norm": 34.78125, "learning_rate": 9.92516187622453e-06, "loss": 20.5373, "step": 25810 }, { "epoch": 0.47914970412691343, "grad_norm": 34.5, "learning_rate": 9.925132880438487e-06, "loss": 20.758, "step": 25820 }, { "epoch": 0.47933527721139324, "grad_norm": 35.9375, "learning_rate": 9.925103884652444e-06, "loss": 20.5638, "step": 25830 }, { "epoch": 0.4795208502958731, "grad_norm": 33.5625, "learning_rate": 9.925074888866402e-06, "loss": 20.8422, "step": 25840 }, { "epoch": 0.4797064233803529, "grad_norm": 33.5625, "learning_rate": 9.925045893080359e-06, "loss": 20.8382, "step": 25850 }, { "epoch": 0.4798919964648327, "grad_norm": 35.375, "learning_rate": 9.925016897294316e-06, "loss": 20.6228, "step": 25860 }, { "epoch": 0.4800775695493126, "grad_norm": 33.96875, "learning_rate": 9.924987901508276e-06, "loss": 20.3393, "step": 25870 }, { "epoch": 0.4802631426337924, "grad_norm": 35.09375, "learning_rate": 9.924958905722231e-06, "loss": 20.7853, "step": 25880 }, { "epoch": 0.48044871571827225, "grad_norm": 33.53125, "learning_rate": 9.92492990993619e-06, "loss": 21.0041, "step": 25890 }, { "epoch": 0.48063428880275205, "grad_norm": 38.21875, "learning_rate": 9.924900914150148e-06, "loss": 20.4117, "step": 25900 }, { "epoch": 0.48081986188723186, "grad_norm": 34.0625, "learning_rate": 9.924871918364105e-06, "loss": 20.6509, "step": 25910 }, { "epoch": 0.4810054349717117, "grad_norm": 36.0, "learning_rate": 9.924842922578063e-06, "loss": 20.895, "step": 25920 }, { "epoch": 0.4811910080561915, "grad_norm": 36.28125, "learning_rate": 9.92481392679202e-06, "loss": 20.8673, "step": 25930 }, { "epoch": 0.48137658114067133, "grad_norm": 34.25, "learning_rate": 9.924784931005977e-06, "loss": 20.5624, "step": 25940 }, { "epoch": 0.4815621542251512, "grad_norm": 35.125, "learning_rate": 9.924755935219935e-06, "loss": 21.1527, "step": 25950 }, { "epoch": 0.481747727309631, "grad_norm": 36.71875, "learning_rate": 9.924726939433892e-06, "loss": 20.7524, "step": 25960 }, { "epoch": 0.48193330039411086, "grad_norm": 33.65625, "learning_rate": 9.924697943647851e-06, "loss": 21.1041, "step": 25970 }, { "epoch": 0.48211887347859067, "grad_norm": 36.59375, "learning_rate": 9.924668947861809e-06, "loss": 20.6638, "step": 25980 }, { "epoch": 0.4823044465630705, "grad_norm": 33.59375, "learning_rate": 9.924639952075764e-06, "loss": 20.6987, "step": 25990 }, { "epoch": 0.48249001964755034, "grad_norm": 34.625, "learning_rate": 9.924610956289724e-06, "loss": 20.7549, "step": 26000 }, { "epoch": 0.48267559273203015, "grad_norm": 36.125, "learning_rate": 9.924581960503681e-06, "loss": 20.5746, "step": 26010 }, { "epoch": 0.48286116581650995, "grad_norm": 33.40625, "learning_rate": 9.924552964717638e-06, "loss": 20.6575, "step": 26020 }, { "epoch": 0.4830467389009898, "grad_norm": 34.0625, "learning_rate": 9.924523968931596e-06, "loss": 20.1084, "step": 26030 }, { "epoch": 0.4832323119854696, "grad_norm": 37.03125, "learning_rate": 9.924494973145553e-06, "loss": 20.3671, "step": 26040 }, { "epoch": 0.4834178850699495, "grad_norm": 34.125, "learning_rate": 9.92446597735951e-06, "loss": 20.4181, "step": 26050 }, { "epoch": 0.4836034581544293, "grad_norm": 35.875, "learning_rate": 9.924436981573468e-06, "loss": 20.5477, "step": 26060 }, { "epoch": 0.4837890312389091, "grad_norm": 34.90625, "learning_rate": 9.924407985787427e-06, "loss": 20.2599, "step": 26070 }, { "epoch": 0.48397460432338896, "grad_norm": 33.1875, "learning_rate": 9.924378990001385e-06, "loss": 20.6862, "step": 26080 }, { "epoch": 0.48416017740786876, "grad_norm": 35.1875, "learning_rate": 9.92434999421534e-06, "loss": 20.6373, "step": 26090 }, { "epoch": 0.48434575049234857, "grad_norm": 34.8125, "learning_rate": 9.9243209984293e-06, "loss": 20.8698, "step": 26100 }, { "epoch": 0.48453132357682843, "grad_norm": 35.5625, "learning_rate": 9.924292002643257e-06, "loss": 20.5171, "step": 26110 }, { "epoch": 0.48471689666130824, "grad_norm": 36.0625, "learning_rate": 9.924263006857214e-06, "loss": 20.0415, "step": 26120 }, { "epoch": 0.48490246974578805, "grad_norm": 34.46875, "learning_rate": 9.924234011071172e-06, "loss": 20.0809, "step": 26130 }, { "epoch": 0.4850880428302679, "grad_norm": 35.5625, "learning_rate": 9.92420501528513e-06, "loss": 20.8953, "step": 26140 }, { "epoch": 0.4852736159147477, "grad_norm": 33.59375, "learning_rate": 9.924176019499086e-06, "loss": 20.5997, "step": 26150 }, { "epoch": 0.4854591889992276, "grad_norm": 33.6875, "learning_rate": 9.924147023713044e-06, "loss": 20.538, "step": 26160 }, { "epoch": 0.4856447620837074, "grad_norm": 32.59375, "learning_rate": 9.924118027927003e-06, "loss": 20.5563, "step": 26170 }, { "epoch": 0.4858303351681872, "grad_norm": 35.28125, "learning_rate": 9.92408903214096e-06, "loss": 20.3517, "step": 26180 }, { "epoch": 0.48601590825266705, "grad_norm": 34.34375, "learning_rate": 9.924060036354918e-06, "loss": 20.4909, "step": 26190 }, { "epoch": 0.48620148133714686, "grad_norm": 36.0625, "learning_rate": 9.924031040568875e-06, "loss": 20.3212, "step": 26200 }, { "epoch": 0.48638705442162666, "grad_norm": 33.84375, "learning_rate": 9.924002044782832e-06, "loss": 20.7427, "step": 26210 }, { "epoch": 0.4865726275061065, "grad_norm": 35.1875, "learning_rate": 9.92397304899679e-06, "loss": 20.5698, "step": 26220 }, { "epoch": 0.48675820059058633, "grad_norm": 38.0625, "learning_rate": 9.923944053210747e-06, "loss": 20.6616, "step": 26230 }, { "epoch": 0.4869437736750662, "grad_norm": 33.6875, "learning_rate": 9.923915057424706e-06, "loss": 20.3389, "step": 26240 }, { "epoch": 0.487129346759546, "grad_norm": 35.59375, "learning_rate": 9.923886061638664e-06, "loss": 20.3957, "step": 26250 }, { "epoch": 0.4873149198440258, "grad_norm": 34.25, "learning_rate": 9.92385706585262e-06, "loss": 20.4382, "step": 26260 }, { "epoch": 0.48750049292850567, "grad_norm": 34.84375, "learning_rate": 9.923828070066579e-06, "loss": 20.4939, "step": 26270 }, { "epoch": 0.4876860660129855, "grad_norm": 34.875, "learning_rate": 9.923799074280536e-06, "loss": 20.4227, "step": 26280 }, { "epoch": 0.4878716390974653, "grad_norm": 34.875, "learning_rate": 9.923770078494493e-06, "loss": 20.8318, "step": 26290 }, { "epoch": 0.48805721218194514, "grad_norm": 33.09375, "learning_rate": 9.923741082708451e-06, "loss": 20.5788, "step": 26300 }, { "epoch": 0.48824278526642495, "grad_norm": 35.0625, "learning_rate": 9.923712086922408e-06, "loss": 20.7702, "step": 26310 }, { "epoch": 0.4884283583509048, "grad_norm": 35.40625, "learning_rate": 9.923683091136366e-06, "loss": 20.8507, "step": 26320 }, { "epoch": 0.4886139314353846, "grad_norm": 34.75, "learning_rate": 9.923654095350323e-06, "loss": 20.8756, "step": 26330 }, { "epoch": 0.4887995045198644, "grad_norm": 34.0625, "learning_rate": 9.923625099564282e-06, "loss": 20.7579, "step": 26340 }, { "epoch": 0.4889850776043443, "grad_norm": 34.25, "learning_rate": 9.92359610377824e-06, "loss": 20.6045, "step": 26350 }, { "epoch": 0.4891706506888241, "grad_norm": 35.3125, "learning_rate": 9.923567107992195e-06, "loss": 20.2022, "step": 26360 }, { "epoch": 0.4893562237733039, "grad_norm": 34.0625, "learning_rate": 9.923538112206154e-06, "loss": 20.2575, "step": 26370 }, { "epoch": 0.48954179685778376, "grad_norm": 35.0625, "learning_rate": 9.923509116420112e-06, "loss": 20.9015, "step": 26380 }, { "epoch": 0.48972736994226357, "grad_norm": 33.875, "learning_rate": 9.92348012063407e-06, "loss": 20.532, "step": 26390 }, { "epoch": 0.4899129430267434, "grad_norm": 33.65625, "learning_rate": 9.923451124848027e-06, "loss": 20.6322, "step": 26400 }, { "epoch": 0.49009851611122324, "grad_norm": 34.5625, "learning_rate": 9.923422129061984e-06, "loss": 20.2689, "step": 26410 }, { "epoch": 0.49028408919570304, "grad_norm": 33.9375, "learning_rate": 9.923393133275941e-06, "loss": 20.7293, "step": 26420 }, { "epoch": 0.4904696622801829, "grad_norm": 34.5, "learning_rate": 9.923364137489899e-06, "loss": 20.89, "step": 26430 }, { "epoch": 0.4906552353646627, "grad_norm": 33.5625, "learning_rate": 9.923335141703856e-06, "loss": 19.9809, "step": 26440 }, { "epoch": 0.4908408084491425, "grad_norm": 36.375, "learning_rate": 9.923306145917815e-06, "loss": 20.7523, "step": 26450 }, { "epoch": 0.4910263815336224, "grad_norm": 36.5, "learning_rate": 9.923277150131773e-06, "loss": 20.9323, "step": 26460 }, { "epoch": 0.4912119546181022, "grad_norm": 37.65625, "learning_rate": 9.923248154345728e-06, "loss": 20.9648, "step": 26470 }, { "epoch": 0.491397527702582, "grad_norm": 33.28125, "learning_rate": 9.923219158559688e-06, "loss": 20.694, "step": 26480 }, { "epoch": 0.49158310078706186, "grad_norm": 34.75, "learning_rate": 9.923190162773645e-06, "loss": 20.6665, "step": 26490 }, { "epoch": 0.49176867387154166, "grad_norm": 34.25, "learning_rate": 9.923161166987602e-06, "loss": 20.4606, "step": 26500 }, { "epoch": 0.4919542469560215, "grad_norm": 35.375, "learning_rate": 9.92313217120156e-06, "loss": 20.5022, "step": 26510 }, { "epoch": 0.49213982004050133, "grad_norm": 34.9375, "learning_rate": 9.923103175415517e-06, "loss": 20.5076, "step": 26520 }, { "epoch": 0.49232539312498114, "grad_norm": 33.3125, "learning_rate": 9.923074179629475e-06, "loss": 20.7874, "step": 26530 }, { "epoch": 0.492510966209461, "grad_norm": 35.4375, "learning_rate": 9.923045183843432e-06, "loss": 20.9914, "step": 26540 }, { "epoch": 0.4926965392939408, "grad_norm": 35.25, "learning_rate": 9.923016188057391e-06, "loss": 19.7607, "step": 26550 }, { "epoch": 0.4928821123784206, "grad_norm": 34.9375, "learning_rate": 9.922987192271349e-06, "loss": 21.0187, "step": 26560 }, { "epoch": 0.4930676854629005, "grad_norm": 35.96875, "learning_rate": 9.922958196485306e-06, "loss": 20.2126, "step": 26570 }, { "epoch": 0.4932532585473803, "grad_norm": 35.0, "learning_rate": 9.922929200699263e-06, "loss": 20.2824, "step": 26580 }, { "epoch": 0.49343883163186014, "grad_norm": 37.78125, "learning_rate": 9.92290020491322e-06, "loss": 21.1446, "step": 26590 }, { "epoch": 0.49362440471633995, "grad_norm": 35.9375, "learning_rate": 9.922871209127178e-06, "loss": 20.6024, "step": 26600 }, { "epoch": 0.49380997780081975, "grad_norm": 34.75, "learning_rate": 9.922842213341136e-06, "loss": 20.4418, "step": 26610 }, { "epoch": 0.4939955508852996, "grad_norm": 33.78125, "learning_rate": 9.922813217555095e-06, "loss": 20.5188, "step": 26620 }, { "epoch": 0.4941811239697794, "grad_norm": 34.21875, "learning_rate": 9.92278422176905e-06, "loss": 20.2447, "step": 26630 }, { "epoch": 0.49436669705425923, "grad_norm": 35.59375, "learning_rate": 9.922755225983008e-06, "loss": 20.735, "step": 26640 }, { "epoch": 0.4945522701387391, "grad_norm": 37.03125, "learning_rate": 9.922726230196967e-06, "loss": 21.0627, "step": 26650 }, { "epoch": 0.4947378432232189, "grad_norm": 35.0625, "learning_rate": 9.922697234410924e-06, "loss": 20.1211, "step": 26660 }, { "epoch": 0.4949234163076987, "grad_norm": 34.90625, "learning_rate": 9.922668238624882e-06, "loss": 20.5947, "step": 26670 }, { "epoch": 0.49510898939217857, "grad_norm": 34.3125, "learning_rate": 9.922639242838839e-06, "loss": 20.4556, "step": 26680 }, { "epoch": 0.4952945624766584, "grad_norm": 32.75, "learning_rate": 9.922610247052797e-06, "loss": 20.0388, "step": 26690 }, { "epoch": 0.49548013556113824, "grad_norm": 33.4375, "learning_rate": 9.922581251266754e-06, "loss": 20.3324, "step": 26700 }, { "epoch": 0.49566570864561804, "grad_norm": 35.09375, "learning_rate": 9.922552255480711e-06, "loss": 20.444, "step": 26710 }, { "epoch": 0.49585128173009785, "grad_norm": 35.15625, "learning_rate": 9.92252325969467e-06, "loss": 20.5939, "step": 26720 }, { "epoch": 0.4960368548145777, "grad_norm": 35.84375, "learning_rate": 9.922494263908628e-06, "loss": 20.3899, "step": 26730 }, { "epoch": 0.4962224278990575, "grad_norm": 34.8125, "learning_rate": 9.922465268122584e-06, "loss": 20.5074, "step": 26740 }, { "epoch": 0.4964080009835373, "grad_norm": 37.03125, "learning_rate": 9.922436272336543e-06, "loss": 20.4035, "step": 26750 }, { "epoch": 0.4965935740680172, "grad_norm": 34.5, "learning_rate": 9.9224072765505e-06, "loss": 21.0219, "step": 26760 }, { "epoch": 0.496779147152497, "grad_norm": 33.75, "learning_rate": 9.922378280764457e-06, "loss": 20.5443, "step": 26770 }, { "epoch": 0.49696472023697685, "grad_norm": 35.09375, "learning_rate": 9.922349284978415e-06, "loss": 20.6201, "step": 26780 }, { "epoch": 0.49715029332145666, "grad_norm": 33.40625, "learning_rate": 9.922320289192372e-06, "loss": 20.0897, "step": 26790 }, { "epoch": 0.49733586640593647, "grad_norm": 33.90625, "learning_rate": 9.92229129340633e-06, "loss": 20.9902, "step": 26800 }, { "epoch": 0.49752143949041633, "grad_norm": 34.3125, "learning_rate": 9.922262297620287e-06, "loss": 21.3532, "step": 26810 }, { "epoch": 0.49770701257489613, "grad_norm": 34.375, "learning_rate": 9.922233301834246e-06, "loss": 20.4501, "step": 26820 }, { "epoch": 0.49789258565937594, "grad_norm": 34.8125, "learning_rate": 9.922204306048204e-06, "loss": 20.4959, "step": 26830 }, { "epoch": 0.4980781587438558, "grad_norm": 35.75, "learning_rate": 9.922175310262161e-06, "loss": 20.5544, "step": 26840 }, { "epoch": 0.4982637318283356, "grad_norm": 33.0, "learning_rate": 9.922146314476118e-06, "loss": 20.6951, "step": 26850 }, { "epoch": 0.49844930491281547, "grad_norm": 34.625, "learning_rate": 9.922117318690076e-06, "loss": 20.583, "step": 26860 }, { "epoch": 0.4986348779972953, "grad_norm": 36.875, "learning_rate": 9.922088322904033e-06, "loss": 20.3859, "step": 26870 }, { "epoch": 0.4988204510817751, "grad_norm": 34.0, "learning_rate": 9.92205932711799e-06, "loss": 20.0682, "step": 26880 }, { "epoch": 0.49900602416625495, "grad_norm": 35.375, "learning_rate": 9.922030331331948e-06, "loss": 20.5781, "step": 26890 }, { "epoch": 0.49919159725073475, "grad_norm": 34.75, "learning_rate": 9.922001335545905e-06, "loss": 20.8437, "step": 26900 }, { "epoch": 0.49937717033521456, "grad_norm": 34.4375, "learning_rate": 9.921972339759863e-06, "loss": 20.4446, "step": 26910 }, { "epoch": 0.4995627434196944, "grad_norm": 34.71875, "learning_rate": 9.92194334397382e-06, "loss": 20.7822, "step": 26920 }, { "epoch": 0.49974831650417423, "grad_norm": 34.46875, "learning_rate": 9.92191434818778e-06, "loss": 20.6735, "step": 26930 }, { "epoch": 0.49993388958865403, "grad_norm": 34.21875, "learning_rate": 9.921885352401737e-06, "loss": 21.0007, "step": 26940 }, { "epoch": 0.5001194626731339, "grad_norm": 33.46875, "learning_rate": 9.921856356615694e-06, "loss": 20.2895, "step": 26950 }, { "epoch": 0.5003050357576138, "grad_norm": 36.625, "learning_rate": 9.921827360829652e-06, "loss": 20.3101, "step": 26960 }, { "epoch": 0.5004906088420935, "grad_norm": 36.53125, "learning_rate": 9.921798365043609e-06, "loss": 20.482, "step": 26970 }, { "epoch": 0.5006761819265734, "grad_norm": 33.84375, "learning_rate": 9.921769369257566e-06, "loss": 20.8419, "step": 26980 }, { "epoch": 0.5008617550110532, "grad_norm": 36.25, "learning_rate": 9.921740373471524e-06, "loss": 20.4644, "step": 26990 }, { "epoch": 0.501047328095533, "grad_norm": 34.625, "learning_rate": 9.921711377685483e-06, "loss": 20.5031, "step": 27000 }, { "epoch": 0.5012329011800128, "grad_norm": 34.65625, "learning_rate": 9.921682381899439e-06, "loss": 20.6603, "step": 27010 }, { "epoch": 0.5014184742644927, "grad_norm": 33.78125, "learning_rate": 9.921653386113396e-06, "loss": 20.7035, "step": 27020 }, { "epoch": 0.5016040473489725, "grad_norm": 34.65625, "learning_rate": 9.921624390327355e-06, "loss": 21.0303, "step": 27030 }, { "epoch": 0.5017896204334523, "grad_norm": 33.5, "learning_rate": 9.921595394541313e-06, "loss": 20.2431, "step": 27040 }, { "epoch": 0.5019751935179322, "grad_norm": 36.8125, "learning_rate": 9.92156639875527e-06, "loss": 20.9503, "step": 27050 }, { "epoch": 0.5021607666024119, "grad_norm": 36.25, "learning_rate": 9.921537402969227e-06, "loss": 20.5597, "step": 27060 }, { "epoch": 0.5023463396868918, "grad_norm": 35.53125, "learning_rate": 9.921508407183185e-06, "loss": 20.2874, "step": 27070 }, { "epoch": 0.5025319127713717, "grad_norm": 34.5625, "learning_rate": 9.921479411397142e-06, "loss": 20.2342, "step": 27080 }, { "epoch": 0.5027174858558515, "grad_norm": 35.625, "learning_rate": 9.9214504156111e-06, "loss": 20.2432, "step": 27090 }, { "epoch": 0.5029030589403313, "grad_norm": 36.71875, "learning_rate": 9.921421419825059e-06, "loss": 20.8367, "step": 27100 }, { "epoch": 0.5030886320248111, "grad_norm": 35.75, "learning_rate": 9.921392424039014e-06, "loss": 20.4609, "step": 27110 }, { "epoch": 0.503274205109291, "grad_norm": 35.5, "learning_rate": 9.921363428252972e-06, "loss": 20.4391, "step": 27120 }, { "epoch": 0.5034597781937707, "grad_norm": 34.0625, "learning_rate": 9.921334432466931e-06, "loss": 20.4591, "step": 27130 }, { "epoch": 0.5036453512782506, "grad_norm": 35.03125, "learning_rate": 9.921305436680888e-06, "loss": 20.8571, "step": 27140 }, { "epoch": 0.5038309243627305, "grad_norm": 38.15625, "learning_rate": 9.921276440894846e-06, "loss": 20.877, "step": 27150 }, { "epoch": 0.5040164974472102, "grad_norm": 35.125, "learning_rate": 9.921247445108803e-06, "loss": 20.7956, "step": 27160 }, { "epoch": 0.5042020705316901, "grad_norm": 35.46875, "learning_rate": 9.92121844932276e-06, "loss": 20.4186, "step": 27170 }, { "epoch": 0.50438764361617, "grad_norm": 35.25, "learning_rate": 9.921189453536718e-06, "loss": 20.5766, "step": 27180 }, { "epoch": 0.5045732167006497, "grad_norm": 35.40625, "learning_rate": 9.921160457750675e-06, "loss": 20.2852, "step": 27190 }, { "epoch": 0.5047587897851296, "grad_norm": 36.28125, "learning_rate": 9.921131461964634e-06, "loss": 20.5424, "step": 27200 }, { "epoch": 0.5049443628696094, "grad_norm": 35.4375, "learning_rate": 9.921102466178592e-06, "loss": 20.4194, "step": 27210 }, { "epoch": 0.5051299359540892, "grad_norm": 33.0625, "learning_rate": 9.921073470392548e-06, "loss": 20.407, "step": 27220 }, { "epoch": 0.505315509038569, "grad_norm": 35.65625, "learning_rate": 9.921044474606507e-06, "loss": 20.6512, "step": 27230 }, { "epoch": 0.5055010821230489, "grad_norm": 34.09375, "learning_rate": 9.921015478820464e-06, "loss": 20.7855, "step": 27240 }, { "epoch": 0.5056866552075286, "grad_norm": 36.6875, "learning_rate": 9.920986483034421e-06, "loss": 20.5273, "step": 27250 }, { "epoch": 0.5058722282920085, "grad_norm": 33.96875, "learning_rate": 9.920957487248379e-06, "loss": 20.1875, "step": 27260 }, { "epoch": 0.5060578013764884, "grad_norm": 32.96875, "learning_rate": 9.920928491462336e-06, "loss": 20.7281, "step": 27270 }, { "epoch": 0.5062433744609682, "grad_norm": 34.5, "learning_rate": 9.920899495676294e-06, "loss": 20.489, "step": 27280 }, { "epoch": 0.506428947545448, "grad_norm": 32.625, "learning_rate": 9.920870499890251e-06, "loss": 20.938, "step": 27290 }, { "epoch": 0.5066145206299278, "grad_norm": 34.46875, "learning_rate": 9.92084150410421e-06, "loss": 20.6521, "step": 27300 }, { "epoch": 0.5068000937144077, "grad_norm": 34.1875, "learning_rate": 9.920812508318168e-06, "loss": 20.7266, "step": 27310 }, { "epoch": 0.5069856667988875, "grad_norm": 35.125, "learning_rate": 9.920783512532125e-06, "loss": 20.4679, "step": 27320 }, { "epoch": 0.5071712398833673, "grad_norm": 34.25, "learning_rate": 9.920754516746082e-06, "loss": 20.5604, "step": 27330 }, { "epoch": 0.5073568129678472, "grad_norm": 35.90625, "learning_rate": 9.92072552096004e-06, "loss": 20.9174, "step": 27340 }, { "epoch": 0.5075423860523269, "grad_norm": 34.90625, "learning_rate": 9.920696525173997e-06, "loss": 20.5337, "step": 27350 }, { "epoch": 0.5077279591368068, "grad_norm": 35.375, "learning_rate": 9.920667529387955e-06, "loss": 20.4249, "step": 27360 }, { "epoch": 0.5079135322212867, "grad_norm": 34.125, "learning_rate": 9.920638533601912e-06, "loss": 19.9978, "step": 27370 }, { "epoch": 0.5080991053057664, "grad_norm": 32.4375, "learning_rate": 9.92060953781587e-06, "loss": 20.4063, "step": 27380 }, { "epoch": 0.5082846783902463, "grad_norm": 36.59375, "learning_rate": 9.920580542029827e-06, "loss": 20.2915, "step": 27390 }, { "epoch": 0.5084702514747261, "grad_norm": 34.90625, "learning_rate": 9.920551546243786e-06, "loss": 20.5562, "step": 27400 }, { "epoch": 0.5086558245592059, "grad_norm": 36.875, "learning_rate": 9.920522550457743e-06, "loss": 20.6528, "step": 27410 }, { "epoch": 0.5088413976436857, "grad_norm": 34.09375, "learning_rate": 9.9204935546717e-06, "loss": 20.4691, "step": 27420 }, { "epoch": 0.5090269707281656, "grad_norm": 35.09375, "learning_rate": 9.920464558885658e-06, "loss": 20.3996, "step": 27430 }, { "epoch": 0.5092125438126455, "grad_norm": 34.8125, "learning_rate": 9.920435563099616e-06, "loss": 20.4117, "step": 27440 }, { "epoch": 0.5093981168971252, "grad_norm": 36.125, "learning_rate": 9.920406567313573e-06, "loss": 20.6251, "step": 27450 }, { "epoch": 0.5095836899816051, "grad_norm": 35.84375, "learning_rate": 9.92037757152753e-06, "loss": 20.3157, "step": 27460 }, { "epoch": 0.5097692630660849, "grad_norm": 33.46875, "learning_rate": 9.920348575741488e-06, "loss": 20.4708, "step": 27470 }, { "epoch": 0.5099548361505647, "grad_norm": 34.375, "learning_rate": 9.920319579955447e-06, "loss": 20.7527, "step": 27480 }, { "epoch": 0.5101404092350446, "grad_norm": 34.25, "learning_rate": 9.920290584169403e-06, "loss": 20.7704, "step": 27490 }, { "epoch": 0.5103259823195244, "grad_norm": 35.90625, "learning_rate": 9.92026158838336e-06, "loss": 20.2382, "step": 27500 }, { "epoch": 0.5105115554040042, "grad_norm": 36.3125, "learning_rate": 9.920232592597319e-06, "loss": 20.698, "step": 27510 }, { "epoch": 0.510697128488484, "grad_norm": 33.9375, "learning_rate": 9.920203596811277e-06, "loss": 20.4556, "step": 27520 }, { "epoch": 0.5108827015729639, "grad_norm": 34.5625, "learning_rate": 9.920174601025234e-06, "loss": 20.5015, "step": 27530 }, { "epoch": 0.5110682746574436, "grad_norm": 36.5, "learning_rate": 9.920145605239191e-06, "loss": 20.179, "step": 27540 }, { "epoch": 0.5112538477419235, "grad_norm": 33.75, "learning_rate": 9.920116609453149e-06, "loss": 20.2646, "step": 27550 }, { "epoch": 0.5114394208264034, "grad_norm": 34.75, "learning_rate": 9.920087613667106e-06, "loss": 20.6251, "step": 27560 }, { "epoch": 0.5116249939108831, "grad_norm": 34.34375, "learning_rate": 9.920058617881064e-06, "loss": 19.9915, "step": 27570 }, { "epoch": 0.511810566995363, "grad_norm": 34.78125, "learning_rate": 9.920029622095023e-06, "loss": 20.6833, "step": 27580 }, { "epoch": 0.5119961400798428, "grad_norm": 32.09375, "learning_rate": 9.92000062630898e-06, "loss": 20.4652, "step": 27590 }, { "epoch": 0.5121817131643226, "grad_norm": 36.53125, "learning_rate": 9.919971630522936e-06, "loss": 20.854, "step": 27600 }, { "epoch": 0.5123672862488025, "grad_norm": 34.0, "learning_rate": 9.919942634736895e-06, "loss": 20.5599, "step": 27610 }, { "epoch": 0.5125528593332823, "grad_norm": 33.34375, "learning_rate": 9.919913638950852e-06, "loss": 20.8087, "step": 27620 }, { "epoch": 0.5127384324177622, "grad_norm": 34.59375, "learning_rate": 9.91988464316481e-06, "loss": 20.2152, "step": 27630 }, { "epoch": 0.5129240055022419, "grad_norm": 35.34375, "learning_rate": 9.919855647378767e-06, "loss": 20.4706, "step": 27640 }, { "epoch": 0.5131095785867218, "grad_norm": 33.71875, "learning_rate": 9.919826651592725e-06, "loss": 20.428, "step": 27650 }, { "epoch": 0.5132951516712017, "grad_norm": 34.25, "learning_rate": 9.919797655806682e-06, "loss": 20.24, "step": 27660 }, { "epoch": 0.5134807247556814, "grad_norm": 35.21875, "learning_rate": 9.91976866002064e-06, "loss": 20.7069, "step": 27670 }, { "epoch": 0.5136662978401613, "grad_norm": 36.5, "learning_rate": 9.919739664234598e-06, "loss": 20.4921, "step": 27680 }, { "epoch": 0.5138518709246411, "grad_norm": 34.21875, "learning_rate": 9.919710668448556e-06, "loss": 20.3458, "step": 27690 }, { "epoch": 0.5140374440091209, "grad_norm": 34.0, "learning_rate": 9.919681672662512e-06, "loss": 20.4302, "step": 27700 }, { "epoch": 0.5142230170936007, "grad_norm": 33.96875, "learning_rate": 9.91965267687647e-06, "loss": 20.6798, "step": 27710 }, { "epoch": 0.5144085901780806, "grad_norm": 36.96875, "learning_rate": 9.919623681090428e-06, "loss": 20.571, "step": 27720 }, { "epoch": 0.5145941632625604, "grad_norm": 34.59375, "learning_rate": 9.919594685304385e-06, "loss": 19.6917, "step": 27730 }, { "epoch": 0.5147797363470402, "grad_norm": 34.40625, "learning_rate": 9.919565689518343e-06, "loss": 20.6663, "step": 27740 }, { "epoch": 0.5149653094315201, "grad_norm": 35.71875, "learning_rate": 9.919536693732302e-06, "loss": 20.3554, "step": 27750 }, { "epoch": 0.5151508825159998, "grad_norm": 34.6875, "learning_rate": 9.919507697946258e-06, "loss": 20.623, "step": 27760 }, { "epoch": 0.5153364556004797, "grad_norm": 34.0625, "learning_rate": 9.919478702160215e-06, "loss": 20.3847, "step": 27770 }, { "epoch": 0.5155220286849596, "grad_norm": 38.625, "learning_rate": 9.919449706374174e-06, "loss": 20.4449, "step": 27780 }, { "epoch": 0.5157076017694393, "grad_norm": 34.03125, "learning_rate": 9.919420710588132e-06, "loss": 20.3852, "step": 27790 }, { "epoch": 0.5158931748539192, "grad_norm": 36.0625, "learning_rate": 9.919391714802089e-06, "loss": 20.4759, "step": 27800 }, { "epoch": 0.516078747938399, "grad_norm": 34.4375, "learning_rate": 9.919362719016046e-06, "loss": 20.3306, "step": 27810 }, { "epoch": 0.5162643210228789, "grad_norm": 32.90625, "learning_rate": 9.919333723230004e-06, "loss": 20.121, "step": 27820 }, { "epoch": 0.5164498941073586, "grad_norm": 35.15625, "learning_rate": 9.919304727443961e-06, "loss": 20.4499, "step": 27830 }, { "epoch": 0.5166354671918385, "grad_norm": 37.40625, "learning_rate": 9.919275731657919e-06, "loss": 20.4448, "step": 27840 }, { "epoch": 0.5168210402763184, "grad_norm": 34.96875, "learning_rate": 9.919246735871878e-06, "loss": 20.3247, "step": 27850 }, { "epoch": 0.5170066133607981, "grad_norm": 36.6875, "learning_rate": 9.919217740085833e-06, "loss": 20.2849, "step": 27860 }, { "epoch": 0.517192186445278, "grad_norm": 34.21875, "learning_rate": 9.919188744299791e-06, "loss": 20.6523, "step": 27870 }, { "epoch": 0.5173777595297578, "grad_norm": 34.59375, "learning_rate": 9.91915974851375e-06, "loss": 20.3804, "step": 27880 }, { "epoch": 0.5175633326142376, "grad_norm": 35.15625, "learning_rate": 9.919130752727707e-06, "loss": 20.2975, "step": 27890 }, { "epoch": 0.5177489056987175, "grad_norm": 35.84375, "learning_rate": 9.919101756941665e-06, "loss": 20.4741, "step": 27900 }, { "epoch": 0.5179344787831973, "grad_norm": 35.3125, "learning_rate": 9.919072761155622e-06, "loss": 20.7598, "step": 27910 }, { "epoch": 0.5181200518676771, "grad_norm": 34.9375, "learning_rate": 9.91904376536958e-06, "loss": 19.9323, "step": 27920 }, { "epoch": 0.5183056249521569, "grad_norm": 35.09375, "learning_rate": 9.919014769583537e-06, "loss": 20.2018, "step": 27930 }, { "epoch": 0.5184911980366368, "grad_norm": 38.0, "learning_rate": 9.918985773797494e-06, "loss": 20.4047, "step": 27940 }, { "epoch": 0.5186767711211165, "grad_norm": 34.34375, "learning_rate": 9.918956778011452e-06, "loss": 20.4592, "step": 27950 }, { "epoch": 0.5188623442055964, "grad_norm": 34.15625, "learning_rate": 9.918927782225411e-06, "loss": 20.2424, "step": 27960 }, { "epoch": 0.5190479172900763, "grad_norm": 36.0625, "learning_rate": 9.918898786439367e-06, "loss": 20.5012, "step": 27970 }, { "epoch": 0.5192334903745561, "grad_norm": 34.53125, "learning_rate": 9.918869790653324e-06, "loss": 20.6693, "step": 27980 }, { "epoch": 0.5194190634590359, "grad_norm": 35.78125, "learning_rate": 9.918840794867283e-06, "loss": 20.6029, "step": 27990 }, { "epoch": 0.5196046365435157, "grad_norm": 33.6875, "learning_rate": 9.91881179908124e-06, "loss": 20.3617, "step": 28000 }, { "epoch": 0.5197902096279956, "grad_norm": 32.34375, "learning_rate": 9.918782803295198e-06, "loss": 20.85, "step": 28010 }, { "epoch": 0.5199757827124754, "grad_norm": 36.375, "learning_rate": 9.918753807509155e-06, "loss": 20.1941, "step": 28020 }, { "epoch": 0.5201613557969552, "grad_norm": 35.78125, "learning_rate": 9.918724811723113e-06, "loss": 20.3447, "step": 28030 }, { "epoch": 0.5203469288814351, "grad_norm": 35.0625, "learning_rate": 9.91869581593707e-06, "loss": 20.8577, "step": 28040 }, { "epoch": 0.5205325019659148, "grad_norm": 36.0625, "learning_rate": 9.918666820151028e-06, "loss": 20.098, "step": 28050 }, { "epoch": 0.5207180750503947, "grad_norm": 34.90625, "learning_rate": 9.918637824364987e-06, "loss": 20.6016, "step": 28060 }, { "epoch": 0.5209036481348746, "grad_norm": 32.3125, "learning_rate": 9.918608828578944e-06, "loss": 20.3424, "step": 28070 }, { "epoch": 0.5210892212193543, "grad_norm": 36.46875, "learning_rate": 9.9185798327929e-06, "loss": 20.371, "step": 28080 }, { "epoch": 0.5212747943038342, "grad_norm": 35.96875, "learning_rate": 9.918550837006859e-06, "loss": 20.4307, "step": 28090 }, { "epoch": 0.521460367388314, "grad_norm": 35.34375, "learning_rate": 9.918521841220816e-06, "loss": 20.6163, "step": 28100 }, { "epoch": 0.5216459404727938, "grad_norm": 34.25, "learning_rate": 9.918492845434774e-06, "loss": 20.8442, "step": 28110 }, { "epoch": 0.5218315135572736, "grad_norm": 34.5, "learning_rate": 9.918463849648731e-06, "loss": 20.2093, "step": 28120 }, { "epoch": 0.5220170866417535, "grad_norm": 34.8125, "learning_rate": 9.918434853862689e-06, "loss": 20.2828, "step": 28130 }, { "epoch": 0.5222026597262333, "grad_norm": 37.0625, "learning_rate": 9.918405858076646e-06, "loss": 19.9799, "step": 28140 }, { "epoch": 0.5223882328107131, "grad_norm": 34.5, "learning_rate": 9.918376862290603e-06, "loss": 20.411, "step": 28150 }, { "epoch": 0.522573805895193, "grad_norm": 34.6875, "learning_rate": 9.918347866504562e-06, "loss": 20.6861, "step": 28160 }, { "epoch": 0.5227593789796728, "grad_norm": 36.09375, "learning_rate": 9.91831887071852e-06, "loss": 20.4573, "step": 28170 }, { "epoch": 0.5229449520641526, "grad_norm": 33.90625, "learning_rate": 9.918289874932477e-06, "loss": 20.773, "step": 28180 }, { "epoch": 0.5231305251486325, "grad_norm": 36.25, "learning_rate": 9.918260879146435e-06, "loss": 20.5433, "step": 28190 }, { "epoch": 0.5233160982331123, "grad_norm": 34.5625, "learning_rate": 9.918231883360392e-06, "loss": 20.2934, "step": 28200 }, { "epoch": 0.5235016713175921, "grad_norm": 38.0625, "learning_rate": 9.91820288757435e-06, "loss": 20.8898, "step": 28210 }, { "epoch": 0.5236872444020719, "grad_norm": 33.90625, "learning_rate": 9.918173891788307e-06, "loss": 20.4065, "step": 28220 }, { "epoch": 0.5238728174865518, "grad_norm": 35.375, "learning_rate": 9.918144896002266e-06, "loss": 20.3046, "step": 28230 }, { "epoch": 0.5240583905710315, "grad_norm": 33.71875, "learning_rate": 9.918115900216222e-06, "loss": 19.9315, "step": 28240 }, { "epoch": 0.5242439636555114, "grad_norm": 35.71875, "learning_rate": 9.918086904430179e-06, "loss": 20.2579, "step": 28250 }, { "epoch": 0.5244295367399913, "grad_norm": 35.09375, "learning_rate": 9.918057908644138e-06, "loss": 20.2974, "step": 28260 }, { "epoch": 0.524615109824471, "grad_norm": 34.5, "learning_rate": 9.918028912858096e-06, "loss": 20.3451, "step": 28270 }, { "epoch": 0.5248006829089509, "grad_norm": 32.625, "learning_rate": 9.917999917072053e-06, "loss": 20.5492, "step": 28280 }, { "epoch": 0.5249862559934307, "grad_norm": 35.34375, "learning_rate": 9.91797092128601e-06, "loss": 20.4729, "step": 28290 }, { "epoch": 0.5251718290779105, "grad_norm": 34.84375, "learning_rate": 9.917941925499968e-06, "loss": 20.4557, "step": 28300 }, { "epoch": 0.5253574021623904, "grad_norm": 34.4375, "learning_rate": 9.917912929713925e-06, "loss": 20.4742, "step": 28310 }, { "epoch": 0.5255429752468702, "grad_norm": 36.59375, "learning_rate": 9.917883933927883e-06, "loss": 20.2819, "step": 28320 }, { "epoch": 0.5257285483313501, "grad_norm": 31.46875, "learning_rate": 9.917854938141842e-06, "loss": 20.7438, "step": 28330 }, { "epoch": 0.5259141214158298, "grad_norm": 34.6875, "learning_rate": 9.9178259423558e-06, "loss": 20.2566, "step": 28340 }, { "epoch": 0.5260996945003097, "grad_norm": 34.375, "learning_rate": 9.917796946569755e-06, "loss": 20.168, "step": 28350 }, { "epoch": 0.5262852675847896, "grad_norm": 35.46875, "learning_rate": 9.917767950783714e-06, "loss": 19.9336, "step": 28360 }, { "epoch": 0.5264708406692693, "grad_norm": 35.15625, "learning_rate": 9.917738954997671e-06, "loss": 20.2843, "step": 28370 }, { "epoch": 0.5266564137537492, "grad_norm": 35.53125, "learning_rate": 9.917709959211629e-06, "loss": 20.5998, "step": 28380 }, { "epoch": 0.526841986838229, "grad_norm": 34.71875, "learning_rate": 9.917680963425586e-06, "loss": 20.7026, "step": 28390 }, { "epoch": 0.5270275599227088, "grad_norm": 36.125, "learning_rate": 9.917651967639544e-06, "loss": 20.4475, "step": 28400 }, { "epoch": 0.5272131330071886, "grad_norm": 34.5, "learning_rate": 9.917622971853501e-06, "loss": 20.56, "step": 28410 }, { "epoch": 0.5273987060916685, "grad_norm": 35.09375, "learning_rate": 9.917593976067458e-06, "loss": 20.4937, "step": 28420 }, { "epoch": 0.5275842791761483, "grad_norm": 33.53125, "learning_rate": 9.917564980281416e-06, "loss": 20.5536, "step": 28430 }, { "epoch": 0.5277698522606281, "grad_norm": 34.25, "learning_rate": 9.917535984495375e-06, "loss": 19.9744, "step": 28440 }, { "epoch": 0.527955425345108, "grad_norm": 33.25, "learning_rate": 9.91750698870933e-06, "loss": 20.3831, "step": 28450 }, { "epoch": 0.5281409984295877, "grad_norm": 35.6875, "learning_rate": 9.91747799292329e-06, "loss": 20.5233, "step": 28460 }, { "epoch": 0.5283265715140676, "grad_norm": 35.4375, "learning_rate": 9.917448997137247e-06, "loss": 20.7939, "step": 28470 }, { "epoch": 0.5285121445985475, "grad_norm": 34.65625, "learning_rate": 9.917420001351205e-06, "loss": 20.3844, "step": 28480 }, { "epoch": 0.5286977176830272, "grad_norm": 34.90625, "learning_rate": 9.917391005565162e-06, "loss": 20.6473, "step": 28490 }, { "epoch": 0.5288832907675071, "grad_norm": 34.65625, "learning_rate": 9.91736200977912e-06, "loss": 20.1855, "step": 28500 }, { "epoch": 0.5290688638519869, "grad_norm": 35.6875, "learning_rate": 9.917333013993077e-06, "loss": 20.6092, "step": 28510 }, { "epoch": 0.5292544369364668, "grad_norm": 34.4375, "learning_rate": 9.917304018207034e-06, "loss": 20.351, "step": 28520 }, { "epoch": 0.5294400100209465, "grad_norm": 33.59375, "learning_rate": 9.917275022420992e-06, "loss": 19.9355, "step": 28530 }, { "epoch": 0.5296255831054264, "grad_norm": 35.84375, "learning_rate": 9.91724602663495e-06, "loss": 20.3667, "step": 28540 }, { "epoch": 0.5298111561899063, "grad_norm": 33.34375, "learning_rate": 9.917217030848908e-06, "loss": 20.1995, "step": 28550 }, { "epoch": 0.529996729274386, "grad_norm": 35.65625, "learning_rate": 9.917188035062864e-06, "loss": 20.2276, "step": 28560 }, { "epoch": 0.5301823023588659, "grad_norm": 35.75, "learning_rate": 9.917159039276823e-06, "loss": 20.5546, "step": 28570 }, { "epoch": 0.5303678754433457, "grad_norm": 36.9375, "learning_rate": 9.91713004349078e-06, "loss": 20.7107, "step": 28580 }, { "epoch": 0.5305534485278255, "grad_norm": 35.875, "learning_rate": 9.917101047704738e-06, "loss": 20.6607, "step": 28590 }, { "epoch": 0.5307390216123054, "grad_norm": 33.90625, "learning_rate": 9.917072051918695e-06, "loss": 20.3141, "step": 28600 }, { "epoch": 0.5309245946967852, "grad_norm": 37.34375, "learning_rate": 9.917043056132653e-06, "loss": 20.4263, "step": 28610 }, { "epoch": 0.531110167781265, "grad_norm": 34.15625, "learning_rate": 9.91701406034661e-06, "loss": 20.6688, "step": 28620 }, { "epoch": 0.5312957408657448, "grad_norm": 33.65625, "learning_rate": 9.916985064560567e-06, "loss": 20.6133, "step": 28630 }, { "epoch": 0.5314813139502247, "grad_norm": 33.625, "learning_rate": 9.916956068774526e-06, "loss": 20.1227, "step": 28640 }, { "epoch": 0.5316668870347044, "grad_norm": 36.28125, "learning_rate": 9.916927072988484e-06, "loss": 20.2326, "step": 28650 }, { "epoch": 0.5318524601191843, "grad_norm": 35.8125, "learning_rate": 9.916898077202441e-06, "loss": 20.3055, "step": 28660 }, { "epoch": 0.5320380332036642, "grad_norm": 34.21875, "learning_rate": 9.916869081416399e-06, "loss": 20.5771, "step": 28670 }, { "epoch": 0.5322236062881439, "grad_norm": 33.96875, "learning_rate": 9.916840085630356e-06, "loss": 20.036, "step": 28680 }, { "epoch": 0.5324091793726238, "grad_norm": 34.0, "learning_rate": 9.916811089844314e-06, "loss": 20.4672, "step": 28690 }, { "epoch": 0.5325947524571036, "grad_norm": 35.90625, "learning_rate": 9.916782094058271e-06, "loss": 20.2936, "step": 28700 }, { "epoch": 0.5327803255415835, "grad_norm": 34.4375, "learning_rate": 9.91675309827223e-06, "loss": 20.5766, "step": 28710 }, { "epoch": 0.5329658986260633, "grad_norm": 32.40625, "learning_rate": 9.916724102486186e-06, "loss": 20.6091, "step": 28720 }, { "epoch": 0.5331514717105431, "grad_norm": 34.5, "learning_rate": 9.916695106700143e-06, "loss": 20.1864, "step": 28730 }, { "epoch": 0.533337044795023, "grad_norm": 37.6875, "learning_rate": 9.916666110914102e-06, "loss": 20.4298, "step": 28740 }, { "epoch": 0.5335226178795027, "grad_norm": 34.9375, "learning_rate": 9.91663711512806e-06, "loss": 20.1732, "step": 28750 }, { "epoch": 0.5337081909639826, "grad_norm": 35.5, "learning_rate": 9.916608119342017e-06, "loss": 20.4916, "step": 28760 }, { "epoch": 0.5338937640484624, "grad_norm": 35.125, "learning_rate": 9.916579123555974e-06, "loss": 20.4197, "step": 28770 }, { "epoch": 0.5340793371329422, "grad_norm": 37.21875, "learning_rate": 9.916550127769932e-06, "loss": 20.1008, "step": 28780 }, { "epoch": 0.5342649102174221, "grad_norm": 35.96875, "learning_rate": 9.91652113198389e-06, "loss": 20.4591, "step": 28790 }, { "epoch": 0.5344504833019019, "grad_norm": 35.03125, "learning_rate": 9.916492136197847e-06, "loss": 20.3487, "step": 28800 }, { "epoch": 0.5346360563863817, "grad_norm": 33.84375, "learning_rate": 9.916463140411806e-06, "loss": 20.0548, "step": 28810 }, { "epoch": 0.5348216294708615, "grad_norm": 34.375, "learning_rate": 9.916434144625763e-06, "loss": 20.0794, "step": 28820 }, { "epoch": 0.5350072025553414, "grad_norm": 36.84375, "learning_rate": 9.916405148839719e-06, "loss": 20.7802, "step": 28830 }, { "epoch": 0.5351927756398211, "grad_norm": 35.03125, "learning_rate": 9.916376153053678e-06, "loss": 20.5046, "step": 28840 }, { "epoch": 0.535378348724301, "grad_norm": 36.15625, "learning_rate": 9.916347157267635e-06, "loss": 20.5881, "step": 28850 }, { "epoch": 0.5355639218087809, "grad_norm": 36.375, "learning_rate": 9.916318161481593e-06, "loss": 20.5849, "step": 28860 }, { "epoch": 0.5357494948932607, "grad_norm": 35.71875, "learning_rate": 9.91628916569555e-06, "loss": 20.5164, "step": 28870 }, { "epoch": 0.5359350679777405, "grad_norm": 36.28125, "learning_rate": 9.916260169909508e-06, "loss": 20.0847, "step": 28880 }, { "epoch": 0.5361206410622203, "grad_norm": 34.21875, "learning_rate": 9.916231174123465e-06, "loss": 19.9122, "step": 28890 }, { "epoch": 0.5363062141467002, "grad_norm": 34.0, "learning_rate": 9.916202178337422e-06, "loss": 20.5583, "step": 28900 }, { "epoch": 0.53649178723118, "grad_norm": 34.1875, "learning_rate": 9.916173182551382e-06, "loss": 20.2385, "step": 28910 }, { "epoch": 0.5366773603156598, "grad_norm": 33.5, "learning_rate": 9.916144186765339e-06, "loss": 20.233, "step": 28920 }, { "epoch": 0.5368629334001397, "grad_norm": 35.6875, "learning_rate": 9.916115190979296e-06, "loss": 20.5665, "step": 28930 }, { "epoch": 0.5370485064846194, "grad_norm": 34.53125, "learning_rate": 9.916086195193254e-06, "loss": 20.1071, "step": 28940 }, { "epoch": 0.5372340795690993, "grad_norm": 35.34375, "learning_rate": 9.916057199407211e-06, "loss": 20.5049, "step": 28950 }, { "epoch": 0.5374196526535792, "grad_norm": 34.96875, "learning_rate": 9.916028203621169e-06, "loss": 20.1108, "step": 28960 }, { "epoch": 0.5376052257380589, "grad_norm": 37.625, "learning_rate": 9.915999207835126e-06, "loss": 20.7692, "step": 28970 }, { "epoch": 0.5377907988225388, "grad_norm": 37.0625, "learning_rate": 9.915970212049083e-06, "loss": 20.4374, "step": 28980 }, { "epoch": 0.5379763719070186, "grad_norm": 34.28125, "learning_rate": 9.91594121626304e-06, "loss": 20.5759, "step": 28990 }, { "epoch": 0.5381619449914984, "grad_norm": 34.6875, "learning_rate": 9.915912220476998e-06, "loss": 20.2427, "step": 29000 }, { "epoch": 0.5383475180759782, "grad_norm": 36.25, "learning_rate": 9.915883224690956e-06, "loss": 20.5409, "step": 29010 }, { "epoch": 0.5385330911604581, "grad_norm": 35.25, "learning_rate": 9.915854228904915e-06, "loss": 20.6585, "step": 29020 }, { "epoch": 0.5387186642449379, "grad_norm": 35.0, "learning_rate": 9.915825233118872e-06, "loss": 20.4239, "step": 29030 }, { "epoch": 0.5389042373294177, "grad_norm": 35.5, "learning_rate": 9.91579623733283e-06, "loss": 20.3294, "step": 29040 }, { "epoch": 0.5390898104138976, "grad_norm": 32.21875, "learning_rate": 9.915767241546787e-06, "loss": 20.3876, "step": 29050 }, { "epoch": 0.5392753834983774, "grad_norm": 34.3125, "learning_rate": 9.915738245760744e-06, "loss": 20.8428, "step": 29060 }, { "epoch": 0.5394609565828572, "grad_norm": 35.3125, "learning_rate": 9.915709249974702e-06, "loss": 20.0861, "step": 29070 }, { "epoch": 0.5396465296673371, "grad_norm": 35.9375, "learning_rate": 9.91568025418866e-06, "loss": 20.6351, "step": 29080 }, { "epoch": 0.5398321027518169, "grad_norm": 34.75, "learning_rate": 9.915651258402618e-06, "loss": 20.3223, "step": 29090 }, { "epoch": 0.5400176758362967, "grad_norm": 34.90625, "learning_rate": 9.915622262616574e-06, "loss": 20.0895, "step": 29100 }, { "epoch": 0.5402032489207765, "grad_norm": 33.9375, "learning_rate": 9.915593266830531e-06, "loss": 20.2776, "step": 29110 }, { "epoch": 0.5403888220052564, "grad_norm": 35.75, "learning_rate": 9.91556427104449e-06, "loss": 20.3674, "step": 29120 }, { "epoch": 0.5405743950897361, "grad_norm": 33.75, "learning_rate": 9.915535275258448e-06, "loss": 20.5777, "step": 29130 }, { "epoch": 0.540759968174216, "grad_norm": 34.84375, "learning_rate": 9.915506279472405e-06, "loss": 20.3793, "step": 29140 }, { "epoch": 0.5409455412586959, "grad_norm": 35.78125, "learning_rate": 9.915477283686363e-06, "loss": 20.5047, "step": 29150 }, { "epoch": 0.5411311143431756, "grad_norm": 35.46875, "learning_rate": 9.91544828790032e-06, "loss": 20.7663, "step": 29160 }, { "epoch": 0.5413166874276555, "grad_norm": 35.84375, "learning_rate": 9.915419292114278e-06, "loss": 20.3021, "step": 29170 }, { "epoch": 0.5415022605121353, "grad_norm": 35.21875, "learning_rate": 9.915390296328235e-06, "loss": 20.4529, "step": 29180 }, { "epoch": 0.5416878335966151, "grad_norm": 34.8125, "learning_rate": 9.915361300542194e-06, "loss": 20.3839, "step": 29190 }, { "epoch": 0.541873406681095, "grad_norm": 33.0625, "learning_rate": 9.91533230475615e-06, "loss": 20.3712, "step": 29200 }, { "epoch": 0.5420589797655748, "grad_norm": 34.1875, "learning_rate": 9.915303308970107e-06, "loss": 20.1465, "step": 29210 }, { "epoch": 0.5422445528500546, "grad_norm": 36.90625, "learning_rate": 9.915274313184066e-06, "loss": 20.0052, "step": 29220 }, { "epoch": 0.5424301259345344, "grad_norm": 36.6875, "learning_rate": 9.915245317398024e-06, "loss": 20.3256, "step": 29230 }, { "epoch": 0.5426156990190143, "grad_norm": 36.125, "learning_rate": 9.915216321611981e-06, "loss": 20.0692, "step": 29240 }, { "epoch": 0.5428012721034942, "grad_norm": 34.46875, "learning_rate": 9.915187325825938e-06, "loss": 20.5439, "step": 29250 }, { "epoch": 0.5429868451879739, "grad_norm": 34.78125, "learning_rate": 9.915158330039896e-06, "loss": 20.3697, "step": 29260 }, { "epoch": 0.5431724182724538, "grad_norm": 34.0625, "learning_rate": 9.915129334253853e-06, "loss": 20.0717, "step": 29270 }, { "epoch": 0.5433579913569336, "grad_norm": 33.84375, "learning_rate": 9.91510033846781e-06, "loss": 20.1454, "step": 29280 }, { "epoch": 0.5435435644414134, "grad_norm": 33.78125, "learning_rate": 9.91507134268177e-06, "loss": 20.4484, "step": 29290 }, { "epoch": 0.5437291375258932, "grad_norm": 34.0625, "learning_rate": 9.915042346895727e-06, "loss": 20.3169, "step": 29300 }, { "epoch": 0.5439147106103731, "grad_norm": 33.25, "learning_rate": 9.915013351109683e-06, "loss": 20.0393, "step": 29310 }, { "epoch": 0.5441002836948529, "grad_norm": 35.625, "learning_rate": 9.914984355323642e-06, "loss": 20.2945, "step": 29320 }, { "epoch": 0.5442858567793327, "grad_norm": 33.96875, "learning_rate": 9.9149553595376e-06, "loss": 20.3528, "step": 29330 }, { "epoch": 0.5444714298638126, "grad_norm": 35.03125, "learning_rate": 9.914926363751557e-06, "loss": 20.6093, "step": 29340 }, { "epoch": 0.5446570029482923, "grad_norm": 34.5, "learning_rate": 9.914897367965514e-06, "loss": 20.2775, "step": 29350 }, { "epoch": 0.5448425760327722, "grad_norm": 35.5, "learning_rate": 9.914868372179473e-06, "loss": 21.0042, "step": 29360 }, { "epoch": 0.5450281491172521, "grad_norm": 35.65625, "learning_rate": 9.914839376393429e-06, "loss": 20.6964, "step": 29370 }, { "epoch": 0.5452137222017318, "grad_norm": 35.8125, "learning_rate": 9.914810380607386e-06, "loss": 20.6275, "step": 29380 }, { "epoch": 0.5453992952862117, "grad_norm": 35.09375, "learning_rate": 9.914781384821346e-06, "loss": 19.9444, "step": 29390 }, { "epoch": 0.5455848683706915, "grad_norm": 35.21875, "learning_rate": 9.914752389035303e-06, "loss": 20.543, "step": 29400 }, { "epoch": 0.5457704414551714, "grad_norm": 37.21875, "learning_rate": 9.91472339324926e-06, "loss": 20.303, "step": 29410 }, { "epoch": 0.5459560145396511, "grad_norm": 34.9375, "learning_rate": 9.914694397463218e-06, "loss": 20.1036, "step": 29420 }, { "epoch": 0.546141587624131, "grad_norm": 33.78125, "learning_rate": 9.914665401677175e-06, "loss": 20.4787, "step": 29430 }, { "epoch": 0.5463271607086109, "grad_norm": 34.5, "learning_rate": 9.914636405891133e-06, "loss": 20.1483, "step": 29440 }, { "epoch": 0.5465127337930906, "grad_norm": 34.53125, "learning_rate": 9.91460741010509e-06, "loss": 20.1377, "step": 29450 }, { "epoch": 0.5466983068775705, "grad_norm": 32.65625, "learning_rate": 9.914578414319047e-06, "loss": 20.1915, "step": 29460 }, { "epoch": 0.5468838799620503, "grad_norm": 36.3125, "learning_rate": 9.914549418533005e-06, "loss": 20.0376, "step": 29470 }, { "epoch": 0.5470694530465301, "grad_norm": 36.34375, "learning_rate": 9.914520422746962e-06, "loss": 20.5274, "step": 29480 }, { "epoch": 0.54725502613101, "grad_norm": 37.5625, "learning_rate": 9.914491426960921e-06, "loss": 20.618, "step": 29490 }, { "epoch": 0.5474405992154898, "grad_norm": 34.78125, "learning_rate": 9.914462431174879e-06, "loss": 20.2392, "step": 29500 }, { "epoch": 0.5476261722999696, "grad_norm": 35.5, "learning_rate": 9.914433435388836e-06, "loss": 20.0363, "step": 29510 }, { "epoch": 0.5478117453844494, "grad_norm": 35.6875, "learning_rate": 9.914404439602794e-06, "loss": 19.957, "step": 29520 }, { "epoch": 0.5479973184689293, "grad_norm": 33.3125, "learning_rate": 9.914375443816751e-06, "loss": 20.0681, "step": 29530 }, { "epoch": 0.548182891553409, "grad_norm": 32.8125, "learning_rate": 9.914346448030708e-06, "loss": 20.4819, "step": 29540 }, { "epoch": 0.5483684646378889, "grad_norm": 35.1875, "learning_rate": 9.914317452244666e-06, "loss": 20.1795, "step": 29550 }, { "epoch": 0.5485540377223688, "grad_norm": 34.25, "learning_rate": 9.914288456458623e-06, "loss": 20.3919, "step": 29560 }, { "epoch": 0.5487396108068485, "grad_norm": 35.03125, "learning_rate": 9.914259460672582e-06, "loss": 20.1524, "step": 29570 }, { "epoch": 0.5489251838913284, "grad_norm": 33.4375, "learning_rate": 9.914230464886538e-06, "loss": 20.2308, "step": 29580 }, { "epoch": 0.5491107569758082, "grad_norm": 35.4375, "learning_rate": 9.914201469100495e-06, "loss": 19.9178, "step": 29590 }, { "epoch": 0.5492963300602881, "grad_norm": 34.8125, "learning_rate": 9.914172473314454e-06, "loss": 20.6476, "step": 29600 }, { "epoch": 0.5494819031447679, "grad_norm": 34.40625, "learning_rate": 9.914143477528412e-06, "loss": 20.2983, "step": 29610 }, { "epoch": 0.5496674762292477, "grad_norm": 35.09375, "learning_rate": 9.91411448174237e-06, "loss": 20.609, "step": 29620 }, { "epoch": 0.5498530493137276, "grad_norm": 35.375, "learning_rate": 9.914085485956327e-06, "loss": 20.1885, "step": 29630 }, { "epoch": 0.5500386223982073, "grad_norm": 34.1875, "learning_rate": 9.914056490170284e-06, "loss": 19.9611, "step": 29640 }, { "epoch": 0.5502241954826872, "grad_norm": 33.9375, "learning_rate": 9.914027494384242e-06, "loss": 19.8009, "step": 29650 }, { "epoch": 0.5504097685671671, "grad_norm": 34.34375, "learning_rate": 9.913998498598199e-06, "loss": 20.1612, "step": 29660 }, { "epoch": 0.5505953416516468, "grad_norm": 35.1875, "learning_rate": 9.913969502812158e-06, "loss": 20.1682, "step": 29670 }, { "epoch": 0.5507809147361267, "grad_norm": 32.9375, "learning_rate": 9.913940507026115e-06, "loss": 20.3603, "step": 29680 }, { "epoch": 0.5509664878206065, "grad_norm": 34.09375, "learning_rate": 9.913911511240071e-06, "loss": 20.2844, "step": 29690 }, { "epoch": 0.5511520609050863, "grad_norm": 35.78125, "learning_rate": 9.91388251545403e-06, "loss": 20.8106, "step": 29700 }, { "epoch": 0.5513376339895661, "grad_norm": 35.25, "learning_rate": 9.913853519667988e-06, "loss": 20.042, "step": 29710 }, { "epoch": 0.551523207074046, "grad_norm": 35.15625, "learning_rate": 9.913824523881945e-06, "loss": 20.7616, "step": 29720 }, { "epoch": 0.5517087801585258, "grad_norm": 37.375, "learning_rate": 9.913795528095902e-06, "loss": 20.4981, "step": 29730 }, { "epoch": 0.5518943532430056, "grad_norm": 34.34375, "learning_rate": 9.91376653230986e-06, "loss": 20.4069, "step": 29740 }, { "epoch": 0.5520799263274855, "grad_norm": 34.25, "learning_rate": 9.913737536523817e-06, "loss": 20.3144, "step": 29750 }, { "epoch": 0.5522654994119653, "grad_norm": 35.28125, "learning_rate": 9.913708540737775e-06, "loss": 20.3102, "step": 29760 }, { "epoch": 0.5524510724964451, "grad_norm": 34.09375, "learning_rate": 9.913679544951734e-06, "loss": 20.3573, "step": 29770 }, { "epoch": 0.552636645580925, "grad_norm": 37.34375, "learning_rate": 9.913650549165691e-06, "loss": 19.9171, "step": 29780 }, { "epoch": 0.5528222186654048, "grad_norm": 32.78125, "learning_rate": 9.913621553379647e-06, "loss": 20.3493, "step": 29790 }, { "epoch": 0.5530077917498846, "grad_norm": 35.28125, "learning_rate": 9.913592557593606e-06, "loss": 20.3546, "step": 29800 }, { "epoch": 0.5531933648343644, "grad_norm": 33.09375, "learning_rate": 9.913563561807563e-06, "loss": 20.5089, "step": 29810 }, { "epoch": 0.5533789379188443, "grad_norm": 35.03125, "learning_rate": 9.91353456602152e-06, "loss": 20.2858, "step": 29820 }, { "epoch": 0.553564511003324, "grad_norm": 33.9375, "learning_rate": 9.913505570235478e-06, "loss": 20.5666, "step": 29830 }, { "epoch": 0.5537500840878039, "grad_norm": 33.3125, "learning_rate": 9.913476574449437e-06, "loss": 20.3237, "step": 29840 }, { "epoch": 0.5539356571722838, "grad_norm": 33.4375, "learning_rate": 9.913447578663393e-06, "loss": 20.3019, "step": 29850 }, { "epoch": 0.5541212302567635, "grad_norm": 34.4375, "learning_rate": 9.91341858287735e-06, "loss": 20.2433, "step": 29860 }, { "epoch": 0.5543068033412434, "grad_norm": 37.40625, "learning_rate": 9.91338958709131e-06, "loss": 20.648, "step": 29870 }, { "epoch": 0.5544923764257232, "grad_norm": 34.28125, "learning_rate": 9.913360591305267e-06, "loss": 20.0626, "step": 29880 }, { "epoch": 0.554677949510203, "grad_norm": 37.0, "learning_rate": 9.913331595519224e-06, "loss": 19.7648, "step": 29890 }, { "epoch": 0.5548635225946829, "grad_norm": 35.71875, "learning_rate": 9.913302599733182e-06, "loss": 20.2157, "step": 29900 }, { "epoch": 0.5550490956791627, "grad_norm": 35.125, "learning_rate": 9.91327360394714e-06, "loss": 20.0799, "step": 29910 }, { "epoch": 0.5552346687636425, "grad_norm": 36.6875, "learning_rate": 9.913244608161097e-06, "loss": 20.2929, "step": 29920 }, { "epoch": 0.5554202418481223, "grad_norm": 34.25, "learning_rate": 9.913215612375054e-06, "loss": 20.0606, "step": 29930 }, { "epoch": 0.5556058149326022, "grad_norm": 35.3125, "learning_rate": 9.913186616589011e-06, "loss": 20.3643, "step": 29940 }, { "epoch": 0.555791388017082, "grad_norm": 34.84375, "learning_rate": 9.91315762080297e-06, "loss": 20.3112, "step": 29950 }, { "epoch": 0.5559769611015618, "grad_norm": 34.46875, "learning_rate": 9.913128625016926e-06, "loss": 20.1955, "step": 29960 }, { "epoch": 0.5561625341860417, "grad_norm": 34.4375, "learning_rate": 9.913099629230885e-06, "loss": 20.1614, "step": 29970 }, { "epoch": 0.5563481072705215, "grad_norm": 35.625, "learning_rate": 9.913070633444843e-06, "loss": 20.1444, "step": 29980 }, { "epoch": 0.5565336803550013, "grad_norm": 34.34375, "learning_rate": 9.9130416376588e-06, "loss": 20.4401, "step": 29990 }, { "epoch": 0.5567192534394811, "grad_norm": 35.65625, "learning_rate": 9.913012641872758e-06, "loss": 20.0201, "step": 30000 }, { "epoch": 0.5567192534394811, "eval_loss": 2.5262889862060547, "eval_runtime": 455.282, "eval_samples_per_second": 3189.489, "eval_steps_per_second": 49.837, "step": 30000 }, { "epoch": 0.556904826523961, "grad_norm": 36.46875, "learning_rate": 9.912983646086715e-06, "loss": 19.7191, "step": 30010 }, { "epoch": 0.5570903996084408, "grad_norm": 34.21875, "learning_rate": 9.912954650300672e-06, "loss": 20.2245, "step": 30020 }, { "epoch": 0.5572759726929206, "grad_norm": 35.1875, "learning_rate": 9.91292565451463e-06, "loss": 20.1206, "step": 30030 }, { "epoch": 0.5574615457774005, "grad_norm": 33.0, "learning_rate": 9.912896658728587e-06, "loss": 20.1384, "step": 30040 }, { "epoch": 0.5576471188618802, "grad_norm": 34.1875, "learning_rate": 9.912867662942546e-06, "loss": 20.4741, "step": 30050 }, { "epoch": 0.5578326919463601, "grad_norm": 33.9375, "learning_rate": 9.912838667156502e-06, "loss": 19.7416, "step": 30060 }, { "epoch": 0.55801826503084, "grad_norm": 35.4375, "learning_rate": 9.91280967137046e-06, "loss": 20.5265, "step": 30070 }, { "epoch": 0.5582038381153197, "grad_norm": 33.90625, "learning_rate": 9.912780675584419e-06, "loss": 20.1603, "step": 30080 }, { "epoch": 0.5583894111997996, "grad_norm": 37.40625, "learning_rate": 9.912751679798376e-06, "loss": 20.2072, "step": 30090 }, { "epoch": 0.5585749842842794, "grad_norm": 34.90625, "learning_rate": 9.912722684012333e-06, "loss": 20.7503, "step": 30100 }, { "epoch": 0.5587605573687592, "grad_norm": 34.71875, "learning_rate": 9.91269368822629e-06, "loss": 20.1174, "step": 30110 }, { "epoch": 0.558946130453239, "grad_norm": 33.6875, "learning_rate": 9.912664692440248e-06, "loss": 19.6637, "step": 30120 }, { "epoch": 0.5591317035377189, "grad_norm": 33.6875, "learning_rate": 9.912635696654206e-06, "loss": 20.3168, "step": 30130 }, { "epoch": 0.5593172766221988, "grad_norm": 35.15625, "learning_rate": 9.912606700868163e-06, "loss": 20.4504, "step": 30140 }, { "epoch": 0.5595028497066785, "grad_norm": 32.21875, "learning_rate": 9.912577705082122e-06, "loss": 19.8619, "step": 30150 }, { "epoch": 0.5596884227911584, "grad_norm": 34.46875, "learning_rate": 9.91254870929608e-06, "loss": 20.3352, "step": 30160 }, { "epoch": 0.5598739958756382, "grad_norm": 35.09375, "learning_rate": 9.912519713510035e-06, "loss": 20.1644, "step": 30170 }, { "epoch": 0.560059568960118, "grad_norm": 35.6875, "learning_rate": 9.912490717723994e-06, "loss": 20.4346, "step": 30180 }, { "epoch": 0.5602451420445979, "grad_norm": 34.90625, "learning_rate": 9.912461721937952e-06, "loss": 20.3004, "step": 30190 }, { "epoch": 0.5604307151290777, "grad_norm": 35.3125, "learning_rate": 9.912432726151909e-06, "loss": 19.9627, "step": 30200 }, { "epoch": 0.5606162882135575, "grad_norm": 36.46875, "learning_rate": 9.912403730365866e-06, "loss": 20.271, "step": 30210 }, { "epoch": 0.5608018612980373, "grad_norm": 34.9375, "learning_rate": 9.912374734579824e-06, "loss": 20.5842, "step": 30220 }, { "epoch": 0.5609874343825172, "grad_norm": 34.03125, "learning_rate": 9.912345738793781e-06, "loss": 20.1096, "step": 30230 }, { "epoch": 0.5611730074669969, "grad_norm": 35.6875, "learning_rate": 9.912316743007739e-06, "loss": 20.4759, "step": 30240 }, { "epoch": 0.5613585805514768, "grad_norm": 33.0, "learning_rate": 9.912287747221698e-06, "loss": 20.4337, "step": 30250 }, { "epoch": 0.5615441536359567, "grad_norm": 35.34375, "learning_rate": 9.912258751435655e-06, "loss": 20.2153, "step": 30260 }, { "epoch": 0.5617297267204364, "grad_norm": 37.25, "learning_rate": 9.912229755649613e-06, "loss": 19.9206, "step": 30270 }, { "epoch": 0.5619152998049163, "grad_norm": 35.5625, "learning_rate": 9.91220075986357e-06, "loss": 19.9902, "step": 30280 }, { "epoch": 0.5621008728893961, "grad_norm": 35.0625, "learning_rate": 9.912171764077527e-06, "loss": 19.9837, "step": 30290 }, { "epoch": 0.562286445973876, "grad_norm": 33.53125, "learning_rate": 9.912142768291485e-06, "loss": 20.1273, "step": 30300 }, { "epoch": 0.5624720190583558, "grad_norm": 36.96875, "learning_rate": 9.912113772505442e-06, "loss": 20.5882, "step": 30310 }, { "epoch": 0.5626575921428356, "grad_norm": 34.46875, "learning_rate": 9.912084776719401e-06, "loss": 20.1443, "step": 30320 }, { "epoch": 0.5628431652273155, "grad_norm": 33.875, "learning_rate": 9.912055780933357e-06, "loss": 20.4219, "step": 30330 }, { "epoch": 0.5630287383117952, "grad_norm": 34.375, "learning_rate": 9.912026785147314e-06, "loss": 20.6782, "step": 30340 }, { "epoch": 0.5632143113962751, "grad_norm": 36.65625, "learning_rate": 9.911997789361274e-06, "loss": 20.4529, "step": 30350 }, { "epoch": 0.563399884480755, "grad_norm": 32.75, "learning_rate": 9.911968793575231e-06, "loss": 20.2558, "step": 30360 }, { "epoch": 0.5635854575652347, "grad_norm": 34.46875, "learning_rate": 9.911939797789188e-06, "loss": 20.1141, "step": 30370 }, { "epoch": 0.5637710306497146, "grad_norm": 34.375, "learning_rate": 9.911910802003146e-06, "loss": 20.2123, "step": 30380 }, { "epoch": 0.5639566037341944, "grad_norm": 34.65625, "learning_rate": 9.911881806217103e-06, "loss": 20.2384, "step": 30390 }, { "epoch": 0.5641421768186742, "grad_norm": 36.96875, "learning_rate": 9.91185281043106e-06, "loss": 20.17, "step": 30400 }, { "epoch": 0.564327749903154, "grad_norm": 34.0625, "learning_rate": 9.911823814645018e-06, "loss": 20.2601, "step": 30410 }, { "epoch": 0.5645133229876339, "grad_norm": 33.875, "learning_rate": 9.911794818858977e-06, "loss": 20.4207, "step": 30420 }, { "epoch": 0.5646988960721137, "grad_norm": 34.90625, "learning_rate": 9.911765823072935e-06, "loss": 20.1737, "step": 30430 }, { "epoch": 0.5648844691565935, "grad_norm": 34.4375, "learning_rate": 9.91173682728689e-06, "loss": 20.2786, "step": 30440 }, { "epoch": 0.5650700422410734, "grad_norm": 34.59375, "learning_rate": 9.91170783150085e-06, "loss": 20.2238, "step": 30450 }, { "epoch": 0.5652556153255531, "grad_norm": 35.4375, "learning_rate": 9.911678835714807e-06, "loss": 20.7817, "step": 30460 }, { "epoch": 0.565441188410033, "grad_norm": 36.0625, "learning_rate": 9.911649839928764e-06, "loss": 20.3482, "step": 30470 }, { "epoch": 0.5656267614945129, "grad_norm": 34.1875, "learning_rate": 9.911620844142722e-06, "loss": 20.3726, "step": 30480 }, { "epoch": 0.5658123345789927, "grad_norm": 36.28125, "learning_rate": 9.911591848356679e-06, "loss": 20.351, "step": 30490 }, { "epoch": 0.5659979076634725, "grad_norm": 35.625, "learning_rate": 9.911562852570636e-06, "loss": 20.0794, "step": 30500 }, { "epoch": 0.5661834807479523, "grad_norm": 34.3125, "learning_rate": 9.911533856784594e-06, "loss": 20.5169, "step": 30510 }, { "epoch": 0.5663690538324322, "grad_norm": 37.6875, "learning_rate": 9.911504860998551e-06, "loss": 20.1937, "step": 30520 }, { "epoch": 0.5665546269169119, "grad_norm": 34.5, "learning_rate": 9.91147586521251e-06, "loss": 20.0421, "step": 30530 }, { "epoch": 0.5667402000013918, "grad_norm": 36.625, "learning_rate": 9.911446869426468e-06, "loss": 20.5098, "step": 30540 }, { "epoch": 0.5669257730858717, "grad_norm": 33.78125, "learning_rate": 9.911417873640425e-06, "loss": 20.1718, "step": 30550 }, { "epoch": 0.5671113461703514, "grad_norm": 35.375, "learning_rate": 9.911388877854383e-06, "loss": 20.703, "step": 30560 }, { "epoch": 0.5672969192548313, "grad_norm": 35.84375, "learning_rate": 9.91135988206834e-06, "loss": 20.0026, "step": 30570 }, { "epoch": 0.5674824923393111, "grad_norm": 37.21875, "learning_rate": 9.911330886282297e-06, "loss": 19.9628, "step": 30580 }, { "epoch": 0.5676680654237909, "grad_norm": 35.59375, "learning_rate": 9.911301890496255e-06, "loss": 20.4465, "step": 30590 }, { "epoch": 0.5678536385082708, "grad_norm": 35.4375, "learning_rate": 9.911272894710212e-06, "loss": 20.0498, "step": 30600 }, { "epoch": 0.5680392115927506, "grad_norm": 38.21875, "learning_rate": 9.91124389892417e-06, "loss": 20.5764, "step": 30610 }, { "epoch": 0.5682247846772304, "grad_norm": 34.34375, "learning_rate": 9.911214903138127e-06, "loss": 20.0934, "step": 30620 }, { "epoch": 0.5684103577617102, "grad_norm": 34.75, "learning_rate": 9.911185907352086e-06, "loss": 19.9147, "step": 30630 }, { "epoch": 0.5685959308461901, "grad_norm": 33.65625, "learning_rate": 9.911156911566043e-06, "loss": 20.4771, "step": 30640 }, { "epoch": 0.5687815039306698, "grad_norm": 34.96875, "learning_rate": 9.91112791578e-06, "loss": 19.7005, "step": 30650 }, { "epoch": 0.5689670770151497, "grad_norm": 34.21875, "learning_rate": 9.911098919993958e-06, "loss": 20.2171, "step": 30660 }, { "epoch": 0.5691526500996296, "grad_norm": 34.6875, "learning_rate": 9.911069924207916e-06, "loss": 20.4261, "step": 30670 }, { "epoch": 0.5693382231841094, "grad_norm": 35.5, "learning_rate": 9.911040928421873e-06, "loss": 19.8097, "step": 30680 }, { "epoch": 0.5695237962685892, "grad_norm": 37.15625, "learning_rate": 9.91101193263583e-06, "loss": 20.2062, "step": 30690 }, { "epoch": 0.569709369353069, "grad_norm": 37.3125, "learning_rate": 9.91098293684979e-06, "loss": 20.2614, "step": 30700 }, { "epoch": 0.5698949424375489, "grad_norm": 34.96875, "learning_rate": 9.910953941063745e-06, "loss": 20.3764, "step": 30710 }, { "epoch": 0.5700805155220287, "grad_norm": 35.1875, "learning_rate": 9.910924945277703e-06, "loss": 19.9522, "step": 30720 }, { "epoch": 0.5702660886065085, "grad_norm": 35.9375, "learning_rate": 9.910895949491662e-06, "loss": 19.9363, "step": 30730 }, { "epoch": 0.5704516616909884, "grad_norm": 36.71875, "learning_rate": 9.91086695370562e-06, "loss": 20.4394, "step": 30740 }, { "epoch": 0.5706372347754681, "grad_norm": 36.78125, "learning_rate": 9.910837957919577e-06, "loss": 20.6453, "step": 30750 }, { "epoch": 0.570822807859948, "grad_norm": 34.3125, "learning_rate": 9.910808962133534e-06, "loss": 20.342, "step": 30760 }, { "epoch": 0.5710083809444279, "grad_norm": 36.25, "learning_rate": 9.910779966347491e-06, "loss": 20.1425, "step": 30770 }, { "epoch": 0.5711939540289076, "grad_norm": 36.40625, "learning_rate": 9.910750970561449e-06, "loss": 19.9823, "step": 30780 }, { "epoch": 0.5713795271133875, "grad_norm": 32.53125, "learning_rate": 9.910721974775406e-06, "loss": 19.6331, "step": 30790 }, { "epoch": 0.5715651001978673, "grad_norm": 35.34375, "learning_rate": 9.910692978989365e-06, "loss": 20.1497, "step": 30800 }, { "epoch": 0.5717506732823471, "grad_norm": 33.40625, "learning_rate": 9.910663983203321e-06, "loss": 20.4599, "step": 30810 }, { "epoch": 0.5719362463668269, "grad_norm": 35.96875, "learning_rate": 9.910634987417278e-06, "loss": 20.056, "step": 30820 }, { "epoch": 0.5721218194513068, "grad_norm": 35.84375, "learning_rate": 9.910605991631238e-06, "loss": 19.8844, "step": 30830 }, { "epoch": 0.5723073925357867, "grad_norm": 34.34375, "learning_rate": 9.910576995845195e-06, "loss": 20.1571, "step": 30840 }, { "epoch": 0.5724929656202664, "grad_norm": 33.75, "learning_rate": 9.910548000059152e-06, "loss": 20.6345, "step": 30850 }, { "epoch": 0.5726785387047463, "grad_norm": 35.4375, "learning_rate": 9.91051900427311e-06, "loss": 20.0653, "step": 30860 }, { "epoch": 0.5728641117892261, "grad_norm": 35.03125, "learning_rate": 9.910490008487067e-06, "loss": 20.1354, "step": 30870 }, { "epoch": 0.5730496848737059, "grad_norm": 36.5625, "learning_rate": 9.910461012701025e-06, "loss": 19.7588, "step": 30880 }, { "epoch": 0.5732352579581858, "grad_norm": 36.34375, "learning_rate": 9.910432016914982e-06, "loss": 20.1258, "step": 30890 }, { "epoch": 0.5734208310426656, "grad_norm": 35.9375, "learning_rate": 9.910403021128941e-06, "loss": 19.8419, "step": 30900 }, { "epoch": 0.5736064041271454, "grad_norm": 36.09375, "learning_rate": 9.910374025342899e-06, "loss": 20.1061, "step": 30910 }, { "epoch": 0.5737919772116252, "grad_norm": 37.25, "learning_rate": 9.910345029556854e-06, "loss": 20.4488, "step": 30920 }, { "epoch": 0.5739775502961051, "grad_norm": 34.40625, "learning_rate": 9.910316033770813e-06, "loss": 20.1627, "step": 30930 }, { "epoch": 0.5741631233805848, "grad_norm": 35.34375, "learning_rate": 9.91028703798477e-06, "loss": 20.1895, "step": 30940 }, { "epoch": 0.5743486964650647, "grad_norm": 35.15625, "learning_rate": 9.910258042198728e-06, "loss": 20.327, "step": 30950 }, { "epoch": 0.5745342695495446, "grad_norm": 35.09375, "learning_rate": 9.910229046412686e-06, "loss": 19.9475, "step": 30960 }, { "epoch": 0.5747198426340243, "grad_norm": 34.96875, "learning_rate": 9.910200050626643e-06, "loss": 20.4849, "step": 30970 }, { "epoch": 0.5749054157185042, "grad_norm": 35.28125, "learning_rate": 9.9101710548406e-06, "loss": 20.0981, "step": 30980 }, { "epoch": 0.575090988802984, "grad_norm": 35.53125, "learning_rate": 9.910142059054558e-06, "loss": 20.0985, "step": 30990 }, { "epoch": 0.5752765618874638, "grad_norm": 36.1875, "learning_rate": 9.910113063268517e-06, "loss": 20.5353, "step": 31000 }, { "epoch": 0.5754621349719437, "grad_norm": 34.6875, "learning_rate": 9.910084067482474e-06, "loss": 19.9665, "step": 31010 }, { "epoch": 0.5756477080564235, "grad_norm": 34.40625, "learning_rate": 9.910055071696432e-06, "loss": 20.7101, "step": 31020 }, { "epoch": 0.5758332811409034, "grad_norm": 37.21875, "learning_rate": 9.910026075910389e-06, "loss": 20.1796, "step": 31030 }, { "epoch": 0.5760188542253831, "grad_norm": 34.53125, "learning_rate": 9.909997080124347e-06, "loss": 20.1421, "step": 31040 }, { "epoch": 0.576204427309863, "grad_norm": 32.40625, "learning_rate": 9.909968084338304e-06, "loss": 20.1085, "step": 31050 }, { "epoch": 0.5763900003943428, "grad_norm": 34.78125, "learning_rate": 9.909939088552261e-06, "loss": 20.3996, "step": 31060 }, { "epoch": 0.5765755734788226, "grad_norm": 34.875, "learning_rate": 9.909910092766219e-06, "loss": 20.6353, "step": 31070 }, { "epoch": 0.5767611465633025, "grad_norm": 35.6875, "learning_rate": 9.909881096980176e-06, "loss": 20.0609, "step": 31080 }, { "epoch": 0.5769467196477823, "grad_norm": 34.625, "learning_rate": 9.909852101194134e-06, "loss": 19.9845, "step": 31090 }, { "epoch": 0.5771322927322621, "grad_norm": 34.125, "learning_rate": 9.909823105408091e-06, "loss": 19.8181, "step": 31100 }, { "epoch": 0.5773178658167419, "grad_norm": 34.25, "learning_rate": 9.90979410962205e-06, "loss": 20.0183, "step": 31110 }, { "epoch": 0.5775034389012218, "grad_norm": 34.09375, "learning_rate": 9.909765113836007e-06, "loss": 19.9869, "step": 31120 }, { "epoch": 0.5776890119857016, "grad_norm": 34.21875, "learning_rate": 9.909736118049965e-06, "loss": 20.2748, "step": 31130 }, { "epoch": 0.5778745850701814, "grad_norm": 34.09375, "learning_rate": 9.909707122263922e-06, "loss": 19.7719, "step": 31140 }, { "epoch": 0.5780601581546613, "grad_norm": 36.15625, "learning_rate": 9.90967812647788e-06, "loss": 20.0686, "step": 31150 }, { "epoch": 0.578245731239141, "grad_norm": 36.4375, "learning_rate": 9.909649130691837e-06, "loss": 20.0449, "step": 31160 }, { "epoch": 0.5784313043236209, "grad_norm": 34.65625, "learning_rate": 9.909620134905795e-06, "loss": 20.3113, "step": 31170 }, { "epoch": 0.5786168774081007, "grad_norm": 35.5, "learning_rate": 9.909591139119754e-06, "loss": 20.0866, "step": 31180 }, { "epoch": 0.5788024504925806, "grad_norm": 34.5625, "learning_rate": 9.90956214333371e-06, "loss": 19.9436, "step": 31190 }, { "epoch": 0.5789880235770604, "grad_norm": 34.3125, "learning_rate": 9.909533147547667e-06, "loss": 20.16, "step": 31200 }, { "epoch": 0.5791735966615402, "grad_norm": 32.8125, "learning_rate": 9.909504151761626e-06, "loss": 20.0966, "step": 31210 }, { "epoch": 0.5793591697460201, "grad_norm": 36.03125, "learning_rate": 9.909475155975583e-06, "loss": 19.8559, "step": 31220 }, { "epoch": 0.5795447428304998, "grad_norm": 34.625, "learning_rate": 9.90944616018954e-06, "loss": 20.0601, "step": 31230 }, { "epoch": 0.5797303159149797, "grad_norm": 36.28125, "learning_rate": 9.909417164403498e-06, "loss": 19.8656, "step": 31240 }, { "epoch": 0.5799158889994596, "grad_norm": 34.125, "learning_rate": 9.909388168617455e-06, "loss": 20.3291, "step": 31250 }, { "epoch": 0.5801014620839393, "grad_norm": 33.84375, "learning_rate": 9.909359172831413e-06, "loss": 20.4572, "step": 31260 }, { "epoch": 0.5802870351684192, "grad_norm": 34.03125, "learning_rate": 9.90933017704537e-06, "loss": 19.946, "step": 31270 }, { "epoch": 0.580472608252899, "grad_norm": 33.9375, "learning_rate": 9.90930118125933e-06, "loss": 20.13, "step": 31280 }, { "epoch": 0.5806581813373788, "grad_norm": 33.90625, "learning_rate": 9.909272185473287e-06, "loss": 20.2398, "step": 31290 }, { "epoch": 0.5808437544218586, "grad_norm": 33.09375, "learning_rate": 9.909243189687243e-06, "loss": 20.1966, "step": 31300 }, { "epoch": 0.5810293275063385, "grad_norm": 34.8125, "learning_rate": 9.909214193901202e-06, "loss": 20.0873, "step": 31310 }, { "epoch": 0.5812149005908183, "grad_norm": 34.875, "learning_rate": 9.909185198115159e-06, "loss": 19.9998, "step": 31320 }, { "epoch": 0.5814004736752981, "grad_norm": 33.8125, "learning_rate": 9.909156202329116e-06, "loss": 20.1087, "step": 31330 }, { "epoch": 0.581586046759778, "grad_norm": 36.125, "learning_rate": 9.909127206543074e-06, "loss": 20.1545, "step": 31340 }, { "epoch": 0.5817716198442577, "grad_norm": 34.40625, "learning_rate": 9.909098210757031e-06, "loss": 20.2046, "step": 31350 }, { "epoch": 0.5819571929287376, "grad_norm": 37.1875, "learning_rate": 9.909069214970989e-06, "loss": 20.0161, "step": 31360 }, { "epoch": 0.5821427660132175, "grad_norm": 36.125, "learning_rate": 9.909040219184946e-06, "loss": 19.5715, "step": 31370 }, { "epoch": 0.5823283390976973, "grad_norm": 35.59375, "learning_rate": 9.909011223398905e-06, "loss": 19.9348, "step": 31380 }, { "epoch": 0.5825139121821771, "grad_norm": 35.28125, "learning_rate": 9.908982227612863e-06, "loss": 20.2421, "step": 31390 }, { "epoch": 0.5826994852666569, "grad_norm": 35.09375, "learning_rate": 9.908953231826818e-06, "loss": 19.6778, "step": 31400 }, { "epoch": 0.5828850583511368, "grad_norm": 33.65625, "learning_rate": 9.908924236040777e-06, "loss": 20.1597, "step": 31410 }, { "epoch": 0.5830706314356165, "grad_norm": 34.0625, "learning_rate": 9.908895240254735e-06, "loss": 19.7993, "step": 31420 }, { "epoch": 0.5832562045200964, "grad_norm": 32.1875, "learning_rate": 9.908866244468692e-06, "loss": 19.741, "step": 31430 }, { "epoch": 0.5834417776045763, "grad_norm": 35.96875, "learning_rate": 9.90883724868265e-06, "loss": 19.8878, "step": 31440 }, { "epoch": 0.583627350689056, "grad_norm": 35.09375, "learning_rate": 9.908808252896609e-06, "loss": 20.1307, "step": 31450 }, { "epoch": 0.5838129237735359, "grad_norm": 36.75, "learning_rate": 9.908779257110564e-06, "loss": 20.0339, "step": 31460 }, { "epoch": 0.5839984968580157, "grad_norm": 36.1875, "learning_rate": 9.908750261324522e-06, "loss": 20.0168, "step": 31470 }, { "epoch": 0.5841840699424955, "grad_norm": 33.65625, "learning_rate": 9.908721265538481e-06, "loss": 19.8763, "step": 31480 }, { "epoch": 0.5843696430269754, "grad_norm": 35.71875, "learning_rate": 9.908692269752438e-06, "loss": 19.7346, "step": 31490 }, { "epoch": 0.5845552161114552, "grad_norm": 34.90625, "learning_rate": 9.908663273966396e-06, "loss": 19.9969, "step": 31500 }, { "epoch": 0.584740789195935, "grad_norm": 35.8125, "learning_rate": 9.908634278180353e-06, "loss": 19.6326, "step": 31510 }, { "epoch": 0.5849263622804148, "grad_norm": 35.4375, "learning_rate": 9.90860528239431e-06, "loss": 20.0874, "step": 31520 }, { "epoch": 0.5851119353648947, "grad_norm": 36.0625, "learning_rate": 9.908576286608268e-06, "loss": 20.0785, "step": 31530 }, { "epoch": 0.5852975084493744, "grad_norm": 37.03125, "learning_rate": 9.908547290822225e-06, "loss": 20.1829, "step": 31540 }, { "epoch": 0.5854830815338543, "grad_norm": 36.34375, "learning_rate": 9.908518295036183e-06, "loss": 20.1522, "step": 31550 }, { "epoch": 0.5856686546183342, "grad_norm": 34.0625, "learning_rate": 9.90848929925014e-06, "loss": 20.0708, "step": 31560 }, { "epoch": 0.585854227702814, "grad_norm": 35.84375, "learning_rate": 9.908460303464098e-06, "loss": 19.979, "step": 31570 }, { "epoch": 0.5860398007872938, "grad_norm": 34.3125, "learning_rate": 9.908431307678055e-06, "loss": 20.0508, "step": 31580 }, { "epoch": 0.5862253738717736, "grad_norm": 35.3125, "learning_rate": 9.908402311892014e-06, "loss": 20.0383, "step": 31590 }, { "epoch": 0.5864109469562535, "grad_norm": 36.5, "learning_rate": 9.908373316105971e-06, "loss": 19.7061, "step": 31600 }, { "epoch": 0.5865965200407333, "grad_norm": 34.21875, "learning_rate": 9.908344320319929e-06, "loss": 20.0088, "step": 31610 }, { "epoch": 0.5867820931252131, "grad_norm": 36.875, "learning_rate": 9.908315324533886e-06, "loss": 20.0155, "step": 31620 }, { "epoch": 0.586967666209693, "grad_norm": 34.28125, "learning_rate": 9.908286328747844e-06, "loss": 19.8223, "step": 31630 }, { "epoch": 0.5871532392941727, "grad_norm": 36.21875, "learning_rate": 9.908257332961801e-06, "loss": 19.9218, "step": 31640 }, { "epoch": 0.5873388123786526, "grad_norm": 35.125, "learning_rate": 9.908228337175759e-06, "loss": 20.3176, "step": 31650 }, { "epoch": 0.5875243854631325, "grad_norm": 34.5, "learning_rate": 9.908199341389718e-06, "loss": 19.8512, "step": 31660 }, { "epoch": 0.5877099585476122, "grad_norm": 33.65625, "learning_rate": 9.908170345603673e-06, "loss": 19.7684, "step": 31670 }, { "epoch": 0.5878955316320921, "grad_norm": 35.0625, "learning_rate": 9.90814134981763e-06, "loss": 19.7739, "step": 31680 }, { "epoch": 0.5880811047165719, "grad_norm": 33.90625, "learning_rate": 9.90811235403159e-06, "loss": 19.9795, "step": 31690 }, { "epoch": 0.5882666778010517, "grad_norm": 33.34375, "learning_rate": 9.908083358245547e-06, "loss": 19.7892, "step": 31700 }, { "epoch": 0.5884522508855315, "grad_norm": 33.3125, "learning_rate": 9.908054362459505e-06, "loss": 20.0117, "step": 31710 }, { "epoch": 0.5886378239700114, "grad_norm": 34.65625, "learning_rate": 9.908025366673462e-06, "loss": 20.1514, "step": 31720 }, { "epoch": 0.5888233970544913, "grad_norm": 34.15625, "learning_rate": 9.90799637088742e-06, "loss": 20.2976, "step": 31730 }, { "epoch": 0.589008970138971, "grad_norm": 34.9375, "learning_rate": 9.907967375101377e-06, "loss": 20.0669, "step": 31740 }, { "epoch": 0.5891945432234509, "grad_norm": 33.125, "learning_rate": 9.907938379315334e-06, "loss": 19.8704, "step": 31750 }, { "epoch": 0.5893801163079307, "grad_norm": 34.34375, "learning_rate": 9.907909383529293e-06, "loss": 19.7355, "step": 31760 }, { "epoch": 0.5895656893924105, "grad_norm": 34.8125, "learning_rate": 9.90788038774325e-06, "loss": 19.8751, "step": 31770 }, { "epoch": 0.5897512624768904, "grad_norm": 33.5625, "learning_rate": 9.907851391957207e-06, "loss": 19.3739, "step": 31780 }, { "epoch": 0.5899368355613702, "grad_norm": 33.9375, "learning_rate": 9.907822396171166e-06, "loss": 20.0175, "step": 31790 }, { "epoch": 0.59012240864585, "grad_norm": 35.9375, "learning_rate": 9.907793400385123e-06, "loss": 19.7817, "step": 31800 }, { "epoch": 0.5903079817303298, "grad_norm": 35.15625, "learning_rate": 9.90776440459908e-06, "loss": 19.909, "step": 31810 }, { "epoch": 0.5904935548148097, "grad_norm": 34.8125, "learning_rate": 9.907735408813038e-06, "loss": 20.1357, "step": 31820 }, { "epoch": 0.5906791278992894, "grad_norm": 34.25, "learning_rate": 9.907706413026995e-06, "loss": 19.3806, "step": 31830 }, { "epoch": 0.5908647009837693, "grad_norm": 34.84375, "learning_rate": 9.907677417240953e-06, "loss": 19.9921, "step": 31840 }, { "epoch": 0.5910502740682492, "grad_norm": 33.21875, "learning_rate": 9.90764842145491e-06, "loss": 19.6932, "step": 31850 }, { "epoch": 0.5912358471527289, "grad_norm": 34.875, "learning_rate": 9.90761942566887e-06, "loss": 19.9271, "step": 31860 }, { "epoch": 0.5914214202372088, "grad_norm": 35.1875, "learning_rate": 9.907590429882827e-06, "loss": 20.209, "step": 31870 }, { "epoch": 0.5916069933216886, "grad_norm": 34.625, "learning_rate": 9.907561434096784e-06, "loss": 19.9407, "step": 31880 }, { "epoch": 0.5917925664061684, "grad_norm": 35.0625, "learning_rate": 9.907532438310741e-06, "loss": 20.1724, "step": 31890 }, { "epoch": 0.5919781394906483, "grad_norm": 33.96875, "learning_rate": 9.907503442524699e-06, "loss": 19.872, "step": 31900 }, { "epoch": 0.5921637125751281, "grad_norm": 34.28125, "learning_rate": 9.907474446738656e-06, "loss": 20.0196, "step": 31910 }, { "epoch": 0.592349285659608, "grad_norm": 33.34375, "learning_rate": 9.907445450952614e-06, "loss": 20.2614, "step": 31920 }, { "epoch": 0.5925348587440877, "grad_norm": 35.1875, "learning_rate": 9.907416455166573e-06, "loss": 19.9631, "step": 31930 }, { "epoch": 0.5927204318285676, "grad_norm": 33.15625, "learning_rate": 9.907387459380528e-06, "loss": 19.8016, "step": 31940 }, { "epoch": 0.5929060049130475, "grad_norm": 35.09375, "learning_rate": 9.907358463594486e-06, "loss": 19.6743, "step": 31950 }, { "epoch": 0.5930915779975272, "grad_norm": 35.09375, "learning_rate": 9.907329467808445e-06, "loss": 20.0119, "step": 31960 }, { "epoch": 0.5932771510820071, "grad_norm": 36.375, "learning_rate": 9.907300472022402e-06, "loss": 20.3043, "step": 31970 }, { "epoch": 0.5934627241664869, "grad_norm": 34.84375, "learning_rate": 9.90727147623636e-06, "loss": 20.0462, "step": 31980 }, { "epoch": 0.5936482972509667, "grad_norm": 35.75, "learning_rate": 9.907242480450317e-06, "loss": 19.8187, "step": 31990 }, { "epoch": 0.5938338703354465, "grad_norm": 34.875, "learning_rate": 9.907213484664275e-06, "loss": 20.3801, "step": 32000 }, { "epoch": 0.5940194434199264, "grad_norm": 35.15625, "learning_rate": 9.907184488878232e-06, "loss": 20.3156, "step": 32010 }, { "epoch": 0.5942050165044062, "grad_norm": 36.3125, "learning_rate": 9.90715549309219e-06, "loss": 20.2171, "step": 32020 }, { "epoch": 0.594390589588886, "grad_norm": 35.84375, "learning_rate": 9.907126497306147e-06, "loss": 20.237, "step": 32030 }, { "epoch": 0.5945761626733659, "grad_norm": 35.5625, "learning_rate": 9.907097501520106e-06, "loss": 20.6729, "step": 32040 }, { "epoch": 0.5947617357578456, "grad_norm": 35.375, "learning_rate": 9.907068505734062e-06, "loss": 20.135, "step": 32050 }, { "epoch": 0.5949473088423255, "grad_norm": 36.75, "learning_rate": 9.90703950994802e-06, "loss": 20.1497, "step": 32060 }, { "epoch": 0.5951328819268054, "grad_norm": 34.8125, "learning_rate": 9.907010514161978e-06, "loss": 19.918, "step": 32070 }, { "epoch": 0.5953184550112851, "grad_norm": 33.53125, "learning_rate": 9.906981518375936e-06, "loss": 20.1092, "step": 32080 }, { "epoch": 0.595504028095765, "grad_norm": 34.8125, "learning_rate": 9.906952522589893e-06, "loss": 20.1522, "step": 32090 }, { "epoch": 0.5956896011802448, "grad_norm": 35.5, "learning_rate": 9.90692352680385e-06, "loss": 20.0647, "step": 32100 }, { "epoch": 0.5958751742647247, "grad_norm": 35.375, "learning_rate": 9.906894531017808e-06, "loss": 20.1672, "step": 32110 }, { "epoch": 0.5960607473492044, "grad_norm": 32.96875, "learning_rate": 9.906865535231765e-06, "loss": 19.8313, "step": 32120 }, { "epoch": 0.5962463204336843, "grad_norm": 35.34375, "learning_rate": 9.906836539445723e-06, "loss": 20.6778, "step": 32130 }, { "epoch": 0.5964318935181642, "grad_norm": 33.6875, "learning_rate": 9.906807543659682e-06, "loss": 20.0226, "step": 32140 }, { "epoch": 0.5966174666026439, "grad_norm": 36.03125, "learning_rate": 9.906778547873637e-06, "loss": 19.692, "step": 32150 }, { "epoch": 0.5968030396871238, "grad_norm": 34.375, "learning_rate": 9.906749552087595e-06, "loss": 20.0812, "step": 32160 }, { "epoch": 0.5969886127716036, "grad_norm": 34.5625, "learning_rate": 9.906720556301554e-06, "loss": 20.4617, "step": 32170 }, { "epoch": 0.5971741858560834, "grad_norm": 34.625, "learning_rate": 9.906691560515511e-06, "loss": 20.2078, "step": 32180 }, { "epoch": 0.5973597589405633, "grad_norm": 35.40625, "learning_rate": 9.906662564729469e-06, "loss": 19.5445, "step": 32190 }, { "epoch": 0.5975453320250431, "grad_norm": 35.3125, "learning_rate": 9.906633568943426e-06, "loss": 20.0572, "step": 32200 }, { "epoch": 0.5977309051095229, "grad_norm": 35.46875, "learning_rate": 9.906604573157383e-06, "loss": 20.1442, "step": 32210 }, { "epoch": 0.5979164781940027, "grad_norm": 35.75, "learning_rate": 9.906575577371341e-06, "loss": 20.0047, "step": 32220 }, { "epoch": 0.5981020512784826, "grad_norm": 36.40625, "learning_rate": 9.906546581585298e-06, "loss": 20.1083, "step": 32230 }, { "epoch": 0.5982876243629623, "grad_norm": 34.9375, "learning_rate": 9.906517585799257e-06, "loss": 20.0461, "step": 32240 }, { "epoch": 0.5984731974474422, "grad_norm": 36.0, "learning_rate": 9.906488590013215e-06, "loss": 20.0892, "step": 32250 }, { "epoch": 0.5986587705319221, "grad_norm": 34.875, "learning_rate": 9.90645959422717e-06, "loss": 19.8061, "step": 32260 }, { "epoch": 0.5988443436164019, "grad_norm": 36.0, "learning_rate": 9.90643059844113e-06, "loss": 20.1581, "step": 32270 }, { "epoch": 0.5990299167008817, "grad_norm": 34.90625, "learning_rate": 9.906401602655087e-06, "loss": 19.8351, "step": 32280 }, { "epoch": 0.5992154897853615, "grad_norm": 37.03125, "learning_rate": 9.906372606869044e-06, "loss": 20.1744, "step": 32290 }, { "epoch": 0.5994010628698414, "grad_norm": 34.75, "learning_rate": 9.906343611083002e-06, "loss": 19.8549, "step": 32300 }, { "epoch": 0.5995866359543212, "grad_norm": 35.53125, "learning_rate": 9.906314615296961e-06, "loss": 19.901, "step": 32310 }, { "epoch": 0.599772209038801, "grad_norm": 35.59375, "learning_rate": 9.906285619510917e-06, "loss": 20.4425, "step": 32320 }, { "epoch": 0.5999577821232809, "grad_norm": 37.6875, "learning_rate": 9.906256623724874e-06, "loss": 20.1879, "step": 32330 }, { "epoch": 0.6001433552077606, "grad_norm": 35.25, "learning_rate": 9.906227627938833e-06, "loss": 19.9083, "step": 32340 }, { "epoch": 0.6003289282922405, "grad_norm": 35.5625, "learning_rate": 9.90619863215279e-06, "loss": 19.769, "step": 32350 }, { "epoch": 0.6005145013767204, "grad_norm": 36.09375, "learning_rate": 9.906169636366748e-06, "loss": 19.8395, "step": 32360 }, { "epoch": 0.6007000744612001, "grad_norm": 36.34375, "learning_rate": 9.906140640580705e-06, "loss": 20.1598, "step": 32370 }, { "epoch": 0.60088564754568, "grad_norm": 36.8125, "learning_rate": 9.906111644794663e-06, "loss": 19.9506, "step": 32380 }, { "epoch": 0.6010712206301598, "grad_norm": 35.1875, "learning_rate": 9.90608264900862e-06, "loss": 20.0782, "step": 32390 }, { "epoch": 0.6012567937146396, "grad_norm": 35.09375, "learning_rate": 9.906053653222578e-06, "loss": 19.96, "step": 32400 }, { "epoch": 0.6014423667991194, "grad_norm": 35.03125, "learning_rate": 9.906024657436537e-06, "loss": 20.2103, "step": 32410 }, { "epoch": 0.6016279398835993, "grad_norm": 33.46875, "learning_rate": 9.905995661650492e-06, "loss": 20.0204, "step": 32420 }, { "epoch": 0.6018135129680791, "grad_norm": 35.6875, "learning_rate": 9.90596666586445e-06, "loss": 19.9806, "step": 32430 }, { "epoch": 0.6019990860525589, "grad_norm": 35.25, "learning_rate": 9.905937670078409e-06, "loss": 19.869, "step": 32440 }, { "epoch": 0.6021846591370388, "grad_norm": 36.84375, "learning_rate": 9.905908674292366e-06, "loss": 20.2251, "step": 32450 }, { "epoch": 0.6023702322215186, "grad_norm": 34.25, "learning_rate": 9.905879678506324e-06, "loss": 20.3163, "step": 32460 }, { "epoch": 0.6025558053059984, "grad_norm": 34.25, "learning_rate": 9.905850682720281e-06, "loss": 19.7672, "step": 32470 }, { "epoch": 0.6027413783904783, "grad_norm": 34.8125, "learning_rate": 9.905821686934239e-06, "loss": 20.1227, "step": 32480 }, { "epoch": 0.6029269514749581, "grad_norm": 35.90625, "learning_rate": 9.905792691148196e-06, "loss": 20.2727, "step": 32490 }, { "epoch": 0.6031125245594379, "grad_norm": 35.5625, "learning_rate": 9.905763695362153e-06, "loss": 19.9203, "step": 32500 }, { "epoch": 0.6032980976439177, "grad_norm": 32.9375, "learning_rate": 9.905734699576112e-06, "loss": 19.4214, "step": 32510 }, { "epoch": 0.6034836707283976, "grad_norm": 35.5625, "learning_rate": 9.90570570379007e-06, "loss": 19.8526, "step": 32520 }, { "epoch": 0.6036692438128773, "grad_norm": 35.28125, "learning_rate": 9.905676708004026e-06, "loss": 19.5245, "step": 32530 }, { "epoch": 0.6038548168973572, "grad_norm": 38.78125, "learning_rate": 9.905647712217985e-06, "loss": 20.1409, "step": 32540 }, { "epoch": 0.6040403899818371, "grad_norm": 34.6875, "learning_rate": 9.905618716431942e-06, "loss": 19.7652, "step": 32550 }, { "epoch": 0.6042259630663168, "grad_norm": 34.46875, "learning_rate": 9.9055897206459e-06, "loss": 20.3414, "step": 32560 }, { "epoch": 0.6044115361507967, "grad_norm": 36.09375, "learning_rate": 9.905560724859857e-06, "loss": 20.0828, "step": 32570 }, { "epoch": 0.6045971092352765, "grad_norm": 35.0, "learning_rate": 9.905531729073814e-06, "loss": 20.2113, "step": 32580 }, { "epoch": 0.6047826823197563, "grad_norm": 35.03125, "learning_rate": 9.905502733287772e-06, "loss": 19.9929, "step": 32590 }, { "epoch": 0.6049682554042362, "grad_norm": 33.90625, "learning_rate": 9.90547373750173e-06, "loss": 19.9798, "step": 32600 }, { "epoch": 0.605153828488716, "grad_norm": 35.0625, "learning_rate": 9.905444741715687e-06, "loss": 20.134, "step": 32610 }, { "epoch": 0.6053394015731959, "grad_norm": 35.21875, "learning_rate": 9.905415745929646e-06, "loss": 20.5119, "step": 32620 }, { "epoch": 0.6055249746576756, "grad_norm": 36.15625, "learning_rate": 9.905386750143603e-06, "loss": 20.1409, "step": 32630 }, { "epoch": 0.6057105477421555, "grad_norm": 34.75, "learning_rate": 9.90535775435756e-06, "loss": 19.9287, "step": 32640 }, { "epoch": 0.6058961208266354, "grad_norm": 34.625, "learning_rate": 9.905328758571518e-06, "loss": 20.036, "step": 32650 }, { "epoch": 0.6060816939111151, "grad_norm": 34.53125, "learning_rate": 9.905299762785475e-06, "loss": 19.8514, "step": 32660 }, { "epoch": 0.606267266995595, "grad_norm": 34.78125, "learning_rate": 9.905270766999433e-06, "loss": 19.7428, "step": 32670 }, { "epoch": 0.6064528400800748, "grad_norm": 32.53125, "learning_rate": 9.90524177121339e-06, "loss": 19.5254, "step": 32680 }, { "epoch": 0.6066384131645546, "grad_norm": 35.9375, "learning_rate": 9.905212775427348e-06, "loss": 19.9658, "step": 32690 }, { "epoch": 0.6068239862490344, "grad_norm": 36.375, "learning_rate": 9.905183779641305e-06, "loss": 19.4217, "step": 32700 }, { "epoch": 0.6070095593335143, "grad_norm": 35.3125, "learning_rate": 9.905154783855262e-06, "loss": 19.6262, "step": 32710 }, { "epoch": 0.607195132417994, "grad_norm": 35.5625, "learning_rate": 9.905125788069221e-06, "loss": 19.8789, "step": 32720 }, { "epoch": 0.6073807055024739, "grad_norm": 36.3125, "learning_rate": 9.905096792283179e-06, "loss": 19.6746, "step": 32730 }, { "epoch": 0.6075662785869538, "grad_norm": 37.15625, "learning_rate": 9.905067796497135e-06, "loss": 19.791, "step": 32740 }, { "epoch": 0.6077518516714335, "grad_norm": 33.1875, "learning_rate": 9.905038800711094e-06, "loss": 19.8378, "step": 32750 }, { "epoch": 0.6079374247559134, "grad_norm": 36.8125, "learning_rate": 9.905009804925051e-06, "loss": 20.3618, "step": 32760 }, { "epoch": 0.6081229978403933, "grad_norm": 36.65625, "learning_rate": 9.904980809139008e-06, "loss": 19.9257, "step": 32770 }, { "epoch": 0.608308570924873, "grad_norm": 35.03125, "learning_rate": 9.904951813352966e-06, "loss": 20.2374, "step": 32780 }, { "epoch": 0.6084941440093529, "grad_norm": 34.0, "learning_rate": 9.904922817566925e-06, "loss": 19.7495, "step": 32790 }, { "epoch": 0.6086797170938327, "grad_norm": 34.75, "learning_rate": 9.90489382178088e-06, "loss": 19.7513, "step": 32800 }, { "epoch": 0.6088652901783126, "grad_norm": 35.5, "learning_rate": 9.904864825994838e-06, "loss": 20.0577, "step": 32810 }, { "epoch": 0.6090508632627923, "grad_norm": 33.53125, "learning_rate": 9.904835830208797e-06, "loss": 19.5465, "step": 32820 }, { "epoch": 0.6092364363472722, "grad_norm": 34.78125, "learning_rate": 9.904806834422755e-06, "loss": 20.4513, "step": 32830 }, { "epoch": 0.6094220094317521, "grad_norm": 35.5, "learning_rate": 9.904777838636712e-06, "loss": 20.2129, "step": 32840 }, { "epoch": 0.6096075825162318, "grad_norm": 35.03125, "learning_rate": 9.90474884285067e-06, "loss": 19.8204, "step": 32850 }, { "epoch": 0.6097931556007117, "grad_norm": 34.75, "learning_rate": 9.904719847064627e-06, "loss": 19.8698, "step": 32860 }, { "epoch": 0.6099787286851915, "grad_norm": 36.0, "learning_rate": 9.904690851278584e-06, "loss": 20.123, "step": 32870 }, { "epoch": 0.6101643017696713, "grad_norm": 35.375, "learning_rate": 9.904661855492542e-06, "loss": 20.2864, "step": 32880 }, { "epoch": 0.6103498748541512, "grad_norm": 36.6875, "learning_rate": 9.9046328597065e-06, "loss": 20.3273, "step": 32890 }, { "epoch": 0.610535447938631, "grad_norm": 34.78125, "learning_rate": 9.904603863920456e-06, "loss": 20.335, "step": 32900 }, { "epoch": 0.6107210210231108, "grad_norm": 33.125, "learning_rate": 9.904574868134414e-06, "loss": 19.5401, "step": 32910 }, { "epoch": 0.6109065941075906, "grad_norm": 35.25, "learning_rate": 9.904545872348373e-06, "loss": 20.044, "step": 32920 }, { "epoch": 0.6110921671920705, "grad_norm": 35.875, "learning_rate": 9.90451687656233e-06, "loss": 19.6458, "step": 32930 }, { "epoch": 0.6112777402765502, "grad_norm": 34.6875, "learning_rate": 9.904487880776288e-06, "loss": 19.9171, "step": 32940 }, { "epoch": 0.6114633133610301, "grad_norm": 35.4375, "learning_rate": 9.904458884990245e-06, "loss": 19.9779, "step": 32950 }, { "epoch": 0.61164888644551, "grad_norm": 33.09375, "learning_rate": 9.904429889204203e-06, "loss": 19.7718, "step": 32960 }, { "epoch": 0.6118344595299897, "grad_norm": 36.40625, "learning_rate": 9.90440089341816e-06, "loss": 19.9427, "step": 32970 }, { "epoch": 0.6120200326144696, "grad_norm": 33.1875, "learning_rate": 9.904371897632117e-06, "loss": 19.7202, "step": 32980 }, { "epoch": 0.6122056056989494, "grad_norm": 36.0625, "learning_rate": 9.904342901846077e-06, "loss": 19.4229, "step": 32990 }, { "epoch": 0.6123911787834293, "grad_norm": 34.375, "learning_rate": 9.904313906060034e-06, "loss": 20.1885, "step": 33000 }, { "epoch": 0.612576751867909, "grad_norm": 38.125, "learning_rate": 9.90428491027399e-06, "loss": 20.1329, "step": 33010 }, { "epoch": 0.6127623249523889, "grad_norm": 35.0625, "learning_rate": 9.904255914487949e-06, "loss": 19.4893, "step": 33020 }, { "epoch": 0.6129478980368688, "grad_norm": 35.375, "learning_rate": 9.904226918701906e-06, "loss": 19.8205, "step": 33030 }, { "epoch": 0.6131334711213485, "grad_norm": 33.34375, "learning_rate": 9.904197922915864e-06, "loss": 20.1173, "step": 33040 }, { "epoch": 0.6133190442058284, "grad_norm": 34.8125, "learning_rate": 9.904168927129821e-06, "loss": 20.4497, "step": 33050 }, { "epoch": 0.6135046172903083, "grad_norm": 34.65625, "learning_rate": 9.904139931343778e-06, "loss": 20.0398, "step": 33060 }, { "epoch": 0.613690190374788, "grad_norm": 34.46875, "learning_rate": 9.904110935557736e-06, "loss": 20.0827, "step": 33070 }, { "epoch": 0.6138757634592679, "grad_norm": 33.0, "learning_rate": 9.904081939771693e-06, "loss": 19.8621, "step": 33080 }, { "epoch": 0.6140613365437477, "grad_norm": 35.34375, "learning_rate": 9.904052943985652e-06, "loss": 20.097, "step": 33090 }, { "epoch": 0.6142469096282275, "grad_norm": 34.96875, "learning_rate": 9.90402394819961e-06, "loss": 19.9632, "step": 33100 }, { "epoch": 0.6144324827127073, "grad_norm": 36.40625, "learning_rate": 9.903994952413567e-06, "loss": 19.9311, "step": 33110 }, { "epoch": 0.6146180557971872, "grad_norm": 35.40625, "learning_rate": 9.903965956627524e-06, "loss": 19.8179, "step": 33120 }, { "epoch": 0.614803628881667, "grad_norm": 35.15625, "learning_rate": 9.903936960841482e-06, "loss": 19.5918, "step": 33130 }, { "epoch": 0.6149892019661468, "grad_norm": 32.78125, "learning_rate": 9.90390796505544e-06, "loss": 19.6736, "step": 33140 }, { "epoch": 0.6151747750506267, "grad_norm": 34.0, "learning_rate": 9.903878969269397e-06, "loss": 19.9292, "step": 33150 }, { "epoch": 0.6153603481351065, "grad_norm": 33.84375, "learning_rate": 9.903849973483354e-06, "loss": 20.5943, "step": 33160 }, { "epoch": 0.6155459212195863, "grad_norm": 35.96875, "learning_rate": 9.903820977697312e-06, "loss": 20.0126, "step": 33170 }, { "epoch": 0.6157314943040662, "grad_norm": 35.09375, "learning_rate": 9.903791981911269e-06, "loss": 20.3576, "step": 33180 }, { "epoch": 0.615917067388546, "grad_norm": 34.9375, "learning_rate": 9.903762986125226e-06, "loss": 19.9106, "step": 33190 }, { "epoch": 0.6161026404730258, "grad_norm": 33.25, "learning_rate": 9.903733990339185e-06, "loss": 19.8425, "step": 33200 }, { "epoch": 0.6162882135575056, "grad_norm": 35.78125, "learning_rate": 9.903704994553143e-06, "loss": 19.8126, "step": 33210 }, { "epoch": 0.6164737866419855, "grad_norm": 34.0, "learning_rate": 9.9036759987671e-06, "loss": 19.5984, "step": 33220 }, { "epoch": 0.6166593597264652, "grad_norm": 34.5625, "learning_rate": 9.903647002981058e-06, "loss": 20.4081, "step": 33230 }, { "epoch": 0.6168449328109451, "grad_norm": 34.21875, "learning_rate": 9.903618007195015e-06, "loss": 20.1874, "step": 33240 }, { "epoch": 0.617030505895425, "grad_norm": 34.59375, "learning_rate": 9.903589011408972e-06, "loss": 19.9073, "step": 33250 }, { "epoch": 0.6172160789799047, "grad_norm": 37.0625, "learning_rate": 9.90356001562293e-06, "loss": 19.5716, "step": 33260 }, { "epoch": 0.6174016520643846, "grad_norm": 33.8125, "learning_rate": 9.903531019836889e-06, "loss": 19.334, "step": 33270 }, { "epoch": 0.6175872251488644, "grad_norm": 33.125, "learning_rate": 9.903502024050845e-06, "loss": 20.0157, "step": 33280 }, { "epoch": 0.6177727982333442, "grad_norm": 33.28125, "learning_rate": 9.903473028264802e-06, "loss": 19.8102, "step": 33290 }, { "epoch": 0.617958371317824, "grad_norm": 33.65625, "learning_rate": 9.903444032478761e-06, "loss": 19.8039, "step": 33300 }, { "epoch": 0.6181439444023039, "grad_norm": 34.78125, "learning_rate": 9.903415036692719e-06, "loss": 19.945, "step": 33310 }, { "epoch": 0.6183295174867837, "grad_norm": 35.21875, "learning_rate": 9.903386040906676e-06, "loss": 20.1298, "step": 33320 }, { "epoch": 0.6185150905712635, "grad_norm": 35.65625, "learning_rate": 9.903357045120633e-06, "loss": 20.0132, "step": 33330 }, { "epoch": 0.6187006636557434, "grad_norm": 36.4375, "learning_rate": 9.90332804933459e-06, "loss": 19.7294, "step": 33340 }, { "epoch": 0.6188862367402233, "grad_norm": 34.59375, "learning_rate": 9.903299053548548e-06, "loss": 20.4257, "step": 33350 }, { "epoch": 0.619071809824703, "grad_norm": 36.46875, "learning_rate": 9.903270057762506e-06, "loss": 19.9613, "step": 33360 }, { "epoch": 0.6192573829091829, "grad_norm": 33.65625, "learning_rate": 9.903241061976465e-06, "loss": 19.5663, "step": 33370 }, { "epoch": 0.6194429559936627, "grad_norm": 36.78125, "learning_rate": 9.903212066190422e-06, "loss": 19.7608, "step": 33380 }, { "epoch": 0.6196285290781425, "grad_norm": 35.375, "learning_rate": 9.903183070404378e-06, "loss": 19.912, "step": 33390 }, { "epoch": 0.6198141021626223, "grad_norm": 34.34375, "learning_rate": 9.903154074618337e-06, "loss": 19.6677, "step": 33400 }, { "epoch": 0.6199996752471022, "grad_norm": 35.46875, "learning_rate": 9.903125078832294e-06, "loss": 19.9394, "step": 33410 }, { "epoch": 0.620185248331582, "grad_norm": 36.4375, "learning_rate": 9.903096083046252e-06, "loss": 19.5774, "step": 33420 }, { "epoch": 0.6203708214160618, "grad_norm": 36.28125, "learning_rate": 9.90306708726021e-06, "loss": 19.8794, "step": 33430 }, { "epoch": 0.6205563945005417, "grad_norm": 37.65625, "learning_rate": 9.903038091474167e-06, "loss": 20.2317, "step": 33440 }, { "epoch": 0.6207419675850214, "grad_norm": 35.75, "learning_rate": 9.903009095688124e-06, "loss": 20.0775, "step": 33450 }, { "epoch": 0.6209275406695013, "grad_norm": 35.0, "learning_rate": 9.902980099902081e-06, "loss": 20.0078, "step": 33460 }, { "epoch": 0.6211131137539811, "grad_norm": 35.71875, "learning_rate": 9.90295110411604e-06, "loss": 19.8658, "step": 33470 }, { "epoch": 0.6212986868384609, "grad_norm": 34.71875, "learning_rate": 9.902922108329998e-06, "loss": 19.6372, "step": 33480 }, { "epoch": 0.6214842599229408, "grad_norm": 36.0625, "learning_rate": 9.902893112543954e-06, "loss": 19.5675, "step": 33490 }, { "epoch": 0.6216698330074206, "grad_norm": 35.25, "learning_rate": 9.902864116757913e-06, "loss": 19.9335, "step": 33500 }, { "epoch": 0.6218554060919004, "grad_norm": 34.28125, "learning_rate": 9.90283512097187e-06, "loss": 19.6987, "step": 33510 }, { "epoch": 0.6220409791763802, "grad_norm": 32.0625, "learning_rate": 9.902806125185828e-06, "loss": 19.9386, "step": 33520 }, { "epoch": 0.6222265522608601, "grad_norm": 36.5625, "learning_rate": 9.902777129399785e-06, "loss": 19.802, "step": 33530 }, { "epoch": 0.62241212534534, "grad_norm": 34.71875, "learning_rate": 9.902748133613742e-06, "loss": 19.8391, "step": 33540 }, { "epoch": 0.6225976984298197, "grad_norm": 35.875, "learning_rate": 9.9027191378277e-06, "loss": 19.5903, "step": 33550 }, { "epoch": 0.6227832715142996, "grad_norm": 34.375, "learning_rate": 9.902690142041657e-06, "loss": 19.9132, "step": 33560 }, { "epoch": 0.6229688445987794, "grad_norm": 36.96875, "learning_rate": 9.902661146255616e-06, "loss": 19.7901, "step": 33570 }, { "epoch": 0.6231544176832592, "grad_norm": 33.25, "learning_rate": 9.902632150469574e-06, "loss": 19.8296, "step": 33580 }, { "epoch": 0.623339990767739, "grad_norm": 36.0625, "learning_rate": 9.902603154683531e-06, "loss": 19.664, "step": 33590 }, { "epoch": 0.6235255638522189, "grad_norm": 35.84375, "learning_rate": 9.902574158897489e-06, "loss": 19.6831, "step": 33600 }, { "epoch": 0.6237111369366987, "grad_norm": 35.3125, "learning_rate": 9.902545163111446e-06, "loss": 19.5117, "step": 33610 }, { "epoch": 0.6238967100211785, "grad_norm": 36.40625, "learning_rate": 9.902516167325403e-06, "loss": 19.7426, "step": 33620 }, { "epoch": 0.6240822831056584, "grad_norm": 34.96875, "learning_rate": 9.90248717153936e-06, "loss": 19.4962, "step": 33630 }, { "epoch": 0.6242678561901381, "grad_norm": 35.28125, "learning_rate": 9.902458175753318e-06, "loss": 19.9088, "step": 33640 }, { "epoch": 0.624453429274618, "grad_norm": 34.15625, "learning_rate": 9.902429179967277e-06, "loss": 19.5959, "step": 33650 }, { "epoch": 0.6246390023590979, "grad_norm": 35.40625, "learning_rate": 9.902400184181233e-06, "loss": 19.8292, "step": 33660 }, { "epoch": 0.6248245754435776, "grad_norm": 35.46875, "learning_rate": 9.90237118839519e-06, "loss": 19.9731, "step": 33670 }, { "epoch": 0.6250101485280575, "grad_norm": 33.78125, "learning_rate": 9.90234219260915e-06, "loss": 19.4511, "step": 33680 }, { "epoch": 0.6251957216125373, "grad_norm": 35.1875, "learning_rate": 9.902313196823107e-06, "loss": 19.9273, "step": 33690 }, { "epoch": 0.6253812946970172, "grad_norm": 34.34375, "learning_rate": 9.902284201037064e-06, "loss": 20.1118, "step": 33700 }, { "epoch": 0.625566867781497, "grad_norm": 33.1875, "learning_rate": 9.902255205251022e-06, "loss": 19.8202, "step": 33710 }, { "epoch": 0.6257524408659768, "grad_norm": 37.34375, "learning_rate": 9.902226209464979e-06, "loss": 20.2005, "step": 33720 }, { "epoch": 0.6259380139504567, "grad_norm": 34.4375, "learning_rate": 9.902197213678936e-06, "loss": 20.3347, "step": 33730 }, { "epoch": 0.6261235870349364, "grad_norm": 35.78125, "learning_rate": 9.902168217892894e-06, "loss": 20.3401, "step": 33740 }, { "epoch": 0.6263091601194163, "grad_norm": 36.0625, "learning_rate": 9.902139222106853e-06, "loss": 19.5948, "step": 33750 }, { "epoch": 0.6264947332038961, "grad_norm": 34.75, "learning_rate": 9.902110226320809e-06, "loss": 20.1637, "step": 33760 }, { "epoch": 0.6266803062883759, "grad_norm": 34.0, "learning_rate": 9.902081230534766e-06, "loss": 19.9732, "step": 33770 }, { "epoch": 0.6268658793728558, "grad_norm": 33.65625, "learning_rate": 9.902052234748725e-06, "loss": 19.7471, "step": 33780 }, { "epoch": 0.6270514524573356, "grad_norm": 33.875, "learning_rate": 9.902023238962683e-06, "loss": 19.665, "step": 33790 }, { "epoch": 0.6272370255418154, "grad_norm": 34.5, "learning_rate": 9.90199424317664e-06, "loss": 19.9919, "step": 33800 }, { "epoch": 0.6274225986262952, "grad_norm": 34.34375, "learning_rate": 9.901965247390597e-06, "loss": 19.9491, "step": 33810 }, { "epoch": 0.6276081717107751, "grad_norm": 35.0, "learning_rate": 9.901936251604555e-06, "loss": 19.8758, "step": 33820 }, { "epoch": 0.6277937447952548, "grad_norm": 37.84375, "learning_rate": 9.901907255818512e-06, "loss": 19.9295, "step": 33830 }, { "epoch": 0.6279793178797347, "grad_norm": 35.28125, "learning_rate": 9.90187826003247e-06, "loss": 19.8433, "step": 33840 }, { "epoch": 0.6281648909642146, "grad_norm": 36.28125, "learning_rate": 9.901849264246429e-06, "loss": 19.9194, "step": 33850 }, { "epoch": 0.6283504640486943, "grad_norm": 35.78125, "learning_rate": 9.901820268460386e-06, "loss": 19.8389, "step": 33860 }, { "epoch": 0.6285360371331742, "grad_norm": 35.3125, "learning_rate": 9.901791272674342e-06, "loss": 19.8355, "step": 33870 }, { "epoch": 0.628721610217654, "grad_norm": 36.125, "learning_rate": 9.901762276888301e-06, "loss": 19.8938, "step": 33880 }, { "epoch": 0.6289071833021339, "grad_norm": 36.65625, "learning_rate": 9.901733281102258e-06, "loss": 20.1254, "step": 33890 }, { "epoch": 0.6290927563866137, "grad_norm": 35.0625, "learning_rate": 9.901704285316216e-06, "loss": 19.8177, "step": 33900 }, { "epoch": 0.6292783294710935, "grad_norm": 35.84375, "learning_rate": 9.901675289530173e-06, "loss": 19.5455, "step": 33910 }, { "epoch": 0.6294639025555734, "grad_norm": 35.65625, "learning_rate": 9.90164629374413e-06, "loss": 19.5187, "step": 33920 }, { "epoch": 0.6296494756400531, "grad_norm": 36.65625, "learning_rate": 9.901617297958088e-06, "loss": 19.8171, "step": 33930 }, { "epoch": 0.629835048724533, "grad_norm": 35.875, "learning_rate": 9.901588302172045e-06, "loss": 20.0522, "step": 33940 }, { "epoch": 0.6300206218090129, "grad_norm": 34.375, "learning_rate": 9.901559306386005e-06, "loss": 19.8402, "step": 33950 }, { "epoch": 0.6302061948934926, "grad_norm": 34.84375, "learning_rate": 9.901530310599962e-06, "loss": 19.9172, "step": 33960 }, { "epoch": 0.6303917679779725, "grad_norm": 36.8125, "learning_rate": 9.90150131481392e-06, "loss": 20.0115, "step": 33970 }, { "epoch": 0.6305773410624523, "grad_norm": 34.9375, "learning_rate": 9.901472319027877e-06, "loss": 19.6994, "step": 33980 }, { "epoch": 0.6307629141469321, "grad_norm": 34.90625, "learning_rate": 9.901443323241834e-06, "loss": 19.816, "step": 33990 }, { "epoch": 0.630948487231412, "grad_norm": 34.8125, "learning_rate": 9.901414327455792e-06, "loss": 19.6213, "step": 34000 }, { "epoch": 0.6311340603158918, "grad_norm": 34.9375, "learning_rate": 9.901385331669749e-06, "loss": 20.15, "step": 34010 }, { "epoch": 0.6313196334003716, "grad_norm": 35.03125, "learning_rate": 9.901356335883708e-06, "loss": 19.3955, "step": 34020 }, { "epoch": 0.6315052064848514, "grad_norm": 36.3125, "learning_rate": 9.901327340097664e-06, "loss": 19.6105, "step": 34030 }, { "epoch": 0.6316907795693313, "grad_norm": 34.875, "learning_rate": 9.901298344311621e-06, "loss": 19.9898, "step": 34040 }, { "epoch": 0.6318763526538111, "grad_norm": 35.75, "learning_rate": 9.90126934852558e-06, "loss": 19.8148, "step": 34050 }, { "epoch": 0.6320619257382909, "grad_norm": 36.5, "learning_rate": 9.901240352739538e-06, "loss": 19.6133, "step": 34060 }, { "epoch": 0.6322474988227708, "grad_norm": 31.59375, "learning_rate": 9.901211356953495e-06, "loss": 20.1236, "step": 34070 }, { "epoch": 0.6324330719072506, "grad_norm": 35.15625, "learning_rate": 9.901182361167453e-06, "loss": 20.4988, "step": 34080 }, { "epoch": 0.6326186449917304, "grad_norm": 35.625, "learning_rate": 9.90115336538141e-06, "loss": 19.7418, "step": 34090 }, { "epoch": 0.6328042180762102, "grad_norm": 34.8125, "learning_rate": 9.901124369595367e-06, "loss": 19.2006, "step": 34100 }, { "epoch": 0.6329897911606901, "grad_norm": 35.40625, "learning_rate": 9.901095373809325e-06, "loss": 19.9027, "step": 34110 }, { "epoch": 0.6331753642451698, "grad_norm": 34.71875, "learning_rate": 9.901066378023282e-06, "loss": 19.8453, "step": 34120 }, { "epoch": 0.6333609373296497, "grad_norm": 36.40625, "learning_rate": 9.901037382237241e-06, "loss": 19.7594, "step": 34130 }, { "epoch": 0.6335465104141296, "grad_norm": 35.15625, "learning_rate": 9.901008386451197e-06, "loss": 19.8507, "step": 34140 }, { "epoch": 0.6337320834986093, "grad_norm": 33.40625, "learning_rate": 9.900979390665156e-06, "loss": 19.9106, "step": 34150 }, { "epoch": 0.6339176565830892, "grad_norm": 35.6875, "learning_rate": 9.900950394879113e-06, "loss": 19.5232, "step": 34160 }, { "epoch": 0.634103229667569, "grad_norm": 35.9375, "learning_rate": 9.900921399093071e-06, "loss": 20.0958, "step": 34170 }, { "epoch": 0.6342888027520488, "grad_norm": 35.53125, "learning_rate": 9.900892403307028e-06, "loss": 19.7891, "step": 34180 }, { "epoch": 0.6344743758365287, "grad_norm": 35.5, "learning_rate": 9.900863407520986e-06, "loss": 19.612, "step": 34190 }, { "epoch": 0.6346599489210085, "grad_norm": 36.3125, "learning_rate": 9.900834411734943e-06, "loss": 19.3468, "step": 34200 }, { "epoch": 0.6348455220054883, "grad_norm": 34.71875, "learning_rate": 9.9008054159489e-06, "loss": 19.8482, "step": 34210 }, { "epoch": 0.6350310950899681, "grad_norm": 36.78125, "learning_rate": 9.900776420162858e-06, "loss": 19.6807, "step": 34220 }, { "epoch": 0.635216668174448, "grad_norm": 37.4375, "learning_rate": 9.900747424376817e-06, "loss": 19.9276, "step": 34230 }, { "epoch": 0.6354022412589279, "grad_norm": 34.21875, "learning_rate": 9.900718428590774e-06, "loss": 19.8411, "step": 34240 }, { "epoch": 0.6355878143434076, "grad_norm": 36.1875, "learning_rate": 9.90068943280473e-06, "loss": 19.7345, "step": 34250 }, { "epoch": 0.6357733874278875, "grad_norm": 32.90625, "learning_rate": 9.90066043701869e-06, "loss": 19.9992, "step": 34260 }, { "epoch": 0.6359589605123673, "grad_norm": 35.25, "learning_rate": 9.900631441232647e-06, "loss": 19.8676, "step": 34270 }, { "epoch": 0.6361445335968471, "grad_norm": 37.65625, "learning_rate": 9.900602445446604e-06, "loss": 20.3433, "step": 34280 }, { "epoch": 0.636330106681327, "grad_norm": 34.875, "learning_rate": 9.900573449660561e-06, "loss": 19.8522, "step": 34290 }, { "epoch": 0.6365156797658068, "grad_norm": 35.46875, "learning_rate": 9.900544453874519e-06, "loss": 20.1288, "step": 34300 }, { "epoch": 0.6367012528502866, "grad_norm": 34.5625, "learning_rate": 9.900515458088476e-06, "loss": 19.6961, "step": 34310 }, { "epoch": 0.6368868259347664, "grad_norm": 34.6875, "learning_rate": 9.900486462302434e-06, "loss": 19.2526, "step": 34320 }, { "epoch": 0.6370723990192463, "grad_norm": 35.0, "learning_rate": 9.900457466516393e-06, "loss": 19.4355, "step": 34330 }, { "epoch": 0.637257972103726, "grad_norm": 34.75, "learning_rate": 9.90042847073035e-06, "loss": 19.768, "step": 34340 }, { "epoch": 0.6374435451882059, "grad_norm": 36.21875, "learning_rate": 9.900399474944306e-06, "loss": 19.6199, "step": 34350 }, { "epoch": 0.6376291182726858, "grad_norm": 33.34375, "learning_rate": 9.900370479158265e-06, "loss": 19.721, "step": 34360 }, { "epoch": 0.6378146913571655, "grad_norm": 35.0625, "learning_rate": 9.900341483372222e-06, "loss": 19.5648, "step": 34370 }, { "epoch": 0.6380002644416454, "grad_norm": 36.5, "learning_rate": 9.90031248758618e-06, "loss": 19.8864, "step": 34380 }, { "epoch": 0.6381858375261252, "grad_norm": 36.53125, "learning_rate": 9.900283491800137e-06, "loss": 19.8828, "step": 34390 }, { "epoch": 0.638371410610605, "grad_norm": 33.9375, "learning_rate": 9.900254496014096e-06, "loss": 19.6419, "step": 34400 }, { "epoch": 0.6385569836950848, "grad_norm": 36.625, "learning_rate": 9.900225500228052e-06, "loss": 19.6846, "step": 34410 }, { "epoch": 0.6387425567795647, "grad_norm": 34.78125, "learning_rate": 9.90019650444201e-06, "loss": 20.1099, "step": 34420 }, { "epoch": 0.6389281298640446, "grad_norm": 37.34375, "learning_rate": 9.900167508655969e-06, "loss": 19.9376, "step": 34430 }, { "epoch": 0.6391137029485243, "grad_norm": 36.65625, "learning_rate": 9.900138512869926e-06, "loss": 19.7929, "step": 34440 }, { "epoch": 0.6392992760330042, "grad_norm": 35.5, "learning_rate": 9.900109517083883e-06, "loss": 20.0175, "step": 34450 }, { "epoch": 0.639484849117484, "grad_norm": 34.65625, "learning_rate": 9.90008052129784e-06, "loss": 19.6058, "step": 34460 }, { "epoch": 0.6396704222019638, "grad_norm": 35.375, "learning_rate": 9.900051525511798e-06, "loss": 19.6707, "step": 34470 }, { "epoch": 0.6398559952864437, "grad_norm": 35.5625, "learning_rate": 9.900022529725756e-06, "loss": 20.0595, "step": 34480 }, { "epoch": 0.6400415683709235, "grad_norm": 34.875, "learning_rate": 9.899993533939713e-06, "loss": 20.3709, "step": 34490 }, { "epoch": 0.6402271414554033, "grad_norm": 34.53125, "learning_rate": 9.899964538153672e-06, "loss": 20.1387, "step": 34500 }, { "epoch": 0.6404127145398831, "grad_norm": 34.9375, "learning_rate": 9.899935542367628e-06, "loss": 19.8495, "step": 34510 }, { "epoch": 0.640598287624363, "grad_norm": 33.9375, "learning_rate": 9.899906546581585e-06, "loss": 19.439, "step": 34520 }, { "epoch": 0.6407838607088427, "grad_norm": 36.15625, "learning_rate": 9.899877550795544e-06, "loss": 19.9088, "step": 34530 }, { "epoch": 0.6409694337933226, "grad_norm": 36.03125, "learning_rate": 9.899848555009502e-06, "loss": 20.363, "step": 34540 }, { "epoch": 0.6411550068778025, "grad_norm": 35.84375, "learning_rate": 9.899819559223459e-06, "loss": 19.5979, "step": 34550 }, { "epoch": 0.6413405799622822, "grad_norm": 35.5, "learning_rate": 9.899790563437417e-06, "loss": 19.6288, "step": 34560 }, { "epoch": 0.6415261530467621, "grad_norm": 32.9375, "learning_rate": 9.899761567651374e-06, "loss": 19.7712, "step": 34570 }, { "epoch": 0.6417117261312419, "grad_norm": 34.78125, "learning_rate": 9.899732571865331e-06, "loss": 19.8826, "step": 34580 }, { "epoch": 0.6418972992157218, "grad_norm": 35.53125, "learning_rate": 9.899703576079289e-06, "loss": 19.6118, "step": 34590 }, { "epoch": 0.6420828723002016, "grad_norm": 35.28125, "learning_rate": 9.899674580293248e-06, "loss": 20.0948, "step": 34600 }, { "epoch": 0.6422684453846814, "grad_norm": 34.90625, "learning_rate": 9.899645584507205e-06, "loss": 19.9192, "step": 34610 }, { "epoch": 0.6424540184691613, "grad_norm": 35.21875, "learning_rate": 9.899616588721161e-06, "loss": 20.5888, "step": 34620 }, { "epoch": 0.642639591553641, "grad_norm": 34.59375, "learning_rate": 9.89958759293512e-06, "loss": 19.441, "step": 34630 }, { "epoch": 0.6428251646381209, "grad_norm": 35.21875, "learning_rate": 9.899558597149077e-06, "loss": 19.7351, "step": 34640 }, { "epoch": 0.6430107377226008, "grad_norm": 35.625, "learning_rate": 9.899529601363035e-06, "loss": 19.9079, "step": 34650 }, { "epoch": 0.6431963108070805, "grad_norm": 36.4375, "learning_rate": 9.899500605576992e-06, "loss": 19.9245, "step": 34660 }, { "epoch": 0.6433818838915604, "grad_norm": 35.75, "learning_rate": 9.89947160979095e-06, "loss": 19.9472, "step": 34670 }, { "epoch": 0.6435674569760402, "grad_norm": 35.09375, "learning_rate": 9.899442614004907e-06, "loss": 19.8009, "step": 34680 }, { "epoch": 0.64375303006052, "grad_norm": 35.15625, "learning_rate": 9.899413618218865e-06, "loss": 20.0998, "step": 34690 }, { "epoch": 0.6439386031449998, "grad_norm": 35.96875, "learning_rate": 9.899384622432822e-06, "loss": 19.661, "step": 34700 }, { "epoch": 0.6441241762294797, "grad_norm": 35.28125, "learning_rate": 9.899355626646781e-06, "loss": 20.1212, "step": 34710 }, { "epoch": 0.6443097493139595, "grad_norm": 36.4375, "learning_rate": 9.899326630860738e-06, "loss": 19.703, "step": 34720 }, { "epoch": 0.6444953223984393, "grad_norm": 36.09375, "learning_rate": 9.899297635074694e-06, "loss": 19.6669, "step": 34730 }, { "epoch": 0.6446808954829192, "grad_norm": 36.34375, "learning_rate": 9.899268639288653e-06, "loss": 20.0713, "step": 34740 }, { "epoch": 0.6448664685673989, "grad_norm": 34.8125, "learning_rate": 9.89923964350261e-06, "loss": 19.7988, "step": 34750 }, { "epoch": 0.6450520416518788, "grad_norm": 34.875, "learning_rate": 9.899210647716568e-06, "loss": 19.8849, "step": 34760 }, { "epoch": 0.6452376147363587, "grad_norm": 34.34375, "learning_rate": 9.899181651930525e-06, "loss": 19.7736, "step": 34770 }, { "epoch": 0.6454231878208385, "grad_norm": 36.3125, "learning_rate": 9.899152656144483e-06, "loss": 20.3017, "step": 34780 }, { "epoch": 0.6456087609053183, "grad_norm": 33.65625, "learning_rate": 9.89912366035844e-06, "loss": 20.0971, "step": 34790 }, { "epoch": 0.6457943339897981, "grad_norm": 36.03125, "learning_rate": 9.899094664572398e-06, "loss": 19.5588, "step": 34800 }, { "epoch": 0.645979907074278, "grad_norm": 33.84375, "learning_rate": 9.899065668786357e-06, "loss": 19.7038, "step": 34810 }, { "epoch": 0.6461654801587577, "grad_norm": 35.78125, "learning_rate": 9.899036673000314e-06, "loss": 20.0109, "step": 34820 }, { "epoch": 0.6463510532432376, "grad_norm": 35.03125, "learning_rate": 9.899007677214272e-06, "loss": 19.5806, "step": 34830 }, { "epoch": 0.6465366263277175, "grad_norm": 34.125, "learning_rate": 9.898978681428229e-06, "loss": 19.8743, "step": 34840 }, { "epoch": 0.6467221994121972, "grad_norm": 36.375, "learning_rate": 9.898949685642186e-06, "loss": 19.4995, "step": 34850 }, { "epoch": 0.6469077724966771, "grad_norm": 35.46875, "learning_rate": 9.898920689856144e-06, "loss": 19.5762, "step": 34860 }, { "epoch": 0.6470933455811569, "grad_norm": 35.375, "learning_rate": 9.898891694070101e-06, "loss": 19.7716, "step": 34870 }, { "epoch": 0.6472789186656367, "grad_norm": 34.5, "learning_rate": 9.89886269828406e-06, "loss": 19.8697, "step": 34880 }, { "epoch": 0.6474644917501166, "grad_norm": 36.03125, "learning_rate": 9.898833702498016e-06, "loss": 19.7747, "step": 34890 }, { "epoch": 0.6476500648345964, "grad_norm": 35.15625, "learning_rate": 9.898804706711973e-06, "loss": 19.6159, "step": 34900 }, { "epoch": 0.6478356379190762, "grad_norm": 35.53125, "learning_rate": 9.898775710925933e-06, "loss": 20.2549, "step": 34910 }, { "epoch": 0.648021211003556, "grad_norm": 35.09375, "learning_rate": 9.89874671513989e-06, "loss": 20.0461, "step": 34920 }, { "epoch": 0.6482067840880359, "grad_norm": 34.125, "learning_rate": 9.898717719353847e-06, "loss": 19.3693, "step": 34930 }, { "epoch": 0.6483923571725156, "grad_norm": 34.625, "learning_rate": 9.898688723567805e-06, "loss": 19.9014, "step": 34940 }, { "epoch": 0.6485779302569955, "grad_norm": 37.65625, "learning_rate": 9.898659727781762e-06, "loss": 20.2016, "step": 34950 }, { "epoch": 0.6487635033414754, "grad_norm": 35.53125, "learning_rate": 9.89863073199572e-06, "loss": 20.0671, "step": 34960 }, { "epoch": 0.6489490764259552, "grad_norm": 34.5, "learning_rate": 9.898601736209677e-06, "loss": 19.811, "step": 34970 }, { "epoch": 0.649134649510435, "grad_norm": 35.40625, "learning_rate": 9.898572740423636e-06, "loss": 19.5194, "step": 34980 }, { "epoch": 0.6493202225949148, "grad_norm": 38.28125, "learning_rate": 9.898543744637594e-06, "loss": 19.9472, "step": 34990 }, { "epoch": 0.6495057956793947, "grad_norm": 33.96875, "learning_rate": 9.89851474885155e-06, "loss": 19.7037, "step": 35000 }, { "epoch": 0.6495057956793947, "eval_loss": 2.4709317684173584, "eval_runtime": 455.6259, "eval_samples_per_second": 3187.082, "eval_steps_per_second": 49.8, "step": 35000 }, { "epoch": 0.6496913687638745, "grad_norm": 35.21875, "learning_rate": 9.898485753065508e-06, "loss": 19.7997, "step": 35010 }, { "epoch": 0.6498769418483543, "grad_norm": 37.65625, "learning_rate": 9.898456757279466e-06, "loss": 20.4342, "step": 35020 }, { "epoch": 0.6500625149328342, "grad_norm": 34.59375, "learning_rate": 9.898427761493423e-06, "loss": 19.5847, "step": 35030 }, { "epoch": 0.6502480880173139, "grad_norm": 35.5, "learning_rate": 9.89839876570738e-06, "loss": 19.8744, "step": 35040 }, { "epoch": 0.6504336611017938, "grad_norm": 36.15625, "learning_rate": 9.898369769921338e-06, "loss": 19.74, "step": 35050 }, { "epoch": 0.6506192341862737, "grad_norm": 36.96875, "learning_rate": 9.898340774135295e-06, "loss": 19.1062, "step": 35060 }, { "epoch": 0.6508048072707534, "grad_norm": 36.21875, "learning_rate": 9.898311778349253e-06, "loss": 19.9495, "step": 35070 }, { "epoch": 0.6509903803552333, "grad_norm": 33.96875, "learning_rate": 9.898282782563212e-06, "loss": 19.6127, "step": 35080 }, { "epoch": 0.6511759534397131, "grad_norm": 34.03125, "learning_rate": 9.89825378677717e-06, "loss": 19.839, "step": 35090 }, { "epoch": 0.6513615265241929, "grad_norm": 35.8125, "learning_rate": 9.898224790991125e-06, "loss": 19.3891, "step": 35100 }, { "epoch": 0.6515470996086727, "grad_norm": 37.0, "learning_rate": 9.898195795205084e-06, "loss": 19.8909, "step": 35110 }, { "epoch": 0.6517326726931526, "grad_norm": 33.90625, "learning_rate": 9.898166799419041e-06, "loss": 19.5183, "step": 35120 }, { "epoch": 0.6519182457776325, "grad_norm": 36.625, "learning_rate": 9.898137803632999e-06, "loss": 20.0471, "step": 35130 }, { "epoch": 0.6521038188621122, "grad_norm": 35.0, "learning_rate": 9.898108807846956e-06, "loss": 19.6311, "step": 35140 }, { "epoch": 0.6522893919465921, "grad_norm": 35.90625, "learning_rate": 9.898079812060914e-06, "loss": 19.8913, "step": 35150 }, { "epoch": 0.6524749650310719, "grad_norm": 35.5, "learning_rate": 9.898050816274871e-06, "loss": 19.7133, "step": 35160 }, { "epoch": 0.6526605381155517, "grad_norm": 36.65625, "learning_rate": 9.898021820488829e-06, "loss": 19.5409, "step": 35170 }, { "epoch": 0.6528461112000316, "grad_norm": 36.78125, "learning_rate": 9.897992824702786e-06, "loss": 19.4254, "step": 35180 }, { "epoch": 0.6530316842845114, "grad_norm": 36.09375, "learning_rate": 9.897963828916745e-06, "loss": 20.0565, "step": 35190 }, { "epoch": 0.6532172573689912, "grad_norm": 34.0, "learning_rate": 9.897934833130702e-06, "loss": 19.5167, "step": 35200 }, { "epoch": 0.653402830453471, "grad_norm": 36.03125, "learning_rate": 9.89790583734466e-06, "loss": 19.5704, "step": 35210 }, { "epoch": 0.6535884035379509, "grad_norm": 35.15625, "learning_rate": 9.897876841558617e-06, "loss": 19.4049, "step": 35220 }, { "epoch": 0.6537739766224306, "grad_norm": 35.78125, "learning_rate": 9.897847845772575e-06, "loss": 19.5957, "step": 35230 }, { "epoch": 0.6539595497069105, "grad_norm": 34.46875, "learning_rate": 9.897818849986532e-06, "loss": 19.1128, "step": 35240 }, { "epoch": 0.6541451227913904, "grad_norm": 36.09375, "learning_rate": 9.89778985420049e-06, "loss": 19.9392, "step": 35250 }, { "epoch": 0.6543306958758701, "grad_norm": 34.28125, "learning_rate": 9.897760858414447e-06, "loss": 19.5701, "step": 35260 }, { "epoch": 0.65451626896035, "grad_norm": 37.03125, "learning_rate": 9.897731862628404e-06, "loss": 19.6126, "step": 35270 }, { "epoch": 0.6547018420448298, "grad_norm": 35.25, "learning_rate": 9.897702866842362e-06, "loss": 19.5181, "step": 35280 }, { "epoch": 0.6548874151293096, "grad_norm": 33.96875, "learning_rate": 9.89767387105632e-06, "loss": 20.0623, "step": 35290 }, { "epoch": 0.6550729882137895, "grad_norm": 34.96875, "learning_rate": 9.897644875270278e-06, "loss": 19.9774, "step": 35300 }, { "epoch": 0.6552585612982693, "grad_norm": 35.0, "learning_rate": 9.897615879484236e-06, "loss": 20.0012, "step": 35310 }, { "epoch": 0.6554441343827492, "grad_norm": 33.4375, "learning_rate": 9.897586883698193e-06, "loss": 19.374, "step": 35320 }, { "epoch": 0.6556297074672289, "grad_norm": 36.15625, "learning_rate": 9.89755788791215e-06, "loss": 19.6139, "step": 35330 }, { "epoch": 0.6558152805517088, "grad_norm": 34.71875, "learning_rate": 9.897528892126108e-06, "loss": 20.0223, "step": 35340 }, { "epoch": 0.6560008536361887, "grad_norm": 36.3125, "learning_rate": 9.897499896340065e-06, "loss": 19.6657, "step": 35350 }, { "epoch": 0.6561864267206684, "grad_norm": 35.90625, "learning_rate": 9.897470900554024e-06, "loss": 19.4794, "step": 35360 }, { "epoch": 0.6563719998051483, "grad_norm": 35.1875, "learning_rate": 9.89744190476798e-06, "loss": 19.885, "step": 35370 }, { "epoch": 0.6565575728896281, "grad_norm": 34.875, "learning_rate": 9.897412908981937e-06, "loss": 19.5217, "step": 35380 }, { "epoch": 0.6567431459741079, "grad_norm": 38.28125, "learning_rate": 9.897383913195897e-06, "loss": 19.5472, "step": 35390 }, { "epoch": 0.6569287190585877, "grad_norm": 35.5, "learning_rate": 9.897354917409854e-06, "loss": 20.072, "step": 35400 }, { "epoch": 0.6571142921430676, "grad_norm": 36.125, "learning_rate": 9.897325921623811e-06, "loss": 19.4725, "step": 35410 }, { "epoch": 0.6572998652275474, "grad_norm": 34.21875, "learning_rate": 9.897296925837769e-06, "loss": 19.6822, "step": 35420 }, { "epoch": 0.6574854383120272, "grad_norm": 34.96875, "learning_rate": 9.897267930051726e-06, "loss": 19.8072, "step": 35430 }, { "epoch": 0.6576710113965071, "grad_norm": 35.90625, "learning_rate": 9.897238934265684e-06, "loss": 19.9793, "step": 35440 }, { "epoch": 0.6578565844809868, "grad_norm": 35.625, "learning_rate": 9.897209938479641e-06, "loss": 19.8958, "step": 35450 }, { "epoch": 0.6580421575654667, "grad_norm": 36.75, "learning_rate": 9.8971809426936e-06, "loss": 19.4194, "step": 35460 }, { "epoch": 0.6582277306499466, "grad_norm": 37.4375, "learning_rate": 9.897151946907558e-06, "loss": 20.0411, "step": 35470 }, { "epoch": 0.6584133037344264, "grad_norm": 35.5625, "learning_rate": 9.897122951121513e-06, "loss": 19.4788, "step": 35480 }, { "epoch": 0.6585988768189062, "grad_norm": 35.75, "learning_rate": 9.897093955335472e-06, "loss": 19.7224, "step": 35490 }, { "epoch": 0.658784449903386, "grad_norm": 33.25, "learning_rate": 9.89706495954943e-06, "loss": 19.5881, "step": 35500 }, { "epoch": 0.6589700229878659, "grad_norm": 35.1875, "learning_rate": 9.897035963763387e-06, "loss": 19.8756, "step": 35510 }, { "epoch": 0.6591555960723456, "grad_norm": 35.40625, "learning_rate": 9.897006967977345e-06, "loss": 19.8553, "step": 35520 }, { "epoch": 0.6593411691568255, "grad_norm": 35.9375, "learning_rate": 9.896977972191302e-06, "loss": 19.6849, "step": 35530 }, { "epoch": 0.6595267422413054, "grad_norm": 36.0, "learning_rate": 9.89694897640526e-06, "loss": 19.5625, "step": 35540 }, { "epoch": 0.6597123153257851, "grad_norm": 34.21875, "learning_rate": 9.896919980619217e-06, "loss": 19.38, "step": 35550 }, { "epoch": 0.659897888410265, "grad_norm": 35.125, "learning_rate": 9.896890984833176e-06, "loss": 20.0188, "step": 35560 }, { "epoch": 0.6600834614947448, "grad_norm": 36.90625, "learning_rate": 9.896861989047133e-06, "loss": 19.509, "step": 35570 }, { "epoch": 0.6602690345792246, "grad_norm": 36.0625, "learning_rate": 9.89683299326109e-06, "loss": 19.6172, "step": 35580 }, { "epoch": 0.6604546076637045, "grad_norm": 36.75, "learning_rate": 9.896803997475048e-06, "loss": 20.0115, "step": 35590 }, { "epoch": 0.6606401807481843, "grad_norm": 35.9375, "learning_rate": 9.896775001689006e-06, "loss": 19.4408, "step": 35600 }, { "epoch": 0.6608257538326641, "grad_norm": 36.8125, "learning_rate": 9.896746005902963e-06, "loss": 19.8966, "step": 35610 }, { "epoch": 0.6610113269171439, "grad_norm": 35.4375, "learning_rate": 9.89671701011692e-06, "loss": 19.3682, "step": 35620 }, { "epoch": 0.6611969000016238, "grad_norm": 36.6875, "learning_rate": 9.896688014330878e-06, "loss": 19.6727, "step": 35630 }, { "epoch": 0.6613824730861035, "grad_norm": 36.0625, "learning_rate": 9.896659018544835e-06, "loss": 19.741, "step": 35640 }, { "epoch": 0.6615680461705834, "grad_norm": 36.15625, "learning_rate": 9.896630022758793e-06, "loss": 20.0741, "step": 35650 }, { "epoch": 0.6617536192550633, "grad_norm": 35.09375, "learning_rate": 9.896601026972752e-06, "loss": 19.2563, "step": 35660 }, { "epoch": 0.6619391923395431, "grad_norm": 36.25, "learning_rate": 9.896572031186709e-06, "loss": 19.8901, "step": 35670 }, { "epoch": 0.6621247654240229, "grad_norm": 34.71875, "learning_rate": 9.896543035400666e-06, "loss": 19.9751, "step": 35680 }, { "epoch": 0.6623103385085027, "grad_norm": 33.90625, "learning_rate": 9.896514039614624e-06, "loss": 19.0984, "step": 35690 }, { "epoch": 0.6624959115929826, "grad_norm": 35.09375, "learning_rate": 9.896485043828581e-06, "loss": 19.549, "step": 35700 }, { "epoch": 0.6626814846774624, "grad_norm": 34.40625, "learning_rate": 9.896456048042539e-06, "loss": 19.4845, "step": 35710 }, { "epoch": 0.6628670577619422, "grad_norm": 35.8125, "learning_rate": 9.896427052256496e-06, "loss": 19.5595, "step": 35720 }, { "epoch": 0.6630526308464221, "grad_norm": 36.21875, "learning_rate": 9.896398056470453e-06, "loss": 20.1986, "step": 35730 }, { "epoch": 0.6632382039309018, "grad_norm": 35.09375, "learning_rate": 9.896369060684413e-06, "loss": 19.7129, "step": 35740 }, { "epoch": 0.6634237770153817, "grad_norm": 37.5, "learning_rate": 9.896340064898368e-06, "loss": 19.6799, "step": 35750 }, { "epoch": 0.6636093500998616, "grad_norm": 34.65625, "learning_rate": 9.896311069112326e-06, "loss": 19.6695, "step": 35760 }, { "epoch": 0.6637949231843413, "grad_norm": 36.28125, "learning_rate": 9.896282073326285e-06, "loss": 19.8379, "step": 35770 }, { "epoch": 0.6639804962688212, "grad_norm": 36.0625, "learning_rate": 9.896253077540242e-06, "loss": 19.4283, "step": 35780 }, { "epoch": 0.664166069353301, "grad_norm": 36.75, "learning_rate": 9.8962240817542e-06, "loss": 19.5346, "step": 35790 }, { "epoch": 0.6643516424377808, "grad_norm": 36.625, "learning_rate": 9.896195085968157e-06, "loss": 19.9317, "step": 35800 }, { "epoch": 0.6645372155222606, "grad_norm": 36.65625, "learning_rate": 9.896166090182114e-06, "loss": 20.057, "step": 35810 }, { "epoch": 0.6647227886067405, "grad_norm": 37.375, "learning_rate": 9.896137094396072e-06, "loss": 19.5665, "step": 35820 }, { "epoch": 0.6649083616912203, "grad_norm": 34.875, "learning_rate": 9.89610809861003e-06, "loss": 19.5112, "step": 35830 }, { "epoch": 0.6650939347757001, "grad_norm": 35.03125, "learning_rate": 9.896079102823988e-06, "loss": 19.7005, "step": 35840 }, { "epoch": 0.66527950786018, "grad_norm": 36.46875, "learning_rate": 9.896050107037944e-06, "loss": 19.6946, "step": 35850 }, { "epoch": 0.6654650809446598, "grad_norm": 35.96875, "learning_rate": 9.896021111251901e-06, "loss": 19.9033, "step": 35860 }, { "epoch": 0.6656506540291396, "grad_norm": 35.15625, "learning_rate": 9.89599211546586e-06, "loss": 19.7513, "step": 35870 }, { "epoch": 0.6658362271136194, "grad_norm": 34.28125, "learning_rate": 9.895963119679818e-06, "loss": 19.5514, "step": 35880 }, { "epoch": 0.6660218001980993, "grad_norm": 34.75, "learning_rate": 9.895934123893775e-06, "loss": 19.5261, "step": 35890 }, { "epoch": 0.6662073732825791, "grad_norm": 34.25, "learning_rate": 9.895905128107733e-06, "loss": 20.0393, "step": 35900 }, { "epoch": 0.6663929463670589, "grad_norm": 34.40625, "learning_rate": 9.89587613232169e-06, "loss": 19.6479, "step": 35910 }, { "epoch": 0.6665785194515388, "grad_norm": 37.53125, "learning_rate": 9.895847136535648e-06, "loss": 19.9184, "step": 35920 }, { "epoch": 0.6667640925360185, "grad_norm": 36.28125, "learning_rate": 9.895818140749605e-06, "loss": 19.9199, "step": 35930 }, { "epoch": 0.6669496656204984, "grad_norm": 38.84375, "learning_rate": 9.895789144963564e-06, "loss": 20.2039, "step": 35940 }, { "epoch": 0.6671352387049783, "grad_norm": 33.3125, "learning_rate": 9.895760149177522e-06, "loss": 19.3214, "step": 35950 }, { "epoch": 0.667320811789458, "grad_norm": 36.34375, "learning_rate": 9.895731153391477e-06, "loss": 19.4983, "step": 35960 }, { "epoch": 0.6675063848739379, "grad_norm": 32.9375, "learning_rate": 9.895702157605436e-06, "loss": 19.6202, "step": 35970 }, { "epoch": 0.6676919579584177, "grad_norm": 37.0625, "learning_rate": 9.895673161819394e-06, "loss": 19.7313, "step": 35980 }, { "epoch": 0.6678775310428975, "grad_norm": 36.90625, "learning_rate": 9.895644166033351e-06, "loss": 19.6996, "step": 35990 }, { "epoch": 0.6680631041273773, "grad_norm": 34.625, "learning_rate": 9.895615170247309e-06, "loss": 19.2428, "step": 36000 }, { "epoch": 0.6682486772118572, "grad_norm": 35.0, "learning_rate": 9.895586174461268e-06, "loss": 20.2196, "step": 36010 }, { "epoch": 0.6684342502963371, "grad_norm": 35.0625, "learning_rate": 9.895557178675223e-06, "loss": 19.687, "step": 36020 }, { "epoch": 0.6686198233808168, "grad_norm": 35.53125, "learning_rate": 9.89552818288918e-06, "loss": 19.5315, "step": 36030 }, { "epoch": 0.6688053964652967, "grad_norm": 34.5625, "learning_rate": 9.89549918710314e-06, "loss": 19.8379, "step": 36040 }, { "epoch": 0.6689909695497765, "grad_norm": 33.53125, "learning_rate": 9.895470191317097e-06, "loss": 19.9126, "step": 36050 }, { "epoch": 0.6691765426342563, "grad_norm": 36.5, "learning_rate": 9.895441195531055e-06, "loss": 19.7775, "step": 36060 }, { "epoch": 0.6693621157187362, "grad_norm": 35.375, "learning_rate": 9.895412199745012e-06, "loss": 19.745, "step": 36070 }, { "epoch": 0.669547688803216, "grad_norm": 35.375, "learning_rate": 9.89538320395897e-06, "loss": 19.7045, "step": 36080 }, { "epoch": 0.6697332618876958, "grad_norm": 34.6875, "learning_rate": 9.895354208172927e-06, "loss": 18.9317, "step": 36090 }, { "epoch": 0.6699188349721756, "grad_norm": 35.59375, "learning_rate": 9.895325212386884e-06, "loss": 19.8415, "step": 36100 }, { "epoch": 0.6701044080566555, "grad_norm": 34.71875, "learning_rate": 9.895296216600843e-06, "loss": 19.7913, "step": 36110 }, { "epoch": 0.6702899811411352, "grad_norm": 35.59375, "learning_rate": 9.895267220814799e-06, "loss": 19.5304, "step": 36120 }, { "epoch": 0.6704755542256151, "grad_norm": 34.96875, "learning_rate": 9.895238225028757e-06, "loss": 19.4498, "step": 36130 }, { "epoch": 0.670661127310095, "grad_norm": 35.25, "learning_rate": 9.895209229242716e-06, "loss": 19.6703, "step": 36140 }, { "epoch": 0.6708467003945747, "grad_norm": 34.21875, "learning_rate": 9.895180233456673e-06, "loss": 19.4621, "step": 36150 }, { "epoch": 0.6710322734790546, "grad_norm": 34.625, "learning_rate": 9.89515123767063e-06, "loss": 19.6551, "step": 36160 }, { "epoch": 0.6712178465635344, "grad_norm": 33.625, "learning_rate": 9.895122241884588e-06, "loss": 19.5649, "step": 36170 }, { "epoch": 0.6714034196480142, "grad_norm": 37.3125, "learning_rate": 9.895093246098545e-06, "loss": 19.501, "step": 36180 }, { "epoch": 0.6715889927324941, "grad_norm": 33.90625, "learning_rate": 9.895064250312503e-06, "loss": 19.7173, "step": 36190 }, { "epoch": 0.6717745658169739, "grad_norm": 37.9375, "learning_rate": 9.89503525452646e-06, "loss": 19.8307, "step": 36200 }, { "epoch": 0.6719601389014538, "grad_norm": 37.375, "learning_rate": 9.895006258740418e-06, "loss": 19.3379, "step": 36210 }, { "epoch": 0.6721457119859335, "grad_norm": 35.4375, "learning_rate": 9.894977262954377e-06, "loss": 19.5971, "step": 36220 }, { "epoch": 0.6723312850704134, "grad_norm": 35.78125, "learning_rate": 9.894948267168332e-06, "loss": 19.6289, "step": 36230 }, { "epoch": 0.6725168581548933, "grad_norm": 34.59375, "learning_rate": 9.894919271382291e-06, "loss": 19.5154, "step": 36240 }, { "epoch": 0.672702431239373, "grad_norm": 37.40625, "learning_rate": 9.894890275596249e-06, "loss": 19.7842, "step": 36250 }, { "epoch": 0.6728880043238529, "grad_norm": 35.71875, "learning_rate": 9.894861279810206e-06, "loss": 19.624, "step": 36260 }, { "epoch": 0.6730735774083327, "grad_norm": 36.0625, "learning_rate": 9.894832284024164e-06, "loss": 19.7039, "step": 36270 }, { "epoch": 0.6732591504928125, "grad_norm": 35.125, "learning_rate": 9.894803288238121e-06, "loss": 19.9208, "step": 36280 }, { "epoch": 0.6734447235772923, "grad_norm": 35.34375, "learning_rate": 9.894774292452078e-06, "loss": 19.3393, "step": 36290 }, { "epoch": 0.6736302966617722, "grad_norm": 35.53125, "learning_rate": 9.894745296666036e-06, "loss": 19.7506, "step": 36300 }, { "epoch": 0.673815869746252, "grad_norm": 37.28125, "learning_rate": 9.894716300879993e-06, "loss": 19.8507, "step": 36310 }, { "epoch": 0.6740014428307318, "grad_norm": 35.78125, "learning_rate": 9.894687305093952e-06, "loss": 19.6936, "step": 36320 }, { "epoch": 0.6741870159152117, "grad_norm": 37.65625, "learning_rate": 9.89465830930791e-06, "loss": 19.689, "step": 36330 }, { "epoch": 0.6743725889996914, "grad_norm": 37.0625, "learning_rate": 9.894629313521865e-06, "loss": 19.8617, "step": 36340 }, { "epoch": 0.6745581620841713, "grad_norm": 38.6875, "learning_rate": 9.894600317735825e-06, "loss": 19.7849, "step": 36350 }, { "epoch": 0.6747437351686512, "grad_norm": 36.71875, "learning_rate": 9.894571321949782e-06, "loss": 19.5811, "step": 36360 }, { "epoch": 0.6749293082531309, "grad_norm": 35.625, "learning_rate": 9.89454232616374e-06, "loss": 19.6559, "step": 36370 }, { "epoch": 0.6751148813376108, "grad_norm": 36.90625, "learning_rate": 9.894513330377697e-06, "loss": 19.5922, "step": 36380 }, { "epoch": 0.6753004544220906, "grad_norm": 37.0, "learning_rate": 9.894484334591654e-06, "loss": 19.6276, "step": 36390 }, { "epoch": 0.6754860275065705, "grad_norm": 35.84375, "learning_rate": 9.894455338805612e-06, "loss": 19.9161, "step": 36400 }, { "epoch": 0.6756716005910502, "grad_norm": 36.6875, "learning_rate": 9.894426343019569e-06, "loss": 19.9491, "step": 36410 }, { "epoch": 0.6758571736755301, "grad_norm": 35.03125, "learning_rate": 9.894397347233528e-06, "loss": 19.4294, "step": 36420 }, { "epoch": 0.67604274676001, "grad_norm": 35.25, "learning_rate": 9.894368351447486e-06, "loss": 19.8872, "step": 36430 }, { "epoch": 0.6762283198444897, "grad_norm": 35.84375, "learning_rate": 9.894339355661441e-06, "loss": 19.9173, "step": 36440 }, { "epoch": 0.6764138929289696, "grad_norm": 36.59375, "learning_rate": 9.8943103598754e-06, "loss": 19.9363, "step": 36450 }, { "epoch": 0.6765994660134494, "grad_norm": 36.03125, "learning_rate": 9.894281364089358e-06, "loss": 20.222, "step": 36460 }, { "epoch": 0.6767850390979292, "grad_norm": 36.09375, "learning_rate": 9.894252368303315e-06, "loss": 19.7718, "step": 36470 }, { "epoch": 0.6769706121824091, "grad_norm": 36.5, "learning_rate": 9.894223372517273e-06, "loss": 19.5082, "step": 36480 }, { "epoch": 0.6771561852668889, "grad_norm": 33.59375, "learning_rate": 9.894194376731232e-06, "loss": 19.2585, "step": 36490 }, { "epoch": 0.6773417583513687, "grad_norm": 34.875, "learning_rate": 9.894165380945187e-06, "loss": 19.7396, "step": 36500 }, { "epoch": 0.6775273314358485, "grad_norm": 36.6875, "learning_rate": 9.894136385159145e-06, "loss": 20.1396, "step": 36510 }, { "epoch": 0.6777129045203284, "grad_norm": 34.9375, "learning_rate": 9.894107389373104e-06, "loss": 19.3631, "step": 36520 }, { "epoch": 0.6778984776048081, "grad_norm": 36.0625, "learning_rate": 9.894078393587061e-06, "loss": 19.5291, "step": 36530 }, { "epoch": 0.678084050689288, "grad_norm": 35.625, "learning_rate": 9.894049397801019e-06, "loss": 19.3359, "step": 36540 }, { "epoch": 0.6782696237737679, "grad_norm": 34.03125, "learning_rate": 9.894020402014976e-06, "loss": 19.4528, "step": 36550 }, { "epoch": 0.6784551968582477, "grad_norm": 35.0, "learning_rate": 9.893991406228934e-06, "loss": 19.4136, "step": 36560 }, { "epoch": 0.6786407699427275, "grad_norm": 35.125, "learning_rate": 9.893962410442891e-06, "loss": 19.4879, "step": 36570 }, { "epoch": 0.6788263430272073, "grad_norm": 35.4375, "learning_rate": 9.893933414656848e-06, "loss": 19.7995, "step": 36580 }, { "epoch": 0.6790119161116872, "grad_norm": 33.875, "learning_rate": 9.893904418870807e-06, "loss": 19.754, "step": 36590 }, { "epoch": 0.679197489196167, "grad_norm": 37.03125, "learning_rate": 9.893875423084765e-06, "loss": 20.028, "step": 36600 }, { "epoch": 0.6793830622806468, "grad_norm": 35.46875, "learning_rate": 9.89384642729872e-06, "loss": 19.4872, "step": 36610 }, { "epoch": 0.6795686353651267, "grad_norm": 36.8125, "learning_rate": 9.89381743151268e-06, "loss": 19.7629, "step": 36620 }, { "epoch": 0.6797542084496064, "grad_norm": 32.21875, "learning_rate": 9.893788435726637e-06, "loss": 19.2777, "step": 36630 }, { "epoch": 0.6799397815340863, "grad_norm": 35.375, "learning_rate": 9.893759439940594e-06, "loss": 19.8635, "step": 36640 }, { "epoch": 0.6801253546185662, "grad_norm": 37.40625, "learning_rate": 9.893730444154552e-06, "loss": 19.7788, "step": 36650 }, { "epoch": 0.6803109277030459, "grad_norm": 35.375, "learning_rate": 9.89370144836851e-06, "loss": 19.6674, "step": 36660 }, { "epoch": 0.6804965007875258, "grad_norm": 35.6875, "learning_rate": 9.893672452582467e-06, "loss": 19.4339, "step": 36670 }, { "epoch": 0.6806820738720056, "grad_norm": 34.09375, "learning_rate": 9.893643456796424e-06, "loss": 19.9585, "step": 36680 }, { "epoch": 0.6808676469564854, "grad_norm": 37.40625, "learning_rate": 9.893614461010382e-06, "loss": 19.705, "step": 36690 }, { "epoch": 0.6810532200409652, "grad_norm": 34.71875, "learning_rate": 9.89358546522434e-06, "loss": 19.4342, "step": 36700 }, { "epoch": 0.6812387931254451, "grad_norm": 34.65625, "learning_rate": 9.893556469438296e-06, "loss": 19.4025, "step": 36710 }, { "epoch": 0.6814243662099249, "grad_norm": 34.09375, "learning_rate": 9.893527473652255e-06, "loss": 19.8077, "step": 36720 }, { "epoch": 0.6816099392944047, "grad_norm": 34.46875, "learning_rate": 9.893498477866213e-06, "loss": 19.4602, "step": 36730 }, { "epoch": 0.6817955123788846, "grad_norm": 35.78125, "learning_rate": 9.89346948208017e-06, "loss": 19.6626, "step": 36740 }, { "epoch": 0.6819810854633644, "grad_norm": 34.3125, "learning_rate": 9.893440486294128e-06, "loss": 19.751, "step": 36750 }, { "epoch": 0.6821666585478442, "grad_norm": 33.3125, "learning_rate": 9.893411490508085e-06, "loss": 19.3236, "step": 36760 }, { "epoch": 0.6823522316323241, "grad_norm": 35.15625, "learning_rate": 9.893382494722042e-06, "loss": 19.6032, "step": 36770 }, { "epoch": 0.6825378047168039, "grad_norm": 36.8125, "learning_rate": 9.893353498936e-06, "loss": 19.9787, "step": 36780 }, { "epoch": 0.6827233778012837, "grad_norm": 37.0, "learning_rate": 9.893324503149957e-06, "loss": 20.0506, "step": 36790 }, { "epoch": 0.6829089508857635, "grad_norm": 35.6875, "learning_rate": 9.893295507363916e-06, "loss": 19.2396, "step": 36800 }, { "epoch": 0.6830945239702434, "grad_norm": 33.75, "learning_rate": 9.893266511577874e-06, "loss": 19.7276, "step": 36810 }, { "epoch": 0.6832800970547231, "grad_norm": 35.5, "learning_rate": 9.89323751579183e-06, "loss": 19.6616, "step": 36820 }, { "epoch": 0.683465670139203, "grad_norm": 35.625, "learning_rate": 9.893208520005789e-06, "loss": 19.53, "step": 36830 }, { "epoch": 0.6836512432236829, "grad_norm": 34.8125, "learning_rate": 9.893179524219746e-06, "loss": 19.5154, "step": 36840 }, { "epoch": 0.6838368163081626, "grad_norm": 34.53125, "learning_rate": 9.893150528433703e-06, "loss": 19.7291, "step": 36850 }, { "epoch": 0.6840223893926425, "grad_norm": 35.96875, "learning_rate": 9.89312153264766e-06, "loss": 19.5387, "step": 36860 }, { "epoch": 0.6842079624771223, "grad_norm": 35.53125, "learning_rate": 9.893092536861618e-06, "loss": 19.7129, "step": 36870 }, { "epoch": 0.6843935355616021, "grad_norm": 34.71875, "learning_rate": 9.893063541075576e-06, "loss": 19.8325, "step": 36880 }, { "epoch": 0.684579108646082, "grad_norm": 34.625, "learning_rate": 9.893034545289533e-06, "loss": 19.6929, "step": 36890 }, { "epoch": 0.6847646817305618, "grad_norm": 37.28125, "learning_rate": 9.893005549503492e-06, "loss": 19.5159, "step": 36900 }, { "epoch": 0.6849502548150417, "grad_norm": 34.28125, "learning_rate": 9.89297655371745e-06, "loss": 19.5611, "step": 36910 }, { "epoch": 0.6851358278995214, "grad_norm": 34.59375, "learning_rate": 9.892947557931407e-06, "loss": 19.5881, "step": 36920 }, { "epoch": 0.6853214009840013, "grad_norm": 35.0625, "learning_rate": 9.892918562145364e-06, "loss": 19.5037, "step": 36930 }, { "epoch": 0.6855069740684812, "grad_norm": 34.53125, "learning_rate": 9.892889566359322e-06, "loss": 19.7405, "step": 36940 }, { "epoch": 0.6856925471529609, "grad_norm": 34.96875, "learning_rate": 9.89286057057328e-06, "loss": 19.6976, "step": 36950 }, { "epoch": 0.6858781202374408, "grad_norm": 33.96875, "learning_rate": 9.892831574787237e-06, "loss": 19.5494, "step": 36960 }, { "epoch": 0.6860636933219206, "grad_norm": 36.40625, "learning_rate": 9.892802579001196e-06, "loss": 19.4275, "step": 36970 }, { "epoch": 0.6862492664064004, "grad_norm": 36.03125, "learning_rate": 9.892773583215151e-06, "loss": 19.6989, "step": 36980 }, { "epoch": 0.6864348394908802, "grad_norm": 37.59375, "learning_rate": 9.892744587429109e-06, "loss": 19.6063, "step": 36990 }, { "epoch": 0.6866204125753601, "grad_norm": 38.40625, "learning_rate": 9.892715591643068e-06, "loss": 19.5551, "step": 37000 }, { "epoch": 0.6868059856598399, "grad_norm": 36.21875, "learning_rate": 9.892686595857025e-06, "loss": 19.2527, "step": 37010 }, { "epoch": 0.6869915587443197, "grad_norm": 36.15625, "learning_rate": 9.892657600070983e-06, "loss": 19.5331, "step": 37020 }, { "epoch": 0.6871771318287996, "grad_norm": 35.5, "learning_rate": 9.89262860428494e-06, "loss": 19.0556, "step": 37030 }, { "epoch": 0.6873627049132793, "grad_norm": 34.125, "learning_rate": 9.892599608498898e-06, "loss": 19.6261, "step": 37040 }, { "epoch": 0.6875482779977592, "grad_norm": 37.3125, "learning_rate": 9.892570612712855e-06, "loss": 19.2994, "step": 37050 }, { "epoch": 0.6877338510822391, "grad_norm": 37.5625, "learning_rate": 9.892541616926812e-06, "loss": 19.6494, "step": 37060 }, { "epoch": 0.6879194241667188, "grad_norm": 34.96875, "learning_rate": 9.892512621140771e-06, "loss": 19.6874, "step": 37070 }, { "epoch": 0.6881049972511987, "grad_norm": 35.125, "learning_rate": 9.892483625354729e-06, "loss": 19.6228, "step": 37080 }, { "epoch": 0.6882905703356785, "grad_norm": 36.875, "learning_rate": 9.892454629568685e-06, "loss": 19.251, "step": 37090 }, { "epoch": 0.6884761434201584, "grad_norm": 34.90625, "learning_rate": 9.892425633782644e-06, "loss": 19.8272, "step": 37100 }, { "epoch": 0.6886617165046381, "grad_norm": 37.8125, "learning_rate": 9.892396637996601e-06, "loss": 19.4844, "step": 37110 }, { "epoch": 0.688847289589118, "grad_norm": 33.09375, "learning_rate": 9.892367642210558e-06, "loss": 19.3807, "step": 37120 }, { "epoch": 0.6890328626735979, "grad_norm": 32.59375, "learning_rate": 9.892338646424516e-06, "loss": 19.1829, "step": 37130 }, { "epoch": 0.6892184357580776, "grad_norm": 34.65625, "learning_rate": 9.892309650638473e-06, "loss": 19.678, "step": 37140 }, { "epoch": 0.6894040088425575, "grad_norm": 34.34375, "learning_rate": 9.89228065485243e-06, "loss": 20.2141, "step": 37150 }, { "epoch": 0.6895895819270373, "grad_norm": 35.125, "learning_rate": 9.892251659066388e-06, "loss": 19.9404, "step": 37160 }, { "epoch": 0.6897751550115171, "grad_norm": 34.71875, "learning_rate": 9.892222663280347e-06, "loss": 19.518, "step": 37170 }, { "epoch": 0.689960728095997, "grad_norm": 35.0625, "learning_rate": 9.892193667494305e-06, "loss": 19.57, "step": 37180 }, { "epoch": 0.6901463011804768, "grad_norm": 35.46875, "learning_rate": 9.892164671708262e-06, "loss": 19.5693, "step": 37190 }, { "epoch": 0.6903318742649566, "grad_norm": 35.59375, "learning_rate": 9.89213567592222e-06, "loss": 19.3073, "step": 37200 }, { "epoch": 0.6905174473494364, "grad_norm": 35.8125, "learning_rate": 9.892106680136177e-06, "loss": 19.3111, "step": 37210 }, { "epoch": 0.6907030204339163, "grad_norm": 34.46875, "learning_rate": 9.892077684350134e-06, "loss": 19.6284, "step": 37220 }, { "epoch": 0.690888593518396, "grad_norm": 35.1875, "learning_rate": 9.892048688564092e-06, "loss": 19.6832, "step": 37230 }, { "epoch": 0.6910741666028759, "grad_norm": 37.375, "learning_rate": 9.892019692778049e-06, "loss": 19.5748, "step": 37240 }, { "epoch": 0.6912597396873558, "grad_norm": 35.28125, "learning_rate": 9.891990696992006e-06, "loss": 19.7095, "step": 37250 }, { "epoch": 0.6914453127718355, "grad_norm": 35.15625, "learning_rate": 9.891961701205964e-06, "loss": 19.378, "step": 37260 }, { "epoch": 0.6916308858563154, "grad_norm": 34.625, "learning_rate": 9.891932705419921e-06, "loss": 19.7614, "step": 37270 }, { "epoch": 0.6918164589407952, "grad_norm": 37.125, "learning_rate": 9.89190370963388e-06, "loss": 19.5859, "step": 37280 }, { "epoch": 0.6920020320252751, "grad_norm": 35.71875, "learning_rate": 9.891874713847838e-06, "loss": 19.326, "step": 37290 }, { "epoch": 0.6921876051097549, "grad_norm": 35.03125, "learning_rate": 9.891845718061795e-06, "loss": 19.6066, "step": 37300 }, { "epoch": 0.6923731781942347, "grad_norm": 38.125, "learning_rate": 9.891816722275753e-06, "loss": 19.2031, "step": 37310 }, { "epoch": 0.6925587512787146, "grad_norm": 34.5, "learning_rate": 9.89178772648971e-06, "loss": 19.5362, "step": 37320 }, { "epoch": 0.6927443243631943, "grad_norm": 37.3125, "learning_rate": 9.891758730703667e-06, "loss": 19.9289, "step": 37330 }, { "epoch": 0.6929298974476742, "grad_norm": 35.65625, "learning_rate": 9.891729734917625e-06, "loss": 19.7964, "step": 37340 }, { "epoch": 0.693115470532154, "grad_norm": 36.75, "learning_rate": 9.891700739131584e-06, "loss": 19.2571, "step": 37350 }, { "epoch": 0.6933010436166338, "grad_norm": 35.1875, "learning_rate": 9.89167174334554e-06, "loss": 19.4191, "step": 37360 }, { "epoch": 0.6934866167011137, "grad_norm": 34.5625, "learning_rate": 9.891642747559497e-06, "loss": 19.1347, "step": 37370 }, { "epoch": 0.6936721897855935, "grad_norm": 38.59375, "learning_rate": 9.891613751773456e-06, "loss": 19.4612, "step": 37380 }, { "epoch": 0.6938577628700733, "grad_norm": 35.25, "learning_rate": 9.891584755987414e-06, "loss": 19.9225, "step": 37390 }, { "epoch": 0.6940433359545531, "grad_norm": 34.5625, "learning_rate": 9.891555760201371e-06, "loss": 19.8018, "step": 37400 }, { "epoch": 0.694228909039033, "grad_norm": 35.375, "learning_rate": 9.891526764415328e-06, "loss": 19.4325, "step": 37410 }, { "epoch": 0.6944144821235128, "grad_norm": 33.3125, "learning_rate": 9.891497768629286e-06, "loss": 19.7925, "step": 37420 }, { "epoch": 0.6946000552079926, "grad_norm": 37.28125, "learning_rate": 9.891468772843243e-06, "loss": 19.9303, "step": 37430 }, { "epoch": 0.6947856282924725, "grad_norm": 35.46875, "learning_rate": 9.8914397770572e-06, "loss": 19.8091, "step": 37440 }, { "epoch": 0.6949712013769523, "grad_norm": 37.6875, "learning_rate": 9.89141078127116e-06, "loss": 19.6351, "step": 37450 }, { "epoch": 0.6951567744614321, "grad_norm": 35.90625, "learning_rate": 9.891381785485115e-06, "loss": 19.8791, "step": 37460 }, { "epoch": 0.695342347545912, "grad_norm": 36.34375, "learning_rate": 9.891352789699073e-06, "loss": 19.6898, "step": 37470 }, { "epoch": 0.6955279206303918, "grad_norm": 35.6875, "learning_rate": 9.891323793913032e-06, "loss": 19.9378, "step": 37480 }, { "epoch": 0.6957134937148716, "grad_norm": 36.59375, "learning_rate": 9.89129479812699e-06, "loss": 19.562, "step": 37490 }, { "epoch": 0.6958990667993514, "grad_norm": 36.21875, "learning_rate": 9.891265802340947e-06, "loss": 19.947, "step": 37500 }, { "epoch": 0.6960846398838313, "grad_norm": 35.0625, "learning_rate": 9.891236806554904e-06, "loss": 19.8907, "step": 37510 }, { "epoch": 0.696270212968311, "grad_norm": 36.71875, "learning_rate": 9.891207810768862e-06, "loss": 20.0342, "step": 37520 }, { "epoch": 0.6964557860527909, "grad_norm": 35.09375, "learning_rate": 9.891178814982819e-06, "loss": 19.3272, "step": 37530 }, { "epoch": 0.6966413591372708, "grad_norm": 36.34375, "learning_rate": 9.891149819196776e-06, "loss": 19.3683, "step": 37540 }, { "epoch": 0.6968269322217505, "grad_norm": 36.25, "learning_rate": 9.891120823410735e-06, "loss": 19.7305, "step": 37550 }, { "epoch": 0.6970125053062304, "grad_norm": 35.25, "learning_rate": 9.891091827624693e-06, "loss": 19.2455, "step": 37560 }, { "epoch": 0.6971980783907102, "grad_norm": 37.25, "learning_rate": 9.891062831838649e-06, "loss": 19.4296, "step": 37570 }, { "epoch": 0.69738365147519, "grad_norm": 35.84375, "learning_rate": 9.891033836052608e-06, "loss": 20.02, "step": 37580 }, { "epoch": 0.6975692245596699, "grad_norm": 34.3125, "learning_rate": 9.891004840266565e-06, "loss": 19.2752, "step": 37590 }, { "epoch": 0.6977547976441497, "grad_norm": 36.21875, "learning_rate": 9.890975844480523e-06, "loss": 19.6226, "step": 37600 }, { "epoch": 0.6979403707286295, "grad_norm": 36.1875, "learning_rate": 9.89094684869448e-06, "loss": 19.3974, "step": 37610 }, { "epoch": 0.6981259438131093, "grad_norm": 34.3125, "learning_rate": 9.890917852908437e-06, "loss": 19.5617, "step": 37620 }, { "epoch": 0.6983115168975892, "grad_norm": 34.46875, "learning_rate": 9.890888857122395e-06, "loss": 19.4508, "step": 37630 }, { "epoch": 0.698497089982069, "grad_norm": 35.4375, "learning_rate": 9.890859861336352e-06, "loss": 19.2594, "step": 37640 }, { "epoch": 0.6986826630665488, "grad_norm": 36.59375, "learning_rate": 9.890830865550311e-06, "loss": 19.4359, "step": 37650 }, { "epoch": 0.6988682361510287, "grad_norm": 34.53125, "learning_rate": 9.890801869764269e-06, "loss": 19.662, "step": 37660 }, { "epoch": 0.6990538092355085, "grad_norm": 34.6875, "learning_rate": 9.890772873978226e-06, "loss": 19.5305, "step": 37670 }, { "epoch": 0.6992393823199883, "grad_norm": 34.09375, "learning_rate": 9.890743878192183e-06, "loss": 19.688, "step": 37680 }, { "epoch": 0.6994249554044681, "grad_norm": 35.15625, "learning_rate": 9.890714882406141e-06, "loss": 19.4327, "step": 37690 }, { "epoch": 0.699610528488948, "grad_norm": 36.375, "learning_rate": 9.890685886620098e-06, "loss": 19.6052, "step": 37700 }, { "epoch": 0.6997961015734278, "grad_norm": 34.125, "learning_rate": 9.890656890834056e-06, "loss": 19.9092, "step": 37710 }, { "epoch": 0.6999816746579076, "grad_norm": 35.78125, "learning_rate": 9.890627895048013e-06, "loss": 20.0716, "step": 37720 }, { "epoch": 0.7001672477423875, "grad_norm": 35.90625, "learning_rate": 9.89059889926197e-06, "loss": 19.438, "step": 37730 }, { "epoch": 0.7003528208268672, "grad_norm": 35.8125, "learning_rate": 9.890569903475928e-06, "loss": 19.2955, "step": 37740 }, { "epoch": 0.7005383939113471, "grad_norm": 37.78125, "learning_rate": 9.890540907689887e-06, "loss": 19.7556, "step": 37750 }, { "epoch": 0.700723966995827, "grad_norm": 34.5625, "learning_rate": 9.890511911903844e-06, "loss": 19.8376, "step": 37760 }, { "epoch": 0.7009095400803067, "grad_norm": 34.65625, "learning_rate": 9.890482916117802e-06, "loss": 19.174, "step": 37770 }, { "epoch": 0.7010951131647866, "grad_norm": 36.3125, "learning_rate": 9.89045392033176e-06, "loss": 19.5626, "step": 37780 }, { "epoch": 0.7012806862492664, "grad_norm": 35.0625, "learning_rate": 9.890424924545717e-06, "loss": 19.6777, "step": 37790 }, { "epoch": 0.7014662593337462, "grad_norm": 34.28125, "learning_rate": 9.890395928759674e-06, "loss": 19.4296, "step": 37800 }, { "epoch": 0.701651832418226, "grad_norm": 35.125, "learning_rate": 9.890366932973631e-06, "loss": 19.8895, "step": 37810 }, { "epoch": 0.7018374055027059, "grad_norm": 35.40625, "learning_rate": 9.890337937187589e-06, "loss": 19.6531, "step": 37820 }, { "epoch": 0.7020229785871858, "grad_norm": 34.6875, "learning_rate": 9.890308941401548e-06, "loss": 20.0812, "step": 37830 }, { "epoch": 0.7022085516716655, "grad_norm": 37.1875, "learning_rate": 9.890279945615504e-06, "loss": 19.4826, "step": 37840 }, { "epoch": 0.7023941247561454, "grad_norm": 35.1875, "learning_rate": 9.890250949829461e-06, "loss": 19.8685, "step": 37850 }, { "epoch": 0.7025796978406252, "grad_norm": 36.9375, "learning_rate": 9.89022195404342e-06, "loss": 20.043, "step": 37860 }, { "epoch": 0.702765270925105, "grad_norm": 37.09375, "learning_rate": 9.890192958257378e-06, "loss": 19.6171, "step": 37870 }, { "epoch": 0.7029508440095849, "grad_norm": 36.03125, "learning_rate": 9.890163962471335e-06, "loss": 19.0865, "step": 37880 }, { "epoch": 0.7031364170940647, "grad_norm": 35.09375, "learning_rate": 9.890134966685292e-06, "loss": 19.6744, "step": 37890 }, { "epoch": 0.7033219901785445, "grad_norm": 35.96875, "learning_rate": 9.89010597089925e-06, "loss": 19.748, "step": 37900 }, { "epoch": 0.7035075632630243, "grad_norm": 36.1875, "learning_rate": 9.890076975113207e-06, "loss": 19.6734, "step": 37910 }, { "epoch": 0.7036931363475042, "grad_norm": 33.34375, "learning_rate": 9.890047979327165e-06, "loss": 19.5977, "step": 37920 }, { "epoch": 0.7038787094319839, "grad_norm": 35.65625, "learning_rate": 9.890018983541124e-06, "loss": 19.6002, "step": 37930 }, { "epoch": 0.7040642825164638, "grad_norm": 36.375, "learning_rate": 9.889989987755081e-06, "loss": 19.5954, "step": 37940 }, { "epoch": 0.7042498556009437, "grad_norm": 34.375, "learning_rate": 9.889960991969037e-06, "loss": 19.6573, "step": 37950 }, { "epoch": 0.7044354286854234, "grad_norm": 36.5625, "learning_rate": 9.889931996182996e-06, "loss": 19.284, "step": 37960 }, { "epoch": 0.7046210017699033, "grad_norm": 34.125, "learning_rate": 9.889903000396953e-06, "loss": 19.5239, "step": 37970 }, { "epoch": 0.7048065748543831, "grad_norm": 34.09375, "learning_rate": 9.88987400461091e-06, "loss": 19.323, "step": 37980 }, { "epoch": 0.704992147938863, "grad_norm": 35.65625, "learning_rate": 9.889845008824868e-06, "loss": 18.7075, "step": 37990 }, { "epoch": 0.7051777210233428, "grad_norm": 35.59375, "learning_rate": 9.889816013038826e-06, "loss": 19.7255, "step": 38000 }, { "epoch": 0.7053632941078226, "grad_norm": 36.0625, "learning_rate": 9.889787017252783e-06, "loss": 19.4898, "step": 38010 }, { "epoch": 0.7055488671923025, "grad_norm": 36.0625, "learning_rate": 9.88975802146674e-06, "loss": 19.3605, "step": 38020 }, { "epoch": 0.7057344402767822, "grad_norm": 33.9375, "learning_rate": 9.8897290256807e-06, "loss": 19.6154, "step": 38030 }, { "epoch": 0.7059200133612621, "grad_norm": 35.65625, "learning_rate": 9.889700029894657e-06, "loss": 19.6784, "step": 38040 }, { "epoch": 0.706105586445742, "grad_norm": 35.15625, "learning_rate": 9.889671034108613e-06, "loss": 19.6619, "step": 38050 }, { "epoch": 0.7062911595302217, "grad_norm": 35.84375, "learning_rate": 9.889642038322572e-06, "loss": 18.8527, "step": 38060 }, { "epoch": 0.7064767326147016, "grad_norm": 34.5, "learning_rate": 9.889613042536529e-06, "loss": 19.0215, "step": 38070 }, { "epoch": 0.7066623056991814, "grad_norm": 34.90625, "learning_rate": 9.889584046750487e-06, "loss": 19.3586, "step": 38080 }, { "epoch": 0.7068478787836612, "grad_norm": 37.1875, "learning_rate": 9.889555050964444e-06, "loss": 19.8383, "step": 38090 }, { "epoch": 0.707033451868141, "grad_norm": 35.8125, "learning_rate": 9.889526055178403e-06, "loss": 19.9191, "step": 38100 }, { "epoch": 0.7072190249526209, "grad_norm": 35.875, "learning_rate": 9.889497059392359e-06, "loss": 19.1323, "step": 38110 }, { "epoch": 0.7074045980371007, "grad_norm": 36.0, "learning_rate": 9.889468063606316e-06, "loss": 20.3555, "step": 38120 }, { "epoch": 0.7075901711215805, "grad_norm": 36.1875, "learning_rate": 9.889439067820275e-06, "loss": 19.5806, "step": 38130 }, { "epoch": 0.7077757442060604, "grad_norm": 36.25, "learning_rate": 9.889410072034233e-06, "loss": 19.4524, "step": 38140 }, { "epoch": 0.7079613172905401, "grad_norm": 34.3125, "learning_rate": 9.88938107624819e-06, "loss": 18.9645, "step": 38150 }, { "epoch": 0.70814689037502, "grad_norm": 33.875, "learning_rate": 9.889352080462147e-06, "loss": 19.4386, "step": 38160 }, { "epoch": 0.7083324634594999, "grad_norm": 33.625, "learning_rate": 9.889323084676105e-06, "loss": 19.4483, "step": 38170 }, { "epoch": 0.7085180365439797, "grad_norm": 37.375, "learning_rate": 9.889294088890062e-06, "loss": 19.0382, "step": 38180 }, { "epoch": 0.7087036096284595, "grad_norm": 35.90625, "learning_rate": 9.88926509310402e-06, "loss": 19.0689, "step": 38190 }, { "epoch": 0.7088891827129393, "grad_norm": 37.3125, "learning_rate": 9.889236097317979e-06, "loss": 19.3899, "step": 38200 }, { "epoch": 0.7090747557974192, "grad_norm": 34.09375, "learning_rate": 9.889207101531935e-06, "loss": 19.3655, "step": 38210 }, { "epoch": 0.7092603288818989, "grad_norm": 35.875, "learning_rate": 9.889178105745892e-06, "loss": 19.9081, "step": 38220 }, { "epoch": 0.7094459019663788, "grad_norm": 36.625, "learning_rate": 9.889149109959851e-06, "loss": 19.4929, "step": 38230 }, { "epoch": 0.7096314750508587, "grad_norm": 34.0, "learning_rate": 9.889120114173808e-06, "loss": 19.5349, "step": 38240 }, { "epoch": 0.7098170481353384, "grad_norm": 35.75, "learning_rate": 9.889091118387766e-06, "loss": 19.5176, "step": 38250 }, { "epoch": 0.7100026212198183, "grad_norm": 34.96875, "learning_rate": 9.889062122601723e-06, "loss": 19.3043, "step": 38260 }, { "epoch": 0.7101881943042981, "grad_norm": 35.34375, "learning_rate": 9.88903312681568e-06, "loss": 20.086, "step": 38270 }, { "epoch": 0.7103737673887779, "grad_norm": 36.03125, "learning_rate": 9.889004131029638e-06, "loss": 19.7873, "step": 38280 }, { "epoch": 0.7105593404732578, "grad_norm": 36.09375, "learning_rate": 9.888975135243595e-06, "loss": 19.4605, "step": 38290 }, { "epoch": 0.7107449135577376, "grad_norm": 35.53125, "learning_rate": 9.888946139457553e-06, "loss": 19.2167, "step": 38300 }, { "epoch": 0.7109304866422174, "grad_norm": 38.0625, "learning_rate": 9.888917143671512e-06, "loss": 19.7455, "step": 38310 }, { "epoch": 0.7111160597266972, "grad_norm": 36.90625, "learning_rate": 9.888888147885468e-06, "loss": 19.2411, "step": 38320 }, { "epoch": 0.7113016328111771, "grad_norm": 35.65625, "learning_rate": 9.888859152099425e-06, "loss": 19.6434, "step": 38330 }, { "epoch": 0.711487205895657, "grad_norm": 35.5, "learning_rate": 9.888830156313384e-06, "loss": 19.2703, "step": 38340 }, { "epoch": 0.7116727789801367, "grad_norm": 36.0, "learning_rate": 9.888801160527342e-06, "loss": 19.8718, "step": 38350 }, { "epoch": 0.7118583520646166, "grad_norm": 35.875, "learning_rate": 9.888772164741299e-06, "loss": 19.3253, "step": 38360 }, { "epoch": 0.7120439251490964, "grad_norm": 36.5, "learning_rate": 9.888743168955256e-06, "loss": 19.5194, "step": 38370 }, { "epoch": 0.7122294982335762, "grad_norm": 35.28125, "learning_rate": 9.888714173169214e-06, "loss": 19.5886, "step": 38380 }, { "epoch": 0.712415071318056, "grad_norm": 35.53125, "learning_rate": 9.888685177383171e-06, "loss": 19.4309, "step": 38390 }, { "epoch": 0.7126006444025359, "grad_norm": 36.0, "learning_rate": 9.888656181597129e-06, "loss": 19.3463, "step": 38400 }, { "epoch": 0.7127862174870156, "grad_norm": 36.53125, "learning_rate": 9.888627185811088e-06, "loss": 19.6, "step": 38410 }, { "epoch": 0.7129717905714955, "grad_norm": 35.5, "learning_rate": 9.888598190025045e-06, "loss": 19.4615, "step": 38420 }, { "epoch": 0.7131573636559754, "grad_norm": 35.0, "learning_rate": 9.888569194239001e-06, "loss": 19.4513, "step": 38430 }, { "epoch": 0.7133429367404551, "grad_norm": 37.0625, "learning_rate": 9.88854019845296e-06, "loss": 19.8098, "step": 38440 }, { "epoch": 0.713528509824935, "grad_norm": 37.25, "learning_rate": 9.888511202666917e-06, "loss": 19.3014, "step": 38450 }, { "epoch": 0.7137140829094148, "grad_norm": 33.9375, "learning_rate": 9.888482206880875e-06, "loss": 19.7513, "step": 38460 }, { "epoch": 0.7138996559938946, "grad_norm": 34.625, "learning_rate": 9.888453211094832e-06, "loss": 19.3369, "step": 38470 }, { "epoch": 0.7140852290783745, "grad_norm": 38.0625, "learning_rate": 9.88842421530879e-06, "loss": 19.3678, "step": 38480 }, { "epoch": 0.7142708021628543, "grad_norm": 36.375, "learning_rate": 9.888395219522747e-06, "loss": 19.3446, "step": 38490 }, { "epoch": 0.7144563752473341, "grad_norm": 35.15625, "learning_rate": 9.888366223736704e-06, "loss": 19.1207, "step": 38500 }, { "epoch": 0.7146419483318139, "grad_norm": 35.28125, "learning_rate": 9.888337227950663e-06, "loss": 19.5949, "step": 38510 }, { "epoch": 0.7148275214162938, "grad_norm": 35.1875, "learning_rate": 9.888308232164621e-06, "loss": 19.5172, "step": 38520 }, { "epoch": 0.7150130945007737, "grad_norm": 36.71875, "learning_rate": 9.888279236378578e-06, "loss": 19.6522, "step": 38530 }, { "epoch": 0.7151986675852534, "grad_norm": 34.03125, "learning_rate": 9.888250240592536e-06, "loss": 19.8201, "step": 38540 }, { "epoch": 0.7153842406697333, "grad_norm": 37.0625, "learning_rate": 9.888221244806493e-06, "loss": 19.2714, "step": 38550 }, { "epoch": 0.7155698137542131, "grad_norm": 36.78125, "learning_rate": 9.88819224902045e-06, "loss": 19.2433, "step": 38560 }, { "epoch": 0.7157553868386929, "grad_norm": 35.78125, "learning_rate": 9.888163253234408e-06, "loss": 19.69, "step": 38570 }, { "epoch": 0.7159409599231727, "grad_norm": 34.5625, "learning_rate": 9.888134257448367e-06, "loss": 19.7708, "step": 38580 }, { "epoch": 0.7161265330076526, "grad_norm": 34.71875, "learning_rate": 9.888105261662323e-06, "loss": 19.347, "step": 38590 }, { "epoch": 0.7163121060921324, "grad_norm": 33.90625, "learning_rate": 9.88807626587628e-06, "loss": 19.3059, "step": 38600 }, { "epoch": 0.7164976791766122, "grad_norm": 37.15625, "learning_rate": 9.88804727009024e-06, "loss": 19.8677, "step": 38610 }, { "epoch": 0.7166832522610921, "grad_norm": 37.9375, "learning_rate": 9.888018274304197e-06, "loss": 19.4334, "step": 38620 }, { "epoch": 0.7168688253455718, "grad_norm": 35.25, "learning_rate": 9.887989278518154e-06, "loss": 19.2413, "step": 38630 }, { "epoch": 0.7170543984300517, "grad_norm": 37.40625, "learning_rate": 9.887960282732111e-06, "loss": 19.6072, "step": 38640 }, { "epoch": 0.7172399715145316, "grad_norm": 35.625, "learning_rate": 9.887931286946069e-06, "loss": 19.5617, "step": 38650 }, { "epoch": 0.7174255445990113, "grad_norm": 35.8125, "learning_rate": 9.887902291160026e-06, "loss": 19.2944, "step": 38660 }, { "epoch": 0.7176111176834912, "grad_norm": 36.78125, "learning_rate": 9.887873295373984e-06, "loss": 19.6959, "step": 38670 }, { "epoch": 0.717796690767971, "grad_norm": 37.4375, "learning_rate": 9.887844299587943e-06, "loss": 19.6264, "step": 38680 }, { "epoch": 0.7179822638524508, "grad_norm": 34.40625, "learning_rate": 9.8878153038019e-06, "loss": 19.4896, "step": 38690 }, { "epoch": 0.7181678369369306, "grad_norm": 34.53125, "learning_rate": 9.887786308015856e-06, "loss": 19.5352, "step": 38700 }, { "epoch": 0.7183534100214105, "grad_norm": 37.84375, "learning_rate": 9.887757312229815e-06, "loss": 19.5476, "step": 38710 }, { "epoch": 0.7185389831058904, "grad_norm": 37.0625, "learning_rate": 9.887728316443772e-06, "loss": 19.3343, "step": 38720 }, { "epoch": 0.7187245561903701, "grad_norm": 33.96875, "learning_rate": 9.88769932065773e-06, "loss": 19.3166, "step": 38730 }, { "epoch": 0.71891012927485, "grad_norm": 36.34375, "learning_rate": 9.887670324871687e-06, "loss": 19.3764, "step": 38740 }, { "epoch": 0.7190957023593298, "grad_norm": 36.65625, "learning_rate": 9.887641329085645e-06, "loss": 19.6944, "step": 38750 }, { "epoch": 0.7192812754438096, "grad_norm": 35.46875, "learning_rate": 9.887612333299602e-06, "loss": 19.4655, "step": 38760 }, { "epoch": 0.7194668485282895, "grad_norm": 36.375, "learning_rate": 9.88758333751356e-06, "loss": 19.8738, "step": 38770 }, { "epoch": 0.7196524216127693, "grad_norm": 35.0, "learning_rate": 9.887554341727517e-06, "loss": 19.594, "step": 38780 }, { "epoch": 0.7198379946972491, "grad_norm": 37.875, "learning_rate": 9.887525345941476e-06, "loss": 19.2177, "step": 38790 }, { "epoch": 0.7200235677817289, "grad_norm": 34.21875, "learning_rate": 9.887496350155432e-06, "loss": 19.5523, "step": 38800 }, { "epoch": 0.7202091408662088, "grad_norm": 37.1875, "learning_rate": 9.88746735436939e-06, "loss": 19.6112, "step": 38810 }, { "epoch": 0.7203947139506885, "grad_norm": 33.9375, "learning_rate": 9.887438358583348e-06, "loss": 19.4369, "step": 38820 }, { "epoch": 0.7205802870351684, "grad_norm": 38.28125, "learning_rate": 9.887409362797306e-06, "loss": 19.5935, "step": 38830 }, { "epoch": 0.7207658601196483, "grad_norm": 35.0, "learning_rate": 9.887380367011263e-06, "loss": 19.5737, "step": 38840 }, { "epoch": 0.720951433204128, "grad_norm": 35.96875, "learning_rate": 9.88735137122522e-06, "loss": 19.4008, "step": 38850 }, { "epoch": 0.7211370062886079, "grad_norm": 36.125, "learning_rate": 9.887322375439178e-06, "loss": 19.1818, "step": 38860 }, { "epoch": 0.7213225793730877, "grad_norm": 34.03125, "learning_rate": 9.887293379653135e-06, "loss": 19.4143, "step": 38870 }, { "epoch": 0.7215081524575676, "grad_norm": 34.46875, "learning_rate": 9.887264383867093e-06, "loss": 19.5025, "step": 38880 }, { "epoch": 0.7216937255420474, "grad_norm": 37.75, "learning_rate": 9.887235388081052e-06, "loss": 19.3903, "step": 38890 }, { "epoch": 0.7218792986265272, "grad_norm": 36.15625, "learning_rate": 9.88720639229501e-06, "loss": 19.2237, "step": 38900 }, { "epoch": 0.7220648717110071, "grad_norm": 35.21875, "learning_rate": 9.887177396508965e-06, "loss": 19.0859, "step": 38910 }, { "epoch": 0.7222504447954868, "grad_norm": 36.5, "learning_rate": 9.887148400722924e-06, "loss": 19.3678, "step": 38920 }, { "epoch": 0.7224360178799667, "grad_norm": 36.9375, "learning_rate": 9.887119404936881e-06, "loss": 19.5165, "step": 38930 }, { "epoch": 0.7226215909644466, "grad_norm": 35.40625, "learning_rate": 9.887090409150839e-06, "loss": 19.8087, "step": 38940 }, { "epoch": 0.7228071640489263, "grad_norm": 36.6875, "learning_rate": 9.887061413364796e-06, "loss": 19.7642, "step": 38950 }, { "epoch": 0.7229927371334062, "grad_norm": 35.09375, "learning_rate": 9.887032417578754e-06, "loss": 19.627, "step": 38960 }, { "epoch": 0.723178310217886, "grad_norm": 35.84375, "learning_rate": 9.887003421792711e-06, "loss": 19.519, "step": 38970 }, { "epoch": 0.7233638833023658, "grad_norm": 35.625, "learning_rate": 9.886974426006668e-06, "loss": 19.3384, "step": 38980 }, { "epoch": 0.7235494563868456, "grad_norm": 35.15625, "learning_rate": 9.886945430220628e-06, "loss": 20.0005, "step": 38990 }, { "epoch": 0.7237350294713255, "grad_norm": 38.75, "learning_rate": 9.886916434434585e-06, "loss": 19.7862, "step": 39000 }, { "epoch": 0.7239206025558053, "grad_norm": 35.1875, "learning_rate": 9.886887438648542e-06, "loss": 19.2984, "step": 39010 }, { "epoch": 0.7241061756402851, "grad_norm": 37.0625, "learning_rate": 9.8868584428625e-06, "loss": 19.7105, "step": 39020 }, { "epoch": 0.724291748724765, "grad_norm": 35.71875, "learning_rate": 9.886829447076457e-06, "loss": 19.3705, "step": 39030 }, { "epoch": 0.7244773218092447, "grad_norm": 35.0, "learning_rate": 9.886800451290415e-06, "loss": 19.2526, "step": 39040 }, { "epoch": 0.7246628948937246, "grad_norm": 34.84375, "learning_rate": 9.886771455504372e-06, "loss": 19.6478, "step": 39050 }, { "epoch": 0.7248484679782045, "grad_norm": 36.53125, "learning_rate": 9.886742459718331e-06, "loss": 19.5643, "step": 39060 }, { "epoch": 0.7250340410626843, "grad_norm": 39.0625, "learning_rate": 9.886713463932287e-06, "loss": 19.0415, "step": 39070 }, { "epoch": 0.7252196141471641, "grad_norm": 33.71875, "learning_rate": 9.886684468146244e-06, "loss": 19.0971, "step": 39080 }, { "epoch": 0.7254051872316439, "grad_norm": 34.90625, "learning_rate": 9.886655472360203e-06, "loss": 19.5161, "step": 39090 }, { "epoch": 0.7255907603161238, "grad_norm": 36.0625, "learning_rate": 9.88662647657416e-06, "loss": 19.7788, "step": 39100 }, { "epoch": 0.7257763334006035, "grad_norm": 35.90625, "learning_rate": 9.886597480788118e-06, "loss": 19.4104, "step": 39110 }, { "epoch": 0.7259619064850834, "grad_norm": 35.5, "learning_rate": 9.886568485002075e-06, "loss": 19.7646, "step": 39120 }, { "epoch": 0.7261474795695633, "grad_norm": 35.625, "learning_rate": 9.886539489216033e-06, "loss": 19.6163, "step": 39130 }, { "epoch": 0.726333052654043, "grad_norm": 36.34375, "learning_rate": 9.88651049342999e-06, "loss": 19.5463, "step": 39140 }, { "epoch": 0.7265186257385229, "grad_norm": 35.5625, "learning_rate": 9.886481497643948e-06, "loss": 19.2152, "step": 39150 }, { "epoch": 0.7267041988230027, "grad_norm": 34.84375, "learning_rate": 9.886452501857907e-06, "loss": 19.5463, "step": 39160 }, { "epoch": 0.7268897719074825, "grad_norm": 34.65625, "learning_rate": 9.886423506071864e-06, "loss": 19.6092, "step": 39170 }, { "epoch": 0.7270753449919624, "grad_norm": 34.59375, "learning_rate": 9.88639451028582e-06, "loss": 19.4284, "step": 39180 }, { "epoch": 0.7272609180764422, "grad_norm": 34.8125, "learning_rate": 9.886365514499779e-06, "loss": 19.3534, "step": 39190 }, { "epoch": 0.727446491160922, "grad_norm": 36.0, "learning_rate": 9.886336518713736e-06, "loss": 19.8356, "step": 39200 }, { "epoch": 0.7276320642454018, "grad_norm": 35.28125, "learning_rate": 9.886307522927694e-06, "loss": 19.2134, "step": 39210 }, { "epoch": 0.7278176373298817, "grad_norm": 37.53125, "learning_rate": 9.886278527141651e-06, "loss": 19.133, "step": 39220 }, { "epoch": 0.7280032104143614, "grad_norm": 35.375, "learning_rate": 9.886249531355609e-06, "loss": 19.0139, "step": 39230 }, { "epoch": 0.7281887834988413, "grad_norm": 37.25, "learning_rate": 9.886220535569566e-06, "loss": 19.1991, "step": 39240 }, { "epoch": 0.7283743565833212, "grad_norm": 34.84375, "learning_rate": 9.886191539783523e-06, "loss": 19.4178, "step": 39250 }, { "epoch": 0.728559929667801, "grad_norm": 35.28125, "learning_rate": 9.886162543997483e-06, "loss": 19.7294, "step": 39260 }, { "epoch": 0.7287455027522808, "grad_norm": 36.90625, "learning_rate": 9.88613354821144e-06, "loss": 19.5717, "step": 39270 }, { "epoch": 0.7289310758367606, "grad_norm": 36.5, "learning_rate": 9.886104552425397e-06, "loss": 19.4598, "step": 39280 }, { "epoch": 0.7291166489212405, "grad_norm": 36.96875, "learning_rate": 9.886075556639355e-06, "loss": 19.3378, "step": 39290 }, { "epoch": 0.7293022220057203, "grad_norm": 37.21875, "learning_rate": 9.886046560853312e-06, "loss": 19.6847, "step": 39300 }, { "epoch": 0.7294877950902001, "grad_norm": 35.3125, "learning_rate": 9.88601756506727e-06, "loss": 19.398, "step": 39310 }, { "epoch": 0.72967336817468, "grad_norm": 35.375, "learning_rate": 9.885988569281227e-06, "loss": 19.577, "step": 39320 }, { "epoch": 0.7298589412591597, "grad_norm": 34.8125, "learning_rate": 9.885959573495184e-06, "loss": 19.5882, "step": 39330 }, { "epoch": 0.7300445143436396, "grad_norm": 34.71875, "learning_rate": 9.885930577709142e-06, "loss": 19.9676, "step": 39340 }, { "epoch": 0.7302300874281195, "grad_norm": 34.1875, "learning_rate": 9.8859015819231e-06, "loss": 18.9682, "step": 39350 }, { "epoch": 0.7304156605125992, "grad_norm": 35.0625, "learning_rate": 9.885872586137057e-06, "loss": 18.9754, "step": 39360 }, { "epoch": 0.7306012335970791, "grad_norm": 36.59375, "learning_rate": 9.885843590351016e-06, "loss": 19.5236, "step": 39370 }, { "epoch": 0.7307868066815589, "grad_norm": 35.34375, "learning_rate": 9.885814594564973e-06, "loss": 19.5679, "step": 39380 }, { "epoch": 0.7309723797660387, "grad_norm": 34.25, "learning_rate": 9.88578559877893e-06, "loss": 19.5254, "step": 39390 }, { "epoch": 0.7311579528505185, "grad_norm": 35.0, "learning_rate": 9.885756602992888e-06, "loss": 19.4094, "step": 39400 }, { "epoch": 0.7313435259349984, "grad_norm": 35.40625, "learning_rate": 9.885727607206845e-06, "loss": 19.3836, "step": 39410 }, { "epoch": 0.7315290990194783, "grad_norm": 36.3125, "learning_rate": 9.885698611420803e-06, "loss": 19.7532, "step": 39420 }, { "epoch": 0.731714672103958, "grad_norm": 36.78125, "learning_rate": 9.88566961563476e-06, "loss": 19.3568, "step": 39430 }, { "epoch": 0.7319002451884379, "grad_norm": 34.90625, "learning_rate": 9.88564061984872e-06, "loss": 19.3629, "step": 39440 }, { "epoch": 0.7320858182729177, "grad_norm": 35.875, "learning_rate": 9.885611624062675e-06, "loss": 19.2368, "step": 39450 }, { "epoch": 0.7322713913573975, "grad_norm": 33.84375, "learning_rate": 9.885582628276632e-06, "loss": 19.4356, "step": 39460 }, { "epoch": 0.7324569644418774, "grad_norm": 34.8125, "learning_rate": 9.885553632490592e-06, "loss": 19.1461, "step": 39470 }, { "epoch": 0.7326425375263572, "grad_norm": 36.34375, "learning_rate": 9.885524636704549e-06, "loss": 19.5205, "step": 39480 }, { "epoch": 0.732828110610837, "grad_norm": 36.5, "learning_rate": 9.885495640918506e-06, "loss": 19.2334, "step": 39490 }, { "epoch": 0.7330136836953168, "grad_norm": 34.40625, "learning_rate": 9.885466645132464e-06, "loss": 19.2247, "step": 39500 }, { "epoch": 0.7331992567797967, "grad_norm": 36.90625, "learning_rate": 9.885437649346421e-06, "loss": 19.5144, "step": 39510 }, { "epoch": 0.7333848298642764, "grad_norm": 36.25, "learning_rate": 9.885408653560379e-06, "loss": 18.9981, "step": 39520 }, { "epoch": 0.7335704029487563, "grad_norm": 35.4375, "learning_rate": 9.885379657774336e-06, "loss": 19.2192, "step": 39530 }, { "epoch": 0.7337559760332362, "grad_norm": 36.8125, "learning_rate": 9.885350661988295e-06, "loss": 19.2174, "step": 39540 }, { "epoch": 0.7339415491177159, "grad_norm": 33.6875, "learning_rate": 9.88532166620225e-06, "loss": 19.6817, "step": 39550 }, { "epoch": 0.7341271222021958, "grad_norm": 36.875, "learning_rate": 9.885292670416208e-06, "loss": 18.8573, "step": 39560 }, { "epoch": 0.7343126952866756, "grad_norm": 37.375, "learning_rate": 9.885263674630167e-06, "loss": 19.7325, "step": 39570 }, { "epoch": 0.7344982683711554, "grad_norm": 37.125, "learning_rate": 9.885234678844125e-06, "loss": 19.4219, "step": 39580 }, { "epoch": 0.7346838414556353, "grad_norm": 35.46875, "learning_rate": 9.885205683058082e-06, "loss": 19.1926, "step": 39590 }, { "epoch": 0.7348694145401151, "grad_norm": 35.71875, "learning_rate": 9.88517668727204e-06, "loss": 19.064, "step": 39600 }, { "epoch": 0.735054987624595, "grad_norm": 36.34375, "learning_rate": 9.885147691485997e-06, "loss": 19.0486, "step": 39610 }, { "epoch": 0.7352405607090747, "grad_norm": 36.03125, "learning_rate": 9.885118695699954e-06, "loss": 19.7487, "step": 39620 }, { "epoch": 0.7354261337935546, "grad_norm": 36.25, "learning_rate": 9.885089699913912e-06, "loss": 19.5051, "step": 39630 }, { "epoch": 0.7356117068780345, "grad_norm": 36.375, "learning_rate": 9.88506070412787e-06, "loss": 19.2603, "step": 39640 }, { "epoch": 0.7357972799625142, "grad_norm": 35.5625, "learning_rate": 9.885031708341828e-06, "loss": 19.957, "step": 39650 }, { "epoch": 0.7359828530469941, "grad_norm": 36.34375, "learning_rate": 9.885002712555784e-06, "loss": 19.3855, "step": 39660 }, { "epoch": 0.7361684261314739, "grad_norm": 34.1875, "learning_rate": 9.884973716769743e-06, "loss": 19.6183, "step": 39670 }, { "epoch": 0.7363539992159537, "grad_norm": 37.125, "learning_rate": 9.8849447209837e-06, "loss": 19.1751, "step": 39680 }, { "epoch": 0.7365395723004335, "grad_norm": 35.15625, "learning_rate": 9.884915725197658e-06, "loss": 19.4365, "step": 39690 }, { "epoch": 0.7367251453849134, "grad_norm": 37.28125, "learning_rate": 9.884886729411615e-06, "loss": 19.867, "step": 39700 }, { "epoch": 0.7369107184693932, "grad_norm": 34.4375, "learning_rate": 9.884857733625574e-06, "loss": 19.4728, "step": 39710 }, { "epoch": 0.737096291553873, "grad_norm": 36.9375, "learning_rate": 9.88482873783953e-06, "loss": 19.0685, "step": 39720 }, { "epoch": 0.7372818646383529, "grad_norm": 35.34375, "learning_rate": 9.884799742053487e-06, "loss": 19.7118, "step": 39730 }, { "epoch": 0.7374674377228326, "grad_norm": 36.5, "learning_rate": 9.884770746267447e-06, "loss": 19.6547, "step": 39740 }, { "epoch": 0.7376530108073125, "grad_norm": 34.84375, "learning_rate": 9.884741750481404e-06, "loss": 19.3336, "step": 39750 }, { "epoch": 0.7378385838917924, "grad_norm": 33.96875, "learning_rate": 9.884712754695361e-06, "loss": 19.3172, "step": 39760 }, { "epoch": 0.7380241569762722, "grad_norm": 35.15625, "learning_rate": 9.884683758909319e-06, "loss": 19.659, "step": 39770 }, { "epoch": 0.738209730060752, "grad_norm": 34.75, "learning_rate": 9.884654763123276e-06, "loss": 19.564, "step": 39780 }, { "epoch": 0.7383953031452318, "grad_norm": 36.9375, "learning_rate": 9.884625767337234e-06, "loss": 19.4628, "step": 39790 }, { "epoch": 0.7385808762297117, "grad_norm": 35.28125, "learning_rate": 9.884596771551191e-06, "loss": 19.3677, "step": 39800 }, { "epoch": 0.7387664493141914, "grad_norm": 36.03125, "learning_rate": 9.884567775765148e-06, "loss": 19.5696, "step": 39810 }, { "epoch": 0.7389520223986713, "grad_norm": 34.5, "learning_rate": 9.884538779979106e-06, "loss": 19.2716, "step": 39820 }, { "epoch": 0.7391375954831512, "grad_norm": 35.53125, "learning_rate": 9.884509784193063e-06, "loss": 19.2286, "step": 39830 }, { "epoch": 0.7393231685676309, "grad_norm": 35.46875, "learning_rate": 9.884480788407022e-06, "loss": 19.1631, "step": 39840 }, { "epoch": 0.7395087416521108, "grad_norm": 35.96875, "learning_rate": 9.88445179262098e-06, "loss": 19.4365, "step": 39850 }, { "epoch": 0.7396943147365906, "grad_norm": 35.96875, "learning_rate": 9.884422796834937e-06, "loss": 19.5979, "step": 39860 }, { "epoch": 0.7398798878210704, "grad_norm": 36.9375, "learning_rate": 9.884393801048895e-06, "loss": 19.6595, "step": 39870 }, { "epoch": 0.7400654609055503, "grad_norm": 37.28125, "learning_rate": 9.884364805262852e-06, "loss": 19.4817, "step": 39880 }, { "epoch": 0.7402510339900301, "grad_norm": 36.03125, "learning_rate": 9.88433580947681e-06, "loss": 19.3088, "step": 39890 }, { "epoch": 0.7404366070745099, "grad_norm": 34.75, "learning_rate": 9.884306813690767e-06, "loss": 19.4918, "step": 39900 }, { "epoch": 0.7406221801589897, "grad_norm": 35.5, "learning_rate": 9.884277817904724e-06, "loss": 19.435, "step": 39910 }, { "epoch": 0.7408077532434696, "grad_norm": 33.78125, "learning_rate": 9.884248822118683e-06, "loss": 19.268, "step": 39920 }, { "epoch": 0.7409933263279493, "grad_norm": 37.5, "learning_rate": 9.884219826332639e-06, "loss": 19.885, "step": 39930 }, { "epoch": 0.7411788994124292, "grad_norm": 34.9375, "learning_rate": 9.884190830546596e-06, "loss": 19.6043, "step": 39940 }, { "epoch": 0.7413644724969091, "grad_norm": 36.65625, "learning_rate": 9.884161834760556e-06, "loss": 18.9762, "step": 39950 }, { "epoch": 0.7415500455813889, "grad_norm": 35.5, "learning_rate": 9.884132838974513e-06, "loss": 19.6159, "step": 39960 }, { "epoch": 0.7417356186658687, "grad_norm": 35.53125, "learning_rate": 9.88410384318847e-06, "loss": 19.5883, "step": 39970 }, { "epoch": 0.7419211917503485, "grad_norm": 35.21875, "learning_rate": 9.884074847402428e-06, "loss": 19.4468, "step": 39980 }, { "epoch": 0.7421067648348284, "grad_norm": 35.34375, "learning_rate": 9.884045851616385e-06, "loss": 19.7061, "step": 39990 }, { "epoch": 0.7422923379193082, "grad_norm": 37.5625, "learning_rate": 9.884016855830343e-06, "loss": 19.2119, "step": 40000 }, { "epoch": 0.7422923379193082, "eval_loss": 2.4196152687072754, "eval_runtime": 454.1953, "eval_samples_per_second": 3197.12, "eval_steps_per_second": 49.956, "step": 40000 }, { "epoch": 0.742477911003788, "grad_norm": 35.1875, "learning_rate": 9.8839878600443e-06, "loss": 19.1801, "step": 40010 }, { "epoch": 0.7426634840882679, "grad_norm": 35.59375, "learning_rate": 9.883958864258259e-06, "loss": 19.2543, "step": 40020 }, { "epoch": 0.7428490571727476, "grad_norm": 35.34375, "learning_rate": 9.883929868472216e-06, "loss": 19.4882, "step": 40030 }, { "epoch": 0.7430346302572275, "grad_norm": 37.21875, "learning_rate": 9.883900872686172e-06, "loss": 19.6511, "step": 40040 }, { "epoch": 0.7432202033417074, "grad_norm": 37.5, "learning_rate": 9.883871876900131e-06, "loss": 19.5237, "step": 40050 }, { "epoch": 0.7434057764261871, "grad_norm": 37.0, "learning_rate": 9.883842881114089e-06, "loss": 19.3757, "step": 40060 }, { "epoch": 0.743591349510667, "grad_norm": 36.03125, "learning_rate": 9.883813885328046e-06, "loss": 19.4206, "step": 40070 }, { "epoch": 0.7437769225951468, "grad_norm": 34.375, "learning_rate": 9.883784889542004e-06, "loss": 19.3413, "step": 40080 }, { "epoch": 0.7439624956796266, "grad_norm": 35.25, "learning_rate": 9.883755893755961e-06, "loss": 19.2141, "step": 40090 }, { "epoch": 0.7441480687641064, "grad_norm": 36.21875, "learning_rate": 9.883726897969918e-06, "loss": 19.7938, "step": 40100 }, { "epoch": 0.7443336418485863, "grad_norm": 35.875, "learning_rate": 9.883697902183876e-06, "loss": 19.4773, "step": 40110 }, { "epoch": 0.744519214933066, "grad_norm": 34.75, "learning_rate": 9.883668906397835e-06, "loss": 19.4294, "step": 40120 }, { "epoch": 0.7447047880175459, "grad_norm": 36.375, "learning_rate": 9.883639910611792e-06, "loss": 19.7319, "step": 40130 }, { "epoch": 0.7448903611020258, "grad_norm": 37.96875, "learning_rate": 9.883610914825748e-06, "loss": 19.207, "step": 40140 }, { "epoch": 0.7450759341865056, "grad_norm": 36.65625, "learning_rate": 9.883581919039707e-06, "loss": 19.2734, "step": 40150 }, { "epoch": 0.7452615072709854, "grad_norm": 36.125, "learning_rate": 9.883552923253664e-06, "loss": 19.6617, "step": 40160 }, { "epoch": 0.7454470803554653, "grad_norm": 37.25, "learning_rate": 9.883523927467622e-06, "loss": 19.5435, "step": 40170 }, { "epoch": 0.7456326534399451, "grad_norm": 34.3125, "learning_rate": 9.88349493168158e-06, "loss": 19.359, "step": 40180 }, { "epoch": 0.7458182265244249, "grad_norm": 37.34375, "learning_rate": 9.883465935895538e-06, "loss": 19.4984, "step": 40190 }, { "epoch": 0.7460037996089047, "grad_norm": 35.3125, "learning_rate": 9.883436940109494e-06, "loss": 19.529, "step": 40200 }, { "epoch": 0.7461893726933846, "grad_norm": 37.34375, "learning_rate": 9.883407944323452e-06, "loss": 19.4569, "step": 40210 }, { "epoch": 0.7463749457778643, "grad_norm": 35.28125, "learning_rate": 9.88337894853741e-06, "loss": 19.7786, "step": 40220 }, { "epoch": 0.7465605188623442, "grad_norm": 37.03125, "learning_rate": 9.883349952751368e-06, "loss": 19.3416, "step": 40230 }, { "epoch": 0.7467460919468241, "grad_norm": 34.96875, "learning_rate": 9.883320956965325e-06, "loss": 19.2966, "step": 40240 }, { "epoch": 0.7469316650313038, "grad_norm": 34.59375, "learning_rate": 9.883291961179283e-06, "loss": 19.4214, "step": 40250 }, { "epoch": 0.7471172381157837, "grad_norm": 36.59375, "learning_rate": 9.88326296539324e-06, "loss": 19.2123, "step": 40260 }, { "epoch": 0.7473028112002635, "grad_norm": 35.375, "learning_rate": 9.883233969607198e-06, "loss": 19.0819, "step": 40270 }, { "epoch": 0.7474883842847433, "grad_norm": 35.125, "learning_rate": 9.883204973821155e-06, "loss": 19.7437, "step": 40280 }, { "epoch": 0.7476739573692232, "grad_norm": 36.6875, "learning_rate": 9.883175978035112e-06, "loss": 19.0334, "step": 40290 }, { "epoch": 0.747859530453703, "grad_norm": 34.3125, "learning_rate": 9.883146982249072e-06, "loss": 19.398, "step": 40300 }, { "epoch": 0.7480451035381829, "grad_norm": 36.6875, "learning_rate": 9.883117986463027e-06, "loss": 19.3823, "step": 40310 }, { "epoch": 0.7482306766226626, "grad_norm": 36.53125, "learning_rate": 9.883088990676986e-06, "loss": 19.3756, "step": 40320 }, { "epoch": 0.7484162497071425, "grad_norm": 38.0, "learning_rate": 9.883059994890944e-06, "loss": 18.9045, "step": 40330 }, { "epoch": 0.7486018227916224, "grad_norm": 36.96875, "learning_rate": 9.883030999104901e-06, "loss": 19.2252, "step": 40340 }, { "epoch": 0.7487873958761021, "grad_norm": 37.1875, "learning_rate": 9.883002003318859e-06, "loss": 19.2014, "step": 40350 }, { "epoch": 0.748972968960582, "grad_norm": 33.34375, "learning_rate": 9.882973007532816e-06, "loss": 19.202, "step": 40360 }, { "epoch": 0.7491585420450618, "grad_norm": 33.0625, "learning_rate": 9.882944011746773e-06, "loss": 19.8227, "step": 40370 }, { "epoch": 0.7493441151295416, "grad_norm": 33.875, "learning_rate": 9.88291501596073e-06, "loss": 19.4171, "step": 40380 }, { "epoch": 0.7495296882140214, "grad_norm": 36.21875, "learning_rate": 9.882886020174688e-06, "loss": 19.1841, "step": 40390 }, { "epoch": 0.7497152612985013, "grad_norm": 38.03125, "learning_rate": 9.882857024388647e-06, "loss": 19.3367, "step": 40400 }, { "epoch": 0.749900834382981, "grad_norm": 36.15625, "learning_rate": 9.882828028602603e-06, "loss": 19.5167, "step": 40410 }, { "epoch": 0.7500864074674609, "grad_norm": 36.0625, "learning_rate": 9.88279903281656e-06, "loss": 19.3782, "step": 40420 }, { "epoch": 0.7502719805519408, "grad_norm": 37.125, "learning_rate": 9.88277003703052e-06, "loss": 19.5295, "step": 40430 }, { "epoch": 0.7504575536364205, "grad_norm": 34.46875, "learning_rate": 9.882741041244477e-06, "loss": 19.3648, "step": 40440 }, { "epoch": 0.7506431267209004, "grad_norm": 35.40625, "learning_rate": 9.882712045458434e-06, "loss": 19.4462, "step": 40450 }, { "epoch": 0.7508286998053803, "grad_norm": 37.78125, "learning_rate": 9.882683049672392e-06, "loss": 19.5866, "step": 40460 }, { "epoch": 0.75101427288986, "grad_norm": 36.46875, "learning_rate": 9.88265405388635e-06, "loss": 19.1955, "step": 40470 }, { "epoch": 0.7511998459743399, "grad_norm": 36.59375, "learning_rate": 9.882625058100307e-06, "loss": 19.3081, "step": 40480 }, { "epoch": 0.7513854190588197, "grad_norm": 35.5625, "learning_rate": 9.882596062314264e-06, "loss": 19.5015, "step": 40490 }, { "epoch": 0.7515709921432996, "grad_norm": 36.90625, "learning_rate": 9.882567066528223e-06, "loss": 19.6425, "step": 40500 }, { "epoch": 0.7517565652277793, "grad_norm": 37.1875, "learning_rate": 9.88253807074218e-06, "loss": 19.2909, "step": 40510 }, { "epoch": 0.7519421383122592, "grad_norm": 33.59375, "learning_rate": 9.882509074956136e-06, "loss": 19.621, "step": 40520 }, { "epoch": 0.7521277113967391, "grad_norm": 36.34375, "learning_rate": 9.882480079170095e-06, "loss": 19.4051, "step": 40530 }, { "epoch": 0.7523132844812188, "grad_norm": 36.125, "learning_rate": 9.882451083384053e-06, "loss": 19.5807, "step": 40540 }, { "epoch": 0.7524988575656987, "grad_norm": 36.25, "learning_rate": 9.88242208759801e-06, "loss": 19.2389, "step": 40550 }, { "epoch": 0.7526844306501785, "grad_norm": 33.71875, "learning_rate": 9.882393091811968e-06, "loss": 19.5497, "step": 40560 }, { "epoch": 0.7528700037346583, "grad_norm": 36.6875, "learning_rate": 9.882364096025925e-06, "loss": 19.7689, "step": 40570 }, { "epoch": 0.7530555768191382, "grad_norm": 37.34375, "learning_rate": 9.882335100239882e-06, "loss": 19.1836, "step": 40580 }, { "epoch": 0.753241149903618, "grad_norm": 34.21875, "learning_rate": 9.88230610445384e-06, "loss": 18.9019, "step": 40590 }, { "epoch": 0.7534267229880978, "grad_norm": 35.53125, "learning_rate": 9.882277108667799e-06, "loss": 19.0727, "step": 40600 }, { "epoch": 0.7536122960725776, "grad_norm": 34.8125, "learning_rate": 9.882248112881756e-06, "loss": 19.1506, "step": 40610 }, { "epoch": 0.7537978691570575, "grad_norm": 35.625, "learning_rate": 9.882219117095714e-06, "loss": 19.1912, "step": 40620 }, { "epoch": 0.7539834422415372, "grad_norm": 35.84375, "learning_rate": 9.882190121309671e-06, "loss": 19.2547, "step": 40630 }, { "epoch": 0.7541690153260171, "grad_norm": 36.375, "learning_rate": 9.882161125523628e-06, "loss": 19.6462, "step": 40640 }, { "epoch": 0.754354588410497, "grad_norm": 37.21875, "learning_rate": 9.882132129737586e-06, "loss": 19.0219, "step": 40650 }, { "epoch": 0.7545401614949767, "grad_norm": 35.25, "learning_rate": 9.882103133951543e-06, "loss": 19.2065, "step": 40660 }, { "epoch": 0.7547257345794566, "grad_norm": 35.03125, "learning_rate": 9.882074138165502e-06, "loss": 19.058, "step": 40670 }, { "epoch": 0.7549113076639364, "grad_norm": 36.21875, "learning_rate": 9.882045142379458e-06, "loss": 19.1856, "step": 40680 }, { "epoch": 0.7550968807484163, "grad_norm": 35.5625, "learning_rate": 9.882016146593416e-06, "loss": 19.3971, "step": 40690 }, { "epoch": 0.755282453832896, "grad_norm": 34.59375, "learning_rate": 9.881987150807375e-06, "loss": 19.5486, "step": 40700 }, { "epoch": 0.7554680269173759, "grad_norm": 35.65625, "learning_rate": 9.881958155021332e-06, "loss": 19.3793, "step": 40710 }, { "epoch": 0.7556536000018558, "grad_norm": 37.78125, "learning_rate": 9.88192915923529e-06, "loss": 19.3927, "step": 40720 }, { "epoch": 0.7558391730863355, "grad_norm": 34.34375, "learning_rate": 9.881900163449247e-06, "loss": 19.2427, "step": 40730 }, { "epoch": 0.7560247461708154, "grad_norm": 35.46875, "learning_rate": 9.881871167663204e-06, "loss": 19.363, "step": 40740 }, { "epoch": 0.7562103192552952, "grad_norm": 34.28125, "learning_rate": 9.881842171877162e-06, "loss": 19.2003, "step": 40750 }, { "epoch": 0.756395892339775, "grad_norm": 36.21875, "learning_rate": 9.881813176091119e-06, "loss": 19.225, "step": 40760 }, { "epoch": 0.7565814654242549, "grad_norm": 35.375, "learning_rate": 9.881784180305078e-06, "loss": 19.8474, "step": 40770 }, { "epoch": 0.7567670385087347, "grad_norm": 35.46875, "learning_rate": 9.881755184519036e-06, "loss": 19.3724, "step": 40780 }, { "epoch": 0.7569526115932145, "grad_norm": 36.125, "learning_rate": 9.881726188732991e-06, "loss": 19.03, "step": 40790 }, { "epoch": 0.7571381846776943, "grad_norm": 36.75, "learning_rate": 9.88169719294695e-06, "loss": 19.4802, "step": 40800 }, { "epoch": 0.7573237577621742, "grad_norm": 34.03125, "learning_rate": 9.881668197160908e-06, "loss": 19.0697, "step": 40810 }, { "epoch": 0.757509330846654, "grad_norm": 35.4375, "learning_rate": 9.881639201374865e-06, "loss": 19.6207, "step": 40820 }, { "epoch": 0.7576949039311338, "grad_norm": 35.09375, "learning_rate": 9.881610205588823e-06, "loss": 19.4589, "step": 40830 }, { "epoch": 0.7578804770156137, "grad_norm": 34.4375, "learning_rate": 9.88158120980278e-06, "loss": 19.5961, "step": 40840 }, { "epoch": 0.7580660501000935, "grad_norm": 34.625, "learning_rate": 9.881552214016737e-06, "loss": 19.2363, "step": 40850 }, { "epoch": 0.7582516231845733, "grad_norm": 37.0, "learning_rate": 9.881523218230695e-06, "loss": 18.9855, "step": 40860 }, { "epoch": 0.7584371962690531, "grad_norm": 36.3125, "learning_rate": 9.881494222444652e-06, "loss": 19.5753, "step": 40870 }, { "epoch": 0.758622769353533, "grad_norm": 37.625, "learning_rate": 9.881465226658611e-06, "loss": 19.2938, "step": 40880 }, { "epoch": 0.7588083424380128, "grad_norm": 34.75, "learning_rate": 9.881436230872569e-06, "loss": 19.4143, "step": 40890 }, { "epoch": 0.7589939155224926, "grad_norm": 36.03125, "learning_rate": 9.881407235086526e-06, "loss": 19.3378, "step": 40900 }, { "epoch": 0.7591794886069725, "grad_norm": 34.65625, "learning_rate": 9.881378239300484e-06, "loss": 19.4041, "step": 40910 }, { "epoch": 0.7593650616914522, "grad_norm": 36.09375, "learning_rate": 9.881349243514441e-06, "loss": 19.7739, "step": 40920 }, { "epoch": 0.7595506347759321, "grad_norm": 36.59375, "learning_rate": 9.881320247728398e-06, "loss": 19.1821, "step": 40930 }, { "epoch": 0.759736207860412, "grad_norm": 34.6875, "learning_rate": 9.881291251942356e-06, "loss": 18.7575, "step": 40940 }, { "epoch": 0.7599217809448917, "grad_norm": 37.6875, "learning_rate": 9.881262256156313e-06, "loss": 19.4397, "step": 40950 }, { "epoch": 0.7601073540293716, "grad_norm": 36.28125, "learning_rate": 9.88123326037027e-06, "loss": 19.3813, "step": 40960 }, { "epoch": 0.7602929271138514, "grad_norm": 34.84375, "learning_rate": 9.881204264584228e-06, "loss": 19.342, "step": 40970 }, { "epoch": 0.7604785001983312, "grad_norm": 35.9375, "learning_rate": 9.881175268798187e-06, "loss": 18.8295, "step": 40980 }, { "epoch": 0.760664073282811, "grad_norm": 35.0, "learning_rate": 9.881146273012145e-06, "loss": 19.1556, "step": 40990 }, { "epoch": 0.7608496463672909, "grad_norm": 35.34375, "learning_rate": 9.8811172772261e-06, "loss": 19.3345, "step": 41000 }, { "epoch": 0.7610352194517707, "grad_norm": 37.96875, "learning_rate": 9.88108828144006e-06, "loss": 19.3571, "step": 41010 }, { "epoch": 0.7612207925362505, "grad_norm": 35.4375, "learning_rate": 9.881059285654017e-06, "loss": 19.0342, "step": 41020 }, { "epoch": 0.7614063656207304, "grad_norm": 35.59375, "learning_rate": 9.881030289867974e-06, "loss": 19.2773, "step": 41030 }, { "epoch": 0.7615919387052102, "grad_norm": 36.96875, "learning_rate": 9.881001294081932e-06, "loss": 19.5343, "step": 41040 }, { "epoch": 0.76177751178969, "grad_norm": 34.40625, "learning_rate": 9.88097229829589e-06, "loss": 18.9572, "step": 41050 }, { "epoch": 0.7619630848741699, "grad_norm": 35.65625, "learning_rate": 9.880943302509846e-06, "loss": 19.2055, "step": 41060 }, { "epoch": 0.7621486579586497, "grad_norm": 34.0, "learning_rate": 9.880914306723804e-06, "loss": 19.3073, "step": 41070 }, { "epoch": 0.7623342310431295, "grad_norm": 35.40625, "learning_rate": 9.880885310937763e-06, "loss": 19.5048, "step": 41080 }, { "epoch": 0.7625198041276093, "grad_norm": 36.1875, "learning_rate": 9.88085631515172e-06, "loss": 19.8103, "step": 41090 }, { "epoch": 0.7627053772120892, "grad_norm": 36.65625, "learning_rate": 9.880827319365678e-06, "loss": 19.5557, "step": 41100 }, { "epoch": 0.762890950296569, "grad_norm": 34.53125, "learning_rate": 9.880798323579635e-06, "loss": 19.5534, "step": 41110 }, { "epoch": 0.7630765233810488, "grad_norm": 35.6875, "learning_rate": 9.880769327793593e-06, "loss": 19.303, "step": 41120 }, { "epoch": 0.7632620964655287, "grad_norm": 34.71875, "learning_rate": 9.88074033200755e-06, "loss": 19.164, "step": 41130 }, { "epoch": 0.7634476695500084, "grad_norm": 37.0625, "learning_rate": 9.880711336221507e-06, "loss": 19.1969, "step": 41140 }, { "epoch": 0.7636332426344883, "grad_norm": 34.28125, "learning_rate": 9.880682340435466e-06, "loss": 19.5003, "step": 41150 }, { "epoch": 0.7638188157189681, "grad_norm": 36.78125, "learning_rate": 9.880653344649422e-06, "loss": 19.3433, "step": 41160 }, { "epoch": 0.7640043888034479, "grad_norm": 37.75, "learning_rate": 9.88062434886338e-06, "loss": 18.9825, "step": 41170 }, { "epoch": 0.7641899618879278, "grad_norm": 36.34375, "learning_rate": 9.880595353077339e-06, "loss": 19.0039, "step": 41180 }, { "epoch": 0.7643755349724076, "grad_norm": 36.8125, "learning_rate": 9.880566357291296e-06, "loss": 19.4506, "step": 41190 }, { "epoch": 0.7645611080568875, "grad_norm": 33.78125, "learning_rate": 9.880537361505253e-06, "loss": 19.3604, "step": 41200 }, { "epoch": 0.7647466811413672, "grad_norm": 37.875, "learning_rate": 9.880508365719211e-06, "loss": 19.2318, "step": 41210 }, { "epoch": 0.7649322542258471, "grad_norm": 35.5625, "learning_rate": 9.880479369933168e-06, "loss": 19.3256, "step": 41220 }, { "epoch": 0.765117827310327, "grad_norm": 37.40625, "learning_rate": 9.880450374147126e-06, "loss": 19.5014, "step": 41230 }, { "epoch": 0.7653034003948067, "grad_norm": 36.9375, "learning_rate": 9.880421378361083e-06, "loss": 19.2619, "step": 41240 }, { "epoch": 0.7654889734792866, "grad_norm": 37.53125, "learning_rate": 9.880392382575042e-06, "loss": 19.1359, "step": 41250 }, { "epoch": 0.7656745465637664, "grad_norm": 35.375, "learning_rate": 9.880363386789e-06, "loss": 19.3461, "step": 41260 }, { "epoch": 0.7658601196482462, "grad_norm": 36.71875, "learning_rate": 9.880334391002955e-06, "loss": 18.8452, "step": 41270 }, { "epoch": 0.766045692732726, "grad_norm": 36.03125, "learning_rate": 9.880305395216914e-06, "loss": 19.2407, "step": 41280 }, { "epoch": 0.7662312658172059, "grad_norm": 36.78125, "learning_rate": 9.880276399430872e-06, "loss": 19.4143, "step": 41290 }, { "epoch": 0.7664168389016857, "grad_norm": 35.8125, "learning_rate": 9.88024740364483e-06, "loss": 19.3873, "step": 41300 }, { "epoch": 0.7666024119861655, "grad_norm": 35.125, "learning_rate": 9.880218407858787e-06, "loss": 19.6352, "step": 41310 }, { "epoch": 0.7667879850706454, "grad_norm": 36.0625, "learning_rate": 9.880189412072744e-06, "loss": 19.2883, "step": 41320 }, { "epoch": 0.7669735581551251, "grad_norm": 36.40625, "learning_rate": 9.880160416286701e-06, "loss": 18.7959, "step": 41330 }, { "epoch": 0.767159131239605, "grad_norm": 34.78125, "learning_rate": 9.880131420500659e-06, "loss": 18.8009, "step": 41340 }, { "epoch": 0.7673447043240849, "grad_norm": 36.5625, "learning_rate": 9.880102424714618e-06, "loss": 19.1237, "step": 41350 }, { "epoch": 0.7675302774085646, "grad_norm": 38.78125, "learning_rate": 9.880073428928575e-06, "loss": 18.9787, "step": 41360 }, { "epoch": 0.7677158504930445, "grad_norm": 32.71875, "learning_rate": 9.880044433142533e-06, "loss": 19.2766, "step": 41370 }, { "epoch": 0.7679014235775243, "grad_norm": 35.1875, "learning_rate": 9.88001543735649e-06, "loss": 18.937, "step": 41380 }, { "epoch": 0.7680869966620042, "grad_norm": 35.40625, "learning_rate": 9.879986441570448e-06, "loss": 19.729, "step": 41390 }, { "epoch": 0.768272569746484, "grad_norm": 34.71875, "learning_rate": 9.879957445784405e-06, "loss": 19.1228, "step": 41400 }, { "epoch": 0.7684581428309638, "grad_norm": 36.5, "learning_rate": 9.879928449998362e-06, "loss": 19.3379, "step": 41410 }, { "epoch": 0.7686437159154437, "grad_norm": 36.75, "learning_rate": 9.87989945421232e-06, "loss": 19.4041, "step": 41420 }, { "epoch": 0.7688292889999234, "grad_norm": 37.03125, "learning_rate": 9.879870458426277e-06, "loss": 19.2628, "step": 41430 }, { "epoch": 0.7690148620844033, "grad_norm": 34.875, "learning_rate": 9.879841462640235e-06, "loss": 19.4629, "step": 41440 }, { "epoch": 0.7692004351688831, "grad_norm": 35.96875, "learning_rate": 9.879812466854192e-06, "loss": 19.196, "step": 41450 }, { "epoch": 0.7693860082533629, "grad_norm": 38.15625, "learning_rate": 9.879783471068151e-06, "loss": 19.752, "step": 41460 }, { "epoch": 0.7695715813378428, "grad_norm": 38.75, "learning_rate": 9.879754475282109e-06, "loss": 19.2405, "step": 41470 }, { "epoch": 0.7697571544223226, "grad_norm": 37.71875, "learning_rate": 9.879725479496066e-06, "loss": 19.2407, "step": 41480 }, { "epoch": 0.7699427275068024, "grad_norm": 36.78125, "learning_rate": 9.879696483710023e-06, "loss": 19.1094, "step": 41490 }, { "epoch": 0.7701283005912822, "grad_norm": 36.1875, "learning_rate": 9.87966748792398e-06, "loss": 19.3451, "step": 41500 }, { "epoch": 0.7703138736757621, "grad_norm": 36.53125, "learning_rate": 9.879638492137938e-06, "loss": 18.9383, "step": 41510 }, { "epoch": 0.7704994467602418, "grad_norm": 34.0, "learning_rate": 9.879609496351896e-06, "loss": 19.115, "step": 41520 }, { "epoch": 0.7706850198447217, "grad_norm": 35.78125, "learning_rate": 9.879580500565855e-06, "loss": 19.4492, "step": 41530 }, { "epoch": 0.7708705929292016, "grad_norm": 37.40625, "learning_rate": 9.87955150477981e-06, "loss": 19.3029, "step": 41540 }, { "epoch": 0.7710561660136813, "grad_norm": 35.15625, "learning_rate": 9.879522508993768e-06, "loss": 18.9063, "step": 41550 }, { "epoch": 0.7712417390981612, "grad_norm": 35.71875, "learning_rate": 9.879493513207727e-06, "loss": 19.6495, "step": 41560 }, { "epoch": 0.771427312182641, "grad_norm": 36.1875, "learning_rate": 9.879464517421684e-06, "loss": 19.384, "step": 41570 }, { "epoch": 0.7716128852671209, "grad_norm": 35.375, "learning_rate": 9.879435521635642e-06, "loss": 19.2114, "step": 41580 }, { "epoch": 0.7717984583516007, "grad_norm": 34.53125, "learning_rate": 9.879406525849599e-06, "loss": 19.6205, "step": 41590 }, { "epoch": 0.7719840314360805, "grad_norm": 33.59375, "learning_rate": 9.879377530063557e-06, "loss": 19.7098, "step": 41600 }, { "epoch": 0.7721696045205604, "grad_norm": 36.25, "learning_rate": 9.879348534277514e-06, "loss": 19.5254, "step": 41610 }, { "epoch": 0.7723551776050401, "grad_norm": 33.375, "learning_rate": 9.879319538491471e-06, "loss": 19.4273, "step": 41620 }, { "epoch": 0.77254075068952, "grad_norm": 36.46875, "learning_rate": 9.87929054270543e-06, "loss": 19.7937, "step": 41630 }, { "epoch": 0.7727263237739999, "grad_norm": 37.375, "learning_rate": 9.879261546919388e-06, "loss": 19.3302, "step": 41640 }, { "epoch": 0.7729118968584796, "grad_norm": 34.75, "learning_rate": 9.879232551133344e-06, "loss": 19.7385, "step": 41650 }, { "epoch": 0.7730974699429595, "grad_norm": 35.15625, "learning_rate": 9.879203555347303e-06, "loss": 19.3012, "step": 41660 }, { "epoch": 0.7732830430274393, "grad_norm": 36.46875, "learning_rate": 9.87917455956126e-06, "loss": 19.6957, "step": 41670 }, { "epoch": 0.7734686161119191, "grad_norm": 35.5625, "learning_rate": 9.879145563775217e-06, "loss": 19.4753, "step": 41680 }, { "epoch": 0.773654189196399, "grad_norm": 36.09375, "learning_rate": 9.879116567989175e-06, "loss": 18.989, "step": 41690 }, { "epoch": 0.7738397622808788, "grad_norm": 35.53125, "learning_rate": 9.879087572203132e-06, "loss": 19.5413, "step": 41700 }, { "epoch": 0.7740253353653586, "grad_norm": 35.4375, "learning_rate": 9.87905857641709e-06, "loss": 19.1875, "step": 41710 }, { "epoch": 0.7742109084498384, "grad_norm": 35.875, "learning_rate": 9.879029580631047e-06, "loss": 18.9351, "step": 41720 }, { "epoch": 0.7743964815343183, "grad_norm": 35.1875, "learning_rate": 9.879000584845006e-06, "loss": 19.5943, "step": 41730 }, { "epoch": 0.7745820546187981, "grad_norm": 35.15625, "learning_rate": 9.878971589058964e-06, "loss": 19.2853, "step": 41740 }, { "epoch": 0.7747676277032779, "grad_norm": 36.65625, "learning_rate": 9.87894259327292e-06, "loss": 19.1505, "step": 41750 }, { "epoch": 0.7749532007877578, "grad_norm": 37.03125, "learning_rate": 9.878913597486878e-06, "loss": 19.5033, "step": 41760 }, { "epoch": 0.7751387738722376, "grad_norm": 35.78125, "learning_rate": 9.878884601700836e-06, "loss": 19.2482, "step": 41770 }, { "epoch": 0.7753243469567174, "grad_norm": 36.84375, "learning_rate": 9.878855605914793e-06, "loss": 19.6271, "step": 41780 }, { "epoch": 0.7755099200411972, "grad_norm": 36.375, "learning_rate": 9.87882661012875e-06, "loss": 19.6722, "step": 41790 }, { "epoch": 0.7756954931256771, "grad_norm": 36.09375, "learning_rate": 9.87879761434271e-06, "loss": 19.1386, "step": 41800 }, { "epoch": 0.7758810662101568, "grad_norm": 35.0, "learning_rate": 9.878768618556665e-06, "loss": 19.687, "step": 41810 }, { "epoch": 0.7760666392946367, "grad_norm": 35.5, "learning_rate": 9.878739622770623e-06, "loss": 19.3746, "step": 41820 }, { "epoch": 0.7762522123791166, "grad_norm": 36.71875, "learning_rate": 9.878710626984582e-06, "loss": 19.3001, "step": 41830 }, { "epoch": 0.7764377854635963, "grad_norm": 35.875, "learning_rate": 9.87868163119854e-06, "loss": 19.1276, "step": 41840 }, { "epoch": 0.7766233585480762, "grad_norm": 36.59375, "learning_rate": 9.878652635412497e-06, "loss": 19.1937, "step": 41850 }, { "epoch": 0.776808931632556, "grad_norm": 34.5, "learning_rate": 9.878623639626454e-06, "loss": 19.5688, "step": 41860 }, { "epoch": 0.7769945047170358, "grad_norm": 36.84375, "learning_rate": 9.878594643840412e-06, "loss": 18.9137, "step": 41870 }, { "epoch": 0.7771800778015157, "grad_norm": 36.625, "learning_rate": 9.878565648054369e-06, "loss": 19.4192, "step": 41880 }, { "epoch": 0.7773656508859955, "grad_norm": 37.34375, "learning_rate": 9.878536652268326e-06, "loss": 19.2262, "step": 41890 }, { "epoch": 0.7775512239704753, "grad_norm": 36.09375, "learning_rate": 9.878507656482284e-06, "loss": 18.9775, "step": 41900 }, { "epoch": 0.7777367970549551, "grad_norm": 35.25, "learning_rate": 9.878478660696241e-06, "loss": 19.2364, "step": 41910 }, { "epoch": 0.777922370139435, "grad_norm": 35.65625, "learning_rate": 9.878449664910199e-06, "loss": 19.5075, "step": 41920 }, { "epoch": 0.7781079432239149, "grad_norm": 37.15625, "learning_rate": 9.878420669124156e-06, "loss": 19.3397, "step": 41930 }, { "epoch": 0.7782935163083946, "grad_norm": 33.5625, "learning_rate": 9.878391673338115e-06, "loss": 19.0782, "step": 41940 }, { "epoch": 0.7784790893928745, "grad_norm": 35.5, "learning_rate": 9.878362677552073e-06, "loss": 19.4959, "step": 41950 }, { "epoch": 0.7786646624773543, "grad_norm": 35.0, "learning_rate": 9.87833368176603e-06, "loss": 19.4202, "step": 41960 }, { "epoch": 0.7788502355618341, "grad_norm": 38.875, "learning_rate": 9.878304685979987e-06, "loss": 19.3642, "step": 41970 }, { "epoch": 0.7790358086463139, "grad_norm": 35.0, "learning_rate": 9.878275690193945e-06, "loss": 19.0663, "step": 41980 }, { "epoch": 0.7792213817307938, "grad_norm": 36.8125, "learning_rate": 9.878246694407902e-06, "loss": 19.0382, "step": 41990 }, { "epoch": 0.7794069548152736, "grad_norm": 36.5625, "learning_rate": 9.87821769862186e-06, "loss": 19.1091, "step": 42000 }, { "epoch": 0.7795925278997534, "grad_norm": 34.6875, "learning_rate": 9.878188702835819e-06, "loss": 19.1205, "step": 42010 }, { "epoch": 0.7797781009842333, "grad_norm": 34.6875, "learning_rate": 9.878159707049774e-06, "loss": 19.4904, "step": 42020 }, { "epoch": 0.779963674068713, "grad_norm": 35.84375, "learning_rate": 9.878130711263732e-06, "loss": 18.6078, "step": 42030 }, { "epoch": 0.7801492471531929, "grad_norm": 36.84375, "learning_rate": 9.878101715477691e-06, "loss": 18.9939, "step": 42040 }, { "epoch": 0.7803348202376728, "grad_norm": 35.96875, "learning_rate": 9.878072719691648e-06, "loss": 19.1132, "step": 42050 }, { "epoch": 0.7805203933221525, "grad_norm": 35.59375, "learning_rate": 9.878043723905606e-06, "loss": 19.2332, "step": 42060 }, { "epoch": 0.7807059664066324, "grad_norm": 37.875, "learning_rate": 9.878014728119563e-06, "loss": 19.1597, "step": 42070 }, { "epoch": 0.7808915394911122, "grad_norm": 36.65625, "learning_rate": 9.87798573233352e-06, "loss": 19.1368, "step": 42080 }, { "epoch": 0.781077112575592, "grad_norm": 36.21875, "learning_rate": 9.877956736547478e-06, "loss": 19.6976, "step": 42090 }, { "epoch": 0.7812626856600718, "grad_norm": 38.0625, "learning_rate": 9.877927740761435e-06, "loss": 18.8875, "step": 42100 }, { "epoch": 0.7814482587445517, "grad_norm": 36.8125, "learning_rate": 9.877898744975394e-06, "loss": 19.1315, "step": 42110 }, { "epoch": 0.7816338318290316, "grad_norm": 36.0, "learning_rate": 9.877869749189352e-06, "loss": 19.1785, "step": 42120 }, { "epoch": 0.7818194049135113, "grad_norm": 36.65625, "learning_rate": 9.877840753403308e-06, "loss": 19.2687, "step": 42130 }, { "epoch": 0.7820049779979912, "grad_norm": 36.0, "learning_rate": 9.877811757617267e-06, "loss": 19.1766, "step": 42140 }, { "epoch": 0.782190551082471, "grad_norm": 35.75, "learning_rate": 9.877782761831224e-06, "loss": 19.1511, "step": 42150 }, { "epoch": 0.7823761241669508, "grad_norm": 37.3125, "learning_rate": 9.877753766045181e-06, "loss": 19.0198, "step": 42160 }, { "epoch": 0.7825616972514307, "grad_norm": 34.5, "learning_rate": 9.877724770259139e-06, "loss": 19.6603, "step": 42170 }, { "epoch": 0.7827472703359105, "grad_norm": 34.65625, "learning_rate": 9.877695774473096e-06, "loss": 19.1918, "step": 42180 }, { "epoch": 0.7829328434203903, "grad_norm": 35.84375, "learning_rate": 9.877666778687054e-06, "loss": 19.2404, "step": 42190 }, { "epoch": 0.7831184165048701, "grad_norm": 36.28125, "learning_rate": 9.877637782901011e-06, "loss": 19.3464, "step": 42200 }, { "epoch": 0.78330398958935, "grad_norm": 36.125, "learning_rate": 9.87760878711497e-06, "loss": 19.2844, "step": 42210 }, { "epoch": 0.7834895626738297, "grad_norm": 36.4375, "learning_rate": 9.877579791328928e-06, "loss": 18.8615, "step": 42220 }, { "epoch": 0.7836751357583096, "grad_norm": 34.96875, "learning_rate": 9.877550795542885e-06, "loss": 18.8328, "step": 42230 }, { "epoch": 0.7838607088427895, "grad_norm": 34.125, "learning_rate": 9.877521799756842e-06, "loss": 19.1479, "step": 42240 }, { "epoch": 0.7840462819272692, "grad_norm": 34.9375, "learning_rate": 9.8774928039708e-06, "loss": 19.2093, "step": 42250 }, { "epoch": 0.7842318550117491, "grad_norm": 36.6875, "learning_rate": 9.877463808184757e-06, "loss": 18.9336, "step": 42260 }, { "epoch": 0.7844174280962289, "grad_norm": 37.125, "learning_rate": 9.877434812398715e-06, "loss": 18.7095, "step": 42270 }, { "epoch": 0.7846030011807088, "grad_norm": 36.5, "learning_rate": 9.877405816612674e-06, "loss": 19.2101, "step": 42280 }, { "epoch": 0.7847885742651886, "grad_norm": 35.90625, "learning_rate": 9.87737682082663e-06, "loss": 19.6382, "step": 42290 }, { "epoch": 0.7849741473496684, "grad_norm": 34.0625, "learning_rate": 9.877347825040587e-06, "loss": 19.6099, "step": 42300 }, { "epoch": 0.7851597204341483, "grad_norm": 36.8125, "learning_rate": 9.877318829254546e-06, "loss": 19.2404, "step": 42310 }, { "epoch": 0.785345293518628, "grad_norm": 36.03125, "learning_rate": 9.877289833468503e-06, "loss": 19.0874, "step": 42320 }, { "epoch": 0.7855308666031079, "grad_norm": 35.46875, "learning_rate": 9.87726083768246e-06, "loss": 19.0245, "step": 42330 }, { "epoch": 0.7857164396875878, "grad_norm": 36.625, "learning_rate": 9.877231841896418e-06, "loss": 19.3828, "step": 42340 }, { "epoch": 0.7859020127720675, "grad_norm": 37.625, "learning_rate": 9.877202846110376e-06, "loss": 18.6832, "step": 42350 }, { "epoch": 0.7860875858565474, "grad_norm": 34.375, "learning_rate": 9.877173850324333e-06, "loss": 19.379, "step": 42360 }, { "epoch": 0.7862731589410272, "grad_norm": 36.75, "learning_rate": 9.87714485453829e-06, "loss": 19.779, "step": 42370 }, { "epoch": 0.786458732025507, "grad_norm": 36.75, "learning_rate": 9.877115858752248e-06, "loss": 19.2348, "step": 42380 }, { "epoch": 0.7866443051099868, "grad_norm": 35.375, "learning_rate": 9.877086862966207e-06, "loss": 19.3421, "step": 42390 }, { "epoch": 0.7868298781944667, "grad_norm": 35.15625, "learning_rate": 9.877057867180163e-06, "loss": 19.0024, "step": 42400 }, { "epoch": 0.7870154512789465, "grad_norm": 36.96875, "learning_rate": 9.877028871394122e-06, "loss": 18.825, "step": 42410 }, { "epoch": 0.7872010243634263, "grad_norm": 36.65625, "learning_rate": 9.876999875608079e-06, "loss": 19.4176, "step": 42420 }, { "epoch": 0.7873865974479062, "grad_norm": 36.4375, "learning_rate": 9.876970879822037e-06, "loss": 19.0654, "step": 42430 }, { "epoch": 0.7875721705323859, "grad_norm": 36.625, "learning_rate": 9.876941884035994e-06, "loss": 19.5037, "step": 42440 }, { "epoch": 0.7877577436168658, "grad_norm": 35.71875, "learning_rate": 9.876912888249951e-06, "loss": 18.6874, "step": 42450 }, { "epoch": 0.7879433167013457, "grad_norm": 36.40625, "learning_rate": 9.876883892463909e-06, "loss": 19.3074, "step": 42460 }, { "epoch": 0.7881288897858255, "grad_norm": 34.96875, "learning_rate": 9.876854896677866e-06, "loss": 18.9853, "step": 42470 }, { "epoch": 0.7883144628703053, "grad_norm": 35.3125, "learning_rate": 9.876825900891824e-06, "loss": 18.9235, "step": 42480 }, { "epoch": 0.7885000359547851, "grad_norm": 36.90625, "learning_rate": 9.876796905105783e-06, "loss": 19.4066, "step": 42490 }, { "epoch": 0.788685609039265, "grad_norm": 36.1875, "learning_rate": 9.876767909319738e-06, "loss": 19.2456, "step": 42500 }, { "epoch": 0.7888711821237447, "grad_norm": 36.59375, "learning_rate": 9.876738913533696e-06, "loss": 18.9103, "step": 42510 }, { "epoch": 0.7890567552082246, "grad_norm": 34.03125, "learning_rate": 9.876709917747655e-06, "loss": 19.2111, "step": 42520 }, { "epoch": 0.7892423282927045, "grad_norm": 35.21875, "learning_rate": 9.876680921961612e-06, "loss": 19.4737, "step": 42530 }, { "epoch": 0.7894279013771842, "grad_norm": 35.65625, "learning_rate": 9.87665192617557e-06, "loss": 19.1734, "step": 42540 }, { "epoch": 0.7896134744616641, "grad_norm": 38.78125, "learning_rate": 9.876622930389527e-06, "loss": 19.2003, "step": 42550 }, { "epoch": 0.7897990475461439, "grad_norm": 35.53125, "learning_rate": 9.876593934603485e-06, "loss": 19.1934, "step": 42560 }, { "epoch": 0.7899846206306237, "grad_norm": 36.0625, "learning_rate": 9.876564938817442e-06, "loss": 19.3247, "step": 42570 }, { "epoch": 0.7901701937151036, "grad_norm": 36.28125, "learning_rate": 9.8765359430314e-06, "loss": 19.6564, "step": 42580 }, { "epoch": 0.7903557667995834, "grad_norm": 36.625, "learning_rate": 9.876506947245358e-06, "loss": 19.0949, "step": 42590 }, { "epoch": 0.7905413398840632, "grad_norm": 34.9375, "learning_rate": 9.876477951459316e-06, "loss": 18.5577, "step": 42600 }, { "epoch": 0.790726912968543, "grad_norm": 35.9375, "learning_rate": 9.876448955673272e-06, "loss": 18.6629, "step": 42610 }, { "epoch": 0.7909124860530229, "grad_norm": 35.40625, "learning_rate": 9.87641995988723e-06, "loss": 19.3618, "step": 42620 }, { "epoch": 0.7910980591375028, "grad_norm": 34.3125, "learning_rate": 9.876390964101188e-06, "loss": 18.985, "step": 42630 }, { "epoch": 0.7912836322219825, "grad_norm": 37.25, "learning_rate": 9.876361968315145e-06, "loss": 19.3863, "step": 42640 }, { "epoch": 0.7914692053064624, "grad_norm": 35.84375, "learning_rate": 9.876332972529103e-06, "loss": 19.2617, "step": 42650 }, { "epoch": 0.7916547783909422, "grad_norm": 35.75, "learning_rate": 9.87630397674306e-06, "loss": 19.6245, "step": 42660 }, { "epoch": 0.791840351475422, "grad_norm": 37.1875, "learning_rate": 9.876274980957018e-06, "loss": 18.9741, "step": 42670 }, { "epoch": 0.7920259245599018, "grad_norm": 37.21875, "learning_rate": 9.876245985170975e-06, "loss": 19.222, "step": 42680 }, { "epoch": 0.7922114976443817, "grad_norm": 33.5625, "learning_rate": 9.876216989384934e-06, "loss": 19.3086, "step": 42690 }, { "epoch": 0.7923970707288615, "grad_norm": 36.59375, "learning_rate": 9.876187993598892e-06, "loss": 19.1836, "step": 42700 }, { "epoch": 0.7925826438133413, "grad_norm": 36.625, "learning_rate": 9.876158997812849e-06, "loss": 19.0206, "step": 42710 }, { "epoch": 0.7927682168978212, "grad_norm": 35.5, "learning_rate": 9.876130002026806e-06, "loss": 19.2777, "step": 42720 }, { "epoch": 0.7929537899823009, "grad_norm": 38.15625, "learning_rate": 9.876101006240764e-06, "loss": 19.4687, "step": 42730 }, { "epoch": 0.7931393630667808, "grad_norm": 36.78125, "learning_rate": 9.876072010454721e-06, "loss": 19.2891, "step": 42740 }, { "epoch": 0.7933249361512607, "grad_norm": 36.3125, "learning_rate": 9.876043014668679e-06, "loss": 19.1073, "step": 42750 }, { "epoch": 0.7935105092357404, "grad_norm": 37.15625, "learning_rate": 9.876014018882638e-06, "loss": 19.2614, "step": 42760 }, { "epoch": 0.7936960823202203, "grad_norm": 35.5625, "learning_rate": 9.875985023096593e-06, "loss": 19.5217, "step": 42770 }, { "epoch": 0.7938816554047001, "grad_norm": 35.5625, "learning_rate": 9.875956027310551e-06, "loss": 18.8875, "step": 42780 }, { "epoch": 0.7940672284891799, "grad_norm": 34.9375, "learning_rate": 9.87592703152451e-06, "loss": 18.9884, "step": 42790 }, { "epoch": 0.7942528015736597, "grad_norm": 34.59375, "learning_rate": 9.875898035738467e-06, "loss": 18.7005, "step": 42800 }, { "epoch": 0.7944383746581396, "grad_norm": 34.78125, "learning_rate": 9.875869039952425e-06, "loss": 19.4181, "step": 42810 }, { "epoch": 0.7946239477426195, "grad_norm": 35.1875, "learning_rate": 9.875840044166382e-06, "loss": 19.495, "step": 42820 }, { "epoch": 0.7948095208270992, "grad_norm": 36.21875, "learning_rate": 9.87581104838034e-06, "loss": 19.6189, "step": 42830 }, { "epoch": 0.7949950939115791, "grad_norm": 34.40625, "learning_rate": 9.875782052594297e-06, "loss": 19.131, "step": 42840 }, { "epoch": 0.7951806669960589, "grad_norm": 38.84375, "learning_rate": 9.875753056808254e-06, "loss": 19.5435, "step": 42850 }, { "epoch": 0.7953662400805387, "grad_norm": 36.46875, "learning_rate": 9.875724061022214e-06, "loss": 19.4428, "step": 42860 }, { "epoch": 0.7955518131650186, "grad_norm": 34.53125, "learning_rate": 9.875695065236171e-06, "loss": 19.3201, "step": 42870 }, { "epoch": 0.7957373862494984, "grad_norm": 37.0, "learning_rate": 9.875666069450127e-06, "loss": 19.0514, "step": 42880 }, { "epoch": 0.7959229593339782, "grad_norm": 36.25, "learning_rate": 9.875637073664086e-06, "loss": 18.7184, "step": 42890 }, { "epoch": 0.796108532418458, "grad_norm": 35.375, "learning_rate": 9.875608077878043e-06, "loss": 19.1007, "step": 42900 }, { "epoch": 0.7962941055029379, "grad_norm": 33.46875, "learning_rate": 9.875579082092e-06, "loss": 18.8951, "step": 42910 }, { "epoch": 0.7964796785874176, "grad_norm": 37.375, "learning_rate": 9.875550086305958e-06, "loss": 19.4892, "step": 42920 }, { "epoch": 0.7966652516718975, "grad_norm": 35.78125, "learning_rate": 9.875521090519915e-06, "loss": 19.6579, "step": 42930 }, { "epoch": 0.7968508247563774, "grad_norm": 36.375, "learning_rate": 9.875492094733873e-06, "loss": 19.2619, "step": 42940 }, { "epoch": 0.7970363978408571, "grad_norm": 38.0625, "learning_rate": 9.87546309894783e-06, "loss": 19.5734, "step": 42950 }, { "epoch": 0.797221970925337, "grad_norm": 36.21875, "learning_rate": 9.875434103161788e-06, "loss": 18.9373, "step": 42960 }, { "epoch": 0.7974075440098168, "grad_norm": 35.9375, "learning_rate": 9.875405107375747e-06, "loss": 19.2821, "step": 42970 }, { "epoch": 0.7975931170942966, "grad_norm": 35.4375, "learning_rate": 9.875376111589704e-06, "loss": 19.2418, "step": 42980 }, { "epoch": 0.7977786901787765, "grad_norm": 36.03125, "learning_rate": 9.875347115803662e-06, "loss": 19.0795, "step": 42990 }, { "epoch": 0.7979642632632563, "grad_norm": 36.40625, "learning_rate": 9.875318120017619e-06, "loss": 19.5488, "step": 43000 }, { "epoch": 0.7981498363477362, "grad_norm": 34.9375, "learning_rate": 9.875289124231576e-06, "loss": 19.3044, "step": 43010 }, { "epoch": 0.7983354094322159, "grad_norm": 36.53125, "learning_rate": 9.875260128445534e-06, "loss": 19.1615, "step": 43020 }, { "epoch": 0.7985209825166958, "grad_norm": 35.96875, "learning_rate": 9.875231132659491e-06, "loss": 19.5529, "step": 43030 }, { "epoch": 0.7987065556011756, "grad_norm": 36.59375, "learning_rate": 9.875202136873449e-06, "loss": 18.9387, "step": 43040 }, { "epoch": 0.7988921286856554, "grad_norm": 35.4375, "learning_rate": 9.875173141087406e-06, "loss": 19.0329, "step": 43050 }, { "epoch": 0.7990777017701353, "grad_norm": 36.8125, "learning_rate": 9.875144145301363e-06, "loss": 19.6343, "step": 43060 }, { "epoch": 0.7992632748546151, "grad_norm": 36.40625, "learning_rate": 9.875115149515322e-06, "loss": 19.1546, "step": 43070 }, { "epoch": 0.7994488479390949, "grad_norm": 35.96875, "learning_rate": 9.87508615372928e-06, "loss": 18.9671, "step": 43080 }, { "epoch": 0.7996344210235747, "grad_norm": 36.53125, "learning_rate": 9.875057157943236e-06, "loss": 19.1909, "step": 43090 }, { "epoch": 0.7998199941080546, "grad_norm": 36.28125, "learning_rate": 9.875028162157195e-06, "loss": 19.067, "step": 43100 }, { "epoch": 0.8000055671925344, "grad_norm": 35.375, "learning_rate": 9.874999166371152e-06, "loss": 18.7279, "step": 43110 }, { "epoch": 0.8001911402770142, "grad_norm": 36.40625, "learning_rate": 9.87497017058511e-06, "loss": 19.0467, "step": 43120 }, { "epoch": 0.8003767133614941, "grad_norm": 39.53125, "learning_rate": 9.874941174799067e-06, "loss": 19.5147, "step": 43130 }, { "epoch": 0.8005622864459738, "grad_norm": 36.625, "learning_rate": 9.874912179013026e-06, "loss": 18.7374, "step": 43140 }, { "epoch": 0.8007478595304537, "grad_norm": 34.3125, "learning_rate": 9.874883183226982e-06, "loss": 19.4482, "step": 43150 }, { "epoch": 0.8009334326149335, "grad_norm": 35.8125, "learning_rate": 9.874854187440939e-06, "loss": 19.1165, "step": 43160 }, { "epoch": 0.8011190056994134, "grad_norm": 35.65625, "learning_rate": 9.874825191654898e-06, "loss": 18.8476, "step": 43170 }, { "epoch": 0.8013045787838932, "grad_norm": 36.6875, "learning_rate": 9.874796195868856e-06, "loss": 18.6734, "step": 43180 }, { "epoch": 0.801490151868373, "grad_norm": 36.3125, "learning_rate": 9.874767200082813e-06, "loss": 19.189, "step": 43190 }, { "epoch": 0.8016757249528529, "grad_norm": 35.8125, "learning_rate": 9.87473820429677e-06, "loss": 19.4344, "step": 43200 }, { "epoch": 0.8018612980373326, "grad_norm": 38.03125, "learning_rate": 9.874709208510728e-06, "loss": 19.112, "step": 43210 }, { "epoch": 0.8020468711218125, "grad_norm": 37.0625, "learning_rate": 9.874680212724685e-06, "loss": 19.2927, "step": 43220 }, { "epoch": 0.8022324442062924, "grad_norm": 35.15625, "learning_rate": 9.874651216938643e-06, "loss": 19.2974, "step": 43230 }, { "epoch": 0.8024180172907721, "grad_norm": 34.84375, "learning_rate": 9.874622221152602e-06, "loss": 18.9434, "step": 43240 }, { "epoch": 0.802603590375252, "grad_norm": 37.84375, "learning_rate": 9.874593225366557e-06, "loss": 18.9947, "step": 43250 }, { "epoch": 0.8027891634597318, "grad_norm": 36.3125, "learning_rate": 9.874564229580515e-06, "loss": 19.3009, "step": 43260 }, { "epoch": 0.8029747365442116, "grad_norm": 33.3125, "learning_rate": 9.874535233794474e-06, "loss": 18.7793, "step": 43270 }, { "epoch": 0.8031603096286914, "grad_norm": 36.78125, "learning_rate": 9.874506238008431e-06, "loss": 19.2566, "step": 43280 }, { "epoch": 0.8033458827131713, "grad_norm": 36.59375, "learning_rate": 9.874477242222389e-06, "loss": 19.238, "step": 43290 }, { "epoch": 0.8035314557976511, "grad_norm": 36.4375, "learning_rate": 9.874448246436346e-06, "loss": 19.245, "step": 43300 }, { "epoch": 0.8037170288821309, "grad_norm": 35.25, "learning_rate": 9.874419250650304e-06, "loss": 19.1339, "step": 43310 }, { "epoch": 0.8039026019666108, "grad_norm": 34.9375, "learning_rate": 9.874390254864261e-06, "loss": 18.6303, "step": 43320 }, { "epoch": 0.8040881750510905, "grad_norm": 36.3125, "learning_rate": 9.874361259078218e-06, "loss": 18.7144, "step": 43330 }, { "epoch": 0.8042737481355704, "grad_norm": 37.4375, "learning_rate": 9.874332263292178e-06, "loss": 19.6017, "step": 43340 }, { "epoch": 0.8044593212200503, "grad_norm": 35.28125, "learning_rate": 9.874303267506135e-06, "loss": 19.0519, "step": 43350 }, { "epoch": 0.8046448943045301, "grad_norm": 36.53125, "learning_rate": 9.87427427172009e-06, "loss": 19.1844, "step": 43360 }, { "epoch": 0.8048304673890099, "grad_norm": 36.15625, "learning_rate": 9.87424527593405e-06, "loss": 18.981, "step": 43370 }, { "epoch": 0.8050160404734897, "grad_norm": 37.0625, "learning_rate": 9.874216280148007e-06, "loss": 19.1258, "step": 43380 }, { "epoch": 0.8052016135579696, "grad_norm": 35.21875, "learning_rate": 9.874187284361965e-06, "loss": 18.9282, "step": 43390 }, { "epoch": 0.8053871866424493, "grad_norm": 39.625, "learning_rate": 9.874158288575922e-06, "loss": 18.9325, "step": 43400 }, { "epoch": 0.8055727597269292, "grad_norm": 35.3125, "learning_rate": 9.87412929278988e-06, "loss": 18.5758, "step": 43410 }, { "epoch": 0.8057583328114091, "grad_norm": 35.09375, "learning_rate": 9.874100297003837e-06, "loss": 18.8267, "step": 43420 }, { "epoch": 0.8059439058958888, "grad_norm": 34.21875, "learning_rate": 9.874071301217794e-06, "loss": 19.2754, "step": 43430 }, { "epoch": 0.8061294789803687, "grad_norm": 36.65625, "learning_rate": 9.874042305431752e-06, "loss": 19.3909, "step": 43440 }, { "epoch": 0.8063150520648485, "grad_norm": 35.03125, "learning_rate": 9.87401330964571e-06, "loss": 19.1061, "step": 43450 }, { "epoch": 0.8065006251493283, "grad_norm": 37.875, "learning_rate": 9.873984313859668e-06, "loss": 19.1138, "step": 43460 }, { "epoch": 0.8066861982338082, "grad_norm": 36.71875, "learning_rate": 9.873955318073626e-06, "loss": 19.7451, "step": 43470 }, { "epoch": 0.806871771318288, "grad_norm": 36.5, "learning_rate": 9.873926322287583e-06, "loss": 19.0428, "step": 43480 }, { "epoch": 0.8070573444027678, "grad_norm": 36.8125, "learning_rate": 9.87389732650154e-06, "loss": 18.7527, "step": 43490 }, { "epoch": 0.8072429174872476, "grad_norm": 36.5625, "learning_rate": 9.873868330715498e-06, "loss": 19.3082, "step": 43500 }, { "epoch": 0.8074284905717275, "grad_norm": 35.9375, "learning_rate": 9.873839334929455e-06, "loss": 19.3901, "step": 43510 }, { "epoch": 0.8076140636562072, "grad_norm": 36.8125, "learning_rate": 9.873810339143413e-06, "loss": 19.0965, "step": 43520 }, { "epoch": 0.8077996367406871, "grad_norm": 36.75, "learning_rate": 9.87378134335737e-06, "loss": 19.2141, "step": 43530 }, { "epoch": 0.807985209825167, "grad_norm": 33.71875, "learning_rate": 9.873752347571327e-06, "loss": 19.1556, "step": 43540 }, { "epoch": 0.8081707829096468, "grad_norm": 37.71875, "learning_rate": 9.873723351785286e-06, "loss": 19.2163, "step": 43550 }, { "epoch": 0.8083563559941266, "grad_norm": 34.5, "learning_rate": 9.873694355999244e-06, "loss": 19.1022, "step": 43560 }, { "epoch": 0.8085419290786064, "grad_norm": 37.5, "learning_rate": 9.873665360213201e-06, "loss": 19.0576, "step": 43570 }, { "epoch": 0.8087275021630863, "grad_norm": 36.625, "learning_rate": 9.873636364427159e-06, "loss": 19.0831, "step": 43580 }, { "epoch": 0.8089130752475661, "grad_norm": 35.125, "learning_rate": 9.873607368641116e-06, "loss": 18.7494, "step": 43590 }, { "epoch": 0.8090986483320459, "grad_norm": 36.625, "learning_rate": 9.873578372855074e-06, "loss": 19.0648, "step": 43600 }, { "epoch": 0.8092842214165258, "grad_norm": 36.5, "learning_rate": 9.873549377069031e-06, "loss": 19.0214, "step": 43610 }, { "epoch": 0.8094697945010055, "grad_norm": 36.8125, "learning_rate": 9.87352038128299e-06, "loss": 18.9287, "step": 43620 }, { "epoch": 0.8096553675854854, "grad_norm": 35.15625, "learning_rate": 9.873491385496946e-06, "loss": 19.3647, "step": 43630 }, { "epoch": 0.8098409406699653, "grad_norm": 35.3125, "learning_rate": 9.873462389710903e-06, "loss": 18.8901, "step": 43640 }, { "epoch": 0.810026513754445, "grad_norm": 33.78125, "learning_rate": 9.873433393924862e-06, "loss": 19.4528, "step": 43650 }, { "epoch": 0.8102120868389249, "grad_norm": 36.75, "learning_rate": 9.87340439813882e-06, "loss": 18.7406, "step": 43660 }, { "epoch": 0.8103976599234047, "grad_norm": 37.125, "learning_rate": 9.873375402352777e-06, "loss": 19.3565, "step": 43670 }, { "epoch": 0.8105832330078845, "grad_norm": 36.9375, "learning_rate": 9.873346406566734e-06, "loss": 19.6584, "step": 43680 }, { "epoch": 0.8107688060923643, "grad_norm": 36.3125, "learning_rate": 9.873317410780692e-06, "loss": 19.1017, "step": 43690 }, { "epoch": 0.8109543791768442, "grad_norm": 36.5625, "learning_rate": 9.87328841499465e-06, "loss": 19.0014, "step": 43700 }, { "epoch": 0.8111399522613241, "grad_norm": 35.96875, "learning_rate": 9.873259419208607e-06, "loss": 19.361, "step": 43710 }, { "epoch": 0.8113255253458038, "grad_norm": 35.96875, "learning_rate": 9.873230423422566e-06, "loss": 18.9856, "step": 43720 }, { "epoch": 0.8115110984302837, "grad_norm": 34.25, "learning_rate": 9.873201427636523e-06, "loss": 19.1586, "step": 43730 }, { "epoch": 0.8116966715147635, "grad_norm": 35.59375, "learning_rate": 9.873172431850479e-06, "loss": 19.5068, "step": 43740 }, { "epoch": 0.8118822445992433, "grad_norm": 34.59375, "learning_rate": 9.873143436064438e-06, "loss": 19.1461, "step": 43750 }, { "epoch": 0.8120678176837232, "grad_norm": 37.71875, "learning_rate": 9.873114440278395e-06, "loss": 19.1016, "step": 43760 }, { "epoch": 0.812253390768203, "grad_norm": 34.34375, "learning_rate": 9.873085444492353e-06, "loss": 19.1093, "step": 43770 }, { "epoch": 0.8124389638526828, "grad_norm": 34.9375, "learning_rate": 9.87305644870631e-06, "loss": 18.9712, "step": 43780 }, { "epoch": 0.8126245369371626, "grad_norm": 36.6875, "learning_rate": 9.873027452920268e-06, "loss": 19.6167, "step": 43790 }, { "epoch": 0.8128101100216425, "grad_norm": 36.53125, "learning_rate": 9.872998457134225e-06, "loss": 19.0376, "step": 43800 }, { "epoch": 0.8129956831061222, "grad_norm": 35.46875, "learning_rate": 9.872969461348182e-06, "loss": 19.1814, "step": 43810 }, { "epoch": 0.8131812561906021, "grad_norm": 36.0, "learning_rate": 9.872940465562142e-06, "loss": 19.1973, "step": 43820 }, { "epoch": 0.813366829275082, "grad_norm": 38.5625, "learning_rate": 9.872911469776099e-06, "loss": 19.2537, "step": 43830 }, { "epoch": 0.8135524023595617, "grad_norm": 38.25, "learning_rate": 9.872882473990055e-06, "loss": 19.3154, "step": 43840 }, { "epoch": 0.8137379754440416, "grad_norm": 37.03125, "learning_rate": 9.872853478204014e-06, "loss": 19.3999, "step": 43850 }, { "epoch": 0.8139235485285214, "grad_norm": 34.375, "learning_rate": 9.872824482417971e-06, "loss": 18.8485, "step": 43860 }, { "epoch": 0.8141091216130012, "grad_norm": 35.25, "learning_rate": 9.872795486631929e-06, "loss": 19.1239, "step": 43870 }, { "epoch": 0.8142946946974811, "grad_norm": 38.3125, "learning_rate": 9.872766490845886e-06, "loss": 19.4616, "step": 43880 }, { "epoch": 0.8144802677819609, "grad_norm": 37.625, "learning_rate": 9.872737495059843e-06, "loss": 19.0829, "step": 43890 }, { "epoch": 0.8146658408664408, "grad_norm": 37.8125, "learning_rate": 9.8727084992738e-06, "loss": 19.356, "step": 43900 }, { "epoch": 0.8148514139509205, "grad_norm": 39.5625, "learning_rate": 9.872679503487758e-06, "loss": 19.0015, "step": 43910 }, { "epoch": 0.8150369870354004, "grad_norm": 35.53125, "learning_rate": 9.872650507701717e-06, "loss": 19.1196, "step": 43920 }, { "epoch": 0.8152225601198803, "grad_norm": 37.625, "learning_rate": 9.872621511915675e-06, "loss": 19.5022, "step": 43930 }, { "epoch": 0.81540813320436, "grad_norm": 35.25, "learning_rate": 9.872592516129632e-06, "loss": 18.9608, "step": 43940 }, { "epoch": 0.8155937062888399, "grad_norm": 35.25, "learning_rate": 9.87256352034359e-06, "loss": 19.2786, "step": 43950 }, { "epoch": 0.8157792793733197, "grad_norm": 35.46875, "learning_rate": 9.872534524557547e-06, "loss": 19.0429, "step": 43960 }, { "epoch": 0.8159648524577995, "grad_norm": 36.9375, "learning_rate": 9.872505528771504e-06, "loss": 19.0499, "step": 43970 }, { "epoch": 0.8161504255422793, "grad_norm": 35.1875, "learning_rate": 9.872476532985462e-06, "loss": 19.377, "step": 43980 }, { "epoch": 0.8163359986267592, "grad_norm": 34.71875, "learning_rate": 9.87244753719942e-06, "loss": 19.3778, "step": 43990 }, { "epoch": 0.816521571711239, "grad_norm": 37.625, "learning_rate": 9.872418541413378e-06, "loss": 19.1619, "step": 44000 }, { "epoch": 0.8167071447957188, "grad_norm": 34.96875, "learning_rate": 9.872389545627334e-06, "loss": 19.2044, "step": 44010 }, { "epoch": 0.8168927178801987, "grad_norm": 36.46875, "learning_rate": 9.872360549841291e-06, "loss": 18.8557, "step": 44020 }, { "epoch": 0.8170782909646784, "grad_norm": 37.0625, "learning_rate": 9.87233155405525e-06, "loss": 19.3249, "step": 44030 }, { "epoch": 0.8172638640491583, "grad_norm": 33.6875, "learning_rate": 9.872302558269208e-06, "loss": 19.2119, "step": 44040 }, { "epoch": 0.8174494371336382, "grad_norm": 36.375, "learning_rate": 9.872273562483165e-06, "loss": 18.7741, "step": 44050 }, { "epoch": 0.817635010218118, "grad_norm": 38.40625, "learning_rate": 9.872244566697123e-06, "loss": 18.7126, "step": 44060 }, { "epoch": 0.8178205833025978, "grad_norm": 36.78125, "learning_rate": 9.87221557091108e-06, "loss": 19.2019, "step": 44070 }, { "epoch": 0.8180061563870776, "grad_norm": 36.0625, "learning_rate": 9.872186575125038e-06, "loss": 19.5095, "step": 44080 }, { "epoch": 0.8181917294715575, "grad_norm": 37.53125, "learning_rate": 9.872157579338995e-06, "loss": 19.4252, "step": 44090 }, { "epoch": 0.8183773025560372, "grad_norm": 37.21875, "learning_rate": 9.872128583552954e-06, "loss": 19.17, "step": 44100 }, { "epoch": 0.8185628756405171, "grad_norm": 33.5, "learning_rate": 9.87209958776691e-06, "loss": 19.4603, "step": 44110 }, { "epoch": 0.818748448724997, "grad_norm": 36.71875, "learning_rate": 9.872070591980867e-06, "loss": 19.2598, "step": 44120 }, { "epoch": 0.8189340218094767, "grad_norm": 34.5625, "learning_rate": 9.872041596194826e-06, "loss": 18.7924, "step": 44130 }, { "epoch": 0.8191195948939566, "grad_norm": 37.40625, "learning_rate": 9.872012600408784e-06, "loss": 19.4831, "step": 44140 }, { "epoch": 0.8193051679784364, "grad_norm": 35.03125, "learning_rate": 9.871983604622741e-06, "loss": 18.8814, "step": 44150 }, { "epoch": 0.8194907410629162, "grad_norm": 36.1875, "learning_rate": 9.871954608836698e-06, "loss": 19.2846, "step": 44160 }, { "epoch": 0.8196763141473961, "grad_norm": 35.46875, "learning_rate": 9.871925613050656e-06, "loss": 18.9267, "step": 44170 }, { "epoch": 0.8198618872318759, "grad_norm": 36.875, "learning_rate": 9.871896617264613e-06, "loss": 18.9907, "step": 44180 }, { "epoch": 0.8200474603163557, "grad_norm": 38.53125, "learning_rate": 9.87186762147857e-06, "loss": 19.8266, "step": 44190 }, { "epoch": 0.8202330334008355, "grad_norm": 37.1875, "learning_rate": 9.87183862569253e-06, "loss": 19.0446, "step": 44200 }, { "epoch": 0.8204186064853154, "grad_norm": 35.375, "learning_rate": 9.871809629906487e-06, "loss": 19.2261, "step": 44210 }, { "epoch": 0.8206041795697951, "grad_norm": 34.9375, "learning_rate": 9.871780634120443e-06, "loss": 19.3818, "step": 44220 }, { "epoch": 0.820789752654275, "grad_norm": 36.3125, "learning_rate": 9.871751638334402e-06, "loss": 19.2278, "step": 44230 }, { "epoch": 0.8209753257387549, "grad_norm": 38.21875, "learning_rate": 9.87172264254836e-06, "loss": 19.2026, "step": 44240 }, { "epoch": 0.8211608988232347, "grad_norm": 37.21875, "learning_rate": 9.871693646762317e-06, "loss": 19.0903, "step": 44250 }, { "epoch": 0.8213464719077145, "grad_norm": 37.28125, "learning_rate": 9.871664650976274e-06, "loss": 18.8226, "step": 44260 }, { "epoch": 0.8215320449921943, "grad_norm": 35.40625, "learning_rate": 9.871635655190232e-06, "loss": 19.3683, "step": 44270 }, { "epoch": 0.8217176180766742, "grad_norm": 35.21875, "learning_rate": 9.871606659404189e-06, "loss": 18.8462, "step": 44280 }, { "epoch": 0.821903191161154, "grad_norm": 38.71875, "learning_rate": 9.871577663618146e-06, "loss": 19.0862, "step": 44290 }, { "epoch": 0.8220887642456338, "grad_norm": 38.125, "learning_rate": 9.871548667832106e-06, "loss": 19.0924, "step": 44300 }, { "epoch": 0.8222743373301137, "grad_norm": 35.53125, "learning_rate": 9.871519672046063e-06, "loss": 18.8592, "step": 44310 }, { "epoch": 0.8224599104145934, "grad_norm": 36.875, "learning_rate": 9.87149067626002e-06, "loss": 19.0449, "step": 44320 }, { "epoch": 0.8226454834990733, "grad_norm": 32.75, "learning_rate": 9.871461680473978e-06, "loss": 18.9665, "step": 44330 }, { "epoch": 0.8228310565835532, "grad_norm": 34.0, "learning_rate": 9.871432684687935e-06, "loss": 18.4968, "step": 44340 }, { "epoch": 0.8230166296680329, "grad_norm": 34.6875, "learning_rate": 9.871403688901893e-06, "loss": 19.4038, "step": 44350 }, { "epoch": 0.8232022027525128, "grad_norm": 38.4375, "learning_rate": 9.87137469311585e-06, "loss": 19.172, "step": 44360 }, { "epoch": 0.8233877758369926, "grad_norm": 34.90625, "learning_rate": 9.871345697329809e-06, "loss": 19.3212, "step": 44370 }, { "epoch": 0.8235733489214724, "grad_norm": 35.03125, "learning_rate": 9.871316701543765e-06, "loss": 18.7932, "step": 44380 }, { "epoch": 0.8237589220059522, "grad_norm": 35.46875, "learning_rate": 9.871287705757722e-06, "loss": 19.2297, "step": 44390 }, { "epoch": 0.8239444950904321, "grad_norm": 36.78125, "learning_rate": 9.871258709971681e-06, "loss": 18.9214, "step": 44400 }, { "epoch": 0.8241300681749119, "grad_norm": 37.0, "learning_rate": 9.871229714185639e-06, "loss": 19.1307, "step": 44410 }, { "epoch": 0.8243156412593917, "grad_norm": 35.78125, "learning_rate": 9.871200718399596e-06, "loss": 19.2335, "step": 44420 }, { "epoch": 0.8245012143438716, "grad_norm": 35.90625, "learning_rate": 9.871171722613554e-06, "loss": 19.2364, "step": 44430 }, { "epoch": 0.8246867874283514, "grad_norm": 35.65625, "learning_rate": 9.871142726827511e-06, "loss": 18.697, "step": 44440 }, { "epoch": 0.8248723605128312, "grad_norm": 37.125, "learning_rate": 9.871113731041468e-06, "loss": 19.2022, "step": 44450 }, { "epoch": 0.825057933597311, "grad_norm": 35.90625, "learning_rate": 9.871084735255426e-06, "loss": 19.0279, "step": 44460 }, { "epoch": 0.8252435066817909, "grad_norm": 35.25, "learning_rate": 9.871055739469383e-06, "loss": 18.9795, "step": 44470 }, { "epoch": 0.8254290797662707, "grad_norm": 36.84375, "learning_rate": 9.871026743683342e-06, "loss": 19.2522, "step": 44480 }, { "epoch": 0.8256146528507505, "grad_norm": 36.1875, "learning_rate": 9.870997747897298e-06, "loss": 18.8613, "step": 44490 }, { "epoch": 0.8258002259352304, "grad_norm": 36.65625, "learning_rate": 9.870968752111257e-06, "loss": 19.3883, "step": 44500 }, { "epoch": 0.8259857990197101, "grad_norm": 36.09375, "learning_rate": 9.870939756325215e-06, "loss": 19.3544, "step": 44510 }, { "epoch": 0.82617137210419, "grad_norm": 38.5, "learning_rate": 9.870910760539172e-06, "loss": 19.3535, "step": 44520 }, { "epoch": 0.8263569451886699, "grad_norm": 34.75, "learning_rate": 9.87088176475313e-06, "loss": 18.8685, "step": 44530 }, { "epoch": 0.8265425182731496, "grad_norm": 38.8125, "learning_rate": 9.870852768967087e-06, "loss": 19.0754, "step": 44540 }, { "epoch": 0.8267280913576295, "grad_norm": 36.46875, "learning_rate": 9.870823773181044e-06, "loss": 19.056, "step": 44550 }, { "epoch": 0.8269136644421093, "grad_norm": 36.1875, "learning_rate": 9.870794777395002e-06, "loss": 18.5412, "step": 44560 }, { "epoch": 0.8270992375265891, "grad_norm": 34.15625, "learning_rate": 9.870765781608959e-06, "loss": 18.657, "step": 44570 }, { "epoch": 0.827284810611069, "grad_norm": 37.0, "learning_rate": 9.870736785822918e-06, "loss": 19.3666, "step": 44580 }, { "epoch": 0.8274703836955488, "grad_norm": 36.65625, "learning_rate": 9.870707790036875e-06, "loss": 19.1981, "step": 44590 }, { "epoch": 0.8276559567800287, "grad_norm": 35.4375, "learning_rate": 9.870678794250831e-06, "loss": 19.325, "step": 44600 }, { "epoch": 0.8278415298645084, "grad_norm": 36.375, "learning_rate": 9.87064979846479e-06, "loss": 19.4554, "step": 44610 }, { "epoch": 0.8280271029489883, "grad_norm": 36.78125, "learning_rate": 9.870620802678748e-06, "loss": 18.9955, "step": 44620 }, { "epoch": 0.8282126760334682, "grad_norm": 38.1875, "learning_rate": 9.870591806892705e-06, "loss": 19.2107, "step": 44630 }, { "epoch": 0.8283982491179479, "grad_norm": 37.5, "learning_rate": 9.870562811106662e-06, "loss": 18.8142, "step": 44640 }, { "epoch": 0.8285838222024278, "grad_norm": 38.34375, "learning_rate": 9.87053381532062e-06, "loss": 19.3055, "step": 44650 }, { "epoch": 0.8287693952869076, "grad_norm": 36.0, "learning_rate": 9.870504819534577e-06, "loss": 18.8541, "step": 44660 }, { "epoch": 0.8289549683713874, "grad_norm": 39.78125, "learning_rate": 9.870475823748535e-06, "loss": 19.1764, "step": 44670 }, { "epoch": 0.8291405414558672, "grad_norm": 36.03125, "learning_rate": 9.870446827962494e-06, "loss": 18.7747, "step": 44680 }, { "epoch": 0.8293261145403471, "grad_norm": 34.875, "learning_rate": 9.870417832176451e-06, "loss": 19.1716, "step": 44690 }, { "epoch": 0.8295116876248269, "grad_norm": 36.4375, "learning_rate": 9.870388836390407e-06, "loss": 19.152, "step": 44700 }, { "epoch": 0.8296972607093067, "grad_norm": 37.46875, "learning_rate": 9.870359840604366e-06, "loss": 18.8802, "step": 44710 }, { "epoch": 0.8298828337937866, "grad_norm": 35.5625, "learning_rate": 9.870330844818323e-06, "loss": 19.1369, "step": 44720 }, { "epoch": 0.8300684068782663, "grad_norm": 35.75, "learning_rate": 9.870301849032281e-06, "loss": 19.0659, "step": 44730 }, { "epoch": 0.8302539799627462, "grad_norm": 36.3125, "learning_rate": 9.870272853246238e-06, "loss": 19.2007, "step": 44740 }, { "epoch": 0.830439553047226, "grad_norm": 37.09375, "learning_rate": 9.870243857460197e-06, "loss": 19.3437, "step": 44750 }, { "epoch": 0.8306251261317058, "grad_norm": 36.28125, "learning_rate": 9.870214861674153e-06, "loss": 19.456, "step": 44760 }, { "epoch": 0.8308106992161857, "grad_norm": 35.625, "learning_rate": 9.87018586588811e-06, "loss": 18.8098, "step": 44770 }, { "epoch": 0.8309962723006655, "grad_norm": 36.0, "learning_rate": 9.87015687010207e-06, "loss": 19.4965, "step": 44780 }, { "epoch": 0.8311818453851454, "grad_norm": 36.125, "learning_rate": 9.870127874316027e-06, "loss": 19.1793, "step": 44790 }, { "epoch": 0.8313674184696251, "grad_norm": 36.78125, "learning_rate": 9.870098878529984e-06, "loss": 19.0221, "step": 44800 }, { "epoch": 0.831552991554105, "grad_norm": 36.15625, "learning_rate": 9.870069882743942e-06, "loss": 18.9155, "step": 44810 }, { "epoch": 0.8317385646385849, "grad_norm": 36.09375, "learning_rate": 9.8700408869579e-06, "loss": 18.9602, "step": 44820 }, { "epoch": 0.8319241377230646, "grad_norm": 37.4375, "learning_rate": 9.870011891171857e-06, "loss": 19.3968, "step": 44830 }, { "epoch": 0.8321097108075445, "grad_norm": 38.1875, "learning_rate": 9.869982895385814e-06, "loss": 18.7445, "step": 44840 }, { "epoch": 0.8322952838920243, "grad_norm": 34.875, "learning_rate": 9.869953899599773e-06, "loss": 19.3119, "step": 44850 }, { "epoch": 0.8324808569765041, "grad_norm": 36.0625, "learning_rate": 9.869924903813729e-06, "loss": 18.8227, "step": 44860 }, { "epoch": 0.832666430060984, "grad_norm": 35.15625, "learning_rate": 9.869895908027686e-06, "loss": 18.7788, "step": 44870 }, { "epoch": 0.8328520031454638, "grad_norm": 34.03125, "learning_rate": 9.869866912241645e-06, "loss": 19.126, "step": 44880 }, { "epoch": 0.8330375762299436, "grad_norm": 38.15625, "learning_rate": 9.869837916455603e-06, "loss": 18.9767, "step": 44890 }, { "epoch": 0.8332231493144234, "grad_norm": 37.15625, "learning_rate": 9.86980892066956e-06, "loss": 18.608, "step": 44900 }, { "epoch": 0.8334087223989033, "grad_norm": 37.03125, "learning_rate": 9.869779924883518e-06, "loss": 19.1132, "step": 44910 }, { "epoch": 0.833594295483383, "grad_norm": 35.0625, "learning_rate": 9.869750929097475e-06, "loss": 18.8629, "step": 44920 }, { "epoch": 0.8337798685678629, "grad_norm": 36.6875, "learning_rate": 9.869721933311432e-06, "loss": 19.265, "step": 44930 }, { "epoch": 0.8339654416523428, "grad_norm": 34.09375, "learning_rate": 9.86969293752539e-06, "loss": 19.1547, "step": 44940 }, { "epoch": 0.8341510147368225, "grad_norm": 34.78125, "learning_rate": 9.869663941739349e-06, "loss": 19.0749, "step": 44950 }, { "epoch": 0.8343365878213024, "grad_norm": 36.28125, "learning_rate": 9.869634945953306e-06, "loss": 19.0023, "step": 44960 }, { "epoch": 0.8345221609057822, "grad_norm": 37.09375, "learning_rate": 9.869605950167262e-06, "loss": 19.0009, "step": 44970 }, { "epoch": 0.8347077339902621, "grad_norm": 34.96875, "learning_rate": 9.869576954381221e-06, "loss": 18.7841, "step": 44980 }, { "epoch": 0.8348933070747419, "grad_norm": 34.9375, "learning_rate": 9.869547958595179e-06, "loss": 18.6512, "step": 44990 }, { "epoch": 0.8350788801592217, "grad_norm": 36.5, "learning_rate": 9.869518962809136e-06, "loss": 19.053, "step": 45000 }, { "epoch": 0.8350788801592217, "eval_loss": 2.3824570178985596, "eval_runtime": 454.6243, "eval_samples_per_second": 3194.103, "eval_steps_per_second": 49.909, "step": 45000 }, { "epoch": 0.8352644532437016, "grad_norm": 35.46875, "learning_rate": 9.869489967023093e-06, "loss": 18.7658, "step": 45010 }, { "epoch": 0.8354500263281813, "grad_norm": 38.0625, "learning_rate": 9.86946097123705e-06, "loss": 19.191, "step": 45020 }, { "epoch": 0.8356355994126612, "grad_norm": 35.5625, "learning_rate": 9.869431975451008e-06, "loss": 19.1063, "step": 45030 }, { "epoch": 0.835821172497141, "grad_norm": 36.21875, "learning_rate": 9.869402979664966e-06, "loss": 19.1574, "step": 45040 }, { "epoch": 0.8360067455816208, "grad_norm": 36.3125, "learning_rate": 9.869373983878923e-06, "loss": 18.7662, "step": 45050 }, { "epoch": 0.8361923186661007, "grad_norm": 34.78125, "learning_rate": 9.869344988092882e-06, "loss": 19.5668, "step": 45060 }, { "epoch": 0.8363778917505805, "grad_norm": 37.59375, "learning_rate": 9.86931599230684e-06, "loss": 18.7352, "step": 45070 }, { "epoch": 0.8365634648350603, "grad_norm": 35.15625, "learning_rate": 9.869286996520795e-06, "loss": 18.9398, "step": 45080 }, { "epoch": 0.8367490379195401, "grad_norm": 37.15625, "learning_rate": 9.869258000734754e-06, "loss": 18.3637, "step": 45090 }, { "epoch": 0.83693461100402, "grad_norm": 37.28125, "learning_rate": 9.869229004948712e-06, "loss": 18.9402, "step": 45100 }, { "epoch": 0.8371201840884998, "grad_norm": 36.21875, "learning_rate": 9.869200009162669e-06, "loss": 18.9655, "step": 45110 }, { "epoch": 0.8373057571729796, "grad_norm": 36.03125, "learning_rate": 9.869171013376627e-06, "loss": 19.3164, "step": 45120 }, { "epoch": 0.8374913302574595, "grad_norm": 37.5, "learning_rate": 9.869142017590584e-06, "loss": 19.1801, "step": 45130 }, { "epoch": 0.8376769033419393, "grad_norm": 34.65625, "learning_rate": 9.869113021804541e-06, "loss": 19.3652, "step": 45140 }, { "epoch": 0.8378624764264191, "grad_norm": 37.5, "learning_rate": 9.869084026018499e-06, "loss": 18.9179, "step": 45150 }, { "epoch": 0.838048049510899, "grad_norm": 35.875, "learning_rate": 9.869055030232458e-06, "loss": 19.1069, "step": 45160 }, { "epoch": 0.8382336225953788, "grad_norm": 32.875, "learning_rate": 9.869026034446415e-06, "loss": 18.7498, "step": 45170 }, { "epoch": 0.8384191956798586, "grad_norm": 36.15625, "learning_rate": 9.868997038660373e-06, "loss": 19.555, "step": 45180 }, { "epoch": 0.8386047687643384, "grad_norm": 34.9375, "learning_rate": 9.86896804287433e-06, "loss": 18.9055, "step": 45190 }, { "epoch": 0.8387903418488183, "grad_norm": 36.3125, "learning_rate": 9.868939047088287e-06, "loss": 18.6655, "step": 45200 }, { "epoch": 0.838975914933298, "grad_norm": 35.5, "learning_rate": 9.868910051302245e-06, "loss": 19.2085, "step": 45210 }, { "epoch": 0.8391614880177779, "grad_norm": 36.75, "learning_rate": 9.868881055516202e-06, "loss": 19.3331, "step": 45220 }, { "epoch": 0.8393470611022578, "grad_norm": 35.3125, "learning_rate": 9.868852059730161e-06, "loss": 19.1247, "step": 45230 }, { "epoch": 0.8395326341867375, "grad_norm": 36.59375, "learning_rate": 9.868823063944117e-06, "loss": 19.0293, "step": 45240 }, { "epoch": 0.8397182072712174, "grad_norm": 34.65625, "learning_rate": 9.868794068158074e-06, "loss": 18.9617, "step": 45250 }, { "epoch": 0.8399037803556972, "grad_norm": 34.28125, "learning_rate": 9.868765072372034e-06, "loss": 18.8723, "step": 45260 }, { "epoch": 0.840089353440177, "grad_norm": 37.09375, "learning_rate": 9.868736076585991e-06, "loss": 19.0723, "step": 45270 }, { "epoch": 0.8402749265246569, "grad_norm": 36.46875, "learning_rate": 9.868707080799948e-06, "loss": 18.9186, "step": 45280 }, { "epoch": 0.8404604996091367, "grad_norm": 37.03125, "learning_rate": 9.868678085013906e-06, "loss": 19.2951, "step": 45290 }, { "epoch": 0.8406460726936165, "grad_norm": 35.71875, "learning_rate": 9.868649089227863e-06, "loss": 18.891, "step": 45300 }, { "epoch": 0.8408316457780963, "grad_norm": 38.75, "learning_rate": 9.86862009344182e-06, "loss": 19.1318, "step": 45310 }, { "epoch": 0.8410172188625762, "grad_norm": 36.0625, "learning_rate": 9.868591097655778e-06, "loss": 18.8914, "step": 45320 }, { "epoch": 0.841202791947056, "grad_norm": 33.65625, "learning_rate": 9.868562101869737e-06, "loss": 18.6757, "step": 45330 }, { "epoch": 0.8413883650315358, "grad_norm": 37.125, "learning_rate": 9.868533106083695e-06, "loss": 19.2032, "step": 45340 }, { "epoch": 0.8415739381160157, "grad_norm": 34.6875, "learning_rate": 9.86850411029765e-06, "loss": 18.8749, "step": 45350 }, { "epoch": 0.8417595112004955, "grad_norm": 35.65625, "learning_rate": 9.86847511451161e-06, "loss": 19.2826, "step": 45360 }, { "epoch": 0.8419450842849753, "grad_norm": 36.875, "learning_rate": 9.868446118725567e-06, "loss": 18.9139, "step": 45370 }, { "epoch": 0.8421306573694551, "grad_norm": 38.0, "learning_rate": 9.868417122939524e-06, "loss": 18.9206, "step": 45380 }, { "epoch": 0.842316230453935, "grad_norm": 35.6875, "learning_rate": 9.868388127153482e-06, "loss": 18.9808, "step": 45390 }, { "epoch": 0.8425018035384148, "grad_norm": 34.4375, "learning_rate": 9.868359131367439e-06, "loss": 18.8303, "step": 45400 }, { "epoch": 0.8426873766228946, "grad_norm": 36.5625, "learning_rate": 9.868330135581396e-06, "loss": 18.862, "step": 45410 }, { "epoch": 0.8428729497073745, "grad_norm": 37.1875, "learning_rate": 9.868301139795354e-06, "loss": 19.1974, "step": 45420 }, { "epoch": 0.8430585227918542, "grad_norm": 36.78125, "learning_rate": 9.868272144009313e-06, "loss": 19.5463, "step": 45430 }, { "epoch": 0.8432440958763341, "grad_norm": 34.59375, "learning_rate": 9.86824314822327e-06, "loss": 18.7967, "step": 45440 }, { "epoch": 0.843429668960814, "grad_norm": 36.75, "learning_rate": 9.868214152437226e-06, "loss": 19.0146, "step": 45450 }, { "epoch": 0.8436152420452937, "grad_norm": 36.65625, "learning_rate": 9.868185156651185e-06, "loss": 18.5762, "step": 45460 }, { "epoch": 0.8438008151297736, "grad_norm": 38.625, "learning_rate": 9.868156160865143e-06, "loss": 19.1253, "step": 45470 }, { "epoch": 0.8439863882142534, "grad_norm": 35.96875, "learning_rate": 9.8681271650791e-06, "loss": 18.8985, "step": 45480 }, { "epoch": 0.8441719612987333, "grad_norm": 36.625, "learning_rate": 9.868098169293057e-06, "loss": 18.4796, "step": 45490 }, { "epoch": 0.844357534383213, "grad_norm": 35.5625, "learning_rate": 9.868069173507015e-06, "loss": 18.9359, "step": 45500 }, { "epoch": 0.8445431074676929, "grad_norm": 35.125, "learning_rate": 9.868040177720972e-06, "loss": 19.2384, "step": 45510 }, { "epoch": 0.8447286805521728, "grad_norm": 38.3125, "learning_rate": 9.86801118193493e-06, "loss": 18.8187, "step": 45520 }, { "epoch": 0.8449142536366525, "grad_norm": 37.375, "learning_rate": 9.867982186148887e-06, "loss": 19.2896, "step": 45530 }, { "epoch": 0.8450998267211324, "grad_norm": 34.875, "learning_rate": 9.867953190362846e-06, "loss": 18.9661, "step": 45540 }, { "epoch": 0.8452853998056122, "grad_norm": 34.15625, "learning_rate": 9.867924194576803e-06, "loss": 19.0476, "step": 45550 }, { "epoch": 0.845470972890092, "grad_norm": 36.8125, "learning_rate": 9.867895198790761e-06, "loss": 19.2686, "step": 45560 }, { "epoch": 0.8456565459745718, "grad_norm": 36.5, "learning_rate": 9.867866203004718e-06, "loss": 19.37, "step": 45570 }, { "epoch": 0.8458421190590517, "grad_norm": 35.9375, "learning_rate": 9.867837207218676e-06, "loss": 19.5579, "step": 45580 }, { "epoch": 0.8460276921435315, "grad_norm": 35.6875, "learning_rate": 9.867808211432633e-06, "loss": 19.0715, "step": 45590 }, { "epoch": 0.8462132652280113, "grad_norm": 35.375, "learning_rate": 9.86777921564659e-06, "loss": 19.0432, "step": 45600 }, { "epoch": 0.8463988383124912, "grad_norm": 36.875, "learning_rate": 9.867750219860548e-06, "loss": 18.8202, "step": 45610 }, { "epoch": 0.8465844113969709, "grad_norm": 34.375, "learning_rate": 9.867721224074505e-06, "loss": 18.784, "step": 45620 }, { "epoch": 0.8467699844814508, "grad_norm": 36.0, "learning_rate": 9.867692228288463e-06, "loss": 18.8642, "step": 45630 }, { "epoch": 0.8469555575659307, "grad_norm": 36.875, "learning_rate": 9.867663232502422e-06, "loss": 19.0021, "step": 45640 }, { "epoch": 0.8471411306504104, "grad_norm": 35.09375, "learning_rate": 9.86763423671638e-06, "loss": 18.3599, "step": 45650 }, { "epoch": 0.8473267037348903, "grad_norm": 36.59375, "learning_rate": 9.867605240930337e-06, "loss": 18.9329, "step": 45660 }, { "epoch": 0.8475122768193701, "grad_norm": 36.9375, "learning_rate": 9.867576245144294e-06, "loss": 18.6595, "step": 45670 }, { "epoch": 0.84769784990385, "grad_norm": 35.96875, "learning_rate": 9.867547249358251e-06, "loss": 19.3835, "step": 45680 }, { "epoch": 0.8478834229883297, "grad_norm": 36.875, "learning_rate": 9.867518253572209e-06, "loss": 18.894, "step": 45690 }, { "epoch": 0.8480689960728096, "grad_norm": 35.40625, "learning_rate": 9.867489257786166e-06, "loss": 18.9299, "step": 45700 }, { "epoch": 0.8482545691572895, "grad_norm": 37.46875, "learning_rate": 9.867460262000125e-06, "loss": 19.3052, "step": 45710 }, { "epoch": 0.8484401422417692, "grad_norm": 34.59375, "learning_rate": 9.867431266214081e-06, "loss": 18.9821, "step": 45720 }, { "epoch": 0.8486257153262491, "grad_norm": 36.3125, "learning_rate": 9.867402270428039e-06, "loss": 18.9228, "step": 45730 }, { "epoch": 0.848811288410729, "grad_norm": 35.90625, "learning_rate": 9.867373274641998e-06, "loss": 19.1518, "step": 45740 }, { "epoch": 0.8489968614952087, "grad_norm": 36.4375, "learning_rate": 9.867344278855955e-06, "loss": 18.6011, "step": 45750 }, { "epoch": 0.8491824345796886, "grad_norm": 34.75, "learning_rate": 9.867315283069912e-06, "loss": 18.7753, "step": 45760 }, { "epoch": 0.8493680076641684, "grad_norm": 36.75, "learning_rate": 9.86728628728387e-06, "loss": 19.2827, "step": 45770 }, { "epoch": 0.8495535807486482, "grad_norm": 35.53125, "learning_rate": 9.867257291497827e-06, "loss": 18.5532, "step": 45780 }, { "epoch": 0.849739153833128, "grad_norm": 37.34375, "learning_rate": 9.867228295711785e-06, "loss": 19.2479, "step": 45790 }, { "epoch": 0.8499247269176079, "grad_norm": 37.6875, "learning_rate": 9.867199299925742e-06, "loss": 19.3353, "step": 45800 }, { "epoch": 0.8501103000020876, "grad_norm": 37.03125, "learning_rate": 9.867170304139701e-06, "loss": 19.4561, "step": 45810 }, { "epoch": 0.8502958730865675, "grad_norm": 36.5, "learning_rate": 9.867141308353659e-06, "loss": 19.2609, "step": 45820 }, { "epoch": 0.8504814461710474, "grad_norm": 35.59375, "learning_rate": 9.867112312567614e-06, "loss": 18.6296, "step": 45830 }, { "epoch": 0.8506670192555271, "grad_norm": 36.25, "learning_rate": 9.867083316781573e-06, "loss": 18.561, "step": 45840 }, { "epoch": 0.850852592340007, "grad_norm": 37.0, "learning_rate": 9.86705432099553e-06, "loss": 19.1894, "step": 45850 }, { "epoch": 0.8510381654244868, "grad_norm": 39.0625, "learning_rate": 9.867025325209488e-06, "loss": 19.0677, "step": 45860 }, { "epoch": 0.8512237385089667, "grad_norm": 37.15625, "learning_rate": 9.866996329423446e-06, "loss": 19.3011, "step": 45870 }, { "epoch": 0.8514093115934465, "grad_norm": 38.15625, "learning_rate": 9.866967333637403e-06, "loss": 18.3706, "step": 45880 }, { "epoch": 0.8515948846779263, "grad_norm": 36.5625, "learning_rate": 9.86693833785136e-06, "loss": 18.8431, "step": 45890 }, { "epoch": 0.8517804577624062, "grad_norm": 37.78125, "learning_rate": 9.866909342065318e-06, "loss": 18.7596, "step": 45900 }, { "epoch": 0.8519660308468859, "grad_norm": 37.40625, "learning_rate": 9.866880346279277e-06, "loss": 19.2939, "step": 45910 }, { "epoch": 0.8521516039313658, "grad_norm": 36.96875, "learning_rate": 9.866851350493234e-06, "loss": 18.9418, "step": 45920 }, { "epoch": 0.8523371770158457, "grad_norm": 34.34375, "learning_rate": 9.866822354707192e-06, "loss": 18.5307, "step": 45930 }, { "epoch": 0.8525227501003254, "grad_norm": 39.0625, "learning_rate": 9.866793358921149e-06, "loss": 19.5498, "step": 45940 }, { "epoch": 0.8527083231848053, "grad_norm": 35.28125, "learning_rate": 9.866764363135107e-06, "loss": 18.329, "step": 45950 }, { "epoch": 0.8528938962692851, "grad_norm": 37.09375, "learning_rate": 9.866735367349064e-06, "loss": 18.971, "step": 45960 }, { "epoch": 0.8530794693537649, "grad_norm": 36.03125, "learning_rate": 9.866706371563021e-06, "loss": 18.7174, "step": 45970 }, { "epoch": 0.8532650424382447, "grad_norm": 35.25, "learning_rate": 9.866677375776979e-06, "loss": 18.7271, "step": 45980 }, { "epoch": 0.8534506155227246, "grad_norm": 33.03125, "learning_rate": 9.866648379990936e-06, "loss": 19.0984, "step": 45990 }, { "epoch": 0.8536361886072044, "grad_norm": 37.09375, "learning_rate": 9.866619384204894e-06, "loss": 19.3761, "step": 46000 }, { "epoch": 0.8538217616916842, "grad_norm": 38.1875, "learning_rate": 9.866590388418853e-06, "loss": 18.9924, "step": 46010 }, { "epoch": 0.8540073347761641, "grad_norm": 35.90625, "learning_rate": 9.86656139263281e-06, "loss": 18.9491, "step": 46020 }, { "epoch": 0.854192907860644, "grad_norm": 34.875, "learning_rate": 9.866532396846767e-06, "loss": 19.4705, "step": 46030 }, { "epoch": 0.8543784809451237, "grad_norm": 36.09375, "learning_rate": 9.866503401060725e-06, "loss": 18.1977, "step": 46040 }, { "epoch": 0.8545640540296036, "grad_norm": 36.25, "learning_rate": 9.866474405274682e-06, "loss": 18.6364, "step": 46050 }, { "epoch": 0.8547496271140834, "grad_norm": 34.6875, "learning_rate": 9.86644540948864e-06, "loss": 18.6337, "step": 46060 }, { "epoch": 0.8549352001985632, "grad_norm": 36.9375, "learning_rate": 9.866416413702597e-06, "loss": 19.3606, "step": 46070 }, { "epoch": 0.855120773283043, "grad_norm": 36.59375, "learning_rate": 9.866387417916555e-06, "loss": 19.6339, "step": 46080 }, { "epoch": 0.8553063463675229, "grad_norm": 36.6875, "learning_rate": 9.866358422130514e-06, "loss": 19.3116, "step": 46090 }, { "epoch": 0.8554919194520026, "grad_norm": 37.21875, "learning_rate": 9.86632942634447e-06, "loss": 19.3043, "step": 46100 }, { "epoch": 0.8556774925364825, "grad_norm": 36.1875, "learning_rate": 9.866300430558427e-06, "loss": 18.8968, "step": 46110 }, { "epoch": 0.8558630656209624, "grad_norm": 36.9375, "learning_rate": 9.866271434772386e-06, "loss": 19.0006, "step": 46120 }, { "epoch": 0.8560486387054421, "grad_norm": 35.8125, "learning_rate": 9.866242438986343e-06, "loss": 19.0822, "step": 46130 }, { "epoch": 0.856234211789922, "grad_norm": 38.0, "learning_rate": 9.8662134432003e-06, "loss": 18.9034, "step": 46140 }, { "epoch": 0.8564197848744018, "grad_norm": 35.46875, "learning_rate": 9.866184447414258e-06, "loss": 18.5902, "step": 46150 }, { "epoch": 0.8566053579588816, "grad_norm": 35.84375, "learning_rate": 9.866155451628215e-06, "loss": 18.9346, "step": 46160 }, { "epoch": 0.8567909310433615, "grad_norm": 37.09375, "learning_rate": 9.866126455842173e-06, "loss": 19.1755, "step": 46170 }, { "epoch": 0.8569765041278413, "grad_norm": 37.46875, "learning_rate": 9.86609746005613e-06, "loss": 19.1133, "step": 46180 }, { "epoch": 0.8571620772123211, "grad_norm": 35.875, "learning_rate": 9.86606846427009e-06, "loss": 18.9241, "step": 46190 }, { "epoch": 0.8573476502968009, "grad_norm": 35.46875, "learning_rate": 9.866039468484045e-06, "loss": 19.4059, "step": 46200 }, { "epoch": 0.8575332233812808, "grad_norm": 33.375, "learning_rate": 9.866010472698003e-06, "loss": 19.2368, "step": 46210 }, { "epoch": 0.8577187964657607, "grad_norm": 35.875, "learning_rate": 9.865981476911962e-06, "loss": 18.9356, "step": 46220 }, { "epoch": 0.8579043695502404, "grad_norm": 36.6875, "learning_rate": 9.865952481125919e-06, "loss": 19.2764, "step": 46230 }, { "epoch": 0.8580899426347203, "grad_norm": 36.8125, "learning_rate": 9.865923485339876e-06, "loss": 19.1283, "step": 46240 }, { "epoch": 0.8582755157192001, "grad_norm": 36.4375, "learning_rate": 9.865894489553834e-06, "loss": 18.773, "step": 46250 }, { "epoch": 0.8584610888036799, "grad_norm": 34.15625, "learning_rate": 9.865865493767791e-06, "loss": 19.0013, "step": 46260 }, { "epoch": 0.8586466618881597, "grad_norm": 35.5, "learning_rate": 9.865836497981749e-06, "loss": 19.106, "step": 46270 }, { "epoch": 0.8588322349726396, "grad_norm": 36.46875, "learning_rate": 9.865807502195706e-06, "loss": 19.1127, "step": 46280 }, { "epoch": 0.8590178080571194, "grad_norm": 36.03125, "learning_rate": 9.865778506409665e-06, "loss": 19.1271, "step": 46290 }, { "epoch": 0.8592033811415992, "grad_norm": 36.25, "learning_rate": 9.865749510623623e-06, "loss": 18.7041, "step": 46300 }, { "epoch": 0.8593889542260791, "grad_norm": 35.96875, "learning_rate": 9.865720514837578e-06, "loss": 19.1869, "step": 46310 }, { "epoch": 0.8595745273105588, "grad_norm": 37.34375, "learning_rate": 9.865691519051537e-06, "loss": 19.1163, "step": 46320 }, { "epoch": 0.8597601003950387, "grad_norm": 38.09375, "learning_rate": 9.865662523265495e-06, "loss": 18.9266, "step": 46330 }, { "epoch": 0.8599456734795186, "grad_norm": 36.0, "learning_rate": 9.865633527479452e-06, "loss": 19.044, "step": 46340 }, { "epoch": 0.8601312465639983, "grad_norm": 36.96875, "learning_rate": 9.86560453169341e-06, "loss": 19.1646, "step": 46350 }, { "epoch": 0.8603168196484782, "grad_norm": 38.4375, "learning_rate": 9.865575535907369e-06, "loss": 18.5803, "step": 46360 }, { "epoch": 0.860502392732958, "grad_norm": 36.25, "learning_rate": 9.865546540121324e-06, "loss": 19.0525, "step": 46370 }, { "epoch": 0.8606879658174378, "grad_norm": 36.78125, "learning_rate": 9.865517544335282e-06, "loss": 18.6633, "step": 46380 }, { "epoch": 0.8608735389019176, "grad_norm": 35.59375, "learning_rate": 9.865488548549241e-06, "loss": 19.2111, "step": 46390 }, { "epoch": 0.8610591119863975, "grad_norm": 37.34375, "learning_rate": 9.865459552763198e-06, "loss": 19.2641, "step": 46400 }, { "epoch": 0.8612446850708774, "grad_norm": 35.1875, "learning_rate": 9.865430556977156e-06, "loss": 19.0881, "step": 46410 }, { "epoch": 0.8614302581553571, "grad_norm": 38.5625, "learning_rate": 9.865401561191113e-06, "loss": 19.2938, "step": 46420 }, { "epoch": 0.861615831239837, "grad_norm": 36.21875, "learning_rate": 9.86537256540507e-06, "loss": 18.9959, "step": 46430 }, { "epoch": 0.8618014043243168, "grad_norm": 36.40625, "learning_rate": 9.865343569619028e-06, "loss": 19.1233, "step": 46440 }, { "epoch": 0.8619869774087966, "grad_norm": 39.0625, "learning_rate": 9.865314573832985e-06, "loss": 18.7088, "step": 46450 }, { "epoch": 0.8621725504932765, "grad_norm": 35.25, "learning_rate": 9.865285578046944e-06, "loss": 18.7742, "step": 46460 }, { "epoch": 0.8623581235777563, "grad_norm": 37.875, "learning_rate": 9.8652565822609e-06, "loss": 19.0445, "step": 46470 }, { "epoch": 0.8625436966622361, "grad_norm": 36.125, "learning_rate": 9.865227586474858e-06, "loss": 18.9246, "step": 46480 }, { "epoch": 0.8627292697467159, "grad_norm": 35.8125, "learning_rate": 9.865198590688817e-06, "loss": 19.4552, "step": 46490 }, { "epoch": 0.8629148428311958, "grad_norm": 37.59375, "learning_rate": 9.865169594902774e-06, "loss": 19.0426, "step": 46500 }, { "epoch": 0.8631004159156755, "grad_norm": 35.0, "learning_rate": 9.865140599116732e-06, "loss": 18.9247, "step": 46510 }, { "epoch": 0.8632859890001554, "grad_norm": 36.28125, "learning_rate": 9.865111603330689e-06, "loss": 19.2894, "step": 46520 }, { "epoch": 0.8634715620846353, "grad_norm": 36.78125, "learning_rate": 9.865082607544646e-06, "loss": 18.7233, "step": 46530 }, { "epoch": 0.863657135169115, "grad_norm": 35.25, "learning_rate": 9.865053611758604e-06, "loss": 19.2367, "step": 46540 }, { "epoch": 0.8638427082535949, "grad_norm": 38.46875, "learning_rate": 9.865024615972561e-06, "loss": 19.3671, "step": 46550 }, { "epoch": 0.8640282813380747, "grad_norm": 33.9375, "learning_rate": 9.864995620186519e-06, "loss": 19.4396, "step": 46560 }, { "epoch": 0.8642138544225546, "grad_norm": 37.15625, "learning_rate": 9.864966624400478e-06, "loss": 18.6888, "step": 46570 }, { "epoch": 0.8643994275070344, "grad_norm": 35.40625, "learning_rate": 9.864937628614433e-06, "loss": 19.0491, "step": 46580 }, { "epoch": 0.8645850005915142, "grad_norm": 38.65625, "learning_rate": 9.864908632828392e-06, "loss": 19.1106, "step": 46590 }, { "epoch": 0.8647705736759941, "grad_norm": 34.84375, "learning_rate": 9.86487963704235e-06, "loss": 19.0092, "step": 46600 }, { "epoch": 0.8649561467604738, "grad_norm": 35.625, "learning_rate": 9.864850641256307e-06, "loss": 19.4733, "step": 46610 }, { "epoch": 0.8651417198449537, "grad_norm": 38.1875, "learning_rate": 9.864821645470265e-06, "loss": 18.892, "step": 46620 }, { "epoch": 0.8653272929294336, "grad_norm": 35.6875, "learning_rate": 9.864792649684222e-06, "loss": 18.8302, "step": 46630 }, { "epoch": 0.8655128660139133, "grad_norm": 35.09375, "learning_rate": 9.86476365389818e-06, "loss": 19.2244, "step": 46640 }, { "epoch": 0.8656984390983932, "grad_norm": 35.40625, "learning_rate": 9.864734658112137e-06, "loss": 18.9491, "step": 46650 }, { "epoch": 0.865884012182873, "grad_norm": 35.9375, "learning_rate": 9.864705662326094e-06, "loss": 18.688, "step": 46660 }, { "epoch": 0.8660695852673528, "grad_norm": 36.4375, "learning_rate": 9.864676666540053e-06, "loss": 18.8324, "step": 46670 }, { "epoch": 0.8662551583518326, "grad_norm": 37.3125, "learning_rate": 9.86464767075401e-06, "loss": 18.8215, "step": 46680 }, { "epoch": 0.8664407314363125, "grad_norm": 36.65625, "learning_rate": 9.864618674967967e-06, "loss": 18.8459, "step": 46690 }, { "epoch": 0.8666263045207923, "grad_norm": 35.34375, "learning_rate": 9.864589679181926e-06, "loss": 18.5788, "step": 46700 }, { "epoch": 0.8668118776052721, "grad_norm": 36.3125, "learning_rate": 9.864560683395883e-06, "loss": 18.8399, "step": 46710 }, { "epoch": 0.866997450689752, "grad_norm": 36.1875, "learning_rate": 9.86453168760984e-06, "loss": 19.4052, "step": 46720 }, { "epoch": 0.8671830237742317, "grad_norm": 36.46875, "learning_rate": 9.864502691823798e-06, "loss": 18.9626, "step": 46730 }, { "epoch": 0.8673685968587116, "grad_norm": 37.1875, "learning_rate": 9.864473696037755e-06, "loss": 18.9508, "step": 46740 }, { "epoch": 0.8675541699431915, "grad_norm": 34.65625, "learning_rate": 9.864444700251713e-06, "loss": 18.9008, "step": 46750 }, { "epoch": 0.8677397430276713, "grad_norm": 38.40625, "learning_rate": 9.86441570446567e-06, "loss": 18.8394, "step": 46760 }, { "epoch": 0.8679253161121511, "grad_norm": 35.96875, "learning_rate": 9.86438670867963e-06, "loss": 18.9625, "step": 46770 }, { "epoch": 0.8681108891966309, "grad_norm": 34.5625, "learning_rate": 9.864357712893587e-06, "loss": 18.8886, "step": 46780 }, { "epoch": 0.8682964622811108, "grad_norm": 38.40625, "learning_rate": 9.864328717107542e-06, "loss": 18.8803, "step": 46790 }, { "epoch": 0.8684820353655905, "grad_norm": 35.84375, "learning_rate": 9.864299721321501e-06, "loss": 19.1182, "step": 46800 }, { "epoch": 0.8686676084500704, "grad_norm": 37.40625, "learning_rate": 9.864270725535459e-06, "loss": 18.4934, "step": 46810 }, { "epoch": 0.8688531815345503, "grad_norm": 36.875, "learning_rate": 9.864241729749416e-06, "loss": 18.8362, "step": 46820 }, { "epoch": 0.86903875461903, "grad_norm": 37.46875, "learning_rate": 9.864212733963374e-06, "loss": 18.8735, "step": 46830 }, { "epoch": 0.8692243277035099, "grad_norm": 34.375, "learning_rate": 9.864183738177333e-06, "loss": 19.1104, "step": 46840 }, { "epoch": 0.8694099007879897, "grad_norm": 36.375, "learning_rate": 9.864154742391288e-06, "loss": 18.7271, "step": 46850 }, { "epoch": 0.8695954738724695, "grad_norm": 37.0, "learning_rate": 9.864125746605246e-06, "loss": 19.1983, "step": 46860 }, { "epoch": 0.8697810469569494, "grad_norm": 36.5625, "learning_rate": 9.864096750819205e-06, "loss": 19.5429, "step": 46870 }, { "epoch": 0.8699666200414292, "grad_norm": 36.90625, "learning_rate": 9.864067755033162e-06, "loss": 19.0335, "step": 46880 }, { "epoch": 0.870152193125909, "grad_norm": 38.6875, "learning_rate": 9.86403875924712e-06, "loss": 19.148, "step": 46890 }, { "epoch": 0.8703377662103888, "grad_norm": 36.5, "learning_rate": 9.864009763461077e-06, "loss": 19.0772, "step": 46900 }, { "epoch": 0.8705233392948687, "grad_norm": 37.5625, "learning_rate": 9.863980767675035e-06, "loss": 18.9625, "step": 46910 }, { "epoch": 0.8707089123793486, "grad_norm": 36.0, "learning_rate": 9.863951771888992e-06, "loss": 18.4917, "step": 46920 }, { "epoch": 0.8708944854638283, "grad_norm": 34.78125, "learning_rate": 9.86392277610295e-06, "loss": 19.0948, "step": 46930 }, { "epoch": 0.8710800585483082, "grad_norm": 36.84375, "learning_rate": 9.863893780316908e-06, "loss": 18.7454, "step": 46940 }, { "epoch": 0.871265631632788, "grad_norm": 35.4375, "learning_rate": 9.863864784530866e-06, "loss": 19.0614, "step": 46950 }, { "epoch": 0.8714512047172678, "grad_norm": 35.65625, "learning_rate": 9.863835788744822e-06, "loss": 19.0184, "step": 46960 }, { "epoch": 0.8716367778017476, "grad_norm": 36.0625, "learning_rate": 9.86380679295878e-06, "loss": 18.9749, "step": 46970 }, { "epoch": 0.8718223508862275, "grad_norm": 38.625, "learning_rate": 9.863777797172738e-06, "loss": 18.9738, "step": 46980 }, { "epoch": 0.8720079239707073, "grad_norm": 36.0625, "learning_rate": 9.863748801386696e-06, "loss": 18.6942, "step": 46990 }, { "epoch": 0.8721934970551871, "grad_norm": 37.3125, "learning_rate": 9.863719805600653e-06, "loss": 19.1092, "step": 47000 }, { "epoch": 0.872379070139667, "grad_norm": 36.375, "learning_rate": 9.86369080981461e-06, "loss": 19.1041, "step": 47010 }, { "epoch": 0.8725646432241467, "grad_norm": 36.46875, "learning_rate": 9.863661814028568e-06, "loss": 18.8907, "step": 47020 }, { "epoch": 0.8727502163086266, "grad_norm": 36.625, "learning_rate": 9.863632818242525e-06, "loss": 18.604, "step": 47030 }, { "epoch": 0.8729357893931065, "grad_norm": 34.40625, "learning_rate": 9.863603822456483e-06, "loss": 18.9317, "step": 47040 }, { "epoch": 0.8731213624775862, "grad_norm": 35.78125, "learning_rate": 9.863574826670442e-06, "loss": 18.8261, "step": 47050 }, { "epoch": 0.8733069355620661, "grad_norm": 35.375, "learning_rate": 9.863545830884397e-06, "loss": 18.7842, "step": 47060 }, { "epoch": 0.8734925086465459, "grad_norm": 35.65625, "learning_rate": 9.863516835098356e-06, "loss": 19.1264, "step": 47070 }, { "epoch": 0.8736780817310257, "grad_norm": 37.59375, "learning_rate": 9.863487839312314e-06, "loss": 18.9965, "step": 47080 }, { "epoch": 0.8738636548155055, "grad_norm": 34.65625, "learning_rate": 9.863458843526271e-06, "loss": 18.7441, "step": 47090 }, { "epoch": 0.8740492278999854, "grad_norm": 35.03125, "learning_rate": 9.863429847740229e-06, "loss": 19.2158, "step": 47100 }, { "epoch": 0.8742348009844653, "grad_norm": 37.53125, "learning_rate": 9.863400851954186e-06, "loss": 19.4903, "step": 47110 }, { "epoch": 0.874420374068945, "grad_norm": 35.09375, "learning_rate": 9.863371856168144e-06, "loss": 19.3462, "step": 47120 }, { "epoch": 0.8746059471534249, "grad_norm": 34.96875, "learning_rate": 9.863342860382101e-06, "loss": 18.5112, "step": 47130 }, { "epoch": 0.8747915202379047, "grad_norm": 35.84375, "learning_rate": 9.863313864596058e-06, "loss": 18.7684, "step": 47140 }, { "epoch": 0.8749770933223845, "grad_norm": 37.84375, "learning_rate": 9.863284868810017e-06, "loss": 18.657, "step": 47150 }, { "epoch": 0.8751626664068644, "grad_norm": 34.625, "learning_rate": 9.863255873023975e-06, "loss": 18.6456, "step": 47160 }, { "epoch": 0.8753482394913442, "grad_norm": 34.75, "learning_rate": 9.86322687723793e-06, "loss": 18.837, "step": 47170 }, { "epoch": 0.875533812575824, "grad_norm": 36.25, "learning_rate": 9.86319788145189e-06, "loss": 18.7192, "step": 47180 }, { "epoch": 0.8757193856603038, "grad_norm": 36.1875, "learning_rate": 9.863168885665847e-06, "loss": 18.5609, "step": 47190 }, { "epoch": 0.8759049587447837, "grad_norm": 35.5625, "learning_rate": 9.863139889879804e-06, "loss": 18.5446, "step": 47200 }, { "epoch": 0.8760905318292634, "grad_norm": 35.34375, "learning_rate": 9.863110894093762e-06, "loss": 19.2299, "step": 47210 }, { "epoch": 0.8762761049137433, "grad_norm": 36.78125, "learning_rate": 9.86308189830772e-06, "loss": 18.9342, "step": 47220 }, { "epoch": 0.8764616779982232, "grad_norm": 35.5, "learning_rate": 9.863052902521677e-06, "loss": 18.7533, "step": 47230 }, { "epoch": 0.8766472510827029, "grad_norm": 36.28125, "learning_rate": 9.863023906735634e-06, "loss": 18.6551, "step": 47240 }, { "epoch": 0.8768328241671828, "grad_norm": 36.84375, "learning_rate": 9.862994910949593e-06, "loss": 18.4443, "step": 47250 }, { "epoch": 0.8770183972516626, "grad_norm": 38.1875, "learning_rate": 9.86296591516355e-06, "loss": 18.5278, "step": 47260 }, { "epoch": 0.8772039703361424, "grad_norm": 37.03125, "learning_rate": 9.862936919377508e-06, "loss": 18.8955, "step": 47270 }, { "epoch": 0.8773895434206223, "grad_norm": 37.125, "learning_rate": 9.862907923591465e-06, "loss": 18.8201, "step": 47280 }, { "epoch": 0.8775751165051021, "grad_norm": 35.0625, "learning_rate": 9.862878927805423e-06, "loss": 18.9443, "step": 47290 }, { "epoch": 0.877760689589582, "grad_norm": 34.78125, "learning_rate": 9.86284993201938e-06, "loss": 19.0257, "step": 47300 }, { "epoch": 0.8779462626740617, "grad_norm": 37.40625, "learning_rate": 9.862820936233338e-06, "loss": 19.4971, "step": 47310 }, { "epoch": 0.8781318357585416, "grad_norm": 35.75, "learning_rate": 9.862791940447297e-06, "loss": 19.3668, "step": 47320 }, { "epoch": 0.8783174088430215, "grad_norm": 36.40625, "learning_rate": 9.862762944661252e-06, "loss": 18.8734, "step": 47330 }, { "epoch": 0.8785029819275012, "grad_norm": 35.8125, "learning_rate": 9.86273394887521e-06, "loss": 19.0038, "step": 47340 }, { "epoch": 0.8786885550119811, "grad_norm": 36.65625, "learning_rate": 9.862704953089169e-06, "loss": 18.8259, "step": 47350 }, { "epoch": 0.8788741280964609, "grad_norm": 36.875, "learning_rate": 9.862675957303126e-06, "loss": 18.5228, "step": 47360 }, { "epoch": 0.8790597011809407, "grad_norm": 36.6875, "learning_rate": 9.862646961517084e-06, "loss": 19.1345, "step": 47370 }, { "epoch": 0.8792452742654205, "grad_norm": 36.0625, "learning_rate": 9.862617965731041e-06, "loss": 19.2282, "step": 47380 }, { "epoch": 0.8794308473499004, "grad_norm": 37.0, "learning_rate": 9.862588969944999e-06, "loss": 19.0534, "step": 47390 }, { "epoch": 0.8796164204343802, "grad_norm": 36.59375, "learning_rate": 9.862559974158956e-06, "loss": 19.0539, "step": 47400 }, { "epoch": 0.87980199351886, "grad_norm": 37.78125, "learning_rate": 9.862530978372913e-06, "loss": 18.8846, "step": 47410 }, { "epoch": 0.8799875666033399, "grad_norm": 35.5625, "learning_rate": 9.862501982586873e-06, "loss": 19.2657, "step": 47420 }, { "epoch": 0.8801731396878196, "grad_norm": 37.09375, "learning_rate": 9.86247298680083e-06, "loss": 18.8702, "step": 47430 }, { "epoch": 0.8803587127722995, "grad_norm": 36.71875, "learning_rate": 9.862443991014786e-06, "loss": 18.7863, "step": 47440 }, { "epoch": 0.8805442858567794, "grad_norm": 36.90625, "learning_rate": 9.862414995228745e-06, "loss": 19.1751, "step": 47450 }, { "epoch": 0.8807298589412592, "grad_norm": 36.75, "learning_rate": 9.862385999442702e-06, "loss": 18.5023, "step": 47460 }, { "epoch": 0.880915432025739, "grad_norm": 35.375, "learning_rate": 9.86235700365666e-06, "loss": 18.9846, "step": 47470 }, { "epoch": 0.8811010051102188, "grad_norm": 34.90625, "learning_rate": 9.862328007870617e-06, "loss": 19.0925, "step": 47480 }, { "epoch": 0.8812865781946987, "grad_norm": 37.6875, "learning_rate": 9.862299012084574e-06, "loss": 19.0749, "step": 47490 }, { "epoch": 0.8814721512791784, "grad_norm": 38.9375, "learning_rate": 9.862270016298532e-06, "loss": 18.9493, "step": 47500 }, { "epoch": 0.8816577243636583, "grad_norm": 35.15625, "learning_rate": 9.86224102051249e-06, "loss": 19.2803, "step": 47510 }, { "epoch": 0.8818432974481382, "grad_norm": 39.0, "learning_rate": 9.862212024726448e-06, "loss": 19.335, "step": 47520 }, { "epoch": 0.8820288705326179, "grad_norm": 35.5625, "learning_rate": 9.862183028940406e-06, "loss": 18.7052, "step": 47530 }, { "epoch": 0.8822144436170978, "grad_norm": 38.09375, "learning_rate": 9.862154033154363e-06, "loss": 19.1641, "step": 47540 }, { "epoch": 0.8824000167015776, "grad_norm": 37.125, "learning_rate": 9.86212503736832e-06, "loss": 19.243, "step": 47550 }, { "epoch": 0.8825855897860574, "grad_norm": 37.71875, "learning_rate": 9.862096041582278e-06, "loss": 19.053, "step": 47560 }, { "epoch": 0.8827711628705373, "grad_norm": 32.5, "learning_rate": 9.862067045796235e-06, "loss": 18.7586, "step": 47570 }, { "epoch": 0.8829567359550171, "grad_norm": 35.03125, "learning_rate": 9.862038050010193e-06, "loss": 19.1617, "step": 47580 }, { "epoch": 0.8831423090394969, "grad_norm": 36.59375, "learning_rate": 9.86200905422415e-06, "loss": 18.6394, "step": 47590 }, { "epoch": 0.8833278821239767, "grad_norm": 36.75, "learning_rate": 9.861980058438108e-06, "loss": 18.9452, "step": 47600 }, { "epoch": 0.8835134552084566, "grad_norm": 35.0, "learning_rate": 9.861951062652065e-06, "loss": 19.132, "step": 47610 }, { "epoch": 0.8836990282929363, "grad_norm": 38.15625, "learning_rate": 9.861922066866022e-06, "loss": 19.0798, "step": 47620 }, { "epoch": 0.8838846013774162, "grad_norm": 35.96875, "learning_rate": 9.861893071079981e-06, "loss": 18.363, "step": 47630 }, { "epoch": 0.8840701744618961, "grad_norm": 35.59375, "learning_rate": 9.861864075293939e-06, "loss": 18.5769, "step": 47640 }, { "epoch": 0.8842557475463759, "grad_norm": 35.1875, "learning_rate": 9.861835079507896e-06, "loss": 18.8611, "step": 47650 }, { "epoch": 0.8844413206308557, "grad_norm": 36.125, "learning_rate": 9.861806083721854e-06, "loss": 18.9997, "step": 47660 }, { "epoch": 0.8846268937153355, "grad_norm": 35.5625, "learning_rate": 9.861777087935811e-06, "loss": 18.8766, "step": 47670 }, { "epoch": 0.8848124667998154, "grad_norm": 34.6875, "learning_rate": 9.861748092149768e-06, "loss": 19.2543, "step": 47680 }, { "epoch": 0.8849980398842952, "grad_norm": 35.25, "learning_rate": 9.861719096363726e-06, "loss": 18.987, "step": 47690 }, { "epoch": 0.885183612968775, "grad_norm": 38.28125, "learning_rate": 9.861690100577685e-06, "loss": 18.7889, "step": 47700 }, { "epoch": 0.8853691860532549, "grad_norm": 36.9375, "learning_rate": 9.86166110479164e-06, "loss": 18.8273, "step": 47710 }, { "epoch": 0.8855547591377346, "grad_norm": 38.03125, "learning_rate": 9.861632109005598e-06, "loss": 18.6823, "step": 47720 }, { "epoch": 0.8857403322222145, "grad_norm": 36.03125, "learning_rate": 9.861603113219557e-06, "loss": 18.8164, "step": 47730 }, { "epoch": 0.8859259053066944, "grad_norm": 33.46875, "learning_rate": 9.861574117433515e-06, "loss": 18.9653, "step": 47740 }, { "epoch": 0.8861114783911741, "grad_norm": 35.03125, "learning_rate": 9.861545121647472e-06, "loss": 18.7712, "step": 47750 }, { "epoch": 0.886297051475654, "grad_norm": 35.5, "learning_rate": 9.86151612586143e-06, "loss": 19.0222, "step": 47760 }, { "epoch": 0.8864826245601338, "grad_norm": 37.375, "learning_rate": 9.861487130075387e-06, "loss": 19.0491, "step": 47770 }, { "epoch": 0.8866681976446136, "grad_norm": 37.46875, "learning_rate": 9.861458134289344e-06, "loss": 19.0221, "step": 47780 }, { "epoch": 0.8868537707290934, "grad_norm": 38.96875, "learning_rate": 9.861429138503302e-06, "loss": 19.1566, "step": 47790 }, { "epoch": 0.8870393438135733, "grad_norm": 37.34375, "learning_rate": 9.86140014271726e-06, "loss": 18.9649, "step": 47800 }, { "epoch": 0.887224916898053, "grad_norm": 36.28125, "learning_rate": 9.861371146931216e-06, "loss": 18.9498, "step": 47810 }, { "epoch": 0.8874104899825329, "grad_norm": 37.125, "learning_rate": 9.861342151145174e-06, "loss": 18.8002, "step": 47820 }, { "epoch": 0.8875960630670128, "grad_norm": 35.5, "learning_rate": 9.861313155359133e-06, "loss": 19.0088, "step": 47830 }, { "epoch": 0.8877816361514926, "grad_norm": 37.53125, "learning_rate": 9.86128415957309e-06, "loss": 18.5887, "step": 47840 }, { "epoch": 0.8879672092359724, "grad_norm": 36.375, "learning_rate": 9.861255163787048e-06, "loss": 18.9607, "step": 47850 }, { "epoch": 0.8881527823204522, "grad_norm": 35.15625, "learning_rate": 9.861226168001005e-06, "loss": 18.6494, "step": 47860 }, { "epoch": 0.8883383554049321, "grad_norm": 35.78125, "learning_rate": 9.861197172214963e-06, "loss": 19.3655, "step": 47870 }, { "epoch": 0.8885239284894119, "grad_norm": 36.9375, "learning_rate": 9.86116817642892e-06, "loss": 19.1031, "step": 47880 }, { "epoch": 0.8887095015738917, "grad_norm": 37.15625, "learning_rate": 9.861139180642877e-06, "loss": 18.8792, "step": 47890 }, { "epoch": 0.8888950746583716, "grad_norm": 36.09375, "learning_rate": 9.861110184856837e-06, "loss": 18.7694, "step": 47900 }, { "epoch": 0.8890806477428513, "grad_norm": 34.125, "learning_rate": 9.861081189070794e-06, "loss": 19.1518, "step": 47910 }, { "epoch": 0.8892662208273312, "grad_norm": 35.90625, "learning_rate": 9.86105219328475e-06, "loss": 19.2373, "step": 47920 }, { "epoch": 0.8894517939118111, "grad_norm": 38.25, "learning_rate": 9.861023197498709e-06, "loss": 18.6068, "step": 47930 }, { "epoch": 0.8896373669962908, "grad_norm": 37.46875, "learning_rate": 9.860994201712666e-06, "loss": 18.9451, "step": 47940 }, { "epoch": 0.8898229400807707, "grad_norm": 38.53125, "learning_rate": 9.860965205926624e-06, "loss": 18.7446, "step": 47950 }, { "epoch": 0.8900085131652505, "grad_norm": 36.28125, "learning_rate": 9.860936210140581e-06, "loss": 18.5875, "step": 47960 }, { "epoch": 0.8901940862497303, "grad_norm": 35.0, "learning_rate": 9.860907214354538e-06, "loss": 19.145, "step": 47970 }, { "epoch": 0.8903796593342101, "grad_norm": 39.1875, "learning_rate": 9.860878218568496e-06, "loss": 18.6372, "step": 47980 }, { "epoch": 0.89056523241869, "grad_norm": 36.1875, "learning_rate": 9.860849222782453e-06, "loss": 19.0401, "step": 47990 }, { "epoch": 0.8907508055031699, "grad_norm": 36.03125, "learning_rate": 9.860820226996412e-06, "loss": 19.0229, "step": 48000 }, { "epoch": 0.8909363785876496, "grad_norm": 36.125, "learning_rate": 9.86079123121037e-06, "loss": 18.9806, "step": 48010 }, { "epoch": 0.8911219516721295, "grad_norm": 36.25, "learning_rate": 9.860762235424327e-06, "loss": 19.2438, "step": 48020 }, { "epoch": 0.8913075247566093, "grad_norm": 36.875, "learning_rate": 9.860733239638285e-06, "loss": 18.9621, "step": 48030 }, { "epoch": 0.8914930978410891, "grad_norm": 33.46875, "learning_rate": 9.860704243852242e-06, "loss": 18.8874, "step": 48040 }, { "epoch": 0.891678670925569, "grad_norm": 35.65625, "learning_rate": 9.8606752480662e-06, "loss": 18.5955, "step": 48050 }, { "epoch": 0.8918642440100488, "grad_norm": 35.21875, "learning_rate": 9.860646252280157e-06, "loss": 19.0383, "step": 48060 }, { "epoch": 0.8920498170945286, "grad_norm": 39.28125, "learning_rate": 9.860617256494114e-06, "loss": 19.1283, "step": 48070 }, { "epoch": 0.8922353901790084, "grad_norm": 36.65625, "learning_rate": 9.860588260708072e-06, "loss": 18.4688, "step": 48080 }, { "epoch": 0.8924209632634883, "grad_norm": 35.59375, "learning_rate": 9.860559264922029e-06, "loss": 18.8262, "step": 48090 }, { "epoch": 0.892606536347968, "grad_norm": 35.53125, "learning_rate": 9.860530269135988e-06, "loss": 18.8162, "step": 48100 }, { "epoch": 0.8927921094324479, "grad_norm": 36.75, "learning_rate": 9.860501273349945e-06, "loss": 18.8916, "step": 48110 }, { "epoch": 0.8929776825169278, "grad_norm": 36.90625, "learning_rate": 9.860472277563903e-06, "loss": 18.898, "step": 48120 }, { "epoch": 0.8931632556014075, "grad_norm": 39.03125, "learning_rate": 9.86044328177786e-06, "loss": 18.7842, "step": 48130 }, { "epoch": 0.8933488286858874, "grad_norm": 35.375, "learning_rate": 9.860414285991818e-06, "loss": 19.06, "step": 48140 }, { "epoch": 0.8935344017703672, "grad_norm": 34.03125, "learning_rate": 9.860385290205775e-06, "loss": 18.5382, "step": 48150 }, { "epoch": 0.893719974854847, "grad_norm": 34.25, "learning_rate": 9.860356294419732e-06, "loss": 19.0277, "step": 48160 }, { "epoch": 0.8939055479393269, "grad_norm": 36.21875, "learning_rate": 9.86032729863369e-06, "loss": 18.6719, "step": 48170 }, { "epoch": 0.8940911210238067, "grad_norm": 35.3125, "learning_rate": 9.860298302847649e-06, "loss": 18.9148, "step": 48180 }, { "epoch": 0.8942766941082866, "grad_norm": 35.78125, "learning_rate": 9.860269307061605e-06, "loss": 19.3434, "step": 48190 }, { "epoch": 0.8944622671927663, "grad_norm": 37.125, "learning_rate": 9.860240311275562e-06, "loss": 18.9732, "step": 48200 }, { "epoch": 0.8946478402772462, "grad_norm": 34.59375, "learning_rate": 9.860211315489521e-06, "loss": 18.8076, "step": 48210 }, { "epoch": 0.8948334133617261, "grad_norm": 36.90625, "learning_rate": 9.860182319703479e-06, "loss": 18.5892, "step": 48220 }, { "epoch": 0.8950189864462058, "grad_norm": 38.1875, "learning_rate": 9.860153323917436e-06, "loss": 18.7777, "step": 48230 }, { "epoch": 0.8952045595306857, "grad_norm": 35.84375, "learning_rate": 9.860124328131393e-06, "loss": 18.6854, "step": 48240 }, { "epoch": 0.8953901326151655, "grad_norm": 36.96875, "learning_rate": 9.86009533234535e-06, "loss": 18.9375, "step": 48250 }, { "epoch": 0.8955757056996453, "grad_norm": 36.5625, "learning_rate": 9.860066336559308e-06, "loss": 18.5619, "step": 48260 }, { "epoch": 0.8957612787841251, "grad_norm": 35.46875, "learning_rate": 9.860037340773266e-06, "loss": 18.8577, "step": 48270 }, { "epoch": 0.895946851868605, "grad_norm": 35.0, "learning_rate": 9.860008344987225e-06, "loss": 19.1907, "step": 48280 }, { "epoch": 0.8961324249530848, "grad_norm": 36.25, "learning_rate": 9.859979349201182e-06, "loss": 18.8677, "step": 48290 }, { "epoch": 0.8963179980375646, "grad_norm": 36.375, "learning_rate": 9.859950353415138e-06, "loss": 18.9737, "step": 48300 }, { "epoch": 0.8965035711220445, "grad_norm": 36.71875, "learning_rate": 9.859921357629097e-06, "loss": 19.0863, "step": 48310 }, { "epoch": 0.8966891442065242, "grad_norm": 35.875, "learning_rate": 9.859892361843054e-06, "loss": 18.5042, "step": 48320 }, { "epoch": 0.8968747172910041, "grad_norm": 37.15625, "learning_rate": 9.859863366057012e-06, "loss": 19.0693, "step": 48330 }, { "epoch": 0.897060290375484, "grad_norm": 37.5, "learning_rate": 9.85983437027097e-06, "loss": 18.991, "step": 48340 }, { "epoch": 0.8972458634599638, "grad_norm": 35.125, "learning_rate": 9.859805374484927e-06, "loss": 18.5443, "step": 48350 }, { "epoch": 0.8974314365444436, "grad_norm": 39.0625, "learning_rate": 9.859776378698884e-06, "loss": 18.6623, "step": 48360 }, { "epoch": 0.8976170096289234, "grad_norm": 37.15625, "learning_rate": 9.859747382912841e-06, "loss": 18.8904, "step": 48370 }, { "epoch": 0.8978025827134033, "grad_norm": 35.71875, "learning_rate": 9.8597183871268e-06, "loss": 18.4478, "step": 48380 }, { "epoch": 0.897988155797883, "grad_norm": 35.6875, "learning_rate": 9.859689391340758e-06, "loss": 18.6417, "step": 48390 }, { "epoch": 0.8981737288823629, "grad_norm": 36.71875, "learning_rate": 9.859660395554714e-06, "loss": 18.5121, "step": 48400 }, { "epoch": 0.8983593019668428, "grad_norm": 38.53125, "learning_rate": 9.859631399768673e-06, "loss": 18.3646, "step": 48410 }, { "epoch": 0.8985448750513225, "grad_norm": 37.15625, "learning_rate": 9.85960240398263e-06, "loss": 18.9429, "step": 48420 }, { "epoch": 0.8987304481358024, "grad_norm": 36.46875, "learning_rate": 9.859573408196588e-06, "loss": 19.2573, "step": 48430 }, { "epoch": 0.8989160212202822, "grad_norm": 37.28125, "learning_rate": 9.859544412410545e-06, "loss": 19.204, "step": 48440 }, { "epoch": 0.899101594304762, "grad_norm": 37.8125, "learning_rate": 9.859515416624504e-06, "loss": 18.7448, "step": 48450 }, { "epoch": 0.8992871673892419, "grad_norm": 35.6875, "learning_rate": 9.85948642083846e-06, "loss": 19.055, "step": 48460 }, { "epoch": 0.8994727404737217, "grad_norm": 37.71875, "learning_rate": 9.859457425052417e-06, "loss": 18.3525, "step": 48470 }, { "epoch": 0.8996583135582015, "grad_norm": 35.1875, "learning_rate": 9.859428429266376e-06, "loss": 18.8309, "step": 48480 }, { "epoch": 0.8998438866426813, "grad_norm": 34.84375, "learning_rate": 9.859399433480334e-06, "loss": 18.8916, "step": 48490 }, { "epoch": 0.9000294597271612, "grad_norm": 38.09375, "learning_rate": 9.859370437694291e-06, "loss": 19.0495, "step": 48500 }, { "epoch": 0.900215032811641, "grad_norm": 38.71875, "learning_rate": 9.859341441908249e-06, "loss": 19.1123, "step": 48510 }, { "epoch": 0.9004006058961208, "grad_norm": 39.46875, "learning_rate": 9.859312446122206e-06, "loss": 19.1905, "step": 48520 }, { "epoch": 0.9005861789806007, "grad_norm": 39.09375, "learning_rate": 9.859283450336163e-06, "loss": 19.0494, "step": 48530 }, { "epoch": 0.9007717520650805, "grad_norm": 35.4375, "learning_rate": 9.85925445455012e-06, "loss": 18.9496, "step": 48540 }, { "epoch": 0.9009573251495603, "grad_norm": 36.03125, "learning_rate": 9.85922545876408e-06, "loss": 18.5396, "step": 48550 }, { "epoch": 0.9011428982340401, "grad_norm": 38.8125, "learning_rate": 9.859196462978036e-06, "loss": 18.9276, "step": 48560 }, { "epoch": 0.90132847131852, "grad_norm": 39.15625, "learning_rate": 9.859167467191993e-06, "loss": 18.6237, "step": 48570 }, { "epoch": 0.9015140444029998, "grad_norm": 38.1875, "learning_rate": 9.859138471405952e-06, "loss": 19.0431, "step": 48580 }, { "epoch": 0.9016996174874796, "grad_norm": 35.53125, "learning_rate": 9.85910947561991e-06, "loss": 19.2482, "step": 48590 }, { "epoch": 0.9018851905719595, "grad_norm": 36.4375, "learning_rate": 9.859080479833867e-06, "loss": 19.0075, "step": 48600 }, { "epoch": 0.9020707636564392, "grad_norm": 36.125, "learning_rate": 9.859051484047824e-06, "loss": 19.0288, "step": 48610 }, { "epoch": 0.9022563367409191, "grad_norm": 37.6875, "learning_rate": 9.859022488261782e-06, "loss": 18.5252, "step": 48620 }, { "epoch": 0.902441909825399, "grad_norm": 37.0, "learning_rate": 9.858993492475739e-06, "loss": 19.3277, "step": 48630 }, { "epoch": 0.9026274829098787, "grad_norm": 35.8125, "learning_rate": 9.858964496689697e-06, "loss": 18.7398, "step": 48640 }, { "epoch": 0.9028130559943586, "grad_norm": 37.8125, "learning_rate": 9.858935500903654e-06, "loss": 19.119, "step": 48650 }, { "epoch": 0.9029986290788384, "grad_norm": 37.46875, "learning_rate": 9.858906505117613e-06, "loss": 18.9497, "step": 48660 }, { "epoch": 0.9031842021633182, "grad_norm": 35.3125, "learning_rate": 9.858877509331569e-06, "loss": 18.5547, "step": 48670 }, { "epoch": 0.903369775247798, "grad_norm": 38.53125, "learning_rate": 9.858848513545526e-06, "loss": 19.0391, "step": 48680 }, { "epoch": 0.9035553483322779, "grad_norm": 33.28125, "learning_rate": 9.858819517759485e-06, "loss": 18.7029, "step": 48690 }, { "epoch": 0.9037409214167577, "grad_norm": 37.46875, "learning_rate": 9.858790521973443e-06, "loss": 18.9116, "step": 48700 }, { "epoch": 0.9039264945012375, "grad_norm": 36.5, "learning_rate": 9.8587615261874e-06, "loss": 19.0272, "step": 48710 }, { "epoch": 0.9041120675857174, "grad_norm": 36.1875, "learning_rate": 9.858732530401357e-06, "loss": 19.181, "step": 48720 }, { "epoch": 0.9042976406701972, "grad_norm": 36.96875, "learning_rate": 9.858703534615315e-06, "loss": 18.9267, "step": 48730 }, { "epoch": 0.904483213754677, "grad_norm": 36.96875, "learning_rate": 9.858674538829272e-06, "loss": 19.3512, "step": 48740 }, { "epoch": 0.9046687868391569, "grad_norm": 37.53125, "learning_rate": 9.85864554304323e-06, "loss": 19.0552, "step": 48750 }, { "epoch": 0.9048543599236367, "grad_norm": 37.8125, "learning_rate": 9.858616547257189e-06, "loss": 18.5666, "step": 48760 }, { "epoch": 0.9050399330081165, "grad_norm": 34.96875, "learning_rate": 9.858587551471146e-06, "loss": 18.9902, "step": 48770 }, { "epoch": 0.9052255060925963, "grad_norm": 36.59375, "learning_rate": 9.858558555685102e-06, "loss": 19.0585, "step": 48780 }, { "epoch": 0.9054110791770762, "grad_norm": 36.375, "learning_rate": 9.858529559899061e-06, "loss": 18.8288, "step": 48790 }, { "epoch": 0.905596652261556, "grad_norm": 36.8125, "learning_rate": 9.858500564113018e-06, "loss": 18.8442, "step": 48800 }, { "epoch": 0.9057822253460358, "grad_norm": 36.625, "learning_rate": 9.858471568326976e-06, "loss": 18.3578, "step": 48810 }, { "epoch": 0.9059677984305157, "grad_norm": 39.15625, "learning_rate": 9.858442572540933e-06, "loss": 18.6956, "step": 48820 }, { "epoch": 0.9061533715149954, "grad_norm": 36.65625, "learning_rate": 9.85841357675489e-06, "loss": 18.7518, "step": 48830 }, { "epoch": 0.9063389445994753, "grad_norm": 39.1875, "learning_rate": 9.858384580968848e-06, "loss": 19.176, "step": 48840 }, { "epoch": 0.9065245176839551, "grad_norm": 39.0625, "learning_rate": 9.858355585182805e-06, "loss": 18.8138, "step": 48850 }, { "epoch": 0.9067100907684349, "grad_norm": 34.03125, "learning_rate": 9.858326589396765e-06, "loss": 19.1294, "step": 48860 }, { "epoch": 0.9068956638529148, "grad_norm": 37.3125, "learning_rate": 9.858297593610722e-06, "loss": 19.0567, "step": 48870 }, { "epoch": 0.9070812369373946, "grad_norm": 35.8125, "learning_rate": 9.85826859782468e-06, "loss": 18.7387, "step": 48880 }, { "epoch": 0.9072668100218745, "grad_norm": 36.96875, "learning_rate": 9.858239602038637e-06, "loss": 19.0438, "step": 48890 }, { "epoch": 0.9074523831063542, "grad_norm": 34.25, "learning_rate": 9.858210606252594e-06, "loss": 18.7993, "step": 48900 }, { "epoch": 0.9076379561908341, "grad_norm": 37.03125, "learning_rate": 9.858181610466552e-06, "loss": 19.0855, "step": 48910 }, { "epoch": 0.907823529275314, "grad_norm": 35.625, "learning_rate": 9.858152614680509e-06, "loss": 18.388, "step": 48920 }, { "epoch": 0.9080091023597937, "grad_norm": 38.625, "learning_rate": 9.858123618894468e-06, "loss": 19.0493, "step": 48930 }, { "epoch": 0.9081946754442736, "grad_norm": 35.6875, "learning_rate": 9.858094623108424e-06, "loss": 18.9766, "step": 48940 }, { "epoch": 0.9083802485287534, "grad_norm": 36.40625, "learning_rate": 9.858065627322381e-06, "loss": 18.6864, "step": 48950 }, { "epoch": 0.9085658216132332, "grad_norm": 37.9375, "learning_rate": 9.85803663153634e-06, "loss": 18.833, "step": 48960 }, { "epoch": 0.908751394697713, "grad_norm": 37.125, "learning_rate": 9.858007635750298e-06, "loss": 18.6554, "step": 48970 }, { "epoch": 0.9089369677821929, "grad_norm": 34.1875, "learning_rate": 9.857978639964255e-06, "loss": 18.564, "step": 48980 }, { "epoch": 0.9091225408666727, "grad_norm": 35.6875, "learning_rate": 9.857949644178213e-06, "loss": 18.8914, "step": 48990 }, { "epoch": 0.9093081139511525, "grad_norm": 34.71875, "learning_rate": 9.85792064839217e-06, "loss": 19.0573, "step": 49000 }, { "epoch": 0.9094936870356324, "grad_norm": 33.625, "learning_rate": 9.857891652606127e-06, "loss": 18.3762, "step": 49010 }, { "epoch": 0.9096792601201121, "grad_norm": 37.6875, "learning_rate": 9.857862656820085e-06, "loss": 19.0526, "step": 49020 }, { "epoch": 0.909864833204592, "grad_norm": 37.5, "learning_rate": 9.857833661034044e-06, "loss": 18.6206, "step": 49030 }, { "epoch": 0.9100504062890719, "grad_norm": 36.90625, "learning_rate": 9.857804665248001e-06, "loss": 18.535, "step": 49040 }, { "epoch": 0.9102359793735516, "grad_norm": 36.25, "learning_rate": 9.857775669461957e-06, "loss": 18.6812, "step": 49050 }, { "epoch": 0.9104215524580315, "grad_norm": 36.1875, "learning_rate": 9.857746673675916e-06, "loss": 18.8887, "step": 49060 }, { "epoch": 0.9106071255425113, "grad_norm": 35.5625, "learning_rate": 9.857717677889873e-06, "loss": 18.5824, "step": 49070 }, { "epoch": 0.9107926986269912, "grad_norm": 35.5, "learning_rate": 9.857688682103831e-06, "loss": 18.4892, "step": 49080 }, { "epoch": 0.9109782717114709, "grad_norm": 35.875, "learning_rate": 9.857659686317788e-06, "loss": 18.8687, "step": 49090 }, { "epoch": 0.9111638447959508, "grad_norm": 35.03125, "learning_rate": 9.857630690531746e-06, "loss": 19.2528, "step": 49100 }, { "epoch": 0.9113494178804307, "grad_norm": 37.1875, "learning_rate": 9.857601694745703e-06, "loss": 18.4546, "step": 49110 }, { "epoch": 0.9115349909649104, "grad_norm": 36.5625, "learning_rate": 9.85757269895966e-06, "loss": 18.619, "step": 49120 }, { "epoch": 0.9117205640493903, "grad_norm": 37.625, "learning_rate": 9.857543703173618e-06, "loss": 18.5371, "step": 49130 }, { "epoch": 0.9119061371338701, "grad_norm": 36.5, "learning_rate": 9.857514707387577e-06, "loss": 19.0488, "step": 49140 }, { "epoch": 0.9120917102183499, "grad_norm": 36.3125, "learning_rate": 9.857485711601533e-06, "loss": 18.4348, "step": 49150 }, { "epoch": 0.9122772833028298, "grad_norm": 35.53125, "learning_rate": 9.857456715815492e-06, "loss": 19.2483, "step": 49160 }, { "epoch": 0.9124628563873096, "grad_norm": 37.90625, "learning_rate": 9.85742772002945e-06, "loss": 18.7975, "step": 49170 }, { "epoch": 0.9126484294717894, "grad_norm": 35.71875, "learning_rate": 9.857398724243407e-06, "loss": 18.9149, "step": 49180 }, { "epoch": 0.9128340025562692, "grad_norm": 36.125, "learning_rate": 9.857369728457364e-06, "loss": 18.691, "step": 49190 }, { "epoch": 0.9130195756407491, "grad_norm": 36.90625, "learning_rate": 9.857340732671321e-06, "loss": 18.4709, "step": 49200 }, { "epoch": 0.9132051487252288, "grad_norm": 34.5625, "learning_rate": 9.857311736885279e-06, "loss": 18.5962, "step": 49210 }, { "epoch": 0.9133907218097087, "grad_norm": 35.375, "learning_rate": 9.857282741099236e-06, "loss": 18.9627, "step": 49220 }, { "epoch": 0.9135762948941886, "grad_norm": 34.8125, "learning_rate": 9.857253745313194e-06, "loss": 18.841, "step": 49230 }, { "epoch": 0.9137618679786683, "grad_norm": 34.8125, "learning_rate": 9.857224749527153e-06, "loss": 18.694, "step": 49240 }, { "epoch": 0.9139474410631482, "grad_norm": 37.6875, "learning_rate": 9.85719575374111e-06, "loss": 19.1713, "step": 49250 }, { "epoch": 0.914133014147628, "grad_norm": 35.625, "learning_rate": 9.857166757955066e-06, "loss": 18.9923, "step": 49260 }, { "epoch": 0.9143185872321079, "grad_norm": 36.78125, "learning_rate": 9.857137762169025e-06, "loss": 18.8704, "step": 49270 }, { "epoch": 0.9145041603165877, "grad_norm": 35.875, "learning_rate": 9.857108766382982e-06, "loss": 19.2151, "step": 49280 }, { "epoch": 0.9146897334010675, "grad_norm": 35.71875, "learning_rate": 9.85707977059694e-06, "loss": 18.6477, "step": 49290 }, { "epoch": 0.9148753064855474, "grad_norm": 36.875, "learning_rate": 9.857050774810897e-06, "loss": 18.5986, "step": 49300 }, { "epoch": 0.9150608795700271, "grad_norm": 35.375, "learning_rate": 9.857021779024855e-06, "loss": 18.695, "step": 49310 }, { "epoch": 0.915246452654507, "grad_norm": 36.0, "learning_rate": 9.856992783238812e-06, "loss": 18.5976, "step": 49320 }, { "epoch": 0.9154320257389869, "grad_norm": 36.28125, "learning_rate": 9.85696378745277e-06, "loss": 18.6872, "step": 49330 }, { "epoch": 0.9156175988234666, "grad_norm": 37.875, "learning_rate": 9.856934791666729e-06, "loss": 18.8967, "step": 49340 }, { "epoch": 0.9158031719079465, "grad_norm": 35.59375, "learning_rate": 9.856905795880686e-06, "loss": 18.6512, "step": 49350 }, { "epoch": 0.9159887449924263, "grad_norm": 35.9375, "learning_rate": 9.856876800094643e-06, "loss": 18.7375, "step": 49360 }, { "epoch": 0.9161743180769061, "grad_norm": 36.40625, "learning_rate": 9.8568478043086e-06, "loss": 18.6419, "step": 49370 }, { "epoch": 0.9163598911613859, "grad_norm": 35.0, "learning_rate": 9.856818808522558e-06, "loss": 18.6441, "step": 49380 }, { "epoch": 0.9165454642458658, "grad_norm": 35.25, "learning_rate": 9.856789812736516e-06, "loss": 19.2166, "step": 49390 }, { "epoch": 0.9167310373303456, "grad_norm": 36.1875, "learning_rate": 9.856760816950473e-06, "loss": 18.9291, "step": 49400 }, { "epoch": 0.9169166104148254, "grad_norm": 38.6875, "learning_rate": 9.856731821164432e-06, "loss": 18.5892, "step": 49410 }, { "epoch": 0.9171021834993053, "grad_norm": 37.28125, "learning_rate": 9.856702825378388e-06, "loss": 18.7727, "step": 49420 }, { "epoch": 0.9172877565837851, "grad_norm": 35.40625, "learning_rate": 9.856673829592345e-06, "loss": 19.09, "step": 49430 }, { "epoch": 0.9174733296682649, "grad_norm": 37.1875, "learning_rate": 9.856644833806304e-06, "loss": 18.3979, "step": 49440 }, { "epoch": 0.9176589027527448, "grad_norm": 35.25, "learning_rate": 9.856615838020262e-06, "loss": 18.9984, "step": 49450 }, { "epoch": 0.9178444758372246, "grad_norm": 35.875, "learning_rate": 9.856586842234219e-06, "loss": 18.7745, "step": 49460 }, { "epoch": 0.9180300489217044, "grad_norm": 36.625, "learning_rate": 9.856557846448177e-06, "loss": 18.9508, "step": 49470 }, { "epoch": 0.9182156220061842, "grad_norm": 36.5625, "learning_rate": 9.856528850662134e-06, "loss": 18.8949, "step": 49480 }, { "epoch": 0.9184011950906641, "grad_norm": 37.5, "learning_rate": 9.856499854876091e-06, "loss": 19.3296, "step": 49490 }, { "epoch": 0.9185867681751438, "grad_norm": 35.71875, "learning_rate": 9.856470859090049e-06, "loss": 19.2355, "step": 49500 }, { "epoch": 0.9187723412596237, "grad_norm": 36.03125, "learning_rate": 9.856441863304008e-06, "loss": 18.7253, "step": 49510 }, { "epoch": 0.9189579143441036, "grad_norm": 36.90625, "learning_rate": 9.856412867517965e-06, "loss": 18.6996, "step": 49520 }, { "epoch": 0.9191434874285833, "grad_norm": 36.34375, "learning_rate": 9.856383871731921e-06, "loss": 19.0339, "step": 49530 }, { "epoch": 0.9193290605130632, "grad_norm": 35.59375, "learning_rate": 9.85635487594588e-06, "loss": 18.3551, "step": 49540 }, { "epoch": 0.919514633597543, "grad_norm": 37.1875, "learning_rate": 9.856325880159837e-06, "loss": 18.76, "step": 49550 }, { "epoch": 0.9197002066820228, "grad_norm": 36.5625, "learning_rate": 9.856296884373795e-06, "loss": 18.2547, "step": 49560 }, { "epoch": 0.9198857797665027, "grad_norm": 37.5, "learning_rate": 9.856267888587752e-06, "loss": 18.8997, "step": 49570 }, { "epoch": 0.9200713528509825, "grad_norm": 36.78125, "learning_rate": 9.85623889280171e-06, "loss": 18.4645, "step": 49580 }, { "epoch": 0.9202569259354623, "grad_norm": 36.78125, "learning_rate": 9.856209897015667e-06, "loss": 18.8955, "step": 49590 }, { "epoch": 0.9204424990199421, "grad_norm": 37.59375, "learning_rate": 9.856180901229625e-06, "loss": 18.3526, "step": 49600 }, { "epoch": 0.920628072104422, "grad_norm": 36.53125, "learning_rate": 9.856151905443584e-06, "loss": 19.0135, "step": 49610 }, { "epoch": 0.9208136451889019, "grad_norm": 36.28125, "learning_rate": 9.856122909657541e-06, "loss": 18.5804, "step": 49620 }, { "epoch": 0.9209992182733816, "grad_norm": 37.40625, "learning_rate": 9.856093913871498e-06, "loss": 18.6843, "step": 49630 }, { "epoch": 0.9211847913578615, "grad_norm": 34.40625, "learning_rate": 9.856064918085456e-06, "loss": 19.2493, "step": 49640 }, { "epoch": 0.9213703644423413, "grad_norm": 36.875, "learning_rate": 9.856035922299413e-06, "loss": 18.6309, "step": 49650 }, { "epoch": 0.9215559375268211, "grad_norm": 37.5, "learning_rate": 9.85600692651337e-06, "loss": 18.8726, "step": 49660 }, { "epoch": 0.9217415106113009, "grad_norm": 36.375, "learning_rate": 9.855977930727328e-06, "loss": 18.5765, "step": 49670 }, { "epoch": 0.9219270836957808, "grad_norm": 36.21875, "learning_rate": 9.855948934941285e-06, "loss": 18.8799, "step": 49680 }, { "epoch": 0.9221126567802606, "grad_norm": 35.84375, "learning_rate": 9.855919939155243e-06, "loss": 18.758, "step": 49690 }, { "epoch": 0.9222982298647404, "grad_norm": 36.96875, "learning_rate": 9.8558909433692e-06, "loss": 18.7222, "step": 49700 }, { "epoch": 0.9224838029492203, "grad_norm": 35.78125, "learning_rate": 9.855861947583158e-06, "loss": 18.7792, "step": 49710 }, { "epoch": 0.9226693760337, "grad_norm": 35.40625, "learning_rate": 9.855832951797117e-06, "loss": 18.7375, "step": 49720 }, { "epoch": 0.9228549491181799, "grad_norm": 36.9375, "learning_rate": 9.855803956011074e-06, "loss": 18.734, "step": 49730 }, { "epoch": 0.9230405222026598, "grad_norm": 37.46875, "learning_rate": 9.855774960225032e-06, "loss": 18.7155, "step": 49740 }, { "epoch": 0.9232260952871395, "grad_norm": 38.0, "learning_rate": 9.855745964438989e-06, "loss": 19.2284, "step": 49750 }, { "epoch": 0.9234116683716194, "grad_norm": 38.1875, "learning_rate": 9.855716968652946e-06, "loss": 19.4265, "step": 49760 }, { "epoch": 0.9235972414560992, "grad_norm": 36.46875, "learning_rate": 9.855687972866904e-06, "loss": 18.7159, "step": 49770 }, { "epoch": 0.9237828145405791, "grad_norm": 37.53125, "learning_rate": 9.855658977080861e-06, "loss": 18.4277, "step": 49780 }, { "epoch": 0.9239683876250588, "grad_norm": 37.15625, "learning_rate": 9.85562998129482e-06, "loss": 18.7183, "step": 49790 }, { "epoch": 0.9241539607095387, "grad_norm": 36.03125, "learning_rate": 9.855600985508776e-06, "loss": 18.8495, "step": 49800 }, { "epoch": 0.9243395337940186, "grad_norm": 36.0625, "learning_rate": 9.855571989722733e-06, "loss": 19.4189, "step": 49810 }, { "epoch": 0.9245251068784983, "grad_norm": 37.625, "learning_rate": 9.855542993936693e-06, "loss": 18.4683, "step": 49820 }, { "epoch": 0.9247106799629782, "grad_norm": 37.09375, "learning_rate": 9.85551399815065e-06, "loss": 19.0064, "step": 49830 }, { "epoch": 0.924896253047458, "grad_norm": 38.1875, "learning_rate": 9.855485002364607e-06, "loss": 18.644, "step": 49840 }, { "epoch": 0.9250818261319378, "grad_norm": 37.25, "learning_rate": 9.855456006578565e-06, "loss": 19.2591, "step": 49850 }, { "epoch": 0.9252673992164177, "grad_norm": 36.90625, "learning_rate": 9.855427010792522e-06, "loss": 18.8081, "step": 49860 }, { "epoch": 0.9254529723008975, "grad_norm": 36.625, "learning_rate": 9.85539801500648e-06, "loss": 19.0542, "step": 49870 }, { "epoch": 0.9256385453853773, "grad_norm": 35.8125, "learning_rate": 9.855369019220437e-06, "loss": 18.7709, "step": 49880 }, { "epoch": 0.9258241184698571, "grad_norm": 38.25, "learning_rate": 9.855340023434396e-06, "loss": 19.0913, "step": 49890 }, { "epoch": 0.926009691554337, "grad_norm": 35.8125, "learning_rate": 9.855311027648352e-06, "loss": 18.8659, "step": 49900 }, { "epoch": 0.9261952646388167, "grad_norm": 37.15625, "learning_rate": 9.85528203186231e-06, "loss": 18.3361, "step": 49910 }, { "epoch": 0.9263808377232966, "grad_norm": 34.78125, "learning_rate": 9.855253036076268e-06, "loss": 19.2276, "step": 49920 }, { "epoch": 0.9265664108077765, "grad_norm": 37.34375, "learning_rate": 9.855224040290226e-06, "loss": 18.7894, "step": 49930 }, { "epoch": 0.9267519838922562, "grad_norm": 35.96875, "learning_rate": 9.855195044504183e-06, "loss": 18.6204, "step": 49940 }, { "epoch": 0.9269375569767361, "grad_norm": 36.90625, "learning_rate": 9.85516604871814e-06, "loss": 18.4182, "step": 49950 }, { "epoch": 0.9271231300612159, "grad_norm": 35.53125, "learning_rate": 9.855137052932098e-06, "loss": 18.6139, "step": 49960 }, { "epoch": 0.9273087031456958, "grad_norm": 37.53125, "learning_rate": 9.855108057146055e-06, "loss": 18.7786, "step": 49970 }, { "epoch": 0.9274942762301756, "grad_norm": 37.375, "learning_rate": 9.855079061360013e-06, "loss": 18.8478, "step": 49980 }, { "epoch": 0.9276798493146554, "grad_norm": 34.125, "learning_rate": 9.855050065573972e-06, "loss": 18.4067, "step": 49990 }, { "epoch": 0.9278654223991353, "grad_norm": 36.40625, "learning_rate": 9.85502106978793e-06, "loss": 18.7262, "step": 50000 }, { "epoch": 0.9278654223991353, "eval_loss": 2.3473334312438965, "eval_runtime": 455.3845, "eval_samples_per_second": 3188.771, "eval_steps_per_second": 49.826, "step": 50000 }, { "epoch": 0.928050995483615, "grad_norm": 36.09375, "learning_rate": 9.854992074001885e-06, "loss": 18.631, "step": 50010 }, { "epoch": 0.9282365685680949, "grad_norm": 38.5, "learning_rate": 9.854963078215844e-06, "loss": 18.7004, "step": 50020 }, { "epoch": 0.9284221416525748, "grad_norm": 37.15625, "learning_rate": 9.854934082429802e-06, "loss": 18.9797, "step": 50030 }, { "epoch": 0.9286077147370545, "grad_norm": 37.1875, "learning_rate": 9.854905086643759e-06, "loss": 18.6544, "step": 50040 }, { "epoch": 0.9287932878215344, "grad_norm": 36.0625, "learning_rate": 9.854876090857716e-06, "loss": 18.7909, "step": 50050 }, { "epoch": 0.9289788609060142, "grad_norm": 38.3125, "learning_rate": 9.854847095071675e-06, "loss": 19.1664, "step": 50060 }, { "epoch": 0.929164433990494, "grad_norm": 37.15625, "learning_rate": 9.854818099285631e-06, "loss": 18.7882, "step": 50070 }, { "epoch": 0.9293500070749738, "grad_norm": 34.5625, "learning_rate": 9.854789103499589e-06, "loss": 18.8722, "step": 50080 }, { "epoch": 0.9295355801594537, "grad_norm": 39.28125, "learning_rate": 9.854760107713548e-06, "loss": 18.4605, "step": 50090 }, { "epoch": 0.9297211532439335, "grad_norm": 36.0625, "learning_rate": 9.854731111927505e-06, "loss": 18.957, "step": 50100 }, { "epoch": 0.9299067263284133, "grad_norm": 36.9375, "learning_rate": 9.854702116141462e-06, "loss": 18.9987, "step": 50110 }, { "epoch": 0.9300922994128932, "grad_norm": 37.09375, "learning_rate": 9.85467312035542e-06, "loss": 18.3386, "step": 50120 }, { "epoch": 0.9302778724973729, "grad_norm": 36.78125, "learning_rate": 9.854644124569377e-06, "loss": 18.7949, "step": 50130 }, { "epoch": 0.9304634455818528, "grad_norm": 35.59375, "learning_rate": 9.854615128783335e-06, "loss": 18.5927, "step": 50140 }, { "epoch": 0.9306490186663327, "grad_norm": 35.78125, "learning_rate": 9.854586132997292e-06, "loss": 18.7013, "step": 50150 }, { "epoch": 0.9308345917508125, "grad_norm": 37.1875, "learning_rate": 9.85455713721125e-06, "loss": 18.6819, "step": 50160 }, { "epoch": 0.9310201648352923, "grad_norm": 36.3125, "learning_rate": 9.854528141425207e-06, "loss": 19.1635, "step": 50170 }, { "epoch": 0.9312057379197721, "grad_norm": 34.40625, "learning_rate": 9.854499145639164e-06, "loss": 18.2726, "step": 50180 }, { "epoch": 0.931391311004252, "grad_norm": 37.28125, "learning_rate": 9.854470149853122e-06, "loss": 18.9638, "step": 50190 }, { "epoch": 0.9315768840887317, "grad_norm": 35.28125, "learning_rate": 9.85444115406708e-06, "loss": 19.0953, "step": 50200 }, { "epoch": 0.9317624571732116, "grad_norm": 38.5, "learning_rate": 9.854412158281038e-06, "loss": 18.7024, "step": 50210 }, { "epoch": 0.9319480302576915, "grad_norm": 37.0, "learning_rate": 9.854383162494996e-06, "loss": 18.825, "step": 50220 }, { "epoch": 0.9321336033421712, "grad_norm": 36.59375, "learning_rate": 9.854354166708953e-06, "loss": 19.0501, "step": 50230 }, { "epoch": 0.9323191764266511, "grad_norm": 35.90625, "learning_rate": 9.85432517092291e-06, "loss": 18.4288, "step": 50240 }, { "epoch": 0.9325047495111309, "grad_norm": 35.90625, "learning_rate": 9.854296175136868e-06, "loss": 19.0825, "step": 50250 }, { "epoch": 0.9326903225956107, "grad_norm": 38.90625, "learning_rate": 9.854267179350825e-06, "loss": 19.0764, "step": 50260 }, { "epoch": 0.9328758956800905, "grad_norm": 37.75, "learning_rate": 9.854238183564784e-06, "loss": 18.8452, "step": 50270 }, { "epoch": 0.9330614687645704, "grad_norm": 34.59375, "learning_rate": 9.85420918777874e-06, "loss": 18.7928, "step": 50280 }, { "epoch": 0.9332470418490502, "grad_norm": 33.78125, "learning_rate": 9.854180191992697e-06, "loss": 18.1697, "step": 50290 }, { "epoch": 0.93343261493353, "grad_norm": 37.875, "learning_rate": 9.854151196206657e-06, "loss": 18.7602, "step": 50300 }, { "epoch": 0.9336181880180099, "grad_norm": 35.84375, "learning_rate": 9.854122200420614e-06, "loss": 18.7859, "step": 50310 }, { "epoch": 0.9338037611024897, "grad_norm": 37.0625, "learning_rate": 9.854093204634571e-06, "loss": 18.5895, "step": 50320 }, { "epoch": 0.9339893341869695, "grad_norm": 34.59375, "learning_rate": 9.854064208848529e-06, "loss": 18.5902, "step": 50330 }, { "epoch": 0.9341749072714494, "grad_norm": 37.28125, "learning_rate": 9.854035213062486e-06, "loss": 19.0408, "step": 50340 }, { "epoch": 0.9343604803559292, "grad_norm": 34.125, "learning_rate": 9.854006217276444e-06, "loss": 18.3564, "step": 50350 }, { "epoch": 0.934546053440409, "grad_norm": 38.4375, "learning_rate": 9.853977221490401e-06, "loss": 18.8423, "step": 50360 }, { "epoch": 0.9347316265248888, "grad_norm": 35.8125, "learning_rate": 9.85394822570436e-06, "loss": 18.3884, "step": 50370 }, { "epoch": 0.9349171996093687, "grad_norm": 35.4375, "learning_rate": 9.853919229918318e-06, "loss": 19.1228, "step": 50380 }, { "epoch": 0.9351027726938484, "grad_norm": 38.21875, "learning_rate": 9.853890234132273e-06, "loss": 18.5342, "step": 50390 }, { "epoch": 0.9352883457783283, "grad_norm": 35.125, "learning_rate": 9.853861238346232e-06, "loss": 19.0061, "step": 50400 }, { "epoch": 0.9354739188628082, "grad_norm": 36.53125, "learning_rate": 9.85383224256019e-06, "loss": 18.631, "step": 50410 }, { "epoch": 0.9356594919472879, "grad_norm": 37.0625, "learning_rate": 9.853803246774147e-06, "loss": 18.9214, "step": 50420 }, { "epoch": 0.9358450650317678, "grad_norm": 38.3125, "learning_rate": 9.853774250988105e-06, "loss": 18.8773, "step": 50430 }, { "epoch": 0.9360306381162476, "grad_norm": 38.125, "learning_rate": 9.853745255202062e-06, "loss": 19.1509, "step": 50440 }, { "epoch": 0.9362162112007274, "grad_norm": 35.28125, "learning_rate": 9.85371625941602e-06, "loss": 18.7762, "step": 50450 }, { "epoch": 0.9364017842852073, "grad_norm": 37.09375, "learning_rate": 9.853687263629977e-06, "loss": 18.6366, "step": 50460 }, { "epoch": 0.9365873573696871, "grad_norm": 37.71875, "learning_rate": 9.853658267843936e-06, "loss": 18.7425, "step": 50470 }, { "epoch": 0.9367729304541669, "grad_norm": 36.28125, "learning_rate": 9.853629272057893e-06, "loss": 18.5633, "step": 50480 }, { "epoch": 0.9369585035386467, "grad_norm": 37.65625, "learning_rate": 9.853600276271849e-06, "loss": 18.5449, "step": 50490 }, { "epoch": 0.9371440766231266, "grad_norm": 36.34375, "learning_rate": 9.853571280485808e-06, "loss": 19.0544, "step": 50500 }, { "epoch": 0.9373296497076065, "grad_norm": 37.71875, "learning_rate": 9.853542284699766e-06, "loss": 18.8355, "step": 50510 }, { "epoch": 0.9375152227920862, "grad_norm": 37.03125, "learning_rate": 9.853513288913723e-06, "loss": 18.7822, "step": 50520 }, { "epoch": 0.9377007958765661, "grad_norm": 35.71875, "learning_rate": 9.85348429312768e-06, "loss": 18.5719, "step": 50530 }, { "epoch": 0.9378863689610459, "grad_norm": 35.84375, "learning_rate": 9.85345529734164e-06, "loss": 18.8113, "step": 50540 }, { "epoch": 0.9380719420455257, "grad_norm": 37.28125, "learning_rate": 9.853426301555595e-06, "loss": 18.984, "step": 50550 }, { "epoch": 0.9382575151300055, "grad_norm": 35.28125, "learning_rate": 9.853397305769553e-06, "loss": 18.7287, "step": 50560 }, { "epoch": 0.9384430882144854, "grad_norm": 35.96875, "learning_rate": 9.853368309983512e-06, "loss": 18.6256, "step": 50570 }, { "epoch": 0.9386286612989652, "grad_norm": 35.96875, "learning_rate": 9.853339314197469e-06, "loss": 18.8356, "step": 50580 }, { "epoch": 0.938814234383445, "grad_norm": 37.75, "learning_rate": 9.853310318411426e-06, "loss": 18.5802, "step": 50590 }, { "epoch": 0.9389998074679249, "grad_norm": 35.84375, "learning_rate": 9.853281322625384e-06, "loss": 18.9444, "step": 50600 }, { "epoch": 0.9391853805524046, "grad_norm": 37.4375, "learning_rate": 9.853252326839341e-06, "loss": 18.9265, "step": 50610 }, { "epoch": 0.9393709536368845, "grad_norm": 35.625, "learning_rate": 9.853223331053299e-06, "loss": 19.3268, "step": 50620 }, { "epoch": 0.9395565267213644, "grad_norm": 35.25, "learning_rate": 9.853194335267256e-06, "loss": 18.3115, "step": 50630 }, { "epoch": 0.9397420998058441, "grad_norm": 37.53125, "learning_rate": 9.853165339481214e-06, "loss": 19.0, "step": 50640 }, { "epoch": 0.939927672890324, "grad_norm": 36.4375, "learning_rate": 9.853136343695173e-06, "loss": 18.8586, "step": 50650 }, { "epoch": 0.9401132459748038, "grad_norm": 37.53125, "learning_rate": 9.853107347909128e-06, "loss": 18.5812, "step": 50660 }, { "epoch": 0.9402988190592837, "grad_norm": 37.25, "learning_rate": 9.853078352123087e-06, "loss": 18.7525, "step": 50670 }, { "epoch": 0.9404843921437634, "grad_norm": 37.4375, "learning_rate": 9.853049356337045e-06, "loss": 18.8099, "step": 50680 }, { "epoch": 0.9406699652282433, "grad_norm": 37.875, "learning_rate": 9.853020360551002e-06, "loss": 18.8271, "step": 50690 }, { "epoch": 0.9408555383127232, "grad_norm": 36.59375, "learning_rate": 9.85299136476496e-06, "loss": 18.7296, "step": 50700 }, { "epoch": 0.9410411113972029, "grad_norm": 36.75, "learning_rate": 9.852962368978917e-06, "loss": 18.7892, "step": 50710 }, { "epoch": 0.9412266844816828, "grad_norm": 35.6875, "learning_rate": 9.852933373192874e-06, "loss": 18.672, "step": 50720 }, { "epoch": 0.9414122575661626, "grad_norm": 37.28125, "learning_rate": 9.852904377406832e-06, "loss": 18.4865, "step": 50730 }, { "epoch": 0.9415978306506424, "grad_norm": 36.59375, "learning_rate": 9.85287538162079e-06, "loss": 18.6127, "step": 50740 }, { "epoch": 0.9417834037351223, "grad_norm": 35.0625, "learning_rate": 9.852846385834748e-06, "loss": 18.6142, "step": 50750 }, { "epoch": 0.9419689768196021, "grad_norm": 35.71875, "learning_rate": 9.852817390048704e-06, "loss": 18.4889, "step": 50760 }, { "epoch": 0.9421545499040819, "grad_norm": 36.03125, "learning_rate": 9.852788394262661e-06, "loss": 18.0942, "step": 50770 }, { "epoch": 0.9423401229885617, "grad_norm": 38.0625, "learning_rate": 9.85275939847662e-06, "loss": 18.5683, "step": 50780 }, { "epoch": 0.9425256960730416, "grad_norm": 37.0625, "learning_rate": 9.852730402690578e-06, "loss": 18.7116, "step": 50790 }, { "epoch": 0.9427112691575213, "grad_norm": 34.65625, "learning_rate": 9.852701406904535e-06, "loss": 18.6401, "step": 50800 }, { "epoch": 0.9428968422420012, "grad_norm": 36.125, "learning_rate": 9.852672411118493e-06, "loss": 19.084, "step": 50810 }, { "epoch": 0.9430824153264811, "grad_norm": 37.34375, "learning_rate": 9.85264341533245e-06, "loss": 18.4527, "step": 50820 }, { "epoch": 0.9432679884109608, "grad_norm": 34.96875, "learning_rate": 9.852614419546408e-06, "loss": 18.6703, "step": 50830 }, { "epoch": 0.9434535614954407, "grad_norm": 39.15625, "learning_rate": 9.852585423760365e-06, "loss": 18.8782, "step": 50840 }, { "epoch": 0.9436391345799205, "grad_norm": 33.625, "learning_rate": 9.852556427974324e-06, "loss": 18.3635, "step": 50850 }, { "epoch": 0.9438247076644004, "grad_norm": 38.25, "learning_rate": 9.852527432188282e-06, "loss": 18.8192, "step": 50860 }, { "epoch": 0.9440102807488802, "grad_norm": 35.9375, "learning_rate": 9.852498436402237e-06, "loss": 18.9524, "step": 50870 }, { "epoch": 0.94419585383336, "grad_norm": 37.84375, "learning_rate": 9.852469440616196e-06, "loss": 18.9869, "step": 50880 }, { "epoch": 0.9443814269178399, "grad_norm": 37.4375, "learning_rate": 9.852440444830154e-06, "loss": 18.8108, "step": 50890 }, { "epoch": 0.9445670000023196, "grad_norm": 36.1875, "learning_rate": 9.852411449044111e-06, "loss": 18.7039, "step": 50900 }, { "epoch": 0.9447525730867995, "grad_norm": 38.59375, "learning_rate": 9.852382453258069e-06, "loss": 18.6205, "step": 50910 }, { "epoch": 0.9449381461712794, "grad_norm": 38.09375, "learning_rate": 9.852353457472026e-06, "loss": 18.9012, "step": 50920 }, { "epoch": 0.9451237192557591, "grad_norm": 35.03125, "learning_rate": 9.852324461685983e-06, "loss": 18.9504, "step": 50930 }, { "epoch": 0.945309292340239, "grad_norm": 36.78125, "learning_rate": 9.85229546589994e-06, "loss": 18.4827, "step": 50940 }, { "epoch": 0.9454948654247188, "grad_norm": 37.5625, "learning_rate": 9.8522664701139e-06, "loss": 19.0303, "step": 50950 }, { "epoch": 0.9456804385091986, "grad_norm": 38.09375, "learning_rate": 9.852237474327857e-06, "loss": 18.2305, "step": 50960 }, { "epoch": 0.9458660115936784, "grad_norm": 35.65625, "learning_rate": 9.852208478541815e-06, "loss": 18.5792, "step": 50970 }, { "epoch": 0.9460515846781583, "grad_norm": 38.46875, "learning_rate": 9.852179482755772e-06, "loss": 18.5712, "step": 50980 }, { "epoch": 0.9462371577626381, "grad_norm": 37.34375, "learning_rate": 9.85215048696973e-06, "loss": 18.6123, "step": 50990 }, { "epoch": 0.9464227308471179, "grad_norm": 37.125, "learning_rate": 9.852121491183687e-06, "loss": 18.5901, "step": 51000 }, { "epoch": 0.9466083039315978, "grad_norm": 36.46875, "learning_rate": 9.852092495397644e-06, "loss": 19.0493, "step": 51010 }, { "epoch": 0.9467938770160775, "grad_norm": 35.375, "learning_rate": 9.852063499611603e-06, "loss": 18.5143, "step": 51020 }, { "epoch": 0.9469794501005574, "grad_norm": 34.375, "learning_rate": 9.85203450382556e-06, "loss": 18.7262, "step": 51030 }, { "epoch": 0.9471650231850373, "grad_norm": 35.65625, "learning_rate": 9.852005508039517e-06, "loss": 18.5407, "step": 51040 }, { "epoch": 0.9473505962695171, "grad_norm": 35.84375, "learning_rate": 9.851976512253476e-06, "loss": 18.7274, "step": 51050 }, { "epoch": 0.9475361693539969, "grad_norm": 35.5, "learning_rate": 9.851947516467433e-06, "loss": 18.6067, "step": 51060 }, { "epoch": 0.9477217424384767, "grad_norm": 35.96875, "learning_rate": 9.85191852068139e-06, "loss": 18.9094, "step": 51070 }, { "epoch": 0.9479073155229566, "grad_norm": 34.96875, "learning_rate": 9.851889524895348e-06, "loss": 18.7977, "step": 51080 }, { "epoch": 0.9480928886074363, "grad_norm": 37.75, "learning_rate": 9.851860529109305e-06, "loss": 19.2816, "step": 51090 }, { "epoch": 0.9482784616919162, "grad_norm": 34.4375, "learning_rate": 9.851831533323263e-06, "loss": 19.179, "step": 51100 }, { "epoch": 0.9484640347763961, "grad_norm": 35.96875, "learning_rate": 9.85180253753722e-06, "loss": 18.4052, "step": 51110 }, { "epoch": 0.9486496078608758, "grad_norm": 36.0, "learning_rate": 9.85177354175118e-06, "loss": 18.9433, "step": 51120 }, { "epoch": 0.9488351809453557, "grad_norm": 37.4375, "learning_rate": 9.851744545965137e-06, "loss": 19.0316, "step": 51130 }, { "epoch": 0.9490207540298355, "grad_norm": 37.40625, "learning_rate": 9.851715550179092e-06, "loss": 18.5718, "step": 51140 }, { "epoch": 0.9492063271143153, "grad_norm": 36.5625, "learning_rate": 9.851686554393051e-06, "loss": 18.4393, "step": 51150 }, { "epoch": 0.9493919001987952, "grad_norm": 35.84375, "learning_rate": 9.851657558607009e-06, "loss": 18.1548, "step": 51160 }, { "epoch": 0.949577473283275, "grad_norm": 36.40625, "learning_rate": 9.851628562820966e-06, "loss": 18.8235, "step": 51170 }, { "epoch": 0.9497630463677548, "grad_norm": 34.90625, "learning_rate": 9.851599567034924e-06, "loss": 18.7454, "step": 51180 }, { "epoch": 0.9499486194522346, "grad_norm": 37.21875, "learning_rate": 9.851570571248881e-06, "loss": 18.0793, "step": 51190 }, { "epoch": 0.9501341925367145, "grad_norm": 36.125, "learning_rate": 9.851541575462838e-06, "loss": 18.5923, "step": 51200 }, { "epoch": 0.9503197656211944, "grad_norm": 37.3125, "learning_rate": 9.851512579676796e-06, "loss": 19.1447, "step": 51210 }, { "epoch": 0.9505053387056741, "grad_norm": 35.96875, "learning_rate": 9.851483583890753e-06, "loss": 18.39, "step": 51220 }, { "epoch": 0.950690911790154, "grad_norm": 37.96875, "learning_rate": 9.851454588104712e-06, "loss": 18.8331, "step": 51230 }, { "epoch": 0.9508764848746338, "grad_norm": 37.1875, "learning_rate": 9.85142559231867e-06, "loss": 18.3683, "step": 51240 }, { "epoch": 0.9510620579591136, "grad_norm": 36.4375, "learning_rate": 9.851396596532627e-06, "loss": 18.497, "step": 51250 }, { "epoch": 0.9512476310435934, "grad_norm": 37.3125, "learning_rate": 9.851367600746585e-06, "loss": 18.5485, "step": 51260 }, { "epoch": 0.9514332041280733, "grad_norm": 37.5625, "learning_rate": 9.851338604960542e-06, "loss": 18.4425, "step": 51270 }, { "epoch": 0.9516187772125531, "grad_norm": 37.96875, "learning_rate": 9.8513096091745e-06, "loss": 18.5681, "step": 51280 }, { "epoch": 0.9518043502970329, "grad_norm": 36.53125, "learning_rate": 9.851280613388457e-06, "loss": 18.5103, "step": 51290 }, { "epoch": 0.9519899233815128, "grad_norm": 35.46875, "learning_rate": 9.851251617602414e-06, "loss": 18.7339, "step": 51300 }, { "epoch": 0.9521754964659925, "grad_norm": 35.28125, "learning_rate": 9.851222621816372e-06, "loss": 18.6217, "step": 51310 }, { "epoch": 0.9523610695504724, "grad_norm": 37.125, "learning_rate": 9.851193626030329e-06, "loss": 18.5986, "step": 51320 }, { "epoch": 0.9525466426349523, "grad_norm": 39.3125, "learning_rate": 9.851164630244288e-06, "loss": 18.7082, "step": 51330 }, { "epoch": 0.952732215719432, "grad_norm": 35.78125, "learning_rate": 9.851135634458246e-06, "loss": 18.6332, "step": 51340 }, { "epoch": 0.9529177888039119, "grad_norm": 35.78125, "learning_rate": 9.851106638672201e-06, "loss": 18.7516, "step": 51350 }, { "epoch": 0.9531033618883917, "grad_norm": 35.75, "learning_rate": 9.85107764288616e-06, "loss": 18.8567, "step": 51360 }, { "epoch": 0.9532889349728715, "grad_norm": 35.15625, "learning_rate": 9.851048647100118e-06, "loss": 18.5148, "step": 51370 }, { "epoch": 0.9534745080573513, "grad_norm": 37.71875, "learning_rate": 9.851019651314075e-06, "loss": 18.6169, "step": 51380 }, { "epoch": 0.9536600811418312, "grad_norm": 36.375, "learning_rate": 9.850990655528033e-06, "loss": 18.8565, "step": 51390 }, { "epoch": 0.9538456542263111, "grad_norm": 36.28125, "learning_rate": 9.850961659741992e-06, "loss": 18.5506, "step": 51400 }, { "epoch": 0.9540312273107908, "grad_norm": 37.90625, "learning_rate": 9.850932663955947e-06, "loss": 18.6192, "step": 51410 }, { "epoch": 0.9542168003952707, "grad_norm": 38.71875, "learning_rate": 9.850903668169905e-06, "loss": 18.5314, "step": 51420 }, { "epoch": 0.9544023734797505, "grad_norm": 35.84375, "learning_rate": 9.850874672383864e-06, "loss": 18.5046, "step": 51430 }, { "epoch": 0.9545879465642303, "grad_norm": 34.71875, "learning_rate": 9.850845676597821e-06, "loss": 18.7414, "step": 51440 }, { "epoch": 0.9547735196487102, "grad_norm": 38.21875, "learning_rate": 9.850816680811779e-06, "loss": 18.5634, "step": 51450 }, { "epoch": 0.95495909273319, "grad_norm": 38.0, "learning_rate": 9.850787685025736e-06, "loss": 18.6599, "step": 51460 }, { "epoch": 0.9551446658176698, "grad_norm": 36.65625, "learning_rate": 9.850758689239694e-06, "loss": 18.7972, "step": 51470 }, { "epoch": 0.9553302389021496, "grad_norm": 38.3125, "learning_rate": 9.850729693453651e-06, "loss": 18.6985, "step": 51480 }, { "epoch": 0.9555158119866295, "grad_norm": 36.40625, "learning_rate": 9.850700697667608e-06, "loss": 18.596, "step": 51490 }, { "epoch": 0.9557013850711092, "grad_norm": 36.65625, "learning_rate": 9.850671701881567e-06, "loss": 18.604, "step": 51500 }, { "epoch": 0.9558869581555891, "grad_norm": 38.15625, "learning_rate": 9.850642706095523e-06, "loss": 18.7036, "step": 51510 }, { "epoch": 0.956072531240069, "grad_norm": 37.21875, "learning_rate": 9.85061371030948e-06, "loss": 19.1632, "step": 51520 }, { "epoch": 0.9562581043245487, "grad_norm": 34.875, "learning_rate": 9.85058471452344e-06, "loss": 18.6759, "step": 51530 }, { "epoch": 0.9564436774090286, "grad_norm": 37.4375, "learning_rate": 9.850555718737397e-06, "loss": 18.9442, "step": 51540 }, { "epoch": 0.9566292504935084, "grad_norm": 36.96875, "learning_rate": 9.850526722951354e-06, "loss": 18.8462, "step": 51550 }, { "epoch": 0.9568148235779882, "grad_norm": 37.40625, "learning_rate": 9.850497727165312e-06, "loss": 18.6035, "step": 51560 }, { "epoch": 0.9570003966624681, "grad_norm": 36.46875, "learning_rate": 9.85046873137927e-06, "loss": 18.6443, "step": 51570 }, { "epoch": 0.9571859697469479, "grad_norm": 39.125, "learning_rate": 9.850439735593227e-06, "loss": 18.9952, "step": 51580 }, { "epoch": 0.9573715428314278, "grad_norm": 38.1875, "learning_rate": 9.850410739807184e-06, "loss": 18.9026, "step": 51590 }, { "epoch": 0.9575571159159075, "grad_norm": 35.5625, "learning_rate": 9.850381744021143e-06, "loss": 18.7487, "step": 51600 }, { "epoch": 0.9577426890003874, "grad_norm": 38.0, "learning_rate": 9.8503527482351e-06, "loss": 18.9155, "step": 51610 }, { "epoch": 0.9579282620848673, "grad_norm": 34.65625, "learning_rate": 9.850323752449056e-06, "loss": 18.8469, "step": 51620 }, { "epoch": 0.958113835169347, "grad_norm": 37.0625, "learning_rate": 9.850294756663015e-06, "loss": 18.7591, "step": 51630 }, { "epoch": 0.9582994082538269, "grad_norm": 33.6875, "learning_rate": 9.850265760876973e-06, "loss": 18.4839, "step": 51640 }, { "epoch": 0.9584849813383067, "grad_norm": 34.71875, "learning_rate": 9.85023676509093e-06, "loss": 18.9344, "step": 51650 }, { "epoch": 0.9586705544227865, "grad_norm": 39.59375, "learning_rate": 9.850207769304888e-06, "loss": 18.9862, "step": 51660 }, { "epoch": 0.9588561275072663, "grad_norm": 37.875, "learning_rate": 9.850178773518845e-06, "loss": 18.5865, "step": 51670 }, { "epoch": 0.9590417005917462, "grad_norm": 36.8125, "learning_rate": 9.850149777732802e-06, "loss": 18.9963, "step": 51680 }, { "epoch": 0.959227273676226, "grad_norm": 37.21875, "learning_rate": 9.85012078194676e-06, "loss": 18.5694, "step": 51690 }, { "epoch": 0.9594128467607058, "grad_norm": 37.65625, "learning_rate": 9.850091786160719e-06, "loss": 18.5936, "step": 51700 }, { "epoch": 0.9595984198451857, "grad_norm": 38.09375, "learning_rate": 9.850062790374676e-06, "loss": 18.8217, "step": 51710 }, { "epoch": 0.9597839929296654, "grad_norm": 35.96875, "learning_rate": 9.850033794588634e-06, "loss": 18.8549, "step": 51720 }, { "epoch": 0.9599695660141453, "grad_norm": 36.46875, "learning_rate": 9.850004798802591e-06, "loss": 19.1592, "step": 51730 }, { "epoch": 0.9601551390986252, "grad_norm": 36.40625, "learning_rate": 9.849975803016549e-06, "loss": 18.723, "step": 51740 }, { "epoch": 0.960340712183105, "grad_norm": 36.5625, "learning_rate": 9.849946807230506e-06, "loss": 18.8267, "step": 51750 }, { "epoch": 0.9605262852675848, "grad_norm": 36.78125, "learning_rate": 9.849917811444463e-06, "loss": 18.6763, "step": 51760 }, { "epoch": 0.9607118583520646, "grad_norm": 33.90625, "learning_rate": 9.84988881565842e-06, "loss": 18.8519, "step": 51770 }, { "epoch": 0.9608974314365445, "grad_norm": 36.59375, "learning_rate": 9.849859819872378e-06, "loss": 18.4374, "step": 51780 }, { "epoch": 0.9610830045210242, "grad_norm": 36.4375, "learning_rate": 9.849830824086336e-06, "loss": 19.033, "step": 51790 }, { "epoch": 0.9612685776055041, "grad_norm": 36.3125, "learning_rate": 9.849801828300293e-06, "loss": 18.6082, "step": 51800 }, { "epoch": 0.961454150689984, "grad_norm": 37.625, "learning_rate": 9.849772832514252e-06, "loss": 18.4141, "step": 51810 }, { "epoch": 0.9616397237744637, "grad_norm": 35.34375, "learning_rate": 9.84974383672821e-06, "loss": 18.9108, "step": 51820 }, { "epoch": 0.9618252968589436, "grad_norm": 35.8125, "learning_rate": 9.849714840942167e-06, "loss": 18.8733, "step": 51830 }, { "epoch": 0.9620108699434234, "grad_norm": 37.21875, "learning_rate": 9.849685845156124e-06, "loss": 18.7665, "step": 51840 }, { "epoch": 0.9621964430279032, "grad_norm": 35.4375, "learning_rate": 9.849656849370082e-06, "loss": 18.7238, "step": 51850 }, { "epoch": 0.962382016112383, "grad_norm": 34.59375, "learning_rate": 9.84962785358404e-06, "loss": 18.8423, "step": 51860 }, { "epoch": 0.9625675891968629, "grad_norm": 36.65625, "learning_rate": 9.849598857797997e-06, "loss": 18.623, "step": 51870 }, { "epoch": 0.9627531622813427, "grad_norm": 36.84375, "learning_rate": 9.849569862011956e-06, "loss": 18.4343, "step": 51880 }, { "epoch": 0.9629387353658225, "grad_norm": 34.9375, "learning_rate": 9.849540866225911e-06, "loss": 18.7979, "step": 51890 }, { "epoch": 0.9631243084503024, "grad_norm": 36.875, "learning_rate": 9.849511870439869e-06, "loss": 18.9062, "step": 51900 }, { "epoch": 0.9633098815347821, "grad_norm": 38.59375, "learning_rate": 9.849482874653828e-06, "loss": 18.7673, "step": 51910 }, { "epoch": 0.963495454619262, "grad_norm": 36.71875, "learning_rate": 9.849453878867785e-06, "loss": 18.5212, "step": 51920 }, { "epoch": 0.9636810277037419, "grad_norm": 36.625, "learning_rate": 9.849424883081743e-06, "loss": 18.6261, "step": 51930 }, { "epoch": 0.9638666007882217, "grad_norm": 37.84375, "learning_rate": 9.8493958872957e-06, "loss": 19.0427, "step": 51940 }, { "epoch": 0.9640521738727015, "grad_norm": 36.9375, "learning_rate": 9.849366891509658e-06, "loss": 18.7989, "step": 51950 }, { "epoch": 0.9642377469571813, "grad_norm": 34.875, "learning_rate": 9.849337895723615e-06, "loss": 18.5892, "step": 51960 }, { "epoch": 0.9644233200416612, "grad_norm": 37.0, "learning_rate": 9.849308899937572e-06, "loss": 18.9571, "step": 51970 }, { "epoch": 0.964608893126141, "grad_norm": 34.90625, "learning_rate": 9.849279904151531e-06, "loss": 18.6608, "step": 51980 }, { "epoch": 0.9647944662106208, "grad_norm": 36.0, "learning_rate": 9.849250908365489e-06, "loss": 18.6838, "step": 51990 }, { "epoch": 0.9649800392951007, "grad_norm": 35.96875, "learning_rate": 9.849221912579445e-06, "loss": 18.5241, "step": 52000 }, { "epoch": 0.9651656123795804, "grad_norm": 34.5625, "learning_rate": 9.849192916793404e-06, "loss": 18.7984, "step": 52010 }, { "epoch": 0.9653511854640603, "grad_norm": 38.59375, "learning_rate": 9.849163921007361e-06, "loss": 18.8855, "step": 52020 }, { "epoch": 0.9655367585485402, "grad_norm": 38.34375, "learning_rate": 9.849134925221319e-06, "loss": 18.8712, "step": 52030 }, { "epoch": 0.9657223316330199, "grad_norm": 37.5, "learning_rate": 9.849105929435276e-06, "loss": 18.4082, "step": 52040 }, { "epoch": 0.9659079047174998, "grad_norm": 35.9375, "learning_rate": 9.849076933649233e-06, "loss": 18.8569, "step": 52050 }, { "epoch": 0.9660934778019796, "grad_norm": 35.5, "learning_rate": 9.84904793786319e-06, "loss": 19.0493, "step": 52060 }, { "epoch": 0.9662790508864594, "grad_norm": 35.0, "learning_rate": 9.849018942077148e-06, "loss": 18.684, "step": 52070 }, { "epoch": 0.9664646239709392, "grad_norm": 36.4375, "learning_rate": 9.848989946291107e-06, "loss": 18.4338, "step": 52080 }, { "epoch": 0.9666501970554191, "grad_norm": 37.75, "learning_rate": 9.848960950505065e-06, "loss": 19.0642, "step": 52090 }, { "epoch": 0.966835770139899, "grad_norm": 35.84375, "learning_rate": 9.84893195471902e-06, "loss": 18.8723, "step": 52100 }, { "epoch": 0.9670213432243787, "grad_norm": 37.125, "learning_rate": 9.84890295893298e-06, "loss": 18.7843, "step": 52110 }, { "epoch": 0.9672069163088586, "grad_norm": 36.1875, "learning_rate": 9.848873963146937e-06, "loss": 18.6314, "step": 52120 }, { "epoch": 0.9673924893933384, "grad_norm": 34.96875, "learning_rate": 9.848844967360894e-06, "loss": 18.5492, "step": 52130 }, { "epoch": 0.9675780624778182, "grad_norm": 35.8125, "learning_rate": 9.848815971574852e-06, "loss": 18.4559, "step": 52140 }, { "epoch": 0.967763635562298, "grad_norm": 36.40625, "learning_rate": 9.848786975788809e-06, "loss": 18.3422, "step": 52150 }, { "epoch": 0.9679492086467779, "grad_norm": 37.65625, "learning_rate": 9.848757980002766e-06, "loss": 18.8372, "step": 52160 }, { "epoch": 0.9681347817312577, "grad_norm": 37.59375, "learning_rate": 9.848728984216724e-06, "loss": 18.6309, "step": 52170 }, { "epoch": 0.9683203548157375, "grad_norm": 36.5625, "learning_rate": 9.848699988430683e-06, "loss": 18.9326, "step": 52180 }, { "epoch": 0.9685059279002174, "grad_norm": 36.625, "learning_rate": 9.84867099264464e-06, "loss": 18.8778, "step": 52190 }, { "epoch": 0.9686915009846971, "grad_norm": 37.6875, "learning_rate": 9.848641996858598e-06, "loss": 18.4787, "step": 52200 }, { "epoch": 0.968877074069177, "grad_norm": 35.84375, "learning_rate": 9.848613001072555e-06, "loss": 18.6075, "step": 52210 }, { "epoch": 0.9690626471536569, "grad_norm": 37.8125, "learning_rate": 9.848584005286513e-06, "loss": 18.5023, "step": 52220 }, { "epoch": 0.9692482202381366, "grad_norm": 35.75, "learning_rate": 9.84855500950047e-06, "loss": 18.3154, "step": 52230 }, { "epoch": 0.9694337933226165, "grad_norm": 37.28125, "learning_rate": 9.848526013714427e-06, "loss": 19.0297, "step": 52240 }, { "epoch": 0.9696193664070963, "grad_norm": 35.9375, "learning_rate": 9.848497017928385e-06, "loss": 18.438, "step": 52250 }, { "epoch": 0.9698049394915761, "grad_norm": 36.53125, "learning_rate": 9.848468022142342e-06, "loss": 18.473, "step": 52260 }, { "epoch": 0.969990512576056, "grad_norm": 37.34375, "learning_rate": 9.8484390263563e-06, "loss": 19.1365, "step": 52270 }, { "epoch": 0.9701760856605358, "grad_norm": 37.40625, "learning_rate": 9.848410030570257e-06, "loss": 18.9908, "step": 52280 }, { "epoch": 0.9703616587450157, "grad_norm": 37.96875, "learning_rate": 9.848381034784216e-06, "loss": 19.0627, "step": 52290 }, { "epoch": 0.9705472318294954, "grad_norm": 36.59375, "learning_rate": 9.848352038998174e-06, "loss": 18.6671, "step": 52300 }, { "epoch": 0.9707328049139753, "grad_norm": 37.0625, "learning_rate": 9.848323043212131e-06, "loss": 18.7139, "step": 52310 }, { "epoch": 0.9709183779984552, "grad_norm": 36.34375, "learning_rate": 9.848294047426088e-06, "loss": 18.8432, "step": 52320 }, { "epoch": 0.9711039510829349, "grad_norm": 35.34375, "learning_rate": 9.848265051640046e-06, "loss": 18.6022, "step": 52330 }, { "epoch": 0.9712895241674148, "grad_norm": 36.9375, "learning_rate": 9.848236055854003e-06, "loss": 18.6221, "step": 52340 }, { "epoch": 0.9714750972518946, "grad_norm": 37.59375, "learning_rate": 9.84820706006796e-06, "loss": 18.5104, "step": 52350 }, { "epoch": 0.9716606703363744, "grad_norm": 37.28125, "learning_rate": 9.84817806428192e-06, "loss": 18.7772, "step": 52360 }, { "epoch": 0.9718462434208542, "grad_norm": 38.84375, "learning_rate": 9.848149068495875e-06, "loss": 18.6431, "step": 52370 }, { "epoch": 0.9720318165053341, "grad_norm": 37.15625, "learning_rate": 9.848120072709833e-06, "loss": 18.5793, "step": 52380 }, { "epoch": 0.9722173895898139, "grad_norm": 36.125, "learning_rate": 9.848091076923792e-06, "loss": 18.8981, "step": 52390 }, { "epoch": 0.9724029626742937, "grad_norm": 37.28125, "learning_rate": 9.84806208113775e-06, "loss": 18.5569, "step": 52400 }, { "epoch": 0.9725885357587736, "grad_norm": 35.8125, "learning_rate": 9.848033085351707e-06, "loss": 18.7243, "step": 52410 }, { "epoch": 0.9727741088432533, "grad_norm": 37.4375, "learning_rate": 9.848004089565664e-06, "loss": 19.0858, "step": 52420 }, { "epoch": 0.9729596819277332, "grad_norm": 36.5, "learning_rate": 9.847975093779622e-06, "loss": 18.5945, "step": 52430 }, { "epoch": 0.973145255012213, "grad_norm": 38.4375, "learning_rate": 9.847946097993579e-06, "loss": 18.5578, "step": 52440 }, { "epoch": 0.9733308280966928, "grad_norm": 35.40625, "learning_rate": 9.847917102207536e-06, "loss": 18.5476, "step": 52450 }, { "epoch": 0.9735164011811727, "grad_norm": 37.34375, "learning_rate": 9.847888106421495e-06, "loss": 18.6942, "step": 52460 }, { "epoch": 0.9737019742656525, "grad_norm": 35.71875, "learning_rate": 9.847859110635453e-06, "loss": 18.4927, "step": 52470 }, { "epoch": 0.9738875473501324, "grad_norm": 34.90625, "learning_rate": 9.847830114849409e-06, "loss": 18.5665, "step": 52480 }, { "epoch": 0.9740731204346121, "grad_norm": 35.46875, "learning_rate": 9.847801119063368e-06, "loss": 18.9818, "step": 52490 }, { "epoch": 0.974258693519092, "grad_norm": 35.90625, "learning_rate": 9.847772123277325e-06, "loss": 18.3819, "step": 52500 }, { "epoch": 0.9744442666035719, "grad_norm": 37.40625, "learning_rate": 9.847743127491283e-06, "loss": 18.266, "step": 52510 }, { "epoch": 0.9746298396880516, "grad_norm": 36.40625, "learning_rate": 9.84771413170524e-06, "loss": 19.177, "step": 52520 }, { "epoch": 0.9748154127725315, "grad_norm": 35.25, "learning_rate": 9.847685135919197e-06, "loss": 18.7006, "step": 52530 }, { "epoch": 0.9750009858570113, "grad_norm": 36.75, "learning_rate": 9.847656140133155e-06, "loss": 18.8634, "step": 52540 }, { "epoch": 0.9751865589414911, "grad_norm": 36.25, "learning_rate": 9.847627144347112e-06, "loss": 18.3737, "step": 52550 }, { "epoch": 0.975372132025971, "grad_norm": 37.21875, "learning_rate": 9.847598148561071e-06, "loss": 18.6165, "step": 52560 }, { "epoch": 0.9755577051104508, "grad_norm": 37.03125, "learning_rate": 9.847569152775029e-06, "loss": 18.7161, "step": 52570 }, { "epoch": 0.9757432781949306, "grad_norm": 34.65625, "learning_rate": 9.847540156988986e-06, "loss": 19.034, "step": 52580 }, { "epoch": 0.9759288512794104, "grad_norm": 37.09375, "learning_rate": 9.847511161202943e-06, "loss": 18.5442, "step": 52590 }, { "epoch": 0.9761144243638903, "grad_norm": 36.09375, "learning_rate": 9.847482165416901e-06, "loss": 18.6233, "step": 52600 }, { "epoch": 0.97629999744837, "grad_norm": 36.5, "learning_rate": 9.847453169630858e-06, "loss": 18.7935, "step": 52610 }, { "epoch": 0.9764855705328499, "grad_norm": 38.0, "learning_rate": 9.847424173844816e-06, "loss": 19.0615, "step": 52620 }, { "epoch": 0.9766711436173298, "grad_norm": 36.78125, "learning_rate": 9.847395178058775e-06, "loss": 18.2889, "step": 52630 }, { "epoch": 0.9768567167018096, "grad_norm": 37.28125, "learning_rate": 9.84736618227273e-06, "loss": 18.338, "step": 52640 }, { "epoch": 0.9770422897862894, "grad_norm": 38.15625, "learning_rate": 9.847337186486688e-06, "loss": 18.6097, "step": 52650 }, { "epoch": 0.9772278628707692, "grad_norm": 37.5625, "learning_rate": 9.847308190700647e-06, "loss": 18.959, "step": 52660 }, { "epoch": 0.9774134359552491, "grad_norm": 35.9375, "learning_rate": 9.847279194914604e-06, "loss": 18.3508, "step": 52670 }, { "epoch": 0.9775990090397289, "grad_norm": 36.65625, "learning_rate": 9.847250199128562e-06, "loss": 18.6836, "step": 52680 }, { "epoch": 0.9777845821242087, "grad_norm": 35.6875, "learning_rate": 9.84722120334252e-06, "loss": 18.8362, "step": 52690 }, { "epoch": 0.9779701552086886, "grad_norm": 35.78125, "learning_rate": 9.847192207556477e-06, "loss": 18.6928, "step": 52700 }, { "epoch": 0.9781557282931683, "grad_norm": 34.125, "learning_rate": 9.847163211770434e-06, "loss": 18.8454, "step": 52710 }, { "epoch": 0.9783413013776482, "grad_norm": 35.78125, "learning_rate": 9.847134215984391e-06, "loss": 18.703, "step": 52720 }, { "epoch": 0.978526874462128, "grad_norm": 35.25, "learning_rate": 9.847105220198349e-06, "loss": 18.9489, "step": 52730 }, { "epoch": 0.9787124475466078, "grad_norm": 36.875, "learning_rate": 9.847076224412308e-06, "loss": 19.0491, "step": 52740 }, { "epoch": 0.9788980206310877, "grad_norm": 37.0, "learning_rate": 9.847047228626264e-06, "loss": 18.4266, "step": 52750 }, { "epoch": 0.9790835937155675, "grad_norm": 35.84375, "learning_rate": 9.847018232840223e-06, "loss": 18.529, "step": 52760 }, { "epoch": 0.9792691668000473, "grad_norm": 36.0625, "learning_rate": 9.84698923705418e-06, "loss": 18.3869, "step": 52770 }, { "epoch": 0.9794547398845271, "grad_norm": 38.0, "learning_rate": 9.846960241268138e-06, "loss": 18.7703, "step": 52780 }, { "epoch": 0.979640312969007, "grad_norm": 38.09375, "learning_rate": 9.846931245482095e-06, "loss": 18.6845, "step": 52790 }, { "epoch": 0.9798258860534867, "grad_norm": 35.96875, "learning_rate": 9.846902249696052e-06, "loss": 18.5895, "step": 52800 }, { "epoch": 0.9800114591379666, "grad_norm": 36.75, "learning_rate": 9.84687325391001e-06, "loss": 18.6341, "step": 52810 }, { "epoch": 0.9801970322224465, "grad_norm": 38.5625, "learning_rate": 9.846844258123967e-06, "loss": 18.6194, "step": 52820 }, { "epoch": 0.9803826053069263, "grad_norm": 36.28125, "learning_rate": 9.846815262337925e-06, "loss": 18.7706, "step": 52830 }, { "epoch": 0.9805681783914061, "grad_norm": 37.4375, "learning_rate": 9.846786266551884e-06, "loss": 18.7345, "step": 52840 }, { "epoch": 0.980753751475886, "grad_norm": 37.125, "learning_rate": 9.84675727076584e-06, "loss": 18.2325, "step": 52850 }, { "epoch": 0.9809393245603658, "grad_norm": 36.53125, "learning_rate": 9.846728274979797e-06, "loss": 18.2759, "step": 52860 }, { "epoch": 0.9811248976448456, "grad_norm": 35.5625, "learning_rate": 9.846699279193756e-06, "loss": 18.5422, "step": 52870 }, { "epoch": 0.9813104707293254, "grad_norm": 36.21875, "learning_rate": 9.846670283407713e-06, "loss": 18.8652, "step": 52880 }, { "epoch": 0.9814960438138053, "grad_norm": 36.78125, "learning_rate": 9.84664128762167e-06, "loss": 18.8132, "step": 52890 }, { "epoch": 0.981681616898285, "grad_norm": 34.1875, "learning_rate": 9.846612291835628e-06, "loss": 18.7067, "step": 52900 }, { "epoch": 0.9818671899827649, "grad_norm": 38.78125, "learning_rate": 9.846583296049586e-06, "loss": 18.8695, "step": 52910 }, { "epoch": 0.9820527630672448, "grad_norm": 36.84375, "learning_rate": 9.846554300263543e-06, "loss": 18.9407, "step": 52920 }, { "epoch": 0.9822383361517245, "grad_norm": 37.0, "learning_rate": 9.8465253044775e-06, "loss": 18.5586, "step": 52930 }, { "epoch": 0.9824239092362044, "grad_norm": 37.0, "learning_rate": 9.84649630869146e-06, "loss": 18.701, "step": 52940 }, { "epoch": 0.9826094823206842, "grad_norm": 33.875, "learning_rate": 9.846467312905417e-06, "loss": 18.6175, "step": 52950 }, { "epoch": 0.982795055405164, "grad_norm": 36.78125, "learning_rate": 9.846438317119373e-06, "loss": 18.4298, "step": 52960 }, { "epoch": 0.9829806284896438, "grad_norm": 37.75, "learning_rate": 9.846409321333332e-06, "loss": 18.3273, "step": 52970 }, { "epoch": 0.9831662015741237, "grad_norm": 36.9375, "learning_rate": 9.846380325547289e-06, "loss": 18.5626, "step": 52980 }, { "epoch": 0.9833517746586035, "grad_norm": 36.21875, "learning_rate": 9.846351329761247e-06, "loss": 18.6245, "step": 52990 }, { "epoch": 0.9835373477430833, "grad_norm": 35.09375, "learning_rate": 9.846322333975204e-06, "loss": 18.2085, "step": 53000 }, { "epoch": 0.9837229208275632, "grad_norm": 37.78125, "learning_rate": 9.846293338189161e-06, "loss": 19.0781, "step": 53010 }, { "epoch": 0.983908493912043, "grad_norm": 37.15625, "learning_rate": 9.846264342403119e-06, "loss": 18.8288, "step": 53020 }, { "epoch": 0.9840940669965228, "grad_norm": 36.25, "learning_rate": 9.846235346617076e-06, "loss": 18.6047, "step": 53030 }, { "epoch": 0.9842796400810027, "grad_norm": 36.3125, "learning_rate": 9.846206350831035e-06, "loss": 18.9381, "step": 53040 }, { "epoch": 0.9844652131654825, "grad_norm": 36.4375, "learning_rate": 9.846177355044993e-06, "loss": 18.6755, "step": 53050 }, { "epoch": 0.9846507862499623, "grad_norm": 36.59375, "learning_rate": 9.84614835925895e-06, "loss": 18.5902, "step": 53060 }, { "epoch": 0.9848363593344421, "grad_norm": 35.90625, "learning_rate": 9.846119363472907e-06, "loss": 18.8222, "step": 53070 }, { "epoch": 0.985021932418922, "grad_norm": 36.75, "learning_rate": 9.846090367686865e-06, "loss": 18.9657, "step": 53080 }, { "epoch": 0.9852075055034017, "grad_norm": 36.875, "learning_rate": 9.846061371900822e-06, "loss": 18.6081, "step": 53090 }, { "epoch": 0.9853930785878816, "grad_norm": 38.75, "learning_rate": 9.84603237611478e-06, "loss": 19.0012, "step": 53100 }, { "epoch": 0.9855786516723615, "grad_norm": 35.40625, "learning_rate": 9.846003380328739e-06, "loss": 18.3228, "step": 53110 }, { "epoch": 0.9857642247568412, "grad_norm": 36.65625, "learning_rate": 9.845974384542695e-06, "loss": 18.4046, "step": 53120 }, { "epoch": 0.9859497978413211, "grad_norm": 36.21875, "learning_rate": 9.845945388756652e-06, "loss": 18.3394, "step": 53130 }, { "epoch": 0.986135370925801, "grad_norm": 36.96875, "learning_rate": 9.845916392970611e-06, "loss": 18.5615, "step": 53140 }, { "epoch": 0.9863209440102807, "grad_norm": 35.1875, "learning_rate": 9.845887397184568e-06, "loss": 18.6555, "step": 53150 }, { "epoch": 0.9865065170947606, "grad_norm": 38.15625, "learning_rate": 9.845858401398526e-06, "loss": 19.2572, "step": 53160 }, { "epoch": 0.9866920901792404, "grad_norm": 37.21875, "learning_rate": 9.845829405612483e-06, "loss": 18.6147, "step": 53170 }, { "epoch": 0.9868776632637203, "grad_norm": 35.09375, "learning_rate": 9.84580040982644e-06, "loss": 18.649, "step": 53180 }, { "epoch": 0.9870632363482, "grad_norm": 34.28125, "learning_rate": 9.845771414040398e-06, "loss": 18.5839, "step": 53190 }, { "epoch": 0.9872488094326799, "grad_norm": 37.71875, "learning_rate": 9.845742418254355e-06, "loss": 18.9422, "step": 53200 }, { "epoch": 0.9874343825171598, "grad_norm": 38.6875, "learning_rate": 9.845713422468315e-06, "loss": 19.0014, "step": 53210 }, { "epoch": 0.9876199556016395, "grad_norm": 38.03125, "learning_rate": 9.845684426682272e-06, "loss": 18.5322, "step": 53220 }, { "epoch": 0.9878055286861194, "grad_norm": 37.0, "learning_rate": 9.845655430896228e-06, "loss": 18.8219, "step": 53230 }, { "epoch": 0.9879911017705992, "grad_norm": 36.375, "learning_rate": 9.845626435110187e-06, "loss": 18.5651, "step": 53240 }, { "epoch": 0.988176674855079, "grad_norm": 35.09375, "learning_rate": 9.845597439324144e-06, "loss": 18.3392, "step": 53250 }, { "epoch": 0.9883622479395588, "grad_norm": 37.65625, "learning_rate": 9.845568443538102e-06, "loss": 19.2046, "step": 53260 }, { "epoch": 0.9885478210240387, "grad_norm": 36.3125, "learning_rate": 9.845539447752059e-06, "loss": 18.752, "step": 53270 }, { "epoch": 0.9887333941085185, "grad_norm": 37.09375, "learning_rate": 9.845510451966016e-06, "loss": 18.4617, "step": 53280 }, { "epoch": 0.9889189671929983, "grad_norm": 37.125, "learning_rate": 9.845481456179974e-06, "loss": 18.4353, "step": 53290 }, { "epoch": 0.9891045402774782, "grad_norm": 37.375, "learning_rate": 9.845452460393931e-06, "loss": 18.8832, "step": 53300 }, { "epoch": 0.9892901133619579, "grad_norm": 35.3125, "learning_rate": 9.845423464607889e-06, "loss": 19.0743, "step": 53310 }, { "epoch": 0.9894756864464378, "grad_norm": 35.78125, "learning_rate": 9.845394468821848e-06, "loss": 18.8587, "step": 53320 }, { "epoch": 0.9896612595309177, "grad_norm": 35.21875, "learning_rate": 9.845365473035805e-06, "loss": 18.5995, "step": 53330 }, { "epoch": 0.9898468326153974, "grad_norm": 37.40625, "learning_rate": 9.845336477249763e-06, "loss": 18.2105, "step": 53340 }, { "epoch": 0.9900324056998773, "grad_norm": 35.1875, "learning_rate": 9.84530748146372e-06, "loss": 18.6503, "step": 53350 }, { "epoch": 0.9902179787843571, "grad_norm": 37.09375, "learning_rate": 9.845278485677677e-06, "loss": 18.6598, "step": 53360 }, { "epoch": 0.990403551868837, "grad_norm": 36.21875, "learning_rate": 9.845249489891635e-06, "loss": 18.2827, "step": 53370 }, { "epoch": 0.9905891249533167, "grad_norm": 36.03125, "learning_rate": 9.845220494105592e-06, "loss": 18.5182, "step": 53380 }, { "epoch": 0.9907746980377966, "grad_norm": 37.5, "learning_rate": 9.84519149831955e-06, "loss": 18.4391, "step": 53390 }, { "epoch": 0.9909602711222765, "grad_norm": 37.21875, "learning_rate": 9.845162502533507e-06, "loss": 19.1688, "step": 53400 }, { "epoch": 0.9911458442067562, "grad_norm": 37.375, "learning_rate": 9.845133506747464e-06, "loss": 18.4897, "step": 53410 }, { "epoch": 0.9913314172912361, "grad_norm": 37.90625, "learning_rate": 9.845104510961424e-06, "loss": 19.3897, "step": 53420 }, { "epoch": 0.991516990375716, "grad_norm": 36.90625, "learning_rate": 9.845075515175381e-06, "loss": 18.5943, "step": 53430 }, { "epoch": 0.9917025634601957, "grad_norm": 36.65625, "learning_rate": 9.845046519389337e-06, "loss": 19.0475, "step": 53440 }, { "epoch": 0.9918881365446756, "grad_norm": 38.96875, "learning_rate": 9.845017523603296e-06, "loss": 18.6567, "step": 53450 }, { "epoch": 0.9920737096291554, "grad_norm": 37.5625, "learning_rate": 9.844988527817253e-06, "loss": 18.2615, "step": 53460 }, { "epoch": 0.9922592827136352, "grad_norm": 36.125, "learning_rate": 9.84495953203121e-06, "loss": 18.7222, "step": 53470 }, { "epoch": 0.992444855798115, "grad_norm": 37.46875, "learning_rate": 9.844930536245168e-06, "loss": 18.6064, "step": 53480 }, { "epoch": 0.9926304288825949, "grad_norm": 35.5625, "learning_rate": 9.844901540459127e-06, "loss": 18.9255, "step": 53490 }, { "epoch": 0.9928160019670746, "grad_norm": 37.0625, "learning_rate": 9.844872544673083e-06, "loss": 18.2984, "step": 53500 }, { "epoch": 0.9930015750515545, "grad_norm": 35.96875, "learning_rate": 9.84484354888704e-06, "loss": 18.5695, "step": 53510 }, { "epoch": 0.9931871481360344, "grad_norm": 37.21875, "learning_rate": 9.844814553101e-06, "loss": 18.8148, "step": 53520 }, { "epoch": 0.9933727212205142, "grad_norm": 35.28125, "learning_rate": 9.844785557314957e-06, "loss": 18.9416, "step": 53530 }, { "epoch": 0.993558294304994, "grad_norm": 35.96875, "learning_rate": 9.844756561528914e-06, "loss": 18.6266, "step": 53540 }, { "epoch": 0.9937438673894738, "grad_norm": 39.75, "learning_rate": 9.844727565742871e-06, "loss": 19.1788, "step": 53550 }, { "epoch": 0.9939294404739537, "grad_norm": 36.03125, "learning_rate": 9.844698569956829e-06, "loss": 18.8975, "step": 53560 }, { "epoch": 0.9941150135584335, "grad_norm": 36.0625, "learning_rate": 9.844669574170786e-06, "loss": 18.9393, "step": 53570 }, { "epoch": 0.9943005866429133, "grad_norm": 39.28125, "learning_rate": 9.844640578384744e-06, "loss": 18.5879, "step": 53580 }, { "epoch": 0.9944861597273932, "grad_norm": 36.46875, "learning_rate": 9.844611582598703e-06, "loss": 18.9418, "step": 53590 }, { "epoch": 0.9946717328118729, "grad_norm": 36.28125, "learning_rate": 9.844582586812659e-06, "loss": 18.719, "step": 53600 }, { "epoch": 0.9948573058963528, "grad_norm": 38.40625, "learning_rate": 9.844553591026616e-06, "loss": 18.7769, "step": 53610 }, { "epoch": 0.9950428789808327, "grad_norm": 35.125, "learning_rate": 9.844524595240575e-06, "loss": 18.7225, "step": 53620 }, { "epoch": 0.9952284520653124, "grad_norm": 36.21875, "learning_rate": 9.844495599454532e-06, "loss": 18.6759, "step": 53630 }, { "epoch": 0.9954140251497923, "grad_norm": 37.34375, "learning_rate": 9.84446660366849e-06, "loss": 18.6277, "step": 53640 }, { "epoch": 0.9955995982342721, "grad_norm": 38.28125, "learning_rate": 9.844437607882447e-06, "loss": 18.8499, "step": 53650 }, { "epoch": 0.9957851713187519, "grad_norm": 37.21875, "learning_rate": 9.844408612096405e-06, "loss": 18.6598, "step": 53660 }, { "epoch": 0.9959707444032317, "grad_norm": 36.4375, "learning_rate": 9.844379616310362e-06, "loss": 19.1049, "step": 53670 }, { "epoch": 0.9961563174877116, "grad_norm": 38.28125, "learning_rate": 9.84435062052432e-06, "loss": 18.3496, "step": 53680 }, { "epoch": 0.9963418905721914, "grad_norm": 37.5, "learning_rate": 9.844321624738279e-06, "loss": 18.6458, "step": 53690 }, { "epoch": 0.9965274636566712, "grad_norm": 35.90625, "learning_rate": 9.844292628952236e-06, "loss": 18.5307, "step": 53700 }, { "epoch": 0.9967130367411511, "grad_norm": 36.53125, "learning_rate": 9.844263633166192e-06, "loss": 19.2581, "step": 53710 }, { "epoch": 0.9968986098256309, "grad_norm": 36.25, "learning_rate": 9.84423463738015e-06, "loss": 18.803, "step": 53720 }, { "epoch": 0.9970841829101107, "grad_norm": 37.1875, "learning_rate": 9.844205641594108e-06, "loss": 18.2188, "step": 53730 }, { "epoch": 0.9972697559945906, "grad_norm": 36.8125, "learning_rate": 9.844176645808066e-06, "loss": 18.7958, "step": 53740 }, { "epoch": 0.9974553290790704, "grad_norm": 33.96875, "learning_rate": 9.844147650022023e-06, "loss": 18.6348, "step": 53750 }, { "epoch": 0.9976409021635502, "grad_norm": 35.96875, "learning_rate": 9.84411865423598e-06, "loss": 18.5909, "step": 53760 }, { "epoch": 0.99782647524803, "grad_norm": 36.0625, "learning_rate": 9.844089658449938e-06, "loss": 19.1494, "step": 53770 }, { "epoch": 0.9980120483325099, "grad_norm": 35.6875, "learning_rate": 9.844060662663895e-06, "loss": 18.9692, "step": 53780 }, { "epoch": 0.9981976214169896, "grad_norm": 36.0, "learning_rate": 9.844031666877853e-06, "loss": 18.5912, "step": 53790 }, { "epoch": 0.9983831945014695, "grad_norm": 33.34375, "learning_rate": 9.844002671091812e-06, "loss": 18.6809, "step": 53800 }, { "epoch": 0.9985687675859494, "grad_norm": 36.0625, "learning_rate": 9.84397367530577e-06, "loss": 18.3271, "step": 53810 }, { "epoch": 0.9987543406704291, "grad_norm": 37.28125, "learning_rate": 9.843944679519727e-06, "loss": 18.652, "step": 53820 }, { "epoch": 0.998939913754909, "grad_norm": 38.375, "learning_rate": 9.843915683733684e-06, "loss": 18.4857, "step": 53830 }, { "epoch": 0.9991254868393888, "grad_norm": 37.53125, "learning_rate": 9.843886687947641e-06, "loss": 18.413, "step": 53840 }, { "epoch": 0.9993110599238686, "grad_norm": 35.71875, "learning_rate": 9.843857692161599e-06, "loss": 18.8629, "step": 53850 }, { "epoch": 0.9994966330083485, "grad_norm": 35.84375, "learning_rate": 9.843828696375556e-06, "loss": 18.0011, "step": 53860 }, { "epoch": 0.9996822060928283, "grad_norm": 35.1875, "learning_rate": 9.843799700589514e-06, "loss": 18.2267, "step": 53870 }, { "epoch": 0.9998677791773081, "grad_norm": 36.34375, "learning_rate": 9.843770704803471e-06, "loss": 18.813, "step": 53880 } ], "logging_steps": 10, "max_steps": 53887, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.406098491698053e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }