{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4040126536763131, "eval_steps": 500, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.040126536763131e-05, "grad_norm": 34776792.0, "learning_rate": 1.6e-08, "loss": 1112513.0, "step": 10 }, { "epoch": 8.080253073526263e-05, "grad_norm": 8441178.0, "learning_rate": 3.2e-08, "loss": 1301709.2, "step": 20 }, { "epoch": 0.00012120379610289395, "grad_norm": 12542169.0, "learning_rate": 4.8e-08, "loss": 1175913.9, "step": 30 }, { "epoch": 0.00016160506147052525, "grad_norm": 42386256.0, "learning_rate": 6.4e-08, "loss": 1313873.9, "step": 40 }, { "epoch": 0.00020200632683815657, "grad_norm": 33162978.0, "learning_rate": 8e-08, "loss": 1142017.2, "step": 50 }, { "epoch": 0.0002424075922057879, "grad_norm": 52537692.0, "learning_rate": 9.6e-08, "loss": 1040597.1, "step": 60 }, { "epoch": 0.0002828088575734192, "grad_norm": 17375308.0, "learning_rate": 1.1200000000000001e-07, "loss": 1494345.4, "step": 70 }, { "epoch": 0.0003232101229410505, "grad_norm": 95491040.0, "learning_rate": 1.28e-07, "loss": 1404674.8, "step": 80 }, { "epoch": 0.0003636113883086818, "grad_norm": 13118159.0, "learning_rate": 1.4400000000000002e-07, "loss": 1204737.6, "step": 90 }, { "epoch": 0.00040401265367631315, "grad_norm": 18581922.0, "learning_rate": 1.6e-07, "loss": 783122.2, "step": 100 }, { "epoch": 0.00044441391904394446, "grad_norm": 15600268.0, "learning_rate": 1.7600000000000001e-07, "loss": 1342865.9, "step": 110 }, { "epoch": 0.0004848151844115758, "grad_norm": 5757326.5, "learning_rate": 1.92e-07, "loss": 998477.5, "step": 120 }, { "epoch": 0.000525216449779207, "grad_norm": 37603152.0, "learning_rate": 2.08e-07, "loss": 1113327.2, "step": 130 }, { "epoch": 0.0005656177151468384, "grad_norm": 3987340.5, "learning_rate": 2.2400000000000002e-07, "loss": 815339.55, "step": 140 }, { "epoch": 0.0006060189805144697, "grad_norm": 17012036.0, "learning_rate": 2.4000000000000003e-07, "loss": 799237.55, "step": 150 }, { "epoch": 0.000646420245882101, "grad_norm": 15123844.0, "learning_rate": 2.56e-07, "loss": 795830.9, "step": 160 }, { "epoch": 0.0006868215112497323, "grad_norm": 13699358.0, "learning_rate": 2.72e-07, "loss": 675074.05, "step": 170 }, { "epoch": 0.0007272227766173637, "grad_norm": 5762779.5, "learning_rate": 2.8800000000000004e-07, "loss": 397814.875, "step": 180 }, { "epoch": 0.000767624041984995, "grad_norm": 3545711.25, "learning_rate": 3.04e-07, "loss": 278445.225, "step": 190 }, { "epoch": 0.0008080253073526263, "grad_norm": 22483670.0, "learning_rate": 3.2e-07, "loss": 350395.65, "step": 200 }, { "epoch": 0.0008484265727202576, "grad_norm": 3111300.75, "learning_rate": 3.36e-07, "loss": 215067.95, "step": 210 }, { "epoch": 0.0008888278380878889, "grad_norm": 4985113.0, "learning_rate": 3.5200000000000003e-07, "loss": 160062.15, "step": 220 }, { "epoch": 0.0009292291034555202, "grad_norm": 10360142.0, "learning_rate": 3.68e-07, "loss": 177085.5, "step": 230 }, { "epoch": 0.0009696303688231516, "grad_norm": 4212897.0, "learning_rate": 3.84e-07, "loss": 156687.3375, "step": 240 }, { "epoch": 0.0010100316341907828, "grad_norm": 2492394.75, "learning_rate": 4.0000000000000003e-07, "loss": 218777.15, "step": 250 }, { "epoch": 0.001050432899558414, "grad_norm": 1271087.375, "learning_rate": 4.16e-07, "loss": 74811.1875, "step": 260 }, { "epoch": 0.0010908341649260454, "grad_norm": 11904261.0, "learning_rate": 4.3200000000000006e-07, "loss": 78403.6125, "step": 270 }, { "epoch": 0.0011312354302936767, "grad_norm": 1911185.875, "learning_rate": 4.4800000000000004e-07, "loss": 112190.75, "step": 280 }, { "epoch": 0.001171636695661308, "grad_norm": 1619918.75, "learning_rate": 4.64e-07, "loss": 33613.9688, "step": 290 }, { "epoch": 0.0012120379610289394, "grad_norm": 1365019.625, "learning_rate": 4.800000000000001e-07, "loss": 32836.45, "step": 300 }, { "epoch": 0.0012524392263965707, "grad_norm": 1140846.75, "learning_rate": 4.96e-07, "loss": 38892.625, "step": 310 }, { "epoch": 0.001292840491764202, "grad_norm": 950318.8125, "learning_rate": 5.12e-07, "loss": 43860.9844, "step": 320 }, { "epoch": 0.0013332417571318333, "grad_norm": 191924.40625, "learning_rate": 5.280000000000001e-07, "loss": 19675.0312, "step": 330 }, { "epoch": 0.0013736430224994647, "grad_norm": 354602.96875, "learning_rate": 5.44e-07, "loss": 17043.5047, "step": 340 }, { "epoch": 0.001414044287867096, "grad_norm": 64334.99609375, "learning_rate": 5.6e-07, "loss": 6989.507, "step": 350 }, { "epoch": 0.0014544455532347273, "grad_norm": 29138.234375, "learning_rate": 5.760000000000001e-07, "loss": 2483.216, "step": 360 }, { "epoch": 0.0014948468186023586, "grad_norm": 4881.9619140625, "learning_rate": 5.920000000000001e-07, "loss": 4704.4551, "step": 370 }, { "epoch": 0.00153524808396999, "grad_norm": 73014.8671875, "learning_rate": 6.08e-07, "loss": 1492.4164, "step": 380 }, { "epoch": 0.0015756493493376213, "grad_norm": 86664.8203125, "learning_rate": 6.24e-07, "loss": 1407.5383, "step": 390 }, { "epoch": 0.0016160506147052526, "grad_norm": 91074.265625, "learning_rate": 6.4e-07, "loss": 932.89, "step": 400 }, { "epoch": 0.001656451880072884, "grad_norm": 5759.06396484375, "learning_rate": 6.560000000000002e-07, "loss": 856.515, "step": 410 }, { "epoch": 0.0016968531454405152, "grad_norm": 1454.9195556640625, "learning_rate": 6.72e-07, "loss": 419.0969, "step": 420 }, { "epoch": 0.0017372544108081465, "grad_norm": 673.9178466796875, "learning_rate": 6.88e-07, "loss": 621.3434, "step": 430 }, { "epoch": 0.0017776556761757779, "grad_norm": 1015.685546875, "learning_rate": 7.040000000000001e-07, "loss": 498.2878, "step": 440 }, { "epoch": 0.0018180569415434092, "grad_norm": 6716.2080078125, "learning_rate": 7.2e-07, "loss": 556.3153, "step": 450 }, { "epoch": 0.0018584582069110405, "grad_norm": 1468.1954345703125, "learning_rate": 7.36e-07, "loss": 488.196, "step": 460 }, { "epoch": 0.0018988594722786718, "grad_norm": 5355.9794921875, "learning_rate": 7.520000000000001e-07, "loss": 553.4161, "step": 470 }, { "epoch": 0.0019392607376463031, "grad_norm": 1276.7708740234375, "learning_rate": 7.68e-07, "loss": 575.7385, "step": 480 }, { "epoch": 0.0019796620030139342, "grad_norm": 1997.7783203125, "learning_rate": 7.84e-07, "loss": 449.5601, "step": 490 }, { "epoch": 0.0020200632683815656, "grad_norm": 1234.7044677734375, "learning_rate": 8.000000000000001e-07, "loss": 454.7207, "step": 500 }, { "epoch": 0.002060464533749197, "grad_norm": 1192.5169677734375, "learning_rate": 8.160000000000001e-07, "loss": 459.7037, "step": 510 }, { "epoch": 0.002100865799116828, "grad_norm": 1192.2779541015625, "learning_rate": 8.32e-07, "loss": 406.1772, "step": 520 }, { "epoch": 0.0021412670644844595, "grad_norm": 1168.502685546875, "learning_rate": 8.480000000000001e-07, "loss": 520.4339, "step": 530 }, { "epoch": 0.002181668329852091, "grad_norm": 1580.9521484375, "learning_rate": 8.640000000000001e-07, "loss": 282.541, "step": 540 }, { "epoch": 0.002222069595219722, "grad_norm": 12831.9287109375, "learning_rate": 8.8e-07, "loss": 531.3956, "step": 550 }, { "epoch": 0.0022624708605873535, "grad_norm": 6975.4140625, "learning_rate": 8.960000000000001e-07, "loss": 377.4974, "step": 560 }, { "epoch": 0.002302872125954985, "grad_norm": 3642.0546875, "learning_rate": 9.120000000000001e-07, "loss": 325.7552, "step": 570 }, { "epoch": 0.002343273391322616, "grad_norm": 1394.17822265625, "learning_rate": 9.28e-07, "loss": 450.4489, "step": 580 }, { "epoch": 0.0023836746566902474, "grad_norm": 6102.92626953125, "learning_rate": 9.440000000000001e-07, "loss": 581.4113, "step": 590 }, { "epoch": 0.0024240759220578788, "grad_norm": 942.1673583984375, "learning_rate": 9.600000000000001e-07, "loss": 327.3541, "step": 600 }, { "epoch": 0.00246447718742551, "grad_norm": 7506.9111328125, "learning_rate": 9.76e-07, "loss": 562.9349, "step": 610 }, { "epoch": 0.0025048784527931414, "grad_norm": 828.693603515625, "learning_rate": 9.92e-07, "loss": 480.7408, "step": 620 }, { "epoch": 0.0025452797181607727, "grad_norm": 1101.0645751953125, "learning_rate": 1.0080000000000001e-06, "loss": 510.9642, "step": 630 }, { "epoch": 0.002585680983528404, "grad_norm": 4109.21728515625, "learning_rate": 1.024e-06, "loss": 380.4708, "step": 640 }, { "epoch": 0.0026260822488960354, "grad_norm": 1035.9979248046875, "learning_rate": 1.04e-06, "loss": 438.6869, "step": 650 }, { "epoch": 0.0026664835142636667, "grad_norm": 857.4368896484375, "learning_rate": 1.0560000000000001e-06, "loss": 333.5929, "step": 660 }, { "epoch": 0.002706884779631298, "grad_norm": 861.3447875976562, "learning_rate": 1.072e-06, "loss": 369.1007, "step": 670 }, { "epoch": 0.0027472860449989293, "grad_norm": 1937.781005859375, "learning_rate": 1.088e-06, "loss": 467.2207, "step": 680 }, { "epoch": 0.0027876873103665606, "grad_norm": 11980.4130859375, "learning_rate": 1.1040000000000001e-06, "loss": 461.9273, "step": 690 }, { "epoch": 0.002828088575734192, "grad_norm": 1039.8837890625, "learning_rate": 1.12e-06, "loss": 376.5904, "step": 700 }, { "epoch": 0.0028684898411018233, "grad_norm": 2001.8681640625, "learning_rate": 1.1360000000000002e-06, "loss": 397.0597, "step": 710 }, { "epoch": 0.0029088911064694546, "grad_norm": 4143.15087890625, "learning_rate": 1.1520000000000002e-06, "loss": 524.1538, "step": 720 }, { "epoch": 0.002949292371837086, "grad_norm": 1036.8919677734375, "learning_rate": 1.168e-06, "loss": 367.9916, "step": 730 }, { "epoch": 0.0029896936372047172, "grad_norm": 8330.5068359375, "learning_rate": 1.1840000000000002e-06, "loss": 394.5002, "step": 740 }, { "epoch": 0.0030300949025723486, "grad_norm": 2307.091064453125, "learning_rate": 1.2000000000000002e-06, "loss": 295.3556, "step": 750 }, { "epoch": 0.00307049616793998, "grad_norm": 1893.0789794921875, "learning_rate": 1.216e-06, "loss": 265.5479, "step": 760 }, { "epoch": 0.003110897433307611, "grad_norm": 2477.446533203125, "learning_rate": 1.2320000000000002e-06, "loss": 534.005, "step": 770 }, { "epoch": 0.0031512986986752425, "grad_norm": 916.7561645507812, "learning_rate": 1.248e-06, "loss": 334.4991, "step": 780 }, { "epoch": 0.003191699964042874, "grad_norm": 13106.6064453125, "learning_rate": 1.2640000000000003e-06, "loss": 464.4099, "step": 790 }, { "epoch": 0.003232101229410505, "grad_norm": 1100.711669921875, "learning_rate": 1.28e-06, "loss": 302.2139, "step": 800 }, { "epoch": 0.0032725024947781365, "grad_norm": 2172.443603515625, "learning_rate": 1.296e-06, "loss": 389.6274, "step": 810 }, { "epoch": 0.003312903760145768, "grad_norm": 1690.6312255859375, "learning_rate": 1.3120000000000003e-06, "loss": 361.3152, "step": 820 }, { "epoch": 0.003353305025513399, "grad_norm": 924.0753173828125, "learning_rate": 1.328e-06, "loss": 247.9785, "step": 830 }, { "epoch": 0.0033937062908810304, "grad_norm": 1367.6988525390625, "learning_rate": 1.344e-06, "loss": 320.1396, "step": 840 }, { "epoch": 0.0034341075562486618, "grad_norm": 731.0768432617188, "learning_rate": 1.3600000000000001e-06, "loss": 394.588, "step": 850 }, { "epoch": 0.003474508821616293, "grad_norm": 788.498046875, "learning_rate": 1.376e-06, "loss": 430.4721, "step": 860 }, { "epoch": 0.0035149100869839244, "grad_norm": 989.5845336914062, "learning_rate": 1.392e-06, "loss": 343.7414, "step": 870 }, { "epoch": 0.0035553113523515557, "grad_norm": 661.3421020507812, "learning_rate": 1.4080000000000001e-06, "loss": 382.3532, "step": 880 }, { "epoch": 0.003595712617719187, "grad_norm": 831.7238159179688, "learning_rate": 1.424e-06, "loss": 442.5967, "step": 890 }, { "epoch": 0.0036361138830868184, "grad_norm": 1258.17236328125, "learning_rate": 1.44e-06, "loss": 329.9442, "step": 900 }, { "epoch": 0.0036765151484544497, "grad_norm": 1166.2576904296875, "learning_rate": 1.4560000000000001e-06, "loss": 332.621, "step": 910 }, { "epoch": 0.003716916413822081, "grad_norm": 877.9427490234375, "learning_rate": 1.472e-06, "loss": 464.8878, "step": 920 }, { "epoch": 0.0037573176791897123, "grad_norm": 1044.3133544921875, "learning_rate": 1.488e-06, "loss": 292.7577, "step": 930 }, { "epoch": 0.0037977189445573436, "grad_norm": 1089.46728515625, "learning_rate": 1.5040000000000001e-06, "loss": 460.0909, "step": 940 }, { "epoch": 0.003838120209924975, "grad_norm": 797.8550415039062, "learning_rate": 1.52e-06, "loss": 346.0366, "step": 950 }, { "epoch": 0.0038785214752926063, "grad_norm": 798.5408325195312, "learning_rate": 1.536e-06, "loss": 310.8797, "step": 960 }, { "epoch": 0.003918922740660237, "grad_norm": 2262.073974609375, "learning_rate": 1.5520000000000001e-06, "loss": 370.4966, "step": 970 }, { "epoch": 0.0039593240060278685, "grad_norm": 2735.94482421875, "learning_rate": 1.568e-06, "loss": 323.6035, "step": 980 }, { "epoch": 0.0039997252713955, "grad_norm": 646.0745849609375, "learning_rate": 1.5840000000000002e-06, "loss": 339.8437, "step": 990 }, { "epoch": 0.004040126536763131, "grad_norm": 1748.4075927734375, "learning_rate": 1.6000000000000001e-06, "loss": 317.4156, "step": 1000 }, { "epoch": 0.0040805278021307624, "grad_norm": 780.6202392578125, "learning_rate": 1.616e-06, "loss": 296.7254, "step": 1010 }, { "epoch": 0.004120929067498394, "grad_norm": 1253.2996826171875, "learning_rate": 1.6320000000000002e-06, "loss": 384.1156, "step": 1020 }, { "epoch": 0.004161330332866025, "grad_norm": 1055.1898193359375, "learning_rate": 1.6480000000000001e-06, "loss": 397.7781, "step": 1030 }, { "epoch": 0.004201731598233656, "grad_norm": 803.1446533203125, "learning_rate": 1.664e-06, "loss": 359.3899, "step": 1040 }, { "epoch": 0.004242132863601288, "grad_norm": 798.8219604492188, "learning_rate": 1.6800000000000002e-06, "loss": 381.0167, "step": 1050 }, { "epoch": 0.004282534128968919, "grad_norm": 3304.813232421875, "learning_rate": 1.6960000000000002e-06, "loss": 391.1282, "step": 1060 }, { "epoch": 0.00432293539433655, "grad_norm": 2308.608154296875, "learning_rate": 1.712e-06, "loss": 453.3599, "step": 1070 }, { "epoch": 0.004363336659704182, "grad_norm": 744.8621215820312, "learning_rate": 1.7280000000000002e-06, "loss": 306.6811, "step": 1080 }, { "epoch": 0.004403737925071813, "grad_norm": 5821.24267578125, "learning_rate": 1.7440000000000002e-06, "loss": 608.5389, "step": 1090 }, { "epoch": 0.004444139190439444, "grad_norm": 865.3998413085938, "learning_rate": 1.76e-06, "loss": 242.1831, "step": 1100 }, { "epoch": 0.004484540455807076, "grad_norm": 955.5283203125, "learning_rate": 1.7760000000000002e-06, "loss": 406.3821, "step": 1110 }, { "epoch": 0.004524941721174707, "grad_norm": 1257.974853515625, "learning_rate": 1.7920000000000002e-06, "loss": 337.8827, "step": 1120 }, { "epoch": 0.004565342986542338, "grad_norm": 848.3350830078125, "learning_rate": 1.808e-06, "loss": 383.7443, "step": 1130 }, { "epoch": 0.00460574425190997, "grad_norm": 2609.029541015625, "learning_rate": 1.8240000000000002e-06, "loss": 394.2932, "step": 1140 }, { "epoch": 0.004646145517277601, "grad_norm": 1440.8753662109375, "learning_rate": 1.8400000000000002e-06, "loss": 433.8044, "step": 1150 }, { "epoch": 0.004686546782645232, "grad_norm": 1531.273681640625, "learning_rate": 1.856e-06, "loss": 424.2002, "step": 1160 }, { "epoch": 0.0047269480480128636, "grad_norm": 2845.63623046875, "learning_rate": 1.8720000000000002e-06, "loss": 343.2191, "step": 1170 }, { "epoch": 0.004767349313380495, "grad_norm": 679.157470703125, "learning_rate": 1.8880000000000002e-06, "loss": 327.4853, "step": 1180 }, { "epoch": 0.004807750578748126, "grad_norm": 1685.8907470703125, "learning_rate": 1.9040000000000003e-06, "loss": 343.0018, "step": 1190 }, { "epoch": 0.0048481518441157575, "grad_norm": 961.8662719726562, "learning_rate": 1.9200000000000003e-06, "loss": 345.5244, "step": 1200 }, { "epoch": 0.004888553109483389, "grad_norm": 1039.9063720703125, "learning_rate": 1.936e-06, "loss": 286.9379, "step": 1210 }, { "epoch": 0.00492895437485102, "grad_norm": 915.8055419921875, "learning_rate": 1.952e-06, "loss": 291.1856, "step": 1220 }, { "epoch": 0.0049693556402186515, "grad_norm": 2453.09716796875, "learning_rate": 1.968e-06, "loss": 357.5581, "step": 1230 }, { "epoch": 0.005009756905586283, "grad_norm": 1281.601318359375, "learning_rate": 1.984e-06, "loss": 370.0242, "step": 1240 }, { "epoch": 0.005050158170953914, "grad_norm": 1927.247802734375, "learning_rate": 2.0000000000000003e-06, "loss": 420.42, "step": 1250 }, { "epoch": 0.0050905594363215454, "grad_norm": 1159.7850341796875, "learning_rate": 2.0160000000000003e-06, "loss": 381.0742, "step": 1260 }, { "epoch": 0.005130960701689177, "grad_norm": 1102.625244140625, "learning_rate": 2.032e-06, "loss": 441.1049, "step": 1270 }, { "epoch": 0.005171361967056808, "grad_norm": 1439.484130859375, "learning_rate": 2.048e-06, "loss": 277.274, "step": 1280 }, { "epoch": 0.005211763232424439, "grad_norm": 5610.380859375, "learning_rate": 2.064e-06, "loss": 421.7488, "step": 1290 }, { "epoch": 0.005252164497792071, "grad_norm": 982.2139892578125, "learning_rate": 2.08e-06, "loss": 340.8586, "step": 1300 }, { "epoch": 0.005292565763159702, "grad_norm": 791.9103393554688, "learning_rate": 2.0960000000000003e-06, "loss": 363.6415, "step": 1310 }, { "epoch": 0.005332967028527333, "grad_norm": 0.0, "learning_rate": 2.1120000000000003e-06, "loss": 443.9375, "step": 1320 }, { "epoch": 0.005373368293894965, "grad_norm": 1269.4005126953125, "learning_rate": 2.128e-06, "loss": 352.3543, "step": 1330 }, { "epoch": 0.005413769559262596, "grad_norm": 1225.1302490234375, "learning_rate": 2.144e-06, "loss": 357.3038, "step": 1340 }, { "epoch": 0.005454170824630227, "grad_norm": 827.63720703125, "learning_rate": 2.16e-06, "loss": 303.4535, "step": 1350 }, { "epoch": 0.005494572089997859, "grad_norm": 848.7166137695312, "learning_rate": 2.176e-06, "loss": 268.09, "step": 1360 }, { "epoch": 0.00553497335536549, "grad_norm": 1410.2564697265625, "learning_rate": 2.1920000000000004e-06, "loss": 389.759, "step": 1370 }, { "epoch": 0.005575374620733121, "grad_norm": 1602.010009765625, "learning_rate": 2.2080000000000003e-06, "loss": 412.8606, "step": 1380 }, { "epoch": 0.005615775886100753, "grad_norm": 1337.7384033203125, "learning_rate": 2.2240000000000002e-06, "loss": 428.547, "step": 1390 }, { "epoch": 0.005656177151468384, "grad_norm": 867.4197387695312, "learning_rate": 2.24e-06, "loss": 378.6157, "step": 1400 }, { "epoch": 0.005696578416836015, "grad_norm": 731.271728515625, "learning_rate": 2.256e-06, "loss": 374.0744, "step": 1410 }, { "epoch": 0.0057369796822036466, "grad_norm": 2343.665771484375, "learning_rate": 2.2720000000000004e-06, "loss": 379.0267, "step": 1420 }, { "epoch": 0.005777380947571278, "grad_norm": 1540.48388671875, "learning_rate": 2.2880000000000004e-06, "loss": 382.6617, "step": 1430 }, { "epoch": 0.005817782212938909, "grad_norm": 1533.5958251953125, "learning_rate": 2.3040000000000003e-06, "loss": 396.0985, "step": 1440 }, { "epoch": 0.0058581834783065405, "grad_norm": 723.0481567382812, "learning_rate": 2.3200000000000002e-06, "loss": 308.0563, "step": 1450 }, { "epoch": 0.005898584743674172, "grad_norm": 2209.6083984375, "learning_rate": 2.336e-06, "loss": 424.7519, "step": 1460 }, { "epoch": 0.005938986009041803, "grad_norm": 566.4544067382812, "learning_rate": 2.352e-06, "loss": 248.9047, "step": 1470 }, { "epoch": 0.0059793872744094345, "grad_norm": 868.7781982421875, "learning_rate": 2.3680000000000005e-06, "loss": 257.8537, "step": 1480 }, { "epoch": 0.006019788539777066, "grad_norm": 732.210205078125, "learning_rate": 2.3840000000000004e-06, "loss": 304.146, "step": 1490 }, { "epoch": 0.006060189805144697, "grad_norm": 946.9075317382812, "learning_rate": 2.4000000000000003e-06, "loss": 239.7318, "step": 1500 }, { "epoch": 0.0061005910705123284, "grad_norm": 13990.4541015625, "learning_rate": 2.4160000000000002e-06, "loss": 317.3043, "step": 1510 }, { "epoch": 0.00614099233587996, "grad_norm": 1053.9326171875, "learning_rate": 2.432e-06, "loss": 306.3782, "step": 1520 }, { "epoch": 0.006181393601247591, "grad_norm": 1479.0665283203125, "learning_rate": 2.448e-06, "loss": 321.1809, "step": 1530 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 2.4640000000000005e-06, "loss": 272.3122, "step": 1540 }, { "epoch": 0.006262196131982854, "grad_norm": 836.43994140625, "learning_rate": 2.4800000000000004e-06, "loss": 276.743, "step": 1550 }, { "epoch": 0.006302597397350485, "grad_norm": 1576.191650390625, "learning_rate": 2.496e-06, "loss": 293.9756, "step": 1560 }, { "epoch": 0.006342998662718116, "grad_norm": 1241.993408203125, "learning_rate": 2.512e-06, "loss": 324.3042, "step": 1570 }, { "epoch": 0.006383399928085748, "grad_norm": 3129.779052734375, "learning_rate": 2.5280000000000006e-06, "loss": 282.3287, "step": 1580 }, { "epoch": 0.006423801193453379, "grad_norm": 824.8917846679688, "learning_rate": 2.5440000000000005e-06, "loss": 332.7758, "step": 1590 }, { "epoch": 0.00646420245882101, "grad_norm": 1429.83154296875, "learning_rate": 2.56e-06, "loss": 341.1543, "step": 1600 }, { "epoch": 0.006504603724188642, "grad_norm": 1096.0162353515625, "learning_rate": 2.576e-06, "loss": 370.956, "step": 1610 }, { "epoch": 0.006545004989556273, "grad_norm": 2190.672607421875, "learning_rate": 2.592e-06, "loss": 393.7615, "step": 1620 }, { "epoch": 0.006585406254923904, "grad_norm": 659.6456909179688, "learning_rate": 2.608e-06, "loss": 371.8131, "step": 1630 }, { "epoch": 0.006625807520291536, "grad_norm": 1114.4139404296875, "learning_rate": 2.6240000000000006e-06, "loss": 387.3432, "step": 1640 }, { "epoch": 0.006666208785659167, "grad_norm": 1945.1435546875, "learning_rate": 2.64e-06, "loss": 419.9613, "step": 1650 }, { "epoch": 0.006706610051026798, "grad_norm": 1134.198486328125, "learning_rate": 2.656e-06, "loss": 261.7552, "step": 1660 }, { "epoch": 0.0067470113163944296, "grad_norm": 625.1832885742188, "learning_rate": 2.672e-06, "loss": 348.9073, "step": 1670 }, { "epoch": 0.006787412581762061, "grad_norm": 1558.600830078125, "learning_rate": 2.688e-06, "loss": 329.5783, "step": 1680 }, { "epoch": 0.006827813847129692, "grad_norm": 903.2876586914062, "learning_rate": 2.704e-06, "loss": 288.3402, "step": 1690 }, { "epoch": 0.0068682151124973235, "grad_norm": 4166.87158203125, "learning_rate": 2.7200000000000002e-06, "loss": 357.7317, "step": 1700 }, { "epoch": 0.006908616377864955, "grad_norm": 784.2251586914062, "learning_rate": 2.736e-06, "loss": 318.5063, "step": 1710 }, { "epoch": 0.006949017643232586, "grad_norm": 5029.16552734375, "learning_rate": 2.752e-06, "loss": 442.4195, "step": 1720 }, { "epoch": 0.0069894189086002175, "grad_norm": 781.5169677734375, "learning_rate": 2.768e-06, "loss": 306.0392, "step": 1730 }, { "epoch": 0.007029820173967849, "grad_norm": 1048.294921875, "learning_rate": 2.784e-06, "loss": 359.3845, "step": 1740 }, { "epoch": 0.00707022143933548, "grad_norm": 1155.3671875, "learning_rate": 2.8000000000000003e-06, "loss": 359.3311, "step": 1750 }, { "epoch": 0.0071106227047031114, "grad_norm": 857.3798217773438, "learning_rate": 2.8160000000000002e-06, "loss": 391.5449, "step": 1760 }, { "epoch": 0.007151023970070743, "grad_norm": 933.0081787109375, "learning_rate": 2.832e-06, "loss": 297.32, "step": 1770 }, { "epoch": 0.007191425235438374, "grad_norm": 1060.47314453125, "learning_rate": 2.848e-06, "loss": 411.9166, "step": 1780 }, { "epoch": 0.007231826500806005, "grad_norm": 707.4050903320312, "learning_rate": 2.864e-06, "loss": 238.9362, "step": 1790 }, { "epoch": 0.007272227766173637, "grad_norm": 526.6619873046875, "learning_rate": 2.88e-06, "loss": 290.1179, "step": 1800 }, { "epoch": 0.007312629031541268, "grad_norm": 829.2198486328125, "learning_rate": 2.8960000000000003e-06, "loss": 347.0842, "step": 1810 }, { "epoch": 0.007353030296908899, "grad_norm": 716.2144165039062, "learning_rate": 2.9120000000000002e-06, "loss": 295.0978, "step": 1820 }, { "epoch": 0.007393431562276531, "grad_norm": 1365.7164306640625, "learning_rate": 2.928e-06, "loss": 399.1327, "step": 1830 }, { "epoch": 0.007433832827644162, "grad_norm": 1474.3541259765625, "learning_rate": 2.944e-06, "loss": 249.1963, "step": 1840 }, { "epoch": 0.007474234093011793, "grad_norm": 958.6668090820312, "learning_rate": 2.96e-06, "loss": 405.7022, "step": 1850 }, { "epoch": 0.007514635358379425, "grad_norm": 928.7461547851562, "learning_rate": 2.976e-06, "loss": 392.5513, "step": 1860 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 2.9920000000000003e-06, "loss": 235.2757, "step": 1870 }, { "epoch": 0.007595437889114687, "grad_norm": 13806.5771484375, "learning_rate": 3.0080000000000003e-06, "loss": 338.4041, "step": 1880 }, { "epoch": 0.007635839154482319, "grad_norm": 2829.76513671875, "learning_rate": 3.024e-06, "loss": 365.0503, "step": 1890 }, { "epoch": 0.00767624041984995, "grad_norm": 1206.556640625, "learning_rate": 3.04e-06, "loss": 372.7524, "step": 1900 }, { "epoch": 0.007716641685217581, "grad_norm": 1795.8287353515625, "learning_rate": 3.056e-06, "loss": 355.855, "step": 1910 }, { "epoch": 0.0077570429505852126, "grad_norm": 733.5438232421875, "learning_rate": 3.072e-06, "loss": 369.4895, "step": 1920 }, { "epoch": 0.007797444215952844, "grad_norm": 1936.3282470703125, "learning_rate": 3.0880000000000003e-06, "loss": 331.1103, "step": 1930 }, { "epoch": 0.007837845481320474, "grad_norm": 2228.966552734375, "learning_rate": 3.1040000000000003e-06, "loss": 441.473, "step": 1940 }, { "epoch": 0.007878246746688106, "grad_norm": 1527.3521728515625, "learning_rate": 3.12e-06, "loss": 286.5484, "step": 1950 }, { "epoch": 0.007918648012055737, "grad_norm": 1155.413330078125, "learning_rate": 3.136e-06, "loss": 325.0518, "step": 1960 }, { "epoch": 0.007959049277423368, "grad_norm": 5305.16845703125, "learning_rate": 3.152e-06, "loss": 426.8875, "step": 1970 }, { "epoch": 0.007999450542791, "grad_norm": 497.835693359375, "learning_rate": 3.1680000000000004e-06, "loss": 305.8856, "step": 1980 }, { "epoch": 0.008039851808158631, "grad_norm": 950.1622924804688, "learning_rate": 3.1840000000000003e-06, "loss": 298.3484, "step": 1990 }, { "epoch": 0.008080253073526262, "grad_norm": 977.10546875, "learning_rate": 3.2000000000000003e-06, "loss": 385.7309, "step": 2000 }, { "epoch": 0.008120654338893894, "grad_norm": 1347.1170654296875, "learning_rate": 3.216e-06, "loss": 311.8849, "step": 2010 }, { "epoch": 0.008161055604261525, "grad_norm": 7115.32470703125, "learning_rate": 3.232e-06, "loss": 310.0182, "step": 2020 }, { "epoch": 0.008201456869629156, "grad_norm": 979.557861328125, "learning_rate": 3.248e-06, "loss": 378.8968, "step": 2030 }, { "epoch": 0.008241858134996788, "grad_norm": 713.5515747070312, "learning_rate": 3.2640000000000004e-06, "loss": 333.8391, "step": 2040 }, { "epoch": 0.008282259400364419, "grad_norm": 1746.4747314453125, "learning_rate": 3.2800000000000004e-06, "loss": 211.6272, "step": 2050 }, { "epoch": 0.00832266066573205, "grad_norm": 3959.4423828125, "learning_rate": 3.2960000000000003e-06, "loss": 418.0941, "step": 2060 }, { "epoch": 0.008363061931099681, "grad_norm": 1449.259765625, "learning_rate": 3.3120000000000002e-06, "loss": 366.1021, "step": 2070 }, { "epoch": 0.008403463196467313, "grad_norm": 686.3818359375, "learning_rate": 3.328e-06, "loss": 317.5829, "step": 2080 }, { "epoch": 0.008443864461834944, "grad_norm": 1431.3048095703125, "learning_rate": 3.344e-06, "loss": 341.1351, "step": 2090 }, { "epoch": 0.008484265727202575, "grad_norm": 1141.1195068359375, "learning_rate": 3.3600000000000004e-06, "loss": 284.9502, "step": 2100 }, { "epoch": 0.008524666992570207, "grad_norm": 583.6392211914062, "learning_rate": 3.3760000000000004e-06, "loss": 246.5204, "step": 2110 }, { "epoch": 0.008565068257937838, "grad_norm": 1383.4288330078125, "learning_rate": 3.3920000000000003e-06, "loss": 335.3397, "step": 2120 }, { "epoch": 0.00860546952330547, "grad_norm": 720.5582275390625, "learning_rate": 3.4080000000000002e-06, "loss": 300.3475, "step": 2130 }, { "epoch": 0.0086458707886731, "grad_norm": 1018.2730712890625, "learning_rate": 3.424e-06, "loss": 364.7046, "step": 2140 }, { "epoch": 0.008686272054040732, "grad_norm": 836.5111694335938, "learning_rate": 3.44e-06, "loss": 215.2347, "step": 2150 }, { "epoch": 0.008726673319408363, "grad_norm": 1106.7752685546875, "learning_rate": 3.4560000000000005e-06, "loss": 392.8156, "step": 2160 }, { "epoch": 0.008767074584775995, "grad_norm": 838.0346069335938, "learning_rate": 3.4720000000000004e-06, "loss": 328.1276, "step": 2170 }, { "epoch": 0.008807475850143626, "grad_norm": 870.4414672851562, "learning_rate": 3.4880000000000003e-06, "loss": 313.8927, "step": 2180 }, { "epoch": 0.008847877115511257, "grad_norm": 2859.623046875, "learning_rate": 3.5040000000000002e-06, "loss": 241.9949, "step": 2190 }, { "epoch": 0.008888278380878889, "grad_norm": 971.9884033203125, "learning_rate": 3.52e-06, "loss": 349.5033, "step": 2200 }, { "epoch": 0.00892867964624652, "grad_norm": 670.4691772460938, "learning_rate": 3.5360000000000005e-06, "loss": 239.4784, "step": 2210 }, { "epoch": 0.008969080911614151, "grad_norm": 784.5670166015625, "learning_rate": 3.5520000000000005e-06, "loss": 310.9449, "step": 2220 }, { "epoch": 0.009009482176981783, "grad_norm": 915.0491333007812, "learning_rate": 3.5680000000000004e-06, "loss": 413.7835, "step": 2230 }, { "epoch": 0.009049883442349414, "grad_norm": 893.2843627929688, "learning_rate": 3.5840000000000003e-06, "loss": 365.2546, "step": 2240 }, { "epoch": 0.009090284707717045, "grad_norm": 1068.404296875, "learning_rate": 3.6000000000000003e-06, "loss": 302.3378, "step": 2250 }, { "epoch": 0.009130685973084677, "grad_norm": 1454.05859375, "learning_rate": 3.616e-06, "loss": 285.7942, "step": 2260 }, { "epoch": 0.009171087238452308, "grad_norm": 3212.861083984375, "learning_rate": 3.6320000000000005e-06, "loss": 296.5446, "step": 2270 }, { "epoch": 0.00921148850381994, "grad_norm": 700.9654541015625, "learning_rate": 3.6480000000000005e-06, "loss": 277.7706, "step": 2280 }, { "epoch": 0.00925188976918757, "grad_norm": 601.0996704101562, "learning_rate": 3.6640000000000004e-06, "loss": 299.2936, "step": 2290 }, { "epoch": 0.009292291034555202, "grad_norm": 1073.6248779296875, "learning_rate": 3.6800000000000003e-06, "loss": 227.1634, "step": 2300 }, { "epoch": 0.009332692299922833, "grad_norm": 803.8406372070312, "learning_rate": 3.6960000000000003e-06, "loss": 344.3006, "step": 2310 }, { "epoch": 0.009373093565290464, "grad_norm": 940.9983520507812, "learning_rate": 3.712e-06, "loss": 266.4591, "step": 2320 }, { "epoch": 0.009413494830658096, "grad_norm": 13408.8291015625, "learning_rate": 3.7280000000000006e-06, "loss": 295.9835, "step": 2330 }, { "epoch": 0.009453896096025727, "grad_norm": 1009.2761840820312, "learning_rate": 3.7440000000000005e-06, "loss": 292.3968, "step": 2340 }, { "epoch": 0.009494297361393358, "grad_norm": 556.4901733398438, "learning_rate": 3.7600000000000004e-06, "loss": 262.7075, "step": 2350 }, { "epoch": 0.00953469862676099, "grad_norm": 913.5816040039062, "learning_rate": 3.7760000000000004e-06, "loss": 508.4715, "step": 2360 }, { "epoch": 0.009575099892128621, "grad_norm": 1582.836181640625, "learning_rate": 3.7920000000000003e-06, "loss": 321.3247, "step": 2370 }, { "epoch": 0.009615501157496252, "grad_norm": 1292.945068359375, "learning_rate": 3.8080000000000006e-06, "loss": 266.3162, "step": 2380 }, { "epoch": 0.009655902422863884, "grad_norm": 741.0372314453125, "learning_rate": 3.824e-06, "loss": 333.5866, "step": 2390 }, { "epoch": 0.009696303688231515, "grad_norm": 594.6494750976562, "learning_rate": 3.8400000000000005e-06, "loss": 370.6466, "step": 2400 }, { "epoch": 0.009736704953599146, "grad_norm": 671.8253173828125, "learning_rate": 3.856e-06, "loss": 176.7593, "step": 2410 }, { "epoch": 0.009777106218966778, "grad_norm": 1318.59912109375, "learning_rate": 3.872e-06, "loss": 296.8816, "step": 2420 }, { "epoch": 0.009817507484334409, "grad_norm": 4490.33837890625, "learning_rate": 3.888e-06, "loss": 353.2209, "step": 2430 }, { "epoch": 0.00985790874970204, "grad_norm": 1145.6590576171875, "learning_rate": 3.904e-06, "loss": 232.9775, "step": 2440 }, { "epoch": 0.009898310015069672, "grad_norm": 669.175537109375, "learning_rate": 3.920000000000001e-06, "loss": 245.2131, "step": 2450 }, { "epoch": 0.009938711280437303, "grad_norm": 945.0543212890625, "learning_rate": 3.936e-06, "loss": 189.2121, "step": 2460 }, { "epoch": 0.009979112545804934, "grad_norm": 871.2230224609375, "learning_rate": 3.9520000000000004e-06, "loss": 354.6668, "step": 2470 }, { "epoch": 0.010019513811172566, "grad_norm": 1143.5745849609375, "learning_rate": 3.968e-06, "loss": 309.6017, "step": 2480 }, { "epoch": 0.010059915076540197, "grad_norm": 921.1688842773438, "learning_rate": 3.984e-06, "loss": 393.8875, "step": 2490 }, { "epoch": 0.010100316341907828, "grad_norm": 670.0267333984375, "learning_rate": 4.000000000000001e-06, "loss": 187.9601, "step": 2500 }, { "epoch": 0.01014071760727546, "grad_norm": 906.67626953125, "learning_rate": 4.016e-06, "loss": 263.1041, "step": 2510 }, { "epoch": 0.010181118872643091, "grad_norm": 501.8714599609375, "learning_rate": 4.0320000000000005e-06, "loss": 249.8009, "step": 2520 }, { "epoch": 0.010221520138010722, "grad_norm": 1012.7957153320312, "learning_rate": 4.048e-06, "loss": 215.6589, "step": 2530 }, { "epoch": 0.010261921403378354, "grad_norm": 1067.8087158203125, "learning_rate": 4.064e-06, "loss": 277.4538, "step": 2540 }, { "epoch": 0.010302322668745985, "grad_norm": 821.8876342773438, "learning_rate": 4.08e-06, "loss": 357.288, "step": 2550 }, { "epoch": 0.010342723934113616, "grad_norm": 695.7543334960938, "learning_rate": 4.096e-06, "loss": 192.3841, "step": 2560 }, { "epoch": 0.010383125199481247, "grad_norm": 2217.32080078125, "learning_rate": 4.112000000000001e-06, "loss": 311.4946, "step": 2570 }, { "epoch": 0.010423526464848879, "grad_norm": 1252.262451171875, "learning_rate": 4.128e-06, "loss": 299.7565, "step": 2580 }, { "epoch": 0.01046392773021651, "grad_norm": 1227.412109375, "learning_rate": 4.1440000000000005e-06, "loss": 258.2722, "step": 2590 }, { "epoch": 0.010504328995584141, "grad_norm": 1460.778076171875, "learning_rate": 4.16e-06, "loss": 310.2179, "step": 2600 }, { "epoch": 0.010544730260951773, "grad_norm": 3191.095947265625, "learning_rate": 4.176e-06, "loss": 276.1908, "step": 2610 }, { "epoch": 0.010585131526319404, "grad_norm": 841.0155029296875, "learning_rate": 4.192000000000001e-06, "loss": 409.2921, "step": 2620 }, { "epoch": 0.010625532791687035, "grad_norm": 979.8448486328125, "learning_rate": 4.208e-06, "loss": 370.0784, "step": 2630 }, { "epoch": 0.010665934057054667, "grad_norm": 779.2421264648438, "learning_rate": 4.2240000000000006e-06, "loss": 327.6988, "step": 2640 }, { "epoch": 0.010706335322422298, "grad_norm": 1958.304443359375, "learning_rate": 4.24e-06, "loss": 270.3837, "step": 2650 }, { "epoch": 0.01074673658778993, "grad_norm": 612.7789916992188, "learning_rate": 4.256e-06, "loss": 208.5096, "step": 2660 }, { "epoch": 0.01078713785315756, "grad_norm": 913.646484375, "learning_rate": 4.272000000000001e-06, "loss": 326.1478, "step": 2670 }, { "epoch": 0.010827539118525192, "grad_norm": 609.9149780273438, "learning_rate": 4.288e-06, "loss": 323.9391, "step": 2680 }, { "epoch": 0.010867940383892823, "grad_norm": 4146.9677734375, "learning_rate": 4.304000000000001e-06, "loss": 331.8204, "step": 2690 }, { "epoch": 0.010908341649260455, "grad_norm": 1144.696044921875, "learning_rate": 4.32e-06, "loss": 344.9819, "step": 2700 }, { "epoch": 0.010948742914628086, "grad_norm": 1466.951904296875, "learning_rate": 4.3360000000000005e-06, "loss": 364.1662, "step": 2710 }, { "epoch": 0.010989144179995717, "grad_norm": 955.5458984375, "learning_rate": 4.352e-06, "loss": 284.9516, "step": 2720 }, { "epoch": 0.011029545445363349, "grad_norm": 591.5704345703125, "learning_rate": 4.368e-06, "loss": 279.7713, "step": 2730 }, { "epoch": 0.01106994671073098, "grad_norm": 1922.548095703125, "learning_rate": 4.384000000000001e-06, "loss": 269.3951, "step": 2740 }, { "epoch": 0.011110347976098611, "grad_norm": 705.4920654296875, "learning_rate": 4.4e-06, "loss": 202.7344, "step": 2750 }, { "epoch": 0.011150749241466243, "grad_norm": 6034.74951171875, "learning_rate": 4.416000000000001e-06, "loss": 319.9652, "step": 2760 }, { "epoch": 0.011191150506833874, "grad_norm": 1474.4591064453125, "learning_rate": 4.432e-06, "loss": 239.9669, "step": 2770 }, { "epoch": 0.011231551772201505, "grad_norm": 998.685302734375, "learning_rate": 4.4480000000000004e-06, "loss": 254.0082, "step": 2780 }, { "epoch": 0.011271953037569137, "grad_norm": 1730.3958740234375, "learning_rate": 4.464000000000001e-06, "loss": 285.7142, "step": 2790 }, { "epoch": 0.011312354302936768, "grad_norm": 666.0034790039062, "learning_rate": 4.48e-06, "loss": 243.7346, "step": 2800 }, { "epoch": 0.0113527555683044, "grad_norm": 1185.6009521484375, "learning_rate": 4.496000000000001e-06, "loss": 243.6461, "step": 2810 }, { "epoch": 0.01139315683367203, "grad_norm": 902.0358276367188, "learning_rate": 4.512e-06, "loss": 193.6116, "step": 2820 }, { "epoch": 0.011433558099039662, "grad_norm": 1310.3072509765625, "learning_rate": 4.5280000000000005e-06, "loss": 327.7516, "step": 2830 }, { "epoch": 0.011473959364407293, "grad_norm": 693.1176147460938, "learning_rate": 4.544000000000001e-06, "loss": 239.3185, "step": 2840 }, { "epoch": 0.011514360629774924, "grad_norm": 968.5886840820312, "learning_rate": 4.56e-06, "loss": 287.3573, "step": 2850 }, { "epoch": 0.011554761895142556, "grad_norm": 705.2077026367188, "learning_rate": 4.576000000000001e-06, "loss": 282.05, "step": 2860 }, { "epoch": 0.011595163160510187, "grad_norm": 1975.849365234375, "learning_rate": 4.592e-06, "loss": 296.7342, "step": 2870 }, { "epoch": 0.011635564425877818, "grad_norm": 746.0858764648438, "learning_rate": 4.608000000000001e-06, "loss": 204.9099, "step": 2880 }, { "epoch": 0.01167596569124545, "grad_norm": 1130.648681640625, "learning_rate": 4.624e-06, "loss": 342.3003, "step": 2890 }, { "epoch": 0.011716366956613081, "grad_norm": 1225.900634765625, "learning_rate": 4.6400000000000005e-06, "loss": 302.3731, "step": 2900 }, { "epoch": 0.011756768221980712, "grad_norm": 965.9699096679688, "learning_rate": 4.656000000000001e-06, "loss": 403.5073, "step": 2910 }, { "epoch": 0.011797169487348344, "grad_norm": 700.615966796875, "learning_rate": 4.672e-06, "loss": 220.3576, "step": 2920 }, { "epoch": 0.011837570752715975, "grad_norm": 744.499267578125, "learning_rate": 4.688000000000001e-06, "loss": 227.1823, "step": 2930 }, { "epoch": 0.011877972018083606, "grad_norm": 588.2945556640625, "learning_rate": 4.704e-06, "loss": 294.2805, "step": 2940 }, { "epoch": 0.011918373283451238, "grad_norm": 8179.02490234375, "learning_rate": 4.7200000000000005e-06, "loss": 360.6105, "step": 2950 }, { "epoch": 0.011958774548818869, "grad_norm": 4027.3515625, "learning_rate": 4.736000000000001e-06, "loss": 286.0583, "step": 2960 }, { "epoch": 0.0119991758141865, "grad_norm": 8620.037109375, "learning_rate": 4.752e-06, "loss": 338.6114, "step": 2970 }, { "epoch": 0.012039577079554132, "grad_norm": 1030.6036376953125, "learning_rate": 4.768000000000001e-06, "loss": 293.7208, "step": 2980 }, { "epoch": 0.012079978344921763, "grad_norm": 2113.48095703125, "learning_rate": 4.784e-06, "loss": 338.7042, "step": 2990 }, { "epoch": 0.012120379610289394, "grad_norm": 664.2764892578125, "learning_rate": 4.800000000000001e-06, "loss": 180.6681, "step": 3000 }, { "epoch": 0.012160780875657026, "grad_norm": 2052.64111328125, "learning_rate": 4.816e-06, "loss": 269.5311, "step": 3010 }, { "epoch": 0.012201182141024657, "grad_norm": 646.8975219726562, "learning_rate": 4.8320000000000005e-06, "loss": 199.238, "step": 3020 }, { "epoch": 0.012241583406392288, "grad_norm": 1087.690673828125, "learning_rate": 4.848000000000001e-06, "loss": 300.9158, "step": 3030 }, { "epoch": 0.01228198467175992, "grad_norm": 1012.8238525390625, "learning_rate": 4.864e-06, "loss": 405.1533, "step": 3040 }, { "epoch": 0.01232238593712755, "grad_norm": 1519.6015625, "learning_rate": 4.880000000000001e-06, "loss": 340.4835, "step": 3050 }, { "epoch": 0.012362787202495182, "grad_norm": 787.8037109375, "learning_rate": 4.896e-06, "loss": 249.336, "step": 3060 }, { "epoch": 0.012403188467862813, "grad_norm": 1030.4306640625, "learning_rate": 4.9120000000000006e-06, "loss": 248.3195, "step": 3070 }, { "epoch": 0.012443589733230445, "grad_norm": 1620.253173828125, "learning_rate": 4.928000000000001e-06, "loss": 210.1467, "step": 3080 }, { "epoch": 0.012483990998598076, "grad_norm": 2037.361328125, "learning_rate": 4.9440000000000004e-06, "loss": 331.2714, "step": 3090 }, { "epoch": 0.012524392263965707, "grad_norm": 4152.18701171875, "learning_rate": 4.960000000000001e-06, "loss": 369.5478, "step": 3100 }, { "epoch": 0.012564793529333339, "grad_norm": 751.2320556640625, "learning_rate": 4.976e-06, "loss": 339.7245, "step": 3110 }, { "epoch": 0.01260519479470097, "grad_norm": 1463.79833984375, "learning_rate": 4.992e-06, "loss": 349.4508, "step": 3120 }, { "epoch": 0.012645596060068601, "grad_norm": 931.794189453125, "learning_rate": 5.008000000000001e-06, "loss": 219.2512, "step": 3130 }, { "epoch": 0.012685997325436233, "grad_norm": 587.0802001953125, "learning_rate": 5.024e-06, "loss": 249.6072, "step": 3140 }, { "epoch": 0.012726398590803864, "grad_norm": 4629.9833984375, "learning_rate": 5.04e-06, "loss": 274.248, "step": 3150 }, { "epoch": 0.012766799856171495, "grad_norm": 1912.8997802734375, "learning_rate": 5.056000000000001e-06, "loss": 265.1541, "step": 3160 }, { "epoch": 0.012807201121539127, "grad_norm": 714.0695190429688, "learning_rate": 5.072e-06, "loss": 189.4337, "step": 3170 }, { "epoch": 0.012847602386906758, "grad_norm": 1278.3006591796875, "learning_rate": 5.088000000000001e-06, "loss": 368.549, "step": 3180 }, { "epoch": 0.01288800365227439, "grad_norm": 749.0835571289062, "learning_rate": 5.104e-06, "loss": 228.1246, "step": 3190 }, { "epoch": 0.01292840491764202, "grad_norm": 0.0, "learning_rate": 5.12e-06, "loss": 227.3482, "step": 3200 }, { "epoch": 0.012968806183009652, "grad_norm": 888.6599731445312, "learning_rate": 5.136e-06, "loss": 310.4809, "step": 3210 }, { "epoch": 0.013009207448377283, "grad_norm": 760.9104614257812, "learning_rate": 5.152e-06, "loss": 229.1965, "step": 3220 }, { "epoch": 0.013049608713744915, "grad_norm": 1096.23291015625, "learning_rate": 5.168000000000001e-06, "loss": 325.4543, "step": 3230 }, { "epoch": 0.013090009979112546, "grad_norm": 2388.175537109375, "learning_rate": 5.184e-06, "loss": 270.4536, "step": 3240 }, { "epoch": 0.013130411244480177, "grad_norm": 7733.79296875, "learning_rate": 5.2e-06, "loss": 364.0487, "step": 3250 }, { "epoch": 0.013170812509847809, "grad_norm": 1177.9654541015625, "learning_rate": 5.216e-06, "loss": 272.8813, "step": 3260 }, { "epoch": 0.01321121377521544, "grad_norm": 1095.551025390625, "learning_rate": 5.232e-06, "loss": 356.0726, "step": 3270 }, { "epoch": 0.013251615040583071, "grad_norm": 1486.8251953125, "learning_rate": 5.248000000000001e-06, "loss": 269.6024, "step": 3280 }, { "epoch": 0.013292016305950703, "grad_norm": 1649.369384765625, "learning_rate": 5.264e-06, "loss": 406.2197, "step": 3290 }, { "epoch": 0.013332417571318334, "grad_norm": 901.133544921875, "learning_rate": 5.28e-06, "loss": 334.5719, "step": 3300 }, { "epoch": 0.013372818836685965, "grad_norm": 1080.5753173828125, "learning_rate": 5.296e-06, "loss": 256.0706, "step": 3310 }, { "epoch": 0.013413220102053596, "grad_norm": 903.6046752929688, "learning_rate": 5.312e-06, "loss": 229.897, "step": 3320 }, { "epoch": 0.013453621367421228, "grad_norm": 984.148193359375, "learning_rate": 5.328000000000001e-06, "loss": 268.5443, "step": 3330 }, { "epoch": 0.013494022632788859, "grad_norm": 1167.3309326171875, "learning_rate": 5.344e-06, "loss": 339.0956, "step": 3340 }, { "epoch": 0.01353442389815649, "grad_norm": 2445.7763671875, "learning_rate": 5.36e-06, "loss": 237.0434, "step": 3350 }, { "epoch": 0.013574825163524122, "grad_norm": 814.3775634765625, "learning_rate": 5.376e-06, "loss": 182.9125, "step": 3360 }, { "epoch": 0.013615226428891753, "grad_norm": 883.6320190429688, "learning_rate": 5.392e-06, "loss": 346.1443, "step": 3370 }, { "epoch": 0.013655627694259384, "grad_norm": 788.3290405273438, "learning_rate": 5.408e-06, "loss": 143.6525, "step": 3380 }, { "epoch": 0.013696028959627016, "grad_norm": 1109.12744140625, "learning_rate": 5.424e-06, "loss": 293.3582, "step": 3390 }, { "epoch": 0.013736430224994647, "grad_norm": 968.6015014648438, "learning_rate": 5.4400000000000004e-06, "loss": 228.7989, "step": 3400 }, { "epoch": 0.013776831490362278, "grad_norm": 1213.0040283203125, "learning_rate": 5.456e-06, "loss": 432.3414, "step": 3410 }, { "epoch": 0.01381723275572991, "grad_norm": 1326.287109375, "learning_rate": 5.472e-06, "loss": 332.6496, "step": 3420 }, { "epoch": 0.013857634021097541, "grad_norm": 6259.54248046875, "learning_rate": 5.488e-06, "loss": 271.7205, "step": 3430 }, { "epoch": 0.013898035286465172, "grad_norm": 708.2656860351562, "learning_rate": 5.504e-06, "loss": 180.486, "step": 3440 }, { "epoch": 0.013938436551832804, "grad_norm": 685.2205200195312, "learning_rate": 5.5200000000000005e-06, "loss": 268.5384, "step": 3450 }, { "epoch": 0.013978837817200435, "grad_norm": 1125.2498779296875, "learning_rate": 5.536e-06, "loss": 288.8181, "step": 3460 }, { "epoch": 0.014019239082568066, "grad_norm": 1228.8125, "learning_rate": 5.552e-06, "loss": 244.6971, "step": 3470 }, { "epoch": 0.014059640347935698, "grad_norm": 2762.309814453125, "learning_rate": 5.568e-06, "loss": 273.8453, "step": 3480 }, { "epoch": 0.014100041613303329, "grad_norm": 945.8040161132812, "learning_rate": 5.584e-06, "loss": 240.8805, "step": 3490 }, { "epoch": 0.01414044287867096, "grad_norm": 2207.40185546875, "learning_rate": 5.600000000000001e-06, "loss": 165.5966, "step": 3500 }, { "epoch": 0.014180844144038592, "grad_norm": 622.099609375, "learning_rate": 5.616e-06, "loss": 202.4418, "step": 3510 }, { "epoch": 0.014221245409406223, "grad_norm": 1473.064208984375, "learning_rate": 5.6320000000000005e-06, "loss": 229.2896, "step": 3520 }, { "epoch": 0.014261646674773854, "grad_norm": 1057.5018310546875, "learning_rate": 5.648e-06, "loss": 211.9801, "step": 3530 }, { "epoch": 0.014302047940141486, "grad_norm": 4774.0947265625, "learning_rate": 5.664e-06, "loss": 204.8896, "step": 3540 }, { "epoch": 0.014342449205509117, "grad_norm": 676.964111328125, "learning_rate": 5.68e-06, "loss": 200.4312, "step": 3550 }, { "epoch": 0.014382850470876748, "grad_norm": 571.7015380859375, "learning_rate": 5.696e-06, "loss": 275.7348, "step": 3560 }, { "epoch": 0.01442325173624438, "grad_norm": 1923.3953857421875, "learning_rate": 5.7120000000000005e-06, "loss": 224.3793, "step": 3570 }, { "epoch": 0.01446365300161201, "grad_norm": 541.0182495117188, "learning_rate": 5.728e-06, "loss": 155.7405, "step": 3580 }, { "epoch": 0.014504054266979642, "grad_norm": 1030.9990234375, "learning_rate": 5.744e-06, "loss": 225.919, "step": 3590 }, { "epoch": 0.014544455532347273, "grad_norm": 1258.4619140625, "learning_rate": 5.76e-06, "loss": 242.7457, "step": 3600 }, { "epoch": 0.014584856797714905, "grad_norm": 1085.32177734375, "learning_rate": 5.776e-06, "loss": 251.3692, "step": 3610 }, { "epoch": 0.014625258063082536, "grad_norm": 880.9969482421875, "learning_rate": 5.792000000000001e-06, "loss": 228.1045, "step": 3620 }, { "epoch": 0.014665659328450167, "grad_norm": 616.6618041992188, "learning_rate": 5.808e-06, "loss": 216.2711, "step": 3630 }, { "epoch": 0.014706060593817799, "grad_norm": 2865.4169921875, "learning_rate": 5.8240000000000005e-06, "loss": 253.2795, "step": 3640 }, { "epoch": 0.01474646185918543, "grad_norm": 941.0968627929688, "learning_rate": 5.84e-06, "loss": 268.1058, "step": 3650 }, { "epoch": 0.014786863124553061, "grad_norm": 1495.3153076171875, "learning_rate": 5.856e-06, "loss": 262.6222, "step": 3660 }, { "epoch": 0.014827264389920693, "grad_norm": 1153.8773193359375, "learning_rate": 5.872000000000001e-06, "loss": 314.2995, "step": 3670 }, { "epoch": 0.014867665655288324, "grad_norm": 3272.336669921875, "learning_rate": 5.888e-06, "loss": 275.6237, "step": 3680 }, { "epoch": 0.014908066920655955, "grad_norm": 1363.574462890625, "learning_rate": 5.9040000000000006e-06, "loss": 408.7946, "step": 3690 }, { "epoch": 0.014948468186023587, "grad_norm": 1989.05810546875, "learning_rate": 5.92e-06, "loss": 347.1884, "step": 3700 }, { "epoch": 0.014988869451391218, "grad_norm": 893.3648071289062, "learning_rate": 5.9360000000000004e-06, "loss": 231.8197, "step": 3710 }, { "epoch": 0.01502927071675885, "grad_norm": 2015.3258056640625, "learning_rate": 5.952e-06, "loss": 241.9273, "step": 3720 }, { "epoch": 0.01506967198212648, "grad_norm": 1097.1138916015625, "learning_rate": 5.968e-06, "loss": 289.7548, "step": 3730 }, { "epoch": 0.015110073247494112, "grad_norm": 1198.3504638671875, "learning_rate": 5.984000000000001e-06, "loss": 257.8569, "step": 3740 }, { "epoch": 0.015150474512861743, "grad_norm": 2764.9599609375, "learning_rate": 6e-06, "loss": 394.4565, "step": 3750 }, { "epoch": 0.015190875778229375, "grad_norm": 758.348876953125, "learning_rate": 6.0160000000000005e-06, "loss": 249.2975, "step": 3760 }, { "epoch": 0.015231277043597006, "grad_norm": 2277.13818359375, "learning_rate": 6.032e-06, "loss": 294.9143, "step": 3770 }, { "epoch": 0.015271678308964637, "grad_norm": 2135.546142578125, "learning_rate": 6.048e-06, "loss": 235.1545, "step": 3780 }, { "epoch": 0.015312079574332269, "grad_norm": 1097.388916015625, "learning_rate": 6.064000000000001e-06, "loss": 241.006, "step": 3790 }, { "epoch": 0.0153524808396999, "grad_norm": 943.5238647460938, "learning_rate": 6.08e-06, "loss": 174.3143, "step": 3800 }, { "epoch": 0.015392882105067531, "grad_norm": 1239.0521240234375, "learning_rate": 6.096000000000001e-06, "loss": 307.3331, "step": 3810 }, { "epoch": 0.015433283370435162, "grad_norm": 643.6400146484375, "learning_rate": 6.112e-06, "loss": 217.4633, "step": 3820 }, { "epoch": 0.015473684635802794, "grad_norm": 1569.7869873046875, "learning_rate": 6.1280000000000005e-06, "loss": 322.4538, "step": 3830 }, { "epoch": 0.015514085901170425, "grad_norm": 3763.859130859375, "learning_rate": 6.144e-06, "loss": 182.1174, "step": 3840 }, { "epoch": 0.015554487166538056, "grad_norm": 717.2794799804688, "learning_rate": 6.16e-06, "loss": 207.8513, "step": 3850 }, { "epoch": 0.015594888431905688, "grad_norm": 1418.3277587890625, "learning_rate": 6.176000000000001e-06, "loss": 254.4121, "step": 3860 }, { "epoch": 0.01563528969727332, "grad_norm": 1528.9119873046875, "learning_rate": 6.192e-06, "loss": 223.6123, "step": 3870 }, { "epoch": 0.01567569096264095, "grad_norm": 544.6751708984375, "learning_rate": 6.2080000000000005e-06, "loss": 222.1293, "step": 3880 }, { "epoch": 0.01571609222800858, "grad_norm": 754.9981079101562, "learning_rate": 6.224e-06, "loss": 255.1399, "step": 3890 }, { "epoch": 0.01575649349337621, "grad_norm": 1966.5140380859375, "learning_rate": 6.24e-06, "loss": 275.0173, "step": 3900 }, { "epoch": 0.015796894758743844, "grad_norm": 810.1495361328125, "learning_rate": 6.256000000000001e-06, "loss": 219.6732, "step": 3910 }, { "epoch": 0.015837296024111474, "grad_norm": 3056.527099609375, "learning_rate": 6.272e-06, "loss": 184.322, "step": 3920 }, { "epoch": 0.015877697289479107, "grad_norm": 2820.2568359375, "learning_rate": 6.288000000000001e-06, "loss": 235.3666, "step": 3930 }, { "epoch": 0.015918098554846737, "grad_norm": 1320.5225830078125, "learning_rate": 6.304e-06, "loss": 220.3165, "step": 3940 }, { "epoch": 0.01595849982021437, "grad_norm": 933.7466430664062, "learning_rate": 6.3200000000000005e-06, "loss": 307.4496, "step": 3950 }, { "epoch": 0.015998901085582, "grad_norm": 867.0122680664062, "learning_rate": 6.336000000000001e-06, "loss": 263.1927, "step": 3960 }, { "epoch": 0.016039302350949632, "grad_norm": 747.3133544921875, "learning_rate": 6.352e-06, "loss": 306.4962, "step": 3970 }, { "epoch": 0.016079703616317262, "grad_norm": 711.7617797851562, "learning_rate": 6.368000000000001e-06, "loss": 243.1562, "step": 3980 }, { "epoch": 0.016120104881684895, "grad_norm": 732.2003173828125, "learning_rate": 6.384e-06, "loss": 231.337, "step": 3990 }, { "epoch": 0.016160506147052525, "grad_norm": 582.39990234375, "learning_rate": 6.4000000000000006e-06, "loss": 273.9559, "step": 4000 }, { "epoch": 0.016200907412420158, "grad_norm": 572.5735473632812, "learning_rate": 6.416e-06, "loss": 212.5563, "step": 4010 }, { "epoch": 0.016241308677787787, "grad_norm": 0.0, "learning_rate": 6.432e-06, "loss": 224.8545, "step": 4020 }, { "epoch": 0.01628170994315542, "grad_norm": 887.3201293945312, "learning_rate": 6.448000000000001e-06, "loss": 271.5942, "step": 4030 }, { "epoch": 0.01632211120852305, "grad_norm": 1036.5596923828125, "learning_rate": 6.464e-06, "loss": 324.4769, "step": 4040 }, { "epoch": 0.016362512473890683, "grad_norm": 1676.9654541015625, "learning_rate": 6.480000000000001e-06, "loss": 221.9381, "step": 4050 }, { "epoch": 0.016402913739258312, "grad_norm": 1006.4035034179688, "learning_rate": 6.496e-06, "loss": 330.1907, "step": 4060 }, { "epoch": 0.016443315004625945, "grad_norm": 1340.500244140625, "learning_rate": 6.5120000000000005e-06, "loss": 241.6059, "step": 4070 }, { "epoch": 0.016483716269993575, "grad_norm": 824.397705078125, "learning_rate": 6.528000000000001e-06, "loss": 237.4955, "step": 4080 }, { "epoch": 0.016524117535361208, "grad_norm": 623.558837890625, "learning_rate": 6.544e-06, "loss": 338.69, "step": 4090 }, { "epoch": 0.016564518800728838, "grad_norm": 1017.6536865234375, "learning_rate": 6.560000000000001e-06, "loss": 250.0294, "step": 4100 }, { "epoch": 0.01660492006609647, "grad_norm": 708.3771362304688, "learning_rate": 6.576e-06, "loss": 201.3914, "step": 4110 }, { "epoch": 0.0166453213314641, "grad_norm": 609.1826171875, "learning_rate": 6.592000000000001e-06, "loss": 320.5673, "step": 4120 }, { "epoch": 0.016685722596831733, "grad_norm": 833.898193359375, "learning_rate": 6.608000000000001e-06, "loss": 214.4177, "step": 4130 }, { "epoch": 0.016726123862199363, "grad_norm": 1081.476318359375, "learning_rate": 6.6240000000000004e-06, "loss": 264.2287, "step": 4140 }, { "epoch": 0.016766525127566996, "grad_norm": 1495.6732177734375, "learning_rate": 6.640000000000001e-06, "loss": 315.7932, "step": 4150 }, { "epoch": 0.016806926392934626, "grad_norm": 838.46630859375, "learning_rate": 6.656e-06, "loss": 280.6871, "step": 4160 }, { "epoch": 0.01684732765830226, "grad_norm": 1784.915283203125, "learning_rate": 6.672000000000001e-06, "loss": 342.9094, "step": 4170 }, { "epoch": 0.016887728923669888, "grad_norm": 2573.862548828125, "learning_rate": 6.688e-06, "loss": 242.5726, "step": 4180 }, { "epoch": 0.01692813018903752, "grad_norm": 604.2163696289062, "learning_rate": 6.7040000000000005e-06, "loss": 243.26, "step": 4190 }, { "epoch": 0.01696853145440515, "grad_norm": 1083.6295166015625, "learning_rate": 6.720000000000001e-06, "loss": 174.146, "step": 4200 }, { "epoch": 0.017008932719772784, "grad_norm": 844.8646850585938, "learning_rate": 6.736e-06, "loss": 270.4487, "step": 4210 }, { "epoch": 0.017049333985140414, "grad_norm": 555.3699340820312, "learning_rate": 6.752000000000001e-06, "loss": 185.4077, "step": 4220 }, { "epoch": 0.017089735250508047, "grad_norm": 1184.4735107421875, "learning_rate": 6.768e-06, "loss": 287.9112, "step": 4230 }, { "epoch": 0.017130136515875676, "grad_norm": 699.8986206054688, "learning_rate": 6.784000000000001e-06, "loss": 312.1473, "step": 4240 }, { "epoch": 0.01717053778124331, "grad_norm": 1370.1610107421875, "learning_rate": 6.800000000000001e-06, "loss": 267.4715, "step": 4250 }, { "epoch": 0.01721093904661094, "grad_norm": 735.01123046875, "learning_rate": 6.8160000000000005e-06, "loss": 259.6105, "step": 4260 }, { "epoch": 0.017251340311978572, "grad_norm": 768.6294555664062, "learning_rate": 6.832000000000001e-06, "loss": 195.3645, "step": 4270 }, { "epoch": 0.0172917415773462, "grad_norm": 1129.20068359375, "learning_rate": 6.848e-06, "loss": 333.3923, "step": 4280 }, { "epoch": 0.017332142842713835, "grad_norm": 620.0527954101562, "learning_rate": 6.864000000000001e-06, "loss": 140.4836, "step": 4290 }, { "epoch": 0.017372544108081464, "grad_norm": 1524.297119140625, "learning_rate": 6.88e-06, "loss": 234.0078, "step": 4300 }, { "epoch": 0.017412945373449097, "grad_norm": 1074.130126953125, "learning_rate": 6.8960000000000006e-06, "loss": 333.4636, "step": 4310 }, { "epoch": 0.017453346638816727, "grad_norm": 1849.30419921875, "learning_rate": 6.912000000000001e-06, "loss": 250.6815, "step": 4320 }, { "epoch": 0.01749374790418436, "grad_norm": 953.8714599609375, "learning_rate": 6.928e-06, "loss": 168.5187, "step": 4330 }, { "epoch": 0.01753414916955199, "grad_norm": 1707.04296875, "learning_rate": 6.944000000000001e-06, "loss": 247.184, "step": 4340 }, { "epoch": 0.017574550434919622, "grad_norm": 1027.0791015625, "learning_rate": 6.96e-06, "loss": 212.3976, "step": 4350 }, { "epoch": 0.017614951700287252, "grad_norm": 1421.16650390625, "learning_rate": 6.976000000000001e-06, "loss": 196.0206, "step": 4360 }, { "epoch": 0.017655352965654885, "grad_norm": 2190.223388671875, "learning_rate": 6.992000000000001e-06, "loss": 245.279, "step": 4370 }, { "epoch": 0.017695754231022515, "grad_norm": 607.2709350585938, "learning_rate": 7.0080000000000005e-06, "loss": 269.9326, "step": 4380 }, { "epoch": 0.017736155496390148, "grad_norm": 905.8397216796875, "learning_rate": 7.024000000000001e-06, "loss": 197.5017, "step": 4390 }, { "epoch": 0.017776556761757777, "grad_norm": 829.648681640625, "learning_rate": 7.04e-06, "loss": 306.7443, "step": 4400 }, { "epoch": 0.01781695802712541, "grad_norm": 750.1204223632812, "learning_rate": 7.056000000000001e-06, "loss": 208.2544, "step": 4410 }, { "epoch": 0.01785735929249304, "grad_norm": 1103.8912353515625, "learning_rate": 7.072000000000001e-06, "loss": 208.2772, "step": 4420 }, { "epoch": 0.017897760557860673, "grad_norm": 2012.5870361328125, "learning_rate": 7.088000000000001e-06, "loss": 227.8844, "step": 4430 }, { "epoch": 0.017938161823228303, "grad_norm": 1063.1541748046875, "learning_rate": 7.104000000000001e-06, "loss": 197.1388, "step": 4440 }, { "epoch": 0.017978563088595936, "grad_norm": 1133.552978515625, "learning_rate": 7.1200000000000004e-06, "loss": 212.4223, "step": 4450 }, { "epoch": 0.018018964353963565, "grad_norm": 808.1385498046875, "learning_rate": 7.136000000000001e-06, "loss": 224.3651, "step": 4460 }, { "epoch": 0.0180593656193312, "grad_norm": 1333.4466552734375, "learning_rate": 7.152e-06, "loss": 213.2375, "step": 4470 }, { "epoch": 0.018099766884698828, "grad_norm": 1058.0499267578125, "learning_rate": 7.168000000000001e-06, "loss": 239.6389, "step": 4480 }, { "epoch": 0.01814016815006646, "grad_norm": 1192.045654296875, "learning_rate": 7.184000000000001e-06, "loss": 201.7774, "step": 4490 }, { "epoch": 0.01818056941543409, "grad_norm": 761.6637573242188, "learning_rate": 7.2000000000000005e-06, "loss": 239.5527, "step": 4500 }, { "epoch": 0.018220970680801724, "grad_norm": 1080.089111328125, "learning_rate": 7.216000000000001e-06, "loss": 220.7039, "step": 4510 }, { "epoch": 0.018261371946169353, "grad_norm": 900.0491943359375, "learning_rate": 7.232e-06, "loss": 202.6598, "step": 4520 }, { "epoch": 0.018301773211536986, "grad_norm": 1005.7420043945312, "learning_rate": 7.248000000000001e-06, "loss": 226.7294, "step": 4530 }, { "epoch": 0.018342174476904616, "grad_norm": 942.6360473632812, "learning_rate": 7.264000000000001e-06, "loss": 188.8948, "step": 4540 }, { "epoch": 0.01838257574227225, "grad_norm": 968.3139038085938, "learning_rate": 7.280000000000001e-06, "loss": 313.6362, "step": 4550 }, { "epoch": 0.01842297700763988, "grad_norm": 1409.2060546875, "learning_rate": 7.296000000000001e-06, "loss": 322.2942, "step": 4560 }, { "epoch": 0.01846337827300751, "grad_norm": 814.0696411132812, "learning_rate": 7.3120000000000005e-06, "loss": 222.029, "step": 4570 }, { "epoch": 0.01850377953837514, "grad_norm": 809.8764038085938, "learning_rate": 7.328000000000001e-06, "loss": 273.6918, "step": 4580 }, { "epoch": 0.018544180803742774, "grad_norm": 513.40625, "learning_rate": 7.344000000000001e-06, "loss": 307.8005, "step": 4590 }, { "epoch": 0.018584582069110404, "grad_norm": 4324.00146484375, "learning_rate": 7.360000000000001e-06, "loss": 303.7309, "step": 4600 }, { "epoch": 0.018624983334478037, "grad_norm": 836.23193359375, "learning_rate": 7.376000000000001e-06, "loss": 204.3285, "step": 4610 }, { "epoch": 0.018665384599845666, "grad_norm": 579.7659301757812, "learning_rate": 7.3920000000000005e-06, "loss": 175.3677, "step": 4620 }, { "epoch": 0.0187057858652133, "grad_norm": 1128.6890869140625, "learning_rate": 7.408000000000001e-06, "loss": 183.0995, "step": 4630 }, { "epoch": 0.01874618713058093, "grad_norm": 1351.5643310546875, "learning_rate": 7.424e-06, "loss": 184.3664, "step": 4640 }, { "epoch": 0.018786588395948562, "grad_norm": 2014.1749267578125, "learning_rate": 7.440000000000001e-06, "loss": 270.0711, "step": 4650 }, { "epoch": 0.01882698966131619, "grad_norm": 1736.61279296875, "learning_rate": 7.456000000000001e-06, "loss": 288.5561, "step": 4660 }, { "epoch": 0.018867390926683825, "grad_norm": 725.383544921875, "learning_rate": 7.472000000000001e-06, "loss": 279.8696, "step": 4670 }, { "epoch": 0.018907792192051454, "grad_norm": 1754.6114501953125, "learning_rate": 7.488000000000001e-06, "loss": 201.6906, "step": 4680 }, { "epoch": 0.018948193457419087, "grad_norm": 1377.6466064453125, "learning_rate": 7.5040000000000005e-06, "loss": 200.6483, "step": 4690 }, { "epoch": 0.018988594722786717, "grad_norm": 2007.0408935546875, "learning_rate": 7.520000000000001e-06, "loss": 232.2962, "step": 4700 }, { "epoch": 0.01902899598815435, "grad_norm": 1615.7125244140625, "learning_rate": 7.536000000000001e-06, "loss": 245.0112, "step": 4710 }, { "epoch": 0.01906939725352198, "grad_norm": 0.0, "learning_rate": 7.552000000000001e-06, "loss": 189.8007, "step": 4720 }, { "epoch": 0.019109798518889613, "grad_norm": 1523.142333984375, "learning_rate": 7.568000000000001e-06, "loss": 194.2404, "step": 4730 }, { "epoch": 0.019150199784257242, "grad_norm": 2107.94677734375, "learning_rate": 7.5840000000000006e-06, "loss": 254.0392, "step": 4740 }, { "epoch": 0.019190601049624875, "grad_norm": 858.9860229492188, "learning_rate": 7.600000000000001e-06, "loss": 260.6963, "step": 4750 }, { "epoch": 0.019231002314992505, "grad_norm": 801.8906860351562, "learning_rate": 7.616000000000001e-06, "loss": 210.6171, "step": 4760 }, { "epoch": 0.019271403580360138, "grad_norm": 763.8599243164062, "learning_rate": 7.632e-06, "loss": 244.6692, "step": 4770 }, { "epoch": 0.019311804845727767, "grad_norm": 1143.01171875, "learning_rate": 7.648e-06, "loss": 232.0638, "step": 4780 }, { "epoch": 0.0193522061110954, "grad_norm": 473.067626953125, "learning_rate": 7.664e-06, "loss": 163.634, "step": 4790 }, { "epoch": 0.01939260737646303, "grad_norm": 738.6350708007812, "learning_rate": 7.680000000000001e-06, "loss": 171.6407, "step": 4800 }, { "epoch": 0.019433008641830663, "grad_norm": 707.3241577148438, "learning_rate": 7.696e-06, "loss": 225.7683, "step": 4810 }, { "epoch": 0.019473409907198293, "grad_norm": 1966.50927734375, "learning_rate": 7.712e-06, "loss": 259.4346, "step": 4820 }, { "epoch": 0.019513811172565926, "grad_norm": 2337.94580078125, "learning_rate": 7.728000000000001e-06, "loss": 210.5883, "step": 4830 }, { "epoch": 0.019554212437933555, "grad_norm": 1208.361083984375, "learning_rate": 7.744e-06, "loss": 166.7386, "step": 4840 }, { "epoch": 0.01959461370330119, "grad_norm": 1413.30126953125, "learning_rate": 7.76e-06, "loss": 291.6949, "step": 4850 }, { "epoch": 0.019635014968668818, "grad_norm": 422.9515380859375, "learning_rate": 7.776e-06, "loss": 161.5541, "step": 4860 }, { "epoch": 0.01967541623403645, "grad_norm": 2810.00927734375, "learning_rate": 7.792000000000001e-06, "loss": 287.8005, "step": 4870 }, { "epoch": 0.01971581749940408, "grad_norm": 1965.0252685546875, "learning_rate": 7.808e-06, "loss": 194.2817, "step": 4880 }, { "epoch": 0.019756218764771714, "grad_norm": 1410.799560546875, "learning_rate": 7.824e-06, "loss": 180.6805, "step": 4890 }, { "epoch": 0.019796620030139343, "grad_norm": 857.7700805664062, "learning_rate": 7.840000000000001e-06, "loss": 295.3197, "step": 4900 }, { "epoch": 0.019837021295506976, "grad_norm": 1902.400146484375, "learning_rate": 7.856e-06, "loss": 278.1652, "step": 4910 }, { "epoch": 0.019877422560874606, "grad_norm": 897.0117797851562, "learning_rate": 7.872e-06, "loss": 154.4284, "step": 4920 }, { "epoch": 0.01991782382624224, "grad_norm": 1253.6666259765625, "learning_rate": 7.888e-06, "loss": 261.9078, "step": 4930 }, { "epoch": 0.01995822509160987, "grad_norm": 928.166748046875, "learning_rate": 7.904000000000001e-06, "loss": 260.5916, "step": 4940 }, { "epoch": 0.0199986263569775, "grad_norm": 1244.7052001953125, "learning_rate": 7.92e-06, "loss": 216.8228, "step": 4950 }, { "epoch": 0.02003902762234513, "grad_norm": 1885.830078125, "learning_rate": 7.936e-06, "loss": 138.4373, "step": 4960 }, { "epoch": 0.020079428887712764, "grad_norm": 10677.4921875, "learning_rate": 7.952000000000001e-06, "loss": 370.5018, "step": 4970 }, { "epoch": 0.020119830153080394, "grad_norm": 1309.8223876953125, "learning_rate": 7.968e-06, "loss": 235.4404, "step": 4980 }, { "epoch": 0.020160231418448027, "grad_norm": 633.9755249023438, "learning_rate": 7.984e-06, "loss": 255.8174, "step": 4990 }, { "epoch": 0.020200632683815656, "grad_norm": 1383.577880859375, "learning_rate": 8.000000000000001e-06, "loss": 177.3417, "step": 5000 }, { "epoch": 0.02024103394918329, "grad_norm": 1378.894287109375, "learning_rate": 8.016e-06, "loss": 225.4421, "step": 5010 }, { "epoch": 0.02028143521455092, "grad_norm": 1220.368896484375, "learning_rate": 8.032e-06, "loss": 147.1734, "step": 5020 }, { "epoch": 0.020321836479918552, "grad_norm": 1136.2757568359375, "learning_rate": 8.048e-06, "loss": 257.1214, "step": 5030 }, { "epoch": 0.020362237745286182, "grad_norm": 1104.6431884765625, "learning_rate": 8.064000000000001e-06, "loss": 246.424, "step": 5040 }, { "epoch": 0.020402639010653815, "grad_norm": 1294.03857421875, "learning_rate": 8.08e-06, "loss": 184.7015, "step": 5050 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 8.096e-06, "loss": 274.0798, "step": 5060 }, { "epoch": 0.020483441541389077, "grad_norm": 1032.7880859375, "learning_rate": 8.112000000000001e-06, "loss": 209.0563, "step": 5070 }, { "epoch": 0.020523842806756707, "grad_norm": 2536.4736328125, "learning_rate": 8.128e-06, "loss": 229.0236, "step": 5080 }, { "epoch": 0.02056424407212434, "grad_norm": 1960.7764892578125, "learning_rate": 8.144e-06, "loss": 290.2731, "step": 5090 }, { "epoch": 0.02060464533749197, "grad_norm": 544.7542114257812, "learning_rate": 8.16e-06, "loss": 217.8538, "step": 5100 }, { "epoch": 0.020645046602859603, "grad_norm": 881.3969116210938, "learning_rate": 8.176000000000001e-06, "loss": 253.8835, "step": 5110 }, { "epoch": 0.020685447868227232, "grad_norm": 933.0445556640625, "learning_rate": 8.192e-06, "loss": 199.286, "step": 5120 }, { "epoch": 0.020725849133594865, "grad_norm": 2709.8310546875, "learning_rate": 8.208e-06, "loss": 182.7853, "step": 5130 }, { "epoch": 0.020766250398962495, "grad_norm": 641.6781616210938, "learning_rate": 8.224000000000001e-06, "loss": 179.5877, "step": 5140 }, { "epoch": 0.020806651664330128, "grad_norm": 785.1431274414062, "learning_rate": 8.24e-06, "loss": 156.9568, "step": 5150 }, { "epoch": 0.020847052929697758, "grad_norm": 755.727783203125, "learning_rate": 8.256e-06, "loss": 220.8312, "step": 5160 }, { "epoch": 0.02088745419506539, "grad_norm": 1180.8179931640625, "learning_rate": 8.272000000000001e-06, "loss": 246.3089, "step": 5170 }, { "epoch": 0.02092785546043302, "grad_norm": 2293.158203125, "learning_rate": 8.288000000000001e-06, "loss": 205.6415, "step": 5180 }, { "epoch": 0.020968256725800653, "grad_norm": 842.48828125, "learning_rate": 8.304e-06, "loss": 249.575, "step": 5190 }, { "epoch": 0.021008657991168283, "grad_norm": 790.164794921875, "learning_rate": 8.32e-06, "loss": 245.8863, "step": 5200 }, { "epoch": 0.021049059256535916, "grad_norm": 1180.41650390625, "learning_rate": 8.336000000000001e-06, "loss": 221.1177, "step": 5210 }, { "epoch": 0.021089460521903546, "grad_norm": 999.559326171875, "learning_rate": 8.352e-06, "loss": 220.637, "step": 5220 }, { "epoch": 0.02112986178727118, "grad_norm": 1407.5498046875, "learning_rate": 8.368e-06, "loss": 245.8358, "step": 5230 }, { "epoch": 0.021170263052638808, "grad_norm": 1201.23486328125, "learning_rate": 8.384000000000001e-06, "loss": 303.6925, "step": 5240 }, { "epoch": 0.02121066431800644, "grad_norm": 3144.137939453125, "learning_rate": 8.400000000000001e-06, "loss": 256.0646, "step": 5250 }, { "epoch": 0.02125106558337407, "grad_norm": 1443.5968017578125, "learning_rate": 8.416e-06, "loss": 278.0506, "step": 5260 }, { "epoch": 0.021291466848741704, "grad_norm": 1761.7537841796875, "learning_rate": 8.432e-06, "loss": 213.0941, "step": 5270 }, { "epoch": 0.021331868114109333, "grad_norm": 1119.3565673828125, "learning_rate": 8.448000000000001e-06, "loss": 253.8438, "step": 5280 }, { "epoch": 0.021372269379476967, "grad_norm": 1161.480224609375, "learning_rate": 8.464e-06, "loss": 240.5102, "step": 5290 }, { "epoch": 0.021412670644844596, "grad_norm": 932.6578979492188, "learning_rate": 8.48e-06, "loss": 269.217, "step": 5300 }, { "epoch": 0.02145307191021223, "grad_norm": 2974.05712890625, "learning_rate": 8.496000000000001e-06, "loss": 275.3746, "step": 5310 }, { "epoch": 0.02149347317557986, "grad_norm": 1162.4228515625, "learning_rate": 8.512e-06, "loss": 203.4607, "step": 5320 }, { "epoch": 0.021533874440947492, "grad_norm": 818.4716186523438, "learning_rate": 8.528e-06, "loss": 232.1441, "step": 5330 }, { "epoch": 0.02157427570631512, "grad_norm": 1251.9176025390625, "learning_rate": 8.544000000000002e-06, "loss": 200.7606, "step": 5340 }, { "epoch": 0.021614676971682754, "grad_norm": 675.2981567382812, "learning_rate": 8.560000000000001e-06, "loss": 222.9481, "step": 5350 }, { "epoch": 0.021655078237050384, "grad_norm": 722.9965209960938, "learning_rate": 8.576e-06, "loss": 295.3248, "step": 5360 }, { "epoch": 0.021695479502418017, "grad_norm": 787.6085815429688, "learning_rate": 8.592e-06, "loss": 237.5749, "step": 5370 }, { "epoch": 0.021735880767785647, "grad_norm": 1618.6192626953125, "learning_rate": 8.608000000000001e-06, "loss": 273.413, "step": 5380 }, { "epoch": 0.02177628203315328, "grad_norm": 2153.603271484375, "learning_rate": 8.624e-06, "loss": 251.1006, "step": 5390 }, { "epoch": 0.02181668329852091, "grad_norm": 3269.96044921875, "learning_rate": 8.64e-06, "loss": 306.5505, "step": 5400 }, { "epoch": 0.021857084563888542, "grad_norm": 735.1317138671875, "learning_rate": 8.656000000000001e-06, "loss": 251.5622, "step": 5410 }, { "epoch": 0.021897485829256172, "grad_norm": 3821.17919921875, "learning_rate": 8.672000000000001e-06, "loss": 254.9936, "step": 5420 }, { "epoch": 0.021937887094623805, "grad_norm": 6532.201171875, "learning_rate": 8.688e-06, "loss": 255.663, "step": 5430 }, { "epoch": 0.021978288359991435, "grad_norm": 1257.637939453125, "learning_rate": 8.704e-06, "loss": 236.9894, "step": 5440 }, { "epoch": 0.022018689625359068, "grad_norm": 18393.888671875, "learning_rate": 8.720000000000001e-06, "loss": 342.399, "step": 5450 }, { "epoch": 0.022059090890726697, "grad_norm": 916.3475341796875, "learning_rate": 8.736e-06, "loss": 219.2116, "step": 5460 }, { "epoch": 0.02209949215609433, "grad_norm": 1160.2779541015625, "learning_rate": 8.752e-06, "loss": 218.9973, "step": 5470 }, { "epoch": 0.02213989342146196, "grad_norm": 2570.178466796875, "learning_rate": 8.768000000000001e-06, "loss": 243.9221, "step": 5480 }, { "epoch": 0.022180294686829593, "grad_norm": 1173.486572265625, "learning_rate": 8.784000000000001e-06, "loss": 181.3593, "step": 5490 }, { "epoch": 0.022220695952197222, "grad_norm": 2153.557373046875, "learning_rate": 8.8e-06, "loss": 254.7195, "step": 5500 }, { "epoch": 0.022261097217564856, "grad_norm": 3733.31201171875, "learning_rate": 8.816000000000002e-06, "loss": 196.0029, "step": 5510 }, { "epoch": 0.022301498482932485, "grad_norm": 912.1559448242188, "learning_rate": 8.832000000000001e-06, "loss": 213.8326, "step": 5520 }, { "epoch": 0.022341899748300118, "grad_norm": 4412.5341796875, "learning_rate": 8.848e-06, "loss": 189.7024, "step": 5530 }, { "epoch": 0.022382301013667748, "grad_norm": 2686.481201171875, "learning_rate": 8.864e-06, "loss": 243.9844, "step": 5540 }, { "epoch": 0.02242270227903538, "grad_norm": 2003.0455322265625, "learning_rate": 8.880000000000001e-06, "loss": 245.3063, "step": 5550 }, { "epoch": 0.02246310354440301, "grad_norm": 873.5650634765625, "learning_rate": 8.896000000000001e-06, "loss": 196.9391, "step": 5560 }, { "epoch": 0.022503504809770643, "grad_norm": 936.9183349609375, "learning_rate": 8.912e-06, "loss": 120.0026, "step": 5570 }, { "epoch": 0.022543906075138273, "grad_norm": 3665.57470703125, "learning_rate": 8.928000000000002e-06, "loss": 215.1941, "step": 5580 }, { "epoch": 0.022584307340505906, "grad_norm": 502.3744201660156, "learning_rate": 8.944000000000001e-06, "loss": 245.2761, "step": 5590 }, { "epoch": 0.022624708605873536, "grad_norm": 801.8453979492188, "learning_rate": 8.96e-06, "loss": 184.7434, "step": 5600 }, { "epoch": 0.02266510987124117, "grad_norm": 6697.9013671875, "learning_rate": 8.976e-06, "loss": 217.284, "step": 5610 }, { "epoch": 0.0227055111366088, "grad_norm": 1576.22265625, "learning_rate": 8.992000000000001e-06, "loss": 196.8912, "step": 5620 }, { "epoch": 0.02274591240197643, "grad_norm": 1674.2708740234375, "learning_rate": 9.008e-06, "loss": 277.6959, "step": 5630 }, { "epoch": 0.02278631366734406, "grad_norm": 889.8017578125, "learning_rate": 9.024e-06, "loss": 252.6676, "step": 5640 }, { "epoch": 0.022826714932711694, "grad_norm": 2106.39208984375, "learning_rate": 9.040000000000002e-06, "loss": 230.2455, "step": 5650 }, { "epoch": 0.022867116198079324, "grad_norm": 1100.5885009765625, "learning_rate": 9.056000000000001e-06, "loss": 248.682, "step": 5660 }, { "epoch": 0.022907517463446957, "grad_norm": 1121.548828125, "learning_rate": 9.072e-06, "loss": 192.1396, "step": 5670 }, { "epoch": 0.022947918728814586, "grad_norm": 525.61181640625, "learning_rate": 9.088000000000002e-06, "loss": 176.5126, "step": 5680 }, { "epoch": 0.02298831999418222, "grad_norm": 0.0, "learning_rate": 9.104000000000001e-06, "loss": 241.5197, "step": 5690 }, { "epoch": 0.02302872125954985, "grad_norm": 979.9448852539062, "learning_rate": 9.12e-06, "loss": 203.9497, "step": 5700 }, { "epoch": 0.023069122524917482, "grad_norm": 1121.1590576171875, "learning_rate": 9.136e-06, "loss": 210.5354, "step": 5710 }, { "epoch": 0.02310952379028511, "grad_norm": 528.6953125, "learning_rate": 9.152000000000001e-06, "loss": 183.6194, "step": 5720 }, { "epoch": 0.023149925055652745, "grad_norm": 747.2396850585938, "learning_rate": 9.168000000000001e-06, "loss": 158.1538, "step": 5730 }, { "epoch": 0.023190326321020374, "grad_norm": 706.2687377929688, "learning_rate": 9.184e-06, "loss": 196.5268, "step": 5740 }, { "epoch": 0.023230727586388007, "grad_norm": 1605.8697509765625, "learning_rate": 9.200000000000002e-06, "loss": 224.3516, "step": 5750 }, { "epoch": 0.023271128851755637, "grad_norm": 2184.8427734375, "learning_rate": 9.216000000000001e-06, "loss": 309.77, "step": 5760 }, { "epoch": 0.02331153011712327, "grad_norm": 1893.63037109375, "learning_rate": 9.232e-06, "loss": 201.3919, "step": 5770 }, { "epoch": 0.0233519313824909, "grad_norm": 871.6980590820312, "learning_rate": 9.248e-06, "loss": 128.403, "step": 5780 }, { "epoch": 0.023392332647858533, "grad_norm": 747.3956298828125, "learning_rate": 9.264000000000001e-06, "loss": 267.743, "step": 5790 }, { "epoch": 0.023432733913226162, "grad_norm": 920.072265625, "learning_rate": 9.280000000000001e-06, "loss": 222.4892, "step": 5800 }, { "epoch": 0.023473135178593795, "grad_norm": 923.3662109375, "learning_rate": 9.296e-06, "loss": 175.1828, "step": 5810 }, { "epoch": 0.023513536443961425, "grad_norm": 1141.5255126953125, "learning_rate": 9.312000000000002e-06, "loss": 212.2976, "step": 5820 }, { "epoch": 0.023553937709329058, "grad_norm": 803.1054077148438, "learning_rate": 9.328000000000001e-06, "loss": 167.2026, "step": 5830 }, { "epoch": 0.023594338974696687, "grad_norm": 1214.9140625, "learning_rate": 9.344e-06, "loss": 285.092, "step": 5840 }, { "epoch": 0.02363474024006432, "grad_norm": 1920.0303955078125, "learning_rate": 9.360000000000002e-06, "loss": 366.2662, "step": 5850 }, { "epoch": 0.02367514150543195, "grad_norm": 1020.25537109375, "learning_rate": 9.376000000000001e-06, "loss": 258.3467, "step": 5860 }, { "epoch": 0.023715542770799583, "grad_norm": 1090.7298583984375, "learning_rate": 9.392000000000001e-06, "loss": 214.8844, "step": 5870 }, { "epoch": 0.023755944036167213, "grad_norm": 551.9674072265625, "learning_rate": 9.408e-06, "loss": 194.8219, "step": 5880 }, { "epoch": 0.023796345301534846, "grad_norm": 1726.1187744140625, "learning_rate": 9.424000000000002e-06, "loss": 202.5622, "step": 5890 }, { "epoch": 0.023836746566902475, "grad_norm": 2476.4296875, "learning_rate": 9.440000000000001e-06, "loss": 308.0325, "step": 5900 }, { "epoch": 0.02387714783227011, "grad_norm": 1048.78369140625, "learning_rate": 9.456e-06, "loss": 230.1398, "step": 5910 }, { "epoch": 0.023917549097637738, "grad_norm": 1003.924560546875, "learning_rate": 9.472000000000002e-06, "loss": 143.4615, "step": 5920 }, { "epoch": 0.02395795036300537, "grad_norm": 2788.75830078125, "learning_rate": 9.488000000000001e-06, "loss": 196.0169, "step": 5930 }, { "epoch": 0.023998351628373, "grad_norm": 1144.117431640625, "learning_rate": 9.504e-06, "loss": 173.9962, "step": 5940 }, { "epoch": 0.024038752893740634, "grad_norm": 1300.0675048828125, "learning_rate": 9.52e-06, "loss": 205.8077, "step": 5950 }, { "epoch": 0.024079154159108263, "grad_norm": 831.2354125976562, "learning_rate": 9.536000000000002e-06, "loss": 187.0024, "step": 5960 }, { "epoch": 0.024119555424475896, "grad_norm": 1625.884765625, "learning_rate": 9.552000000000001e-06, "loss": 211.5945, "step": 5970 }, { "epoch": 0.024159956689843526, "grad_norm": 1912.547607421875, "learning_rate": 9.568e-06, "loss": 227.0418, "step": 5980 }, { "epoch": 0.02420035795521116, "grad_norm": 1300.2178955078125, "learning_rate": 9.584000000000002e-06, "loss": 110.0356, "step": 5990 }, { "epoch": 0.02424075922057879, "grad_norm": 1286.5931396484375, "learning_rate": 9.600000000000001e-06, "loss": 207.4342, "step": 6000 }, { "epoch": 0.02428116048594642, "grad_norm": 889.315673828125, "learning_rate": 9.616e-06, "loss": 166.3021, "step": 6010 }, { "epoch": 0.02432156175131405, "grad_norm": 1677.3450927734375, "learning_rate": 9.632e-06, "loss": 176.0853, "step": 6020 }, { "epoch": 0.024361963016681684, "grad_norm": 2593.000244140625, "learning_rate": 9.648000000000001e-06, "loss": 210.7468, "step": 6030 }, { "epoch": 0.024402364282049314, "grad_norm": 2517.197998046875, "learning_rate": 9.664000000000001e-06, "loss": 157.0118, "step": 6040 }, { "epoch": 0.024442765547416947, "grad_norm": 867.0703735351562, "learning_rate": 9.68e-06, "loss": 214.9123, "step": 6050 }, { "epoch": 0.024483166812784576, "grad_norm": 684.4923095703125, "learning_rate": 9.696000000000002e-06, "loss": 222.2375, "step": 6060 }, { "epoch": 0.02452356807815221, "grad_norm": 1864.6201171875, "learning_rate": 9.712e-06, "loss": 218.7184, "step": 6070 }, { "epoch": 0.02456396934351984, "grad_norm": 4889.07763671875, "learning_rate": 9.728e-06, "loss": 225.6674, "step": 6080 }, { "epoch": 0.02460437060888747, "grad_norm": 1293.3353271484375, "learning_rate": 9.744000000000002e-06, "loss": 224.6604, "step": 6090 }, { "epoch": 0.0246447718742551, "grad_norm": 1205.1356201171875, "learning_rate": 9.760000000000001e-06, "loss": 262.9507, "step": 6100 }, { "epoch": 0.02468517313962273, "grad_norm": 1262.5826416015625, "learning_rate": 9.776000000000001e-06, "loss": 227.2488, "step": 6110 }, { "epoch": 0.024725574404990364, "grad_norm": 1142.44921875, "learning_rate": 9.792e-06, "loss": 346.6811, "step": 6120 }, { "epoch": 0.024765975670357994, "grad_norm": 1234.95166015625, "learning_rate": 9.808000000000002e-06, "loss": 210.1194, "step": 6130 }, { "epoch": 0.024806376935725627, "grad_norm": 2154.517822265625, "learning_rate": 9.824000000000001e-06, "loss": 212.564, "step": 6140 }, { "epoch": 0.024846778201093257, "grad_norm": 699.7747192382812, "learning_rate": 9.84e-06, "loss": 208.7139, "step": 6150 }, { "epoch": 0.02488717946646089, "grad_norm": 720.7115478515625, "learning_rate": 9.856000000000002e-06, "loss": 201.0257, "step": 6160 }, { "epoch": 0.02492758073182852, "grad_norm": 1159.8109130859375, "learning_rate": 9.872e-06, "loss": 197.7215, "step": 6170 }, { "epoch": 0.024967981997196152, "grad_norm": 675.26904296875, "learning_rate": 9.888000000000001e-06, "loss": 220.4793, "step": 6180 }, { "epoch": 0.025008383262563782, "grad_norm": 1005.2100830078125, "learning_rate": 9.904e-06, "loss": 205.4046, "step": 6190 }, { "epoch": 0.025048784527931415, "grad_norm": 727.637939453125, "learning_rate": 9.920000000000002e-06, "loss": 248.9305, "step": 6200 }, { "epoch": 0.025089185793299044, "grad_norm": 8848.505859375, "learning_rate": 9.936000000000001e-06, "loss": 261.5944, "step": 6210 }, { "epoch": 0.025129587058666678, "grad_norm": 915.894287109375, "learning_rate": 9.952e-06, "loss": 211.3287, "step": 6220 }, { "epoch": 0.025169988324034307, "grad_norm": 844.5236206054688, "learning_rate": 9.968000000000002e-06, "loss": 183.3922, "step": 6230 }, { "epoch": 0.02521038958940194, "grad_norm": 924.010009765625, "learning_rate": 9.984e-06, "loss": 170.826, "step": 6240 }, { "epoch": 0.02525079085476957, "grad_norm": 516.2547607421875, "learning_rate": 1e-05, "loss": 248.4559, "step": 6250 }, { "epoch": 0.025291192120137203, "grad_norm": 1103.63330078125, "learning_rate": 1.0016000000000002e-05, "loss": 223.1157, "step": 6260 }, { "epoch": 0.025331593385504832, "grad_norm": 4242.97314453125, "learning_rate": 1.0032000000000002e-05, "loss": 320.5385, "step": 6270 }, { "epoch": 0.025371994650872465, "grad_norm": 1703.0921630859375, "learning_rate": 1.0048e-05, "loss": 208.4248, "step": 6280 }, { "epoch": 0.025412395916240095, "grad_norm": 842.8690795898438, "learning_rate": 1.0064e-05, "loss": 147.7861, "step": 6290 }, { "epoch": 0.025452797181607728, "grad_norm": 1763.817626953125, "learning_rate": 1.008e-05, "loss": 180.2249, "step": 6300 }, { "epoch": 0.025493198446975358, "grad_norm": 631.6640014648438, "learning_rate": 1.0096000000000001e-05, "loss": 141.9704, "step": 6310 }, { "epoch": 0.02553359971234299, "grad_norm": 2309.67724609375, "learning_rate": 1.0112000000000002e-05, "loss": 219.5726, "step": 6320 }, { "epoch": 0.02557400097771062, "grad_norm": 3058.4248046875, "learning_rate": 1.0128e-05, "loss": 271.9829, "step": 6330 }, { "epoch": 0.025614402243078253, "grad_norm": 778.4915161132812, "learning_rate": 1.0144e-05, "loss": 241.9638, "step": 6340 }, { "epoch": 0.025654803508445883, "grad_norm": 682.7378540039062, "learning_rate": 1.0160000000000001e-05, "loss": 254.1633, "step": 6350 }, { "epoch": 0.025695204773813516, "grad_norm": 1311.454833984375, "learning_rate": 1.0176000000000002e-05, "loss": 221.548, "step": 6360 }, { "epoch": 0.025735606039181146, "grad_norm": 1173.302734375, "learning_rate": 1.0192000000000002e-05, "loss": 192.5287, "step": 6370 }, { "epoch": 0.02577600730454878, "grad_norm": 1387.337646484375, "learning_rate": 1.0208e-05, "loss": 262.8041, "step": 6380 }, { "epoch": 0.025816408569916408, "grad_norm": 4407.6435546875, "learning_rate": 1.0224e-05, "loss": 219.8955, "step": 6390 }, { "epoch": 0.02585680983528404, "grad_norm": 621.7880249023438, "learning_rate": 1.024e-05, "loss": 183.8228, "step": 6400 }, { "epoch": 0.02589721110065167, "grad_norm": 809.182373046875, "learning_rate": 1.0256000000000001e-05, "loss": 189.9909, "step": 6410 }, { "epoch": 0.025937612366019304, "grad_norm": 2138.258056640625, "learning_rate": 1.0272e-05, "loss": 188.2057, "step": 6420 }, { "epoch": 0.025978013631386934, "grad_norm": 847.4774169921875, "learning_rate": 1.0288e-05, "loss": 198.1693, "step": 6430 }, { "epoch": 0.026018414896754567, "grad_norm": 3683.830322265625, "learning_rate": 1.0304e-05, "loss": 252.0563, "step": 6440 }, { "epoch": 0.026058816162122196, "grad_norm": 1519.5914306640625, "learning_rate": 1.0320000000000001e-05, "loss": 251.5904, "step": 6450 }, { "epoch": 0.02609921742748983, "grad_norm": 803.3671875, "learning_rate": 1.0336000000000002e-05, "loss": 153.3266, "step": 6460 }, { "epoch": 0.02613961869285746, "grad_norm": 826.2311401367188, "learning_rate": 1.0352e-05, "loss": 229.4645, "step": 6470 }, { "epoch": 0.026180019958225092, "grad_norm": 1242.169677734375, "learning_rate": 1.0368e-05, "loss": 257.6692, "step": 6480 }, { "epoch": 0.02622042122359272, "grad_norm": 3527.55712890625, "learning_rate": 1.0384000000000001e-05, "loss": 203.5762, "step": 6490 }, { "epoch": 0.026260822488960354, "grad_norm": 936.0517578125, "learning_rate": 1.04e-05, "loss": 235.4074, "step": 6500 }, { "epoch": 0.026301223754327984, "grad_norm": 735.2395629882812, "learning_rate": 1.0416000000000002e-05, "loss": 206.3603, "step": 6510 }, { "epoch": 0.026341625019695617, "grad_norm": 955.069091796875, "learning_rate": 1.0432e-05, "loss": 211.4242, "step": 6520 }, { "epoch": 0.026382026285063247, "grad_norm": 2535.602783203125, "learning_rate": 1.0448e-05, "loss": 245.856, "step": 6530 }, { "epoch": 0.02642242755043088, "grad_norm": 1577.4102783203125, "learning_rate": 1.0464e-05, "loss": 240.6815, "step": 6540 }, { "epoch": 0.02646282881579851, "grad_norm": 3635.015380859375, "learning_rate": 1.0480000000000001e-05, "loss": 171.5102, "step": 6550 }, { "epoch": 0.026503230081166142, "grad_norm": 1024.40966796875, "learning_rate": 1.0496000000000003e-05, "loss": 196.577, "step": 6560 }, { "epoch": 0.026543631346533772, "grad_norm": 4052.225341796875, "learning_rate": 1.0512e-05, "loss": 164.7473, "step": 6570 }, { "epoch": 0.026584032611901405, "grad_norm": 861.9880981445312, "learning_rate": 1.0528e-05, "loss": 198.6812, "step": 6580 }, { "epoch": 0.026624433877269035, "grad_norm": 1004.9078979492188, "learning_rate": 1.0544000000000001e-05, "loss": 124.6632, "step": 6590 }, { "epoch": 0.026664835142636668, "grad_norm": 815.4194946289062, "learning_rate": 1.056e-05, "loss": 252.8283, "step": 6600 }, { "epoch": 0.026705236408004297, "grad_norm": 1178.261962890625, "learning_rate": 1.0576000000000002e-05, "loss": 239.9308, "step": 6610 }, { "epoch": 0.02674563767337193, "grad_norm": 855.1956787109375, "learning_rate": 1.0592e-05, "loss": 170.0014, "step": 6620 }, { "epoch": 0.02678603893873956, "grad_norm": 764.4215087890625, "learning_rate": 1.0608e-05, "loss": 311.9611, "step": 6630 }, { "epoch": 0.026826440204107193, "grad_norm": 1226.867919921875, "learning_rate": 1.0624e-05, "loss": 164.5296, "step": 6640 }, { "epoch": 0.026866841469474823, "grad_norm": 793.0070190429688, "learning_rate": 1.0640000000000001e-05, "loss": 157.1765, "step": 6650 }, { "epoch": 0.026907242734842456, "grad_norm": 2848.79052734375, "learning_rate": 1.0656000000000003e-05, "loss": 178.3442, "step": 6660 }, { "epoch": 0.026947644000210085, "grad_norm": 1390.5682373046875, "learning_rate": 1.0672e-05, "loss": 203.0393, "step": 6670 }, { "epoch": 0.026988045265577718, "grad_norm": 912.458984375, "learning_rate": 1.0688e-05, "loss": 163.6808, "step": 6680 }, { "epoch": 0.027028446530945348, "grad_norm": 1008.1384887695312, "learning_rate": 1.0704000000000001e-05, "loss": 207.8135, "step": 6690 }, { "epoch": 0.02706884779631298, "grad_norm": 6595.03466796875, "learning_rate": 1.072e-05, "loss": 269.4763, "step": 6700 }, { "epoch": 0.02710924906168061, "grad_norm": 836.4190673828125, "learning_rate": 1.0736000000000002e-05, "loss": 244.2324, "step": 6710 }, { "epoch": 0.027149650327048244, "grad_norm": 2060.159423828125, "learning_rate": 1.0752e-05, "loss": 192.5937, "step": 6720 }, { "epoch": 0.027190051592415873, "grad_norm": 3539.124755859375, "learning_rate": 1.0768000000000001e-05, "loss": 257.8765, "step": 6730 }, { "epoch": 0.027230452857783506, "grad_norm": 1152.4991455078125, "learning_rate": 1.0784e-05, "loss": 215.0623, "step": 6740 }, { "epoch": 0.027270854123151136, "grad_norm": 1865.2408447265625, "learning_rate": 1.0800000000000002e-05, "loss": 298.6338, "step": 6750 }, { "epoch": 0.02731125538851877, "grad_norm": 3113.653564453125, "learning_rate": 1.0816e-05, "loss": 212.6075, "step": 6760 }, { "epoch": 0.0273516566538864, "grad_norm": 723.0384521484375, "learning_rate": 1.0832e-05, "loss": 176.7559, "step": 6770 }, { "epoch": 0.02739205791925403, "grad_norm": 1553.911376953125, "learning_rate": 1.0848e-05, "loss": 214.2935, "step": 6780 }, { "epoch": 0.02743245918462166, "grad_norm": 2617.25927734375, "learning_rate": 1.0864000000000001e-05, "loss": 208.5479, "step": 6790 }, { "epoch": 0.027472860449989294, "grad_norm": 2721.330078125, "learning_rate": 1.0880000000000001e-05, "loss": 247.9051, "step": 6800 }, { "epoch": 0.027513261715356924, "grad_norm": 1277.49365234375, "learning_rate": 1.0896e-05, "loss": 188.6469, "step": 6810 }, { "epoch": 0.027553662980724557, "grad_norm": 1097.37451171875, "learning_rate": 1.0912e-05, "loss": 165.6317, "step": 6820 }, { "epoch": 0.027594064246092186, "grad_norm": 2809.764404296875, "learning_rate": 1.0928000000000001e-05, "loss": 207.2922, "step": 6830 }, { "epoch": 0.02763446551145982, "grad_norm": 3215.823974609375, "learning_rate": 1.0944e-05, "loss": 145.3388, "step": 6840 }, { "epoch": 0.02767486677682745, "grad_norm": 896.5782470703125, "learning_rate": 1.0960000000000002e-05, "loss": 246.8312, "step": 6850 }, { "epoch": 0.027715268042195082, "grad_norm": 1490.93408203125, "learning_rate": 1.0976e-05, "loss": 167.422, "step": 6860 }, { "epoch": 0.02775566930756271, "grad_norm": 1855.150390625, "learning_rate": 1.0992e-05, "loss": 218.5458, "step": 6870 }, { "epoch": 0.027796070572930345, "grad_norm": 885.1539306640625, "learning_rate": 1.1008e-05, "loss": 247.277, "step": 6880 }, { "epoch": 0.027836471838297974, "grad_norm": 988.1691284179688, "learning_rate": 1.1024000000000002e-05, "loss": 275.7784, "step": 6890 }, { "epoch": 0.027876873103665607, "grad_norm": 1423.39404296875, "learning_rate": 1.1040000000000001e-05, "loss": 155.3902, "step": 6900 }, { "epoch": 0.027917274369033237, "grad_norm": 1528.7322998046875, "learning_rate": 1.1056e-05, "loss": 199.039, "step": 6910 }, { "epoch": 0.02795767563440087, "grad_norm": 744.6835327148438, "learning_rate": 1.1072e-05, "loss": 170.9844, "step": 6920 }, { "epoch": 0.0279980768997685, "grad_norm": 1452.251708984375, "learning_rate": 1.1088000000000001e-05, "loss": 159.8676, "step": 6930 }, { "epoch": 0.028038478165136133, "grad_norm": 1314.5618896484375, "learning_rate": 1.1104e-05, "loss": 181.9609, "step": 6940 }, { "epoch": 0.028078879430503762, "grad_norm": 906.5917358398438, "learning_rate": 1.1120000000000002e-05, "loss": 178.3718, "step": 6950 }, { "epoch": 0.028119280695871395, "grad_norm": 1324.80810546875, "learning_rate": 1.1136e-05, "loss": 143.8825, "step": 6960 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 1.1152000000000001e-05, "loss": 118.0358, "step": 6970 }, { "epoch": 0.028200083226606658, "grad_norm": 4537.2822265625, "learning_rate": 1.1168e-05, "loss": 205.2892, "step": 6980 }, { "epoch": 0.028240484491974287, "grad_norm": 1045.489501953125, "learning_rate": 1.1184000000000002e-05, "loss": 196.2545, "step": 6990 }, { "epoch": 0.02828088575734192, "grad_norm": 814.6027221679688, "learning_rate": 1.1200000000000001e-05, "loss": 188.3368, "step": 7000 }, { "epoch": 0.02832128702270955, "grad_norm": 823.8950805664062, "learning_rate": 1.1216e-05, "loss": 188.974, "step": 7010 }, { "epoch": 0.028361688288077183, "grad_norm": 1544.5411376953125, "learning_rate": 1.1232e-05, "loss": 230.2053, "step": 7020 }, { "epoch": 0.028402089553444813, "grad_norm": 646.723388671875, "learning_rate": 1.1248000000000001e-05, "loss": 128.2589, "step": 7030 }, { "epoch": 0.028442490818812446, "grad_norm": 1701.8121337890625, "learning_rate": 1.1264000000000001e-05, "loss": 180.4897, "step": 7040 }, { "epoch": 0.028482892084180075, "grad_norm": 1359.3916015625, "learning_rate": 1.128e-05, "loss": 204.192, "step": 7050 }, { "epoch": 0.02852329334954771, "grad_norm": 1082.42236328125, "learning_rate": 1.1296e-05, "loss": 232.6115, "step": 7060 }, { "epoch": 0.028563694614915338, "grad_norm": 1729.7169189453125, "learning_rate": 1.1312000000000001e-05, "loss": 141.8797, "step": 7070 }, { "epoch": 0.02860409588028297, "grad_norm": 1071.9970703125, "learning_rate": 1.1328e-05, "loss": 191.3511, "step": 7080 }, { "epoch": 0.0286444971456506, "grad_norm": 1277.10595703125, "learning_rate": 1.1344000000000002e-05, "loss": 200.7731, "step": 7090 }, { "epoch": 0.028684898411018234, "grad_norm": 668.3596801757812, "learning_rate": 1.136e-05, "loss": 255.4492, "step": 7100 }, { "epoch": 0.028725299676385863, "grad_norm": 778.4135131835938, "learning_rate": 1.1376000000000001e-05, "loss": 232.344, "step": 7110 }, { "epoch": 0.028765700941753496, "grad_norm": 839.8685302734375, "learning_rate": 1.1392e-05, "loss": 212.6378, "step": 7120 }, { "epoch": 0.028806102207121126, "grad_norm": 1015.4122924804688, "learning_rate": 1.1408000000000002e-05, "loss": 171.4305, "step": 7130 }, { "epoch": 0.02884650347248876, "grad_norm": 1297.2659912109375, "learning_rate": 1.1424000000000001e-05, "loss": 210.3192, "step": 7140 }, { "epoch": 0.02888690473785639, "grad_norm": 1470.431396484375, "learning_rate": 1.144e-05, "loss": 179.2978, "step": 7150 }, { "epoch": 0.02892730600322402, "grad_norm": 1462.60302734375, "learning_rate": 1.1456e-05, "loss": 243.1692, "step": 7160 }, { "epoch": 0.02896770726859165, "grad_norm": 1914.7281494140625, "learning_rate": 1.1472000000000001e-05, "loss": 178.1436, "step": 7170 }, { "epoch": 0.029008108533959284, "grad_norm": 0.0, "learning_rate": 1.1488e-05, "loss": 220.6732, "step": 7180 }, { "epoch": 0.029048509799326914, "grad_norm": 1843.9134521484375, "learning_rate": 1.1504000000000002e-05, "loss": 240.3133, "step": 7190 }, { "epoch": 0.029088911064694547, "grad_norm": 7561.37255859375, "learning_rate": 1.152e-05, "loss": 199.7405, "step": 7200 }, { "epoch": 0.029129312330062176, "grad_norm": 1293.88330078125, "learning_rate": 1.1536000000000001e-05, "loss": 180.5538, "step": 7210 }, { "epoch": 0.02916971359542981, "grad_norm": 504.95745849609375, "learning_rate": 1.1552e-05, "loss": 202.5211, "step": 7220 }, { "epoch": 0.02921011486079744, "grad_norm": 2889.349365234375, "learning_rate": 1.1568000000000002e-05, "loss": 236.6316, "step": 7230 }, { "epoch": 0.029250516126165072, "grad_norm": 741.410888671875, "learning_rate": 1.1584000000000001e-05, "loss": 168.3202, "step": 7240 }, { "epoch": 0.029290917391532702, "grad_norm": 2157.1884765625, "learning_rate": 1.16e-05, "loss": 203.7105, "step": 7250 }, { "epoch": 0.029331318656900335, "grad_norm": 2404.79150390625, "learning_rate": 1.1616e-05, "loss": 205.5438, "step": 7260 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 1.1632000000000001e-05, "loss": 141.7996, "step": 7270 }, { "epoch": 0.029412121187635597, "grad_norm": 651.517578125, "learning_rate": 1.1648000000000001e-05, "loss": 213.4516, "step": 7280 }, { "epoch": 0.029452522453003227, "grad_norm": 1541.439208984375, "learning_rate": 1.1664000000000002e-05, "loss": 212.3698, "step": 7290 }, { "epoch": 0.02949292371837086, "grad_norm": 2294.57373046875, "learning_rate": 1.168e-05, "loss": 161.3065, "step": 7300 }, { "epoch": 0.02953332498373849, "grad_norm": 1858.2723388671875, "learning_rate": 1.1696000000000001e-05, "loss": 259.5773, "step": 7310 }, { "epoch": 0.029573726249106123, "grad_norm": 1009.5340576171875, "learning_rate": 1.1712e-05, "loss": 248.1431, "step": 7320 }, { "epoch": 0.029614127514473752, "grad_norm": 3950.963623046875, "learning_rate": 1.1728000000000002e-05, "loss": 214.9634, "step": 7330 }, { "epoch": 0.029654528779841385, "grad_norm": 2193.398681640625, "learning_rate": 1.1744000000000001e-05, "loss": 181.1218, "step": 7340 }, { "epoch": 0.029694930045209015, "grad_norm": 944.3338012695312, "learning_rate": 1.1760000000000001e-05, "loss": 136.3482, "step": 7350 }, { "epoch": 0.029735331310576648, "grad_norm": 1111.0726318359375, "learning_rate": 1.1776e-05, "loss": 205.5683, "step": 7360 }, { "epoch": 0.029775732575944278, "grad_norm": 1929.471435546875, "learning_rate": 1.1792000000000002e-05, "loss": 147.8906, "step": 7370 }, { "epoch": 0.02981613384131191, "grad_norm": 1500.8133544921875, "learning_rate": 1.1808000000000001e-05, "loss": 176.9189, "step": 7380 }, { "epoch": 0.02985653510667954, "grad_norm": 1397.207763671875, "learning_rate": 1.1824e-05, "loss": 206.1869, "step": 7390 }, { "epoch": 0.029896936372047173, "grad_norm": 1064.505126953125, "learning_rate": 1.184e-05, "loss": 285.7666, "step": 7400 }, { "epoch": 0.029937337637414803, "grad_norm": 1040.721923828125, "learning_rate": 1.1856000000000001e-05, "loss": 200.1409, "step": 7410 }, { "epoch": 0.029977738902782436, "grad_norm": 1411.9283447265625, "learning_rate": 1.1872000000000001e-05, "loss": 197.3061, "step": 7420 }, { "epoch": 0.030018140168150065, "grad_norm": 1552.750244140625, "learning_rate": 1.1888000000000002e-05, "loss": 131.5654, "step": 7430 }, { "epoch": 0.0300585414335177, "grad_norm": 4135.3154296875, "learning_rate": 1.1904e-05, "loss": 158.1736, "step": 7440 }, { "epoch": 0.030098942698885328, "grad_norm": 1020.1656494140625, "learning_rate": 1.1920000000000001e-05, "loss": 202.9766, "step": 7450 }, { "epoch": 0.03013934396425296, "grad_norm": 1060.5179443359375, "learning_rate": 1.1936e-05, "loss": 231.8231, "step": 7460 }, { "epoch": 0.03017974522962059, "grad_norm": 1135.9185791015625, "learning_rate": 1.1952000000000002e-05, "loss": 180.0088, "step": 7470 }, { "epoch": 0.030220146494988224, "grad_norm": 1562.1593017578125, "learning_rate": 1.1968000000000001e-05, "loss": 194.9986, "step": 7480 }, { "epoch": 0.030260547760355853, "grad_norm": 1475.9083251953125, "learning_rate": 1.1984e-05, "loss": 190.9672, "step": 7490 }, { "epoch": 0.030300949025723486, "grad_norm": 2843.638427734375, "learning_rate": 1.2e-05, "loss": 208.5317, "step": 7500 }, { "epoch": 0.030341350291091116, "grad_norm": 13340.033203125, "learning_rate": 1.2016000000000002e-05, "loss": 290.147, "step": 7510 }, { "epoch": 0.03038175155645875, "grad_norm": 1923.64990234375, "learning_rate": 1.2032000000000001e-05, "loss": 187.0977, "step": 7520 }, { "epoch": 0.03042215282182638, "grad_norm": 753.6450805664062, "learning_rate": 1.2048000000000002e-05, "loss": 173.7488, "step": 7530 }, { "epoch": 0.030462554087194012, "grad_norm": 1671.169189453125, "learning_rate": 1.2064e-05, "loss": 234.1393, "step": 7540 }, { "epoch": 0.03050295535256164, "grad_norm": 4448.47607421875, "learning_rate": 1.2080000000000001e-05, "loss": 148.8285, "step": 7550 }, { "epoch": 0.030543356617929274, "grad_norm": 1697.405029296875, "learning_rate": 1.2096e-05, "loss": 165.7743, "step": 7560 }, { "epoch": 0.030583757883296904, "grad_norm": 1653.86669921875, "learning_rate": 1.2112000000000002e-05, "loss": 128.8316, "step": 7570 }, { "epoch": 0.030624159148664537, "grad_norm": 2187.434814453125, "learning_rate": 1.2128000000000001e-05, "loss": 309.9175, "step": 7580 }, { "epoch": 0.030664560414032167, "grad_norm": 648.2998657226562, "learning_rate": 1.2144000000000001e-05, "loss": 260.0035, "step": 7590 }, { "epoch": 0.0307049616793998, "grad_norm": 1619.0577392578125, "learning_rate": 1.216e-05, "loss": 269.7444, "step": 7600 }, { "epoch": 0.03074536294476743, "grad_norm": 793.527587890625, "learning_rate": 1.2176000000000002e-05, "loss": 253.9378, "step": 7610 }, { "epoch": 0.030785764210135062, "grad_norm": 618.2582397460938, "learning_rate": 1.2192000000000001e-05, "loss": 177.5981, "step": 7620 }, { "epoch": 0.030826165475502692, "grad_norm": 1379.3106689453125, "learning_rate": 1.2208000000000002e-05, "loss": 209.9215, "step": 7630 }, { "epoch": 0.030866566740870325, "grad_norm": 640.5135498046875, "learning_rate": 1.2224e-05, "loss": 150.5801, "step": 7640 }, { "epoch": 0.030906968006237955, "grad_norm": 1602.00390625, "learning_rate": 1.2240000000000001e-05, "loss": 207.2341, "step": 7650 }, { "epoch": 0.030947369271605588, "grad_norm": 804.4661865234375, "learning_rate": 1.2256000000000001e-05, "loss": 132.286, "step": 7660 }, { "epoch": 0.030987770536973217, "grad_norm": 1168.7255859375, "learning_rate": 1.2272000000000002e-05, "loss": 159.3607, "step": 7670 }, { "epoch": 0.03102817180234085, "grad_norm": 5299.14990234375, "learning_rate": 1.2288e-05, "loss": 247.6179, "step": 7680 }, { "epoch": 0.03106857306770848, "grad_norm": 2897.222900390625, "learning_rate": 1.2304000000000001e-05, "loss": 210.1399, "step": 7690 }, { "epoch": 0.031108974333076113, "grad_norm": 1037.8817138671875, "learning_rate": 1.232e-05, "loss": 177.3573, "step": 7700 }, { "epoch": 0.031149375598443742, "grad_norm": 1748.6580810546875, "learning_rate": 1.2336000000000002e-05, "loss": 200.1308, "step": 7710 }, { "epoch": 0.031189776863811376, "grad_norm": 1623.1146240234375, "learning_rate": 1.2352000000000001e-05, "loss": 138.4404, "step": 7720 }, { "epoch": 0.031230178129179005, "grad_norm": 658.6551513671875, "learning_rate": 1.2368e-05, "loss": 187.0388, "step": 7730 }, { "epoch": 0.03127057939454664, "grad_norm": 851.2140502929688, "learning_rate": 1.2384e-05, "loss": 121.0485, "step": 7740 }, { "epoch": 0.03131098065991427, "grad_norm": 1161.504150390625, "learning_rate": 1.2400000000000002e-05, "loss": 187.6907, "step": 7750 }, { "epoch": 0.0313513819252819, "grad_norm": 2363.146728515625, "learning_rate": 1.2416000000000001e-05, "loss": 243.086, "step": 7760 }, { "epoch": 0.03139178319064953, "grad_norm": 1060.61376953125, "learning_rate": 1.2432000000000002e-05, "loss": 159.8972, "step": 7770 }, { "epoch": 0.03143218445601716, "grad_norm": 1024.3526611328125, "learning_rate": 1.2448e-05, "loss": 197.9344, "step": 7780 }, { "epoch": 0.031472585721384796, "grad_norm": 1298.6092529296875, "learning_rate": 1.2464000000000001e-05, "loss": 179.0248, "step": 7790 }, { "epoch": 0.03151298698675242, "grad_norm": 1501.115966796875, "learning_rate": 1.248e-05, "loss": 199.7626, "step": 7800 }, { "epoch": 0.031553388252120056, "grad_norm": 645.8001098632812, "learning_rate": 1.2496000000000002e-05, "loss": 140.9509, "step": 7810 }, { "epoch": 0.03159378951748769, "grad_norm": 1474.071533203125, "learning_rate": 1.2512000000000002e-05, "loss": 225.0743, "step": 7820 }, { "epoch": 0.03163419078285532, "grad_norm": 578.9594116210938, "learning_rate": 1.2528e-05, "loss": 149.4478, "step": 7830 }, { "epoch": 0.03167459204822295, "grad_norm": 1799.5963134765625, "learning_rate": 1.2544e-05, "loss": 190.2897, "step": 7840 }, { "epoch": 0.03171499331359058, "grad_norm": 1678.9425048828125, "learning_rate": 1.2560000000000002e-05, "loss": 201.671, "step": 7850 }, { "epoch": 0.031755394578958214, "grad_norm": 1604.530517578125, "learning_rate": 1.2576000000000001e-05, "loss": 160.7375, "step": 7860 }, { "epoch": 0.03179579584432585, "grad_norm": 1029.1220703125, "learning_rate": 1.2592000000000002e-05, "loss": 168.309, "step": 7870 }, { "epoch": 0.03183619710969347, "grad_norm": 5791.7275390625, "learning_rate": 1.2608e-05, "loss": 317.029, "step": 7880 }, { "epoch": 0.031876598375061106, "grad_norm": 1256.510986328125, "learning_rate": 1.2624000000000001e-05, "loss": 145.226, "step": 7890 }, { "epoch": 0.03191699964042874, "grad_norm": 1424.3720703125, "learning_rate": 1.2640000000000001e-05, "loss": 123.2829, "step": 7900 }, { "epoch": 0.03195740090579637, "grad_norm": 1358.22314453125, "learning_rate": 1.2656000000000002e-05, "loss": 211.4159, "step": 7910 }, { "epoch": 0.031997802171164, "grad_norm": 1109.47509765625, "learning_rate": 1.2672000000000002e-05, "loss": 211.996, "step": 7920 }, { "epoch": 0.03203820343653163, "grad_norm": 1332.3016357421875, "learning_rate": 1.2688e-05, "loss": 202.7615, "step": 7930 }, { "epoch": 0.032078604701899265, "grad_norm": 0.0, "learning_rate": 1.2704e-05, "loss": 149.2255, "step": 7940 }, { "epoch": 0.0321190059672669, "grad_norm": 913.5186157226562, "learning_rate": 1.2720000000000002e-05, "loss": 229.9926, "step": 7950 }, { "epoch": 0.032159407232634524, "grad_norm": 1214.4002685546875, "learning_rate": 1.2736000000000001e-05, "loss": 262.6403, "step": 7960 }, { "epoch": 0.03219980849800216, "grad_norm": 2469.748291015625, "learning_rate": 1.2752e-05, "loss": 255.2624, "step": 7970 }, { "epoch": 0.03224020976336979, "grad_norm": 1674.9351806640625, "learning_rate": 1.2768e-05, "loss": 203.4937, "step": 7980 }, { "epoch": 0.03228061102873742, "grad_norm": 1442.7392578125, "learning_rate": 1.2784000000000002e-05, "loss": 174.4651, "step": 7990 }, { "epoch": 0.03232101229410505, "grad_norm": 697.1456909179688, "learning_rate": 1.2800000000000001e-05, "loss": 216.5391, "step": 8000 }, { "epoch": 0.03236141355947268, "grad_norm": 2099.24853515625, "learning_rate": 1.2816000000000002e-05, "loss": 210.8427, "step": 8010 }, { "epoch": 0.032401814824840315, "grad_norm": 3176.245361328125, "learning_rate": 1.2832e-05, "loss": 305.0184, "step": 8020 }, { "epoch": 0.03244221609020795, "grad_norm": 1037.900146484375, "learning_rate": 1.2848e-05, "loss": 224.8694, "step": 8030 }, { "epoch": 0.032482617355575574, "grad_norm": 474.0095520019531, "learning_rate": 1.2864e-05, "loss": 206.4488, "step": 8040 }, { "epoch": 0.03252301862094321, "grad_norm": 961.8276977539062, "learning_rate": 1.2880000000000002e-05, "loss": 174.9908, "step": 8050 }, { "epoch": 0.03256341988631084, "grad_norm": 1525.29833984375, "learning_rate": 1.2896000000000002e-05, "loss": 256.3985, "step": 8060 }, { "epoch": 0.03260382115167847, "grad_norm": 13022.603515625, "learning_rate": 1.2912e-05, "loss": 171.9165, "step": 8070 }, { "epoch": 0.0326442224170461, "grad_norm": 1341.8682861328125, "learning_rate": 1.2928e-05, "loss": 196.2449, "step": 8080 }, { "epoch": 0.03268462368241373, "grad_norm": 637.4829711914062, "learning_rate": 1.2944000000000002e-05, "loss": 154.6469, "step": 8090 }, { "epoch": 0.032725024947781366, "grad_norm": 1549.2552490234375, "learning_rate": 1.2960000000000001e-05, "loss": 132.3086, "step": 8100 }, { "epoch": 0.032765426213149, "grad_norm": 1383.0369873046875, "learning_rate": 1.2976000000000002e-05, "loss": 216.4039, "step": 8110 }, { "epoch": 0.032805827478516625, "grad_norm": 1574.9188232421875, "learning_rate": 1.2992e-05, "loss": 191.3513, "step": 8120 }, { "epoch": 0.03284622874388426, "grad_norm": 1016.8265380859375, "learning_rate": 1.3008e-05, "loss": 191.888, "step": 8130 }, { "epoch": 0.03288663000925189, "grad_norm": 579.7612915039062, "learning_rate": 1.3024000000000001e-05, "loss": 148.2349, "step": 8140 }, { "epoch": 0.032927031274619524, "grad_norm": 688.9622192382812, "learning_rate": 1.3040000000000002e-05, "loss": 131.6262, "step": 8150 }, { "epoch": 0.03296743253998715, "grad_norm": 1008.6786499023438, "learning_rate": 1.3056000000000002e-05, "loss": 191.4088, "step": 8160 }, { "epoch": 0.03300783380535478, "grad_norm": 731.140380859375, "learning_rate": 1.3072e-05, "loss": 144.9651, "step": 8170 }, { "epoch": 0.033048235070722416, "grad_norm": 1061.424072265625, "learning_rate": 1.3088e-05, "loss": 198.5589, "step": 8180 }, { "epoch": 0.03308863633609005, "grad_norm": 1009.1111450195312, "learning_rate": 1.3104000000000002e-05, "loss": 173.0443, "step": 8190 }, { "epoch": 0.033129037601457675, "grad_norm": 1960.4769287109375, "learning_rate": 1.3120000000000001e-05, "loss": 243.3992, "step": 8200 }, { "epoch": 0.03316943886682531, "grad_norm": 1036.068359375, "learning_rate": 1.3136000000000003e-05, "loss": 179.2266, "step": 8210 }, { "epoch": 0.03320984013219294, "grad_norm": 477.9182434082031, "learning_rate": 1.3152e-05, "loss": 132.2955, "step": 8220 }, { "epoch": 0.033250241397560575, "grad_norm": 997.7559814453125, "learning_rate": 1.3168e-05, "loss": 176.5007, "step": 8230 }, { "epoch": 0.0332906426629282, "grad_norm": 980.5240478515625, "learning_rate": 1.3184000000000001e-05, "loss": 114.6417, "step": 8240 }, { "epoch": 0.033331043928295834, "grad_norm": 732.45263671875, "learning_rate": 1.3200000000000002e-05, "loss": 196.549, "step": 8250 }, { "epoch": 0.03337144519366347, "grad_norm": 785.9083862304688, "learning_rate": 1.3216000000000002e-05, "loss": 178.3419, "step": 8260 }, { "epoch": 0.0334118464590311, "grad_norm": 1148.8909912109375, "learning_rate": 1.3232e-05, "loss": 219.1049, "step": 8270 }, { "epoch": 0.033452247724398726, "grad_norm": 1013.8929443359375, "learning_rate": 1.3248000000000001e-05, "loss": 245.6775, "step": 8280 }, { "epoch": 0.03349264898976636, "grad_norm": 942.855224609375, "learning_rate": 1.3264000000000002e-05, "loss": 217.4367, "step": 8290 }, { "epoch": 0.03353305025513399, "grad_norm": 524.8524780273438, "learning_rate": 1.3280000000000002e-05, "loss": 174.6055, "step": 8300 }, { "epoch": 0.033573451520501625, "grad_norm": 876.1190185546875, "learning_rate": 1.3296e-05, "loss": 164.101, "step": 8310 }, { "epoch": 0.03361385278586925, "grad_norm": 6020.8359375, "learning_rate": 1.3312e-05, "loss": 213.8552, "step": 8320 }, { "epoch": 0.033654254051236884, "grad_norm": 731.1875, "learning_rate": 1.3328e-05, "loss": 177.7584, "step": 8330 }, { "epoch": 0.03369465531660452, "grad_norm": 1150.075439453125, "learning_rate": 1.3344000000000001e-05, "loss": 167.518, "step": 8340 }, { "epoch": 0.03373505658197215, "grad_norm": 2308.036865234375, "learning_rate": 1.3360000000000003e-05, "loss": 216.0022, "step": 8350 }, { "epoch": 0.033775457847339777, "grad_norm": 784.7696533203125, "learning_rate": 1.3376e-05, "loss": 186.3424, "step": 8360 }, { "epoch": 0.03381585911270741, "grad_norm": 1114.6754150390625, "learning_rate": 1.3392e-05, "loss": 161.8721, "step": 8370 }, { "epoch": 0.03385626037807504, "grad_norm": 1241.662841796875, "learning_rate": 1.3408000000000001e-05, "loss": 147.7201, "step": 8380 }, { "epoch": 0.033896661643442676, "grad_norm": 3314.600341796875, "learning_rate": 1.3424000000000002e-05, "loss": 234.9966, "step": 8390 }, { "epoch": 0.0339370629088103, "grad_norm": 2304.822021484375, "learning_rate": 1.3440000000000002e-05, "loss": 154.348, "step": 8400 }, { "epoch": 0.033977464174177935, "grad_norm": 1355.9027099609375, "learning_rate": 1.3456e-05, "loss": 208.0225, "step": 8410 }, { "epoch": 0.03401786543954557, "grad_norm": 865.4819946289062, "learning_rate": 1.3472e-05, "loss": 162.2137, "step": 8420 }, { "epoch": 0.0340582667049132, "grad_norm": 649.958984375, "learning_rate": 1.3488e-05, "loss": 178.5345, "step": 8430 }, { "epoch": 0.03409866797028083, "grad_norm": 875.6942749023438, "learning_rate": 1.3504000000000001e-05, "loss": 167.0072, "step": 8440 }, { "epoch": 0.03413906923564846, "grad_norm": 967.6959228515625, "learning_rate": 1.3520000000000003e-05, "loss": 111.3185, "step": 8450 }, { "epoch": 0.03417947050101609, "grad_norm": 946.614501953125, "learning_rate": 1.3536e-05, "loss": 189.8445, "step": 8460 }, { "epoch": 0.034219871766383726, "grad_norm": 645.7300415039062, "learning_rate": 1.3552e-05, "loss": 181.4957, "step": 8470 }, { "epoch": 0.03426027303175135, "grad_norm": 1362.599365234375, "learning_rate": 1.3568000000000001e-05, "loss": 198.2257, "step": 8480 }, { "epoch": 0.034300674297118985, "grad_norm": 752.5654907226562, "learning_rate": 1.3584000000000002e-05, "loss": 231.436, "step": 8490 }, { "epoch": 0.03434107556248662, "grad_norm": 518.3107299804688, "learning_rate": 1.3600000000000002e-05, "loss": 198.5842, "step": 8500 }, { "epoch": 0.03438147682785425, "grad_norm": 953.03076171875, "learning_rate": 1.3616e-05, "loss": 238.2004, "step": 8510 }, { "epoch": 0.03442187809322188, "grad_norm": 880.9703979492188, "learning_rate": 1.3632000000000001e-05, "loss": 184.2565, "step": 8520 }, { "epoch": 0.03446227935858951, "grad_norm": 1198.8533935546875, "learning_rate": 1.3648e-05, "loss": 188.4894, "step": 8530 }, { "epoch": 0.034502680623957144, "grad_norm": 1765.7196044921875, "learning_rate": 1.3664000000000002e-05, "loss": 149.972, "step": 8540 }, { "epoch": 0.03454308188932478, "grad_norm": 2390.854736328125, "learning_rate": 1.3680000000000003e-05, "loss": 244.8083, "step": 8550 }, { "epoch": 0.0345834831546924, "grad_norm": 1242.086181640625, "learning_rate": 1.3696e-05, "loss": 113.8188, "step": 8560 }, { "epoch": 0.034623884420060036, "grad_norm": 838.2288818359375, "learning_rate": 1.3712e-05, "loss": 210.1455, "step": 8570 }, { "epoch": 0.03466428568542767, "grad_norm": 1526.849365234375, "learning_rate": 1.3728000000000001e-05, "loss": 176.4923, "step": 8580 }, { "epoch": 0.0347046869507953, "grad_norm": 3328.768310546875, "learning_rate": 1.3744000000000003e-05, "loss": 143.1111, "step": 8590 }, { "epoch": 0.03474508821616293, "grad_norm": 949.486328125, "learning_rate": 1.376e-05, "loss": 343.4464, "step": 8600 }, { "epoch": 0.03478548948153056, "grad_norm": 934.4647827148438, "learning_rate": 1.3776e-05, "loss": 200.4245, "step": 8610 }, { "epoch": 0.034825890746898194, "grad_norm": 1757.977294921875, "learning_rate": 1.3792000000000001e-05, "loss": 137.9326, "step": 8620 }, { "epoch": 0.03486629201226583, "grad_norm": 2243.847900390625, "learning_rate": 1.3808e-05, "loss": 198.7237, "step": 8630 }, { "epoch": 0.034906693277633453, "grad_norm": 2031.08984375, "learning_rate": 1.3824000000000002e-05, "loss": 201.4117, "step": 8640 }, { "epoch": 0.03494709454300109, "grad_norm": 2345.795654296875, "learning_rate": 1.384e-05, "loss": 166.4765, "step": 8650 }, { "epoch": 0.03498749580836872, "grad_norm": 861.6460571289062, "learning_rate": 1.3856e-05, "loss": 175.7778, "step": 8660 }, { "epoch": 0.03502789707373635, "grad_norm": 1131.3941650390625, "learning_rate": 1.3872e-05, "loss": 159.2677, "step": 8670 }, { "epoch": 0.03506829833910398, "grad_norm": 1215.5858154296875, "learning_rate": 1.3888000000000002e-05, "loss": 93.1331, "step": 8680 }, { "epoch": 0.03510869960447161, "grad_norm": 1228.4119873046875, "learning_rate": 1.3904000000000003e-05, "loss": 124.4559, "step": 8690 }, { "epoch": 0.035149100869839245, "grad_norm": 953.3961181640625, "learning_rate": 1.392e-05, "loss": 259.4703, "step": 8700 }, { "epoch": 0.03518950213520688, "grad_norm": 1472.0211181640625, "learning_rate": 1.3936e-05, "loss": 195.8783, "step": 8710 }, { "epoch": 0.035229903400574504, "grad_norm": 1071.033935546875, "learning_rate": 1.3952000000000001e-05, "loss": 207.037, "step": 8720 }, { "epoch": 0.03527030466594214, "grad_norm": 4129.77880859375, "learning_rate": 1.3968e-05, "loss": 202.4777, "step": 8730 }, { "epoch": 0.03531070593130977, "grad_norm": 3947.674560546875, "learning_rate": 1.3984000000000002e-05, "loss": 249.0112, "step": 8740 }, { "epoch": 0.0353511071966774, "grad_norm": 1019.8095092773438, "learning_rate": 1.4e-05, "loss": 159.161, "step": 8750 }, { "epoch": 0.03539150846204503, "grad_norm": 2045.958740234375, "learning_rate": 1.4016000000000001e-05, "loss": 166.8638, "step": 8760 }, { "epoch": 0.03543190972741266, "grad_norm": 569.2305908203125, "learning_rate": 1.4032e-05, "loss": 147.5823, "step": 8770 }, { "epoch": 0.035472310992780295, "grad_norm": 1683.003173828125, "learning_rate": 1.4048000000000002e-05, "loss": 178.0475, "step": 8780 }, { "epoch": 0.03551271225814793, "grad_norm": 1133.3314208984375, "learning_rate": 1.4064000000000003e-05, "loss": 179.5424, "step": 8790 }, { "epoch": 0.035553113523515555, "grad_norm": 996.33642578125, "learning_rate": 1.408e-05, "loss": 184.4815, "step": 8800 }, { "epoch": 0.03559351478888319, "grad_norm": 4099.81396484375, "learning_rate": 1.4096e-05, "loss": 217.5045, "step": 8810 }, { "epoch": 0.03563391605425082, "grad_norm": 2777.29052734375, "learning_rate": 1.4112000000000001e-05, "loss": 145.5609, "step": 8820 }, { "epoch": 0.035674317319618454, "grad_norm": 897.3009033203125, "learning_rate": 1.4128000000000001e-05, "loss": 220.9953, "step": 8830 }, { "epoch": 0.03571471858498608, "grad_norm": 3256.197509765625, "learning_rate": 1.4144000000000002e-05, "loss": 153.7076, "step": 8840 }, { "epoch": 0.03575511985035371, "grad_norm": 1044.2850341796875, "learning_rate": 1.416e-05, "loss": 160.3179, "step": 8850 }, { "epoch": 0.035795521115721346, "grad_norm": 598.4876098632812, "learning_rate": 1.4176000000000001e-05, "loss": 164.7267, "step": 8860 }, { "epoch": 0.03583592238108898, "grad_norm": 1391.9383544921875, "learning_rate": 1.4192e-05, "loss": 147.3058, "step": 8870 }, { "epoch": 0.035876323646456605, "grad_norm": 654.5458374023438, "learning_rate": 1.4208000000000002e-05, "loss": 191.4131, "step": 8880 }, { "epoch": 0.03591672491182424, "grad_norm": 1340.3818359375, "learning_rate": 1.4224000000000003e-05, "loss": 210.29, "step": 8890 }, { "epoch": 0.03595712617719187, "grad_norm": 1099.5445556640625, "learning_rate": 1.4240000000000001e-05, "loss": 168.8799, "step": 8900 }, { "epoch": 0.035997527442559504, "grad_norm": 1962.413330078125, "learning_rate": 1.4256e-05, "loss": 166.7184, "step": 8910 }, { "epoch": 0.03603792870792713, "grad_norm": 1163.4332275390625, "learning_rate": 1.4272000000000002e-05, "loss": 170.5609, "step": 8920 }, { "epoch": 0.036078329973294763, "grad_norm": 1148.7928466796875, "learning_rate": 1.4288000000000001e-05, "loss": 200.6612, "step": 8930 }, { "epoch": 0.0361187312386624, "grad_norm": 899.7924194335938, "learning_rate": 1.4304e-05, "loss": 120.8179, "step": 8940 }, { "epoch": 0.03615913250403003, "grad_norm": 1276.4134521484375, "learning_rate": 1.432e-05, "loss": 205.1565, "step": 8950 }, { "epoch": 0.036199533769397656, "grad_norm": 4312.658203125, "learning_rate": 1.4336000000000001e-05, "loss": 262.0594, "step": 8960 }, { "epoch": 0.03623993503476529, "grad_norm": 7287.62451171875, "learning_rate": 1.4352e-05, "loss": 169.9519, "step": 8970 }, { "epoch": 0.03628033630013292, "grad_norm": 791.3291625976562, "learning_rate": 1.4368000000000002e-05, "loss": 158.2487, "step": 8980 }, { "epoch": 0.036320737565500555, "grad_norm": 2023.2037353515625, "learning_rate": 1.4384e-05, "loss": 183.3372, "step": 8990 }, { "epoch": 0.03636113883086818, "grad_norm": 3376.461669921875, "learning_rate": 1.4400000000000001e-05, "loss": 195.7389, "step": 9000 }, { "epoch": 0.036401540096235814, "grad_norm": 1903.6873779296875, "learning_rate": 1.4416e-05, "loss": 205.4889, "step": 9010 }, { "epoch": 0.03644194136160345, "grad_norm": 2198.595458984375, "learning_rate": 1.4432000000000002e-05, "loss": 116.7384, "step": 9020 }, { "epoch": 0.03648234262697108, "grad_norm": 1408.8551025390625, "learning_rate": 1.4448000000000001e-05, "loss": 92.1331, "step": 9030 }, { "epoch": 0.036522743892338706, "grad_norm": 1082.4267578125, "learning_rate": 1.4464e-05, "loss": 161.9581, "step": 9040 }, { "epoch": 0.03656314515770634, "grad_norm": 933.0946655273438, "learning_rate": 1.448e-05, "loss": 162.6531, "step": 9050 }, { "epoch": 0.03660354642307397, "grad_norm": 1909.8857421875, "learning_rate": 1.4496000000000001e-05, "loss": 312.7636, "step": 9060 }, { "epoch": 0.036643947688441605, "grad_norm": 1491.857666015625, "learning_rate": 1.4512000000000001e-05, "loss": 190.8777, "step": 9070 }, { "epoch": 0.03668434895380923, "grad_norm": 894.1411743164062, "learning_rate": 1.4528000000000002e-05, "loss": 142.2706, "step": 9080 }, { "epoch": 0.036724750219176865, "grad_norm": 611.9476318359375, "learning_rate": 1.4544e-05, "loss": 141.9291, "step": 9090 }, { "epoch": 0.0367651514845445, "grad_norm": 1880.233642578125, "learning_rate": 1.4560000000000001e-05, "loss": 234.3097, "step": 9100 }, { "epoch": 0.03680555274991213, "grad_norm": 1328.005615234375, "learning_rate": 1.4576e-05, "loss": 102.8834, "step": 9110 }, { "epoch": 0.03684595401527976, "grad_norm": 1829.908447265625, "learning_rate": 1.4592000000000002e-05, "loss": 132.0794, "step": 9120 }, { "epoch": 0.03688635528064739, "grad_norm": 708.6975708007812, "learning_rate": 1.4608000000000001e-05, "loss": 126.2266, "step": 9130 }, { "epoch": 0.03692675654601502, "grad_norm": 1220.538818359375, "learning_rate": 1.4624000000000001e-05, "loss": 160.287, "step": 9140 }, { "epoch": 0.03696715781138265, "grad_norm": 813.824462890625, "learning_rate": 1.464e-05, "loss": 175.8625, "step": 9150 }, { "epoch": 0.03700755907675028, "grad_norm": 1006.5848388671875, "learning_rate": 1.4656000000000002e-05, "loss": 234.0254, "step": 9160 }, { "epoch": 0.037047960342117915, "grad_norm": 2413.237548828125, "learning_rate": 1.4672000000000001e-05, "loss": 184.3562, "step": 9170 }, { "epoch": 0.03708836160748555, "grad_norm": 1156.4200439453125, "learning_rate": 1.4688000000000002e-05, "loss": 141.5659, "step": 9180 }, { "epoch": 0.037128762872853174, "grad_norm": 1558.5284423828125, "learning_rate": 1.4704e-05, "loss": 179.8484, "step": 9190 }, { "epoch": 0.03716916413822081, "grad_norm": 5858.7548828125, "learning_rate": 1.4720000000000001e-05, "loss": 173.0041, "step": 9200 }, { "epoch": 0.03720956540358844, "grad_norm": 5371.80078125, "learning_rate": 1.4736000000000001e-05, "loss": 214.0791, "step": 9210 }, { "epoch": 0.037249966668956074, "grad_norm": 712.1119995117188, "learning_rate": 1.4752000000000002e-05, "loss": 150.8345, "step": 9220 }, { "epoch": 0.0372903679343237, "grad_norm": 2370.50927734375, "learning_rate": 1.4768e-05, "loss": 173.9066, "step": 9230 }, { "epoch": 0.03733076919969133, "grad_norm": 1736.8262939453125, "learning_rate": 1.4784000000000001e-05, "loss": 194.8434, "step": 9240 }, { "epoch": 0.037371170465058966, "grad_norm": 1515.369140625, "learning_rate": 1.48e-05, "loss": 195.5914, "step": 9250 }, { "epoch": 0.0374115717304266, "grad_norm": 3758.433837890625, "learning_rate": 1.4816000000000002e-05, "loss": 138.8154, "step": 9260 }, { "epoch": 0.037451972995794225, "grad_norm": 452.4622802734375, "learning_rate": 1.4832000000000001e-05, "loss": 200.1029, "step": 9270 }, { "epoch": 0.03749237426116186, "grad_norm": 726.9000854492188, "learning_rate": 1.4848e-05, "loss": 179.7058, "step": 9280 }, { "epoch": 0.03753277552652949, "grad_norm": 1493.676513671875, "learning_rate": 1.4864e-05, "loss": 172.9092, "step": 9290 }, { "epoch": 0.037573176791897124, "grad_norm": 1207.8115234375, "learning_rate": 1.4880000000000002e-05, "loss": 178.3551, "step": 9300 }, { "epoch": 0.03761357805726475, "grad_norm": 896.19873046875, "learning_rate": 1.4896000000000001e-05, "loss": 156.0574, "step": 9310 }, { "epoch": 0.03765397932263238, "grad_norm": 968.2182006835938, "learning_rate": 1.4912000000000002e-05, "loss": 232.6957, "step": 9320 }, { "epoch": 0.037694380588000016, "grad_norm": 1797.7032470703125, "learning_rate": 1.4928e-05, "loss": 254.2054, "step": 9330 }, { "epoch": 0.03773478185336765, "grad_norm": 1541.966064453125, "learning_rate": 1.4944000000000001e-05, "loss": 164.4436, "step": 9340 }, { "epoch": 0.037775183118735275, "grad_norm": 829.641845703125, "learning_rate": 1.496e-05, "loss": 98.9679, "step": 9350 }, { "epoch": 0.03781558438410291, "grad_norm": 1358.08544921875, "learning_rate": 1.4976000000000002e-05, "loss": 229.9947, "step": 9360 }, { "epoch": 0.03785598564947054, "grad_norm": 1063.0125732421875, "learning_rate": 1.4992000000000001e-05, "loss": 138.6031, "step": 9370 }, { "epoch": 0.037896386914838175, "grad_norm": 2268.275390625, "learning_rate": 1.5008000000000001e-05, "loss": 206.4531, "step": 9380 }, { "epoch": 0.0379367881802058, "grad_norm": 2438.62841796875, "learning_rate": 1.5024e-05, "loss": 133.9822, "step": 9390 }, { "epoch": 0.037977189445573434, "grad_norm": 661.8216552734375, "learning_rate": 1.5040000000000002e-05, "loss": 162.2787, "step": 9400 }, { "epoch": 0.03801759071094107, "grad_norm": 1721.645751953125, "learning_rate": 1.5056000000000001e-05, "loss": 202.9203, "step": 9410 }, { "epoch": 0.0380579919763087, "grad_norm": 1209.0455322265625, "learning_rate": 1.5072000000000002e-05, "loss": 140.194, "step": 9420 }, { "epoch": 0.038098393241676326, "grad_norm": 850.0849609375, "learning_rate": 1.5088e-05, "loss": 126.9117, "step": 9430 }, { "epoch": 0.03813879450704396, "grad_norm": 2595.15283203125, "learning_rate": 1.5104000000000001e-05, "loss": 189.6362, "step": 9440 }, { "epoch": 0.03817919577241159, "grad_norm": 2151.7373046875, "learning_rate": 1.5120000000000001e-05, "loss": 177.4334, "step": 9450 }, { "epoch": 0.038219597037779225, "grad_norm": 853.5826416015625, "learning_rate": 1.5136000000000002e-05, "loss": 211.8327, "step": 9460 }, { "epoch": 0.03825999830314685, "grad_norm": 1072.6729736328125, "learning_rate": 1.5152000000000002e-05, "loss": 201.9211, "step": 9470 }, { "epoch": 0.038300399568514484, "grad_norm": 2053.216552734375, "learning_rate": 1.5168000000000001e-05, "loss": 196.2717, "step": 9480 }, { "epoch": 0.03834080083388212, "grad_norm": 1001.5113525390625, "learning_rate": 1.5184e-05, "loss": 182.2864, "step": 9490 }, { "epoch": 0.03838120209924975, "grad_norm": 1147.33203125, "learning_rate": 1.5200000000000002e-05, "loss": 194.6276, "step": 9500 }, { "epoch": 0.03842160336461738, "grad_norm": 1260.394775390625, "learning_rate": 1.5216000000000001e-05, "loss": 91.9645, "step": 9510 }, { "epoch": 0.03846200462998501, "grad_norm": 1672.2742919921875, "learning_rate": 1.5232000000000003e-05, "loss": 177.3926, "step": 9520 }, { "epoch": 0.03850240589535264, "grad_norm": 7829.462890625, "learning_rate": 1.5248e-05, "loss": 176.5193, "step": 9530 }, { "epoch": 0.038542807160720276, "grad_norm": 732.9152221679688, "learning_rate": 1.5264e-05, "loss": 118.7326, "step": 9540 }, { "epoch": 0.0385832084260879, "grad_norm": 889.5083618164062, "learning_rate": 1.5280000000000003e-05, "loss": 138.2893, "step": 9550 }, { "epoch": 0.038623609691455535, "grad_norm": 1869.059326171875, "learning_rate": 1.5296e-05, "loss": 173.2165, "step": 9560 }, { "epoch": 0.03866401095682317, "grad_norm": 6519.04150390625, "learning_rate": 1.5312000000000002e-05, "loss": 177.7631, "step": 9570 }, { "epoch": 0.0387044122221908, "grad_norm": 1132.0162353515625, "learning_rate": 1.5328e-05, "loss": 153.1753, "step": 9580 }, { "epoch": 0.03874481348755843, "grad_norm": 961.602294921875, "learning_rate": 1.5344e-05, "loss": 131.0789, "step": 9590 }, { "epoch": 0.03878521475292606, "grad_norm": 976.28466796875, "learning_rate": 1.5360000000000002e-05, "loss": 173.8198, "step": 9600 }, { "epoch": 0.03882561601829369, "grad_norm": 714.9140014648438, "learning_rate": 1.5376000000000003e-05, "loss": 128.8499, "step": 9610 }, { "epoch": 0.038866017283661326, "grad_norm": 1191.9742431640625, "learning_rate": 1.5392e-05, "loss": 161.6513, "step": 9620 }, { "epoch": 0.03890641854902895, "grad_norm": 0.0, "learning_rate": 1.5408000000000002e-05, "loss": 183.5057, "step": 9630 }, { "epoch": 0.038946819814396585, "grad_norm": 1573.1395263671875, "learning_rate": 1.5424e-05, "loss": 152.1399, "step": 9640 }, { "epoch": 0.03898722107976422, "grad_norm": 999.6334838867188, "learning_rate": 1.544e-05, "loss": 162.3157, "step": 9650 }, { "epoch": 0.03902762234513185, "grad_norm": 0.0, "learning_rate": 1.5456000000000002e-05, "loss": 147.9127, "step": 9660 }, { "epoch": 0.03906802361049948, "grad_norm": 1467.85205078125, "learning_rate": 1.5472e-05, "loss": 146.4466, "step": 9670 }, { "epoch": 0.03910842487586711, "grad_norm": 1301.513427734375, "learning_rate": 1.5488e-05, "loss": 130.0043, "step": 9680 }, { "epoch": 0.039148826141234744, "grad_norm": 1770.3184814453125, "learning_rate": 1.5504000000000003e-05, "loss": 176.4808, "step": 9690 }, { "epoch": 0.03918922740660238, "grad_norm": 1669.854736328125, "learning_rate": 1.552e-05, "loss": 186.6205, "step": 9700 }, { "epoch": 0.03922962867197, "grad_norm": 638.5211791992188, "learning_rate": 1.5536e-05, "loss": 127.193, "step": 9710 }, { "epoch": 0.039270029937337636, "grad_norm": 3371.012939453125, "learning_rate": 1.5552e-05, "loss": 183.313, "step": 9720 }, { "epoch": 0.03931043120270527, "grad_norm": 1039.69677734375, "learning_rate": 1.5568e-05, "loss": 107.9096, "step": 9730 }, { "epoch": 0.0393508324680729, "grad_norm": 784.8140258789062, "learning_rate": 1.5584000000000002e-05, "loss": 161.9457, "step": 9740 }, { "epoch": 0.03939123373344053, "grad_norm": 5022.111328125, "learning_rate": 1.5600000000000003e-05, "loss": 199.7558, "step": 9750 }, { "epoch": 0.03943163499880816, "grad_norm": 2041.7452392578125, "learning_rate": 1.5616e-05, "loss": 166.1727, "step": 9760 }, { "epoch": 0.039472036264175794, "grad_norm": 1846.129638671875, "learning_rate": 1.5632000000000002e-05, "loss": 174.9207, "step": 9770 }, { "epoch": 0.03951243752954343, "grad_norm": 1014.9839477539062, "learning_rate": 1.5648e-05, "loss": 120.4058, "step": 9780 }, { "epoch": 0.039552838794911054, "grad_norm": 2188.939697265625, "learning_rate": 1.5664e-05, "loss": 190.7619, "step": 9790 }, { "epoch": 0.03959324006027869, "grad_norm": 992.56689453125, "learning_rate": 1.5680000000000002e-05, "loss": 127.4463, "step": 9800 }, { "epoch": 0.03963364132564632, "grad_norm": 2232.902099609375, "learning_rate": 1.5696000000000004e-05, "loss": 212.9937, "step": 9810 }, { "epoch": 0.03967404259101395, "grad_norm": 569.5333862304688, "learning_rate": 1.5712e-05, "loss": 165.9236, "step": 9820 }, { "epoch": 0.03971444385638158, "grad_norm": 454.3460998535156, "learning_rate": 1.5728000000000003e-05, "loss": 135.0038, "step": 9830 }, { "epoch": 0.03975484512174921, "grad_norm": 2677.75, "learning_rate": 1.5744e-05, "loss": 202.8767, "step": 9840 }, { "epoch": 0.039795246387116845, "grad_norm": 2137.688720703125, "learning_rate": 1.576e-05, "loss": 147.8878, "step": 9850 }, { "epoch": 0.03983564765248448, "grad_norm": 1873.88330078125, "learning_rate": 1.5776e-05, "loss": 201.978, "step": 9860 }, { "epoch": 0.039876048917852104, "grad_norm": 1263.277099609375, "learning_rate": 1.5792e-05, "loss": 179.6682, "step": 9870 }, { "epoch": 0.03991645018321974, "grad_norm": 569.78466796875, "learning_rate": 1.5808000000000002e-05, "loss": 131.1906, "step": 9880 }, { "epoch": 0.03995685144858737, "grad_norm": 458.5001220703125, "learning_rate": 1.5824000000000003e-05, "loss": 102.6573, "step": 9890 }, { "epoch": 0.039997252713955, "grad_norm": 727.7654418945312, "learning_rate": 1.584e-05, "loss": 172.3399, "step": 9900 }, { "epoch": 0.04003765397932263, "grad_norm": 540.7371215820312, "learning_rate": 1.5856e-05, "loss": 140.1403, "step": 9910 }, { "epoch": 0.04007805524469026, "grad_norm": 1218.4239501953125, "learning_rate": 1.5872e-05, "loss": 198.2595, "step": 9920 }, { "epoch": 0.040118456510057895, "grad_norm": 2273.38427734375, "learning_rate": 1.5888e-05, "loss": 224.8911, "step": 9930 }, { "epoch": 0.04015885777542553, "grad_norm": 803.8103637695312, "learning_rate": 1.5904000000000002e-05, "loss": 84.3416, "step": 9940 }, { "epoch": 0.040199259040793155, "grad_norm": 1237.755615234375, "learning_rate": 1.5920000000000003e-05, "loss": 125.1505, "step": 9950 }, { "epoch": 0.04023966030616079, "grad_norm": 957.0908813476562, "learning_rate": 1.5936e-05, "loss": 170.3722, "step": 9960 }, { "epoch": 0.04028006157152842, "grad_norm": 1235.8572998046875, "learning_rate": 1.5952000000000002e-05, "loss": 188.307, "step": 9970 }, { "epoch": 0.040320462836896054, "grad_norm": 1665.431884765625, "learning_rate": 1.5968e-05, "loss": 173.6013, "step": 9980 }, { "epoch": 0.04036086410226368, "grad_norm": 944.884521484375, "learning_rate": 1.5984e-05, "loss": 172.7963, "step": 9990 }, { "epoch": 0.04040126536763131, "grad_norm": 817.8704223632812, "learning_rate": 1.6000000000000003e-05, "loss": 121.9647, "step": 10000 }, { "epoch": 0.040441666632998946, "grad_norm": 531.5571899414062, "learning_rate": 1.6016e-05, "loss": 144.545, "step": 10010 }, { "epoch": 0.04048206789836658, "grad_norm": 1372.50048828125, "learning_rate": 1.6032e-05, "loss": 167.1301, "step": 10020 }, { "epoch": 0.040522469163734205, "grad_norm": 1101.1865234375, "learning_rate": 1.6048000000000003e-05, "loss": 196.8536, "step": 10030 }, { "epoch": 0.04056287042910184, "grad_norm": 1433.8011474609375, "learning_rate": 1.6064e-05, "loss": 157.4351, "step": 10040 }, { "epoch": 0.04060327169446947, "grad_norm": 2047.3099365234375, "learning_rate": 1.6080000000000002e-05, "loss": 127.7889, "step": 10050 }, { "epoch": 0.040643672959837104, "grad_norm": 0.0, "learning_rate": 1.6096e-05, "loss": 134.3584, "step": 10060 }, { "epoch": 0.04068407422520473, "grad_norm": 2167.48486328125, "learning_rate": 1.6112e-05, "loss": 161.7745, "step": 10070 }, { "epoch": 0.040724475490572364, "grad_norm": 2235.814697265625, "learning_rate": 1.6128000000000002e-05, "loss": 210.6461, "step": 10080 }, { "epoch": 0.04076487675594, "grad_norm": 766.3438110351562, "learning_rate": 1.6144000000000003e-05, "loss": 166.4439, "step": 10090 }, { "epoch": 0.04080527802130763, "grad_norm": 1047.2254638671875, "learning_rate": 1.616e-05, "loss": 162.5282, "step": 10100 }, { "epoch": 0.040845679286675256, "grad_norm": 1334.6319580078125, "learning_rate": 1.6176e-05, "loss": 172.6197, "step": 10110 }, { "epoch": 0.04088608055204289, "grad_norm": 796.6995849609375, "learning_rate": 1.6192e-05, "loss": 150.1012, "step": 10120 }, { "epoch": 0.04092648181741052, "grad_norm": 1244.7584228515625, "learning_rate": 1.6208e-05, "loss": 172.9664, "step": 10130 }, { "epoch": 0.040966883082778155, "grad_norm": 1606.3883056640625, "learning_rate": 1.6224000000000003e-05, "loss": 173.0676, "step": 10140 }, { "epoch": 0.04100728434814578, "grad_norm": 877.404541015625, "learning_rate": 1.6240000000000004e-05, "loss": 167.6871, "step": 10150 }, { "epoch": 0.041047685613513414, "grad_norm": 2475.5068359375, "learning_rate": 1.6256e-05, "loss": 172.3827, "step": 10160 }, { "epoch": 0.04108808687888105, "grad_norm": 1478.54638671875, "learning_rate": 1.6272000000000003e-05, "loss": 180.1601, "step": 10170 }, { "epoch": 0.04112848814424868, "grad_norm": 521.4425659179688, "learning_rate": 1.6288e-05, "loss": 167.5962, "step": 10180 }, { "epoch": 0.041168889409616306, "grad_norm": 2434.41064453125, "learning_rate": 1.6304000000000002e-05, "loss": 200.3995, "step": 10190 }, { "epoch": 0.04120929067498394, "grad_norm": 685.0830078125, "learning_rate": 1.632e-05, "loss": 149.9868, "step": 10200 }, { "epoch": 0.04124969194035157, "grad_norm": 997.065185546875, "learning_rate": 1.6336e-05, "loss": 152.3204, "step": 10210 }, { "epoch": 0.041290093205719205, "grad_norm": 1296.7747802734375, "learning_rate": 1.6352000000000002e-05, "loss": 187.0858, "step": 10220 }, { "epoch": 0.04133049447108683, "grad_norm": 757.8452758789062, "learning_rate": 1.6368000000000003e-05, "loss": 119.7687, "step": 10230 }, { "epoch": 0.041370895736454465, "grad_norm": 1303.6944580078125, "learning_rate": 1.6384e-05, "loss": 164.7086, "step": 10240 }, { "epoch": 0.0414112970018221, "grad_norm": 1585.5537109375, "learning_rate": 1.64e-05, "loss": 258.1144, "step": 10250 }, { "epoch": 0.04145169826718973, "grad_norm": 836.6436157226562, "learning_rate": 1.6416e-05, "loss": 155.0704, "step": 10260 }, { "epoch": 0.04149209953255736, "grad_norm": 918.627197265625, "learning_rate": 1.6432e-05, "loss": 148.9556, "step": 10270 }, { "epoch": 0.04153250079792499, "grad_norm": 1133.152099609375, "learning_rate": 1.6448000000000002e-05, "loss": 189.2886, "step": 10280 }, { "epoch": 0.04157290206329262, "grad_norm": 725.5909423828125, "learning_rate": 1.6464000000000004e-05, "loss": 102.5979, "step": 10290 }, { "epoch": 0.041613303328660256, "grad_norm": 2373.759765625, "learning_rate": 1.648e-05, "loss": 194.0122, "step": 10300 }, { "epoch": 0.04165370459402788, "grad_norm": 1110.9903564453125, "learning_rate": 1.6496e-05, "loss": 122.7091, "step": 10310 }, { "epoch": 0.041694105859395515, "grad_norm": 875.1665649414062, "learning_rate": 1.6512e-05, "loss": 159.7906, "step": 10320 }, { "epoch": 0.04173450712476315, "grad_norm": 601.5936279296875, "learning_rate": 1.6528e-05, "loss": 125.9924, "step": 10330 }, { "epoch": 0.04177490839013078, "grad_norm": 929.2774658203125, "learning_rate": 1.6544000000000003e-05, "loss": 128.1777, "step": 10340 }, { "epoch": 0.04181530965549841, "grad_norm": 1600.3740234375, "learning_rate": 1.656e-05, "loss": 142.6078, "step": 10350 }, { "epoch": 0.04185571092086604, "grad_norm": 1814.5093994140625, "learning_rate": 1.6576000000000002e-05, "loss": 188.1647, "step": 10360 }, { "epoch": 0.041896112186233674, "grad_norm": 3294.983642578125, "learning_rate": 1.6592000000000003e-05, "loss": 237.4234, "step": 10370 }, { "epoch": 0.04193651345160131, "grad_norm": 815.1757202148438, "learning_rate": 1.6608e-05, "loss": 210.1955, "step": 10380 }, { "epoch": 0.04197691471696893, "grad_norm": 396.14996337890625, "learning_rate": 1.6624000000000002e-05, "loss": 129.1369, "step": 10390 }, { "epoch": 0.042017315982336566, "grad_norm": 862.579833984375, "learning_rate": 1.664e-05, "loss": 179.3043, "step": 10400 }, { "epoch": 0.0420577172477042, "grad_norm": 889.371337890625, "learning_rate": 1.6656e-05, "loss": 152.2917, "step": 10410 }, { "epoch": 0.04209811851307183, "grad_norm": 606.1854858398438, "learning_rate": 1.6672000000000002e-05, "loss": 149.5471, "step": 10420 }, { "epoch": 0.04213851977843946, "grad_norm": 2273.623779296875, "learning_rate": 1.6688000000000004e-05, "loss": 167.0708, "step": 10430 }, { "epoch": 0.04217892104380709, "grad_norm": 929.865966796875, "learning_rate": 1.6704e-05, "loss": 190.4482, "step": 10440 }, { "epoch": 0.042219322309174724, "grad_norm": 1227.5264892578125, "learning_rate": 1.672e-05, "loss": 170.9611, "step": 10450 }, { "epoch": 0.04225972357454236, "grad_norm": 3041.83544921875, "learning_rate": 1.6736e-05, "loss": 145.8744, "step": 10460 }, { "epoch": 0.04230012483990998, "grad_norm": 682.5531616210938, "learning_rate": 1.6752e-05, "loss": 112.5866, "step": 10470 }, { "epoch": 0.042340526105277616, "grad_norm": 864.705322265625, "learning_rate": 1.6768000000000003e-05, "loss": 127.5083, "step": 10480 }, { "epoch": 0.04238092737064525, "grad_norm": 1205.29150390625, "learning_rate": 1.6784e-05, "loss": 157.477, "step": 10490 }, { "epoch": 0.04242132863601288, "grad_norm": 1558.863525390625, "learning_rate": 1.6800000000000002e-05, "loss": 169.6953, "step": 10500 }, { "epoch": 0.04246172990138051, "grad_norm": 792.3493041992188, "learning_rate": 1.6816e-05, "loss": 150.5652, "step": 10510 }, { "epoch": 0.04250213116674814, "grad_norm": 1993.4556884765625, "learning_rate": 1.6832e-05, "loss": 166.2245, "step": 10520 }, { "epoch": 0.042542532432115775, "grad_norm": 662.0936889648438, "learning_rate": 1.6848000000000002e-05, "loss": 161.7556, "step": 10530 }, { "epoch": 0.04258293369748341, "grad_norm": 671.510009765625, "learning_rate": 1.6864e-05, "loss": 144.911, "step": 10540 }, { "epoch": 0.042623334962851034, "grad_norm": 552.993896484375, "learning_rate": 1.688e-05, "loss": 113.3128, "step": 10550 }, { "epoch": 0.04266373622821867, "grad_norm": 2240.13720703125, "learning_rate": 1.6896000000000002e-05, "loss": 146.852, "step": 10560 }, { "epoch": 0.0427041374935863, "grad_norm": 1047.0406494140625, "learning_rate": 1.6912000000000003e-05, "loss": 131.8557, "step": 10570 }, { "epoch": 0.04274453875895393, "grad_norm": 1396.8720703125, "learning_rate": 1.6928e-05, "loss": 163.8682, "step": 10580 }, { "epoch": 0.04278494002432156, "grad_norm": 2435.2705078125, "learning_rate": 1.6944e-05, "loss": 170.1234, "step": 10590 }, { "epoch": 0.04282534128968919, "grad_norm": 1863.5042724609375, "learning_rate": 1.696e-05, "loss": 146.7008, "step": 10600 }, { "epoch": 0.042865742555056825, "grad_norm": 1173.83447265625, "learning_rate": 1.6976e-05, "loss": 197.7816, "step": 10610 }, { "epoch": 0.04290614382042446, "grad_norm": 2275.920166015625, "learning_rate": 1.6992000000000003e-05, "loss": 206.3002, "step": 10620 }, { "epoch": 0.042946545085792084, "grad_norm": 1100.142578125, "learning_rate": 1.7008000000000004e-05, "loss": 140.3349, "step": 10630 }, { "epoch": 0.04298694635115972, "grad_norm": 1740.51953125, "learning_rate": 1.7024e-05, "loss": 174.8, "step": 10640 }, { "epoch": 0.04302734761652735, "grad_norm": 1941.865234375, "learning_rate": 1.704e-05, "loss": 176.8449, "step": 10650 }, { "epoch": 0.043067748881894984, "grad_norm": 1190.6832275390625, "learning_rate": 1.7056e-05, "loss": 186.9511, "step": 10660 }, { "epoch": 0.04310815014726261, "grad_norm": 696.5484008789062, "learning_rate": 1.7072000000000002e-05, "loss": 144.1866, "step": 10670 }, { "epoch": 0.04314855141263024, "grad_norm": 861.3687744140625, "learning_rate": 1.7088000000000003e-05, "loss": 150.022, "step": 10680 }, { "epoch": 0.043188952677997876, "grad_norm": 659.2498779296875, "learning_rate": 1.7104e-05, "loss": 132.2025, "step": 10690 }, { "epoch": 0.04322935394336551, "grad_norm": 1642.438232421875, "learning_rate": 1.7120000000000002e-05, "loss": 218.317, "step": 10700 }, { "epoch": 0.043269755208733135, "grad_norm": 620.3860473632812, "learning_rate": 1.7136e-05, "loss": 173.4349, "step": 10710 }, { "epoch": 0.04331015647410077, "grad_norm": 1452.962890625, "learning_rate": 1.7152e-05, "loss": 203.3664, "step": 10720 }, { "epoch": 0.0433505577394684, "grad_norm": 2354.88916015625, "learning_rate": 1.7168000000000002e-05, "loss": 187.4407, "step": 10730 }, { "epoch": 0.043390959004836034, "grad_norm": 1276.4913330078125, "learning_rate": 1.7184e-05, "loss": 145.4733, "step": 10740 }, { "epoch": 0.04343136027020366, "grad_norm": 695.041259765625, "learning_rate": 1.72e-05, "loss": 189.1464, "step": 10750 }, { "epoch": 0.04347176153557129, "grad_norm": 14365.31640625, "learning_rate": 1.7216000000000003e-05, "loss": 225.0812, "step": 10760 }, { "epoch": 0.043512162800938926, "grad_norm": 1351.369140625, "learning_rate": 1.7232000000000004e-05, "loss": 179.7414, "step": 10770 }, { "epoch": 0.04355256406630656, "grad_norm": 1129.187255859375, "learning_rate": 1.7248e-05, "loss": 153.1924, "step": 10780 }, { "epoch": 0.043592965331674186, "grad_norm": 1236.0352783203125, "learning_rate": 1.7264e-05, "loss": 167.2036, "step": 10790 }, { "epoch": 0.04363336659704182, "grad_norm": 1314.908447265625, "learning_rate": 1.728e-05, "loss": 155.7452, "step": 10800 }, { "epoch": 0.04367376786240945, "grad_norm": 1064.8416748046875, "learning_rate": 1.7296000000000002e-05, "loss": 249.5876, "step": 10810 }, { "epoch": 0.043714169127777085, "grad_norm": 1580.3046875, "learning_rate": 1.7312000000000003e-05, "loss": 183.0618, "step": 10820 }, { "epoch": 0.04375457039314471, "grad_norm": 799.4830322265625, "learning_rate": 1.7328e-05, "loss": 193.176, "step": 10830 }, { "epoch": 0.043794971658512344, "grad_norm": 3630.85693359375, "learning_rate": 1.7344000000000002e-05, "loss": 169.7418, "step": 10840 }, { "epoch": 0.04383537292387998, "grad_norm": 1167.7203369140625, "learning_rate": 1.736e-05, "loss": 201.4246, "step": 10850 }, { "epoch": 0.04387577418924761, "grad_norm": 888.1072998046875, "learning_rate": 1.7376e-05, "loss": 151.4057, "step": 10860 }, { "epoch": 0.043916175454615236, "grad_norm": 2073.16455078125, "learning_rate": 1.7392000000000002e-05, "loss": 187.2557, "step": 10870 }, { "epoch": 0.04395657671998287, "grad_norm": 1258.6611328125, "learning_rate": 1.7408e-05, "loss": 182.0184, "step": 10880 }, { "epoch": 0.0439969779853505, "grad_norm": 759.2626953125, "learning_rate": 1.7424e-05, "loss": 180.3921, "step": 10890 }, { "epoch": 0.044037379250718135, "grad_norm": 1016.7139282226562, "learning_rate": 1.7440000000000002e-05, "loss": 156.4396, "step": 10900 }, { "epoch": 0.04407778051608576, "grad_norm": 1495.8121337890625, "learning_rate": 1.7456e-05, "loss": 120.8279, "step": 10910 }, { "epoch": 0.044118181781453394, "grad_norm": 782.62109375, "learning_rate": 1.7472e-05, "loss": 135.7859, "step": 10920 }, { "epoch": 0.04415858304682103, "grad_norm": 1541.3516845703125, "learning_rate": 1.7488e-05, "loss": 142.1355, "step": 10930 }, { "epoch": 0.04419898431218866, "grad_norm": 1355.412109375, "learning_rate": 1.7504e-05, "loss": 165.6635, "step": 10940 }, { "epoch": 0.04423938557755629, "grad_norm": 974.6658325195312, "learning_rate": 1.752e-05, "loss": 121.126, "step": 10950 }, { "epoch": 0.04427978684292392, "grad_norm": 1273.1082763671875, "learning_rate": 1.7536000000000003e-05, "loss": 102.4414, "step": 10960 }, { "epoch": 0.04432018810829155, "grad_norm": 269.8813781738281, "learning_rate": 1.7552e-05, "loss": 127.8815, "step": 10970 }, { "epoch": 0.044360589373659186, "grad_norm": 2854.266357421875, "learning_rate": 1.7568000000000002e-05, "loss": 220.2031, "step": 10980 }, { "epoch": 0.04440099063902681, "grad_norm": 808.132080078125, "learning_rate": 1.7584e-05, "loss": 291.7426, "step": 10990 }, { "epoch": 0.044441391904394445, "grad_norm": 1838.3631591796875, "learning_rate": 1.76e-05, "loss": 170.3808, "step": 11000 }, { "epoch": 0.04448179316976208, "grad_norm": 0.0, "learning_rate": 1.7616000000000002e-05, "loss": 160.7307, "step": 11010 }, { "epoch": 0.04452219443512971, "grad_norm": 1464.164306640625, "learning_rate": 1.7632000000000003e-05, "loss": 132.0214, "step": 11020 }, { "epoch": 0.04456259570049734, "grad_norm": 1470.7813720703125, "learning_rate": 1.7648e-05, "loss": 177.7017, "step": 11030 }, { "epoch": 0.04460299696586497, "grad_norm": 673.0449829101562, "learning_rate": 1.7664000000000002e-05, "loss": 166.1588, "step": 11040 }, { "epoch": 0.0446433982312326, "grad_norm": 450.8736572265625, "learning_rate": 1.768e-05, "loss": 135.9254, "step": 11050 }, { "epoch": 0.044683799496600236, "grad_norm": 694.109130859375, "learning_rate": 1.7696e-05, "loss": 116.8292, "step": 11060 }, { "epoch": 0.04472420076196786, "grad_norm": 549.8363647460938, "learning_rate": 1.7712000000000003e-05, "loss": 115.2245, "step": 11070 }, { "epoch": 0.044764602027335496, "grad_norm": 1329.90087890625, "learning_rate": 1.7728e-05, "loss": 151.094, "step": 11080 }, { "epoch": 0.04480500329270313, "grad_norm": 1619.731201171875, "learning_rate": 1.7744e-05, "loss": 125.6026, "step": 11090 }, { "epoch": 0.04484540455807076, "grad_norm": 1591.700927734375, "learning_rate": 1.7760000000000003e-05, "loss": 210.2778, "step": 11100 }, { "epoch": 0.04488580582343839, "grad_norm": 1928.7462158203125, "learning_rate": 1.7776e-05, "loss": 168.4507, "step": 11110 }, { "epoch": 0.04492620708880602, "grad_norm": 1136.9661865234375, "learning_rate": 1.7792000000000002e-05, "loss": 159.5933, "step": 11120 }, { "epoch": 0.044966608354173654, "grad_norm": 1273.693603515625, "learning_rate": 1.7808e-05, "loss": 90.969, "step": 11130 }, { "epoch": 0.04500700961954129, "grad_norm": 6502.5478515625, "learning_rate": 1.7824e-05, "loss": 200.0657, "step": 11140 }, { "epoch": 0.04504741088490891, "grad_norm": 1432.283447265625, "learning_rate": 1.7840000000000002e-05, "loss": 167.2854, "step": 11150 }, { "epoch": 0.045087812150276546, "grad_norm": 1637.12890625, "learning_rate": 1.7856000000000003e-05, "loss": 151.2584, "step": 11160 }, { "epoch": 0.04512821341564418, "grad_norm": 3664.730224609375, "learning_rate": 1.7872e-05, "loss": 139.8826, "step": 11170 }, { "epoch": 0.04516861468101181, "grad_norm": 1181.7783203125, "learning_rate": 1.7888000000000002e-05, "loss": 125.8161, "step": 11180 }, { "epoch": 0.04520901594637944, "grad_norm": 3673.306884765625, "learning_rate": 1.7904e-05, "loss": 134.7778, "step": 11190 }, { "epoch": 0.04524941721174707, "grad_norm": 2812.187744140625, "learning_rate": 1.792e-05, "loss": 253.2139, "step": 11200 }, { "epoch": 0.045289818477114704, "grad_norm": 937.6162109375, "learning_rate": 1.7936000000000002e-05, "loss": 151.9196, "step": 11210 }, { "epoch": 0.04533021974248234, "grad_norm": 1033.5677490234375, "learning_rate": 1.7952e-05, "loss": 184.6166, "step": 11220 }, { "epoch": 0.045370621007849964, "grad_norm": 1049.357421875, "learning_rate": 1.7968e-05, "loss": 143.1292, "step": 11230 }, { "epoch": 0.0454110222732176, "grad_norm": 2090.681640625, "learning_rate": 1.7984000000000003e-05, "loss": 237.326, "step": 11240 }, { "epoch": 0.04545142353858523, "grad_norm": 588.9541625976562, "learning_rate": 1.8e-05, "loss": 136.8592, "step": 11250 }, { "epoch": 0.04549182480395286, "grad_norm": 989.3211059570312, "learning_rate": 1.8016e-05, "loss": 173.3406, "step": 11260 }, { "epoch": 0.04553222606932049, "grad_norm": 1291.71875, "learning_rate": 1.8032e-05, "loss": 161.0808, "step": 11270 }, { "epoch": 0.04557262733468812, "grad_norm": 820.1669921875, "learning_rate": 1.8048e-05, "loss": 197.2681, "step": 11280 }, { "epoch": 0.045613028600055755, "grad_norm": 3350.81591796875, "learning_rate": 1.8064000000000002e-05, "loss": 206.3674, "step": 11290 }, { "epoch": 0.04565342986542339, "grad_norm": 1087.4700927734375, "learning_rate": 1.8080000000000003e-05, "loss": 133.9378, "step": 11300 }, { "epoch": 0.045693831130791014, "grad_norm": 880.30419921875, "learning_rate": 1.8096e-05, "loss": 164.019, "step": 11310 }, { "epoch": 0.04573423239615865, "grad_norm": 962.7153930664062, "learning_rate": 1.8112000000000002e-05, "loss": 178.859, "step": 11320 }, { "epoch": 0.04577463366152628, "grad_norm": 6233.5029296875, "learning_rate": 1.8128e-05, "loss": 187.2053, "step": 11330 }, { "epoch": 0.04581503492689391, "grad_norm": 1161.468994140625, "learning_rate": 1.8144e-05, "loss": 169.2743, "step": 11340 }, { "epoch": 0.04585543619226154, "grad_norm": 1120.290283203125, "learning_rate": 1.8160000000000002e-05, "loss": 172.2986, "step": 11350 }, { "epoch": 0.04589583745762917, "grad_norm": 3448.97705078125, "learning_rate": 1.8176000000000004e-05, "loss": 206.6683, "step": 11360 }, { "epoch": 0.045936238722996806, "grad_norm": 1253.4058837890625, "learning_rate": 1.8192e-05, "loss": 139.1167, "step": 11370 }, { "epoch": 0.04597663998836444, "grad_norm": 924.9556884765625, "learning_rate": 1.8208000000000003e-05, "loss": 148.5544, "step": 11380 }, { "epoch": 0.046017041253732065, "grad_norm": 660.0718994140625, "learning_rate": 1.8224e-05, "loss": 126.323, "step": 11390 }, { "epoch": 0.0460574425190997, "grad_norm": 822.0247802734375, "learning_rate": 1.824e-05, "loss": 146.354, "step": 11400 }, { "epoch": 0.04609784378446733, "grad_norm": 1849.636474609375, "learning_rate": 1.8256e-05, "loss": 131.2848, "step": 11410 }, { "epoch": 0.046138245049834964, "grad_norm": 4517.93212890625, "learning_rate": 1.8272e-05, "loss": 184.1627, "step": 11420 }, { "epoch": 0.04617864631520259, "grad_norm": 1306.6082763671875, "learning_rate": 1.8288000000000002e-05, "loss": 152.2419, "step": 11430 }, { "epoch": 0.04621904758057022, "grad_norm": 913.3738403320312, "learning_rate": 1.8304000000000003e-05, "loss": 174.6116, "step": 11440 }, { "epoch": 0.046259448845937856, "grad_norm": 801.408447265625, "learning_rate": 1.832e-05, "loss": 161.1145, "step": 11450 }, { "epoch": 0.04629985011130549, "grad_norm": 1175.022216796875, "learning_rate": 1.8336000000000002e-05, "loss": 183.3758, "step": 11460 }, { "epoch": 0.046340251376673115, "grad_norm": 1856.43701171875, "learning_rate": 1.8352e-05, "loss": 161.8469, "step": 11470 }, { "epoch": 0.04638065264204075, "grad_norm": 1363.8883056640625, "learning_rate": 1.8368e-05, "loss": 202.2804, "step": 11480 }, { "epoch": 0.04642105390740838, "grad_norm": 1238.9725341796875, "learning_rate": 1.8384000000000002e-05, "loss": 142.8043, "step": 11490 }, { "epoch": 0.046461455172776014, "grad_norm": 1129.2506103515625, "learning_rate": 1.8400000000000003e-05, "loss": 153.3768, "step": 11500 }, { "epoch": 0.04650185643814364, "grad_norm": 642.8306274414062, "learning_rate": 1.8416e-05, "loss": 152.1908, "step": 11510 }, { "epoch": 0.046542257703511274, "grad_norm": 480.4953918457031, "learning_rate": 1.8432000000000002e-05, "loss": 151.4173, "step": 11520 }, { "epoch": 0.04658265896887891, "grad_norm": 702.656005859375, "learning_rate": 1.8448e-05, "loss": 131.2011, "step": 11530 }, { "epoch": 0.04662306023424654, "grad_norm": 1275.0462646484375, "learning_rate": 1.8464e-05, "loss": 171.1476, "step": 11540 }, { "epoch": 0.046663461499614166, "grad_norm": 1706.020263671875, "learning_rate": 1.8480000000000003e-05, "loss": 194.7064, "step": 11550 }, { "epoch": 0.0467038627649818, "grad_norm": 1688.6947021484375, "learning_rate": 1.8496e-05, "loss": 244.8772, "step": 11560 }, { "epoch": 0.04674426403034943, "grad_norm": 1043.65087890625, "learning_rate": 1.8512e-05, "loss": 149.636, "step": 11570 }, { "epoch": 0.046784665295717065, "grad_norm": 614.2929077148438, "learning_rate": 1.8528000000000003e-05, "loss": 90.4073, "step": 11580 }, { "epoch": 0.04682506656108469, "grad_norm": 1882.9847412109375, "learning_rate": 1.8544e-05, "loss": 142.7772, "step": 11590 }, { "epoch": 0.046865467826452324, "grad_norm": 1458.3114013671875, "learning_rate": 1.8560000000000002e-05, "loss": 128.3059, "step": 11600 }, { "epoch": 0.04690586909181996, "grad_norm": 1554.3670654296875, "learning_rate": 1.8576e-05, "loss": 92.2401, "step": 11610 }, { "epoch": 0.04694627035718759, "grad_norm": 1737.34130859375, "learning_rate": 1.8592e-05, "loss": 151.1104, "step": 11620 }, { "epoch": 0.046986671622555216, "grad_norm": 1246.5958251953125, "learning_rate": 1.8608000000000002e-05, "loss": 189.7216, "step": 11630 }, { "epoch": 0.04702707288792285, "grad_norm": 1355.0965576171875, "learning_rate": 1.8624000000000003e-05, "loss": 164.5095, "step": 11640 }, { "epoch": 0.04706747415329048, "grad_norm": 1271.6949462890625, "learning_rate": 1.864e-05, "loss": 168.5206, "step": 11650 }, { "epoch": 0.047107875418658116, "grad_norm": 1827.9058837890625, "learning_rate": 1.8656000000000002e-05, "loss": 92.2439, "step": 11660 }, { "epoch": 0.04714827668402574, "grad_norm": 497.3401184082031, "learning_rate": 1.8672e-05, "loss": 77.0604, "step": 11670 }, { "epoch": 0.047188677949393375, "grad_norm": 969.7373046875, "learning_rate": 1.8688e-05, "loss": 244.0555, "step": 11680 }, { "epoch": 0.04722907921476101, "grad_norm": 2381.234130859375, "learning_rate": 1.8704000000000003e-05, "loss": 249.9872, "step": 11690 }, { "epoch": 0.04726948048012864, "grad_norm": 1113.687744140625, "learning_rate": 1.8720000000000004e-05, "loss": 140.6708, "step": 11700 }, { "epoch": 0.04730988174549627, "grad_norm": 1018.4200439453125, "learning_rate": 1.8736e-05, "loss": 123.3354, "step": 11710 }, { "epoch": 0.0473502830108639, "grad_norm": 1131.4271240234375, "learning_rate": 1.8752000000000003e-05, "loss": 151.4623, "step": 11720 }, { "epoch": 0.04739068427623153, "grad_norm": 727.86376953125, "learning_rate": 1.8768e-05, "loss": 161.0078, "step": 11730 }, { "epoch": 0.047431085541599166, "grad_norm": 1448.9930419921875, "learning_rate": 1.8784000000000002e-05, "loss": 132.0155, "step": 11740 }, { "epoch": 0.04747148680696679, "grad_norm": 1065.7197265625, "learning_rate": 1.88e-05, "loss": 178.1806, "step": 11750 }, { "epoch": 0.047511888072334425, "grad_norm": 888.4298706054688, "learning_rate": 1.8816e-05, "loss": 99.7927, "step": 11760 }, { "epoch": 0.04755228933770206, "grad_norm": 3456.236083984375, "learning_rate": 1.8832000000000002e-05, "loss": 172.4764, "step": 11770 }, { "epoch": 0.04759269060306969, "grad_norm": 1027.8367919921875, "learning_rate": 1.8848000000000003e-05, "loss": 96.5219, "step": 11780 }, { "epoch": 0.04763309186843732, "grad_norm": 2561.343505859375, "learning_rate": 1.8864e-05, "loss": 152.5547, "step": 11790 }, { "epoch": 0.04767349313380495, "grad_norm": 726.5477294921875, "learning_rate": 1.8880000000000002e-05, "loss": 163.8758, "step": 11800 }, { "epoch": 0.047713894399172584, "grad_norm": 497.03021240234375, "learning_rate": 1.8896e-05, "loss": 165.0003, "step": 11810 }, { "epoch": 0.04775429566454022, "grad_norm": 1021.0319213867188, "learning_rate": 1.8912e-05, "loss": 144.085, "step": 11820 }, { "epoch": 0.04779469692990784, "grad_norm": 803.744384765625, "learning_rate": 1.8928000000000002e-05, "loss": 163.5293, "step": 11830 }, { "epoch": 0.047835098195275476, "grad_norm": 1352.6136474609375, "learning_rate": 1.8944000000000004e-05, "loss": 156.9807, "step": 11840 }, { "epoch": 0.04787549946064311, "grad_norm": 1498.0391845703125, "learning_rate": 1.896e-05, "loss": 136.0139, "step": 11850 }, { "epoch": 0.04791590072601074, "grad_norm": 2074.9453125, "learning_rate": 1.8976000000000003e-05, "loss": 135.7337, "step": 11860 }, { "epoch": 0.04795630199137837, "grad_norm": 1958.399658203125, "learning_rate": 1.8992e-05, "loss": 169.1383, "step": 11870 }, { "epoch": 0.047996703256746, "grad_norm": 1390.4888916015625, "learning_rate": 1.9008e-05, "loss": 195.9214, "step": 11880 }, { "epoch": 0.048037104522113634, "grad_norm": 1233.4976806640625, "learning_rate": 1.9024000000000003e-05, "loss": 185.4625, "step": 11890 }, { "epoch": 0.04807750578748127, "grad_norm": 7298.0615234375, "learning_rate": 1.904e-05, "loss": 208.103, "step": 11900 }, { "epoch": 0.04811790705284889, "grad_norm": 1131.549560546875, "learning_rate": 1.9056000000000002e-05, "loss": 130.0675, "step": 11910 }, { "epoch": 0.048158308318216526, "grad_norm": 4298.8173828125, "learning_rate": 1.9072000000000003e-05, "loss": 130.8222, "step": 11920 }, { "epoch": 0.04819870958358416, "grad_norm": 1270.93505859375, "learning_rate": 1.9088e-05, "loss": 138.4447, "step": 11930 }, { "epoch": 0.04823911084895179, "grad_norm": 2340.214111328125, "learning_rate": 1.9104000000000002e-05, "loss": 176.73, "step": 11940 }, { "epoch": 0.04827951211431942, "grad_norm": 1349.122314453125, "learning_rate": 1.912e-05, "loss": 209.7494, "step": 11950 }, { "epoch": 0.04831991337968705, "grad_norm": 1326.8984375, "learning_rate": 1.9136e-05, "loss": 117.3182, "step": 11960 }, { "epoch": 0.048360314645054685, "grad_norm": 868.340576171875, "learning_rate": 1.9152000000000002e-05, "loss": 108.6576, "step": 11970 }, { "epoch": 0.04840071591042232, "grad_norm": 754.5211791992188, "learning_rate": 1.9168000000000004e-05, "loss": 160.192, "step": 11980 }, { "epoch": 0.048441117175789944, "grad_norm": 2791.62744140625, "learning_rate": 1.9184e-05, "loss": 207.5689, "step": 11990 }, { "epoch": 0.04848151844115758, "grad_norm": 653.2196655273438, "learning_rate": 1.9200000000000003e-05, "loss": 171.7243, "step": 12000 }, { "epoch": 0.04852191970652521, "grad_norm": 1092.5750732421875, "learning_rate": 1.9216e-05, "loss": 157.921, "step": 12010 }, { "epoch": 0.04856232097189284, "grad_norm": 1124.6910400390625, "learning_rate": 1.9232e-05, "loss": 152.1631, "step": 12020 }, { "epoch": 0.04860272223726047, "grad_norm": 996.9329223632812, "learning_rate": 1.9248000000000003e-05, "loss": 199.5053, "step": 12030 }, { "epoch": 0.0486431235026281, "grad_norm": 2939.33447265625, "learning_rate": 1.9264e-05, "loss": 125.7709, "step": 12040 }, { "epoch": 0.048683524767995735, "grad_norm": 620.4122924804688, "learning_rate": 1.9280000000000002e-05, "loss": 155.2206, "step": 12050 }, { "epoch": 0.04872392603336337, "grad_norm": 987.931884765625, "learning_rate": 1.9296000000000003e-05, "loss": 144.0866, "step": 12060 }, { "epoch": 0.048764327298730994, "grad_norm": 1881.5980224609375, "learning_rate": 1.9312e-05, "loss": 145.1766, "step": 12070 }, { "epoch": 0.04880472856409863, "grad_norm": 2606.857177734375, "learning_rate": 1.9328000000000002e-05, "loss": 121.8912, "step": 12080 }, { "epoch": 0.04884512982946626, "grad_norm": 1632.544677734375, "learning_rate": 1.9344e-05, "loss": 233.9964, "step": 12090 }, { "epoch": 0.048885531094833894, "grad_norm": 1396.0145263671875, "learning_rate": 1.936e-05, "loss": 199.8086, "step": 12100 }, { "epoch": 0.04892593236020152, "grad_norm": 2512.758056640625, "learning_rate": 1.9376000000000002e-05, "loss": 200.7336, "step": 12110 }, { "epoch": 0.04896633362556915, "grad_norm": 1487.132080078125, "learning_rate": 1.9392000000000003e-05, "loss": 192.9431, "step": 12120 }, { "epoch": 0.049006734890936786, "grad_norm": 1734.700439453125, "learning_rate": 1.9408e-05, "loss": 163.1206, "step": 12130 }, { "epoch": 0.04904713615630442, "grad_norm": 898.8317260742188, "learning_rate": 1.9424e-05, "loss": 161.1301, "step": 12140 }, { "epoch": 0.049087537421672045, "grad_norm": 1437.3121337890625, "learning_rate": 1.944e-05, "loss": 177.5109, "step": 12150 }, { "epoch": 0.04912793868703968, "grad_norm": 5169.5986328125, "learning_rate": 1.9456e-05, "loss": 210.6177, "step": 12160 }, { "epoch": 0.04916833995240731, "grad_norm": 1230.391357421875, "learning_rate": 1.9472000000000003e-05, "loss": 187.2195, "step": 12170 }, { "epoch": 0.04920874121777494, "grad_norm": 3862.664794921875, "learning_rate": 1.9488000000000004e-05, "loss": 177.8196, "step": 12180 }, { "epoch": 0.04924914248314257, "grad_norm": 1136.101318359375, "learning_rate": 1.9504e-05, "loss": 177.7476, "step": 12190 }, { "epoch": 0.0492895437485102, "grad_norm": 2034.5399169921875, "learning_rate": 1.9520000000000003e-05, "loss": 108.4205, "step": 12200 }, { "epoch": 0.049329945013877836, "grad_norm": 3904.319091796875, "learning_rate": 1.9536e-05, "loss": 125.3061, "step": 12210 }, { "epoch": 0.04937034627924546, "grad_norm": 1803.0955810546875, "learning_rate": 1.9552000000000002e-05, "loss": 128.3449, "step": 12220 }, { "epoch": 0.049410747544613096, "grad_norm": 1118.046875, "learning_rate": 1.9568000000000003e-05, "loss": 199.2001, "step": 12230 }, { "epoch": 0.04945114880998073, "grad_norm": 958.41259765625, "learning_rate": 1.9584e-05, "loss": 194.366, "step": 12240 }, { "epoch": 0.04949155007534836, "grad_norm": 4219.087890625, "learning_rate": 1.9600000000000002e-05, "loss": 181.3084, "step": 12250 }, { "epoch": 0.04953195134071599, "grad_norm": 743.9143676757812, "learning_rate": 1.9616000000000003e-05, "loss": 211.1867, "step": 12260 }, { "epoch": 0.04957235260608362, "grad_norm": 1083.905029296875, "learning_rate": 1.9632e-05, "loss": 133.8096, "step": 12270 }, { "epoch": 0.049612753871451254, "grad_norm": 906.1483764648438, "learning_rate": 1.9648000000000002e-05, "loss": 171.5658, "step": 12280 }, { "epoch": 0.04965315513681889, "grad_norm": 717.866455078125, "learning_rate": 1.9664e-05, "loss": 113.4374, "step": 12290 }, { "epoch": 0.04969355640218651, "grad_norm": 1161.523681640625, "learning_rate": 1.968e-05, "loss": 173.7262, "step": 12300 }, { "epoch": 0.049733957667554146, "grad_norm": 1218.2979736328125, "learning_rate": 1.9696000000000003e-05, "loss": 121.5765, "step": 12310 }, { "epoch": 0.04977435893292178, "grad_norm": 1055.20166015625, "learning_rate": 1.9712000000000004e-05, "loss": 144.3302, "step": 12320 }, { "epoch": 0.04981476019828941, "grad_norm": 1507.555419921875, "learning_rate": 1.9728e-05, "loss": 144.8477, "step": 12330 }, { "epoch": 0.04985516146365704, "grad_norm": 1124.2626953125, "learning_rate": 1.9744e-05, "loss": 216.2754, "step": 12340 }, { "epoch": 0.04989556272902467, "grad_norm": 1329.7135009765625, "learning_rate": 1.976e-05, "loss": 169.4856, "step": 12350 }, { "epoch": 0.049935963994392304, "grad_norm": 1700.2542724609375, "learning_rate": 1.9776000000000002e-05, "loss": 160.8507, "step": 12360 }, { "epoch": 0.04997636525975994, "grad_norm": 5681.53662109375, "learning_rate": 1.9792000000000003e-05, "loss": 175.2828, "step": 12370 }, { "epoch": 0.050016766525127564, "grad_norm": 1659.982177734375, "learning_rate": 1.9808e-05, "loss": 159.9759, "step": 12380 }, { "epoch": 0.0500571677904952, "grad_norm": 2237.371337890625, "learning_rate": 1.9824000000000002e-05, "loss": 230.2747, "step": 12390 }, { "epoch": 0.05009756905586283, "grad_norm": 12513.3671875, "learning_rate": 1.9840000000000003e-05, "loss": 155.1298, "step": 12400 }, { "epoch": 0.05013797032123046, "grad_norm": 3976.13232421875, "learning_rate": 1.9856e-05, "loss": 173.9067, "step": 12410 }, { "epoch": 0.05017837158659809, "grad_norm": 5326.07080078125, "learning_rate": 1.9872000000000002e-05, "loss": 141.2005, "step": 12420 }, { "epoch": 0.05021877285196572, "grad_norm": 1620.393310546875, "learning_rate": 1.9888e-05, "loss": 192.9733, "step": 12430 }, { "epoch": 0.050259174117333355, "grad_norm": 3446.421142578125, "learning_rate": 1.9904e-05, "loss": 244.3542, "step": 12440 }, { "epoch": 0.05029957538270099, "grad_norm": 2199.027099609375, "learning_rate": 1.9920000000000002e-05, "loss": 140.0264, "step": 12450 }, { "epoch": 0.050339976648068614, "grad_norm": 2481.56494140625, "learning_rate": 1.9936000000000004e-05, "loss": 115.8152, "step": 12460 }, { "epoch": 0.05038037791343625, "grad_norm": 691.91748046875, "learning_rate": 1.9952e-05, "loss": 70.8518, "step": 12470 }, { "epoch": 0.05042077917880388, "grad_norm": 717.9995727539062, "learning_rate": 1.9968e-05, "loss": 198.1902, "step": 12480 }, { "epoch": 0.05046118044417151, "grad_norm": 790.2597045898438, "learning_rate": 1.9984e-05, "loss": 171.2203, "step": 12490 }, { "epoch": 0.05050158170953914, "grad_norm": 1000.937255859375, "learning_rate": 2e-05, "loss": 124.132, "step": 12500 }, { "epoch": 0.05054198297490677, "grad_norm": 2628.76708984375, "learning_rate": 2.0016e-05, "loss": 137.946, "step": 12510 }, { "epoch": 0.050582384240274406, "grad_norm": 3296.44287109375, "learning_rate": 2.0032000000000004e-05, "loss": 138.9817, "step": 12520 }, { "epoch": 0.05062278550564204, "grad_norm": 1251.9384765625, "learning_rate": 2.0048000000000002e-05, "loss": 116.2065, "step": 12530 }, { "epoch": 0.050663186771009665, "grad_norm": 3689.34814453125, "learning_rate": 2.0064000000000003e-05, "loss": 190.9615, "step": 12540 }, { "epoch": 0.0507035880363773, "grad_norm": 1161.31787109375, "learning_rate": 2.008e-05, "loss": 165.3648, "step": 12550 }, { "epoch": 0.05074398930174493, "grad_norm": 1477.417724609375, "learning_rate": 2.0096e-05, "loss": 181.9141, "step": 12560 }, { "epoch": 0.050784390567112564, "grad_norm": 699.610595703125, "learning_rate": 2.0112000000000003e-05, "loss": 209.1569, "step": 12570 }, { "epoch": 0.05082479183248019, "grad_norm": 520.3313598632812, "learning_rate": 2.0128e-05, "loss": 140.6631, "step": 12580 }, { "epoch": 0.05086519309784782, "grad_norm": 443.720947265625, "learning_rate": 2.0144000000000002e-05, "loss": 185.0845, "step": 12590 }, { "epoch": 0.050905594363215456, "grad_norm": 905.438232421875, "learning_rate": 2.016e-05, "loss": 113.3908, "step": 12600 }, { "epoch": 0.05094599562858309, "grad_norm": 1029.767578125, "learning_rate": 2.0176e-05, "loss": 161.9168, "step": 12610 }, { "epoch": 0.050986396893950715, "grad_norm": 809.9927978515625, "learning_rate": 2.0192000000000003e-05, "loss": 196.2517, "step": 12620 }, { "epoch": 0.05102679815931835, "grad_norm": 1245.3720703125, "learning_rate": 2.0208e-05, "loss": 162.7455, "step": 12630 }, { "epoch": 0.05106719942468598, "grad_norm": 461.35711669921875, "learning_rate": 2.0224000000000005e-05, "loss": 197.058, "step": 12640 }, { "epoch": 0.051107600690053615, "grad_norm": 0.0, "learning_rate": 2.0240000000000003e-05, "loss": 159.2665, "step": 12650 }, { "epoch": 0.05114800195542124, "grad_norm": 1341.0728759765625, "learning_rate": 2.0256e-05, "loss": 153.8318, "step": 12660 }, { "epoch": 0.051188403220788874, "grad_norm": 2662.817626953125, "learning_rate": 2.0272000000000002e-05, "loss": 164.7002, "step": 12670 }, { "epoch": 0.05122880448615651, "grad_norm": 2191.09423828125, "learning_rate": 2.0288e-05, "loss": 146.5947, "step": 12680 }, { "epoch": 0.05126920575152414, "grad_norm": 1247.1414794921875, "learning_rate": 2.0304000000000004e-05, "loss": 175.3468, "step": 12690 }, { "epoch": 0.051309607016891766, "grad_norm": 728.5390625, "learning_rate": 2.0320000000000002e-05, "loss": 133.0505, "step": 12700 }, { "epoch": 0.0513500082822594, "grad_norm": 830.8687744140625, "learning_rate": 2.0336e-05, "loss": 108.7042, "step": 12710 }, { "epoch": 0.05139040954762703, "grad_norm": 879.9666137695312, "learning_rate": 2.0352000000000004e-05, "loss": 129.6018, "step": 12720 }, { "epoch": 0.051430810812994665, "grad_norm": 1011.3085327148438, "learning_rate": 2.0368000000000002e-05, "loss": 173.0924, "step": 12730 }, { "epoch": 0.05147121207836229, "grad_norm": 1851.60107421875, "learning_rate": 2.0384000000000003e-05, "loss": 145.7644, "step": 12740 }, { "epoch": 0.051511613343729924, "grad_norm": 663.5894165039062, "learning_rate": 2.04e-05, "loss": 121.5047, "step": 12750 }, { "epoch": 0.05155201460909756, "grad_norm": 714.2340698242188, "learning_rate": 2.0416e-05, "loss": 182.6054, "step": 12760 }, { "epoch": 0.05159241587446519, "grad_norm": 3369.689697265625, "learning_rate": 2.0432000000000004e-05, "loss": 197.5232, "step": 12770 }, { "epoch": 0.051632817139832816, "grad_norm": 1169.3397216796875, "learning_rate": 2.0448e-05, "loss": 167.386, "step": 12780 }, { "epoch": 0.05167321840520045, "grad_norm": 875.116943359375, "learning_rate": 2.0464000000000003e-05, "loss": 174.5069, "step": 12790 }, { "epoch": 0.05171361967056808, "grad_norm": 2895.494140625, "learning_rate": 2.048e-05, "loss": 175.5416, "step": 12800 }, { "epoch": 0.051754020935935716, "grad_norm": 1204.8529052734375, "learning_rate": 2.0496e-05, "loss": 232.9305, "step": 12810 }, { "epoch": 0.05179442220130334, "grad_norm": 966.132080078125, "learning_rate": 2.0512000000000003e-05, "loss": 155.4237, "step": 12820 }, { "epoch": 0.051834823466670975, "grad_norm": 1232.181884765625, "learning_rate": 2.0528e-05, "loss": 200.3393, "step": 12830 }, { "epoch": 0.05187522473203861, "grad_norm": 794.1149291992188, "learning_rate": 2.0544e-05, "loss": 143.9225, "step": 12840 }, { "epoch": 0.05191562599740624, "grad_norm": 911.0679321289062, "learning_rate": 2.0560000000000003e-05, "loss": 130.7181, "step": 12850 }, { "epoch": 0.05195602726277387, "grad_norm": 577.2883911132812, "learning_rate": 2.0576e-05, "loss": 118.0697, "step": 12860 }, { "epoch": 0.0519964285281415, "grad_norm": 1354.333740234375, "learning_rate": 2.0592000000000002e-05, "loss": 154.2893, "step": 12870 }, { "epoch": 0.05203682979350913, "grad_norm": 2138.07958984375, "learning_rate": 2.0608e-05, "loss": 151.4154, "step": 12880 }, { "epoch": 0.052077231058876766, "grad_norm": 4152.9033203125, "learning_rate": 2.0624e-05, "loss": 155.5429, "step": 12890 }, { "epoch": 0.05211763232424439, "grad_norm": 714.933349609375, "learning_rate": 2.0640000000000002e-05, "loss": 143.2872, "step": 12900 }, { "epoch": 0.052158033589612025, "grad_norm": 759.1387329101562, "learning_rate": 2.0656e-05, "loss": 151.3617, "step": 12910 }, { "epoch": 0.05219843485497966, "grad_norm": 1528.81494140625, "learning_rate": 2.0672000000000005e-05, "loss": 180.9476, "step": 12920 }, { "epoch": 0.05223883612034729, "grad_norm": 1480.501220703125, "learning_rate": 2.0688000000000003e-05, "loss": 162.473, "step": 12930 }, { "epoch": 0.05227923738571492, "grad_norm": 1521.9293212890625, "learning_rate": 2.0704e-05, "loss": 166.9442, "step": 12940 }, { "epoch": 0.05231963865108255, "grad_norm": 2496.98828125, "learning_rate": 2.072e-05, "loss": 167.1679, "step": 12950 }, { "epoch": 0.052360039916450184, "grad_norm": 1867.0335693359375, "learning_rate": 2.0736e-05, "loss": 207.2721, "step": 12960 }, { "epoch": 0.05240044118181782, "grad_norm": 626.9522094726562, "learning_rate": 2.0752000000000004e-05, "loss": 144.8393, "step": 12970 }, { "epoch": 0.05244084244718544, "grad_norm": 870.9942016601562, "learning_rate": 2.0768000000000002e-05, "loss": 115.7043, "step": 12980 }, { "epoch": 0.052481243712553076, "grad_norm": 652.1517944335938, "learning_rate": 2.0784e-05, "loss": 96.7211, "step": 12990 }, { "epoch": 0.05252164497792071, "grad_norm": 1137.64697265625, "learning_rate": 2.08e-05, "loss": 135.8071, "step": 13000 }, { "epoch": 0.05256204624328834, "grad_norm": 1930.1585693359375, "learning_rate": 2.0816000000000002e-05, "loss": 136.4251, "step": 13010 }, { "epoch": 0.05260244750865597, "grad_norm": 907.9906005859375, "learning_rate": 2.0832000000000003e-05, "loss": 89.2023, "step": 13020 }, { "epoch": 0.0526428487740236, "grad_norm": 558.8703002929688, "learning_rate": 2.0848e-05, "loss": 147.8288, "step": 13030 }, { "epoch": 0.052683250039391234, "grad_norm": 2412.71484375, "learning_rate": 2.0864e-05, "loss": 137.4368, "step": 13040 }, { "epoch": 0.05272365130475887, "grad_norm": 1965.6383056640625, "learning_rate": 2.0880000000000003e-05, "loss": 105.5261, "step": 13050 }, { "epoch": 0.05276405257012649, "grad_norm": 960.9400634765625, "learning_rate": 2.0896e-05, "loss": 120.9263, "step": 13060 }, { "epoch": 0.052804453835494126, "grad_norm": 0.0, "learning_rate": 2.0912000000000002e-05, "loss": 208.6261, "step": 13070 }, { "epoch": 0.05284485510086176, "grad_norm": 807.88623046875, "learning_rate": 2.0928e-05, "loss": 135.5652, "step": 13080 }, { "epoch": 0.05288525636622939, "grad_norm": 683.6365356445312, "learning_rate": 2.0944e-05, "loss": 122.0361, "step": 13090 }, { "epoch": 0.05292565763159702, "grad_norm": 6482.541015625, "learning_rate": 2.0960000000000003e-05, "loss": 118.127, "step": 13100 }, { "epoch": 0.05296605889696465, "grad_norm": 861.0405883789062, "learning_rate": 2.0976e-05, "loss": 127.4016, "step": 13110 }, { "epoch": 0.053006460162332285, "grad_norm": 2566.6220703125, "learning_rate": 2.0992000000000005e-05, "loss": 116.446, "step": 13120 }, { "epoch": 0.05304686142769992, "grad_norm": 1192.2772216796875, "learning_rate": 2.1008000000000003e-05, "loss": 202.4242, "step": 13130 }, { "epoch": 0.053087262693067544, "grad_norm": 598.492431640625, "learning_rate": 2.1024e-05, "loss": 169.9234, "step": 13140 }, { "epoch": 0.05312766395843518, "grad_norm": 532.2695922851562, "learning_rate": 2.1040000000000002e-05, "loss": 151.1845, "step": 13150 }, { "epoch": 0.05316806522380281, "grad_norm": 2847.472412109375, "learning_rate": 2.1056e-05, "loss": 222.5728, "step": 13160 }, { "epoch": 0.05320846648917044, "grad_norm": 3605.068603515625, "learning_rate": 2.1072000000000004e-05, "loss": 154.8115, "step": 13170 }, { "epoch": 0.05324886775453807, "grad_norm": 2527.835205078125, "learning_rate": 2.1088000000000002e-05, "loss": 216.4981, "step": 13180 }, { "epoch": 0.0532892690199057, "grad_norm": 559.9857177734375, "learning_rate": 2.1104e-05, "loss": 106.2085, "step": 13190 }, { "epoch": 0.053329670285273335, "grad_norm": 1436.94287109375, "learning_rate": 2.112e-05, "loss": 127.6425, "step": 13200 }, { "epoch": 0.05337007155064097, "grad_norm": 821.7449340820312, "learning_rate": 2.1136000000000002e-05, "loss": 244.2373, "step": 13210 }, { "epoch": 0.053410472816008595, "grad_norm": 887.7293090820312, "learning_rate": 2.1152000000000003e-05, "loss": 103.6638, "step": 13220 }, { "epoch": 0.05345087408137623, "grad_norm": 574.5474243164062, "learning_rate": 2.1168e-05, "loss": 172.9593, "step": 13230 }, { "epoch": 0.05349127534674386, "grad_norm": 1248.0147705078125, "learning_rate": 2.1184e-05, "loss": 152.9556, "step": 13240 }, { "epoch": 0.053531676612111494, "grad_norm": 964.110595703125, "learning_rate": 2.1200000000000004e-05, "loss": 188.2181, "step": 13250 }, { "epoch": 0.05357207787747912, "grad_norm": 796.8040161132812, "learning_rate": 2.1216e-05, "loss": 113.264, "step": 13260 }, { "epoch": 0.05361247914284675, "grad_norm": 1286.59765625, "learning_rate": 2.1232000000000003e-05, "loss": 122.0433, "step": 13270 }, { "epoch": 0.053652880408214386, "grad_norm": 1039.0003662109375, "learning_rate": 2.1248e-05, "loss": 166.3482, "step": 13280 }, { "epoch": 0.05369328167358202, "grad_norm": 1605.037841796875, "learning_rate": 2.1264000000000002e-05, "loss": 119.0407, "step": 13290 }, { "epoch": 0.053733682938949645, "grad_norm": 1566.1156005859375, "learning_rate": 2.1280000000000003e-05, "loss": 159.9284, "step": 13300 }, { "epoch": 0.05377408420431728, "grad_norm": 905.0177612304688, "learning_rate": 2.1296e-05, "loss": 98.9591, "step": 13310 }, { "epoch": 0.05381448546968491, "grad_norm": 1059.0621337890625, "learning_rate": 2.1312000000000005e-05, "loss": 124.6446, "step": 13320 }, { "epoch": 0.053854886735052544, "grad_norm": 717.8817138671875, "learning_rate": 2.1328000000000003e-05, "loss": 219.626, "step": 13330 }, { "epoch": 0.05389528800042017, "grad_norm": 1320.574951171875, "learning_rate": 2.1344e-05, "loss": 176.7211, "step": 13340 }, { "epoch": 0.0539356892657878, "grad_norm": 1039.6083984375, "learning_rate": 2.1360000000000002e-05, "loss": 130.3765, "step": 13350 }, { "epoch": 0.053976090531155436, "grad_norm": 1116.202880859375, "learning_rate": 2.1376e-05, "loss": 197.231, "step": 13360 }, { "epoch": 0.05401649179652307, "grad_norm": 2312.27587890625, "learning_rate": 2.1392000000000005e-05, "loss": 192.5465, "step": 13370 }, { "epoch": 0.054056893061890696, "grad_norm": 3889.453857421875, "learning_rate": 2.1408000000000002e-05, "loss": 123.0288, "step": 13380 }, { "epoch": 0.05409729432725833, "grad_norm": 1394.5799560546875, "learning_rate": 2.1424e-05, "loss": 139.7589, "step": 13390 }, { "epoch": 0.05413769559262596, "grad_norm": 686.0022583007812, "learning_rate": 2.144e-05, "loss": 101.6074, "step": 13400 }, { "epoch": 0.054178096857993595, "grad_norm": 3230.813720703125, "learning_rate": 2.1456000000000003e-05, "loss": 147.0386, "step": 13410 }, { "epoch": 0.05421849812336122, "grad_norm": 1445.752197265625, "learning_rate": 2.1472000000000004e-05, "loss": 178.1156, "step": 13420 }, { "epoch": 0.054258899388728854, "grad_norm": 1217.630126953125, "learning_rate": 2.1488e-05, "loss": 107.1757, "step": 13430 }, { "epoch": 0.05429930065409649, "grad_norm": 910.73583984375, "learning_rate": 2.1504e-05, "loss": 150.5691, "step": 13440 }, { "epoch": 0.05433970191946412, "grad_norm": 1758.7811279296875, "learning_rate": 2.1520000000000004e-05, "loss": 169.052, "step": 13450 }, { "epoch": 0.054380103184831746, "grad_norm": 1926.9754638671875, "learning_rate": 2.1536000000000002e-05, "loss": 121.4231, "step": 13460 }, { "epoch": 0.05442050445019938, "grad_norm": 2126.060791015625, "learning_rate": 2.1552e-05, "loss": 166.1807, "step": 13470 }, { "epoch": 0.05446090571556701, "grad_norm": 779.2872924804688, "learning_rate": 2.1568e-05, "loss": 148.9455, "step": 13480 }, { "epoch": 0.054501306980934645, "grad_norm": 971.3914794921875, "learning_rate": 2.1584000000000002e-05, "loss": 173.3893, "step": 13490 }, { "epoch": 0.05454170824630227, "grad_norm": 1498.48486328125, "learning_rate": 2.1600000000000003e-05, "loss": 200.5015, "step": 13500 }, { "epoch": 0.054582109511669905, "grad_norm": 1800.676025390625, "learning_rate": 2.1616e-05, "loss": 144.656, "step": 13510 }, { "epoch": 0.05462251077703754, "grad_norm": 952.250244140625, "learning_rate": 2.1632e-05, "loss": 117.9983, "step": 13520 }, { "epoch": 0.05466291204240517, "grad_norm": 1999.3441162109375, "learning_rate": 2.1648000000000003e-05, "loss": 106.9742, "step": 13530 }, { "epoch": 0.0547033133077728, "grad_norm": 1853.0484619140625, "learning_rate": 2.1664e-05, "loss": 170.8819, "step": 13540 }, { "epoch": 0.05474371457314043, "grad_norm": 1101.6063232421875, "learning_rate": 2.1680000000000002e-05, "loss": 184.5558, "step": 13550 }, { "epoch": 0.05478411583850806, "grad_norm": 1025.219970703125, "learning_rate": 2.1696e-05, "loss": 141.2452, "step": 13560 }, { "epoch": 0.054824517103875696, "grad_norm": 1053.7066650390625, "learning_rate": 2.1711999999999998e-05, "loss": 158.2792, "step": 13570 }, { "epoch": 0.05486491836924332, "grad_norm": 0.0, "learning_rate": 2.1728000000000003e-05, "loss": 83.3861, "step": 13580 }, { "epoch": 0.054905319634610955, "grad_norm": 744.6858520507812, "learning_rate": 2.1744e-05, "loss": 165.1833, "step": 13590 }, { "epoch": 0.05494572089997859, "grad_norm": 2354.499267578125, "learning_rate": 2.1760000000000002e-05, "loss": 121.2317, "step": 13600 }, { "epoch": 0.05498612216534622, "grad_norm": 1683.3133544921875, "learning_rate": 2.1776000000000003e-05, "loss": 231.1734, "step": 13610 }, { "epoch": 0.05502652343071385, "grad_norm": 792.2965698242188, "learning_rate": 2.1792e-05, "loss": 115.8473, "step": 13620 }, { "epoch": 0.05506692469608148, "grad_norm": 1531.6292724609375, "learning_rate": 2.1808000000000002e-05, "loss": 161.9549, "step": 13630 }, { "epoch": 0.05510732596144911, "grad_norm": 1182.44091796875, "learning_rate": 2.1824e-05, "loss": 119.7171, "step": 13640 }, { "epoch": 0.055147727226816746, "grad_norm": 1206.6705322265625, "learning_rate": 2.1840000000000004e-05, "loss": 184.1771, "step": 13650 }, { "epoch": 0.05518812849218437, "grad_norm": 629.7221069335938, "learning_rate": 2.1856000000000002e-05, "loss": 158.6814, "step": 13660 }, { "epoch": 0.055228529757552006, "grad_norm": 769.7032470703125, "learning_rate": 2.1872e-05, "loss": 133.69, "step": 13670 }, { "epoch": 0.05526893102291964, "grad_norm": 910.8077392578125, "learning_rate": 2.1888e-05, "loss": 104.3274, "step": 13680 }, { "epoch": 0.05530933228828727, "grad_norm": 1321.2825927734375, "learning_rate": 2.1904000000000002e-05, "loss": 150.9695, "step": 13690 }, { "epoch": 0.0553497335536549, "grad_norm": 692.1464233398438, "learning_rate": 2.1920000000000004e-05, "loss": 154.0631, "step": 13700 }, { "epoch": 0.05539013481902253, "grad_norm": 1960.6553955078125, "learning_rate": 2.1936e-05, "loss": 172.6004, "step": 13710 }, { "epoch": 0.055430536084390164, "grad_norm": 937.15625, "learning_rate": 2.1952e-05, "loss": 116.4846, "step": 13720 }, { "epoch": 0.0554709373497578, "grad_norm": 1401.888671875, "learning_rate": 2.1968000000000004e-05, "loss": 143.6478, "step": 13730 }, { "epoch": 0.05551133861512542, "grad_norm": 1163.2547607421875, "learning_rate": 2.1984e-05, "loss": 158.8097, "step": 13740 }, { "epoch": 0.055551739880493056, "grad_norm": 878.194091796875, "learning_rate": 2.2000000000000003e-05, "loss": 136.7762, "step": 13750 }, { "epoch": 0.05559214114586069, "grad_norm": 2160.6083984375, "learning_rate": 2.2016e-05, "loss": 133.5667, "step": 13760 }, { "epoch": 0.05563254241122832, "grad_norm": 1110.6484375, "learning_rate": 2.2032e-05, "loss": 144.071, "step": 13770 }, { "epoch": 0.05567294367659595, "grad_norm": 759.3908081054688, "learning_rate": 2.2048000000000003e-05, "loss": 124.2756, "step": 13780 }, { "epoch": 0.05571334494196358, "grad_norm": 689.4682006835938, "learning_rate": 2.2064e-05, "loss": 132.506, "step": 13790 }, { "epoch": 0.055753746207331215, "grad_norm": 1468.38232421875, "learning_rate": 2.2080000000000002e-05, "loss": 172.1625, "step": 13800 }, { "epoch": 0.05579414747269885, "grad_norm": 1024.220947265625, "learning_rate": 2.2096000000000003e-05, "loss": 110.2222, "step": 13810 }, { "epoch": 0.055834548738066474, "grad_norm": 1557.86669921875, "learning_rate": 2.2112e-05, "loss": 154.4878, "step": 13820 }, { "epoch": 0.05587495000343411, "grad_norm": 410.6898498535156, "learning_rate": 2.2128000000000002e-05, "loss": 149.6926, "step": 13830 }, { "epoch": 0.05591535126880174, "grad_norm": 1283.35791015625, "learning_rate": 2.2144e-05, "loss": 121.5034, "step": 13840 }, { "epoch": 0.05595575253416937, "grad_norm": 908.16748046875, "learning_rate": 2.2160000000000005e-05, "loss": 156.9578, "step": 13850 }, { "epoch": 0.055996153799537, "grad_norm": 1568.7562255859375, "learning_rate": 2.2176000000000002e-05, "loss": 130.5618, "step": 13860 }, { "epoch": 0.05603655506490463, "grad_norm": 949.5377807617188, "learning_rate": 2.2192e-05, "loss": 185.1214, "step": 13870 }, { "epoch": 0.056076956330272265, "grad_norm": 1054.2724609375, "learning_rate": 2.2208e-05, "loss": 139.6275, "step": 13880 }, { "epoch": 0.0561173575956399, "grad_norm": 816.1144409179688, "learning_rate": 2.2224000000000003e-05, "loss": 89.5711, "step": 13890 }, { "epoch": 0.056157758861007524, "grad_norm": 913.6422729492188, "learning_rate": 2.2240000000000004e-05, "loss": 107.2711, "step": 13900 }, { "epoch": 0.05619816012637516, "grad_norm": 684.166015625, "learning_rate": 2.2256000000000002e-05, "loss": 131.4961, "step": 13910 }, { "epoch": 0.05623856139174279, "grad_norm": 3857.287841796875, "learning_rate": 2.2272e-05, "loss": 179.3707, "step": 13920 }, { "epoch": 0.05627896265711042, "grad_norm": 1287.8927001953125, "learning_rate": 2.2288000000000004e-05, "loss": 126.2159, "step": 13930 }, { "epoch": 0.05631936392247805, "grad_norm": 753.49853515625, "learning_rate": 2.2304000000000002e-05, "loss": 127.1484, "step": 13940 }, { "epoch": 0.05635976518784568, "grad_norm": 1118.16748046875, "learning_rate": 2.2320000000000003e-05, "loss": 165.6699, "step": 13950 }, { "epoch": 0.056400166453213316, "grad_norm": 1042.443115234375, "learning_rate": 2.2336e-05, "loss": 142.2855, "step": 13960 }, { "epoch": 0.05644056771858095, "grad_norm": 1004.0928955078125, "learning_rate": 2.2352e-05, "loss": 127.3151, "step": 13970 }, { "epoch": 0.056480968983948575, "grad_norm": 1794.01171875, "learning_rate": 2.2368000000000003e-05, "loss": 136.5507, "step": 13980 }, { "epoch": 0.05652137024931621, "grad_norm": 487.4826354980469, "learning_rate": 2.2384e-05, "loss": 164.9086, "step": 13990 }, { "epoch": 0.05656177151468384, "grad_norm": 751.5004272460938, "learning_rate": 2.2400000000000002e-05, "loss": 114.4318, "step": 14000 }, { "epoch": 0.056602172780051474, "grad_norm": 1770.594482421875, "learning_rate": 2.2416000000000004e-05, "loss": 202.0991, "step": 14010 }, { "epoch": 0.0566425740454191, "grad_norm": 1120.473388671875, "learning_rate": 2.2432e-05, "loss": 152.7142, "step": 14020 }, { "epoch": 0.05668297531078673, "grad_norm": 520.49365234375, "learning_rate": 2.2448000000000003e-05, "loss": 137.2922, "step": 14030 }, { "epoch": 0.056723376576154366, "grad_norm": 1072.8531494140625, "learning_rate": 2.2464e-05, "loss": 147.3355, "step": 14040 }, { "epoch": 0.056763777841522, "grad_norm": 1459.0318603515625, "learning_rate": 2.2480000000000005e-05, "loss": 144.517, "step": 14050 }, { "epoch": 0.056804179106889625, "grad_norm": 1281.28271484375, "learning_rate": 2.2496000000000003e-05, "loss": 153.5128, "step": 14060 }, { "epoch": 0.05684458037225726, "grad_norm": 2782.97900390625, "learning_rate": 2.2512e-05, "loss": 180.4696, "step": 14070 }, { "epoch": 0.05688498163762489, "grad_norm": 736.7996826171875, "learning_rate": 2.2528000000000002e-05, "loss": 123.2284, "step": 14080 }, { "epoch": 0.056925382902992525, "grad_norm": 3690.111083984375, "learning_rate": 2.2544000000000003e-05, "loss": 116.4326, "step": 14090 }, { "epoch": 0.05696578416836015, "grad_norm": 4582.87841796875, "learning_rate": 2.256e-05, "loss": 205.8677, "step": 14100 }, { "epoch": 0.057006185433727784, "grad_norm": 2828.5712890625, "learning_rate": 2.2576000000000002e-05, "loss": 121.0329, "step": 14110 }, { "epoch": 0.05704658669909542, "grad_norm": 910.3086547851562, "learning_rate": 2.2592e-05, "loss": 127.2261, "step": 14120 }, { "epoch": 0.05708698796446305, "grad_norm": 1229.384033203125, "learning_rate": 2.2608000000000004e-05, "loss": 150.3236, "step": 14130 }, { "epoch": 0.057127389229830676, "grad_norm": 2388.891845703125, "learning_rate": 2.2624000000000002e-05, "loss": 202.8478, "step": 14140 }, { "epoch": 0.05716779049519831, "grad_norm": 820.1349487304688, "learning_rate": 2.264e-05, "loss": 139.7399, "step": 14150 }, { "epoch": 0.05720819176056594, "grad_norm": 1131.6744384765625, "learning_rate": 2.2656e-05, "loss": 119.5409, "step": 14160 }, { "epoch": 0.057248593025933575, "grad_norm": 1692.20947265625, "learning_rate": 2.2672e-05, "loss": 136.8943, "step": 14170 }, { "epoch": 0.0572889942913012, "grad_norm": 841.63330078125, "learning_rate": 2.2688000000000004e-05, "loss": 112.7383, "step": 14180 }, { "epoch": 0.057329395556668834, "grad_norm": 993.0922241210938, "learning_rate": 2.2704e-05, "loss": 139.6173, "step": 14190 }, { "epoch": 0.05736979682203647, "grad_norm": 848.125, "learning_rate": 2.272e-05, "loss": 117.9697, "step": 14200 }, { "epoch": 0.0574101980874041, "grad_norm": 1794.74755859375, "learning_rate": 2.2736000000000004e-05, "loss": 120.4493, "step": 14210 }, { "epoch": 0.057450599352771727, "grad_norm": 700.1529541015625, "learning_rate": 2.2752000000000002e-05, "loss": 111.5371, "step": 14220 }, { "epoch": 0.05749100061813936, "grad_norm": 2567.41845703125, "learning_rate": 2.2768000000000003e-05, "loss": 151.0815, "step": 14230 }, { "epoch": 0.05753140188350699, "grad_norm": 1424.2322998046875, "learning_rate": 2.2784e-05, "loss": 173.4974, "step": 14240 }, { "epoch": 0.057571803148874626, "grad_norm": 1377.6383056640625, "learning_rate": 2.28e-05, "loss": 135.3481, "step": 14250 }, { "epoch": 0.05761220441424225, "grad_norm": 1382.552734375, "learning_rate": 2.2816000000000003e-05, "loss": 148.4613, "step": 14260 }, { "epoch": 0.057652605679609885, "grad_norm": 478.0409240722656, "learning_rate": 2.2832e-05, "loss": 115.8696, "step": 14270 }, { "epoch": 0.05769300694497752, "grad_norm": 588.9566040039062, "learning_rate": 2.2848000000000002e-05, "loss": 149.4777, "step": 14280 }, { "epoch": 0.05773340821034515, "grad_norm": 620.9132690429688, "learning_rate": 2.2864000000000003e-05, "loss": 196.3389, "step": 14290 }, { "epoch": 0.05777380947571278, "grad_norm": 3938.6494140625, "learning_rate": 2.288e-05, "loss": 172.2783, "step": 14300 }, { "epoch": 0.05781421074108041, "grad_norm": 3169.12548828125, "learning_rate": 2.2896000000000002e-05, "loss": 128.8295, "step": 14310 }, { "epoch": 0.05785461200644804, "grad_norm": 1205.0302734375, "learning_rate": 2.2912e-05, "loss": 149.3006, "step": 14320 }, { "epoch": 0.057895013271815676, "grad_norm": 880.8743286132812, "learning_rate": 2.2928000000000005e-05, "loss": 100.6335, "step": 14330 }, { "epoch": 0.0579354145371833, "grad_norm": 411.00128173828125, "learning_rate": 2.2944000000000003e-05, "loss": 167.0317, "step": 14340 }, { "epoch": 0.057975815802550935, "grad_norm": 438.6155700683594, "learning_rate": 2.296e-05, "loss": 170.8691, "step": 14350 }, { "epoch": 0.05801621706791857, "grad_norm": 2452.795654296875, "learning_rate": 2.2976e-05, "loss": 159.6383, "step": 14360 }, { "epoch": 0.0580566183332862, "grad_norm": 3605.11181640625, "learning_rate": 2.2992e-05, "loss": 128.4706, "step": 14370 }, { "epoch": 0.05809701959865383, "grad_norm": 1165.4371337890625, "learning_rate": 2.3008000000000004e-05, "loss": 163.2181, "step": 14380 }, { "epoch": 0.05813742086402146, "grad_norm": 3038.6328125, "learning_rate": 2.3024000000000002e-05, "loss": 176.579, "step": 14390 }, { "epoch": 0.058177822129389094, "grad_norm": 810.4943237304688, "learning_rate": 2.304e-05, "loss": 105.8301, "step": 14400 }, { "epoch": 0.05821822339475673, "grad_norm": 2276.8701171875, "learning_rate": 2.3056000000000004e-05, "loss": 169.4745, "step": 14410 }, { "epoch": 0.05825862466012435, "grad_norm": 706.0416259765625, "learning_rate": 2.3072000000000002e-05, "loss": 132.1325, "step": 14420 }, { "epoch": 0.058299025925491986, "grad_norm": 723.7369995117188, "learning_rate": 2.3088000000000003e-05, "loss": 106.6888, "step": 14430 }, { "epoch": 0.05833942719085962, "grad_norm": 609.4017944335938, "learning_rate": 2.3104e-05, "loss": 125.4262, "step": 14440 }, { "epoch": 0.05837982845622725, "grad_norm": 1099.2354736328125, "learning_rate": 2.312e-05, "loss": 143.0599, "step": 14450 }, { "epoch": 0.05842022972159488, "grad_norm": 978.5392456054688, "learning_rate": 2.3136000000000003e-05, "loss": 135.362, "step": 14460 }, { "epoch": 0.05846063098696251, "grad_norm": 893.01806640625, "learning_rate": 2.3152e-05, "loss": 108.5644, "step": 14470 }, { "epoch": 0.058501032252330144, "grad_norm": 783.920166015625, "learning_rate": 2.3168000000000002e-05, "loss": 202.1595, "step": 14480 }, { "epoch": 0.05854143351769778, "grad_norm": 1331.897705078125, "learning_rate": 2.3184000000000004e-05, "loss": 144.4532, "step": 14490 }, { "epoch": 0.058581834783065403, "grad_norm": 1102.8529052734375, "learning_rate": 2.32e-05, "loss": 122.3522, "step": 14500 }, { "epoch": 0.05862223604843304, "grad_norm": 743.5985717773438, "learning_rate": 2.3216000000000003e-05, "loss": 99.6895, "step": 14510 }, { "epoch": 0.05866263731380067, "grad_norm": 1235.1953125, "learning_rate": 2.3232e-05, "loss": 179.0568, "step": 14520 }, { "epoch": 0.0587030385791683, "grad_norm": 698.5330810546875, "learning_rate": 2.3248000000000005e-05, "loss": 152.631, "step": 14530 }, { "epoch": 0.05874343984453593, "grad_norm": 826.0693359375, "learning_rate": 2.3264000000000003e-05, "loss": 102.5496, "step": 14540 }, { "epoch": 0.05878384110990356, "grad_norm": 931.8894653320312, "learning_rate": 2.328e-05, "loss": 104.5592, "step": 14550 }, { "epoch": 0.058824242375271195, "grad_norm": 1267.3544921875, "learning_rate": 2.3296000000000002e-05, "loss": 145.6881, "step": 14560 }, { "epoch": 0.05886464364063883, "grad_norm": 710.9752197265625, "learning_rate": 2.3312e-05, "loss": 102.4729, "step": 14570 }, { "epoch": 0.058905044906006454, "grad_norm": 1418.5509033203125, "learning_rate": 2.3328000000000004e-05, "loss": 123.5127, "step": 14580 }, { "epoch": 0.05894544617137409, "grad_norm": 2174.474853515625, "learning_rate": 2.3344000000000002e-05, "loss": 120.0803, "step": 14590 }, { "epoch": 0.05898584743674172, "grad_norm": 2546.129150390625, "learning_rate": 2.336e-05, "loss": 174.6094, "step": 14600 }, { "epoch": 0.05902624870210935, "grad_norm": 2337.811767578125, "learning_rate": 2.3376000000000005e-05, "loss": 121.787, "step": 14610 }, { "epoch": 0.05906664996747698, "grad_norm": 607.4607543945312, "learning_rate": 2.3392000000000002e-05, "loss": 145.2568, "step": 14620 }, { "epoch": 0.05910705123284461, "grad_norm": 1109.9788818359375, "learning_rate": 2.3408000000000004e-05, "loss": 92.7968, "step": 14630 }, { "epoch": 0.059147452498212245, "grad_norm": 996.027587890625, "learning_rate": 2.3424e-05, "loss": 146.8611, "step": 14640 }, { "epoch": 0.05918785376357988, "grad_norm": 1124.2379150390625, "learning_rate": 2.344e-05, "loss": 106.3015, "step": 14650 }, { "epoch": 0.059228255028947505, "grad_norm": 674.2631225585938, "learning_rate": 2.3456000000000004e-05, "loss": 150.5028, "step": 14660 }, { "epoch": 0.05926865629431514, "grad_norm": 1879.044677734375, "learning_rate": 2.3472e-05, "loss": 128.7712, "step": 14670 }, { "epoch": 0.05930905755968277, "grad_norm": 1457.988037109375, "learning_rate": 2.3488000000000003e-05, "loss": 147.8928, "step": 14680 }, { "epoch": 0.059349458825050404, "grad_norm": 1219.433349609375, "learning_rate": 2.3504000000000004e-05, "loss": 131.0105, "step": 14690 }, { "epoch": 0.05938986009041803, "grad_norm": 467.0386047363281, "learning_rate": 2.3520000000000002e-05, "loss": 151.1394, "step": 14700 }, { "epoch": 0.05943026135578566, "grad_norm": 2050.65234375, "learning_rate": 2.3536000000000003e-05, "loss": 129.0073, "step": 14710 }, { "epoch": 0.059470662621153296, "grad_norm": 1917.9736328125, "learning_rate": 2.3552e-05, "loss": 165.6747, "step": 14720 }, { "epoch": 0.05951106388652093, "grad_norm": 14780.8095703125, "learning_rate": 2.3568e-05, "loss": 180.8651, "step": 14730 }, { "epoch": 0.059551465151888555, "grad_norm": 1709.83349609375, "learning_rate": 2.3584000000000003e-05, "loss": 131.5537, "step": 14740 }, { "epoch": 0.05959186641725619, "grad_norm": 476.5604248046875, "learning_rate": 2.36e-05, "loss": 115.247, "step": 14750 }, { "epoch": 0.05963226768262382, "grad_norm": 1123.6815185546875, "learning_rate": 2.3616000000000002e-05, "loss": 145.8521, "step": 14760 }, { "epoch": 0.059672668947991454, "grad_norm": 802.3085327148438, "learning_rate": 2.3632e-05, "loss": 137.058, "step": 14770 }, { "epoch": 0.05971307021335908, "grad_norm": 571.6912231445312, "learning_rate": 2.3648e-05, "loss": 108.2743, "step": 14780 }, { "epoch": 0.059753471478726713, "grad_norm": 1714.322265625, "learning_rate": 2.3664000000000002e-05, "loss": 150.8154, "step": 14790 }, { "epoch": 0.05979387274409435, "grad_norm": 1163.4627685546875, "learning_rate": 2.368e-05, "loss": 155.5478, "step": 14800 }, { "epoch": 0.05983427400946198, "grad_norm": 668.583740234375, "learning_rate": 2.3696000000000005e-05, "loss": 143.8596, "step": 14810 }, { "epoch": 0.059874675274829606, "grad_norm": 598.8736572265625, "learning_rate": 2.3712000000000003e-05, "loss": 132.4404, "step": 14820 }, { "epoch": 0.05991507654019724, "grad_norm": 1189.759033203125, "learning_rate": 2.3728e-05, "loss": 109.6164, "step": 14830 }, { "epoch": 0.05995547780556487, "grad_norm": 1057.4130859375, "learning_rate": 2.3744000000000002e-05, "loss": 129.6899, "step": 14840 }, { "epoch": 0.059995879070932505, "grad_norm": 463.6476135253906, "learning_rate": 2.376e-05, "loss": 154.4845, "step": 14850 }, { "epoch": 0.06003628033630013, "grad_norm": 2989.97265625, "learning_rate": 2.3776000000000004e-05, "loss": 113.0495, "step": 14860 }, { "epoch": 0.060076681601667764, "grad_norm": 1022.0186157226562, "learning_rate": 2.3792000000000002e-05, "loss": 114.5988, "step": 14870 }, { "epoch": 0.0601170828670354, "grad_norm": 800.594970703125, "learning_rate": 2.3808e-05, "loss": 120.2548, "step": 14880 }, { "epoch": 0.06015748413240303, "grad_norm": 893.33447265625, "learning_rate": 2.3824000000000004e-05, "loss": 138.4012, "step": 14890 }, { "epoch": 0.060197885397770656, "grad_norm": 766.477783203125, "learning_rate": 2.3840000000000002e-05, "loss": 152.4039, "step": 14900 }, { "epoch": 0.06023828666313829, "grad_norm": 4025.67822265625, "learning_rate": 2.3856000000000003e-05, "loss": 144.6702, "step": 14910 }, { "epoch": 0.06027868792850592, "grad_norm": 1157.56884765625, "learning_rate": 2.3872e-05, "loss": 182.9655, "step": 14920 }, { "epoch": 0.060319089193873555, "grad_norm": 2603.474853515625, "learning_rate": 2.3888e-05, "loss": 172.9417, "step": 14930 }, { "epoch": 0.06035949045924118, "grad_norm": 1721.3291015625, "learning_rate": 2.3904000000000004e-05, "loss": 148.0654, "step": 14940 }, { "epoch": 0.060399891724608815, "grad_norm": 1430.939208984375, "learning_rate": 2.392e-05, "loss": 132.7668, "step": 14950 }, { "epoch": 0.06044029298997645, "grad_norm": 853.9814453125, "learning_rate": 2.3936000000000003e-05, "loss": 67.8114, "step": 14960 }, { "epoch": 0.06048069425534408, "grad_norm": 3689.06005859375, "learning_rate": 2.3952e-05, "loss": 192.2166, "step": 14970 }, { "epoch": 0.06052109552071171, "grad_norm": 481.86920166015625, "learning_rate": 2.3968e-05, "loss": 117.9959, "step": 14980 }, { "epoch": 0.06056149678607934, "grad_norm": 683.6334228515625, "learning_rate": 2.3984000000000003e-05, "loss": 100.6381, "step": 14990 }, { "epoch": 0.06060189805144697, "grad_norm": 1204.35693359375, "learning_rate": 2.4e-05, "loss": 128.2924, "step": 15000 }, { "epoch": 0.060642299316814606, "grad_norm": 1228.6927490234375, "learning_rate": 2.4016000000000005e-05, "loss": 125.4803, "step": 15010 }, { "epoch": 0.06068270058218223, "grad_norm": 1097.30126953125, "learning_rate": 2.4032000000000003e-05, "loss": 122.2645, "step": 15020 }, { "epoch": 0.060723101847549865, "grad_norm": 680.966796875, "learning_rate": 2.4048e-05, "loss": 135.325, "step": 15030 }, { "epoch": 0.0607635031129175, "grad_norm": 1410.129638671875, "learning_rate": 2.4064000000000002e-05, "loss": 115.6653, "step": 15040 }, { "epoch": 0.06080390437828513, "grad_norm": 830.8237915039062, "learning_rate": 2.408e-05, "loss": 146.791, "step": 15050 }, { "epoch": 0.06084430564365276, "grad_norm": 1142.181640625, "learning_rate": 2.4096000000000004e-05, "loss": 113.4309, "step": 15060 }, { "epoch": 0.06088470690902039, "grad_norm": 845.2885131835938, "learning_rate": 2.4112000000000002e-05, "loss": 142.3753, "step": 15070 }, { "epoch": 0.060925108174388024, "grad_norm": 1506.6373291015625, "learning_rate": 2.4128e-05, "loss": 177.1799, "step": 15080 }, { "epoch": 0.06096550943975566, "grad_norm": 1998.8515625, "learning_rate": 2.4144000000000005e-05, "loss": 147.3931, "step": 15090 }, { "epoch": 0.06100591070512328, "grad_norm": 1360.8604736328125, "learning_rate": 2.4160000000000002e-05, "loss": 156.8285, "step": 15100 }, { "epoch": 0.061046311970490916, "grad_norm": 1141.12109375, "learning_rate": 2.4176000000000004e-05, "loss": 124.2817, "step": 15110 }, { "epoch": 0.06108671323585855, "grad_norm": 4913.79443359375, "learning_rate": 2.4192e-05, "loss": 154.2291, "step": 15120 }, { "epoch": 0.06112711450122618, "grad_norm": 1053.5574951171875, "learning_rate": 2.4208e-05, "loss": 83.8442, "step": 15130 }, { "epoch": 0.06116751576659381, "grad_norm": 1806.4326171875, "learning_rate": 2.4224000000000004e-05, "loss": 154.9223, "step": 15140 }, { "epoch": 0.06120791703196144, "grad_norm": 655.29443359375, "learning_rate": 2.4240000000000002e-05, "loss": 135.3248, "step": 15150 }, { "epoch": 0.061248318297329074, "grad_norm": 1480.966064453125, "learning_rate": 2.4256000000000003e-05, "loss": 172.8818, "step": 15160 }, { "epoch": 0.06128871956269671, "grad_norm": 10371.9072265625, "learning_rate": 2.4272e-05, "loss": 150.4807, "step": 15170 }, { "epoch": 0.06132912082806433, "grad_norm": 3907.77001953125, "learning_rate": 2.4288000000000002e-05, "loss": 197.0451, "step": 15180 }, { "epoch": 0.061369522093431966, "grad_norm": 1567.521484375, "learning_rate": 2.4304000000000003e-05, "loss": 132.6108, "step": 15190 }, { "epoch": 0.0614099233587996, "grad_norm": 877.543212890625, "learning_rate": 2.432e-05, "loss": 152.2519, "step": 15200 }, { "epoch": 0.061450324624167225, "grad_norm": 1100.1990966796875, "learning_rate": 2.4336000000000006e-05, "loss": 152.2165, "step": 15210 }, { "epoch": 0.06149072588953486, "grad_norm": 1301.2376708984375, "learning_rate": 2.4352000000000003e-05, "loss": 172.2335, "step": 15220 }, { "epoch": 0.06153112715490249, "grad_norm": 776.1826171875, "learning_rate": 2.4368e-05, "loss": 109.9598, "step": 15230 }, { "epoch": 0.061571528420270125, "grad_norm": 1602.8883056640625, "learning_rate": 2.4384000000000002e-05, "loss": 95.1105, "step": 15240 }, { "epoch": 0.06161192968563775, "grad_norm": 929.6801147460938, "learning_rate": 2.44e-05, "loss": 165.9625, "step": 15250 }, { "epoch": 0.061652330951005384, "grad_norm": 332.2320251464844, "learning_rate": 2.4416000000000005e-05, "loss": 75.9045, "step": 15260 }, { "epoch": 0.06169273221637302, "grad_norm": 2452.326416015625, "learning_rate": 2.4432000000000003e-05, "loss": 244.5978, "step": 15270 }, { "epoch": 0.06173313348174065, "grad_norm": 1301.530029296875, "learning_rate": 2.4448e-05, "loss": 111.0449, "step": 15280 }, { "epoch": 0.061773534747108276, "grad_norm": 696.3073120117188, "learning_rate": 2.4464000000000005e-05, "loss": 88.0738, "step": 15290 }, { "epoch": 0.06181393601247591, "grad_norm": 1441.196533203125, "learning_rate": 2.4480000000000003e-05, "loss": 132.6508, "step": 15300 }, { "epoch": 0.06185433727784354, "grad_norm": 833.18896484375, "learning_rate": 2.4496000000000004e-05, "loss": 140.9942, "step": 15310 }, { "epoch": 0.061894738543211175, "grad_norm": 1413.989013671875, "learning_rate": 2.4512000000000002e-05, "loss": 132.8459, "step": 15320 }, { "epoch": 0.0619351398085788, "grad_norm": 1399.9921875, "learning_rate": 2.4528e-05, "loss": 104.1776, "step": 15330 }, { "epoch": 0.061975541073946434, "grad_norm": 2461.732177734375, "learning_rate": 2.4544000000000004e-05, "loss": 157.2997, "step": 15340 }, { "epoch": 0.06201594233931407, "grad_norm": 645.6864624023438, "learning_rate": 2.4560000000000002e-05, "loss": 135.3236, "step": 15350 }, { "epoch": 0.0620563436046817, "grad_norm": 687.8350219726562, "learning_rate": 2.4576e-05, "loss": 101.6606, "step": 15360 }, { "epoch": 0.06209674487004933, "grad_norm": 1433.7965087890625, "learning_rate": 2.4592e-05, "loss": 119.4751, "step": 15370 }, { "epoch": 0.06213714613541696, "grad_norm": 375.80035400390625, "learning_rate": 2.4608000000000002e-05, "loss": 110.7543, "step": 15380 }, { "epoch": 0.06217754740078459, "grad_norm": 1924.688720703125, "learning_rate": 2.4624000000000003e-05, "loss": 134.0393, "step": 15390 }, { "epoch": 0.062217948666152226, "grad_norm": 1919.5198974609375, "learning_rate": 2.464e-05, "loss": 148.155, "step": 15400 }, { "epoch": 0.06225834993151985, "grad_norm": 952.2554931640625, "learning_rate": 2.4656e-05, "loss": 147.3355, "step": 15410 }, { "epoch": 0.062298751196887485, "grad_norm": 1131.790771484375, "learning_rate": 2.4672000000000004e-05, "loss": 128.157, "step": 15420 }, { "epoch": 0.06233915246225512, "grad_norm": 919.5127563476562, "learning_rate": 2.4688e-05, "loss": 115.3047, "step": 15430 }, { "epoch": 0.06237955372762275, "grad_norm": 2218.905029296875, "learning_rate": 2.4704000000000003e-05, "loss": 201.5949, "step": 15440 }, { "epoch": 0.06241995499299038, "grad_norm": 1855.6712646484375, "learning_rate": 2.472e-05, "loss": 134.0892, "step": 15450 }, { "epoch": 0.06246035625835801, "grad_norm": 877.10009765625, "learning_rate": 2.4736e-05, "loss": 102.3566, "step": 15460 }, { "epoch": 0.06250075752372564, "grad_norm": 1202.427734375, "learning_rate": 2.4752000000000003e-05, "loss": 109.7552, "step": 15470 }, { "epoch": 0.06254115878909328, "grad_norm": 1207.779296875, "learning_rate": 2.4768e-05, "loss": 101.6074, "step": 15480 }, { "epoch": 0.0625815600544609, "grad_norm": 1002.058349609375, "learning_rate": 2.4784000000000005e-05, "loss": 159.0241, "step": 15490 }, { "epoch": 0.06262196131982854, "grad_norm": 1376.6588134765625, "learning_rate": 2.4800000000000003e-05, "loss": 129.3523, "step": 15500 }, { "epoch": 0.06266236258519617, "grad_norm": 7330.33447265625, "learning_rate": 2.4816e-05, "loss": 124.5064, "step": 15510 }, { "epoch": 0.0627027638505638, "grad_norm": 1264.3929443359375, "learning_rate": 2.4832000000000002e-05, "loss": 122.9997, "step": 15520 }, { "epoch": 0.06274316511593143, "grad_norm": 2313.05712890625, "learning_rate": 2.4848e-05, "loss": 127.3592, "step": 15530 }, { "epoch": 0.06278356638129906, "grad_norm": 813.3026123046875, "learning_rate": 2.4864000000000005e-05, "loss": 111.5016, "step": 15540 }, { "epoch": 0.06282396764666669, "grad_norm": 1202.1077880859375, "learning_rate": 2.4880000000000002e-05, "loss": 130.1115, "step": 15550 }, { "epoch": 0.06286436891203433, "grad_norm": 1124.310791015625, "learning_rate": 2.4896e-05, "loss": 113.2873, "step": 15560 }, { "epoch": 0.06290477017740195, "grad_norm": 635.4754638671875, "learning_rate": 2.4912e-05, "loss": 143.046, "step": 15570 }, { "epoch": 0.06294517144276959, "grad_norm": 721.7821655273438, "learning_rate": 2.4928000000000003e-05, "loss": 136.0892, "step": 15580 }, { "epoch": 0.06298557270813722, "grad_norm": 2478.818115234375, "learning_rate": 2.4944000000000004e-05, "loss": 192.2085, "step": 15590 }, { "epoch": 0.06302597397350485, "grad_norm": 1114.921630859375, "learning_rate": 2.496e-05, "loss": 176.0056, "step": 15600 }, { "epoch": 0.06306637523887249, "grad_norm": 1268.53076171875, "learning_rate": 2.4976e-05, "loss": 136.4438, "step": 15610 }, { "epoch": 0.06310677650424011, "grad_norm": 1675.6795654296875, "learning_rate": 2.4992000000000004e-05, "loss": 137.4553, "step": 15620 }, { "epoch": 0.06314717776960774, "grad_norm": 1147.9560546875, "learning_rate": 2.5008000000000002e-05, "loss": 138.8533, "step": 15630 }, { "epoch": 0.06318757903497538, "grad_norm": 875.7190551757812, "learning_rate": 2.5024000000000003e-05, "loss": 162.8821, "step": 15640 }, { "epoch": 0.063227980300343, "grad_norm": 0.0, "learning_rate": 2.504e-05, "loss": 114.5405, "step": 15650 }, { "epoch": 0.06326838156571064, "grad_norm": 867.88037109375, "learning_rate": 2.5056e-05, "loss": 150.2994, "step": 15660 }, { "epoch": 0.06330878283107827, "grad_norm": 1961.737548828125, "learning_rate": 2.5072000000000003e-05, "loss": 99.0726, "step": 15670 }, { "epoch": 0.0633491840964459, "grad_norm": 989.47265625, "learning_rate": 2.5088e-05, "loss": 161.8524, "step": 15680 }, { "epoch": 0.06338958536181354, "grad_norm": 1500.89697265625, "learning_rate": 2.5104000000000002e-05, "loss": 162.7008, "step": 15690 }, { "epoch": 0.06342998662718116, "grad_norm": 899.3739013671875, "learning_rate": 2.5120000000000003e-05, "loss": 129.2148, "step": 15700 }, { "epoch": 0.06347038789254879, "grad_norm": 458.4581298828125, "learning_rate": 2.5136e-05, "loss": 105.5956, "step": 15710 }, { "epoch": 0.06351078915791643, "grad_norm": 974.6414184570312, "learning_rate": 2.5152000000000002e-05, "loss": 156.5464, "step": 15720 }, { "epoch": 0.06355119042328405, "grad_norm": 2536.4658203125, "learning_rate": 2.5168e-05, "loss": 109.3993, "step": 15730 }, { "epoch": 0.0635915916886517, "grad_norm": 2482.249267578125, "learning_rate": 2.5184000000000005e-05, "loss": 140.73, "step": 15740 }, { "epoch": 0.06363199295401932, "grad_norm": 2715.053955078125, "learning_rate": 2.5200000000000003e-05, "loss": 146.4766, "step": 15750 }, { "epoch": 0.06367239421938695, "grad_norm": 877.90771484375, "learning_rate": 2.5216e-05, "loss": 122.6491, "step": 15760 }, { "epoch": 0.06371279548475459, "grad_norm": 808.40625, "learning_rate": 2.5232e-05, "loss": 138.159, "step": 15770 }, { "epoch": 0.06375319675012221, "grad_norm": 1289.734130859375, "learning_rate": 2.5248000000000003e-05, "loss": 138.8296, "step": 15780 }, { "epoch": 0.06379359801548984, "grad_norm": 2172.106201171875, "learning_rate": 2.5264000000000004e-05, "loss": 144.9031, "step": 15790 }, { "epoch": 0.06383399928085748, "grad_norm": 930.3880004882812, "learning_rate": 2.5280000000000002e-05, "loss": 160.7876, "step": 15800 }, { "epoch": 0.0638744005462251, "grad_norm": 1657.616455078125, "learning_rate": 2.5296e-05, "loss": 111.3719, "step": 15810 }, { "epoch": 0.06391480181159274, "grad_norm": 791.59765625, "learning_rate": 2.5312000000000004e-05, "loss": 83.945, "step": 15820 }, { "epoch": 0.06395520307696037, "grad_norm": 3115.993896484375, "learning_rate": 2.5328000000000002e-05, "loss": 122.3074, "step": 15830 }, { "epoch": 0.063995604342328, "grad_norm": 1407.626220703125, "learning_rate": 2.5344000000000003e-05, "loss": 87.4488, "step": 15840 }, { "epoch": 0.06403600560769564, "grad_norm": 1312.5579833984375, "learning_rate": 2.536e-05, "loss": 126.2786, "step": 15850 }, { "epoch": 0.06407640687306326, "grad_norm": 1124.7440185546875, "learning_rate": 2.5376e-05, "loss": 147.5409, "step": 15860 }, { "epoch": 0.06411680813843089, "grad_norm": 1031.2725830078125, "learning_rate": 2.5392000000000004e-05, "loss": 130.0712, "step": 15870 }, { "epoch": 0.06415720940379853, "grad_norm": 1127.12548828125, "learning_rate": 2.5408e-05, "loss": 86.7857, "step": 15880 }, { "epoch": 0.06419761066916616, "grad_norm": 1454.6871337890625, "learning_rate": 2.5424000000000003e-05, "loss": 225.9277, "step": 15890 }, { "epoch": 0.0642380119345338, "grad_norm": 680.7732543945312, "learning_rate": 2.5440000000000004e-05, "loss": 114.08, "step": 15900 }, { "epoch": 0.06427841319990142, "grad_norm": 1291.77685546875, "learning_rate": 2.5456e-05, "loss": 112.0439, "step": 15910 }, { "epoch": 0.06431881446526905, "grad_norm": 870.5249633789062, "learning_rate": 2.5472000000000003e-05, "loss": 90.3746, "step": 15920 }, { "epoch": 0.06435921573063669, "grad_norm": 966.7274169921875, "learning_rate": 2.5488e-05, "loss": 180.7504, "step": 15930 }, { "epoch": 0.06439961699600431, "grad_norm": 584.5242919921875, "learning_rate": 2.5504e-05, "loss": 93.3127, "step": 15940 }, { "epoch": 0.06444001826137194, "grad_norm": 684.8132934570312, "learning_rate": 2.5520000000000003e-05, "loss": 104.0317, "step": 15950 }, { "epoch": 0.06448041952673958, "grad_norm": 736.9375610351562, "learning_rate": 2.5536e-05, "loss": 130.1245, "step": 15960 }, { "epoch": 0.0645208207921072, "grad_norm": 1334.9195556640625, "learning_rate": 2.5552000000000002e-05, "loss": 158.7703, "step": 15970 }, { "epoch": 0.06456122205747485, "grad_norm": 1718.863525390625, "learning_rate": 2.5568000000000003e-05, "loss": 165.1619, "step": 15980 }, { "epoch": 0.06460162332284247, "grad_norm": 1156.9222412109375, "learning_rate": 2.5584e-05, "loss": 158.3269, "step": 15990 }, { "epoch": 0.0646420245882101, "grad_norm": 1795.82177734375, "learning_rate": 2.5600000000000002e-05, "loss": 182.6038, "step": 16000 }, { "epoch": 0.06468242585357774, "grad_norm": 9040.69921875, "learning_rate": 2.5616e-05, "loss": 197.5475, "step": 16010 }, { "epoch": 0.06472282711894536, "grad_norm": 2413.524658203125, "learning_rate": 2.5632000000000005e-05, "loss": 150.8373, "step": 16020 }, { "epoch": 0.06476322838431299, "grad_norm": 1966.7882080078125, "learning_rate": 2.5648000000000002e-05, "loss": 163.2591, "step": 16030 }, { "epoch": 0.06480362964968063, "grad_norm": 985.5374145507812, "learning_rate": 2.5664e-05, "loss": 167.9098, "step": 16040 }, { "epoch": 0.06484403091504826, "grad_norm": 1213.7303466796875, "learning_rate": 2.568e-05, "loss": 120.5761, "step": 16050 }, { "epoch": 0.0648844321804159, "grad_norm": 2820.966796875, "learning_rate": 2.5696e-05, "loss": 107.9031, "step": 16060 }, { "epoch": 0.06492483344578352, "grad_norm": 1509.03076171875, "learning_rate": 2.5712000000000004e-05, "loss": 147.851, "step": 16070 }, { "epoch": 0.06496523471115115, "grad_norm": 5092.55859375, "learning_rate": 2.5728e-05, "loss": 107.7917, "step": 16080 }, { "epoch": 0.06500563597651879, "grad_norm": 1273.028076171875, "learning_rate": 2.5744e-05, "loss": 135.458, "step": 16090 }, { "epoch": 0.06504603724188641, "grad_norm": 6080.85498046875, "learning_rate": 2.5760000000000004e-05, "loss": 138.0135, "step": 16100 }, { "epoch": 0.06508643850725404, "grad_norm": 1283.4329833984375, "learning_rate": 2.5776000000000002e-05, "loss": 113.1424, "step": 16110 }, { "epoch": 0.06512683977262168, "grad_norm": 2675.70166015625, "learning_rate": 2.5792000000000003e-05, "loss": 106.4951, "step": 16120 }, { "epoch": 0.0651672410379893, "grad_norm": 8512.3154296875, "learning_rate": 2.5808e-05, "loss": 168.7911, "step": 16130 }, { "epoch": 0.06520764230335695, "grad_norm": 1965.6282958984375, "learning_rate": 2.5824e-05, "loss": 193.8709, "step": 16140 }, { "epoch": 0.06524804356872457, "grad_norm": 893.007080078125, "learning_rate": 2.5840000000000003e-05, "loss": 117.2167, "step": 16150 }, { "epoch": 0.0652884448340922, "grad_norm": 1275.0057373046875, "learning_rate": 2.5856e-05, "loss": 113.1999, "step": 16160 }, { "epoch": 0.06532884609945984, "grad_norm": 1035.0467529296875, "learning_rate": 2.5872000000000002e-05, "loss": 157.7083, "step": 16170 }, { "epoch": 0.06536924736482747, "grad_norm": 1248.9114990234375, "learning_rate": 2.5888000000000004e-05, "loss": 112.5848, "step": 16180 }, { "epoch": 0.06540964863019509, "grad_norm": 1170.686767578125, "learning_rate": 2.5904e-05, "loss": 167.6834, "step": 16190 }, { "epoch": 0.06545004989556273, "grad_norm": 974.4927978515625, "learning_rate": 2.5920000000000003e-05, "loss": 119.0438, "step": 16200 }, { "epoch": 0.06549045116093036, "grad_norm": 1922.8992919921875, "learning_rate": 2.5936e-05, "loss": 171.7924, "step": 16210 }, { "epoch": 0.065530852426298, "grad_norm": 772.7111206054688, "learning_rate": 2.5952000000000005e-05, "loss": 123.9591, "step": 16220 }, { "epoch": 0.06557125369166562, "grad_norm": 1295.742431640625, "learning_rate": 2.5968000000000003e-05, "loss": 167.7593, "step": 16230 }, { "epoch": 0.06561165495703325, "grad_norm": 1234.3642578125, "learning_rate": 2.5984e-05, "loss": 122.131, "step": 16240 }, { "epoch": 0.06565205622240089, "grad_norm": 435.2963562011719, "learning_rate": 2.6000000000000002e-05, "loss": 104.5241, "step": 16250 }, { "epoch": 0.06569245748776852, "grad_norm": 489.53704833984375, "learning_rate": 2.6016e-05, "loss": 190.6924, "step": 16260 }, { "epoch": 0.06573285875313614, "grad_norm": 646.9764404296875, "learning_rate": 2.6032000000000004e-05, "loss": 138.386, "step": 16270 }, { "epoch": 0.06577326001850378, "grad_norm": 3126.28369140625, "learning_rate": 2.6048000000000002e-05, "loss": 199.9261, "step": 16280 }, { "epoch": 0.06581366128387141, "grad_norm": 879.2015991210938, "learning_rate": 2.6064e-05, "loss": 125.0898, "step": 16290 }, { "epoch": 0.06585406254923905, "grad_norm": 1662.759033203125, "learning_rate": 2.6080000000000004e-05, "loss": 180.1892, "step": 16300 }, { "epoch": 0.06589446381460667, "grad_norm": 1649.497802734375, "learning_rate": 2.6096000000000002e-05, "loss": 115.2371, "step": 16310 }, { "epoch": 0.0659348650799743, "grad_norm": 923.7296142578125, "learning_rate": 2.6112000000000003e-05, "loss": 120.7811, "step": 16320 }, { "epoch": 0.06597526634534194, "grad_norm": 661.9632568359375, "learning_rate": 2.6128e-05, "loss": 147.1547, "step": 16330 }, { "epoch": 0.06601566761070957, "grad_norm": 2440.39892578125, "learning_rate": 2.6144e-05, "loss": 134.8431, "step": 16340 }, { "epoch": 0.06605606887607719, "grad_norm": 1195.9329833984375, "learning_rate": 2.6160000000000004e-05, "loss": 198.3232, "step": 16350 }, { "epoch": 0.06609647014144483, "grad_norm": 674.6731567382812, "learning_rate": 2.6176e-05, "loss": 105.0343, "step": 16360 }, { "epoch": 0.06613687140681246, "grad_norm": 623.1482543945312, "learning_rate": 2.6192000000000003e-05, "loss": 110.9122, "step": 16370 }, { "epoch": 0.0661772726721801, "grad_norm": 1151.586181640625, "learning_rate": 2.6208000000000004e-05, "loss": 133.3356, "step": 16380 }, { "epoch": 0.06621767393754772, "grad_norm": 2473.778076171875, "learning_rate": 2.6224e-05, "loss": 129.0639, "step": 16390 }, { "epoch": 0.06625807520291535, "grad_norm": 1522.5828857421875, "learning_rate": 2.6240000000000003e-05, "loss": 101.4512, "step": 16400 }, { "epoch": 0.06629847646828299, "grad_norm": 3793.374755859375, "learning_rate": 2.6256e-05, "loss": 134.3651, "step": 16410 }, { "epoch": 0.06633887773365062, "grad_norm": 1190.8206787109375, "learning_rate": 2.6272000000000005e-05, "loss": 120.1487, "step": 16420 }, { "epoch": 0.06637927899901824, "grad_norm": 0.0, "learning_rate": 2.6288000000000003e-05, "loss": 71.3246, "step": 16430 }, { "epoch": 0.06641968026438588, "grad_norm": 746.3397216796875, "learning_rate": 2.6304e-05, "loss": 102.9548, "step": 16440 }, { "epoch": 0.06646008152975351, "grad_norm": 505.4109802246094, "learning_rate": 2.6320000000000002e-05, "loss": 109.599, "step": 16450 }, { "epoch": 0.06650048279512115, "grad_norm": 466.3865661621094, "learning_rate": 2.6336e-05, "loss": 121.0319, "step": 16460 }, { "epoch": 0.06654088406048878, "grad_norm": 1312.0987548828125, "learning_rate": 2.6352000000000005e-05, "loss": 124.488, "step": 16470 }, { "epoch": 0.0665812853258564, "grad_norm": 984.8582763671875, "learning_rate": 2.6368000000000002e-05, "loss": 99.0475, "step": 16480 }, { "epoch": 0.06662168659122404, "grad_norm": 1430.27001953125, "learning_rate": 2.6384e-05, "loss": 100.6167, "step": 16490 }, { "epoch": 0.06666208785659167, "grad_norm": 986.4356079101562, "learning_rate": 2.6400000000000005e-05, "loss": 130.1774, "step": 16500 }, { "epoch": 0.0667024891219593, "grad_norm": 684.648193359375, "learning_rate": 2.6416000000000003e-05, "loss": 127.4885, "step": 16510 }, { "epoch": 0.06674289038732693, "grad_norm": 866.86474609375, "learning_rate": 2.6432000000000004e-05, "loss": 134.0104, "step": 16520 }, { "epoch": 0.06678329165269456, "grad_norm": 761.9561157226562, "learning_rate": 2.6448e-05, "loss": 117.3864, "step": 16530 }, { "epoch": 0.0668236929180622, "grad_norm": 0.0, "learning_rate": 2.6464e-05, "loss": 89.1632, "step": 16540 }, { "epoch": 0.06686409418342983, "grad_norm": 1040.6204833984375, "learning_rate": 2.6480000000000004e-05, "loss": 147.5284, "step": 16550 }, { "epoch": 0.06690449544879745, "grad_norm": 1022.326171875, "learning_rate": 2.6496000000000002e-05, "loss": 119.413, "step": 16560 }, { "epoch": 0.06694489671416509, "grad_norm": 641.7581176757812, "learning_rate": 2.6512e-05, "loss": 118.3933, "step": 16570 }, { "epoch": 0.06698529797953272, "grad_norm": 1253.9312744140625, "learning_rate": 2.6528000000000004e-05, "loss": 125.6518, "step": 16580 }, { "epoch": 0.06702569924490034, "grad_norm": 2859.3115234375, "learning_rate": 2.6544000000000002e-05, "loss": 151.1297, "step": 16590 }, { "epoch": 0.06706610051026798, "grad_norm": 643.5469360351562, "learning_rate": 2.6560000000000003e-05, "loss": 150.4048, "step": 16600 }, { "epoch": 0.06710650177563561, "grad_norm": 457.0594177246094, "learning_rate": 2.6576e-05, "loss": 117.0777, "step": 16610 }, { "epoch": 0.06714690304100325, "grad_norm": 688.000244140625, "learning_rate": 2.6592e-05, "loss": 156.2777, "step": 16620 }, { "epoch": 0.06718730430637088, "grad_norm": 1273.36572265625, "learning_rate": 2.6608000000000003e-05, "loss": 219.1801, "step": 16630 }, { "epoch": 0.0672277055717385, "grad_norm": 1638.51708984375, "learning_rate": 2.6624e-05, "loss": 185.9331, "step": 16640 }, { "epoch": 0.06726810683710614, "grad_norm": 851.9219970703125, "learning_rate": 2.6640000000000002e-05, "loss": 118.9226, "step": 16650 }, { "epoch": 0.06730850810247377, "grad_norm": 1012.8102416992188, "learning_rate": 2.6656e-05, "loss": 117.7325, "step": 16660 }, { "epoch": 0.0673489093678414, "grad_norm": 752.0963745117188, "learning_rate": 2.6672e-05, "loss": 129.8979, "step": 16670 }, { "epoch": 0.06738931063320903, "grad_norm": 652.9700927734375, "learning_rate": 2.6688000000000003e-05, "loss": 126.5764, "step": 16680 }, { "epoch": 0.06742971189857666, "grad_norm": 498.1106872558594, "learning_rate": 2.6704e-05, "loss": 118.9791, "step": 16690 }, { "epoch": 0.0674701131639443, "grad_norm": 660.4872436523438, "learning_rate": 2.6720000000000005e-05, "loss": 117.8117, "step": 16700 }, { "epoch": 0.06751051442931193, "grad_norm": 773.5398559570312, "learning_rate": 2.6736000000000003e-05, "loss": 126.5417, "step": 16710 }, { "epoch": 0.06755091569467955, "grad_norm": 701.9429321289062, "learning_rate": 2.6752e-05, "loss": 121.2665, "step": 16720 }, { "epoch": 0.06759131696004719, "grad_norm": 1298.2679443359375, "learning_rate": 2.6768000000000002e-05, "loss": 134.9247, "step": 16730 }, { "epoch": 0.06763171822541482, "grad_norm": 733.1492919921875, "learning_rate": 2.6784e-05, "loss": 104.2357, "step": 16740 }, { "epoch": 0.06767211949078245, "grad_norm": 1005.172607421875, "learning_rate": 2.6800000000000004e-05, "loss": 110.5115, "step": 16750 }, { "epoch": 0.06771252075615009, "grad_norm": 897.8380737304688, "learning_rate": 2.6816000000000002e-05, "loss": 105.6745, "step": 16760 }, { "epoch": 0.06775292202151771, "grad_norm": 891.0289916992188, "learning_rate": 2.6832e-05, "loss": 127.119, "step": 16770 }, { "epoch": 0.06779332328688535, "grad_norm": 1331.1082763671875, "learning_rate": 2.6848000000000005e-05, "loss": 146.7941, "step": 16780 }, { "epoch": 0.06783372455225298, "grad_norm": 2302.458740234375, "learning_rate": 2.6864000000000002e-05, "loss": 147.356, "step": 16790 }, { "epoch": 0.0678741258176206, "grad_norm": 1848.271240234375, "learning_rate": 2.6880000000000004e-05, "loss": 140.58, "step": 16800 }, { "epoch": 0.06791452708298824, "grad_norm": 1468.3052978515625, "learning_rate": 2.6896e-05, "loss": 200.7853, "step": 16810 }, { "epoch": 0.06795492834835587, "grad_norm": 1507.734130859375, "learning_rate": 2.6912e-05, "loss": 133.0001, "step": 16820 }, { "epoch": 0.0679953296137235, "grad_norm": 1028.371337890625, "learning_rate": 2.6928000000000004e-05, "loss": 122.7306, "step": 16830 }, { "epoch": 0.06803573087909114, "grad_norm": 1497.8377685546875, "learning_rate": 2.6944e-05, "loss": 158.8021, "step": 16840 }, { "epoch": 0.06807613214445876, "grad_norm": 2305.415283203125, "learning_rate": 2.6960000000000003e-05, "loss": 159.5207, "step": 16850 }, { "epoch": 0.0681165334098264, "grad_norm": 2324.898193359375, "learning_rate": 2.6976e-05, "loss": 137.9295, "step": 16860 }, { "epoch": 0.06815693467519403, "grad_norm": 2173.98876953125, "learning_rate": 2.6992000000000002e-05, "loss": 158.221, "step": 16870 }, { "epoch": 0.06819733594056165, "grad_norm": 3569.12158203125, "learning_rate": 2.7008000000000003e-05, "loss": 145.5339, "step": 16880 }, { "epoch": 0.0682377372059293, "grad_norm": 1188.2027587890625, "learning_rate": 2.7024e-05, "loss": 114.8195, "step": 16890 }, { "epoch": 0.06827813847129692, "grad_norm": 1106.9266357421875, "learning_rate": 2.7040000000000005e-05, "loss": 169.873, "step": 16900 }, { "epoch": 0.06831853973666455, "grad_norm": 1759.7666015625, "learning_rate": 2.7056000000000003e-05, "loss": 115.8787, "step": 16910 }, { "epoch": 0.06835894100203219, "grad_norm": 3340.66162109375, "learning_rate": 2.7072e-05, "loss": 188.1911, "step": 16920 }, { "epoch": 0.06839934226739981, "grad_norm": 1076.770751953125, "learning_rate": 2.7088000000000002e-05, "loss": 132.0581, "step": 16930 }, { "epoch": 0.06843974353276745, "grad_norm": 516.5988159179688, "learning_rate": 2.7104e-05, "loss": 136.9663, "step": 16940 }, { "epoch": 0.06848014479813508, "grad_norm": 630.5826416015625, "learning_rate": 2.7120000000000005e-05, "loss": 99.7604, "step": 16950 }, { "epoch": 0.0685205460635027, "grad_norm": 1387.2393798828125, "learning_rate": 2.7136000000000002e-05, "loss": 161.1518, "step": 16960 }, { "epoch": 0.06856094732887034, "grad_norm": 2331.400390625, "learning_rate": 2.7152e-05, "loss": 170.4191, "step": 16970 }, { "epoch": 0.06860134859423797, "grad_norm": 812.1047973632812, "learning_rate": 2.7168000000000005e-05, "loss": 143.006, "step": 16980 }, { "epoch": 0.0686417498596056, "grad_norm": 2469.12255859375, "learning_rate": 2.7184000000000003e-05, "loss": 165.8849, "step": 16990 }, { "epoch": 0.06868215112497324, "grad_norm": 964.4491577148438, "learning_rate": 2.7200000000000004e-05, "loss": 244.0513, "step": 17000 }, { "epoch": 0.06872255239034086, "grad_norm": 1405.3626708984375, "learning_rate": 2.7216e-05, "loss": 119.3549, "step": 17010 }, { "epoch": 0.0687629536557085, "grad_norm": 0.0, "learning_rate": 2.7232e-05, "loss": 117.0388, "step": 17020 }, { "epoch": 0.06880335492107613, "grad_norm": 1727.89794921875, "learning_rate": 2.7248000000000004e-05, "loss": 164.1464, "step": 17030 }, { "epoch": 0.06884375618644376, "grad_norm": 1437.021728515625, "learning_rate": 2.7264000000000002e-05, "loss": 156.7928, "step": 17040 }, { "epoch": 0.0688841574518114, "grad_norm": 710.775634765625, "learning_rate": 2.7280000000000003e-05, "loss": 141.683, "step": 17050 }, { "epoch": 0.06892455871717902, "grad_norm": 879.23193359375, "learning_rate": 2.7296e-05, "loss": 115.3712, "step": 17060 }, { "epoch": 0.06896495998254665, "grad_norm": 1486.0762939453125, "learning_rate": 2.7312000000000002e-05, "loss": 107.1092, "step": 17070 }, { "epoch": 0.06900536124791429, "grad_norm": 1041.4608154296875, "learning_rate": 2.7328000000000003e-05, "loss": 93.719, "step": 17080 }, { "epoch": 0.06904576251328191, "grad_norm": 641.7068481445312, "learning_rate": 2.7344e-05, "loss": 94.0493, "step": 17090 }, { "epoch": 0.06908616377864955, "grad_norm": 1755.1153564453125, "learning_rate": 2.7360000000000006e-05, "loss": 127.0733, "step": 17100 }, { "epoch": 0.06912656504401718, "grad_norm": 659.0975952148438, "learning_rate": 2.7376000000000004e-05, "loss": 139.6171, "step": 17110 }, { "epoch": 0.0691669663093848, "grad_norm": 1058.2906494140625, "learning_rate": 2.7392e-05, "loss": 82.3711, "step": 17120 }, { "epoch": 0.06920736757475245, "grad_norm": 1285.0167236328125, "learning_rate": 2.7408000000000003e-05, "loss": 124.321, "step": 17130 }, { "epoch": 0.06924776884012007, "grad_norm": 1735.9765625, "learning_rate": 2.7424e-05, "loss": 125.6664, "step": 17140 }, { "epoch": 0.0692881701054877, "grad_norm": 9220.4453125, "learning_rate": 2.7440000000000005e-05, "loss": 156.938, "step": 17150 }, { "epoch": 0.06932857137085534, "grad_norm": 2777.53759765625, "learning_rate": 2.7456000000000003e-05, "loss": 115.4183, "step": 17160 }, { "epoch": 0.06936897263622296, "grad_norm": 1371.195068359375, "learning_rate": 2.7472e-05, "loss": 116.4201, "step": 17170 }, { "epoch": 0.0694093739015906, "grad_norm": 10435.0751953125, "learning_rate": 2.7488000000000005e-05, "loss": 142.1004, "step": 17180 }, { "epoch": 0.06944977516695823, "grad_norm": 1178.654296875, "learning_rate": 2.7504000000000003e-05, "loss": 181.0073, "step": 17190 }, { "epoch": 0.06949017643232586, "grad_norm": 654.7691040039062, "learning_rate": 2.752e-05, "loss": 93.7593, "step": 17200 }, { "epoch": 0.0695305776976935, "grad_norm": 1095.8475341796875, "learning_rate": 2.7536000000000002e-05, "loss": 109.7081, "step": 17210 }, { "epoch": 0.06957097896306112, "grad_norm": 2671.902587890625, "learning_rate": 2.7552e-05, "loss": 144.6699, "step": 17220 }, { "epoch": 0.06961138022842875, "grad_norm": 1229.1751708984375, "learning_rate": 2.7568000000000004e-05, "loss": 138.7873, "step": 17230 }, { "epoch": 0.06965178149379639, "grad_norm": 1349.5089111328125, "learning_rate": 2.7584000000000002e-05, "loss": 94.9966, "step": 17240 }, { "epoch": 0.06969218275916401, "grad_norm": 676.7572021484375, "learning_rate": 2.76e-05, "loss": 121.9994, "step": 17250 }, { "epoch": 0.06973258402453165, "grad_norm": 512.7169189453125, "learning_rate": 2.7616e-05, "loss": 80.4757, "step": 17260 }, { "epoch": 0.06977298528989928, "grad_norm": 591.3970947265625, "learning_rate": 2.7632000000000002e-05, "loss": 120.3442, "step": 17270 }, { "epoch": 0.06981338655526691, "grad_norm": 0.0, "learning_rate": 2.7648000000000004e-05, "loss": 111.9648, "step": 17280 }, { "epoch": 0.06985378782063455, "grad_norm": 904.6334228515625, "learning_rate": 2.7664e-05, "loss": 164.6575, "step": 17290 }, { "epoch": 0.06989418908600217, "grad_norm": 2712.48291015625, "learning_rate": 2.768e-05, "loss": 103.8473, "step": 17300 }, { "epoch": 0.0699345903513698, "grad_norm": 975.9140625, "learning_rate": 2.7696000000000004e-05, "loss": 132.7137, "step": 17310 }, { "epoch": 0.06997499161673744, "grad_norm": 844.25537109375, "learning_rate": 2.7712e-05, "loss": 134.3907, "step": 17320 }, { "epoch": 0.07001539288210507, "grad_norm": 558.0436401367188, "learning_rate": 2.7728000000000003e-05, "loss": 123.1448, "step": 17330 }, { "epoch": 0.0700557941474727, "grad_norm": 1072.6103515625, "learning_rate": 2.7744e-05, "loss": 133.5322, "step": 17340 }, { "epoch": 0.07009619541284033, "grad_norm": 605.5867919921875, "learning_rate": 2.7760000000000002e-05, "loss": 141.9054, "step": 17350 }, { "epoch": 0.07013659667820796, "grad_norm": 891.19384765625, "learning_rate": 2.7776000000000003e-05, "loss": 141.5447, "step": 17360 }, { "epoch": 0.0701769979435756, "grad_norm": 2915.88818359375, "learning_rate": 2.7792e-05, "loss": 171.8047, "step": 17370 }, { "epoch": 0.07021739920894322, "grad_norm": 1861.5186767578125, "learning_rate": 2.7808000000000005e-05, "loss": 164.1231, "step": 17380 }, { "epoch": 0.07025780047431085, "grad_norm": 998.0330810546875, "learning_rate": 2.7824000000000003e-05, "loss": 115.6295, "step": 17390 }, { "epoch": 0.07029820173967849, "grad_norm": 1189.415771484375, "learning_rate": 2.784e-05, "loss": 70.8924, "step": 17400 }, { "epoch": 0.07033860300504612, "grad_norm": 1055.349853515625, "learning_rate": 2.7856000000000002e-05, "loss": 190.4664, "step": 17410 }, { "epoch": 0.07037900427041376, "grad_norm": 1523.332275390625, "learning_rate": 2.7872e-05, "loss": 143.7663, "step": 17420 }, { "epoch": 0.07041940553578138, "grad_norm": 1106.2191162109375, "learning_rate": 2.7888000000000005e-05, "loss": 112.4859, "step": 17430 }, { "epoch": 0.07045980680114901, "grad_norm": 1430.1102294921875, "learning_rate": 2.7904000000000003e-05, "loss": 111.35, "step": 17440 }, { "epoch": 0.07050020806651665, "grad_norm": 1060.981201171875, "learning_rate": 2.792e-05, "loss": 89.6806, "step": 17450 }, { "epoch": 0.07054060933188427, "grad_norm": 2935.448486328125, "learning_rate": 2.7936e-05, "loss": 133.8329, "step": 17460 }, { "epoch": 0.0705810105972519, "grad_norm": 2200.762451171875, "learning_rate": 2.7952000000000003e-05, "loss": 145.5165, "step": 17470 }, { "epoch": 0.07062141186261954, "grad_norm": 1097.909423828125, "learning_rate": 2.7968000000000004e-05, "loss": 123.1423, "step": 17480 }, { "epoch": 0.07066181312798717, "grad_norm": 2040.2109375, "learning_rate": 2.7984000000000002e-05, "loss": 112.1275, "step": 17490 }, { "epoch": 0.0707022143933548, "grad_norm": 645.7501831054688, "learning_rate": 2.8e-05, "loss": 110.6064, "step": 17500 }, { "epoch": 0.07074261565872243, "grad_norm": 13496.923828125, "learning_rate": 2.8016000000000004e-05, "loss": 167.6134, "step": 17510 }, { "epoch": 0.07078301692409006, "grad_norm": 1220.3197021484375, "learning_rate": 2.8032000000000002e-05, "loss": 91.5186, "step": 17520 }, { "epoch": 0.0708234181894577, "grad_norm": 1471.6046142578125, "learning_rate": 2.8048000000000003e-05, "loss": 123.2698, "step": 17530 }, { "epoch": 0.07086381945482532, "grad_norm": 565.1813354492188, "learning_rate": 2.8064e-05, "loss": 108.927, "step": 17540 }, { "epoch": 0.07090422072019295, "grad_norm": 1448.7821044921875, "learning_rate": 2.8080000000000002e-05, "loss": 195.6281, "step": 17550 }, { "epoch": 0.07094462198556059, "grad_norm": 864.647216796875, "learning_rate": 2.8096000000000003e-05, "loss": 164.469, "step": 17560 }, { "epoch": 0.07098502325092822, "grad_norm": 605.3051147460938, "learning_rate": 2.8112e-05, "loss": 86.6841, "step": 17570 }, { "epoch": 0.07102542451629586, "grad_norm": 816.9525756835938, "learning_rate": 2.8128000000000006e-05, "loss": 132.6632, "step": 17580 }, { "epoch": 0.07106582578166348, "grad_norm": 1012.05419921875, "learning_rate": 2.8144000000000004e-05, "loss": 124.3037, "step": 17590 }, { "epoch": 0.07110622704703111, "grad_norm": 748.9439086914062, "learning_rate": 2.816e-05, "loss": 181.5007, "step": 17600 }, { "epoch": 0.07114662831239875, "grad_norm": 2104.114990234375, "learning_rate": 2.8176000000000003e-05, "loss": 130.2049, "step": 17610 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 2.8192e-05, "loss": 148.4061, "step": 17620 }, { "epoch": 0.071227430843134, "grad_norm": 751.892578125, "learning_rate": 2.8208000000000005e-05, "loss": 99.2189, "step": 17630 }, { "epoch": 0.07126783210850164, "grad_norm": 829.3687744140625, "learning_rate": 2.8224000000000003e-05, "loss": 100.3081, "step": 17640 }, { "epoch": 0.07130823337386927, "grad_norm": 1362.5450439453125, "learning_rate": 2.824e-05, "loss": 163.4869, "step": 17650 }, { "epoch": 0.07134863463923691, "grad_norm": 736.9421997070312, "learning_rate": 2.8256000000000002e-05, "loss": 114.6114, "step": 17660 }, { "epoch": 0.07138903590460453, "grad_norm": 0.0, "learning_rate": 2.8272000000000003e-05, "loss": 152.363, "step": 17670 }, { "epoch": 0.07142943716997216, "grad_norm": 1884.3018798828125, "learning_rate": 2.8288000000000004e-05, "loss": 181.776, "step": 17680 }, { "epoch": 0.0714698384353398, "grad_norm": 702.0150756835938, "learning_rate": 2.8304000000000002e-05, "loss": 138.0082, "step": 17690 }, { "epoch": 0.07151023970070743, "grad_norm": 1243.4521484375, "learning_rate": 2.832e-05, "loss": 126.1238, "step": 17700 }, { "epoch": 0.07155064096607505, "grad_norm": 2849.708740234375, "learning_rate": 2.8336000000000004e-05, "loss": 124.8339, "step": 17710 }, { "epoch": 0.07159104223144269, "grad_norm": 814.1820068359375, "learning_rate": 2.8352000000000002e-05, "loss": 100.6486, "step": 17720 }, { "epoch": 0.07163144349681032, "grad_norm": 1201.3582763671875, "learning_rate": 2.8368000000000004e-05, "loss": 145.7217, "step": 17730 }, { "epoch": 0.07167184476217796, "grad_norm": 571.5975952148438, "learning_rate": 2.8384e-05, "loss": 103.9509, "step": 17740 }, { "epoch": 0.07171224602754558, "grad_norm": 896.2073364257812, "learning_rate": 2.8400000000000003e-05, "loss": 155.8821, "step": 17750 }, { "epoch": 0.07175264729291321, "grad_norm": 962.5536499023438, "learning_rate": 2.8416000000000004e-05, "loss": 92.6593, "step": 17760 }, { "epoch": 0.07179304855828085, "grad_norm": 1393.9190673828125, "learning_rate": 2.8432e-05, "loss": 168.3829, "step": 17770 }, { "epoch": 0.07183344982364848, "grad_norm": 672.989501953125, "learning_rate": 2.8448000000000006e-05, "loss": 158.9301, "step": 17780 }, { "epoch": 0.0718738510890161, "grad_norm": 687.2365112304688, "learning_rate": 2.8464000000000004e-05, "loss": 79.7871, "step": 17790 }, { "epoch": 0.07191425235438374, "grad_norm": 738.2789916992188, "learning_rate": 2.8480000000000002e-05, "loss": 108.1994, "step": 17800 }, { "epoch": 0.07195465361975137, "grad_norm": 1095.77099609375, "learning_rate": 2.8496000000000003e-05, "loss": 106.8416, "step": 17810 }, { "epoch": 0.07199505488511901, "grad_norm": 326.4542236328125, "learning_rate": 2.8512e-05, "loss": 104.4799, "step": 17820 }, { "epoch": 0.07203545615048663, "grad_norm": 2892.407958984375, "learning_rate": 2.8528e-05, "loss": 94.0657, "step": 17830 }, { "epoch": 0.07207585741585426, "grad_norm": 1634.0128173828125, "learning_rate": 2.8544000000000003e-05, "loss": 119.6491, "step": 17840 }, { "epoch": 0.0721162586812219, "grad_norm": 2394.59033203125, "learning_rate": 2.856e-05, "loss": 160.5872, "step": 17850 }, { "epoch": 0.07215665994658953, "grad_norm": 995.382080078125, "learning_rate": 2.8576000000000002e-05, "loss": 89.2815, "step": 17860 }, { "epoch": 0.07219706121195715, "grad_norm": 562.4957275390625, "learning_rate": 2.8592000000000003e-05, "loss": 117.9278, "step": 17870 }, { "epoch": 0.0722374624773248, "grad_norm": 1529.1700439453125, "learning_rate": 2.8608e-05, "loss": 144.6818, "step": 17880 }, { "epoch": 0.07227786374269242, "grad_norm": 1666.5615234375, "learning_rate": 2.8624000000000002e-05, "loss": 154.7127, "step": 17890 }, { "epoch": 0.07231826500806006, "grad_norm": 1106.76416015625, "learning_rate": 2.864e-05, "loss": 121.5894, "step": 17900 }, { "epoch": 0.07235866627342769, "grad_norm": 1048.846435546875, "learning_rate": 2.8656000000000005e-05, "loss": 118.475, "step": 17910 }, { "epoch": 0.07239906753879531, "grad_norm": 1723.060546875, "learning_rate": 2.8672000000000003e-05, "loss": 81.5143, "step": 17920 }, { "epoch": 0.07243946880416295, "grad_norm": 827.5028076171875, "learning_rate": 2.8688e-05, "loss": 120.4572, "step": 17930 }, { "epoch": 0.07247987006953058, "grad_norm": 2439.23193359375, "learning_rate": 2.8704e-05, "loss": 150.0546, "step": 17940 }, { "epoch": 0.0725202713348982, "grad_norm": 1679.714111328125, "learning_rate": 2.8720000000000003e-05, "loss": 113.1306, "step": 17950 }, { "epoch": 0.07256067260026584, "grad_norm": 1051.8074951171875, "learning_rate": 2.8736000000000004e-05, "loss": 175.98, "step": 17960 }, { "epoch": 0.07260107386563347, "grad_norm": 708.1478881835938, "learning_rate": 2.8752000000000002e-05, "loss": 120.6407, "step": 17970 }, { "epoch": 0.07264147513100111, "grad_norm": 1140.8126220703125, "learning_rate": 2.8768e-05, "loss": 100.2487, "step": 17980 }, { "epoch": 0.07268187639636874, "grad_norm": 1086.7423095703125, "learning_rate": 2.8784000000000004e-05, "loss": 88.6672, "step": 17990 }, { "epoch": 0.07272227766173636, "grad_norm": 2896.58935546875, "learning_rate": 2.8800000000000002e-05, "loss": 123.9438, "step": 18000 }, { "epoch": 0.072762678927104, "grad_norm": 781.7880249023438, "learning_rate": 2.8816000000000003e-05, "loss": 128.9763, "step": 18010 }, { "epoch": 0.07280308019247163, "grad_norm": 1596.1004638671875, "learning_rate": 2.8832e-05, "loss": 103.9027, "step": 18020 }, { "epoch": 0.07284348145783925, "grad_norm": 1522.1275634765625, "learning_rate": 2.8848e-05, "loss": 82.2587, "step": 18030 }, { "epoch": 0.0728838827232069, "grad_norm": 1198.4852294921875, "learning_rate": 2.8864000000000004e-05, "loss": 78.4048, "step": 18040 }, { "epoch": 0.07292428398857452, "grad_norm": 1041.67578125, "learning_rate": 2.888e-05, "loss": 130.7603, "step": 18050 }, { "epoch": 0.07296468525394216, "grad_norm": 1674.17919921875, "learning_rate": 2.8896000000000003e-05, "loss": 133.4344, "step": 18060 }, { "epoch": 0.07300508651930979, "grad_norm": 1320.9654541015625, "learning_rate": 2.8912000000000004e-05, "loss": 101.3308, "step": 18070 }, { "epoch": 0.07304548778467741, "grad_norm": 2332.47265625, "learning_rate": 2.8928e-05, "loss": 104.6813, "step": 18080 }, { "epoch": 0.07308588905004505, "grad_norm": 677.6102294921875, "learning_rate": 2.8944000000000003e-05, "loss": 111.6524, "step": 18090 }, { "epoch": 0.07312629031541268, "grad_norm": 446.9483337402344, "learning_rate": 2.896e-05, "loss": 79.7397, "step": 18100 }, { "epoch": 0.0731666915807803, "grad_norm": 903.9324951171875, "learning_rate": 2.8976000000000005e-05, "loss": 106.6063, "step": 18110 }, { "epoch": 0.07320709284614794, "grad_norm": 825.9483642578125, "learning_rate": 2.8992000000000003e-05, "loss": 119.8426, "step": 18120 }, { "epoch": 0.07324749411151557, "grad_norm": 871.2711791992188, "learning_rate": 2.9008e-05, "loss": 147.6799, "step": 18130 }, { "epoch": 0.07328789537688321, "grad_norm": 1512.626220703125, "learning_rate": 2.9024000000000002e-05, "loss": 100.2563, "step": 18140 }, { "epoch": 0.07332829664225084, "grad_norm": 2183.07421875, "learning_rate": 2.9040000000000003e-05, "loss": 128.243, "step": 18150 }, { "epoch": 0.07336869790761846, "grad_norm": 577.5823974609375, "learning_rate": 2.9056000000000004e-05, "loss": 104.5627, "step": 18160 }, { "epoch": 0.0734090991729861, "grad_norm": 1346.950927734375, "learning_rate": 2.9072000000000002e-05, "loss": 131.9779, "step": 18170 }, { "epoch": 0.07344950043835373, "grad_norm": 897.385498046875, "learning_rate": 2.9088e-05, "loss": 139.202, "step": 18180 }, { "epoch": 0.07348990170372136, "grad_norm": 957.313232421875, "learning_rate": 2.9104000000000005e-05, "loss": 129.68, "step": 18190 }, { "epoch": 0.073530302969089, "grad_norm": 644.3162231445312, "learning_rate": 2.9120000000000002e-05, "loss": 126.1235, "step": 18200 }, { "epoch": 0.07357070423445662, "grad_norm": 812.66943359375, "learning_rate": 2.9136000000000004e-05, "loss": 144.1799, "step": 18210 }, { "epoch": 0.07361110549982426, "grad_norm": 1055.3414306640625, "learning_rate": 2.9152e-05, "loss": 129.42, "step": 18220 }, { "epoch": 0.07365150676519189, "grad_norm": 2815.364013671875, "learning_rate": 2.9168e-05, "loss": 130.298, "step": 18230 }, { "epoch": 0.07369190803055951, "grad_norm": 2299.622314453125, "learning_rate": 2.9184000000000004e-05, "loss": 153.6496, "step": 18240 }, { "epoch": 0.07373230929592715, "grad_norm": 1223.2135009765625, "learning_rate": 2.92e-05, "loss": 131.9549, "step": 18250 }, { "epoch": 0.07377271056129478, "grad_norm": 800.2896728515625, "learning_rate": 2.9216000000000003e-05, "loss": 96.7813, "step": 18260 }, { "epoch": 0.0738131118266624, "grad_norm": 1550.8492431640625, "learning_rate": 2.9232000000000004e-05, "loss": 134.3318, "step": 18270 }, { "epoch": 0.07385351309203005, "grad_norm": 1408.572021484375, "learning_rate": 2.9248000000000002e-05, "loss": 169.4472, "step": 18280 }, { "epoch": 0.07389391435739767, "grad_norm": 775.6466674804688, "learning_rate": 2.9264000000000003e-05, "loss": 139.8296, "step": 18290 }, { "epoch": 0.0739343156227653, "grad_norm": 3935.3876953125, "learning_rate": 2.928e-05, "loss": 197.3509, "step": 18300 }, { "epoch": 0.07397471688813294, "grad_norm": 1000.7813720703125, "learning_rate": 2.9296000000000005e-05, "loss": 124.4853, "step": 18310 }, { "epoch": 0.07401511815350056, "grad_norm": 746.0226440429688, "learning_rate": 2.9312000000000003e-05, "loss": 139.8521, "step": 18320 }, { "epoch": 0.0740555194188682, "grad_norm": 1989.065673828125, "learning_rate": 2.9328e-05, "loss": 163.3705, "step": 18330 }, { "epoch": 0.07409592068423583, "grad_norm": 1917.332763671875, "learning_rate": 2.9344000000000002e-05, "loss": 127.1422, "step": 18340 }, { "epoch": 0.07413632194960346, "grad_norm": 1084.9027099609375, "learning_rate": 2.9360000000000003e-05, "loss": 67.5235, "step": 18350 }, { "epoch": 0.0741767232149711, "grad_norm": 629.862548828125, "learning_rate": 2.9376000000000005e-05, "loss": 92.0765, "step": 18360 }, { "epoch": 0.07421712448033872, "grad_norm": 772.4987182617188, "learning_rate": 2.9392000000000003e-05, "loss": 188.4281, "step": 18370 }, { "epoch": 0.07425752574570635, "grad_norm": 977.8203125, "learning_rate": 2.9408e-05, "loss": 112.2548, "step": 18380 }, { "epoch": 0.07429792701107399, "grad_norm": 996.1343383789062, "learning_rate": 2.9424000000000005e-05, "loss": 125.9146, "step": 18390 }, { "epoch": 0.07433832827644161, "grad_norm": 1767.5286865234375, "learning_rate": 2.9440000000000003e-05, "loss": 121.1109, "step": 18400 }, { "epoch": 0.07437872954180925, "grad_norm": 4251.8291015625, "learning_rate": 2.9456000000000004e-05, "loss": 177.3741, "step": 18410 }, { "epoch": 0.07441913080717688, "grad_norm": 923.5038452148438, "learning_rate": 2.9472000000000002e-05, "loss": 98.2529, "step": 18420 }, { "epoch": 0.07445953207254451, "grad_norm": 1615.978271484375, "learning_rate": 2.9488e-05, "loss": 139.2994, "step": 18430 }, { "epoch": 0.07449993333791215, "grad_norm": 1293.6497802734375, "learning_rate": 2.9504000000000004e-05, "loss": 67.7334, "step": 18440 }, { "epoch": 0.07454033460327977, "grad_norm": 952.5794677734375, "learning_rate": 2.9520000000000002e-05, "loss": 98.4205, "step": 18450 }, { "epoch": 0.0745807358686474, "grad_norm": 843.2684936523438, "learning_rate": 2.9536e-05, "loss": 133.0427, "step": 18460 }, { "epoch": 0.07462113713401504, "grad_norm": 1032.7259521484375, "learning_rate": 2.9552000000000004e-05, "loss": 115.7002, "step": 18470 }, { "epoch": 0.07466153839938267, "grad_norm": 709.962890625, "learning_rate": 2.9568000000000002e-05, "loss": 93.0021, "step": 18480 }, { "epoch": 0.0747019396647503, "grad_norm": 1049.282470703125, "learning_rate": 2.9584000000000003e-05, "loss": 115.9874, "step": 18490 }, { "epoch": 0.07474234093011793, "grad_norm": 946.2427978515625, "learning_rate": 2.96e-05, "loss": 103.3239, "step": 18500 }, { "epoch": 0.07478274219548556, "grad_norm": 813.2142333984375, "learning_rate": 2.9616e-05, "loss": 96.9168, "step": 18510 }, { "epoch": 0.0748231434608532, "grad_norm": 702.8202514648438, "learning_rate": 2.9632000000000004e-05, "loss": 96.5812, "step": 18520 }, { "epoch": 0.07486354472622082, "grad_norm": 995.9627685546875, "learning_rate": 2.9648e-05, "loss": 97.2412, "step": 18530 }, { "epoch": 0.07490394599158845, "grad_norm": 3526.63671875, "learning_rate": 2.9664000000000003e-05, "loss": 192.7317, "step": 18540 }, { "epoch": 0.07494434725695609, "grad_norm": 455.59088134765625, "learning_rate": 2.9680000000000004e-05, "loss": 131.787, "step": 18550 }, { "epoch": 0.07498474852232372, "grad_norm": 1115.4947509765625, "learning_rate": 2.9696e-05, "loss": 86.6757, "step": 18560 }, { "epoch": 0.07502514978769136, "grad_norm": 947.726318359375, "learning_rate": 2.9712000000000003e-05, "loss": 96.3201, "step": 18570 }, { "epoch": 0.07506555105305898, "grad_norm": 1747.0982666015625, "learning_rate": 2.9728e-05, "loss": 120.4169, "step": 18580 }, { "epoch": 0.07510595231842661, "grad_norm": 678.9027099609375, "learning_rate": 2.9744000000000005e-05, "loss": 72.0365, "step": 18590 }, { "epoch": 0.07514635358379425, "grad_norm": 1209.59326171875, "learning_rate": 2.9760000000000003e-05, "loss": 199.8874, "step": 18600 }, { "epoch": 0.07518675484916187, "grad_norm": 658.3634643554688, "learning_rate": 2.9776e-05, "loss": 175.686, "step": 18610 }, { "epoch": 0.0752271561145295, "grad_norm": 748.1829833984375, "learning_rate": 2.9792000000000002e-05, "loss": 145.6304, "step": 18620 }, { "epoch": 0.07526755737989714, "grad_norm": 693.3621215820312, "learning_rate": 2.9808e-05, "loss": 120.1813, "step": 18630 }, { "epoch": 0.07530795864526477, "grad_norm": 731.7698364257812, "learning_rate": 2.9824000000000004e-05, "loss": 167.9795, "step": 18640 }, { "epoch": 0.0753483599106324, "grad_norm": 813.2443237304688, "learning_rate": 2.9840000000000002e-05, "loss": 73.1504, "step": 18650 }, { "epoch": 0.07538876117600003, "grad_norm": 2292.02490234375, "learning_rate": 2.9856e-05, "loss": 132.4692, "step": 18660 }, { "epoch": 0.07542916244136766, "grad_norm": 1452.90087890625, "learning_rate": 2.9872000000000005e-05, "loss": 134.5143, "step": 18670 }, { "epoch": 0.0754695637067353, "grad_norm": 918.1961669921875, "learning_rate": 2.9888000000000003e-05, "loss": 106.5664, "step": 18680 }, { "epoch": 0.07550996497210292, "grad_norm": 1377.7470703125, "learning_rate": 2.9904000000000004e-05, "loss": 89.391, "step": 18690 }, { "epoch": 0.07555036623747055, "grad_norm": 1466.1744384765625, "learning_rate": 2.992e-05, "loss": 124.1821, "step": 18700 }, { "epoch": 0.07559076750283819, "grad_norm": 1014.8499755859375, "learning_rate": 2.9936e-05, "loss": 185.3473, "step": 18710 }, { "epoch": 0.07563116876820582, "grad_norm": 1184.2947998046875, "learning_rate": 2.9952000000000004e-05, "loss": 125.8145, "step": 18720 }, { "epoch": 0.07567157003357346, "grad_norm": 774.492919921875, "learning_rate": 2.9968000000000002e-05, "loss": 120.7197, "step": 18730 }, { "epoch": 0.07571197129894108, "grad_norm": 1086.3082275390625, "learning_rate": 2.9984000000000003e-05, "loss": 126.5244, "step": 18740 }, { "epoch": 0.07575237256430871, "grad_norm": 937.5267944335938, "learning_rate": 3.0000000000000004e-05, "loss": 118.7993, "step": 18750 }, { "epoch": 0.07579277382967635, "grad_norm": 1027.5623779296875, "learning_rate": 3.0016000000000002e-05, "loss": 94.4266, "step": 18760 }, { "epoch": 0.07583317509504398, "grad_norm": 516.3690185546875, "learning_rate": 3.0032000000000003e-05, "loss": 120.8037, "step": 18770 }, { "epoch": 0.0758735763604116, "grad_norm": 1685.0865478515625, "learning_rate": 3.0048e-05, "loss": 106.1391, "step": 18780 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 3.0064000000000006e-05, "loss": 74.0085, "step": 18790 }, { "epoch": 0.07595437889114687, "grad_norm": 1456.856689453125, "learning_rate": 3.0080000000000003e-05, "loss": 118.443, "step": 18800 }, { "epoch": 0.07599478015651451, "grad_norm": 2234.4599609375, "learning_rate": 3.0096e-05, "loss": 171.6478, "step": 18810 }, { "epoch": 0.07603518142188213, "grad_norm": 976.5721435546875, "learning_rate": 3.0112000000000002e-05, "loss": 155.1822, "step": 18820 }, { "epoch": 0.07607558268724976, "grad_norm": 1938.18603515625, "learning_rate": 3.0128e-05, "loss": 139.4546, "step": 18830 }, { "epoch": 0.0761159839526174, "grad_norm": 621.24755859375, "learning_rate": 3.0144000000000005e-05, "loss": 84.2352, "step": 18840 }, { "epoch": 0.07615638521798503, "grad_norm": 599.5986938476562, "learning_rate": 3.0160000000000003e-05, "loss": 139.9842, "step": 18850 }, { "epoch": 0.07619678648335265, "grad_norm": 1418.4581298828125, "learning_rate": 3.0176e-05, "loss": 128.9917, "step": 18860 }, { "epoch": 0.07623718774872029, "grad_norm": 1401.3021240234375, "learning_rate": 3.0192000000000005e-05, "loss": 104.3214, "step": 18870 }, { "epoch": 0.07627758901408792, "grad_norm": 748.1585083007812, "learning_rate": 3.0208000000000003e-05, "loss": 119.8759, "step": 18880 }, { "epoch": 0.07631799027945556, "grad_norm": 1339.5333251953125, "learning_rate": 3.0224000000000004e-05, "loss": 115.9315, "step": 18890 }, { "epoch": 0.07635839154482318, "grad_norm": 1126.16552734375, "learning_rate": 3.0240000000000002e-05, "loss": 145.7979, "step": 18900 }, { "epoch": 0.07639879281019081, "grad_norm": 808.7857055664062, "learning_rate": 3.0256e-05, "loss": 127.7257, "step": 18910 }, { "epoch": 0.07643919407555845, "grad_norm": 1016.2286987304688, "learning_rate": 3.0272000000000004e-05, "loss": 228.2659, "step": 18920 }, { "epoch": 0.07647959534092608, "grad_norm": 1210.65771484375, "learning_rate": 3.0288000000000002e-05, "loss": 118.8449, "step": 18930 }, { "epoch": 0.0765199966062937, "grad_norm": 759.594970703125, "learning_rate": 3.0304000000000003e-05, "loss": 73.6208, "step": 18940 }, { "epoch": 0.07656039787166134, "grad_norm": 805.1461181640625, "learning_rate": 3.032e-05, "loss": 100.1516, "step": 18950 }, { "epoch": 0.07660079913702897, "grad_norm": 1020.590576171875, "learning_rate": 3.0336000000000002e-05, "loss": 130.3593, "step": 18960 }, { "epoch": 0.07664120040239661, "grad_norm": 281.2948303222656, "learning_rate": 3.0352000000000003e-05, "loss": 87.3999, "step": 18970 }, { "epoch": 0.07668160166776423, "grad_norm": 1126.6138916015625, "learning_rate": 3.0368e-05, "loss": 125.5814, "step": 18980 }, { "epoch": 0.07672200293313186, "grad_norm": 3934.1298828125, "learning_rate": 3.0384000000000006e-05, "loss": 179.5769, "step": 18990 }, { "epoch": 0.0767624041984995, "grad_norm": 1989.8963623046875, "learning_rate": 3.0400000000000004e-05, "loss": 164.1262, "step": 19000 }, { "epoch": 0.07680280546386713, "grad_norm": 891.9097290039062, "learning_rate": 3.0416e-05, "loss": 78.4145, "step": 19010 }, { "epoch": 0.07684320672923475, "grad_norm": 1621.536865234375, "learning_rate": 3.0432000000000003e-05, "loss": 97.0821, "step": 19020 }, { "epoch": 0.0768836079946024, "grad_norm": 1369.31591796875, "learning_rate": 3.0448e-05, "loss": 97.2216, "step": 19030 }, { "epoch": 0.07692400925997002, "grad_norm": 1075.7518310546875, "learning_rate": 3.0464000000000005e-05, "loss": 130.9594, "step": 19040 }, { "epoch": 0.07696441052533766, "grad_norm": 1702.1077880859375, "learning_rate": 3.0480000000000003e-05, "loss": 157.67, "step": 19050 }, { "epoch": 0.07700481179070529, "grad_norm": 1624.005126953125, "learning_rate": 3.0496e-05, "loss": 180.6171, "step": 19060 }, { "epoch": 0.07704521305607291, "grad_norm": 1812.77099609375, "learning_rate": 3.0512000000000005e-05, "loss": 115.4045, "step": 19070 }, { "epoch": 0.07708561432144055, "grad_norm": 823.732177734375, "learning_rate": 3.0528e-05, "loss": 105.2697, "step": 19080 }, { "epoch": 0.07712601558680818, "grad_norm": 900.1708984375, "learning_rate": 3.0544e-05, "loss": 118.3052, "step": 19090 }, { "epoch": 0.0771664168521758, "grad_norm": 1073.2254638671875, "learning_rate": 3.0560000000000006e-05, "loss": 161.0425, "step": 19100 }, { "epoch": 0.07720681811754344, "grad_norm": 932.1373901367188, "learning_rate": 3.0576e-05, "loss": 124.8353, "step": 19110 }, { "epoch": 0.07724721938291107, "grad_norm": 992.2178344726562, "learning_rate": 3.0592e-05, "loss": 87.1603, "step": 19120 }, { "epoch": 0.07728762064827871, "grad_norm": 655.614013671875, "learning_rate": 3.0608e-05, "loss": 148.0067, "step": 19130 }, { "epoch": 0.07732802191364634, "grad_norm": 566.0341186523438, "learning_rate": 3.0624000000000004e-05, "loss": 104.9753, "step": 19140 }, { "epoch": 0.07736842317901396, "grad_norm": 604.9459838867188, "learning_rate": 3.0640000000000005e-05, "loss": 80.1844, "step": 19150 }, { "epoch": 0.0774088244443816, "grad_norm": 939.7496948242188, "learning_rate": 3.0656e-05, "loss": 78.9237, "step": 19160 }, { "epoch": 0.07744922570974923, "grad_norm": 1111.1923828125, "learning_rate": 3.067200000000001e-05, "loss": 70.4472, "step": 19170 }, { "epoch": 0.07748962697511685, "grad_norm": 1667.745361328125, "learning_rate": 3.0688e-05, "loss": 109.166, "step": 19180 }, { "epoch": 0.0775300282404845, "grad_norm": 1046.6304931640625, "learning_rate": 3.0704e-05, "loss": 165.926, "step": 19190 }, { "epoch": 0.07757042950585212, "grad_norm": 1761.15869140625, "learning_rate": 3.0720000000000004e-05, "loss": 129.2139, "step": 19200 }, { "epoch": 0.07761083077121976, "grad_norm": 1684.56884765625, "learning_rate": 3.0736e-05, "loss": 154.1343, "step": 19210 }, { "epoch": 0.07765123203658739, "grad_norm": 551.8289794921875, "learning_rate": 3.0752000000000006e-05, "loss": 172.8532, "step": 19220 }, { "epoch": 0.07769163330195501, "grad_norm": 1006.9168701171875, "learning_rate": 3.0768e-05, "loss": 110.0696, "step": 19230 }, { "epoch": 0.07773203456732265, "grad_norm": 1449.43994140625, "learning_rate": 3.0784e-05, "loss": 160.3563, "step": 19240 }, { "epoch": 0.07777243583269028, "grad_norm": 994.652587890625, "learning_rate": 3.08e-05, "loss": 106.7819, "step": 19250 }, { "epoch": 0.0778128370980579, "grad_norm": 1494.919189453125, "learning_rate": 3.0816000000000004e-05, "loss": 154.2932, "step": 19260 }, { "epoch": 0.07785323836342554, "grad_norm": 3010.73974609375, "learning_rate": 3.0832000000000006e-05, "loss": 115.7749, "step": 19270 }, { "epoch": 0.07789363962879317, "grad_norm": 1167.75634765625, "learning_rate": 3.0848e-05, "loss": 84.5835, "step": 19280 }, { "epoch": 0.07793404089416081, "grad_norm": 512.6953735351562, "learning_rate": 3.0864e-05, "loss": 70.4986, "step": 19290 }, { "epoch": 0.07797444215952844, "grad_norm": 1121.130126953125, "learning_rate": 3.088e-05, "loss": 131.0057, "step": 19300 }, { "epoch": 0.07801484342489606, "grad_norm": 821.2005004882812, "learning_rate": 3.0896000000000004e-05, "loss": 93.5886, "step": 19310 }, { "epoch": 0.0780552446902637, "grad_norm": 19915.904296875, "learning_rate": 3.0912000000000005e-05, "loss": 210.1925, "step": 19320 }, { "epoch": 0.07809564595563133, "grad_norm": 970.84033203125, "learning_rate": 3.0928e-05, "loss": 158.7296, "step": 19330 }, { "epoch": 0.07813604722099896, "grad_norm": 2203.380859375, "learning_rate": 3.0944e-05, "loss": 124.2448, "step": 19340 }, { "epoch": 0.0781764484863666, "grad_norm": 1706.534423828125, "learning_rate": 3.096e-05, "loss": 112.4453, "step": 19350 }, { "epoch": 0.07821684975173422, "grad_norm": 558.5722045898438, "learning_rate": 3.0976e-05, "loss": 158.2485, "step": 19360 }, { "epoch": 0.07825725101710186, "grad_norm": 1104.03125, "learning_rate": 3.0992000000000004e-05, "loss": 112.5157, "step": 19370 }, { "epoch": 0.07829765228246949, "grad_norm": 3149.04296875, "learning_rate": 3.1008000000000005e-05, "loss": 172.0443, "step": 19380 }, { "epoch": 0.07833805354783711, "grad_norm": 1571.8192138671875, "learning_rate": 3.1024e-05, "loss": 162.6578, "step": 19390 }, { "epoch": 0.07837845481320475, "grad_norm": 1629.932373046875, "learning_rate": 3.104e-05, "loss": 164.0212, "step": 19400 }, { "epoch": 0.07841885607857238, "grad_norm": 2302.69140625, "learning_rate": 3.1056e-05, "loss": 138.8319, "step": 19410 }, { "epoch": 0.07845925734394, "grad_norm": 1597.150634765625, "learning_rate": 3.1072e-05, "loss": 127.0055, "step": 19420 }, { "epoch": 0.07849965860930765, "grad_norm": 905.0975952148438, "learning_rate": 3.1088000000000005e-05, "loss": 124.8965, "step": 19430 }, { "epoch": 0.07854005987467527, "grad_norm": 756.5381469726562, "learning_rate": 3.1104e-05, "loss": 111.2599, "step": 19440 }, { "epoch": 0.07858046114004291, "grad_norm": 3848.724609375, "learning_rate": 3.112e-05, "loss": 138.655, "step": 19450 }, { "epoch": 0.07862086240541054, "grad_norm": 344.63385009765625, "learning_rate": 3.1136e-05, "loss": 85.4582, "step": 19460 }, { "epoch": 0.07866126367077816, "grad_norm": 996.605712890625, "learning_rate": 3.1152e-05, "loss": 133.7001, "step": 19470 }, { "epoch": 0.0787016649361458, "grad_norm": 1711.14892578125, "learning_rate": 3.1168000000000004e-05, "loss": 95.2133, "step": 19480 }, { "epoch": 0.07874206620151343, "grad_norm": 1539.0604248046875, "learning_rate": 3.1184e-05, "loss": 105.0518, "step": 19490 }, { "epoch": 0.07878246746688106, "grad_norm": 744.2188720703125, "learning_rate": 3.1200000000000006e-05, "loss": 112.2348, "step": 19500 }, { "epoch": 0.0788228687322487, "grad_norm": 665.140380859375, "learning_rate": 3.1216e-05, "loss": 105.9944, "step": 19510 }, { "epoch": 0.07886326999761632, "grad_norm": 1196.1968994140625, "learning_rate": 3.1232e-05, "loss": 102.4042, "step": 19520 }, { "epoch": 0.07890367126298396, "grad_norm": 1342.2335205078125, "learning_rate": 3.1248e-05, "loss": 124.4353, "step": 19530 }, { "epoch": 0.07894407252835159, "grad_norm": 2917.825439453125, "learning_rate": 3.1264000000000004e-05, "loss": 119.8374, "step": 19540 }, { "epoch": 0.07898447379371921, "grad_norm": 1537.9954833984375, "learning_rate": 3.1280000000000005e-05, "loss": 120.8902, "step": 19550 }, { "epoch": 0.07902487505908685, "grad_norm": 713.2342529296875, "learning_rate": 3.1296e-05, "loss": 123.1111, "step": 19560 }, { "epoch": 0.07906527632445448, "grad_norm": 1007.7623901367188, "learning_rate": 3.131200000000001e-05, "loss": 122.3046, "step": 19570 }, { "epoch": 0.07910567758982211, "grad_norm": 744.2635498046875, "learning_rate": 3.1328e-05, "loss": 88.612, "step": 19580 }, { "epoch": 0.07914607885518975, "grad_norm": 1206.9375, "learning_rate": 3.1344000000000003e-05, "loss": 81.3717, "step": 19590 }, { "epoch": 0.07918648012055737, "grad_norm": 1274.3399658203125, "learning_rate": 3.1360000000000005e-05, "loss": 157.8111, "step": 19600 }, { "epoch": 0.07922688138592501, "grad_norm": 696.0009765625, "learning_rate": 3.1376e-05, "loss": 112.9236, "step": 19610 }, { "epoch": 0.07926728265129264, "grad_norm": 1086.2701416015625, "learning_rate": 3.139200000000001e-05, "loss": 103.9647, "step": 19620 }, { "epoch": 0.07930768391666027, "grad_norm": 1140.2965087890625, "learning_rate": 3.1408e-05, "loss": 92.4705, "step": 19630 }, { "epoch": 0.0793480851820279, "grad_norm": 1569.0645751953125, "learning_rate": 3.1424e-05, "loss": 121.3736, "step": 19640 }, { "epoch": 0.07938848644739553, "grad_norm": 1136.5538330078125, "learning_rate": 3.1440000000000004e-05, "loss": 112.8915, "step": 19650 }, { "epoch": 0.07942888771276316, "grad_norm": 740.44091796875, "learning_rate": 3.1456000000000005e-05, "loss": 121.2627, "step": 19660 }, { "epoch": 0.0794692889781308, "grad_norm": 1086.4205322265625, "learning_rate": 3.1472000000000006e-05, "loss": 119.6377, "step": 19670 }, { "epoch": 0.07950969024349842, "grad_norm": 943.4683227539062, "learning_rate": 3.1488e-05, "loss": 95.2023, "step": 19680 }, { "epoch": 0.07955009150886606, "grad_norm": 991.89892578125, "learning_rate": 3.1504e-05, "loss": 178.4546, "step": 19690 }, { "epoch": 0.07959049277423369, "grad_norm": 421.7826232910156, "learning_rate": 3.152e-05, "loss": 100.0158, "step": 19700 }, { "epoch": 0.07963089403960132, "grad_norm": 1290.488525390625, "learning_rate": 3.1536000000000004e-05, "loss": 104.956, "step": 19710 }, { "epoch": 0.07967129530496896, "grad_norm": 1131.934326171875, "learning_rate": 3.1552e-05, "loss": 100.3167, "step": 19720 }, { "epoch": 0.07971169657033658, "grad_norm": 1422.13525390625, "learning_rate": 3.1568e-05, "loss": 139.9084, "step": 19730 }, { "epoch": 0.07975209783570421, "grad_norm": 1051.194580078125, "learning_rate": 3.1584e-05, "loss": 109.0689, "step": 19740 }, { "epoch": 0.07979249910107185, "grad_norm": 1163.12548828125, "learning_rate": 3.16e-05, "loss": 89.2682, "step": 19750 }, { "epoch": 0.07983290036643947, "grad_norm": 1967.873291015625, "learning_rate": 3.1616000000000004e-05, "loss": 128.6821, "step": 19760 }, { "epoch": 0.07987330163180711, "grad_norm": 1103.2349853515625, "learning_rate": 3.1632e-05, "loss": 147.6279, "step": 19770 }, { "epoch": 0.07991370289717474, "grad_norm": 1125.0118408203125, "learning_rate": 3.1648000000000006e-05, "loss": 92.0829, "step": 19780 }, { "epoch": 0.07995410416254237, "grad_norm": 1415.862060546875, "learning_rate": 3.1664e-05, "loss": 111.4338, "step": 19790 }, { "epoch": 0.07999450542791, "grad_norm": 1246.4283447265625, "learning_rate": 3.168e-05, "loss": 83.8045, "step": 19800 }, { "epoch": 0.08003490669327763, "grad_norm": 1319.5565185546875, "learning_rate": 3.1696e-05, "loss": 138.7009, "step": 19810 }, { "epoch": 0.08007530795864526, "grad_norm": 539.1595458984375, "learning_rate": 3.1712e-05, "loss": 143.3671, "step": 19820 }, { "epoch": 0.0801157092240129, "grad_norm": 2494.538818359375, "learning_rate": 3.1728000000000005e-05, "loss": 186.8914, "step": 19830 }, { "epoch": 0.08015611048938052, "grad_norm": 1490.8349609375, "learning_rate": 3.1744e-05, "loss": 178.6781, "step": 19840 }, { "epoch": 0.08019651175474816, "grad_norm": 1049.079833984375, "learning_rate": 3.176e-05, "loss": 146.0306, "step": 19850 }, { "epoch": 0.08023691302011579, "grad_norm": 1048.5006103515625, "learning_rate": 3.1776e-05, "loss": 73.0426, "step": 19860 }, { "epoch": 0.08027731428548342, "grad_norm": 962.3606567382812, "learning_rate": 3.1792e-05, "loss": 88.1, "step": 19870 }, { "epoch": 0.08031771555085106, "grad_norm": 962.2769775390625, "learning_rate": 3.1808000000000004e-05, "loss": 144.6858, "step": 19880 }, { "epoch": 0.08035811681621868, "grad_norm": 517.8065185546875, "learning_rate": 3.1824e-05, "loss": 135.4217, "step": 19890 }, { "epoch": 0.08039851808158631, "grad_norm": 0.0, "learning_rate": 3.184000000000001e-05, "loss": 102.9744, "step": 19900 }, { "epoch": 0.08043891934695395, "grad_norm": 2071.30810546875, "learning_rate": 3.1856e-05, "loss": 223.0694, "step": 19910 }, { "epoch": 0.08047932061232158, "grad_norm": 726.2340087890625, "learning_rate": 3.1872e-05, "loss": 85.1156, "step": 19920 }, { "epoch": 0.08051972187768922, "grad_norm": 1996.00390625, "learning_rate": 3.1888000000000004e-05, "loss": 90.4812, "step": 19930 }, { "epoch": 0.08056012314305684, "grad_norm": 665.4238891601562, "learning_rate": 3.1904000000000005e-05, "loss": 82.7014, "step": 19940 }, { "epoch": 0.08060052440842447, "grad_norm": 724.19970703125, "learning_rate": 3.1920000000000006e-05, "loss": 107.1705, "step": 19950 }, { "epoch": 0.08064092567379211, "grad_norm": 1721.2724609375, "learning_rate": 3.1936e-05, "loss": 129.9797, "step": 19960 }, { "epoch": 0.08068132693915973, "grad_norm": 3038.216796875, "learning_rate": 3.1952e-05, "loss": 177.2808, "step": 19970 }, { "epoch": 0.08072172820452736, "grad_norm": 1314.772705078125, "learning_rate": 3.1968e-05, "loss": 113.6141, "step": 19980 }, { "epoch": 0.080762129469895, "grad_norm": 785.627685546875, "learning_rate": 3.1984000000000004e-05, "loss": 89.7999, "step": 19990 }, { "epoch": 0.08080253073526263, "grad_norm": 745.5064697265625, "learning_rate": 3.2000000000000005e-05, "loss": 118.3001, "step": 20000 }, { "epoch": 0.08084293200063027, "grad_norm": 6274.64111328125, "learning_rate": 3.2016e-05, "loss": 188.7399, "step": 20010 }, { "epoch": 0.08088333326599789, "grad_norm": 613.6302490234375, "learning_rate": 3.2032e-05, "loss": 105.8049, "step": 20020 }, { "epoch": 0.08092373453136552, "grad_norm": 408.5493469238281, "learning_rate": 3.2048e-05, "loss": 114.5295, "step": 20030 }, { "epoch": 0.08096413579673316, "grad_norm": 1796.3682861328125, "learning_rate": 3.2064e-05, "loss": 145.9985, "step": 20040 }, { "epoch": 0.08100453706210078, "grad_norm": 750.31396484375, "learning_rate": 3.2080000000000005e-05, "loss": 119.3285, "step": 20050 }, { "epoch": 0.08104493832746841, "grad_norm": 664.3645629882812, "learning_rate": 3.2096000000000006e-05, "loss": 124.7819, "step": 20060 }, { "epoch": 0.08108533959283605, "grad_norm": 3907.92431640625, "learning_rate": 3.2112e-05, "loss": 184.4771, "step": 20070 }, { "epoch": 0.08112574085820368, "grad_norm": 1053.4244384765625, "learning_rate": 3.2128e-05, "loss": 169.1388, "step": 20080 }, { "epoch": 0.08116614212357132, "grad_norm": 1770.0511474609375, "learning_rate": 3.2144e-05, "loss": 109.763, "step": 20090 }, { "epoch": 0.08120654338893894, "grad_norm": 2079.87548828125, "learning_rate": 3.2160000000000004e-05, "loss": 118.6706, "step": 20100 }, { "epoch": 0.08124694465430657, "grad_norm": 1446.0218505859375, "learning_rate": 3.2176000000000005e-05, "loss": 129.1161, "step": 20110 }, { "epoch": 0.08128734591967421, "grad_norm": 944.9737548828125, "learning_rate": 3.2192e-05, "loss": 145.1025, "step": 20120 }, { "epoch": 0.08132774718504183, "grad_norm": 489.6378173828125, "learning_rate": 3.2208e-05, "loss": 143.1353, "step": 20130 }, { "epoch": 0.08136814845040946, "grad_norm": 4239.8681640625, "learning_rate": 3.2224e-05, "loss": 146.636, "step": 20140 }, { "epoch": 0.0814085497157771, "grad_norm": 375.4095153808594, "learning_rate": 3.224e-05, "loss": 119.1396, "step": 20150 }, { "epoch": 0.08144895098114473, "grad_norm": 678.9210205078125, "learning_rate": 3.2256000000000004e-05, "loss": 114.3637, "step": 20160 }, { "epoch": 0.08148935224651237, "grad_norm": 1916.13671875, "learning_rate": 3.2272e-05, "loss": 107.4577, "step": 20170 }, { "epoch": 0.08152975351188, "grad_norm": 1574.0010986328125, "learning_rate": 3.228800000000001e-05, "loss": 148.9322, "step": 20180 }, { "epoch": 0.08157015477724762, "grad_norm": 1359.7548828125, "learning_rate": 3.2304e-05, "loss": 115.4104, "step": 20190 }, { "epoch": 0.08161055604261526, "grad_norm": 838.4428100585938, "learning_rate": 3.232e-05, "loss": 116.7437, "step": 20200 }, { "epoch": 0.08165095730798289, "grad_norm": 653.6634521484375, "learning_rate": 3.2336000000000003e-05, "loss": 110.3881, "step": 20210 }, { "epoch": 0.08169135857335051, "grad_norm": 947.4215698242188, "learning_rate": 3.2352e-05, "loss": 116.1792, "step": 20220 }, { "epoch": 0.08173175983871815, "grad_norm": 3714.198974609375, "learning_rate": 3.2368000000000006e-05, "loss": 164.5668, "step": 20230 }, { "epoch": 0.08177216110408578, "grad_norm": 0.0, "learning_rate": 3.2384e-05, "loss": 127.3948, "step": 20240 }, { "epoch": 0.08181256236945342, "grad_norm": 899.4036865234375, "learning_rate": 3.24e-05, "loss": 109.6105, "step": 20250 }, { "epoch": 0.08185296363482104, "grad_norm": 1221.5169677734375, "learning_rate": 3.2416e-05, "loss": 108.7112, "step": 20260 }, { "epoch": 0.08189336490018867, "grad_norm": 945.672607421875, "learning_rate": 3.2432000000000004e-05, "loss": 110.9536, "step": 20270 }, { "epoch": 0.08193376616555631, "grad_norm": 659.3902587890625, "learning_rate": 3.2448000000000005e-05, "loss": 145.62, "step": 20280 }, { "epoch": 0.08197416743092394, "grad_norm": 936.4209594726562, "learning_rate": 3.2464e-05, "loss": 99.8831, "step": 20290 }, { "epoch": 0.08201456869629156, "grad_norm": 6973.3740234375, "learning_rate": 3.248000000000001e-05, "loss": 113.0091, "step": 20300 }, { "epoch": 0.0820549699616592, "grad_norm": 1618.539794921875, "learning_rate": 3.2496e-05, "loss": 175.2063, "step": 20310 }, { "epoch": 0.08209537122702683, "grad_norm": 1092.4254150390625, "learning_rate": 3.2512e-05, "loss": 95.937, "step": 20320 }, { "epoch": 0.08213577249239447, "grad_norm": 1008.83544921875, "learning_rate": 3.2528000000000004e-05, "loss": 123.3814, "step": 20330 }, { "epoch": 0.0821761737577621, "grad_norm": 811.6151123046875, "learning_rate": 3.2544000000000006e-05, "loss": 111.5735, "step": 20340 }, { "epoch": 0.08221657502312972, "grad_norm": 2305.8076171875, "learning_rate": 3.256e-05, "loss": 105.6014, "step": 20350 }, { "epoch": 0.08225697628849736, "grad_norm": 874.2246704101562, "learning_rate": 3.2576e-05, "loss": 84.1637, "step": 20360 }, { "epoch": 0.08229737755386499, "grad_norm": 5405.212890625, "learning_rate": 3.2592e-05, "loss": 155.2143, "step": 20370 }, { "epoch": 0.08233777881923261, "grad_norm": 2860.93994140625, "learning_rate": 3.2608000000000004e-05, "loss": 112.6364, "step": 20380 }, { "epoch": 0.08237818008460025, "grad_norm": 3501.953125, "learning_rate": 3.2624000000000005e-05, "loss": 117.0012, "step": 20390 }, { "epoch": 0.08241858134996788, "grad_norm": 786.2649536132812, "learning_rate": 3.264e-05, "loss": 118.425, "step": 20400 }, { "epoch": 0.08245898261533552, "grad_norm": 5292.2001953125, "learning_rate": 3.2656e-05, "loss": 100.0973, "step": 20410 }, { "epoch": 0.08249938388070314, "grad_norm": 1418.3062744140625, "learning_rate": 3.2672e-05, "loss": 155.9431, "step": 20420 }, { "epoch": 0.08253978514607077, "grad_norm": 905.533935546875, "learning_rate": 3.2688e-05, "loss": 98.626, "step": 20430 }, { "epoch": 0.08258018641143841, "grad_norm": 3069.26416015625, "learning_rate": 3.2704000000000004e-05, "loss": 118.547, "step": 20440 }, { "epoch": 0.08262058767680604, "grad_norm": 2428.871826171875, "learning_rate": 3.272e-05, "loss": 119.8318, "step": 20450 }, { "epoch": 0.08266098894217366, "grad_norm": 411.2120361328125, "learning_rate": 3.2736000000000006e-05, "loss": 114.0648, "step": 20460 }, { "epoch": 0.0827013902075413, "grad_norm": 1001.8212890625, "learning_rate": 3.2752e-05, "loss": 158.7806, "step": 20470 }, { "epoch": 0.08274179147290893, "grad_norm": 736.3692016601562, "learning_rate": 3.2768e-05, "loss": 97.0208, "step": 20480 }, { "epoch": 0.08278219273827657, "grad_norm": 1338.68994140625, "learning_rate": 3.2784e-05, "loss": 145.6164, "step": 20490 }, { "epoch": 0.0828225940036442, "grad_norm": 2277.1337890625, "learning_rate": 3.28e-05, "loss": 156.1081, "step": 20500 }, { "epoch": 0.08286299526901182, "grad_norm": 639.820556640625, "learning_rate": 3.2816000000000006e-05, "loss": 148.0952, "step": 20510 }, { "epoch": 0.08290339653437946, "grad_norm": 976.7066650390625, "learning_rate": 3.2832e-05, "loss": 88.4951, "step": 20520 }, { "epoch": 0.08294379779974709, "grad_norm": 920.2750244140625, "learning_rate": 3.2848e-05, "loss": 140.128, "step": 20530 }, { "epoch": 0.08298419906511471, "grad_norm": 604.2228393554688, "learning_rate": 3.2864e-05, "loss": 91.421, "step": 20540 }, { "epoch": 0.08302460033048235, "grad_norm": 1255.64013671875, "learning_rate": 3.2880000000000004e-05, "loss": 114.2171, "step": 20550 }, { "epoch": 0.08306500159584998, "grad_norm": 1299.9122314453125, "learning_rate": 3.2896000000000005e-05, "loss": 102.5171, "step": 20560 }, { "epoch": 0.08310540286121762, "grad_norm": 926.244384765625, "learning_rate": 3.2912e-05, "loss": 64.6635, "step": 20570 }, { "epoch": 0.08314580412658525, "grad_norm": 982.9434814453125, "learning_rate": 3.292800000000001e-05, "loss": 145.3727, "step": 20580 }, { "epoch": 0.08318620539195287, "grad_norm": 685.9315795898438, "learning_rate": 3.2944e-05, "loss": 137.4475, "step": 20590 }, { "epoch": 0.08322660665732051, "grad_norm": 750.7125244140625, "learning_rate": 3.296e-05, "loss": 162.0429, "step": 20600 }, { "epoch": 0.08326700792268814, "grad_norm": 1121.966064453125, "learning_rate": 3.2976000000000004e-05, "loss": 163.8441, "step": 20610 }, { "epoch": 0.08330740918805576, "grad_norm": 2098.244384765625, "learning_rate": 3.2992e-05, "loss": 140.6999, "step": 20620 }, { "epoch": 0.0833478104534234, "grad_norm": 1297.5299072265625, "learning_rate": 3.3008000000000007e-05, "loss": 161.9485, "step": 20630 }, { "epoch": 0.08338821171879103, "grad_norm": 764.8720703125, "learning_rate": 3.3024e-05, "loss": 131.4767, "step": 20640 }, { "epoch": 0.08342861298415867, "grad_norm": 3363.512939453125, "learning_rate": 3.304e-05, "loss": 101.7747, "step": 20650 }, { "epoch": 0.0834690142495263, "grad_norm": 4445.4267578125, "learning_rate": 3.3056e-05, "loss": 174.0515, "step": 20660 }, { "epoch": 0.08350941551489392, "grad_norm": 1621.59228515625, "learning_rate": 3.3072000000000005e-05, "loss": 162.773, "step": 20670 }, { "epoch": 0.08354981678026156, "grad_norm": 758.7988891601562, "learning_rate": 3.3088000000000006e-05, "loss": 110.7945, "step": 20680 }, { "epoch": 0.08359021804562919, "grad_norm": 1453.5654296875, "learning_rate": 3.3104e-05, "loss": 124.9009, "step": 20690 }, { "epoch": 0.08363061931099681, "grad_norm": 1057.9686279296875, "learning_rate": 3.312e-05, "loss": 149.3531, "step": 20700 }, { "epoch": 0.08367102057636445, "grad_norm": 805.1429443359375, "learning_rate": 3.3136e-05, "loss": 150.6182, "step": 20710 }, { "epoch": 0.08371142184173208, "grad_norm": 963.612060546875, "learning_rate": 3.3152000000000004e-05, "loss": 112.0466, "step": 20720 }, { "epoch": 0.08375182310709972, "grad_norm": 832.54296875, "learning_rate": 3.3168000000000005e-05, "loss": 135.5144, "step": 20730 }, { "epoch": 0.08379222437246735, "grad_norm": 8515.330078125, "learning_rate": 3.3184000000000006e-05, "loss": 197.9367, "step": 20740 }, { "epoch": 0.08383262563783497, "grad_norm": 5629.37890625, "learning_rate": 3.32e-05, "loss": 80.24, "step": 20750 }, { "epoch": 0.08387302690320261, "grad_norm": 1472.2364501953125, "learning_rate": 3.3216e-05, "loss": 125.4584, "step": 20760 }, { "epoch": 0.08391342816857024, "grad_norm": 5512.04736328125, "learning_rate": 3.3232e-05, "loss": 111.62, "step": 20770 }, { "epoch": 0.08395382943393787, "grad_norm": 759.9108276367188, "learning_rate": 3.3248000000000004e-05, "loss": 129.376, "step": 20780 }, { "epoch": 0.0839942306993055, "grad_norm": 670.1220092773438, "learning_rate": 3.3264000000000005e-05, "loss": 116.9635, "step": 20790 }, { "epoch": 0.08403463196467313, "grad_norm": 3641.0625, "learning_rate": 3.328e-05, "loss": 161.1426, "step": 20800 }, { "epoch": 0.08407503323004077, "grad_norm": 3545.739013671875, "learning_rate": 3.3296e-05, "loss": 140.4873, "step": 20810 }, { "epoch": 0.0841154344954084, "grad_norm": 1007.53564453125, "learning_rate": 3.3312e-05, "loss": 94.6542, "step": 20820 }, { "epoch": 0.08415583576077602, "grad_norm": 993.787109375, "learning_rate": 3.3328000000000003e-05, "loss": 132.4424, "step": 20830 }, { "epoch": 0.08419623702614366, "grad_norm": 1444.1688232421875, "learning_rate": 3.3344000000000005e-05, "loss": 132.11, "step": 20840 }, { "epoch": 0.08423663829151129, "grad_norm": 785.0534057617188, "learning_rate": 3.336e-05, "loss": 77.0269, "step": 20850 }, { "epoch": 0.08427703955687892, "grad_norm": 847.0296020507812, "learning_rate": 3.337600000000001e-05, "loss": 87.6466, "step": 20860 }, { "epoch": 0.08431744082224656, "grad_norm": 1261.2034912109375, "learning_rate": 3.3392e-05, "loss": 90.1927, "step": 20870 }, { "epoch": 0.08435784208761418, "grad_norm": 1592.902099609375, "learning_rate": 3.3408e-05, "loss": 159.0212, "step": 20880 }, { "epoch": 0.08439824335298182, "grad_norm": 933.5139770507812, "learning_rate": 3.3424000000000004e-05, "loss": 109.2687, "step": 20890 }, { "epoch": 0.08443864461834945, "grad_norm": 2483.051513671875, "learning_rate": 3.344e-05, "loss": 133.2125, "step": 20900 }, { "epoch": 0.08447904588371707, "grad_norm": 1393.5037841796875, "learning_rate": 3.3456000000000006e-05, "loss": 215.5028, "step": 20910 }, { "epoch": 0.08451944714908471, "grad_norm": 3210.046142578125, "learning_rate": 3.3472e-05, "loss": 112.5895, "step": 20920 }, { "epoch": 0.08455984841445234, "grad_norm": 867.4534301757812, "learning_rate": 3.3488e-05, "loss": 117.9855, "step": 20930 }, { "epoch": 0.08460024967981997, "grad_norm": 897.9375, "learning_rate": 3.3504e-05, "loss": 116.4767, "step": 20940 }, { "epoch": 0.0846406509451876, "grad_norm": 829.8159790039062, "learning_rate": 3.3520000000000004e-05, "loss": 117.4122, "step": 20950 }, { "epoch": 0.08468105221055523, "grad_norm": 496.8394470214844, "learning_rate": 3.3536000000000006e-05, "loss": 100.3099, "step": 20960 }, { "epoch": 0.08472145347592287, "grad_norm": 1295.52783203125, "learning_rate": 3.3552e-05, "loss": 130.5201, "step": 20970 }, { "epoch": 0.0847618547412905, "grad_norm": 1697.2413330078125, "learning_rate": 3.3568e-05, "loss": 81.13, "step": 20980 }, { "epoch": 0.08480225600665812, "grad_norm": 642.0286254882812, "learning_rate": 3.3584e-05, "loss": 124.2179, "step": 20990 }, { "epoch": 0.08484265727202576, "grad_norm": 825.5877075195312, "learning_rate": 3.3600000000000004e-05, "loss": 93.1462, "step": 21000 }, { "epoch": 0.08488305853739339, "grad_norm": 2915.330078125, "learning_rate": 3.3616000000000005e-05, "loss": 183.7277, "step": 21010 }, { "epoch": 0.08492345980276102, "grad_norm": 5314.201171875, "learning_rate": 3.3632e-05, "loss": 161.8029, "step": 21020 }, { "epoch": 0.08496386106812866, "grad_norm": 309.57415771484375, "learning_rate": 3.3648e-05, "loss": 97.7818, "step": 21030 }, { "epoch": 0.08500426233349628, "grad_norm": 727.4566040039062, "learning_rate": 3.3664e-05, "loss": 84.2195, "step": 21040 }, { "epoch": 0.08504466359886392, "grad_norm": 1173.5078125, "learning_rate": 3.368e-05, "loss": 110.4723, "step": 21050 }, { "epoch": 0.08508506486423155, "grad_norm": 1007.157958984375, "learning_rate": 3.3696000000000004e-05, "loss": 79.8195, "step": 21060 }, { "epoch": 0.08512546612959918, "grad_norm": 647.617431640625, "learning_rate": 3.3712000000000005e-05, "loss": 159.9282, "step": 21070 }, { "epoch": 0.08516586739496682, "grad_norm": 784.1207275390625, "learning_rate": 3.3728e-05, "loss": 120.1622, "step": 21080 }, { "epoch": 0.08520626866033444, "grad_norm": 5701.87451171875, "learning_rate": 3.3744e-05, "loss": 126.8908, "step": 21090 }, { "epoch": 0.08524666992570207, "grad_norm": 1457.8411865234375, "learning_rate": 3.376e-05, "loss": 111.7649, "step": 21100 }, { "epoch": 0.08528707119106971, "grad_norm": 1022.2379760742188, "learning_rate": 3.3776e-05, "loss": 116.9885, "step": 21110 }, { "epoch": 0.08532747245643733, "grad_norm": 665.683837890625, "learning_rate": 3.3792000000000004e-05, "loss": 124.5699, "step": 21120 }, { "epoch": 0.08536787372180497, "grad_norm": 565.2042846679688, "learning_rate": 3.3808e-05, "loss": 128.3419, "step": 21130 }, { "epoch": 0.0854082749871726, "grad_norm": 1426.2344970703125, "learning_rate": 3.382400000000001e-05, "loss": 169.1157, "step": 21140 }, { "epoch": 0.08544867625254023, "grad_norm": 1249.49267578125, "learning_rate": 3.384e-05, "loss": 109.0338, "step": 21150 }, { "epoch": 0.08548907751790787, "grad_norm": 712.49609375, "learning_rate": 3.3856e-05, "loss": 85.7494, "step": 21160 }, { "epoch": 0.08552947878327549, "grad_norm": 692.7472534179688, "learning_rate": 3.3872000000000004e-05, "loss": 111.5182, "step": 21170 }, { "epoch": 0.08556988004864312, "grad_norm": 441.89892578125, "learning_rate": 3.3888e-05, "loss": 99.3837, "step": 21180 }, { "epoch": 0.08561028131401076, "grad_norm": 945.43603515625, "learning_rate": 3.3904000000000006e-05, "loss": 138.1833, "step": 21190 }, { "epoch": 0.08565068257937838, "grad_norm": 1521.3837890625, "learning_rate": 3.392e-05, "loss": 123.8109, "step": 21200 }, { "epoch": 0.08569108384474602, "grad_norm": 2716.091796875, "learning_rate": 3.3936e-05, "loss": 176.2317, "step": 21210 }, { "epoch": 0.08573148511011365, "grad_norm": 2309.829833984375, "learning_rate": 3.3952e-05, "loss": 166.3631, "step": 21220 }, { "epoch": 0.08577188637548128, "grad_norm": 1705.5728759765625, "learning_rate": 3.3968000000000004e-05, "loss": 127.6594, "step": 21230 }, { "epoch": 0.08581228764084892, "grad_norm": 388.3826599121094, "learning_rate": 3.3984000000000005e-05, "loss": 91.3603, "step": 21240 }, { "epoch": 0.08585268890621654, "grad_norm": 1309.0146484375, "learning_rate": 3.4e-05, "loss": 95.5241, "step": 21250 }, { "epoch": 0.08589309017158417, "grad_norm": 1299.4666748046875, "learning_rate": 3.401600000000001e-05, "loss": 138.5965, "step": 21260 }, { "epoch": 0.08593349143695181, "grad_norm": 1387.5, "learning_rate": 3.4032e-05, "loss": 112.3464, "step": 21270 }, { "epoch": 0.08597389270231943, "grad_norm": 1501.5421142578125, "learning_rate": 3.4048e-05, "loss": 121.9169, "step": 21280 }, { "epoch": 0.08601429396768706, "grad_norm": 7527.328125, "learning_rate": 3.4064000000000005e-05, "loss": 171.4414, "step": 21290 }, { "epoch": 0.0860546952330547, "grad_norm": 991.0069580078125, "learning_rate": 3.408e-05, "loss": 113.5109, "step": 21300 }, { "epoch": 0.08609509649842233, "grad_norm": 1593.8912353515625, "learning_rate": 3.409600000000001e-05, "loss": 119.5967, "step": 21310 }, { "epoch": 0.08613549776378997, "grad_norm": 949.4871826171875, "learning_rate": 3.4112e-05, "loss": 102.3903, "step": 21320 }, { "epoch": 0.0861758990291576, "grad_norm": 753.6689453125, "learning_rate": 3.4128e-05, "loss": 103.7823, "step": 21330 }, { "epoch": 0.08621630029452522, "grad_norm": 1338.2713623046875, "learning_rate": 3.4144000000000004e-05, "loss": 130.913, "step": 21340 }, { "epoch": 0.08625670155989286, "grad_norm": 973.3551635742188, "learning_rate": 3.4160000000000005e-05, "loss": 106.8074, "step": 21350 }, { "epoch": 0.08629710282526049, "grad_norm": 916.3033447265625, "learning_rate": 3.4176000000000006e-05, "loss": 103.7848, "step": 21360 }, { "epoch": 0.08633750409062811, "grad_norm": 810.3303833007812, "learning_rate": 3.4192e-05, "loss": 105.5932, "step": 21370 }, { "epoch": 0.08637790535599575, "grad_norm": 1097.08203125, "learning_rate": 3.4208e-05, "loss": 100.8104, "step": 21380 }, { "epoch": 0.08641830662136338, "grad_norm": 4735.88671875, "learning_rate": 3.4224e-05, "loss": 127.2736, "step": 21390 }, { "epoch": 0.08645870788673102, "grad_norm": 1749.6431884765625, "learning_rate": 3.4240000000000004e-05, "loss": 163.8167, "step": 21400 }, { "epoch": 0.08649910915209864, "grad_norm": 730.6497802734375, "learning_rate": 3.4256000000000005e-05, "loss": 152.2738, "step": 21410 }, { "epoch": 0.08653951041746627, "grad_norm": 882.814208984375, "learning_rate": 3.4272e-05, "loss": 102.3541, "step": 21420 }, { "epoch": 0.08657991168283391, "grad_norm": 1553.72216796875, "learning_rate": 3.4288e-05, "loss": 101.4461, "step": 21430 }, { "epoch": 0.08662031294820154, "grad_norm": 1559.2188720703125, "learning_rate": 3.4304e-05, "loss": 153.7359, "step": 21440 }, { "epoch": 0.08666071421356916, "grad_norm": 684.1407470703125, "learning_rate": 3.4320000000000003e-05, "loss": 108.5924, "step": 21450 }, { "epoch": 0.0867011154789368, "grad_norm": 3555.82177734375, "learning_rate": 3.4336000000000005e-05, "loss": 123.4738, "step": 21460 }, { "epoch": 0.08674151674430443, "grad_norm": 1726.3648681640625, "learning_rate": 3.4352000000000006e-05, "loss": 167.0885, "step": 21470 }, { "epoch": 0.08678191800967207, "grad_norm": 1249.389404296875, "learning_rate": 3.4368e-05, "loss": 67.5671, "step": 21480 }, { "epoch": 0.0868223192750397, "grad_norm": 1911.92431640625, "learning_rate": 3.4384e-05, "loss": 163.4095, "step": 21490 }, { "epoch": 0.08686272054040732, "grad_norm": 1018.9912109375, "learning_rate": 3.44e-05, "loss": 91.9574, "step": 21500 }, { "epoch": 0.08690312180577496, "grad_norm": 675.4153442382812, "learning_rate": 3.4416000000000004e-05, "loss": 121.4217, "step": 21510 }, { "epoch": 0.08694352307114259, "grad_norm": 1455.37451171875, "learning_rate": 3.4432000000000005e-05, "loss": 106.3664, "step": 21520 }, { "epoch": 0.08698392433651021, "grad_norm": 1519.950927734375, "learning_rate": 3.4448e-05, "loss": 118.7567, "step": 21530 }, { "epoch": 0.08702432560187785, "grad_norm": 1685.112548828125, "learning_rate": 3.446400000000001e-05, "loss": 80.7093, "step": 21540 }, { "epoch": 0.08706472686724548, "grad_norm": 848.3253784179688, "learning_rate": 3.448e-05, "loss": 123.2339, "step": 21550 }, { "epoch": 0.08710512813261312, "grad_norm": 3590.259765625, "learning_rate": 3.4496e-05, "loss": 131.1099, "step": 21560 }, { "epoch": 0.08714552939798074, "grad_norm": 1122.024169921875, "learning_rate": 3.4512000000000004e-05, "loss": 78.0208, "step": 21570 }, { "epoch": 0.08718593066334837, "grad_norm": 483.8023376464844, "learning_rate": 3.4528e-05, "loss": 119.6661, "step": 21580 }, { "epoch": 0.08722633192871601, "grad_norm": 793.3316650390625, "learning_rate": 3.454400000000001e-05, "loss": 118.78, "step": 21590 }, { "epoch": 0.08726673319408364, "grad_norm": 1123.4295654296875, "learning_rate": 3.456e-05, "loss": 133.3474, "step": 21600 }, { "epoch": 0.08730713445945126, "grad_norm": 718.1134033203125, "learning_rate": 3.4576e-05, "loss": 96.3448, "step": 21610 }, { "epoch": 0.0873475357248189, "grad_norm": 997.1589965820312, "learning_rate": 3.4592000000000004e-05, "loss": 106.7913, "step": 21620 }, { "epoch": 0.08738793699018653, "grad_norm": 533.8572998046875, "learning_rate": 3.4608000000000005e-05, "loss": 175.2727, "step": 21630 }, { "epoch": 0.08742833825555417, "grad_norm": 1356.1878662109375, "learning_rate": 3.4624000000000006e-05, "loss": 93.2392, "step": 21640 }, { "epoch": 0.0874687395209218, "grad_norm": 1554.261962890625, "learning_rate": 3.464e-05, "loss": 122.4991, "step": 21650 }, { "epoch": 0.08750914078628942, "grad_norm": 905.5225219726562, "learning_rate": 3.4656e-05, "loss": 106.4889, "step": 21660 }, { "epoch": 0.08754954205165706, "grad_norm": 484.3202209472656, "learning_rate": 3.4672e-05, "loss": 119.5927, "step": 21670 }, { "epoch": 0.08758994331702469, "grad_norm": 1324.7713623046875, "learning_rate": 3.4688000000000004e-05, "loss": 88.9022, "step": 21680 }, { "epoch": 0.08763034458239231, "grad_norm": 863.8072509765625, "learning_rate": 3.4704000000000005e-05, "loss": 123.0851, "step": 21690 }, { "epoch": 0.08767074584775995, "grad_norm": 345.5253601074219, "learning_rate": 3.472e-05, "loss": 139.7284, "step": 21700 }, { "epoch": 0.08771114711312758, "grad_norm": 1755.52001953125, "learning_rate": 3.4736e-05, "loss": 127.2942, "step": 21710 }, { "epoch": 0.08775154837849522, "grad_norm": 959.5047607421875, "learning_rate": 3.4752e-05, "loss": 115.2856, "step": 21720 }, { "epoch": 0.08779194964386285, "grad_norm": 599.0006103515625, "learning_rate": 3.4768e-05, "loss": 86.4051, "step": 21730 }, { "epoch": 0.08783235090923047, "grad_norm": 1719.8614501953125, "learning_rate": 3.4784000000000004e-05, "loss": 84.124, "step": 21740 }, { "epoch": 0.08787275217459811, "grad_norm": 744.1102905273438, "learning_rate": 3.4800000000000006e-05, "loss": 106.3017, "step": 21750 }, { "epoch": 0.08791315343996574, "grad_norm": 1090.3604736328125, "learning_rate": 3.4816e-05, "loss": 81.3375, "step": 21760 }, { "epoch": 0.08795355470533336, "grad_norm": 570.5980834960938, "learning_rate": 3.4832e-05, "loss": 100.2312, "step": 21770 }, { "epoch": 0.087993955970701, "grad_norm": 591.20947265625, "learning_rate": 3.4848e-05, "loss": 74.5655, "step": 21780 }, { "epoch": 0.08803435723606863, "grad_norm": 675.690185546875, "learning_rate": 3.4864000000000004e-05, "loss": 120.0532, "step": 21790 }, { "epoch": 0.08807475850143627, "grad_norm": 1083.811767578125, "learning_rate": 3.4880000000000005e-05, "loss": 161.6993, "step": 21800 }, { "epoch": 0.0881151597668039, "grad_norm": 1041.786376953125, "learning_rate": 3.4896e-05, "loss": 131.3204, "step": 21810 }, { "epoch": 0.08815556103217152, "grad_norm": 1283.564453125, "learning_rate": 3.4912e-05, "loss": 79.6838, "step": 21820 }, { "epoch": 0.08819596229753916, "grad_norm": 2376.157958984375, "learning_rate": 3.4928e-05, "loss": 119.8821, "step": 21830 }, { "epoch": 0.08823636356290679, "grad_norm": 623.5429077148438, "learning_rate": 3.4944e-05, "loss": 116.1281, "step": 21840 }, { "epoch": 0.08827676482827441, "grad_norm": 770.1627807617188, "learning_rate": 3.4960000000000004e-05, "loss": 60.8863, "step": 21850 }, { "epoch": 0.08831716609364205, "grad_norm": 1002.646240234375, "learning_rate": 3.4976e-05, "loss": 113.2037, "step": 21860 }, { "epoch": 0.08835756735900968, "grad_norm": 1004.6346435546875, "learning_rate": 3.4992000000000006e-05, "loss": 110.553, "step": 21870 }, { "epoch": 0.08839796862437732, "grad_norm": 1741.873046875, "learning_rate": 3.5008e-05, "loss": 111.8646, "step": 21880 }, { "epoch": 0.08843836988974495, "grad_norm": 1661.6435546875, "learning_rate": 3.5024e-05, "loss": 86.3073, "step": 21890 }, { "epoch": 0.08847877115511257, "grad_norm": 940.521728515625, "learning_rate": 3.504e-05, "loss": 89.9938, "step": 21900 }, { "epoch": 0.08851917242048021, "grad_norm": 1818.4632568359375, "learning_rate": 3.5056e-05, "loss": 138.901, "step": 21910 }, { "epoch": 0.08855957368584784, "grad_norm": 4558.5595703125, "learning_rate": 3.5072000000000006e-05, "loss": 95.3903, "step": 21920 }, { "epoch": 0.08859997495121547, "grad_norm": 2346.676025390625, "learning_rate": 3.5088e-05, "loss": 102.4624, "step": 21930 }, { "epoch": 0.0886403762165831, "grad_norm": 784.5299682617188, "learning_rate": 3.5104e-05, "loss": 110.0189, "step": 21940 }, { "epoch": 0.08868077748195073, "grad_norm": 2423.447265625, "learning_rate": 3.512e-05, "loss": 102.8573, "step": 21950 }, { "epoch": 0.08872117874731837, "grad_norm": 1321.034912109375, "learning_rate": 3.5136000000000004e-05, "loss": 162.9776, "step": 21960 }, { "epoch": 0.088761580012686, "grad_norm": 644.175048828125, "learning_rate": 3.5152000000000005e-05, "loss": 151.2046, "step": 21970 }, { "epoch": 0.08880198127805362, "grad_norm": 769.9696044921875, "learning_rate": 3.5168e-05, "loss": 102.919, "step": 21980 }, { "epoch": 0.08884238254342126, "grad_norm": 0.0, "learning_rate": 3.518400000000001e-05, "loss": 84.6088, "step": 21990 }, { "epoch": 0.08888278380878889, "grad_norm": 617.4421997070312, "learning_rate": 3.52e-05, "loss": 109.1384, "step": 22000 }, { "epoch": 0.08892318507415652, "grad_norm": 853.436279296875, "learning_rate": 3.5216e-05, "loss": 104.1218, "step": 22010 }, { "epoch": 0.08896358633952416, "grad_norm": 4421.9814453125, "learning_rate": 3.5232000000000004e-05, "loss": 65.627, "step": 22020 }, { "epoch": 0.08900398760489178, "grad_norm": 1764.05224609375, "learning_rate": 3.5248000000000005e-05, "loss": 142.3967, "step": 22030 }, { "epoch": 0.08904438887025942, "grad_norm": 1308.7003173828125, "learning_rate": 3.5264000000000007e-05, "loss": 102.8396, "step": 22040 }, { "epoch": 0.08908479013562705, "grad_norm": 1276.386474609375, "learning_rate": 3.528e-05, "loss": 83.7799, "step": 22050 }, { "epoch": 0.08912519140099467, "grad_norm": 1775.8756103515625, "learning_rate": 3.5296e-05, "loss": 130.5492, "step": 22060 }, { "epoch": 0.08916559266636231, "grad_norm": 742.988525390625, "learning_rate": 3.5312000000000003e-05, "loss": 103.372, "step": 22070 }, { "epoch": 0.08920599393172994, "grad_norm": 4215.49609375, "learning_rate": 3.5328000000000005e-05, "loss": 142.1393, "step": 22080 }, { "epoch": 0.08924639519709757, "grad_norm": 2923.902099609375, "learning_rate": 3.5344000000000006e-05, "loss": 157.1858, "step": 22090 }, { "epoch": 0.0892867964624652, "grad_norm": 1210.239501953125, "learning_rate": 3.536e-05, "loss": 130.1175, "step": 22100 }, { "epoch": 0.08932719772783283, "grad_norm": 2710.9931640625, "learning_rate": 3.5376e-05, "loss": 128.473, "step": 22110 }, { "epoch": 0.08936759899320047, "grad_norm": 1313.605712890625, "learning_rate": 3.5392e-05, "loss": 105.5896, "step": 22120 }, { "epoch": 0.0894080002585681, "grad_norm": 1979.692626953125, "learning_rate": 3.5408000000000004e-05, "loss": 122.7433, "step": 22130 }, { "epoch": 0.08944840152393572, "grad_norm": 878.58642578125, "learning_rate": 3.5424000000000005e-05, "loss": 97.7734, "step": 22140 }, { "epoch": 0.08948880278930336, "grad_norm": 1813.173828125, "learning_rate": 3.5440000000000006e-05, "loss": 125.1185, "step": 22150 }, { "epoch": 0.08952920405467099, "grad_norm": 1052.124267578125, "learning_rate": 3.5456e-05, "loss": 80.2057, "step": 22160 }, { "epoch": 0.08956960532003862, "grad_norm": 1051.0555419921875, "learning_rate": 3.5472e-05, "loss": 117.2363, "step": 22170 }, { "epoch": 0.08961000658540626, "grad_norm": 567.1451416015625, "learning_rate": 3.5488e-05, "loss": 90.6433, "step": 22180 }, { "epoch": 0.08965040785077388, "grad_norm": 1285.5130615234375, "learning_rate": 3.5504e-05, "loss": 114.9006, "step": 22190 }, { "epoch": 0.08969080911614152, "grad_norm": 1761.4788818359375, "learning_rate": 3.5520000000000006e-05, "loss": 107.4548, "step": 22200 }, { "epoch": 0.08973121038150915, "grad_norm": 922.4437866210938, "learning_rate": 3.5536e-05, "loss": 118.1805, "step": 22210 }, { "epoch": 0.08977161164687678, "grad_norm": 1029.1673583984375, "learning_rate": 3.5552e-05, "loss": 168.9282, "step": 22220 }, { "epoch": 0.08981201291224442, "grad_norm": 438.4372863769531, "learning_rate": 3.5568e-05, "loss": 81.5482, "step": 22230 }, { "epoch": 0.08985241417761204, "grad_norm": 587.6930541992188, "learning_rate": 3.5584000000000004e-05, "loss": 147.9191, "step": 22240 }, { "epoch": 0.08989281544297967, "grad_norm": 1436.885498046875, "learning_rate": 3.5600000000000005e-05, "loss": 141.406, "step": 22250 }, { "epoch": 0.08993321670834731, "grad_norm": 835.5823974609375, "learning_rate": 3.5616e-05, "loss": 103.0261, "step": 22260 }, { "epoch": 0.08997361797371493, "grad_norm": 604.3268432617188, "learning_rate": 3.563200000000001e-05, "loss": 114.0826, "step": 22270 }, { "epoch": 0.09001401923908257, "grad_norm": 694.15869140625, "learning_rate": 3.5648e-05, "loss": 81.4152, "step": 22280 }, { "epoch": 0.0900544205044502, "grad_norm": 412.0992736816406, "learning_rate": 3.5664e-05, "loss": 139.5334, "step": 22290 }, { "epoch": 0.09009482176981783, "grad_norm": 1818.29296875, "learning_rate": 3.5680000000000004e-05, "loss": 155.0708, "step": 22300 }, { "epoch": 0.09013522303518547, "grad_norm": 635.8440551757812, "learning_rate": 3.5696e-05, "loss": 100.3289, "step": 22310 }, { "epoch": 0.09017562430055309, "grad_norm": 358.85845947265625, "learning_rate": 3.5712000000000006e-05, "loss": 153.7336, "step": 22320 }, { "epoch": 0.09021602556592072, "grad_norm": 1078.0045166015625, "learning_rate": 3.5728e-05, "loss": 104.7138, "step": 22330 }, { "epoch": 0.09025642683128836, "grad_norm": 2302.094482421875, "learning_rate": 3.5744e-05, "loss": 104.76, "step": 22340 }, { "epoch": 0.09029682809665598, "grad_norm": 545.4147338867188, "learning_rate": 3.576e-05, "loss": 107.9992, "step": 22350 }, { "epoch": 0.09033722936202362, "grad_norm": 1128.086669921875, "learning_rate": 3.5776000000000004e-05, "loss": 95.0454, "step": 22360 }, { "epoch": 0.09037763062739125, "grad_norm": 1193.4002685546875, "learning_rate": 3.5792000000000006e-05, "loss": 112.622, "step": 22370 }, { "epoch": 0.09041803189275888, "grad_norm": 939.8592529296875, "learning_rate": 3.5808e-05, "loss": 104.9512, "step": 22380 }, { "epoch": 0.09045843315812652, "grad_norm": 720.6575317382812, "learning_rate": 3.5824e-05, "loss": 139.2956, "step": 22390 }, { "epoch": 0.09049883442349414, "grad_norm": 941.1958618164062, "learning_rate": 3.584e-05, "loss": 99.4069, "step": 22400 }, { "epoch": 0.09053923568886177, "grad_norm": 1065.10498046875, "learning_rate": 3.5856000000000004e-05, "loss": 107.6034, "step": 22410 }, { "epoch": 0.09057963695422941, "grad_norm": 668.1845092773438, "learning_rate": 3.5872000000000005e-05, "loss": 113.4124, "step": 22420 }, { "epoch": 0.09062003821959703, "grad_norm": 792.0282592773438, "learning_rate": 3.5888000000000006e-05, "loss": 110.8783, "step": 22430 }, { "epoch": 0.09066043948496467, "grad_norm": 0.0, "learning_rate": 3.5904e-05, "loss": 85.3758, "step": 22440 }, { "epoch": 0.0907008407503323, "grad_norm": 1627.4656982421875, "learning_rate": 3.592e-05, "loss": 135.8512, "step": 22450 }, { "epoch": 0.09074124201569993, "grad_norm": 17669.587890625, "learning_rate": 3.5936e-05, "loss": 167.7172, "step": 22460 }, { "epoch": 0.09078164328106757, "grad_norm": 830.4048461914062, "learning_rate": 3.5952000000000004e-05, "loss": 128.9894, "step": 22470 }, { "epoch": 0.0908220445464352, "grad_norm": 609.1981811523438, "learning_rate": 3.5968000000000005e-05, "loss": 131.7272, "step": 22480 }, { "epoch": 0.09086244581180282, "grad_norm": 829.3265991210938, "learning_rate": 3.5984e-05, "loss": 121.7255, "step": 22490 }, { "epoch": 0.09090284707717046, "grad_norm": 1340.07958984375, "learning_rate": 3.6e-05, "loss": 79.694, "step": 22500 }, { "epoch": 0.09094324834253809, "grad_norm": 1699.5284423828125, "learning_rate": 3.6016e-05, "loss": 96.8572, "step": 22510 }, { "epoch": 0.09098364960790573, "grad_norm": 1204.338623046875, "learning_rate": 3.6032e-05, "loss": 91.7086, "step": 22520 }, { "epoch": 0.09102405087327335, "grad_norm": 1220.1192626953125, "learning_rate": 3.6048000000000005e-05, "loss": 92.4644, "step": 22530 }, { "epoch": 0.09106445213864098, "grad_norm": 1748.9383544921875, "learning_rate": 3.6064e-05, "loss": 150.2315, "step": 22540 }, { "epoch": 0.09110485340400862, "grad_norm": 1452.05078125, "learning_rate": 3.608000000000001e-05, "loss": 85.5033, "step": 22550 }, { "epoch": 0.09114525466937624, "grad_norm": 678.501953125, "learning_rate": 3.6096e-05, "loss": 130.9142, "step": 22560 }, { "epoch": 0.09118565593474387, "grad_norm": 565.4130859375, "learning_rate": 3.6112e-05, "loss": 150.4763, "step": 22570 }, { "epoch": 0.09122605720011151, "grad_norm": 769.1754760742188, "learning_rate": 3.6128000000000004e-05, "loss": 125.7674, "step": 22580 }, { "epoch": 0.09126645846547914, "grad_norm": 881.0234375, "learning_rate": 3.6144e-05, "loss": 83.7712, "step": 22590 }, { "epoch": 0.09130685973084678, "grad_norm": 622.296142578125, "learning_rate": 3.6160000000000006e-05, "loss": 148.5823, "step": 22600 }, { "epoch": 0.0913472609962144, "grad_norm": 983.2738647460938, "learning_rate": 3.6176e-05, "loss": 107.8446, "step": 22610 }, { "epoch": 0.09138766226158203, "grad_norm": 887.962890625, "learning_rate": 3.6192e-05, "loss": 121.2446, "step": 22620 }, { "epoch": 0.09142806352694967, "grad_norm": 903.2112426757812, "learning_rate": 3.6208e-05, "loss": 122.9406, "step": 22630 }, { "epoch": 0.0914684647923173, "grad_norm": 401.5851135253906, "learning_rate": 3.6224000000000004e-05, "loss": 99.0492, "step": 22640 }, { "epoch": 0.09150886605768492, "grad_norm": 553.1171875, "learning_rate": 3.6240000000000005e-05, "loss": 122.498, "step": 22650 }, { "epoch": 0.09154926732305256, "grad_norm": 4576.8271484375, "learning_rate": 3.6256e-05, "loss": 138.7712, "step": 22660 }, { "epoch": 0.09158966858842019, "grad_norm": 603.9403686523438, "learning_rate": 3.627200000000001e-05, "loss": 124.1618, "step": 22670 }, { "epoch": 0.09163006985378783, "grad_norm": 1222.1373291015625, "learning_rate": 3.6288e-05, "loss": 108.6959, "step": 22680 }, { "epoch": 0.09167047111915545, "grad_norm": 766.6356201171875, "learning_rate": 3.6304000000000003e-05, "loss": 90.1852, "step": 22690 }, { "epoch": 0.09171087238452308, "grad_norm": 777.71240234375, "learning_rate": 3.6320000000000005e-05, "loss": 137.761, "step": 22700 }, { "epoch": 0.09175127364989072, "grad_norm": 0.0, "learning_rate": 3.6336e-05, "loss": 102.6547, "step": 22710 }, { "epoch": 0.09179167491525834, "grad_norm": 566.0398559570312, "learning_rate": 3.635200000000001e-05, "loss": 85.4628, "step": 22720 }, { "epoch": 0.09183207618062597, "grad_norm": 1419.7137451171875, "learning_rate": 3.6368e-05, "loss": 137.7339, "step": 22730 }, { "epoch": 0.09187247744599361, "grad_norm": 851.448486328125, "learning_rate": 3.6384e-05, "loss": 98.0729, "step": 22740 }, { "epoch": 0.09191287871136124, "grad_norm": 724.6092529296875, "learning_rate": 3.6400000000000004e-05, "loss": 54.0254, "step": 22750 }, { "epoch": 0.09195327997672888, "grad_norm": 1501.339599609375, "learning_rate": 3.6416000000000005e-05, "loss": 153.3385, "step": 22760 }, { "epoch": 0.0919936812420965, "grad_norm": 905.32177734375, "learning_rate": 3.6432000000000006e-05, "loss": 117.4171, "step": 22770 }, { "epoch": 0.09203408250746413, "grad_norm": 2430.184326171875, "learning_rate": 3.6448e-05, "loss": 111.6443, "step": 22780 }, { "epoch": 0.09207448377283177, "grad_norm": 992.3556518554688, "learning_rate": 3.6464e-05, "loss": 71.0798, "step": 22790 }, { "epoch": 0.0921148850381994, "grad_norm": 1678.62744140625, "learning_rate": 3.648e-05, "loss": 109.5951, "step": 22800 }, { "epoch": 0.09215528630356702, "grad_norm": 1451.3145751953125, "learning_rate": 3.6496000000000004e-05, "loss": 149.5164, "step": 22810 }, { "epoch": 0.09219568756893466, "grad_norm": 3507.716552734375, "learning_rate": 3.6512e-05, "loss": 90.6843, "step": 22820 }, { "epoch": 0.09223608883430229, "grad_norm": 825.1586303710938, "learning_rate": 3.652800000000001e-05, "loss": 92.9351, "step": 22830 }, { "epoch": 0.09227649009966993, "grad_norm": 1350.14306640625, "learning_rate": 3.6544e-05, "loss": 131.2104, "step": 22840 }, { "epoch": 0.09231689136503755, "grad_norm": 558.4495849609375, "learning_rate": 3.656e-05, "loss": 106.5891, "step": 22850 }, { "epoch": 0.09235729263040518, "grad_norm": 1416.712158203125, "learning_rate": 3.6576000000000004e-05, "loss": 134.2764, "step": 22860 }, { "epoch": 0.09239769389577282, "grad_norm": 883.6400756835938, "learning_rate": 3.6592e-05, "loss": 94.5502, "step": 22870 }, { "epoch": 0.09243809516114045, "grad_norm": 589.9857788085938, "learning_rate": 3.6608000000000006e-05, "loss": 65.831, "step": 22880 }, { "epoch": 0.09247849642650807, "grad_norm": 2002.652587890625, "learning_rate": 3.6624e-05, "loss": 136.2, "step": 22890 }, { "epoch": 0.09251889769187571, "grad_norm": 753.989501953125, "learning_rate": 3.664e-05, "loss": 104.6583, "step": 22900 }, { "epoch": 0.09255929895724334, "grad_norm": 615.0721435546875, "learning_rate": 3.6656e-05, "loss": 146.1161, "step": 22910 }, { "epoch": 0.09259970022261098, "grad_norm": 1668.8343505859375, "learning_rate": 3.6672000000000004e-05, "loss": 139.9676, "step": 22920 }, { "epoch": 0.0926401014879786, "grad_norm": 883.7720336914062, "learning_rate": 3.6688000000000005e-05, "loss": 132.2069, "step": 22930 }, { "epoch": 0.09268050275334623, "grad_norm": 5342.50244140625, "learning_rate": 3.6704e-05, "loss": 98.5303, "step": 22940 }, { "epoch": 0.09272090401871387, "grad_norm": 888.3287963867188, "learning_rate": 3.672000000000001e-05, "loss": 97.6094, "step": 22950 }, { "epoch": 0.0927613052840815, "grad_norm": 519.9688110351562, "learning_rate": 3.6736e-05, "loss": 110.704, "step": 22960 }, { "epoch": 0.09280170654944912, "grad_norm": 851.119140625, "learning_rate": 3.6752e-05, "loss": 122.4055, "step": 22970 }, { "epoch": 0.09284210781481676, "grad_norm": 1095.8951416015625, "learning_rate": 3.6768000000000004e-05, "loss": 85.4359, "step": 22980 }, { "epoch": 0.09288250908018439, "grad_norm": 1443.4359130859375, "learning_rate": 3.6784e-05, "loss": 105.7661, "step": 22990 }, { "epoch": 0.09292291034555203, "grad_norm": 1374.320068359375, "learning_rate": 3.680000000000001e-05, "loss": 84.9598, "step": 23000 }, { "epoch": 0.09296331161091966, "grad_norm": 1480.087890625, "learning_rate": 3.6816e-05, "loss": 86.5958, "step": 23010 }, { "epoch": 0.09300371287628728, "grad_norm": 1209.684326171875, "learning_rate": 3.6832e-05, "loss": 67.3631, "step": 23020 }, { "epoch": 0.09304411414165492, "grad_norm": 815.2809448242188, "learning_rate": 3.6848000000000004e-05, "loss": 140.1031, "step": 23030 }, { "epoch": 0.09308451540702255, "grad_norm": 1305.93994140625, "learning_rate": 3.6864000000000005e-05, "loss": 116.6758, "step": 23040 }, { "epoch": 0.09312491667239017, "grad_norm": 599.248046875, "learning_rate": 3.6880000000000006e-05, "loss": 282.2803, "step": 23050 }, { "epoch": 0.09316531793775781, "grad_norm": 695.9918823242188, "learning_rate": 3.6896e-05, "loss": 99.9409, "step": 23060 }, { "epoch": 0.09320571920312544, "grad_norm": 4307.1064453125, "learning_rate": 3.6912e-05, "loss": 114.5274, "step": 23070 }, { "epoch": 0.09324612046849308, "grad_norm": 1717.5181884765625, "learning_rate": 3.6928e-05, "loss": 120.5985, "step": 23080 }, { "epoch": 0.0932865217338607, "grad_norm": 1048.98095703125, "learning_rate": 3.6944000000000004e-05, "loss": 144.147, "step": 23090 }, { "epoch": 0.09332692299922833, "grad_norm": 451.38031005859375, "learning_rate": 3.6960000000000005e-05, "loss": 113.492, "step": 23100 }, { "epoch": 0.09336732426459597, "grad_norm": 307.1217346191406, "learning_rate": 3.6976e-05, "loss": 112.2755, "step": 23110 }, { "epoch": 0.0934077255299636, "grad_norm": 532.1742553710938, "learning_rate": 3.6992e-05, "loss": 81.0992, "step": 23120 }, { "epoch": 0.09344812679533122, "grad_norm": 1730.8272705078125, "learning_rate": 3.7008e-05, "loss": 133.7006, "step": 23130 }, { "epoch": 0.09348852806069886, "grad_norm": 671.4747314453125, "learning_rate": 3.7024e-05, "loss": 84.7934, "step": 23140 }, { "epoch": 0.09352892932606649, "grad_norm": 2103.27880859375, "learning_rate": 3.7040000000000005e-05, "loss": 173.953, "step": 23150 }, { "epoch": 0.09356933059143413, "grad_norm": 3488.06787109375, "learning_rate": 3.7056000000000006e-05, "loss": 128.9973, "step": 23160 }, { "epoch": 0.09360973185680176, "grad_norm": 819.060791015625, "learning_rate": 3.7072e-05, "loss": 62.6777, "step": 23170 }, { "epoch": 0.09365013312216938, "grad_norm": 905.0873413085938, "learning_rate": 3.7088e-05, "loss": 148.4846, "step": 23180 }, { "epoch": 0.09369053438753702, "grad_norm": 987.261474609375, "learning_rate": 3.7104e-05, "loss": 132.8778, "step": 23190 }, { "epoch": 0.09373093565290465, "grad_norm": 1234.3077392578125, "learning_rate": 3.7120000000000004e-05, "loss": 124.4064, "step": 23200 }, { "epoch": 0.09377133691827227, "grad_norm": 1419.2791748046875, "learning_rate": 3.7136000000000005e-05, "loss": 126.3721, "step": 23210 }, { "epoch": 0.09381173818363991, "grad_norm": 917.65771484375, "learning_rate": 3.7152e-05, "loss": 130.0472, "step": 23220 }, { "epoch": 0.09385213944900754, "grad_norm": 1203.3936767578125, "learning_rate": 3.716800000000001e-05, "loss": 180.4214, "step": 23230 }, { "epoch": 0.09389254071437518, "grad_norm": 631.2800903320312, "learning_rate": 3.7184e-05, "loss": 77.2428, "step": 23240 }, { "epoch": 0.0939329419797428, "grad_norm": 1260.2119140625, "learning_rate": 3.72e-05, "loss": 61.4884, "step": 23250 }, { "epoch": 0.09397334324511043, "grad_norm": 515.2831420898438, "learning_rate": 3.7216000000000004e-05, "loss": 105.8688, "step": 23260 }, { "epoch": 0.09401374451047807, "grad_norm": 374.6453857421875, "learning_rate": 3.7232e-05, "loss": 62.7642, "step": 23270 }, { "epoch": 0.0940541457758457, "grad_norm": 790.422607421875, "learning_rate": 3.7248000000000007e-05, "loss": 103.0694, "step": 23280 }, { "epoch": 0.09409454704121333, "grad_norm": 442.69647216796875, "learning_rate": 3.7264e-05, "loss": 73.2971, "step": 23290 }, { "epoch": 0.09413494830658097, "grad_norm": 1085.193115234375, "learning_rate": 3.728e-05, "loss": 98.9435, "step": 23300 }, { "epoch": 0.09417534957194859, "grad_norm": 536.7901000976562, "learning_rate": 3.7296000000000003e-05, "loss": 80.0706, "step": 23310 }, { "epoch": 0.09421575083731623, "grad_norm": 1735.353759765625, "learning_rate": 3.7312000000000005e-05, "loss": 116.9445, "step": 23320 }, { "epoch": 0.09425615210268386, "grad_norm": 1051.9068603515625, "learning_rate": 3.7328000000000006e-05, "loss": 107.8367, "step": 23330 }, { "epoch": 0.09429655336805148, "grad_norm": 678.4794921875, "learning_rate": 3.7344e-05, "loss": 101.073, "step": 23340 }, { "epoch": 0.09433695463341912, "grad_norm": 1435.1505126953125, "learning_rate": 3.736000000000001e-05, "loss": 91.6877, "step": 23350 }, { "epoch": 0.09437735589878675, "grad_norm": 940.3088989257812, "learning_rate": 3.7376e-05, "loss": 114.8032, "step": 23360 }, { "epoch": 0.09441775716415438, "grad_norm": 740.4794311523438, "learning_rate": 3.7392000000000004e-05, "loss": 86.9696, "step": 23370 }, { "epoch": 0.09445815842952202, "grad_norm": 2469.025634765625, "learning_rate": 3.7408000000000005e-05, "loss": 120.204, "step": 23380 }, { "epoch": 0.09449855969488964, "grad_norm": 856.591796875, "learning_rate": 3.7424e-05, "loss": 62.1455, "step": 23390 }, { "epoch": 0.09453896096025728, "grad_norm": 1139.650390625, "learning_rate": 3.744000000000001e-05, "loss": 174.0881, "step": 23400 }, { "epoch": 0.09457936222562491, "grad_norm": 1099.4644775390625, "learning_rate": 3.7456e-05, "loss": 116.2295, "step": 23410 }, { "epoch": 0.09461976349099253, "grad_norm": 955.2962646484375, "learning_rate": 3.7472e-05, "loss": 94.5716, "step": 23420 }, { "epoch": 0.09466016475636017, "grad_norm": 960.6361694335938, "learning_rate": 3.7488000000000004e-05, "loss": 73.6774, "step": 23430 }, { "epoch": 0.0947005660217278, "grad_norm": 728.4270629882812, "learning_rate": 3.7504000000000005e-05, "loss": 110.0601, "step": 23440 }, { "epoch": 0.09474096728709543, "grad_norm": 879.024658203125, "learning_rate": 3.752e-05, "loss": 126.9347, "step": 23450 }, { "epoch": 0.09478136855246307, "grad_norm": 607.6163940429688, "learning_rate": 3.7536e-05, "loss": 66.8819, "step": 23460 }, { "epoch": 0.09482176981783069, "grad_norm": 1249.79931640625, "learning_rate": 3.7552e-05, "loss": 109.3968, "step": 23470 }, { "epoch": 0.09486217108319833, "grad_norm": 820.2130126953125, "learning_rate": 3.7568000000000004e-05, "loss": 77.7926, "step": 23480 }, { "epoch": 0.09490257234856596, "grad_norm": 2280.809326171875, "learning_rate": 3.7584000000000005e-05, "loss": 138.4351, "step": 23490 }, { "epoch": 0.09494297361393358, "grad_norm": 1270.8955078125, "learning_rate": 3.76e-05, "loss": 141.0211, "step": 23500 }, { "epoch": 0.09498337487930122, "grad_norm": 768.01953125, "learning_rate": 3.7616e-05, "loss": 99.7928, "step": 23510 }, { "epoch": 0.09502377614466885, "grad_norm": 1329.7403564453125, "learning_rate": 3.7632e-05, "loss": 117.8346, "step": 23520 }, { "epoch": 0.09506417741003648, "grad_norm": 1108.1868896484375, "learning_rate": 3.7648e-05, "loss": 108.9952, "step": 23530 }, { "epoch": 0.09510457867540412, "grad_norm": 1093.89794921875, "learning_rate": 3.7664000000000004e-05, "loss": 86.9773, "step": 23540 }, { "epoch": 0.09514497994077174, "grad_norm": 1512.74951171875, "learning_rate": 3.768e-05, "loss": 117.1909, "step": 23550 }, { "epoch": 0.09518538120613938, "grad_norm": 701.7789916992188, "learning_rate": 3.7696000000000006e-05, "loss": 109.6943, "step": 23560 }, { "epoch": 0.09522578247150701, "grad_norm": 2115.947265625, "learning_rate": 3.7712e-05, "loss": 105.5409, "step": 23570 }, { "epoch": 0.09526618373687464, "grad_norm": 467.70703125, "learning_rate": 3.7728e-05, "loss": 79.5947, "step": 23580 }, { "epoch": 0.09530658500224228, "grad_norm": 736.294921875, "learning_rate": 3.7744e-05, "loss": 70.7979, "step": 23590 }, { "epoch": 0.0953469862676099, "grad_norm": 1899.92529296875, "learning_rate": 3.7760000000000004e-05, "loss": 131.5578, "step": 23600 }, { "epoch": 0.09538738753297753, "grad_norm": 848.14453125, "learning_rate": 3.7776000000000006e-05, "loss": 96.7246, "step": 23610 }, { "epoch": 0.09542778879834517, "grad_norm": 1070.447509765625, "learning_rate": 3.7792e-05, "loss": 119.4036, "step": 23620 }, { "epoch": 0.0954681900637128, "grad_norm": 1277.6422119140625, "learning_rate": 3.780800000000001e-05, "loss": 168.7875, "step": 23630 }, { "epoch": 0.09550859132908043, "grad_norm": 1350.6763916015625, "learning_rate": 3.7824e-05, "loss": 110.022, "step": 23640 }, { "epoch": 0.09554899259444806, "grad_norm": 2100.522705078125, "learning_rate": 3.7840000000000004e-05, "loss": 166.9165, "step": 23650 }, { "epoch": 0.09558939385981569, "grad_norm": 632.5545043945312, "learning_rate": 3.7856000000000005e-05, "loss": 102.5553, "step": 23660 }, { "epoch": 0.09562979512518333, "grad_norm": 1013.6865844726562, "learning_rate": 3.7872e-05, "loss": 115.3168, "step": 23670 }, { "epoch": 0.09567019639055095, "grad_norm": 834.7869262695312, "learning_rate": 3.788800000000001e-05, "loss": 154.9165, "step": 23680 }, { "epoch": 0.09571059765591858, "grad_norm": 854.434814453125, "learning_rate": 3.7904e-05, "loss": 92.1741, "step": 23690 }, { "epoch": 0.09575099892128622, "grad_norm": 899.4342041015625, "learning_rate": 3.792e-05, "loss": 121.1676, "step": 23700 }, { "epoch": 0.09579140018665384, "grad_norm": 489.4569091796875, "learning_rate": 3.7936000000000004e-05, "loss": 114.8512, "step": 23710 }, { "epoch": 0.09583180145202148, "grad_norm": 1061.2698974609375, "learning_rate": 3.7952000000000005e-05, "loss": 104.255, "step": 23720 }, { "epoch": 0.09587220271738911, "grad_norm": 2563.587158203125, "learning_rate": 3.7968000000000006e-05, "loss": 97.8892, "step": 23730 }, { "epoch": 0.09591260398275674, "grad_norm": 1684.666015625, "learning_rate": 3.7984e-05, "loss": 142.1081, "step": 23740 }, { "epoch": 0.09595300524812438, "grad_norm": 1298.2633056640625, "learning_rate": 3.8e-05, "loss": 108.3703, "step": 23750 }, { "epoch": 0.095993406513492, "grad_norm": 639.5582275390625, "learning_rate": 3.8016e-05, "loss": 103.8809, "step": 23760 }, { "epoch": 0.09603380777885963, "grad_norm": 884.7063598632812, "learning_rate": 3.8032000000000004e-05, "loss": 142.9607, "step": 23770 }, { "epoch": 0.09607420904422727, "grad_norm": 835.0308837890625, "learning_rate": 3.8048000000000006e-05, "loss": 98.5731, "step": 23780 }, { "epoch": 0.0961146103095949, "grad_norm": 1084.726806640625, "learning_rate": 3.8064e-05, "loss": 135.3973, "step": 23790 }, { "epoch": 0.09615501157496253, "grad_norm": 684.0401000976562, "learning_rate": 3.808e-05, "loss": 119.6357, "step": 23800 }, { "epoch": 0.09619541284033016, "grad_norm": 546.32421875, "learning_rate": 3.8096e-05, "loss": 89.1974, "step": 23810 }, { "epoch": 0.09623581410569779, "grad_norm": 661.587646484375, "learning_rate": 3.8112000000000004e-05, "loss": 111.5153, "step": 23820 }, { "epoch": 0.09627621537106543, "grad_norm": 1184.2430419921875, "learning_rate": 3.8128000000000005e-05, "loss": 151.34, "step": 23830 }, { "epoch": 0.09631661663643305, "grad_norm": 694.1065673828125, "learning_rate": 3.8144000000000006e-05, "loss": 130.5082, "step": 23840 }, { "epoch": 0.09635701790180068, "grad_norm": 1578.1820068359375, "learning_rate": 3.816e-05, "loss": 78.125, "step": 23850 }, { "epoch": 0.09639741916716832, "grad_norm": 810.7494506835938, "learning_rate": 3.8176e-05, "loss": 115.6929, "step": 23860 }, { "epoch": 0.09643782043253595, "grad_norm": 806.3323364257812, "learning_rate": 3.8192e-05, "loss": 88.014, "step": 23870 }, { "epoch": 0.09647822169790359, "grad_norm": 677.9561157226562, "learning_rate": 3.8208000000000004e-05, "loss": 92.2052, "step": 23880 }, { "epoch": 0.09651862296327121, "grad_norm": 843.2612915039062, "learning_rate": 3.8224000000000005e-05, "loss": 117.2728, "step": 23890 }, { "epoch": 0.09655902422863884, "grad_norm": 560.440673828125, "learning_rate": 3.824e-05, "loss": 156.6519, "step": 23900 }, { "epoch": 0.09659942549400648, "grad_norm": 2791.057373046875, "learning_rate": 3.8256e-05, "loss": 160.5463, "step": 23910 }, { "epoch": 0.0966398267593741, "grad_norm": 1825.56494140625, "learning_rate": 3.8272e-05, "loss": 76.3217, "step": 23920 }, { "epoch": 0.09668022802474173, "grad_norm": 756.5723876953125, "learning_rate": 3.8288000000000003e-05, "loss": 114.9041, "step": 23930 }, { "epoch": 0.09672062929010937, "grad_norm": 735.3499755859375, "learning_rate": 3.8304000000000005e-05, "loss": 125.154, "step": 23940 }, { "epoch": 0.096761030555477, "grad_norm": 1043.668701171875, "learning_rate": 3.832e-05, "loss": 118.3253, "step": 23950 }, { "epoch": 0.09680143182084464, "grad_norm": 694.8841552734375, "learning_rate": 3.833600000000001e-05, "loss": 103.1498, "step": 23960 }, { "epoch": 0.09684183308621226, "grad_norm": 1475.5992431640625, "learning_rate": 3.8352e-05, "loss": 121.2389, "step": 23970 }, { "epoch": 0.09688223435157989, "grad_norm": 506.0126953125, "learning_rate": 3.8368e-05, "loss": 80.8196, "step": 23980 }, { "epoch": 0.09692263561694753, "grad_norm": 885.1693115234375, "learning_rate": 3.8384000000000004e-05, "loss": 104.6312, "step": 23990 }, { "epoch": 0.09696303688231515, "grad_norm": 1443.1024169921875, "learning_rate": 3.8400000000000005e-05, "loss": 134.2576, "step": 24000 }, { "epoch": 0.09700343814768278, "grad_norm": 1300.169921875, "learning_rate": 3.8416000000000006e-05, "loss": 136.2243, "step": 24010 }, { "epoch": 0.09704383941305042, "grad_norm": 606.1035766601562, "learning_rate": 3.8432e-05, "loss": 129.7206, "step": 24020 }, { "epoch": 0.09708424067841805, "grad_norm": 1067.2373046875, "learning_rate": 3.844800000000001e-05, "loss": 117.3039, "step": 24030 }, { "epoch": 0.09712464194378569, "grad_norm": 827.10302734375, "learning_rate": 3.8464e-05, "loss": 133.863, "step": 24040 }, { "epoch": 0.09716504320915331, "grad_norm": 1323.8310546875, "learning_rate": 3.8480000000000004e-05, "loss": 97.7097, "step": 24050 }, { "epoch": 0.09720544447452094, "grad_norm": 694.2848510742188, "learning_rate": 3.8496000000000005e-05, "loss": 98.1374, "step": 24060 }, { "epoch": 0.09724584573988858, "grad_norm": 389.7241516113281, "learning_rate": 3.8512e-05, "loss": 56.3713, "step": 24070 }, { "epoch": 0.0972862470052562, "grad_norm": 1979.0894775390625, "learning_rate": 3.8528e-05, "loss": 135.2374, "step": 24080 }, { "epoch": 0.09732664827062383, "grad_norm": 920.7020263671875, "learning_rate": 3.8544e-05, "loss": 84.8128, "step": 24090 }, { "epoch": 0.09736704953599147, "grad_norm": 650.2858276367188, "learning_rate": 3.8560000000000004e-05, "loss": 124.8343, "step": 24100 }, { "epoch": 0.0974074508013591, "grad_norm": 748.7391357421875, "learning_rate": 3.8576000000000005e-05, "loss": 105.6386, "step": 24110 }, { "epoch": 0.09744785206672674, "grad_norm": 1407.361328125, "learning_rate": 3.8592000000000006e-05, "loss": 88.1447, "step": 24120 }, { "epoch": 0.09748825333209436, "grad_norm": 823.1781616210938, "learning_rate": 3.8608e-05, "loss": 94.2701, "step": 24130 }, { "epoch": 0.09752865459746199, "grad_norm": 1753.80712890625, "learning_rate": 3.8624e-05, "loss": 112.8359, "step": 24140 }, { "epoch": 0.09756905586282963, "grad_norm": 1930.0491943359375, "learning_rate": 3.864e-05, "loss": 108.6234, "step": 24150 }, { "epoch": 0.09760945712819726, "grad_norm": 720.0437622070312, "learning_rate": 3.8656000000000004e-05, "loss": 91.7267, "step": 24160 }, { "epoch": 0.09764985839356488, "grad_norm": 1543.5885009765625, "learning_rate": 3.8672000000000005e-05, "loss": 86.1541, "step": 24170 }, { "epoch": 0.09769025965893252, "grad_norm": 1376.3941650390625, "learning_rate": 3.8688e-05, "loss": 139.0348, "step": 24180 }, { "epoch": 0.09773066092430015, "grad_norm": 1557.67919921875, "learning_rate": 3.8704e-05, "loss": 105.9557, "step": 24190 }, { "epoch": 0.09777106218966779, "grad_norm": 1324.2451171875, "learning_rate": 3.872e-05, "loss": 102.7443, "step": 24200 }, { "epoch": 0.09781146345503541, "grad_norm": 921.9620361328125, "learning_rate": 3.8736e-05, "loss": 143.7736, "step": 24210 }, { "epoch": 0.09785186472040304, "grad_norm": 738.0506591796875, "learning_rate": 3.8752000000000004e-05, "loss": 104.4059, "step": 24220 }, { "epoch": 0.09789226598577068, "grad_norm": 3766.658447265625, "learning_rate": 3.8768e-05, "loss": 158.2326, "step": 24230 }, { "epoch": 0.0979326672511383, "grad_norm": 653.2822875976562, "learning_rate": 3.878400000000001e-05, "loss": 81.5535, "step": 24240 }, { "epoch": 0.09797306851650593, "grad_norm": 550.072265625, "learning_rate": 3.88e-05, "loss": 168.4727, "step": 24250 }, { "epoch": 0.09801346978187357, "grad_norm": 1663.3287353515625, "learning_rate": 3.8816e-05, "loss": 155.4968, "step": 24260 }, { "epoch": 0.0980538710472412, "grad_norm": 2497.78466796875, "learning_rate": 3.8832000000000004e-05, "loss": 90.2777, "step": 24270 }, { "epoch": 0.09809427231260884, "grad_norm": 3979.058837890625, "learning_rate": 3.8848e-05, "loss": 113.6011, "step": 24280 }, { "epoch": 0.09813467357797646, "grad_norm": 1241.6263427734375, "learning_rate": 3.8864000000000006e-05, "loss": 99.367, "step": 24290 }, { "epoch": 0.09817507484334409, "grad_norm": 650.6756591796875, "learning_rate": 3.888e-05, "loss": 86.7288, "step": 24300 }, { "epoch": 0.09821547610871173, "grad_norm": 764.9371948242188, "learning_rate": 3.8896e-05, "loss": 113.2154, "step": 24310 }, { "epoch": 0.09825587737407936, "grad_norm": 787.2350463867188, "learning_rate": 3.8912e-05, "loss": 99.8835, "step": 24320 }, { "epoch": 0.09829627863944698, "grad_norm": 998.755126953125, "learning_rate": 3.8928000000000004e-05, "loss": 94.1024, "step": 24330 }, { "epoch": 0.09833667990481462, "grad_norm": 388.8599853515625, "learning_rate": 3.8944000000000005e-05, "loss": 61.2629, "step": 24340 }, { "epoch": 0.09837708117018225, "grad_norm": 525.0775756835938, "learning_rate": 3.896e-05, "loss": 79.6292, "step": 24350 }, { "epoch": 0.09841748243554987, "grad_norm": 1979.02294921875, "learning_rate": 3.897600000000001e-05, "loss": 157.633, "step": 24360 }, { "epoch": 0.09845788370091751, "grad_norm": 598.3854370117188, "learning_rate": 3.8992e-05, "loss": 96.9426, "step": 24370 }, { "epoch": 0.09849828496628514, "grad_norm": 1230.67724609375, "learning_rate": 3.9008e-05, "loss": 129.7387, "step": 24380 }, { "epoch": 0.09853868623165278, "grad_norm": 1653.3104248046875, "learning_rate": 3.9024000000000004e-05, "loss": 131.7615, "step": 24390 }, { "epoch": 0.0985790874970204, "grad_norm": 907.8214111328125, "learning_rate": 3.9040000000000006e-05, "loss": 127.1972, "step": 24400 }, { "epoch": 0.09861948876238803, "grad_norm": 3581.931640625, "learning_rate": 3.905600000000001e-05, "loss": 154.1143, "step": 24410 }, { "epoch": 0.09865989002775567, "grad_norm": 1160.47119140625, "learning_rate": 3.9072e-05, "loss": 133.1434, "step": 24420 }, { "epoch": 0.0987002912931233, "grad_norm": 890.6558227539062, "learning_rate": 3.9088e-05, "loss": 78.0879, "step": 24430 }, { "epoch": 0.09874069255849093, "grad_norm": 744.0023803710938, "learning_rate": 3.9104000000000004e-05, "loss": 158.2902, "step": 24440 }, { "epoch": 0.09878109382385857, "grad_norm": 1092.13671875, "learning_rate": 3.9120000000000005e-05, "loss": 129.2463, "step": 24450 }, { "epoch": 0.09882149508922619, "grad_norm": 783.1102905273438, "learning_rate": 3.9136000000000006e-05, "loss": 118.658, "step": 24460 }, { "epoch": 0.09886189635459383, "grad_norm": 2806.539306640625, "learning_rate": 3.9152e-05, "loss": 103.9075, "step": 24470 }, { "epoch": 0.09890229761996146, "grad_norm": 603.8475341796875, "learning_rate": 3.9168e-05, "loss": 76.5813, "step": 24480 }, { "epoch": 0.09894269888532908, "grad_norm": 1004.6978149414062, "learning_rate": 3.9184e-05, "loss": 161.1362, "step": 24490 }, { "epoch": 0.09898310015069672, "grad_norm": 1597.204833984375, "learning_rate": 3.9200000000000004e-05, "loss": 136.4567, "step": 24500 }, { "epoch": 0.09902350141606435, "grad_norm": 930.0423583984375, "learning_rate": 3.9216000000000005e-05, "loss": 114.5231, "step": 24510 }, { "epoch": 0.09906390268143198, "grad_norm": 2183.127685546875, "learning_rate": 3.9232000000000007e-05, "loss": 102.2725, "step": 24520 }, { "epoch": 0.09910430394679962, "grad_norm": 653.5531005859375, "learning_rate": 3.9248e-05, "loss": 93.1283, "step": 24530 }, { "epoch": 0.09914470521216724, "grad_norm": 1036.9739990234375, "learning_rate": 3.9264e-05, "loss": 120.2268, "step": 24540 }, { "epoch": 0.09918510647753488, "grad_norm": 1612.5570068359375, "learning_rate": 3.9280000000000003e-05, "loss": 94.6924, "step": 24550 }, { "epoch": 0.09922550774290251, "grad_norm": 863.7626342773438, "learning_rate": 3.9296000000000005e-05, "loss": 120.5359, "step": 24560 }, { "epoch": 0.09926590900827013, "grad_norm": 1776.318115234375, "learning_rate": 3.9312000000000006e-05, "loss": 179.9252, "step": 24570 }, { "epoch": 0.09930631027363777, "grad_norm": 665.2869262695312, "learning_rate": 3.9328e-05, "loss": 83.8321, "step": 24580 }, { "epoch": 0.0993467115390054, "grad_norm": 892.1969604492188, "learning_rate": 3.9344e-05, "loss": 146.0414, "step": 24590 }, { "epoch": 0.09938711280437303, "grad_norm": 0.0, "learning_rate": 3.936e-05, "loss": 133.4863, "step": 24600 }, { "epoch": 0.09942751406974067, "grad_norm": 1188.8670654296875, "learning_rate": 3.9376000000000004e-05, "loss": 101.8091, "step": 24610 }, { "epoch": 0.09946791533510829, "grad_norm": 737.1953735351562, "learning_rate": 3.9392000000000005e-05, "loss": 74.0432, "step": 24620 }, { "epoch": 0.09950831660047593, "grad_norm": 490.60430908203125, "learning_rate": 3.9408e-05, "loss": 82.5474, "step": 24630 }, { "epoch": 0.09954871786584356, "grad_norm": 1418.0870361328125, "learning_rate": 3.942400000000001e-05, "loss": 115.8092, "step": 24640 }, { "epoch": 0.09958911913121118, "grad_norm": 1001.0704956054688, "learning_rate": 3.944e-05, "loss": 74.5635, "step": 24650 }, { "epoch": 0.09962952039657882, "grad_norm": 1704.5621337890625, "learning_rate": 3.9456e-05, "loss": 119.8563, "step": 24660 }, { "epoch": 0.09966992166194645, "grad_norm": 1012.4752197265625, "learning_rate": 3.9472000000000004e-05, "loss": 124.5904, "step": 24670 }, { "epoch": 0.09971032292731408, "grad_norm": 1744.2069091796875, "learning_rate": 3.9488e-05, "loss": 114.0528, "step": 24680 }, { "epoch": 0.09975072419268172, "grad_norm": 895.2362670898438, "learning_rate": 3.950400000000001e-05, "loss": 128.9408, "step": 24690 }, { "epoch": 0.09979112545804934, "grad_norm": 713.849609375, "learning_rate": 3.952e-05, "loss": 79.1376, "step": 24700 }, { "epoch": 0.09983152672341698, "grad_norm": 966.8638305664062, "learning_rate": 3.9536e-05, "loss": 103.0292, "step": 24710 }, { "epoch": 0.09987192798878461, "grad_norm": 881.3704223632812, "learning_rate": 3.9552000000000003e-05, "loss": 82.8581, "step": 24720 }, { "epoch": 0.09991232925415224, "grad_norm": 555.9190063476562, "learning_rate": 3.9568000000000005e-05, "loss": 101.4556, "step": 24730 }, { "epoch": 0.09995273051951988, "grad_norm": 1946.76025390625, "learning_rate": 3.9584000000000006e-05, "loss": 112.9839, "step": 24740 }, { "epoch": 0.0999931317848875, "grad_norm": 1399.944091796875, "learning_rate": 3.96e-05, "loss": 161.2461, "step": 24750 }, { "epoch": 0.10003353305025513, "grad_norm": 492.08251953125, "learning_rate": 3.9616e-05, "loss": 85.7148, "step": 24760 }, { "epoch": 0.10007393431562277, "grad_norm": 698.5414428710938, "learning_rate": 3.9632e-05, "loss": 81.156, "step": 24770 }, { "epoch": 0.1001143355809904, "grad_norm": 1429.7156982421875, "learning_rate": 3.9648000000000004e-05, "loss": 78.8263, "step": 24780 }, { "epoch": 0.10015473684635803, "grad_norm": 2266.67041015625, "learning_rate": 3.9664000000000005e-05, "loss": 133.6976, "step": 24790 }, { "epoch": 0.10019513811172566, "grad_norm": 2114.505615234375, "learning_rate": 3.9680000000000006e-05, "loss": 157.514, "step": 24800 }, { "epoch": 0.10023553937709329, "grad_norm": 1892.30615234375, "learning_rate": 3.9696e-05, "loss": 112.1205, "step": 24810 }, { "epoch": 0.10027594064246093, "grad_norm": 1739.9906005859375, "learning_rate": 3.9712e-05, "loss": 124.3941, "step": 24820 }, { "epoch": 0.10031634190782855, "grad_norm": 1360.4404296875, "learning_rate": 3.9728e-05, "loss": 170.2335, "step": 24830 }, { "epoch": 0.10035674317319618, "grad_norm": 887.6372680664062, "learning_rate": 3.9744000000000004e-05, "loss": 135.4758, "step": 24840 }, { "epoch": 0.10039714443856382, "grad_norm": 356.5849914550781, "learning_rate": 3.9760000000000006e-05, "loss": 93.1374, "step": 24850 }, { "epoch": 0.10043754570393144, "grad_norm": 1182.120849609375, "learning_rate": 3.9776e-05, "loss": 147.9866, "step": 24860 }, { "epoch": 0.10047794696929908, "grad_norm": 1378.6016845703125, "learning_rate": 3.9792e-05, "loss": 116.0618, "step": 24870 }, { "epoch": 0.10051834823466671, "grad_norm": 1231.1202392578125, "learning_rate": 3.9808e-05, "loss": 113.3535, "step": 24880 }, { "epoch": 0.10055874950003434, "grad_norm": 2073.397705078125, "learning_rate": 3.9824000000000004e-05, "loss": 101.1435, "step": 24890 }, { "epoch": 0.10059915076540198, "grad_norm": 700.2265014648438, "learning_rate": 3.9840000000000005e-05, "loss": 119.9656, "step": 24900 }, { "epoch": 0.1006395520307696, "grad_norm": 1094.663818359375, "learning_rate": 3.9856e-05, "loss": 96.4916, "step": 24910 }, { "epoch": 0.10067995329613723, "grad_norm": 776.6048583984375, "learning_rate": 3.987200000000001e-05, "loss": 76.0012, "step": 24920 }, { "epoch": 0.10072035456150487, "grad_norm": 711.3641357421875, "learning_rate": 3.9888e-05, "loss": 103.8389, "step": 24930 }, { "epoch": 0.1007607558268725, "grad_norm": 1021.636474609375, "learning_rate": 3.9904e-05, "loss": 105.0635, "step": 24940 }, { "epoch": 0.10080115709224013, "grad_norm": 1410.3927001953125, "learning_rate": 3.9920000000000004e-05, "loss": 93.3259, "step": 24950 }, { "epoch": 0.10084155835760776, "grad_norm": 692.6359252929688, "learning_rate": 3.9936e-05, "loss": 110.1186, "step": 24960 }, { "epoch": 0.10088195962297539, "grad_norm": 672.6102905273438, "learning_rate": 3.9952000000000006e-05, "loss": 89.1554, "step": 24970 }, { "epoch": 0.10092236088834303, "grad_norm": 2117.183349609375, "learning_rate": 3.9968e-05, "loss": 142.3189, "step": 24980 }, { "epoch": 0.10096276215371065, "grad_norm": 937.2538452148438, "learning_rate": 3.9984e-05, "loss": 103.7949, "step": 24990 }, { "epoch": 0.10100316341907828, "grad_norm": 495.8382568359375, "learning_rate": 4e-05, "loss": 105.3346, "step": 25000 }, { "epoch": 0.10104356468444592, "grad_norm": 5377.6513671875, "learning_rate": 3.999999980504486e-05, "loss": 163.8216, "step": 25010 }, { "epoch": 0.10108396594981355, "grad_norm": 1024.221435546875, "learning_rate": 3.999999922017941e-05, "loss": 167.4767, "step": 25020 }, { "epoch": 0.10112436721518119, "grad_norm": 2024.415283203125, "learning_rate": 3.999999824540369e-05, "loss": 107.0843, "step": 25030 }, { "epoch": 0.10116476848054881, "grad_norm": 1808.91455078125, "learning_rate": 3.9999996880717705e-05, "loss": 75.9993, "step": 25040 }, { "epoch": 0.10120516974591644, "grad_norm": 1401.970458984375, "learning_rate": 3.999999512612149e-05, "loss": 118.6832, "step": 25050 }, { "epoch": 0.10124557101128408, "grad_norm": 1190.476806640625, "learning_rate": 3.9999992981615066e-05, "loss": 116.5726, "step": 25060 }, { "epoch": 0.1012859722766517, "grad_norm": 517.6707763671875, "learning_rate": 3.999999044719848e-05, "loss": 101.7224, "step": 25070 }, { "epoch": 0.10132637354201933, "grad_norm": 514.5786743164062, "learning_rate": 3.9999987522871786e-05, "loss": 83.3145, "step": 25080 }, { "epoch": 0.10136677480738697, "grad_norm": 1905.0189208984375, "learning_rate": 3.999998420863504e-05, "loss": 116.2477, "step": 25090 }, { "epoch": 0.1014071760727546, "grad_norm": 2310.874267578125, "learning_rate": 3.9999980504488305e-05, "loss": 92.526, "step": 25100 }, { "epoch": 0.10144757733812224, "grad_norm": 907.70166015625, "learning_rate": 3.999997641043165e-05, "loss": 128.3388, "step": 25110 }, { "epoch": 0.10148797860348986, "grad_norm": 440.4619140625, "learning_rate": 3.999997192646517e-05, "loss": 105.6848, "step": 25120 }, { "epoch": 0.10152837986885749, "grad_norm": 688.9473266601562, "learning_rate": 3.999996705258893e-05, "loss": 97.8082, "step": 25130 }, { "epoch": 0.10156878113422513, "grad_norm": 472.28729248046875, "learning_rate": 3.9999961788803034e-05, "loss": 174.5959, "step": 25140 }, { "epoch": 0.10160918239959275, "grad_norm": 919.4473266601562, "learning_rate": 3.999995613510759e-05, "loss": 138.1659, "step": 25150 }, { "epoch": 0.10164958366496038, "grad_norm": 1194.282470703125, "learning_rate": 3.9999950091502706e-05, "loss": 138.3285, "step": 25160 }, { "epoch": 0.10168998493032802, "grad_norm": 1675.2808837890625, "learning_rate": 3.9999943657988496e-05, "loss": 139.3541, "step": 25170 }, { "epoch": 0.10173038619569565, "grad_norm": 995.9146728515625, "learning_rate": 3.9999936834565085e-05, "loss": 106.5323, "step": 25180 }, { "epoch": 0.10177078746106329, "grad_norm": 1579.7122802734375, "learning_rate": 3.9999929621232614e-05, "loss": 113.5721, "step": 25190 }, { "epoch": 0.10181118872643091, "grad_norm": 1105.6669921875, "learning_rate": 3.9999922017991213e-05, "loss": 117.8805, "step": 25200 }, { "epoch": 0.10185158999179854, "grad_norm": 888.4319458007812, "learning_rate": 3.999991402484104e-05, "loss": 94.7415, "step": 25210 }, { "epoch": 0.10189199125716618, "grad_norm": 1027.7147216796875, "learning_rate": 3.999990564178225e-05, "loss": 62.6618, "step": 25220 }, { "epoch": 0.1019323925225338, "grad_norm": 799.4996337890625, "learning_rate": 3.9999896868814995e-05, "loss": 116.6909, "step": 25230 }, { "epoch": 0.10197279378790143, "grad_norm": 3651.783203125, "learning_rate": 3.999988770593946e-05, "loss": 165.9793, "step": 25240 }, { "epoch": 0.10201319505326907, "grad_norm": 729.583740234375, "learning_rate": 3.999987815315581e-05, "loss": 95.7285, "step": 25250 }, { "epoch": 0.1020535963186367, "grad_norm": 634.2191162109375, "learning_rate": 3.9999868210464244e-05, "loss": 109.2163, "step": 25260 }, { "epoch": 0.10209399758400434, "grad_norm": 683.3226318359375, "learning_rate": 3.9999857877864945e-05, "loss": 125.2095, "step": 25270 }, { "epoch": 0.10213439884937196, "grad_norm": 952.6441650390625, "learning_rate": 3.999984715535813e-05, "loss": 96.8702, "step": 25280 }, { "epoch": 0.10217480011473959, "grad_norm": 612.0934448242188, "learning_rate": 3.999983604294399e-05, "loss": 98.7964, "step": 25290 }, { "epoch": 0.10221520138010723, "grad_norm": 668.8614501953125, "learning_rate": 3.999982454062275e-05, "loss": 79.0043, "step": 25300 }, { "epoch": 0.10225560264547486, "grad_norm": 1596.1842041015625, "learning_rate": 3.999981264839464e-05, "loss": 103.495, "step": 25310 }, { "epoch": 0.10229600391084248, "grad_norm": 675.1773681640625, "learning_rate": 3.999980036625989e-05, "loss": 157.9035, "step": 25320 }, { "epoch": 0.10233640517621012, "grad_norm": 1616.4547119140625, "learning_rate": 3.9999787694218724e-05, "loss": 93.8177, "step": 25330 }, { "epoch": 0.10237680644157775, "grad_norm": 834.3451538085938, "learning_rate": 3.99997746322714e-05, "loss": 137.3178, "step": 25340 }, { "epoch": 0.10241720770694539, "grad_norm": 1074.680419921875, "learning_rate": 3.999976118041818e-05, "loss": 106.7518, "step": 25350 }, { "epoch": 0.10245760897231301, "grad_norm": 432.718505859375, "learning_rate": 3.999974733865932e-05, "loss": 110.611, "step": 25360 }, { "epoch": 0.10249801023768064, "grad_norm": 1014.7579956054688, "learning_rate": 3.999973310699509e-05, "loss": 137.3919, "step": 25370 }, { "epoch": 0.10253841150304828, "grad_norm": 1398.2919921875, "learning_rate": 3.999971848542576e-05, "loss": 129.0789, "step": 25380 }, { "epoch": 0.1025788127684159, "grad_norm": 2139.510498046875, "learning_rate": 3.999970347395162e-05, "loss": 93.033, "step": 25390 }, { "epoch": 0.10261921403378353, "grad_norm": 1227.2335205078125, "learning_rate": 3.9999688072572966e-05, "loss": 158.6131, "step": 25400 }, { "epoch": 0.10265961529915117, "grad_norm": 444.8744201660156, "learning_rate": 3.99996722812901e-05, "loss": 104.8071, "step": 25410 }, { "epoch": 0.1027000165645188, "grad_norm": 761.33837890625, "learning_rate": 3.9999656100103325e-05, "loss": 97.1906, "step": 25420 }, { "epoch": 0.10274041782988644, "grad_norm": 1000.1937255859375, "learning_rate": 3.999963952901296e-05, "loss": 83.2851, "step": 25430 }, { "epoch": 0.10278081909525406, "grad_norm": 1454.795166015625, "learning_rate": 3.999962256801932e-05, "loss": 125.3475, "step": 25440 }, { "epoch": 0.10282122036062169, "grad_norm": 620.0244750976562, "learning_rate": 3.999960521712274e-05, "loss": 151.7363, "step": 25450 }, { "epoch": 0.10286162162598933, "grad_norm": 1074.2821044921875, "learning_rate": 3.999958747632357e-05, "loss": 117.8375, "step": 25460 }, { "epoch": 0.10290202289135696, "grad_norm": 2790.96533203125, "learning_rate": 3.9999569345622135e-05, "loss": 123.3024, "step": 25470 }, { "epoch": 0.10294242415672458, "grad_norm": 2994.655517578125, "learning_rate": 3.999955082501881e-05, "loss": 92.7528, "step": 25480 }, { "epoch": 0.10298282542209222, "grad_norm": 445.1771545410156, "learning_rate": 3.999953191451394e-05, "loss": 88.779, "step": 25490 }, { "epoch": 0.10302322668745985, "grad_norm": 1553.27490234375, "learning_rate": 3.9999512614107894e-05, "loss": 153.5818, "step": 25500 }, { "epoch": 0.10306362795282749, "grad_norm": 2650.843017578125, "learning_rate": 3.999949292380106e-05, "loss": 93.9213, "step": 25510 }, { "epoch": 0.10310402921819511, "grad_norm": 654.7503662109375, "learning_rate": 3.9999472843593816e-05, "loss": 133.6554, "step": 25520 }, { "epoch": 0.10314443048356274, "grad_norm": 398.31951904296875, "learning_rate": 3.999945237348655e-05, "loss": 167.9717, "step": 25530 }, { "epoch": 0.10318483174893038, "grad_norm": 1162.1407470703125, "learning_rate": 3.999943151347967e-05, "loss": 124.5522, "step": 25540 }, { "epoch": 0.103225233014298, "grad_norm": 1724.40283203125, "learning_rate": 3.999941026357356e-05, "loss": 122.637, "step": 25550 }, { "epoch": 0.10326563427966563, "grad_norm": 731.8903198242188, "learning_rate": 3.999938862376866e-05, "loss": 67.4073, "step": 25560 }, { "epoch": 0.10330603554503327, "grad_norm": 0.0, "learning_rate": 3.999936659406539e-05, "loss": 68.4163, "step": 25570 }, { "epoch": 0.1033464368104009, "grad_norm": 705.5543212890625, "learning_rate": 3.999934417446416e-05, "loss": 83.734, "step": 25580 }, { "epoch": 0.10338683807576854, "grad_norm": 1163.376220703125, "learning_rate": 3.9999321364965415e-05, "loss": 128.0824, "step": 25590 }, { "epoch": 0.10342723934113617, "grad_norm": 1845.739990234375, "learning_rate": 3.9999298165569614e-05, "loss": 109.6928, "step": 25600 }, { "epoch": 0.10346764060650379, "grad_norm": 509.751220703125, "learning_rate": 3.9999274576277196e-05, "loss": 97.6254, "step": 25610 }, { "epoch": 0.10350804187187143, "grad_norm": 1141.1207275390625, "learning_rate": 3.999925059708863e-05, "loss": 89.0088, "step": 25620 }, { "epoch": 0.10354844313723906, "grad_norm": 1681.5274658203125, "learning_rate": 3.999922622800437e-05, "loss": 107.7538, "step": 25630 }, { "epoch": 0.10358884440260668, "grad_norm": 2417.304931640625, "learning_rate": 3.99992014690249e-05, "loss": 169.7357, "step": 25640 }, { "epoch": 0.10362924566797432, "grad_norm": 1126.2418212890625, "learning_rate": 3.99991763201507e-05, "loss": 111.1401, "step": 25650 }, { "epoch": 0.10366964693334195, "grad_norm": 1027.2210693359375, "learning_rate": 3.999915078138226e-05, "loss": 116.2014, "step": 25660 }, { "epoch": 0.10371004819870959, "grad_norm": 1453.3272705078125, "learning_rate": 3.999912485272008e-05, "loss": 103.9699, "step": 25670 }, { "epoch": 0.10375044946407722, "grad_norm": 457.8889465332031, "learning_rate": 3.9999098534164675e-05, "loss": 69.5038, "step": 25680 }, { "epoch": 0.10379085072944484, "grad_norm": 531.3936767578125, "learning_rate": 3.999907182571654e-05, "loss": 106.4484, "step": 25690 }, { "epoch": 0.10383125199481248, "grad_norm": 3379.6875, "learning_rate": 3.99990447273762e-05, "loss": 134.128, "step": 25700 }, { "epoch": 0.10387165326018011, "grad_norm": 1140.1044921875, "learning_rate": 3.999901723914419e-05, "loss": 123.5596, "step": 25710 }, { "epoch": 0.10391205452554773, "grad_norm": 459.4917907714844, "learning_rate": 3.999898936102104e-05, "loss": 62.9321, "step": 25720 }, { "epoch": 0.10395245579091537, "grad_norm": 1177.872314453125, "learning_rate": 3.9998961093007295e-05, "loss": 143.5161, "step": 25730 }, { "epoch": 0.103992857056283, "grad_norm": 873.7769165039062, "learning_rate": 3.9998932435103513e-05, "loss": 93.7114, "step": 25740 }, { "epoch": 0.10403325832165064, "grad_norm": 1600.9781494140625, "learning_rate": 3.999890338731025e-05, "loss": 103.5482, "step": 25750 }, { "epoch": 0.10407365958701827, "grad_norm": 4216.92724609375, "learning_rate": 3.999887394962806e-05, "loss": 121.3336, "step": 25760 }, { "epoch": 0.10411406085238589, "grad_norm": 2037.351806640625, "learning_rate": 3.999884412205753e-05, "loss": 112.482, "step": 25770 }, { "epoch": 0.10415446211775353, "grad_norm": 1124.732177734375, "learning_rate": 3.9998813904599234e-05, "loss": 79.0539, "step": 25780 }, { "epoch": 0.10419486338312116, "grad_norm": 860.0994262695312, "learning_rate": 3.999878329725377e-05, "loss": 134.404, "step": 25790 }, { "epoch": 0.10423526464848878, "grad_norm": 538.1453247070312, "learning_rate": 3.9998752300021736e-05, "loss": 148.6289, "step": 25800 }, { "epoch": 0.10427566591385642, "grad_norm": 441.525634765625, "learning_rate": 3.999872091290372e-05, "loss": 86.7214, "step": 25810 }, { "epoch": 0.10431606717922405, "grad_norm": 660.6888427734375, "learning_rate": 3.999868913590034e-05, "loss": 135.1893, "step": 25820 }, { "epoch": 0.10435646844459169, "grad_norm": 567.1407470703125, "learning_rate": 3.999865696901223e-05, "loss": 122.7115, "step": 25830 }, { "epoch": 0.10439686970995932, "grad_norm": 755.7587890625, "learning_rate": 3.9998624412240004e-05, "loss": 125.0915, "step": 25840 }, { "epoch": 0.10443727097532694, "grad_norm": 837.2969360351562, "learning_rate": 3.999859146558429e-05, "loss": 86.853, "step": 25850 }, { "epoch": 0.10447767224069458, "grad_norm": 694.6524658203125, "learning_rate": 3.999855812904575e-05, "loss": 91.0812, "step": 25860 }, { "epoch": 0.10451807350606221, "grad_norm": 0.0, "learning_rate": 3.999852440262502e-05, "loss": 89.327, "step": 25870 }, { "epoch": 0.10455847477142984, "grad_norm": 1052.8719482421875, "learning_rate": 3.999849028632276e-05, "loss": 119.6729, "step": 25880 }, { "epoch": 0.10459887603679748, "grad_norm": 1102.21875, "learning_rate": 3.9998455780139635e-05, "loss": 112.6542, "step": 25890 }, { "epoch": 0.1046392773021651, "grad_norm": 1052.8319091796875, "learning_rate": 3.999842088407633e-05, "loss": 110.996, "step": 25900 }, { "epoch": 0.10467967856753274, "grad_norm": 2375.400146484375, "learning_rate": 3.99983855981335e-05, "loss": 110.829, "step": 25910 }, { "epoch": 0.10472007983290037, "grad_norm": 1199.724609375, "learning_rate": 3.9998349922311856e-05, "loss": 71.8274, "step": 25920 }, { "epoch": 0.104760481098268, "grad_norm": 581.4923706054688, "learning_rate": 3.999831385661208e-05, "loss": 82.232, "step": 25930 }, { "epoch": 0.10480088236363563, "grad_norm": 635.7989501953125, "learning_rate": 3.999827740103487e-05, "loss": 127.8533, "step": 25940 }, { "epoch": 0.10484128362900326, "grad_norm": 2129.2431640625, "learning_rate": 3.9998240555580954e-05, "loss": 129.5675, "step": 25950 }, { "epoch": 0.10488168489437089, "grad_norm": 1494.1981201171875, "learning_rate": 3.999820332025105e-05, "loss": 107.2558, "step": 25960 }, { "epoch": 0.10492208615973853, "grad_norm": 1150.99560546875, "learning_rate": 3.999816569504587e-05, "loss": 116.159, "step": 25970 }, { "epoch": 0.10496248742510615, "grad_norm": 1286.3641357421875, "learning_rate": 3.9998127679966154e-05, "loss": 122.6338, "step": 25980 }, { "epoch": 0.10500288869047379, "grad_norm": 1373.02685546875, "learning_rate": 3.999808927501264e-05, "loss": 114.1198, "step": 25990 }, { "epoch": 0.10504328995584142, "grad_norm": 915.142333984375, "learning_rate": 3.999805048018609e-05, "loss": 100.8753, "step": 26000 }, { "epoch": 0.10508369122120904, "grad_norm": 1715.4393310546875, "learning_rate": 3.9998011295487236e-05, "loss": 88.3989, "step": 26010 }, { "epoch": 0.10512409248657668, "grad_norm": 721.6860961914062, "learning_rate": 3.999797172091687e-05, "loss": 62.4881, "step": 26020 }, { "epoch": 0.10516449375194431, "grad_norm": 642.193359375, "learning_rate": 3.9997931756475744e-05, "loss": 96.4464, "step": 26030 }, { "epoch": 0.10520489501731194, "grad_norm": 3102.461181640625, "learning_rate": 3.9997891402164644e-05, "loss": 111.057, "step": 26040 }, { "epoch": 0.10524529628267958, "grad_norm": 1137.797119140625, "learning_rate": 3.999785065798435e-05, "loss": 66.1138, "step": 26050 }, { "epoch": 0.1052856975480472, "grad_norm": 1059.8470458984375, "learning_rate": 3.999780952393566e-05, "loss": 110.6024, "step": 26060 }, { "epoch": 0.10532609881341484, "grad_norm": 1064.2550048828125, "learning_rate": 3.999776800001939e-05, "loss": 101.4643, "step": 26070 }, { "epoch": 0.10536650007878247, "grad_norm": 1279.454833984375, "learning_rate": 3.9997726086236325e-05, "loss": 125.3454, "step": 26080 }, { "epoch": 0.1054069013441501, "grad_norm": 1695.6534423828125, "learning_rate": 3.999768378258731e-05, "loss": 103.3218, "step": 26090 }, { "epoch": 0.10544730260951773, "grad_norm": 841.222412109375, "learning_rate": 3.999764108907314e-05, "loss": 68.5106, "step": 26100 }, { "epoch": 0.10548770387488536, "grad_norm": 1536.8846435546875, "learning_rate": 3.999759800569467e-05, "loss": 94.1219, "step": 26110 }, { "epoch": 0.10552810514025299, "grad_norm": 1104.2398681640625, "learning_rate": 3.999755453245272e-05, "loss": 61.2582, "step": 26120 }, { "epoch": 0.10556850640562063, "grad_norm": 1145.83056640625, "learning_rate": 3.999751066934816e-05, "loss": 87.8465, "step": 26130 }, { "epoch": 0.10560890767098825, "grad_norm": 896.1638793945312, "learning_rate": 3.999746641638183e-05, "loss": 77.771, "step": 26140 }, { "epoch": 0.10564930893635589, "grad_norm": 1831.322509765625, "learning_rate": 3.9997421773554596e-05, "loss": 162.7093, "step": 26150 }, { "epoch": 0.10568971020172352, "grad_norm": 495.71588134765625, "learning_rate": 3.9997376740867334e-05, "loss": 122.5265, "step": 26160 }, { "epoch": 0.10573011146709115, "grad_norm": 1053.8997802734375, "learning_rate": 3.9997331318320906e-05, "loss": 120.7888, "step": 26170 }, { "epoch": 0.10577051273245879, "grad_norm": 826.036865234375, "learning_rate": 3.9997285505916215e-05, "loss": 84.5706, "step": 26180 }, { "epoch": 0.10581091399782641, "grad_norm": 580.434814453125, "learning_rate": 3.999723930365415e-05, "loss": 74.2667, "step": 26190 }, { "epoch": 0.10585131526319404, "grad_norm": 299.5496520996094, "learning_rate": 3.999719271153561e-05, "loss": 92.8395, "step": 26200 }, { "epoch": 0.10589171652856168, "grad_norm": 1334.234375, "learning_rate": 3.9997145729561505e-05, "loss": 100.1445, "step": 26210 }, { "epoch": 0.1059321177939293, "grad_norm": 796.839599609375, "learning_rate": 3.999709835773274e-05, "loss": 144.5433, "step": 26220 }, { "epoch": 0.10597251905929694, "grad_norm": 778.1115112304688, "learning_rate": 3.9997050596050255e-05, "loss": 108.5845, "step": 26230 }, { "epoch": 0.10601292032466457, "grad_norm": 6468.21044921875, "learning_rate": 3.999700244451497e-05, "loss": 81.2048, "step": 26240 }, { "epoch": 0.1060533215900322, "grad_norm": 671.6563720703125, "learning_rate": 3.999695390312783e-05, "loss": 54.482, "step": 26250 }, { "epoch": 0.10609372285539984, "grad_norm": 782.2870483398438, "learning_rate": 3.999690497188978e-05, "loss": 49.7613, "step": 26260 }, { "epoch": 0.10613412412076746, "grad_norm": 1402.428466796875, "learning_rate": 3.999685565080176e-05, "loss": 118.4322, "step": 26270 }, { "epoch": 0.10617452538613509, "grad_norm": 563.5989990234375, "learning_rate": 3.999680593986475e-05, "loss": 84.3666, "step": 26280 }, { "epoch": 0.10621492665150273, "grad_norm": 928.5982055664062, "learning_rate": 3.999675583907972e-05, "loss": 134.5598, "step": 26290 }, { "epoch": 0.10625532791687035, "grad_norm": 1692.81640625, "learning_rate": 3.999670534844763e-05, "loss": 126.942, "step": 26300 }, { "epoch": 0.106295729182238, "grad_norm": 3591.871826171875, "learning_rate": 3.9996654467969485e-05, "loss": 92.5221, "step": 26310 }, { "epoch": 0.10633613044760562, "grad_norm": 2086.629150390625, "learning_rate": 3.999660319764626e-05, "loss": 85.6505, "step": 26320 }, { "epoch": 0.10637653171297325, "grad_norm": 1024.0364990234375, "learning_rate": 3.9996551537478965e-05, "loss": 165.9923, "step": 26330 }, { "epoch": 0.10641693297834089, "grad_norm": 2205.33154296875, "learning_rate": 3.99964994874686e-05, "loss": 112.9845, "step": 26340 }, { "epoch": 0.10645733424370851, "grad_norm": 1261.9781494140625, "learning_rate": 3.9996447047616185e-05, "loss": 142.4679, "step": 26350 }, { "epoch": 0.10649773550907614, "grad_norm": 1418.99755859375, "learning_rate": 3.999639421792274e-05, "loss": 83.0303, "step": 26360 }, { "epoch": 0.10653813677444378, "grad_norm": 3501.380615234375, "learning_rate": 3.999634099838929e-05, "loss": 78.1086, "step": 26370 }, { "epoch": 0.1065785380398114, "grad_norm": 534.0908813476562, "learning_rate": 3.9996287389016876e-05, "loss": 111.2012, "step": 26380 }, { "epoch": 0.10661893930517904, "grad_norm": 771.1531982421875, "learning_rate": 3.999623338980655e-05, "loss": 147.8283, "step": 26390 }, { "epoch": 0.10665934057054667, "grad_norm": 927.004638671875, "learning_rate": 3.999617900075936e-05, "loss": 87.6993, "step": 26400 }, { "epoch": 0.1066997418359143, "grad_norm": 1738.247802734375, "learning_rate": 3.9996124221876364e-05, "loss": 117.6841, "step": 26410 }, { "epoch": 0.10674014310128194, "grad_norm": 1035.49609375, "learning_rate": 3.9996069053158626e-05, "loss": 182.0309, "step": 26420 }, { "epoch": 0.10678054436664956, "grad_norm": 1327.6573486328125, "learning_rate": 3.9996013494607234e-05, "loss": 107.2708, "step": 26430 }, { "epoch": 0.10682094563201719, "grad_norm": 1198.87255859375, "learning_rate": 3.999595754622326e-05, "loss": 130.0626, "step": 26440 }, { "epoch": 0.10686134689738483, "grad_norm": 0.0, "learning_rate": 3.99959012080078e-05, "loss": 86.2307, "step": 26450 }, { "epoch": 0.10690174816275246, "grad_norm": 1337.4168701171875, "learning_rate": 3.999584447996196e-05, "loss": 122.7532, "step": 26460 }, { "epoch": 0.1069421494281201, "grad_norm": 813.623046875, "learning_rate": 3.9995787362086824e-05, "loss": 61.1589, "step": 26470 }, { "epoch": 0.10698255069348772, "grad_norm": 1643.9227294921875, "learning_rate": 3.9995729854383526e-05, "loss": 112.8391, "step": 26480 }, { "epoch": 0.10702295195885535, "grad_norm": 1001.5737915039062, "learning_rate": 3.999567195685318e-05, "loss": 137.1343, "step": 26490 }, { "epoch": 0.10706335322422299, "grad_norm": 768.2099609375, "learning_rate": 3.999561366949691e-05, "loss": 101.1197, "step": 26500 }, { "epoch": 0.10710375448959061, "grad_norm": 1358.92041015625, "learning_rate": 3.9995554992315863e-05, "loss": 128.1427, "step": 26510 }, { "epoch": 0.10714415575495824, "grad_norm": 683.9628295898438, "learning_rate": 3.999549592531118e-05, "loss": 77.3999, "step": 26520 }, { "epoch": 0.10718455702032588, "grad_norm": 1114.36376953125, "learning_rate": 3.9995436468484e-05, "loss": 122.6443, "step": 26530 }, { "epoch": 0.1072249582856935, "grad_norm": 867.2285766601562, "learning_rate": 3.999537662183549e-05, "loss": 89.0948, "step": 26540 }, { "epoch": 0.10726535955106115, "grad_norm": 1265.471435546875, "learning_rate": 3.9995316385366825e-05, "loss": 90.6688, "step": 26550 }, { "epoch": 0.10730576081642877, "grad_norm": 1176.1046142578125, "learning_rate": 3.999525575907918e-05, "loss": 68.6942, "step": 26560 }, { "epoch": 0.1073461620817964, "grad_norm": 1456.6717529296875, "learning_rate": 3.999519474297372e-05, "loss": 94.2505, "step": 26570 }, { "epoch": 0.10738656334716404, "grad_norm": 942.738037109375, "learning_rate": 3.9995133337051645e-05, "loss": 158.1714, "step": 26580 }, { "epoch": 0.10742696461253166, "grad_norm": 624.3585205078125, "learning_rate": 3.999507154131415e-05, "loss": 128.1745, "step": 26590 }, { "epoch": 0.10746736587789929, "grad_norm": 1895.4283447265625, "learning_rate": 3.999500935576245e-05, "loss": 126.8323, "step": 26600 }, { "epoch": 0.10750776714326693, "grad_norm": 1229.0679931640625, "learning_rate": 3.999494678039774e-05, "loss": 116.0763, "step": 26610 }, { "epoch": 0.10754816840863456, "grad_norm": 417.828125, "learning_rate": 3.999488381522125e-05, "loss": 73.9217, "step": 26620 }, { "epoch": 0.1075885696740022, "grad_norm": 520.174560546875, "learning_rate": 3.999482046023421e-05, "loss": 99.6772, "step": 26630 }, { "epoch": 0.10762897093936982, "grad_norm": 1039.8192138671875, "learning_rate": 3.9994756715437846e-05, "loss": 143.6114, "step": 26640 }, { "epoch": 0.10766937220473745, "grad_norm": 6462.5771484375, "learning_rate": 3.9994692580833406e-05, "loss": 98.8883, "step": 26650 }, { "epoch": 0.10770977347010509, "grad_norm": 2129.985107421875, "learning_rate": 3.999462805642214e-05, "loss": 106.4062, "step": 26660 }, { "epoch": 0.10775017473547271, "grad_norm": 2644.1337890625, "learning_rate": 3.999456314220531e-05, "loss": 123.5466, "step": 26670 }, { "epoch": 0.10779057600084034, "grad_norm": 6350.77001953125, "learning_rate": 3.9994497838184173e-05, "loss": 96.4797, "step": 26680 }, { "epoch": 0.10783097726620798, "grad_norm": 1997.462890625, "learning_rate": 3.9994432144360014e-05, "loss": 107.3564, "step": 26690 }, { "epoch": 0.1078713785315756, "grad_norm": 1046.4324951171875, "learning_rate": 3.99943660607341e-05, "loss": 95.4632, "step": 26700 }, { "epoch": 0.10791177979694325, "grad_norm": 1082.221435546875, "learning_rate": 3.9994299587307724e-05, "loss": 73.5173, "step": 26710 }, { "epoch": 0.10795218106231087, "grad_norm": 475.9582824707031, "learning_rate": 3.999423272408219e-05, "loss": 118.0872, "step": 26720 }, { "epoch": 0.1079925823276785, "grad_norm": 2606.221435546875, "learning_rate": 3.9994165471058795e-05, "loss": 142.7932, "step": 26730 }, { "epoch": 0.10803298359304614, "grad_norm": 987.0201416015625, "learning_rate": 3.999409782823884e-05, "loss": 121.8413, "step": 26740 }, { "epoch": 0.10807338485841377, "grad_norm": 979.9131469726562, "learning_rate": 3.999402979562367e-05, "loss": 107.7124, "step": 26750 }, { "epoch": 0.10811378612378139, "grad_norm": 751.9520263671875, "learning_rate": 3.9993961373214585e-05, "loss": 91.4504, "step": 26760 }, { "epoch": 0.10815418738914903, "grad_norm": 466.2114562988281, "learning_rate": 3.9993892561012935e-05, "loss": 129.0346, "step": 26770 }, { "epoch": 0.10819458865451666, "grad_norm": 1181.390380859375, "learning_rate": 3.999382335902005e-05, "loss": 125.1326, "step": 26780 }, { "epoch": 0.1082349899198843, "grad_norm": 914.0978393554688, "learning_rate": 3.999375376723729e-05, "loss": 117.911, "step": 26790 }, { "epoch": 0.10827539118525192, "grad_norm": 1160.3109130859375, "learning_rate": 3.9993683785666e-05, "loss": 94.7705, "step": 26800 }, { "epoch": 0.10831579245061955, "grad_norm": 766.2025146484375, "learning_rate": 3.999361341430756e-05, "loss": 103.3526, "step": 26810 }, { "epoch": 0.10835619371598719, "grad_norm": 2005.7353515625, "learning_rate": 3.999354265316333e-05, "loss": 106.9616, "step": 26820 }, { "epoch": 0.10839659498135482, "grad_norm": 2113.0107421875, "learning_rate": 3.999347150223469e-05, "loss": 117.2856, "step": 26830 }, { "epoch": 0.10843699624672244, "grad_norm": 1407.716064453125, "learning_rate": 3.999339996152303e-05, "loss": 72.8703, "step": 26840 }, { "epoch": 0.10847739751209008, "grad_norm": 332.46832275390625, "learning_rate": 3.999332803102974e-05, "loss": 97.1084, "step": 26850 }, { "epoch": 0.10851779877745771, "grad_norm": 519.922119140625, "learning_rate": 3.999325571075624e-05, "loss": 98.0303, "step": 26860 }, { "epoch": 0.10855820004282535, "grad_norm": 401.651611328125, "learning_rate": 3.999318300070392e-05, "loss": 77.1453, "step": 26870 }, { "epoch": 0.10859860130819297, "grad_norm": 946.8197631835938, "learning_rate": 3.999310990087421e-05, "loss": 96.2507, "step": 26880 }, { "epoch": 0.1086390025735606, "grad_norm": 916.1583251953125, "learning_rate": 3.999303641126852e-05, "loss": 92.2482, "step": 26890 }, { "epoch": 0.10867940383892824, "grad_norm": 1386.3487548828125, "learning_rate": 3.999296253188829e-05, "loss": 168.3209, "step": 26900 }, { "epoch": 0.10871980510429587, "grad_norm": 1362.5413818359375, "learning_rate": 3.999288826273497e-05, "loss": 168.2882, "step": 26910 }, { "epoch": 0.10876020636966349, "grad_norm": 481.9889221191406, "learning_rate": 3.999281360381e-05, "loss": 83.7807, "step": 26920 }, { "epoch": 0.10880060763503113, "grad_norm": 785.8662719726562, "learning_rate": 3.999273855511483e-05, "loss": 123.8795, "step": 26930 }, { "epoch": 0.10884100890039876, "grad_norm": 958.2007446289062, "learning_rate": 3.999266311665094e-05, "loss": 75.8988, "step": 26940 }, { "epoch": 0.1088814101657664, "grad_norm": 676.339111328125, "learning_rate": 3.999258728841977e-05, "loss": 131.9098, "step": 26950 }, { "epoch": 0.10892181143113402, "grad_norm": 920.4100952148438, "learning_rate": 3.999251107042284e-05, "loss": 75.1306, "step": 26960 }, { "epoch": 0.10896221269650165, "grad_norm": 495.47821044921875, "learning_rate": 3.99924344626616e-05, "loss": 101.7487, "step": 26970 }, { "epoch": 0.10900261396186929, "grad_norm": 1311.8775634765625, "learning_rate": 3.999235746513757e-05, "loss": 94.5948, "step": 26980 }, { "epoch": 0.10904301522723692, "grad_norm": 706.6832275390625, "learning_rate": 3.999228007785222e-05, "loss": 110.3509, "step": 26990 }, { "epoch": 0.10908341649260454, "grad_norm": 350.2680358886719, "learning_rate": 3.999220230080709e-05, "loss": 82.0634, "step": 27000 }, { "epoch": 0.10912381775797218, "grad_norm": 863.8991088867188, "learning_rate": 3.999212413400368e-05, "loss": 110.7725, "step": 27010 }, { "epoch": 0.10916421902333981, "grad_norm": 1048.016845703125, "learning_rate": 3.999204557744352e-05, "loss": 111.5144, "step": 27020 }, { "epoch": 0.10920462028870745, "grad_norm": 555.6709594726562, "learning_rate": 3.999196663112813e-05, "loss": 167.0167, "step": 27030 }, { "epoch": 0.10924502155407508, "grad_norm": 695.7076416015625, "learning_rate": 3.9991887295059065e-05, "loss": 103.8333, "step": 27040 }, { "epoch": 0.1092854228194427, "grad_norm": 619.1875, "learning_rate": 3.999180756923787e-05, "loss": 171.0366, "step": 27050 }, { "epoch": 0.10932582408481034, "grad_norm": 649.6815185546875, "learning_rate": 3.999172745366609e-05, "loss": 98.1232, "step": 27060 }, { "epoch": 0.10936622535017797, "grad_norm": 709.5027465820312, "learning_rate": 3.999164694834529e-05, "loss": 130.3714, "step": 27070 }, { "epoch": 0.1094066266155456, "grad_norm": 1100.6815185546875, "learning_rate": 3.999156605327704e-05, "loss": 106.8556, "step": 27080 }, { "epoch": 0.10944702788091323, "grad_norm": 726.9677124023438, "learning_rate": 3.999148476846292e-05, "loss": 86.2317, "step": 27090 }, { "epoch": 0.10948742914628086, "grad_norm": 560.1898803710938, "learning_rate": 3.9991403093904505e-05, "loss": 171.8191, "step": 27100 }, { "epoch": 0.1095278304116485, "grad_norm": 1026.121337890625, "learning_rate": 3.99913210296034e-05, "loss": 89.2664, "step": 27110 }, { "epoch": 0.10956823167701613, "grad_norm": 1080.0189208984375, "learning_rate": 3.99912385755612e-05, "loss": 61.5915, "step": 27120 }, { "epoch": 0.10960863294238375, "grad_norm": 630.165771484375, "learning_rate": 3.9991155731779506e-05, "loss": 97.5475, "step": 27130 }, { "epoch": 0.10964903420775139, "grad_norm": 479.4427795410156, "learning_rate": 3.999107249825994e-05, "loss": 89.1548, "step": 27140 }, { "epoch": 0.10968943547311902, "grad_norm": 750.473876953125, "learning_rate": 3.999098887500413e-05, "loss": 105.348, "step": 27150 }, { "epoch": 0.10972983673848664, "grad_norm": 459.2093811035156, "learning_rate": 3.999090486201369e-05, "loss": 111.195, "step": 27160 }, { "epoch": 0.10977023800385428, "grad_norm": 956.58544921875, "learning_rate": 3.999082045929028e-05, "loss": 150.9714, "step": 27170 }, { "epoch": 0.10981063926922191, "grad_norm": 699.5205078125, "learning_rate": 3.999073566683552e-05, "loss": 87.5247, "step": 27180 }, { "epoch": 0.10985104053458955, "grad_norm": 944.4476928710938, "learning_rate": 3.999065048465108e-05, "loss": 116.8029, "step": 27190 }, { "epoch": 0.10989144179995718, "grad_norm": 837.1326904296875, "learning_rate": 3.9990564912738626e-05, "loss": 75.7266, "step": 27200 }, { "epoch": 0.1099318430653248, "grad_norm": 778.825927734375, "learning_rate": 3.999047895109981e-05, "loss": 100.6665, "step": 27210 }, { "epoch": 0.10997224433069244, "grad_norm": 1490.7548828125, "learning_rate": 3.999039259973632e-05, "loss": 126.6425, "step": 27220 }, { "epoch": 0.11001264559606007, "grad_norm": 452.08575439453125, "learning_rate": 3.999030585864983e-05, "loss": 87.2329, "step": 27230 }, { "epoch": 0.1100530468614277, "grad_norm": 1867.3792724609375, "learning_rate": 3.999021872784203e-05, "loss": 94.7032, "step": 27240 }, { "epoch": 0.11009344812679533, "grad_norm": 620.962158203125, "learning_rate": 3.9990131207314634e-05, "loss": 72.1691, "step": 27250 }, { "epoch": 0.11013384939216296, "grad_norm": 932.4627075195312, "learning_rate": 3.9990043297069335e-05, "loss": 101.9201, "step": 27260 }, { "epoch": 0.1101742506575306, "grad_norm": 1095.8741455078125, "learning_rate": 3.998995499710785e-05, "loss": 100.2253, "step": 27270 }, { "epoch": 0.11021465192289823, "grad_norm": 1374.950927734375, "learning_rate": 3.99898663074319e-05, "loss": 81.8334, "step": 27280 }, { "epoch": 0.11025505318826585, "grad_norm": 2230.595947265625, "learning_rate": 3.9989777228043216e-05, "loss": 95.7221, "step": 27290 }, { "epoch": 0.11029545445363349, "grad_norm": 1166.83447265625, "learning_rate": 3.998968775894354e-05, "loss": 114.2452, "step": 27300 }, { "epoch": 0.11033585571900112, "grad_norm": 705.6727905273438, "learning_rate": 3.9989597900134594e-05, "loss": 69.3813, "step": 27310 }, { "epoch": 0.11037625698436875, "grad_norm": 455.673583984375, "learning_rate": 3.998950765161816e-05, "loss": 145.5359, "step": 27320 }, { "epoch": 0.11041665824973639, "grad_norm": 1001.2382202148438, "learning_rate": 3.9989417013395975e-05, "loss": 106.7314, "step": 27330 }, { "epoch": 0.11045705951510401, "grad_norm": 1855.0413818359375, "learning_rate": 3.998932598546982e-05, "loss": 152.2735, "step": 27340 }, { "epoch": 0.11049746078047164, "grad_norm": 1260.8231201171875, "learning_rate": 3.998923456784146e-05, "loss": 136.3262, "step": 27350 }, { "epoch": 0.11053786204583928, "grad_norm": 888.2510375976562, "learning_rate": 3.998914276051269e-05, "loss": 115.822, "step": 27360 }, { "epoch": 0.1105782633112069, "grad_norm": 779.9644775390625, "learning_rate": 3.9989050563485276e-05, "loss": 163.1847, "step": 27370 }, { "epoch": 0.11061866457657454, "grad_norm": 531.3817749023438, "learning_rate": 3.998895797676103e-05, "loss": 86.5874, "step": 27380 }, { "epoch": 0.11065906584194217, "grad_norm": 574.7570190429688, "learning_rate": 3.9988865000341764e-05, "loss": 109.047, "step": 27390 }, { "epoch": 0.1106994671073098, "grad_norm": 603.2306518554688, "learning_rate": 3.998877163422929e-05, "loss": 114.8451, "step": 27400 }, { "epoch": 0.11073986837267744, "grad_norm": 1424.3984375, "learning_rate": 3.9988677878425414e-05, "loss": 116.6163, "step": 27410 }, { "epoch": 0.11078026963804506, "grad_norm": 3271.53369140625, "learning_rate": 3.998858373293198e-05, "loss": 124.9541, "step": 27420 }, { "epoch": 0.11082067090341269, "grad_norm": 1137.7222900390625, "learning_rate": 3.99884891977508e-05, "loss": 98.2931, "step": 27430 }, { "epoch": 0.11086107216878033, "grad_norm": 1314.94091796875, "learning_rate": 3.998839427288375e-05, "loss": 84.932, "step": 27440 }, { "epoch": 0.11090147343414795, "grad_norm": 981.7919311523438, "learning_rate": 3.998829895833265e-05, "loss": 121.3453, "step": 27450 }, { "epoch": 0.1109418746995156, "grad_norm": 778.75146484375, "learning_rate": 3.9988203254099373e-05, "loss": 75.516, "step": 27460 }, { "epoch": 0.11098227596488322, "grad_norm": 664.9393310546875, "learning_rate": 3.9988107160185785e-05, "loss": 113.4883, "step": 27470 }, { "epoch": 0.11102267723025085, "grad_norm": 2451.9697265625, "learning_rate": 3.998801067659376e-05, "loss": 108.3004, "step": 27480 }, { "epoch": 0.11106307849561849, "grad_norm": 645.4609375, "learning_rate": 3.998791380332517e-05, "loss": 87.737, "step": 27490 }, { "epoch": 0.11110347976098611, "grad_norm": 693.7180786132812, "learning_rate": 3.998781654038192e-05, "loss": 110.6939, "step": 27500 }, { "epoch": 0.11114388102635374, "grad_norm": 618.6651611328125, "learning_rate": 3.998771888776589e-05, "loss": 63.0742, "step": 27510 }, { "epoch": 0.11118428229172138, "grad_norm": 162.7868194580078, "learning_rate": 3.998762084547899e-05, "loss": 104.0014, "step": 27520 }, { "epoch": 0.111224683557089, "grad_norm": 653.8965454101562, "learning_rate": 3.9987522413523135e-05, "loss": 96.1498, "step": 27530 }, { "epoch": 0.11126508482245664, "grad_norm": 2130.4833984375, "learning_rate": 3.998742359190023e-05, "loss": 95.9789, "step": 27540 }, { "epoch": 0.11130548608782427, "grad_norm": 548.673583984375, "learning_rate": 3.998732438061222e-05, "loss": 137.2213, "step": 27550 }, { "epoch": 0.1113458873531919, "grad_norm": 745.030029296875, "learning_rate": 3.998722477966103e-05, "loss": 69.7114, "step": 27560 }, { "epoch": 0.11138628861855954, "grad_norm": 1384.381591796875, "learning_rate": 3.99871247890486e-05, "loss": 121.4083, "step": 27570 }, { "epoch": 0.11142668988392716, "grad_norm": 1384.610107421875, "learning_rate": 3.998702440877689e-05, "loss": 100.7045, "step": 27580 }, { "epoch": 0.11146709114929479, "grad_norm": 880.5833129882812, "learning_rate": 3.998692363884784e-05, "loss": 86.9645, "step": 27590 }, { "epoch": 0.11150749241466243, "grad_norm": 1050.4068603515625, "learning_rate": 3.998682247926343e-05, "loss": 160.0109, "step": 27600 }, { "epoch": 0.11154789368003006, "grad_norm": 2584.7294921875, "learning_rate": 3.998672093002562e-05, "loss": 109.3766, "step": 27610 }, { "epoch": 0.1115882949453977, "grad_norm": 1057.3648681640625, "learning_rate": 3.99866189911364e-05, "loss": 116.4955, "step": 27620 }, { "epoch": 0.11162869621076532, "grad_norm": 1380.513916015625, "learning_rate": 3.998651666259775e-05, "loss": 86.7301, "step": 27630 }, { "epoch": 0.11166909747613295, "grad_norm": 567.40869140625, "learning_rate": 3.998641394441167e-05, "loss": 68.0413, "step": 27640 }, { "epoch": 0.11170949874150059, "grad_norm": 1073.0047607421875, "learning_rate": 3.998631083658016e-05, "loss": 121.8724, "step": 27650 }, { "epoch": 0.11174990000686821, "grad_norm": 990.1610717773438, "learning_rate": 3.9986207339105235e-05, "loss": 97.6038, "step": 27660 }, { "epoch": 0.11179030127223584, "grad_norm": 630.7728881835938, "learning_rate": 3.99861034519889e-05, "loss": 96.9841, "step": 27670 }, { "epoch": 0.11183070253760348, "grad_norm": 1043.8477783203125, "learning_rate": 3.99859991752332e-05, "loss": 95.7591, "step": 27680 }, { "epoch": 0.1118711038029711, "grad_norm": 1339.642578125, "learning_rate": 3.998589450884014e-05, "loss": 86.0877, "step": 27690 }, { "epoch": 0.11191150506833875, "grad_norm": 1462.45751953125, "learning_rate": 3.998578945281179e-05, "loss": 105.2841, "step": 27700 }, { "epoch": 0.11195190633370637, "grad_norm": 1582.08251953125, "learning_rate": 3.998568400715018e-05, "loss": 93.621, "step": 27710 }, { "epoch": 0.111992307599074, "grad_norm": 853.9618530273438, "learning_rate": 3.998557817185737e-05, "loss": 67.4083, "step": 27720 }, { "epoch": 0.11203270886444164, "grad_norm": 4256.23876953125, "learning_rate": 3.998547194693543e-05, "loss": 128.0626, "step": 27730 }, { "epoch": 0.11207311012980926, "grad_norm": 687.2172241210938, "learning_rate": 3.9985365332386424e-05, "loss": 101.9704, "step": 27740 }, { "epoch": 0.11211351139517689, "grad_norm": 542.3268432617188, "learning_rate": 3.998525832821242e-05, "loss": 112.3464, "step": 27750 }, { "epoch": 0.11215391266054453, "grad_norm": 920.6582641601562, "learning_rate": 3.998515093441553e-05, "loss": 78.368, "step": 27760 }, { "epoch": 0.11219431392591216, "grad_norm": 968.70849609375, "learning_rate": 3.998504315099783e-05, "loss": 77.5449, "step": 27770 }, { "epoch": 0.1122347151912798, "grad_norm": 814.1373291015625, "learning_rate": 3.998493497796142e-05, "loss": 86.3444, "step": 27780 }, { "epoch": 0.11227511645664742, "grad_norm": 652.4821166992188, "learning_rate": 3.998482641530842e-05, "loss": 80.1591, "step": 27790 }, { "epoch": 0.11231551772201505, "grad_norm": 1460.57373046875, "learning_rate": 3.998471746304094e-05, "loss": 133.1036, "step": 27800 }, { "epoch": 0.11235591898738269, "grad_norm": 1677.220947265625, "learning_rate": 3.99846081211611e-05, "loss": 122.758, "step": 27810 }, { "epoch": 0.11239632025275031, "grad_norm": 1221.714111328125, "learning_rate": 3.998449838967104e-05, "loss": 97.5468, "step": 27820 }, { "epoch": 0.11243672151811794, "grad_norm": 1173.8179931640625, "learning_rate": 3.9984388268572894e-05, "loss": 116.5459, "step": 27830 }, { "epoch": 0.11247712278348558, "grad_norm": 1088.1904296875, "learning_rate": 3.998427775786881e-05, "loss": 70.6181, "step": 27840 }, { "epoch": 0.1125175240488532, "grad_norm": 749.2481079101562, "learning_rate": 3.998416685756094e-05, "loss": 146.586, "step": 27850 }, { "epoch": 0.11255792531422085, "grad_norm": 799.7578735351562, "learning_rate": 3.998405556765145e-05, "loss": 117.1241, "step": 27860 }, { "epoch": 0.11259832657958847, "grad_norm": 758.5236206054688, "learning_rate": 3.9983943888142505e-05, "loss": 90.8864, "step": 27870 }, { "epoch": 0.1126387278449561, "grad_norm": 3251.1787109375, "learning_rate": 3.998383181903629e-05, "loss": 110.5195, "step": 27880 }, { "epoch": 0.11267912911032374, "grad_norm": 805.9417114257812, "learning_rate": 3.9983719360334985e-05, "loss": 68.4453, "step": 27890 }, { "epoch": 0.11271953037569137, "grad_norm": 721.3700561523438, "learning_rate": 3.9983606512040786e-05, "loss": 108.295, "step": 27900 }, { "epoch": 0.11275993164105899, "grad_norm": 805.6432495117188, "learning_rate": 3.998349327415588e-05, "loss": 72.9431, "step": 27910 }, { "epoch": 0.11280033290642663, "grad_norm": 1314.1534423828125, "learning_rate": 3.998337964668249e-05, "loss": 89.8111, "step": 27920 }, { "epoch": 0.11284073417179426, "grad_norm": 616.1529541015625, "learning_rate": 3.998326562962283e-05, "loss": 94.4337, "step": 27930 }, { "epoch": 0.1128811354371619, "grad_norm": 3171.981201171875, "learning_rate": 3.998315122297911e-05, "loss": 99.0224, "step": 27940 }, { "epoch": 0.11292153670252952, "grad_norm": 817.9891967773438, "learning_rate": 3.9983036426753574e-05, "loss": 162.2938, "step": 27950 }, { "epoch": 0.11296193796789715, "grad_norm": 1005.9824829101562, "learning_rate": 3.998292124094845e-05, "loss": 87.353, "step": 27960 }, { "epoch": 0.11300233923326479, "grad_norm": 773.955810546875, "learning_rate": 3.9982805665566e-05, "loss": 137.2771, "step": 27970 }, { "epoch": 0.11304274049863242, "grad_norm": 2464.9912109375, "learning_rate": 3.998268970060846e-05, "loss": 78.9946, "step": 27980 }, { "epoch": 0.11308314176400004, "grad_norm": 985.4806518554688, "learning_rate": 3.998257334607809e-05, "loss": 152.6931, "step": 27990 }, { "epoch": 0.11312354302936768, "grad_norm": 1514.001953125, "learning_rate": 3.998245660197717e-05, "loss": 101.2416, "step": 28000 }, { "epoch": 0.11316394429473531, "grad_norm": 784.7289428710938, "learning_rate": 3.9982339468307974e-05, "loss": 98.5727, "step": 28010 }, { "epoch": 0.11320434556010295, "grad_norm": 1584.1505126953125, "learning_rate": 3.998222194507277e-05, "loss": 90.057, "step": 28020 }, { "epoch": 0.11324474682547057, "grad_norm": 489.11767578125, "learning_rate": 3.9982104032273875e-05, "loss": 112.8647, "step": 28030 }, { "epoch": 0.1132851480908382, "grad_norm": 988.647705078125, "learning_rate": 3.998198572991357e-05, "loss": 118.9399, "step": 28040 }, { "epoch": 0.11332554935620584, "grad_norm": 1578.1778564453125, "learning_rate": 3.998186703799417e-05, "loss": 133.543, "step": 28050 }, { "epoch": 0.11336595062157347, "grad_norm": 1059.1217041015625, "learning_rate": 3.998174795651798e-05, "loss": 78.5302, "step": 28060 }, { "epoch": 0.11340635188694109, "grad_norm": 1273.186279296875, "learning_rate": 3.998162848548733e-05, "loss": 176.5455, "step": 28070 }, { "epoch": 0.11344675315230873, "grad_norm": 642.8786010742188, "learning_rate": 3.998150862490453e-05, "loss": 83.828, "step": 28080 }, { "epoch": 0.11348715441767636, "grad_norm": 1636.2587890625, "learning_rate": 3.998138837477195e-05, "loss": 184.4266, "step": 28090 }, { "epoch": 0.113527555683044, "grad_norm": 778.3223876953125, "learning_rate": 3.998126773509191e-05, "loss": 118.4609, "step": 28100 }, { "epoch": 0.11356795694841162, "grad_norm": 882.5321044921875, "learning_rate": 3.998114670586678e-05, "loss": 98.4019, "step": 28110 }, { "epoch": 0.11360835821377925, "grad_norm": 2727.796630859375, "learning_rate": 3.998102528709889e-05, "loss": 99.3129, "step": 28120 }, { "epoch": 0.11364875947914689, "grad_norm": 925.4190063476562, "learning_rate": 3.998090347879063e-05, "loss": 99.0337, "step": 28130 }, { "epoch": 0.11368916074451452, "grad_norm": 765.3026123046875, "learning_rate": 3.998078128094437e-05, "loss": 93.1959, "step": 28140 }, { "epoch": 0.11372956200988214, "grad_norm": 675.1519165039062, "learning_rate": 3.99806586935625e-05, "loss": 79.8641, "step": 28150 }, { "epoch": 0.11376996327524978, "grad_norm": 854.69921875, "learning_rate": 3.9980535716647394e-05, "loss": 121.8348, "step": 28160 }, { "epoch": 0.11381036454061741, "grad_norm": 759.4384155273438, "learning_rate": 3.998041235020146e-05, "loss": 103.8514, "step": 28170 }, { "epoch": 0.11385076580598505, "grad_norm": 1522.1177978515625, "learning_rate": 3.99802885942271e-05, "loss": 110.5345, "step": 28180 }, { "epoch": 0.11389116707135268, "grad_norm": 1211.49072265625, "learning_rate": 3.998016444872673e-05, "loss": 119.6895, "step": 28190 }, { "epoch": 0.1139315683367203, "grad_norm": 1361.937255859375, "learning_rate": 3.998003991370277e-05, "loss": 92.2974, "step": 28200 }, { "epoch": 0.11397196960208794, "grad_norm": 818.15380859375, "learning_rate": 3.9979914989157634e-05, "loss": 87.3831, "step": 28210 }, { "epoch": 0.11401237086745557, "grad_norm": 793.6883544921875, "learning_rate": 3.997978967509378e-05, "loss": 77.2384, "step": 28220 }, { "epoch": 0.1140527721328232, "grad_norm": 1113.99267578125, "learning_rate": 3.997966397151364e-05, "loss": 92.0928, "step": 28230 }, { "epoch": 0.11409317339819083, "grad_norm": 824.6795043945312, "learning_rate": 3.997953787841965e-05, "loss": 112.8881, "step": 28240 }, { "epoch": 0.11413357466355846, "grad_norm": 890.2943725585938, "learning_rate": 3.99794113958143e-05, "loss": 94.8603, "step": 28250 }, { "epoch": 0.1141739759289261, "grad_norm": 859.4837036132812, "learning_rate": 3.997928452370003e-05, "loss": 85.9278, "step": 28260 }, { "epoch": 0.11421437719429373, "grad_norm": 412.1380615234375, "learning_rate": 3.997915726207932e-05, "loss": 137.5683, "step": 28270 }, { "epoch": 0.11425477845966135, "grad_norm": 4836.41845703125, "learning_rate": 3.9979029610954664e-05, "loss": 159.7229, "step": 28280 }, { "epoch": 0.11429517972502899, "grad_norm": 1094.9395751953125, "learning_rate": 3.997890157032853e-05, "loss": 94.2975, "step": 28290 }, { "epoch": 0.11433558099039662, "grad_norm": 1191.2569580078125, "learning_rate": 3.997877314020343e-05, "loss": 75.3415, "step": 28300 }, { "epoch": 0.11437598225576424, "grad_norm": 915.8956298828125, "learning_rate": 3.9978644320581856e-05, "loss": 102.0962, "step": 28310 }, { "epoch": 0.11441638352113188, "grad_norm": 797.2686767578125, "learning_rate": 3.997851511146633e-05, "loss": 98.4335, "step": 28320 }, { "epoch": 0.11445678478649951, "grad_norm": 565.2205810546875, "learning_rate": 3.997838551285936e-05, "loss": 110.0382, "step": 28330 }, { "epoch": 0.11449718605186715, "grad_norm": 656.094482421875, "learning_rate": 3.9978255524763494e-05, "loss": 68.3598, "step": 28340 }, { "epoch": 0.11453758731723478, "grad_norm": 2733.058837890625, "learning_rate": 3.9978125147181235e-05, "loss": 134.7628, "step": 28350 }, { "epoch": 0.1145779885826024, "grad_norm": 508.31982421875, "learning_rate": 3.997799438011515e-05, "loss": 74.2418, "step": 28360 }, { "epoch": 0.11461838984797004, "grad_norm": 1044.4010009765625, "learning_rate": 3.9977863223567774e-05, "loss": 101.4478, "step": 28370 }, { "epoch": 0.11465879111333767, "grad_norm": 888.8732299804688, "learning_rate": 3.997773167754167e-05, "loss": 95.4757, "step": 28380 }, { "epoch": 0.1146991923787053, "grad_norm": 869.9403076171875, "learning_rate": 3.9977599742039404e-05, "loss": 101.585, "step": 28390 }, { "epoch": 0.11473959364407293, "grad_norm": 450.99981689453125, "learning_rate": 3.9977467417063544e-05, "loss": 102.3021, "step": 28400 }, { "epoch": 0.11477999490944056, "grad_norm": 1323.034423828125, "learning_rate": 3.9977334702616676e-05, "loss": 105.8671, "step": 28410 }, { "epoch": 0.1148203961748082, "grad_norm": 2043.1444091796875, "learning_rate": 3.997720159870137e-05, "loss": 101.5552, "step": 28420 }, { "epoch": 0.11486079744017583, "grad_norm": 1640.767822265625, "learning_rate": 3.997706810532025e-05, "loss": 107.5822, "step": 28430 }, { "epoch": 0.11490119870554345, "grad_norm": 643.0208740234375, "learning_rate": 3.9976934222475893e-05, "loss": 118.5226, "step": 28440 }, { "epoch": 0.11494159997091109, "grad_norm": 1271.7794189453125, "learning_rate": 3.997679995017092e-05, "loss": 59.5527, "step": 28450 }, { "epoch": 0.11498200123627872, "grad_norm": 1369.9146728515625, "learning_rate": 3.997666528840795e-05, "loss": 82.6735, "step": 28460 }, { "epoch": 0.11502240250164635, "grad_norm": 601.714599609375, "learning_rate": 3.99765302371896e-05, "loss": 82.4955, "step": 28470 }, { "epoch": 0.11506280376701399, "grad_norm": 1195.427001953125, "learning_rate": 3.9976394796518514e-05, "loss": 88.4767, "step": 28480 }, { "epoch": 0.11510320503238161, "grad_norm": 457.9436340332031, "learning_rate": 3.997625896639733e-05, "loss": 132.1176, "step": 28490 }, { "epoch": 0.11514360629774925, "grad_norm": 1108.717529296875, "learning_rate": 3.9976122746828684e-05, "loss": 130.0687, "step": 28500 }, { "epoch": 0.11518400756311688, "grad_norm": 1652.4522705078125, "learning_rate": 3.997598613781525e-05, "loss": 113.9347, "step": 28510 }, { "epoch": 0.1152244088284845, "grad_norm": 717.0134887695312, "learning_rate": 3.997584913935967e-05, "loss": 76.4573, "step": 28520 }, { "epoch": 0.11526481009385214, "grad_norm": 928.9058227539062, "learning_rate": 3.997571175146463e-05, "loss": 70.7099, "step": 28530 }, { "epoch": 0.11530521135921977, "grad_norm": 1893.921142578125, "learning_rate": 3.997557397413281e-05, "loss": 94.3326, "step": 28540 }, { "epoch": 0.1153456126245874, "grad_norm": 938.5257568359375, "learning_rate": 3.9975435807366895e-05, "loss": 117.9883, "step": 28550 }, { "epoch": 0.11538601388995504, "grad_norm": 2122.823486328125, "learning_rate": 3.997529725116957e-05, "loss": 82.8352, "step": 28560 }, { "epoch": 0.11542641515532266, "grad_norm": 1137.513671875, "learning_rate": 3.9975158305543536e-05, "loss": 121.7476, "step": 28570 }, { "epoch": 0.1154668164206903, "grad_norm": 964.6400756835938, "learning_rate": 3.9975018970491515e-05, "loss": 94.4993, "step": 28580 }, { "epoch": 0.11550721768605793, "grad_norm": 1079.1875, "learning_rate": 3.997487924601621e-05, "loss": 136.5345, "step": 28590 }, { "epoch": 0.11554761895142555, "grad_norm": 0.0, "learning_rate": 3.997473913212036e-05, "loss": 98.4004, "step": 28600 }, { "epoch": 0.1155880202167932, "grad_norm": 823.25830078125, "learning_rate": 3.9974598628806675e-05, "loss": 117.2009, "step": 28610 }, { "epoch": 0.11562842148216082, "grad_norm": 702.5995483398438, "learning_rate": 3.99744577360779e-05, "loss": 99.8464, "step": 28620 }, { "epoch": 0.11566882274752845, "grad_norm": 1018.5477294921875, "learning_rate": 3.9974316453936806e-05, "loss": 73.2368, "step": 28630 }, { "epoch": 0.11570922401289609, "grad_norm": 824.5650634765625, "learning_rate": 3.997417478238612e-05, "loss": 71.0984, "step": 28640 }, { "epoch": 0.11574962527826371, "grad_norm": 871.6005859375, "learning_rate": 3.9974032721428615e-05, "loss": 150.4988, "step": 28650 }, { "epoch": 0.11579002654363135, "grad_norm": 1187.3568115234375, "learning_rate": 3.9973890271067056e-05, "loss": 138.0883, "step": 28660 }, { "epoch": 0.11583042780899898, "grad_norm": 1488.8284912109375, "learning_rate": 3.997374743130423e-05, "loss": 119.0744, "step": 28670 }, { "epoch": 0.1158708290743666, "grad_norm": 1320.722900390625, "learning_rate": 3.9973604202142906e-05, "loss": 119.735, "step": 28680 }, { "epoch": 0.11591123033973424, "grad_norm": 2015.63818359375, "learning_rate": 3.997346058358589e-05, "loss": 130.6859, "step": 28690 }, { "epoch": 0.11595163160510187, "grad_norm": 1006.3882446289062, "learning_rate": 3.997331657563598e-05, "loss": 120.2585, "step": 28700 }, { "epoch": 0.1159920328704695, "grad_norm": 1254.012451171875, "learning_rate": 3.997317217829598e-05, "loss": 105.0112, "step": 28710 }, { "epoch": 0.11603243413583714, "grad_norm": 1289.52001953125, "learning_rate": 3.99730273915687e-05, "loss": 115.2411, "step": 28720 }, { "epoch": 0.11607283540120476, "grad_norm": 3730.40771484375, "learning_rate": 3.997288221545697e-05, "loss": 89.3371, "step": 28730 }, { "epoch": 0.1161132366665724, "grad_norm": 1199.5682373046875, "learning_rate": 3.997273664996361e-05, "loss": 116.5537, "step": 28740 }, { "epoch": 0.11615363793194003, "grad_norm": 372.0835876464844, "learning_rate": 3.9972590695091476e-05, "loss": 56.8714, "step": 28750 }, { "epoch": 0.11619403919730766, "grad_norm": 1533.7314453125, "learning_rate": 3.997244435084341e-05, "loss": 81.515, "step": 28760 }, { "epoch": 0.1162344404626753, "grad_norm": 3192.15185546875, "learning_rate": 3.997229761722225e-05, "loss": 127.6431, "step": 28770 }, { "epoch": 0.11627484172804292, "grad_norm": 1288.7010498046875, "learning_rate": 3.997215049423086e-05, "loss": 85.945, "step": 28780 }, { "epoch": 0.11631524299341055, "grad_norm": 1837.347900390625, "learning_rate": 3.9972002981872124e-05, "loss": 126.8788, "step": 28790 }, { "epoch": 0.11635564425877819, "grad_norm": 1303.0589599609375, "learning_rate": 3.9971855080148906e-05, "loss": 112.7729, "step": 28800 }, { "epoch": 0.11639604552414581, "grad_norm": 2085.722900390625, "learning_rate": 3.997170678906409e-05, "loss": 134.1721, "step": 28810 }, { "epoch": 0.11643644678951345, "grad_norm": 512.4556274414062, "learning_rate": 3.997155810862057e-05, "loss": 76.3151, "step": 28820 }, { "epoch": 0.11647684805488108, "grad_norm": 727.9221801757812, "learning_rate": 3.997140903882124e-05, "loss": 76.4606, "step": 28830 }, { "epoch": 0.1165172493202487, "grad_norm": 1693.4381103515625, "learning_rate": 3.9971259579669006e-05, "loss": 103.796, "step": 28840 }, { "epoch": 0.11655765058561635, "grad_norm": 1555.8154296875, "learning_rate": 3.997110973116679e-05, "loss": 112.5202, "step": 28850 }, { "epoch": 0.11659805185098397, "grad_norm": 585.7073974609375, "learning_rate": 3.99709594933175e-05, "loss": 81.8929, "step": 28860 }, { "epoch": 0.1166384531163516, "grad_norm": 1181.9959716796875, "learning_rate": 3.997080886612408e-05, "loss": 112.615, "step": 28870 }, { "epoch": 0.11667885438171924, "grad_norm": 720.1932373046875, "learning_rate": 3.9970657849589465e-05, "loss": 72.8144, "step": 28880 }, { "epoch": 0.11671925564708686, "grad_norm": 1048.8651123046875, "learning_rate": 3.9970506443716586e-05, "loss": 112.9906, "step": 28890 }, { "epoch": 0.1167596569124545, "grad_norm": 1059.72998046875, "learning_rate": 3.99703546485084e-05, "loss": 149.952, "step": 28900 }, { "epoch": 0.11680005817782213, "grad_norm": 1071.949951171875, "learning_rate": 3.997020246396787e-05, "loss": 111.094, "step": 28910 }, { "epoch": 0.11684045944318976, "grad_norm": 1332.3616943359375, "learning_rate": 3.9970049890097965e-05, "loss": 88.1347, "step": 28920 }, { "epoch": 0.1168808607085574, "grad_norm": 437.4418029785156, "learning_rate": 3.996989692690165e-05, "loss": 60.3282, "step": 28930 }, { "epoch": 0.11692126197392502, "grad_norm": 419.84619140625, "learning_rate": 3.996974357438192e-05, "loss": 115.4472, "step": 28940 }, { "epoch": 0.11696166323929265, "grad_norm": 864.1251220703125, "learning_rate": 3.996958983254175e-05, "loss": 145.9911, "step": 28950 }, { "epoch": 0.11700206450466029, "grad_norm": 953.606689453125, "learning_rate": 3.996943570138416e-05, "loss": 117.4569, "step": 28960 }, { "epoch": 0.11704246577002791, "grad_norm": 845.4664916992188, "learning_rate": 3.996928118091213e-05, "loss": 69.0099, "step": 28970 }, { "epoch": 0.11708286703539555, "grad_norm": 1636.659423828125, "learning_rate": 3.996912627112868e-05, "loss": 120.3927, "step": 28980 }, { "epoch": 0.11712326830076318, "grad_norm": 1072.338623046875, "learning_rate": 3.996897097203684e-05, "loss": 119.216, "step": 28990 }, { "epoch": 0.11716366956613081, "grad_norm": 674.6630859375, "learning_rate": 3.9968815283639625e-05, "loss": 73.2614, "step": 29000 }, { "epoch": 0.11720407083149845, "grad_norm": 616.6441650390625, "learning_rate": 3.996865920594007e-05, "loss": 123.0924, "step": 29010 }, { "epoch": 0.11724447209686607, "grad_norm": 1423.77978515625, "learning_rate": 3.996850273894124e-05, "loss": 138.7357, "step": 29020 }, { "epoch": 0.1172848733622337, "grad_norm": 1283.6065673828125, "learning_rate": 3.996834588264615e-05, "loss": 80.1142, "step": 29030 }, { "epoch": 0.11732527462760134, "grad_norm": 1390.6195068359375, "learning_rate": 3.9968188637057886e-05, "loss": 112.3068, "step": 29040 }, { "epoch": 0.11736567589296897, "grad_norm": 6142.37744140625, "learning_rate": 3.99680310021795e-05, "loss": 119.8403, "step": 29050 }, { "epoch": 0.1174060771583366, "grad_norm": 497.8596496582031, "learning_rate": 3.9967872978014074e-05, "loss": 57.4572, "step": 29060 }, { "epoch": 0.11744647842370423, "grad_norm": 476.202880859375, "learning_rate": 3.996771456456468e-05, "loss": 99.7678, "step": 29070 }, { "epoch": 0.11748687968907186, "grad_norm": 957.2471313476562, "learning_rate": 3.996755576183442e-05, "loss": 120.2677, "step": 29080 }, { "epoch": 0.1175272809544395, "grad_norm": 606.5353393554688, "learning_rate": 3.9967396569826374e-05, "loss": 90.1209, "step": 29090 }, { "epoch": 0.11756768221980712, "grad_norm": 0.0, "learning_rate": 3.996723698854365e-05, "loss": 62.9719, "step": 29100 }, { "epoch": 0.11760808348517475, "grad_norm": 853.1898193359375, "learning_rate": 3.996707701798936e-05, "loss": 109.8049, "step": 29110 }, { "epoch": 0.11764848475054239, "grad_norm": 1235.3507080078125, "learning_rate": 3.9966916658166625e-05, "loss": 88.3993, "step": 29120 }, { "epoch": 0.11768888601591002, "grad_norm": 733.3147583007812, "learning_rate": 3.996675590907857e-05, "loss": 68.5436, "step": 29130 }, { "epoch": 0.11772928728127766, "grad_norm": 1256.930908203125, "learning_rate": 3.996659477072833e-05, "loss": 96.9386, "step": 29140 }, { "epoch": 0.11776968854664528, "grad_norm": 818.8697509765625, "learning_rate": 3.996643324311905e-05, "loss": 120.3725, "step": 29150 }, { "epoch": 0.11781008981201291, "grad_norm": 744.5905151367188, "learning_rate": 3.9966271326253874e-05, "loss": 79.7646, "step": 29160 }, { "epoch": 0.11785049107738055, "grad_norm": 924.5302734375, "learning_rate": 3.996610902013595e-05, "loss": 119.0584, "step": 29170 }, { "epoch": 0.11789089234274817, "grad_norm": 755.6444091796875, "learning_rate": 3.996594632476846e-05, "loss": 86.5377, "step": 29180 }, { "epoch": 0.1179312936081158, "grad_norm": 709.6162719726562, "learning_rate": 3.996578324015456e-05, "loss": 152.3329, "step": 29190 }, { "epoch": 0.11797169487348344, "grad_norm": 635.0053100585938, "learning_rate": 3.996561976629744e-05, "loss": 67.3558, "step": 29200 }, { "epoch": 0.11801209613885107, "grad_norm": 3188.703857421875, "learning_rate": 3.996545590320029e-05, "loss": 132.2558, "step": 29210 }, { "epoch": 0.1180524974042187, "grad_norm": 1246.9461669921875, "learning_rate": 3.996529165086629e-05, "loss": 92.1603, "step": 29220 }, { "epoch": 0.11809289866958633, "grad_norm": 525.8442993164062, "learning_rate": 3.9965127009298655e-05, "loss": 113.832, "step": 29230 }, { "epoch": 0.11813329993495396, "grad_norm": 1278.482421875, "learning_rate": 3.9964961978500586e-05, "loss": 119.3593, "step": 29240 }, { "epoch": 0.1181737012003216, "grad_norm": 586.2559204101562, "learning_rate": 3.9964796558475314e-05, "loss": 124.3714, "step": 29250 }, { "epoch": 0.11821410246568922, "grad_norm": 857.21533203125, "learning_rate": 3.996463074922604e-05, "loss": 74.2636, "step": 29260 }, { "epoch": 0.11825450373105685, "grad_norm": 1271.8468017578125, "learning_rate": 3.996446455075602e-05, "loss": 98.3933, "step": 29270 }, { "epoch": 0.11829490499642449, "grad_norm": 910.466064453125, "learning_rate": 3.996429796306848e-05, "loss": 98.6038, "step": 29280 }, { "epoch": 0.11833530626179212, "grad_norm": 947.049072265625, "learning_rate": 3.996413098616668e-05, "loss": 146.7748, "step": 29290 }, { "epoch": 0.11837570752715976, "grad_norm": 2612.84521484375, "learning_rate": 3.9963963620053865e-05, "loss": 119.463, "step": 29300 }, { "epoch": 0.11841610879252738, "grad_norm": 694.37255859375, "learning_rate": 3.99637958647333e-05, "loss": 125.3401, "step": 29310 }, { "epoch": 0.11845651005789501, "grad_norm": 731.656494140625, "learning_rate": 3.996362772020826e-05, "loss": 68.2026, "step": 29320 }, { "epoch": 0.11849691132326265, "grad_norm": 1356.3507080078125, "learning_rate": 3.9963459186482014e-05, "loss": 122.4604, "step": 29330 }, { "epoch": 0.11853731258863028, "grad_norm": 1672.551513671875, "learning_rate": 3.9963290263557856e-05, "loss": 134.1063, "step": 29340 }, { "epoch": 0.1185777138539979, "grad_norm": 2280.984619140625, "learning_rate": 3.996312095143908e-05, "loss": 124.3759, "step": 29350 }, { "epoch": 0.11861811511936554, "grad_norm": 687.0208129882812, "learning_rate": 3.996295125012898e-05, "loss": 105.2226, "step": 29360 }, { "epoch": 0.11865851638473317, "grad_norm": 830.426513671875, "learning_rate": 3.9962781159630865e-05, "loss": 87.1865, "step": 29370 }, { "epoch": 0.11869891765010081, "grad_norm": 1512.3504638671875, "learning_rate": 3.9962610679948065e-05, "loss": 105.9469, "step": 29380 }, { "epoch": 0.11873931891546843, "grad_norm": 955.3963012695312, "learning_rate": 3.9962439811083875e-05, "loss": 136.482, "step": 29390 }, { "epoch": 0.11877972018083606, "grad_norm": 398.2100830078125, "learning_rate": 3.9962268553041656e-05, "loss": 96.7592, "step": 29400 }, { "epoch": 0.1188201214462037, "grad_norm": 1317.3018798828125, "learning_rate": 3.996209690582473e-05, "loss": 90.4329, "step": 29410 }, { "epoch": 0.11886052271157133, "grad_norm": 1313.802490234375, "learning_rate": 3.996192486943645e-05, "loss": 119.6198, "step": 29420 }, { "epoch": 0.11890092397693895, "grad_norm": 1340.90869140625, "learning_rate": 3.996175244388017e-05, "loss": 120.3501, "step": 29430 }, { "epoch": 0.11894132524230659, "grad_norm": 683.0484008789062, "learning_rate": 3.9961579629159244e-05, "loss": 107.2445, "step": 29440 }, { "epoch": 0.11898172650767422, "grad_norm": 804.3728637695312, "learning_rate": 3.9961406425277045e-05, "loss": 97.0491, "step": 29450 }, { "epoch": 0.11902212777304186, "grad_norm": 591.4122314453125, "learning_rate": 3.9961232832236956e-05, "loss": 99.1442, "step": 29460 }, { "epoch": 0.11906252903840948, "grad_norm": 1971.5877685546875, "learning_rate": 3.9961058850042345e-05, "loss": 99.8225, "step": 29470 }, { "epoch": 0.11910293030377711, "grad_norm": 1043.689208984375, "learning_rate": 3.9960884478696627e-05, "loss": 90.9582, "step": 29480 }, { "epoch": 0.11914333156914475, "grad_norm": 2496.54541015625, "learning_rate": 3.996070971820319e-05, "loss": 131.3534, "step": 29490 }, { "epoch": 0.11918373283451238, "grad_norm": 736.2053833007812, "learning_rate": 3.9960534568565436e-05, "loss": 120.754, "step": 29500 }, { "epoch": 0.11922413409988, "grad_norm": 1127.8834228515625, "learning_rate": 3.996035902978679e-05, "loss": 117.7929, "step": 29510 }, { "epoch": 0.11926453536524764, "grad_norm": 747.2788696289062, "learning_rate": 3.996018310187066e-05, "loss": 84.6329, "step": 29520 }, { "epoch": 0.11930493663061527, "grad_norm": 1040.839599609375, "learning_rate": 3.9960006784820485e-05, "loss": 147.8331, "step": 29530 }, { "epoch": 0.11934533789598291, "grad_norm": 2474.502197265625, "learning_rate": 3.99598300786397e-05, "loss": 118.2935, "step": 29540 }, { "epoch": 0.11938573916135053, "grad_norm": 1262.937255859375, "learning_rate": 3.995965298333176e-05, "loss": 85.3698, "step": 29550 }, { "epoch": 0.11942614042671816, "grad_norm": 833.2108154296875, "learning_rate": 3.99594754989001e-05, "loss": 100.4102, "step": 29560 }, { "epoch": 0.1194665416920858, "grad_norm": 1125.703125, "learning_rate": 3.9959297625348196e-05, "loss": 115.4946, "step": 29570 }, { "epoch": 0.11950694295745343, "grad_norm": 741.2850341796875, "learning_rate": 3.995911936267951e-05, "loss": 90.9823, "step": 29580 }, { "epoch": 0.11954734422282105, "grad_norm": 465.7984313964844, "learning_rate": 3.995894071089751e-05, "loss": 95.7353, "step": 29590 }, { "epoch": 0.1195877454881887, "grad_norm": 1323.34130859375, "learning_rate": 3.995876167000569e-05, "loss": 72.345, "step": 29600 }, { "epoch": 0.11962814675355632, "grad_norm": 2258.849609375, "learning_rate": 3.9958582240007536e-05, "loss": 131.1186, "step": 29610 }, { "epoch": 0.11966854801892396, "grad_norm": 854.4385375976562, "learning_rate": 3.995840242090655e-05, "loss": 95.1046, "step": 29620 }, { "epoch": 0.11970894928429159, "grad_norm": 394.0882873535156, "learning_rate": 3.995822221270622e-05, "loss": 112.4525, "step": 29630 }, { "epoch": 0.11974935054965921, "grad_norm": 636.0965576171875, "learning_rate": 3.9958041615410085e-05, "loss": 63.1489, "step": 29640 }, { "epoch": 0.11978975181502685, "grad_norm": 1257.3487548828125, "learning_rate": 3.995786062902165e-05, "loss": 154.1359, "step": 29650 }, { "epoch": 0.11983015308039448, "grad_norm": 997.6132202148438, "learning_rate": 3.995767925354445e-05, "loss": 77.0885, "step": 29660 }, { "epoch": 0.1198705543457621, "grad_norm": 939.9737548828125, "learning_rate": 3.9957497488982014e-05, "loss": 101.9474, "step": 29670 }, { "epoch": 0.11991095561112974, "grad_norm": 1560.003173828125, "learning_rate": 3.995731533533789e-05, "loss": 128.4176, "step": 29680 }, { "epoch": 0.11995135687649737, "grad_norm": 1200.610107421875, "learning_rate": 3.995713279261563e-05, "loss": 105.8525, "step": 29690 }, { "epoch": 0.11999175814186501, "grad_norm": 579.2079467773438, "learning_rate": 3.995694986081879e-05, "loss": 96.4495, "step": 29700 }, { "epoch": 0.12003215940723264, "grad_norm": 8644.3876953125, "learning_rate": 3.9956766539950934e-05, "loss": 148.03, "step": 29710 }, { "epoch": 0.12007256067260026, "grad_norm": 4799.1953125, "learning_rate": 3.995658283001564e-05, "loss": 122.2243, "step": 29720 }, { "epoch": 0.1201129619379679, "grad_norm": 953.9501342773438, "learning_rate": 3.99563987310165e-05, "loss": 112.8426, "step": 29730 }, { "epoch": 0.12015336320333553, "grad_norm": 590.100830078125, "learning_rate": 3.995621424295709e-05, "loss": 102.4715, "step": 29740 }, { "epoch": 0.12019376446870315, "grad_norm": 3277.8310546875, "learning_rate": 3.9956029365841005e-05, "loss": 101.0477, "step": 29750 }, { "epoch": 0.1202341657340708, "grad_norm": 404.7799072265625, "learning_rate": 3.995584409967185e-05, "loss": 113.8865, "step": 29760 }, { "epoch": 0.12027456699943842, "grad_norm": 1674.560791015625, "learning_rate": 3.9955658444453244e-05, "loss": 111.887, "step": 29770 }, { "epoch": 0.12031496826480606, "grad_norm": 1182.077880859375, "learning_rate": 3.995547240018881e-05, "loss": 109.1132, "step": 29780 }, { "epoch": 0.12035536953017369, "grad_norm": 401.8653869628906, "learning_rate": 3.995528596688216e-05, "loss": 82.7655, "step": 29790 }, { "epoch": 0.12039577079554131, "grad_norm": 2734.860107421875, "learning_rate": 3.995509914453694e-05, "loss": 104.0152, "step": 29800 }, { "epoch": 0.12043617206090895, "grad_norm": 696.2372436523438, "learning_rate": 3.995491193315678e-05, "loss": 67.8705, "step": 29810 }, { "epoch": 0.12047657332627658, "grad_norm": 1432.671142578125, "learning_rate": 3.9954724332745344e-05, "loss": 119.9143, "step": 29820 }, { "epoch": 0.1205169745916442, "grad_norm": 696.959228515625, "learning_rate": 3.9954536343306286e-05, "loss": 93.7465, "step": 29830 }, { "epoch": 0.12055737585701184, "grad_norm": 1257.0362548828125, "learning_rate": 3.995434796484326e-05, "loss": 89.61, "step": 29840 }, { "epoch": 0.12059777712237947, "grad_norm": 1463.5260009765625, "learning_rate": 3.9954159197359956e-05, "loss": 84.8807, "step": 29850 }, { "epoch": 0.12063817838774711, "grad_norm": 579.013916015625, "learning_rate": 3.9953970040860045e-05, "loss": 74.2993, "step": 29860 }, { "epoch": 0.12067857965311474, "grad_norm": 880.2040405273438, "learning_rate": 3.9953780495347214e-05, "loss": 80.5523, "step": 29870 }, { "epoch": 0.12071898091848236, "grad_norm": 1162.419677734375, "learning_rate": 3.995359056082516e-05, "loss": 86.788, "step": 29880 }, { "epoch": 0.12075938218385, "grad_norm": 1339.3079833984375, "learning_rate": 3.9953400237297584e-05, "loss": 96.6253, "step": 29890 }, { "epoch": 0.12079978344921763, "grad_norm": 717.9099731445312, "learning_rate": 3.99532095247682e-05, "loss": 83.6678, "step": 29900 }, { "epoch": 0.12084018471458526, "grad_norm": 935.6298217773438, "learning_rate": 3.995301842324072e-05, "loss": 134.8532, "step": 29910 }, { "epoch": 0.1208805859799529, "grad_norm": 474.3695068359375, "learning_rate": 3.9952826932718874e-05, "loss": 114.1753, "step": 29920 }, { "epoch": 0.12092098724532052, "grad_norm": 1465.263671875, "learning_rate": 3.995263505320639e-05, "loss": 191.371, "step": 29930 }, { "epoch": 0.12096138851068816, "grad_norm": 653.8214111328125, "learning_rate": 3.9952442784707025e-05, "loss": 103.5274, "step": 29940 }, { "epoch": 0.12100178977605579, "grad_norm": 1105.3265380859375, "learning_rate": 3.995225012722451e-05, "loss": 128.0836, "step": 29950 }, { "epoch": 0.12104219104142341, "grad_norm": 1911.07861328125, "learning_rate": 3.995205708076261e-05, "loss": 129.5159, "step": 29960 }, { "epoch": 0.12108259230679105, "grad_norm": 665.4229125976562, "learning_rate": 3.995186364532507e-05, "loss": 108.7585, "step": 29970 }, { "epoch": 0.12112299357215868, "grad_norm": 1236.4959716796875, "learning_rate": 3.99516698209157e-05, "loss": 109.1846, "step": 29980 }, { "epoch": 0.1211633948375263, "grad_norm": 2250.654541015625, "learning_rate": 3.9951475607538234e-05, "loss": 77.2249, "step": 29990 }, { "epoch": 0.12120379610289395, "grad_norm": 868.831298828125, "learning_rate": 3.9951281005196486e-05, "loss": 144.552, "step": 30000 }, { "epoch": 0.12124419736826157, "grad_norm": 448.3783874511719, "learning_rate": 3.995108601389425e-05, "loss": 90.4089, "step": 30010 }, { "epoch": 0.12128459863362921, "grad_norm": 945.7691040039062, "learning_rate": 3.995089063363531e-05, "loss": 109.7032, "step": 30020 }, { "epoch": 0.12132499989899684, "grad_norm": 915.0947265625, "learning_rate": 3.9950694864423496e-05, "loss": 152.643, "step": 30030 }, { "epoch": 0.12136540116436446, "grad_norm": 700.6856079101562, "learning_rate": 3.995049870626261e-05, "loss": 87.6623, "step": 30040 }, { "epoch": 0.1214058024297321, "grad_norm": 798.1749267578125, "learning_rate": 3.9950302159156476e-05, "loss": 123.6986, "step": 30050 }, { "epoch": 0.12144620369509973, "grad_norm": 1099.6778564453125, "learning_rate": 3.9950105223108935e-05, "loss": 125.2038, "step": 30060 }, { "epoch": 0.12148660496046736, "grad_norm": 781.4649658203125, "learning_rate": 3.994990789812383e-05, "loss": 121.0432, "step": 30070 }, { "epoch": 0.121527006225835, "grad_norm": 260.9813537597656, "learning_rate": 3.994971018420498e-05, "loss": 107.953, "step": 30080 }, { "epoch": 0.12156740749120262, "grad_norm": 2069.24169921875, "learning_rate": 3.9949512081356275e-05, "loss": 117.141, "step": 30090 }, { "epoch": 0.12160780875657026, "grad_norm": 1008.3120727539062, "learning_rate": 3.9949313589581555e-05, "loss": 78.568, "step": 30100 }, { "epoch": 0.12164821002193789, "grad_norm": 1447.808349609375, "learning_rate": 3.99491147088847e-05, "loss": 93.7204, "step": 30110 }, { "epoch": 0.12168861128730551, "grad_norm": 699.3298950195312, "learning_rate": 3.994891543926958e-05, "loss": 114.6168, "step": 30120 }, { "epoch": 0.12172901255267315, "grad_norm": 2293.560302734375, "learning_rate": 3.9948715780740077e-05, "loss": 80.4595, "step": 30130 }, { "epoch": 0.12176941381804078, "grad_norm": 879.6803588867188, "learning_rate": 3.99485157333001e-05, "loss": 83.2679, "step": 30140 }, { "epoch": 0.12180981508340841, "grad_norm": 1878.105224609375, "learning_rate": 3.9948315296953535e-05, "loss": 104.4954, "step": 30150 }, { "epoch": 0.12185021634877605, "grad_norm": 940.0166625976562, "learning_rate": 3.9948114471704296e-05, "loss": 128.4855, "step": 30160 }, { "epoch": 0.12189061761414367, "grad_norm": 545.82861328125, "learning_rate": 3.9947913257556285e-05, "loss": 112.7048, "step": 30170 }, { "epoch": 0.12193101887951131, "grad_norm": 709.3935546875, "learning_rate": 3.9947711654513445e-05, "loss": 92.9257, "step": 30180 }, { "epoch": 0.12197142014487894, "grad_norm": 2208.292236328125, "learning_rate": 3.99475096625797e-05, "loss": 110.0978, "step": 30190 }, { "epoch": 0.12201182141024657, "grad_norm": 939.6221923828125, "learning_rate": 3.994730728175897e-05, "loss": 81.1035, "step": 30200 }, { "epoch": 0.1220522226756142, "grad_norm": 754.7445678710938, "learning_rate": 3.994710451205523e-05, "loss": 76.923, "step": 30210 }, { "epoch": 0.12209262394098183, "grad_norm": 710.9456787109375, "learning_rate": 3.994690135347241e-05, "loss": 81.77, "step": 30220 }, { "epoch": 0.12213302520634946, "grad_norm": 7868.96240234375, "learning_rate": 3.9946697806014476e-05, "loss": 131.3129, "step": 30230 }, { "epoch": 0.1221734264717171, "grad_norm": 914.0556030273438, "learning_rate": 3.99464938696854e-05, "loss": 76.2861, "step": 30240 }, { "epoch": 0.12221382773708472, "grad_norm": 835.4411010742188, "learning_rate": 3.994628954448916e-05, "loss": 73.5062, "step": 30250 }, { "epoch": 0.12225422900245236, "grad_norm": 1167.3447265625, "learning_rate": 3.994608483042974e-05, "loss": 102.8286, "step": 30260 }, { "epoch": 0.12229463026781999, "grad_norm": 631.1087646484375, "learning_rate": 3.9945879727511126e-05, "loss": 58.7113, "step": 30270 }, { "epoch": 0.12233503153318762, "grad_norm": 445.20159912109375, "learning_rate": 3.9945674235737314e-05, "loss": 70.526, "step": 30280 }, { "epoch": 0.12237543279855526, "grad_norm": 284.5147705078125, "learning_rate": 3.994546835511232e-05, "loss": 73.7212, "step": 30290 }, { "epoch": 0.12241583406392288, "grad_norm": 1401.1563720703125, "learning_rate": 3.994526208564014e-05, "loss": 93.3938, "step": 30300 }, { "epoch": 0.12245623532929051, "grad_norm": 2154.647705078125, "learning_rate": 3.994505542732482e-05, "loss": 158.8047, "step": 30310 }, { "epoch": 0.12249663659465815, "grad_norm": 5086.50927734375, "learning_rate": 3.994484838017037e-05, "loss": 93.4497, "step": 30320 }, { "epoch": 0.12253703786002577, "grad_norm": 782.09521484375, "learning_rate": 3.994464094418083e-05, "loss": 104.2793, "step": 30330 }, { "epoch": 0.12257743912539341, "grad_norm": 2351.08056640625, "learning_rate": 3.994443311936025e-05, "loss": 120.7483, "step": 30340 }, { "epoch": 0.12261784039076104, "grad_norm": 1276.39111328125, "learning_rate": 3.994422490571268e-05, "loss": 80.3845, "step": 30350 }, { "epoch": 0.12265824165612867, "grad_norm": 940.2988891601562, "learning_rate": 3.994401630324217e-05, "loss": 133.5265, "step": 30360 }, { "epoch": 0.1226986429214963, "grad_norm": 690.517578125, "learning_rate": 3.99438073119528e-05, "loss": 130.6995, "step": 30370 }, { "epoch": 0.12273904418686393, "grad_norm": 490.5731201171875, "learning_rate": 3.994359793184864e-05, "loss": 91.125, "step": 30380 }, { "epoch": 0.12277944545223156, "grad_norm": 439.6798400878906, "learning_rate": 3.994338816293377e-05, "loss": 83.8165, "step": 30390 }, { "epoch": 0.1228198467175992, "grad_norm": 751.61767578125, "learning_rate": 3.994317800521228e-05, "loss": 100.241, "step": 30400 }, { "epoch": 0.12286024798296682, "grad_norm": 429.4967041015625, "learning_rate": 3.9942967458688267e-05, "loss": 90.6561, "step": 30410 }, { "epoch": 0.12290064924833445, "grad_norm": 701.9464721679688, "learning_rate": 3.9942756523365835e-05, "loss": 57.0881, "step": 30420 }, { "epoch": 0.12294105051370209, "grad_norm": 1101.32958984375, "learning_rate": 3.99425451992491e-05, "loss": 105.1508, "step": 30430 }, { "epoch": 0.12298145177906972, "grad_norm": 976.6168212890625, "learning_rate": 3.994233348634218e-05, "loss": 102.866, "step": 30440 }, { "epoch": 0.12302185304443736, "grad_norm": 747.2569580078125, "learning_rate": 3.99421213846492e-05, "loss": 59.548, "step": 30450 }, { "epoch": 0.12306225430980498, "grad_norm": 1975.991455078125, "learning_rate": 3.9941908894174295e-05, "loss": 91.4762, "step": 30460 }, { "epoch": 0.12310265557517261, "grad_norm": 1650.881103515625, "learning_rate": 3.994169601492161e-05, "loss": 111.3243, "step": 30470 }, { "epoch": 0.12314305684054025, "grad_norm": 1898.3350830078125, "learning_rate": 3.994148274689529e-05, "loss": 121.5767, "step": 30480 }, { "epoch": 0.12318345810590788, "grad_norm": 0.0, "learning_rate": 3.99412690900995e-05, "loss": 133.1026, "step": 30490 }, { "epoch": 0.1232238593712755, "grad_norm": 803.773193359375, "learning_rate": 3.994105504453841e-05, "loss": 108.3969, "step": 30500 }, { "epoch": 0.12326426063664314, "grad_norm": 1622.7261962890625, "learning_rate": 3.994084061021618e-05, "loss": 109.2424, "step": 30510 }, { "epoch": 0.12330466190201077, "grad_norm": 1043.625732421875, "learning_rate": 3.9940625787136995e-05, "loss": 99.5337, "step": 30520 }, { "epoch": 0.12334506316737841, "grad_norm": 1230.0400390625, "learning_rate": 3.9940410575305044e-05, "loss": 80.0279, "step": 30530 }, { "epoch": 0.12338546443274603, "grad_norm": 954.072021484375, "learning_rate": 3.9940194974724524e-05, "loss": 104.5978, "step": 30540 }, { "epoch": 0.12342586569811366, "grad_norm": 660.2078247070312, "learning_rate": 3.993997898539964e-05, "loss": 113.9359, "step": 30550 }, { "epoch": 0.1234662669634813, "grad_norm": 1597.9298095703125, "learning_rate": 3.993976260733459e-05, "loss": 76.1934, "step": 30560 }, { "epoch": 0.12350666822884893, "grad_norm": 1095.2469482421875, "learning_rate": 3.993954584053361e-05, "loss": 92.5734, "step": 30570 }, { "epoch": 0.12354706949421655, "grad_norm": 2675.254638671875, "learning_rate": 3.993932868500092e-05, "loss": 98.4438, "step": 30580 }, { "epoch": 0.12358747075958419, "grad_norm": 1733.7291259765625, "learning_rate": 3.9939111140740754e-05, "loss": 96.593, "step": 30590 }, { "epoch": 0.12362787202495182, "grad_norm": 516.620361328125, "learning_rate": 3.993889320775735e-05, "loss": 85.3212, "step": 30600 }, { "epoch": 0.12366827329031946, "grad_norm": 1090.36083984375, "learning_rate": 3.993867488605495e-05, "loss": 92.5375, "step": 30610 }, { "epoch": 0.12370867455568708, "grad_norm": 1519.6614990234375, "learning_rate": 3.993845617563782e-05, "loss": 158.0534, "step": 30620 }, { "epoch": 0.12374907582105471, "grad_norm": 1311.4442138671875, "learning_rate": 3.993823707651023e-05, "loss": 113.0447, "step": 30630 }, { "epoch": 0.12378947708642235, "grad_norm": 1757.02685546875, "learning_rate": 3.993801758867643e-05, "loss": 156.7644, "step": 30640 }, { "epoch": 0.12382987835178998, "grad_norm": 1286.7274169921875, "learning_rate": 3.993779771214072e-05, "loss": 102.2056, "step": 30650 }, { "epoch": 0.1238702796171576, "grad_norm": 530.4484252929688, "learning_rate": 3.9937577446907376e-05, "loss": 81.6175, "step": 30660 }, { "epoch": 0.12391068088252524, "grad_norm": 624.5010375976562, "learning_rate": 3.99373567929807e-05, "loss": 70.0852, "step": 30670 }, { "epoch": 0.12395108214789287, "grad_norm": 512.4428100585938, "learning_rate": 3.9937135750364983e-05, "loss": 61.0621, "step": 30680 }, { "epoch": 0.12399148341326051, "grad_norm": 662.2406616210938, "learning_rate": 3.9936914319064543e-05, "loss": 134.4542, "step": 30690 }, { "epoch": 0.12403188467862813, "grad_norm": 2490.583251953125, "learning_rate": 3.9936692499083696e-05, "loss": 89.6221, "step": 30700 }, { "epoch": 0.12407228594399576, "grad_norm": 1335.3441162109375, "learning_rate": 3.9936470290426765e-05, "loss": 87.1301, "step": 30710 }, { "epoch": 0.1241126872093634, "grad_norm": 2422.685546875, "learning_rate": 3.993624769309808e-05, "loss": 79.4778, "step": 30720 }, { "epoch": 0.12415308847473103, "grad_norm": 1162.318603515625, "learning_rate": 3.9936024707101984e-05, "loss": 47.9735, "step": 30730 }, { "epoch": 0.12419348974009865, "grad_norm": 2508.990234375, "learning_rate": 3.993580133244282e-05, "loss": 121.7656, "step": 30740 }, { "epoch": 0.1242338910054663, "grad_norm": 1584.348876953125, "learning_rate": 3.993557756912495e-05, "loss": 67.5902, "step": 30750 }, { "epoch": 0.12427429227083392, "grad_norm": 726.6798095703125, "learning_rate": 3.9935353417152724e-05, "loss": 75.2805, "step": 30760 }, { "epoch": 0.12431469353620156, "grad_norm": 784.2379150390625, "learning_rate": 3.9935128876530524e-05, "loss": 89.1682, "step": 30770 }, { "epoch": 0.12435509480156919, "grad_norm": 1024.3555908203125, "learning_rate": 3.9934903947262726e-05, "loss": 89.0586, "step": 30780 }, { "epoch": 0.12439549606693681, "grad_norm": 1700.198486328125, "learning_rate": 3.99346786293537e-05, "loss": 109.834, "step": 30790 }, { "epoch": 0.12443589733230445, "grad_norm": 3284.315185546875, "learning_rate": 3.993445292280787e-05, "loss": 153.4986, "step": 30800 }, { "epoch": 0.12447629859767208, "grad_norm": 1563.0628662109375, "learning_rate": 3.9934226827629603e-05, "loss": 82.126, "step": 30810 }, { "epoch": 0.1245166998630397, "grad_norm": 739.8726196289062, "learning_rate": 3.993400034382333e-05, "loss": 82.8279, "step": 30820 }, { "epoch": 0.12455710112840734, "grad_norm": 1393.825439453125, "learning_rate": 3.993377347139345e-05, "loss": 108.3987, "step": 30830 }, { "epoch": 0.12459750239377497, "grad_norm": 836.917724609375, "learning_rate": 3.99335462103444e-05, "loss": 108.2581, "step": 30840 }, { "epoch": 0.12463790365914261, "grad_norm": 1300.4273681640625, "learning_rate": 3.99333185606806e-05, "loss": 65.6786, "step": 30850 }, { "epoch": 0.12467830492451024, "grad_norm": 276.4493713378906, "learning_rate": 3.99330905224065e-05, "loss": 133.912, "step": 30860 }, { "epoch": 0.12471870618987786, "grad_norm": 712.7859497070312, "learning_rate": 3.993286209552652e-05, "loss": 115.7652, "step": 30870 }, { "epoch": 0.1247591074552455, "grad_norm": 1046.329345703125, "learning_rate": 3.9932633280045154e-05, "loss": 83.9093, "step": 30880 }, { "epoch": 0.12479950872061313, "grad_norm": 708.8577880859375, "learning_rate": 3.993240407596682e-05, "loss": 137.295, "step": 30890 }, { "epoch": 0.12483990998598075, "grad_norm": 1173.4384765625, "learning_rate": 3.993217448329602e-05, "loss": 65.7867, "step": 30900 }, { "epoch": 0.1248803112513484, "grad_norm": 589.5326538085938, "learning_rate": 3.993194450203721e-05, "loss": 93.5474, "step": 30910 }, { "epoch": 0.12492071251671602, "grad_norm": 809.4989013671875, "learning_rate": 3.993171413219489e-05, "loss": 76.3629, "step": 30920 }, { "epoch": 0.12496111378208366, "grad_norm": 1225.9090576171875, "learning_rate": 3.9931483373773524e-05, "loss": 80.0511, "step": 30930 }, { "epoch": 0.12500151504745127, "grad_norm": 442.1229248046875, "learning_rate": 3.9931252226777646e-05, "loss": 133.8095, "step": 30940 }, { "epoch": 0.1250419163128189, "grad_norm": 1054.5335693359375, "learning_rate": 3.993102069121173e-05, "loss": 81.4998, "step": 30950 }, { "epoch": 0.12508231757818655, "grad_norm": 1194.748046875, "learning_rate": 3.9930788767080316e-05, "loss": 175.7166, "step": 30960 }, { "epoch": 0.1251227188435542, "grad_norm": 434.2280578613281, "learning_rate": 3.993055645438791e-05, "loss": 99.1189, "step": 30970 }, { "epoch": 0.1251631201089218, "grad_norm": 734.3633422851562, "learning_rate": 3.993032375313904e-05, "loss": 80.5113, "step": 30980 }, { "epoch": 0.12520352137428944, "grad_norm": 1156.7469482421875, "learning_rate": 3.993009066333826e-05, "loss": 90.6556, "step": 30990 }, { "epoch": 0.12524392263965708, "grad_norm": 979.235107421875, "learning_rate": 3.992985718499009e-05, "loss": 108.3393, "step": 31000 }, { "epoch": 0.1252843239050247, "grad_norm": 570.459716796875, "learning_rate": 3.99296233180991e-05, "loss": 71.2451, "step": 31010 }, { "epoch": 0.12532472517039234, "grad_norm": 1419.055908203125, "learning_rate": 3.9929389062669834e-05, "loss": 105.6378, "step": 31020 }, { "epoch": 0.12536512643575998, "grad_norm": 762.352294921875, "learning_rate": 3.9929154418706876e-05, "loss": 114.2663, "step": 31030 }, { "epoch": 0.1254055277011276, "grad_norm": 1120.8670654296875, "learning_rate": 3.992891938621479e-05, "loss": 135.846, "step": 31040 }, { "epoch": 0.12544592896649523, "grad_norm": 1000.6475219726562, "learning_rate": 3.992868396519816e-05, "loss": 78.1399, "step": 31050 }, { "epoch": 0.12548633023186287, "grad_norm": 1076.135986328125, "learning_rate": 3.9928448155661576e-05, "loss": 101.4686, "step": 31060 }, { "epoch": 0.12552673149723048, "grad_norm": 1245.489501953125, "learning_rate": 3.992821195760964e-05, "loss": 98.2108, "step": 31070 }, { "epoch": 0.12556713276259812, "grad_norm": 1279.958984375, "learning_rate": 3.992797537104695e-05, "loss": 89.94, "step": 31080 }, { "epoch": 0.12560753402796576, "grad_norm": 1093.621337890625, "learning_rate": 3.9927738395978116e-05, "loss": 122.3186, "step": 31090 }, { "epoch": 0.12564793529333337, "grad_norm": 567.3983154296875, "learning_rate": 3.9927501032407775e-05, "loss": 98.0507, "step": 31100 }, { "epoch": 0.125688336558701, "grad_norm": 961.0360107421875, "learning_rate": 3.992726328034053e-05, "loss": 86.6894, "step": 31110 }, { "epoch": 0.12572873782406865, "grad_norm": 1218.6414794921875, "learning_rate": 3.9927025139781023e-05, "loss": 82.9272, "step": 31120 }, { "epoch": 0.1257691390894363, "grad_norm": 531.4141845703125, "learning_rate": 3.9926786610733917e-05, "loss": 95.1389, "step": 31130 }, { "epoch": 0.1258095403548039, "grad_norm": 1093.076904296875, "learning_rate": 3.9926547693203836e-05, "loss": 96.6621, "step": 31140 }, { "epoch": 0.12584994162017155, "grad_norm": 548.7486572265625, "learning_rate": 3.9926308387195456e-05, "loss": 88.0375, "step": 31150 }, { "epoch": 0.12589034288553919, "grad_norm": 1444.5079345703125, "learning_rate": 3.992606869271343e-05, "loss": 107.03, "step": 31160 }, { "epoch": 0.1259307441509068, "grad_norm": 1084.5355224609375, "learning_rate": 3.992582860976244e-05, "loss": 104.2385, "step": 31170 }, { "epoch": 0.12597114541627444, "grad_norm": 587.4788208007812, "learning_rate": 3.9925588138347155e-05, "loss": 58.3109, "step": 31180 }, { "epoch": 0.12601154668164208, "grad_norm": 1209.0523681640625, "learning_rate": 3.992534727847228e-05, "loss": 58.8434, "step": 31190 }, { "epoch": 0.1260519479470097, "grad_norm": 1135.3553466796875, "learning_rate": 3.99251060301425e-05, "loss": 114.3812, "step": 31200 }, { "epoch": 0.12609234921237733, "grad_norm": 2140.721435546875, "learning_rate": 3.992486439336252e-05, "loss": 82.2324, "step": 31210 }, { "epoch": 0.12613275047774497, "grad_norm": 2814.8818359375, "learning_rate": 3.992462236813704e-05, "loss": 141.9477, "step": 31220 }, { "epoch": 0.12617315174311258, "grad_norm": 742.0789794921875, "learning_rate": 3.99243799544708e-05, "loss": 117.9271, "step": 31230 }, { "epoch": 0.12621355300848022, "grad_norm": 647.2166748046875, "learning_rate": 3.9924137152368516e-05, "loss": 76.7173, "step": 31240 }, { "epoch": 0.12625395427384786, "grad_norm": 2315.521728515625, "learning_rate": 3.9923893961834914e-05, "loss": 104.3867, "step": 31250 }, { "epoch": 0.12629435553921547, "grad_norm": 2408.8681640625, "learning_rate": 3.9923650382874744e-05, "loss": 139.8789, "step": 31260 }, { "epoch": 0.12633475680458311, "grad_norm": 631.905029296875, "learning_rate": 3.9923406415492755e-05, "loss": 69.1908, "step": 31270 }, { "epoch": 0.12637515806995075, "grad_norm": 1183.33056640625, "learning_rate": 3.99231620596937e-05, "loss": 66.9473, "step": 31280 }, { "epoch": 0.1264155593353184, "grad_norm": 657.8339233398438, "learning_rate": 3.9922917315482344e-05, "loss": 129.2818, "step": 31290 }, { "epoch": 0.126455960600686, "grad_norm": 635.81982421875, "learning_rate": 3.9922672182863456e-05, "loss": 87.3411, "step": 31300 }, { "epoch": 0.12649636186605365, "grad_norm": 1390.1109619140625, "learning_rate": 3.992242666184181e-05, "loss": 95.1961, "step": 31310 }, { "epoch": 0.1265367631314213, "grad_norm": 735.389892578125, "learning_rate": 3.9922180752422214e-05, "loss": 104.7503, "step": 31320 }, { "epoch": 0.1265771643967889, "grad_norm": 904.2584838867188, "learning_rate": 3.9921934454609435e-05, "loss": 59.2481, "step": 31330 }, { "epoch": 0.12661756566215654, "grad_norm": 927.343505859375, "learning_rate": 3.99216877684083e-05, "loss": 77.0618, "step": 31340 }, { "epoch": 0.12665796692752418, "grad_norm": 1172.96044921875, "learning_rate": 3.9921440693823596e-05, "loss": 79.6245, "step": 31350 }, { "epoch": 0.1266983681928918, "grad_norm": 1871.51416015625, "learning_rate": 3.992119323086015e-05, "loss": 74.9882, "step": 31360 }, { "epoch": 0.12673876945825943, "grad_norm": 257.1997985839844, "learning_rate": 3.9920945379522784e-05, "loss": 95.8861, "step": 31370 }, { "epoch": 0.12677917072362707, "grad_norm": 1125.3067626953125, "learning_rate": 3.992069713981634e-05, "loss": 113.5839, "step": 31380 }, { "epoch": 0.12681957198899468, "grad_norm": 4032.64111328125, "learning_rate": 3.992044851174564e-05, "loss": 124.356, "step": 31390 }, { "epoch": 0.12685997325436232, "grad_norm": 605.8938598632812, "learning_rate": 3.992019949531555e-05, "loss": 75.3891, "step": 31400 }, { "epoch": 0.12690037451972996, "grad_norm": 4289.74853515625, "learning_rate": 3.991995009053091e-05, "loss": 131.748, "step": 31410 }, { "epoch": 0.12694077578509758, "grad_norm": 926.93603515625, "learning_rate": 3.9919700297396585e-05, "loss": 88.6814, "step": 31420 }, { "epoch": 0.12698117705046522, "grad_norm": 413.9977111816406, "learning_rate": 3.991945011591745e-05, "loss": 111.6733, "step": 31430 }, { "epoch": 0.12702157831583286, "grad_norm": 778.385009765625, "learning_rate": 3.9919199546098377e-05, "loss": 64.7624, "step": 31440 }, { "epoch": 0.1270619795812005, "grad_norm": 2869.706787109375, "learning_rate": 3.991894858794426e-05, "loss": 112.8549, "step": 31450 }, { "epoch": 0.1271023808465681, "grad_norm": 2792.096923828125, "learning_rate": 3.991869724145998e-05, "loss": 97.986, "step": 31460 }, { "epoch": 0.12714278211193575, "grad_norm": 1144.2340087890625, "learning_rate": 3.991844550665045e-05, "loss": 93.456, "step": 31470 }, { "epoch": 0.1271831833773034, "grad_norm": 1057.1517333984375, "learning_rate": 3.991819338352057e-05, "loss": 78.6856, "step": 31480 }, { "epoch": 0.127223584642671, "grad_norm": 1895.421630859375, "learning_rate": 3.991794087207524e-05, "loss": 104.1628, "step": 31490 }, { "epoch": 0.12726398590803864, "grad_norm": 1157.536865234375, "learning_rate": 3.991768797231941e-05, "loss": 101.4132, "step": 31500 }, { "epoch": 0.12730438717340628, "grad_norm": 575.575439453125, "learning_rate": 3.9917434684258e-05, "loss": 130.7384, "step": 31510 }, { "epoch": 0.1273447884387739, "grad_norm": 1335.8681640625, "learning_rate": 3.9917181007895946e-05, "loss": 81.0369, "step": 31520 }, { "epoch": 0.12738518970414153, "grad_norm": 3140.3173828125, "learning_rate": 3.9916926943238185e-05, "loss": 104.6779, "step": 31530 }, { "epoch": 0.12742559096950917, "grad_norm": 390.1804504394531, "learning_rate": 3.991667249028969e-05, "loss": 117.5998, "step": 31540 }, { "epoch": 0.12746599223487678, "grad_norm": 1342.5087890625, "learning_rate": 3.9916417649055404e-05, "loss": 94.7564, "step": 31550 }, { "epoch": 0.12750639350024442, "grad_norm": 441.471435546875, "learning_rate": 3.991616241954031e-05, "loss": 124.2194, "step": 31560 }, { "epoch": 0.12754679476561206, "grad_norm": 922.2943115234375, "learning_rate": 3.9915906801749365e-05, "loss": 127.6032, "step": 31570 }, { "epoch": 0.12758719603097968, "grad_norm": 569.3106689453125, "learning_rate": 3.991565079568757e-05, "loss": 81.4049, "step": 31580 }, { "epoch": 0.12762759729634732, "grad_norm": 1510.0428466796875, "learning_rate": 3.991539440135991e-05, "loss": 89.6174, "step": 31590 }, { "epoch": 0.12766799856171496, "grad_norm": 447.491943359375, "learning_rate": 3.9915137618771386e-05, "loss": 74.3884, "step": 31600 }, { "epoch": 0.1277083998270826, "grad_norm": 466.8428649902344, "learning_rate": 3.9914880447927e-05, "loss": 86.6112, "step": 31610 }, { "epoch": 0.1277488010924502, "grad_norm": 1096.8792724609375, "learning_rate": 3.991462288883176e-05, "loss": 105.1563, "step": 31620 }, { "epoch": 0.12778920235781785, "grad_norm": 622.0668334960938, "learning_rate": 3.99143649414907e-05, "loss": 101.307, "step": 31630 }, { "epoch": 0.1278296036231855, "grad_norm": 939.9150390625, "learning_rate": 3.9914106605908845e-05, "loss": 119.6522, "step": 31640 }, { "epoch": 0.1278700048885531, "grad_norm": 1054.1038818359375, "learning_rate": 3.991384788209123e-05, "loss": 131.3634, "step": 31650 }, { "epoch": 0.12791040615392074, "grad_norm": 1325.1695556640625, "learning_rate": 3.991358877004289e-05, "loss": 136.0962, "step": 31660 }, { "epoch": 0.12795080741928838, "grad_norm": 1035.624267578125, "learning_rate": 3.991332926976888e-05, "loss": 110.5461, "step": 31670 }, { "epoch": 0.127991208684656, "grad_norm": 951.2764892578125, "learning_rate": 3.9913069381274274e-05, "loss": 72.4459, "step": 31680 }, { "epoch": 0.12803160995002363, "grad_norm": 1553.06787109375, "learning_rate": 3.991280910456413e-05, "loss": 107.8237, "step": 31690 }, { "epoch": 0.12807201121539127, "grad_norm": 379.1010437011719, "learning_rate": 3.991254843964352e-05, "loss": 117.5021, "step": 31700 }, { "epoch": 0.12811241248075889, "grad_norm": 1347.34423828125, "learning_rate": 3.9912287386517524e-05, "loss": 83.7117, "step": 31710 }, { "epoch": 0.12815281374612653, "grad_norm": 683.2993774414062, "learning_rate": 3.9912025945191233e-05, "loss": 113.7659, "step": 31720 }, { "epoch": 0.12819321501149417, "grad_norm": 857.4117431640625, "learning_rate": 3.991176411566974e-05, "loss": 81.4052, "step": 31730 }, { "epoch": 0.12823361627686178, "grad_norm": 3744.839111328125, "learning_rate": 3.991150189795816e-05, "loss": 138.6356, "step": 31740 }, { "epoch": 0.12827401754222942, "grad_norm": 1018.3668823242188, "learning_rate": 3.99112392920616e-05, "loss": 90.8482, "step": 31750 }, { "epoch": 0.12831441880759706, "grad_norm": 730.134033203125, "learning_rate": 3.991097629798519e-05, "loss": 118.8588, "step": 31760 }, { "epoch": 0.1283548200729647, "grad_norm": 1990.3084716796875, "learning_rate": 3.991071291573403e-05, "loss": 118.1557, "step": 31770 }, { "epoch": 0.1283952213383323, "grad_norm": 757.5863647460938, "learning_rate": 3.991044914531327e-05, "loss": 55.718, "step": 31780 }, { "epoch": 0.12843562260369995, "grad_norm": 439.8123779296875, "learning_rate": 3.991018498672806e-05, "loss": 66.3055, "step": 31790 }, { "epoch": 0.1284760238690676, "grad_norm": 758.9248046875, "learning_rate": 3.990992043998354e-05, "loss": 101.9324, "step": 31800 }, { "epoch": 0.1285164251344352, "grad_norm": 2390.60791015625, "learning_rate": 3.990965550508488e-05, "loss": 113.7801, "step": 31810 }, { "epoch": 0.12855682639980284, "grad_norm": 766.4974365234375, "learning_rate": 3.990939018203723e-05, "loss": 106.3282, "step": 31820 }, { "epoch": 0.12859722766517048, "grad_norm": 1003.3472900390625, "learning_rate": 3.990912447084576e-05, "loss": 93.9784, "step": 31830 }, { "epoch": 0.1286376289305381, "grad_norm": 1004.0360107421875, "learning_rate": 3.990885837151567e-05, "loss": 110.0491, "step": 31840 }, { "epoch": 0.12867803019590573, "grad_norm": 647.4901733398438, "learning_rate": 3.990859188405213e-05, "loss": 89.2669, "step": 31850 }, { "epoch": 0.12871843146127337, "grad_norm": 600.5079345703125, "learning_rate": 3.990832500846034e-05, "loss": 82.5083, "step": 31860 }, { "epoch": 0.128758832726641, "grad_norm": 566.7688598632812, "learning_rate": 3.990805774474551e-05, "loss": 76.502, "step": 31870 }, { "epoch": 0.12879923399200863, "grad_norm": 1008.888671875, "learning_rate": 3.990779009291284e-05, "loss": 71.2898, "step": 31880 }, { "epoch": 0.12883963525737627, "grad_norm": 957.80615234375, "learning_rate": 3.9907522052967556e-05, "loss": 85.8995, "step": 31890 }, { "epoch": 0.12888003652274388, "grad_norm": 877.7742309570312, "learning_rate": 3.990725362491488e-05, "loss": 103.1037, "step": 31900 }, { "epoch": 0.12892043778811152, "grad_norm": 1224.771728515625, "learning_rate": 3.990698480876005e-05, "loss": 63.2698, "step": 31910 }, { "epoch": 0.12896083905347916, "grad_norm": 1240.038818359375, "learning_rate": 3.9906715604508295e-05, "loss": 109.5601, "step": 31920 }, { "epoch": 0.1290012403188468, "grad_norm": 1355.4737548828125, "learning_rate": 3.990644601216487e-05, "loss": 105.8995, "step": 31930 }, { "epoch": 0.1290416415842144, "grad_norm": 549.8170166015625, "learning_rate": 3.990617603173504e-05, "loss": 61.5239, "step": 31940 }, { "epoch": 0.12908204284958205, "grad_norm": 976.7488403320312, "learning_rate": 3.9905905663224054e-05, "loss": 84.4975, "step": 31950 }, { "epoch": 0.1291224441149497, "grad_norm": 776.4035034179688, "learning_rate": 3.9905634906637185e-05, "loss": 66.8786, "step": 31960 }, { "epoch": 0.1291628453803173, "grad_norm": 441.0339050292969, "learning_rate": 3.9905363761979724e-05, "loss": 77.6645, "step": 31970 }, { "epoch": 0.12920324664568494, "grad_norm": 391.274169921875, "learning_rate": 3.9905092229256945e-05, "loss": 56.2057, "step": 31980 }, { "epoch": 0.12924364791105258, "grad_norm": 1062.639892578125, "learning_rate": 3.9904820308474145e-05, "loss": 114.6927, "step": 31990 }, { "epoch": 0.1292840491764202, "grad_norm": 1814.6654052734375, "learning_rate": 3.9904547999636625e-05, "loss": 110.2025, "step": 32000 }, { "epoch": 0.12932445044178784, "grad_norm": 756.2318725585938, "learning_rate": 3.9904275302749696e-05, "loss": 132.7259, "step": 32010 }, { "epoch": 0.12936485170715548, "grad_norm": 1501.403076171875, "learning_rate": 3.990400221781867e-05, "loss": 113.8047, "step": 32020 }, { "epoch": 0.1294052529725231, "grad_norm": 1061.5772705078125, "learning_rate": 3.990372874484887e-05, "loss": 88.4707, "step": 32030 }, { "epoch": 0.12944565423789073, "grad_norm": 697.8121337890625, "learning_rate": 3.9903454883845645e-05, "loss": 98.3409, "step": 32040 }, { "epoch": 0.12948605550325837, "grad_norm": 1300.261962890625, "learning_rate": 3.9903180634814304e-05, "loss": 71.091, "step": 32050 }, { "epoch": 0.12952645676862598, "grad_norm": 4183.81982421875, "learning_rate": 3.990290599776022e-05, "loss": 96.3512, "step": 32060 }, { "epoch": 0.12956685803399362, "grad_norm": 1181.3817138671875, "learning_rate": 3.990263097268873e-05, "loss": 98.9867, "step": 32070 }, { "epoch": 0.12960725929936126, "grad_norm": 551.7296752929688, "learning_rate": 3.990235555960521e-05, "loss": 160.0591, "step": 32080 }, { "epoch": 0.1296476605647289, "grad_norm": 629.874267578125, "learning_rate": 3.990207975851502e-05, "loss": 92.8718, "step": 32090 }, { "epoch": 0.1296880618300965, "grad_norm": 1450.927001953125, "learning_rate": 3.990180356942353e-05, "loss": 90.074, "step": 32100 }, { "epoch": 0.12972846309546415, "grad_norm": 1250.63916015625, "learning_rate": 3.990152699233614e-05, "loss": 72.6941, "step": 32110 }, { "epoch": 0.1297688643608318, "grad_norm": 1635.5433349609375, "learning_rate": 3.990125002725824e-05, "loss": 94.4942, "step": 32120 }, { "epoch": 0.1298092656261994, "grad_norm": 692.6416625976562, "learning_rate": 3.990097267419522e-05, "loss": 83.3047, "step": 32130 }, { "epoch": 0.12984966689156704, "grad_norm": 360.87469482421875, "learning_rate": 3.990069493315249e-05, "loss": 48.6074, "step": 32140 }, { "epoch": 0.12989006815693468, "grad_norm": 953.3905639648438, "learning_rate": 3.990041680413547e-05, "loss": 70.155, "step": 32150 }, { "epoch": 0.1299304694223023, "grad_norm": 1337.5194091796875, "learning_rate": 3.9900138287149575e-05, "loss": 135.2556, "step": 32160 }, { "epoch": 0.12997087068766994, "grad_norm": 1706.7265625, "learning_rate": 3.989985938220025e-05, "loss": 138.5848, "step": 32170 }, { "epoch": 0.13001127195303758, "grad_norm": 848.6858520507812, "learning_rate": 3.9899580089292904e-05, "loss": 88.3094, "step": 32180 }, { "epoch": 0.1300516732184052, "grad_norm": 1716.445556640625, "learning_rate": 3.9899300408433005e-05, "loss": 93.1287, "step": 32190 }, { "epoch": 0.13009207448377283, "grad_norm": 803.143798828125, "learning_rate": 3.989902033962601e-05, "loss": 79.1566, "step": 32200 }, { "epoch": 0.13013247574914047, "grad_norm": 643.987548828125, "learning_rate": 3.989873988287736e-05, "loss": 85.9153, "step": 32210 }, { "epoch": 0.13017287701450808, "grad_norm": 1115.3116455078125, "learning_rate": 3.989845903819253e-05, "loss": 97.8489, "step": 32220 }, { "epoch": 0.13021327827987572, "grad_norm": 822.09912109375, "learning_rate": 3.9898177805577e-05, "loss": 87.0368, "step": 32230 }, { "epoch": 0.13025367954524336, "grad_norm": 2172.662353515625, "learning_rate": 3.989789618503624e-05, "loss": 95.4219, "step": 32240 }, { "epoch": 0.130294080810611, "grad_norm": 406.08599853515625, "learning_rate": 3.989761417657577e-05, "loss": 99.9701, "step": 32250 }, { "epoch": 0.1303344820759786, "grad_norm": 984.9737548828125, "learning_rate": 3.989733178020105e-05, "loss": 57.6613, "step": 32260 }, { "epoch": 0.13037488334134625, "grad_norm": 1696.062744140625, "learning_rate": 3.989704899591761e-05, "loss": 68.0745, "step": 32270 }, { "epoch": 0.1304152846067139, "grad_norm": 1011.5084838867188, "learning_rate": 3.989676582373096e-05, "loss": 98.9419, "step": 32280 }, { "epoch": 0.1304556858720815, "grad_norm": 1103.0570068359375, "learning_rate": 3.989648226364661e-05, "loss": 134.7057, "step": 32290 }, { "epoch": 0.13049608713744915, "grad_norm": 261.1528015136719, "learning_rate": 3.98961983156701e-05, "loss": 55.7983, "step": 32300 }, { "epoch": 0.13053648840281679, "grad_norm": 1446.6749267578125, "learning_rate": 3.9895913979806955e-05, "loss": 109.7331, "step": 32310 }, { "epoch": 0.1305768896681844, "grad_norm": 822.2714233398438, "learning_rate": 3.9895629256062726e-05, "loss": 101.0507, "step": 32320 }, { "epoch": 0.13061729093355204, "grad_norm": 1948.6011962890625, "learning_rate": 3.989534414444296e-05, "loss": 111.6279, "step": 32330 }, { "epoch": 0.13065769219891968, "grad_norm": 609.5797729492188, "learning_rate": 3.989505864495322e-05, "loss": 67.6002, "step": 32340 }, { "epoch": 0.1306980934642873, "grad_norm": 6199.38720703125, "learning_rate": 3.989477275759907e-05, "loss": 101.5113, "step": 32350 }, { "epoch": 0.13073849472965493, "grad_norm": 0.0, "learning_rate": 3.989448648238608e-05, "loss": 123.2018, "step": 32360 }, { "epoch": 0.13077889599502257, "grad_norm": 606.24658203125, "learning_rate": 3.989419981931984e-05, "loss": 65.7179, "step": 32370 }, { "epoch": 0.13081929726039018, "grad_norm": 520.7660522460938, "learning_rate": 3.989391276840592e-05, "loss": 104.3083, "step": 32380 }, { "epoch": 0.13085969852575782, "grad_norm": 0.0, "learning_rate": 3.989362532964994e-05, "loss": 73.2306, "step": 32390 }, { "epoch": 0.13090009979112546, "grad_norm": 935.7091064453125, "learning_rate": 3.989333750305749e-05, "loss": 116.7558, "step": 32400 }, { "epoch": 0.1309405010564931, "grad_norm": 1432.7255859375, "learning_rate": 3.9893049288634174e-05, "loss": 124.7905, "step": 32410 }, { "epoch": 0.13098090232186071, "grad_norm": 776.0060424804688, "learning_rate": 3.989276068638563e-05, "loss": 83.9067, "step": 32420 }, { "epoch": 0.13102130358722835, "grad_norm": 1112.199951171875, "learning_rate": 3.9892471696317467e-05, "loss": 107.2018, "step": 32430 }, { "epoch": 0.131061704852596, "grad_norm": 849.1611328125, "learning_rate": 3.9892182318435336e-05, "loss": 91.3857, "step": 32440 }, { "epoch": 0.1311021061179636, "grad_norm": 1403.359130859375, "learning_rate": 3.989189255274487e-05, "loss": 112.8682, "step": 32450 }, { "epoch": 0.13114250738333125, "grad_norm": 912.6363525390625, "learning_rate": 3.989160239925171e-05, "loss": 104.4206, "step": 32460 }, { "epoch": 0.1311829086486989, "grad_norm": 2311.5087890625, "learning_rate": 3.989131185796153e-05, "loss": 105.4599, "step": 32470 }, { "epoch": 0.1312233099140665, "grad_norm": 817.505859375, "learning_rate": 3.989102092887997e-05, "loss": 69.9134, "step": 32480 }, { "epoch": 0.13126371117943414, "grad_norm": 1116.56298828125, "learning_rate": 3.9890729612012726e-05, "loss": 94.8134, "step": 32490 }, { "epoch": 0.13130411244480178, "grad_norm": 1570.620361328125, "learning_rate": 3.989043790736547e-05, "loss": 131.5534, "step": 32500 }, { "epoch": 0.1313445137101694, "grad_norm": 1480.2958984375, "learning_rate": 3.989014581494388e-05, "loss": 104.3328, "step": 32510 }, { "epoch": 0.13138491497553703, "grad_norm": 1166.97998046875, "learning_rate": 3.9889853334753666e-05, "loss": 82.1462, "step": 32520 }, { "epoch": 0.13142531624090467, "grad_norm": 950.3305053710938, "learning_rate": 3.988956046680051e-05, "loss": 148.301, "step": 32530 }, { "epoch": 0.13146571750627228, "grad_norm": 1937.57080078125, "learning_rate": 3.9889267211090145e-05, "loss": 125.095, "step": 32540 }, { "epoch": 0.13150611877163992, "grad_norm": 808.1902465820312, "learning_rate": 3.988897356762827e-05, "loss": 84.5435, "step": 32550 }, { "epoch": 0.13154652003700756, "grad_norm": 2491.928955078125, "learning_rate": 3.988867953642062e-05, "loss": 136.9711, "step": 32560 }, { "epoch": 0.1315869213023752, "grad_norm": 383.66302490234375, "learning_rate": 3.9888385117472914e-05, "loss": 76.6657, "step": 32570 }, { "epoch": 0.13162732256774282, "grad_norm": 1040.90087890625, "learning_rate": 3.9888090310790904e-05, "loss": 61.5886, "step": 32580 }, { "epoch": 0.13166772383311046, "grad_norm": 782.1383666992188, "learning_rate": 3.9887795116380336e-05, "loss": 104.2784, "step": 32590 }, { "epoch": 0.1317081250984781, "grad_norm": 1465.7843017578125, "learning_rate": 3.988749953424696e-05, "loss": 90.0144, "step": 32600 }, { "epoch": 0.1317485263638457, "grad_norm": 661.722412109375, "learning_rate": 3.988720356439655e-05, "loss": 123.0394, "step": 32610 }, { "epoch": 0.13178892762921335, "grad_norm": 1469.308837890625, "learning_rate": 3.988690720683486e-05, "loss": 110.255, "step": 32620 }, { "epoch": 0.131829328894581, "grad_norm": 491.6518859863281, "learning_rate": 3.988661046156768e-05, "loss": 114.0818, "step": 32630 }, { "epoch": 0.1318697301599486, "grad_norm": 610.9449462890625, "learning_rate": 3.988631332860079e-05, "loss": 78.5732, "step": 32640 }, { "epoch": 0.13191013142531624, "grad_norm": 762.2991333007812, "learning_rate": 3.988601580793998e-05, "loss": 104.1977, "step": 32650 }, { "epoch": 0.13195053269068388, "grad_norm": 1247.912109375, "learning_rate": 3.988571789959106e-05, "loss": 108.5621, "step": 32660 }, { "epoch": 0.1319909339560515, "grad_norm": 742.3600463867188, "learning_rate": 3.988541960355982e-05, "loss": 77.3082, "step": 32670 }, { "epoch": 0.13203133522141913, "grad_norm": 780.1296997070312, "learning_rate": 3.98851209198521e-05, "loss": 74.177, "step": 32680 }, { "epoch": 0.13207173648678677, "grad_norm": 605.8449096679688, "learning_rate": 3.988482184847371e-05, "loss": 77.8987, "step": 32690 }, { "epoch": 0.13211213775215438, "grad_norm": 705.7808227539062, "learning_rate": 3.988452238943047e-05, "loss": 78.1375, "step": 32700 }, { "epoch": 0.13215253901752202, "grad_norm": 1150.5770263671875, "learning_rate": 3.9884222542728236e-05, "loss": 82.5151, "step": 32710 }, { "epoch": 0.13219294028288966, "grad_norm": 1019.019775390625, "learning_rate": 3.988392230837285e-05, "loss": 121.0599, "step": 32720 }, { "epoch": 0.1322333415482573, "grad_norm": 797.7850341796875, "learning_rate": 3.988362168637015e-05, "loss": 98.1945, "step": 32730 }, { "epoch": 0.13227374281362492, "grad_norm": 2180.442138671875, "learning_rate": 3.9883320676726015e-05, "loss": 79.0123, "step": 32740 }, { "epoch": 0.13231414407899256, "grad_norm": 1715.8033447265625, "learning_rate": 3.988301927944631e-05, "loss": 110.9263, "step": 32750 }, { "epoch": 0.1323545453443602, "grad_norm": 739.5059204101562, "learning_rate": 3.988271749453691e-05, "loss": 111.0484, "step": 32760 }, { "epoch": 0.1323949466097278, "grad_norm": 866.54833984375, "learning_rate": 3.988241532200369e-05, "loss": 95.5286, "step": 32770 }, { "epoch": 0.13243534787509545, "grad_norm": 1200.807373046875, "learning_rate": 3.9882112761852544e-05, "loss": 85.0713, "step": 32780 }, { "epoch": 0.1324757491404631, "grad_norm": 2249.028564453125, "learning_rate": 3.9881809814089376e-05, "loss": 111.7555, "step": 32790 }, { "epoch": 0.1325161504058307, "grad_norm": 678.5682983398438, "learning_rate": 3.9881506478720095e-05, "loss": 79.8005, "step": 32800 }, { "epoch": 0.13255655167119834, "grad_norm": 1730.4722900390625, "learning_rate": 3.9881202755750604e-05, "loss": 110.668, "step": 32810 }, { "epoch": 0.13259695293656598, "grad_norm": 547.8741455078125, "learning_rate": 3.988089864518683e-05, "loss": 82.1663, "step": 32820 }, { "epoch": 0.1326373542019336, "grad_norm": 678.1209106445312, "learning_rate": 3.988059414703471e-05, "loss": 92.4558, "step": 32830 }, { "epoch": 0.13267775546730123, "grad_norm": 1880.7825927734375, "learning_rate": 3.9880289261300166e-05, "loss": 167.3718, "step": 32840 }, { "epoch": 0.13271815673266887, "grad_norm": 1045.5400390625, "learning_rate": 3.987998398798914e-05, "loss": 141.326, "step": 32850 }, { "epoch": 0.13275855799803649, "grad_norm": 1256.3192138671875, "learning_rate": 3.987967832710761e-05, "loss": 123.7676, "step": 32860 }, { "epoch": 0.13279895926340413, "grad_norm": 415.38787841796875, "learning_rate": 3.98793722786615e-05, "loss": 85.7358, "step": 32870 }, { "epoch": 0.13283936052877177, "grad_norm": 2784.51953125, "learning_rate": 3.9879065842656796e-05, "loss": 88.3061, "step": 32880 }, { "epoch": 0.1328797617941394, "grad_norm": 1133.65771484375, "learning_rate": 3.987875901909947e-05, "loss": 114.6212, "step": 32890 }, { "epoch": 0.13292016305950702, "grad_norm": 654.9711303710938, "learning_rate": 3.9878451807995496e-05, "loss": 75.8217, "step": 32900 }, { "epoch": 0.13296056432487466, "grad_norm": 1000.1675415039062, "learning_rate": 3.987814420935088e-05, "loss": 73.452, "step": 32910 }, { "epoch": 0.1330009655902423, "grad_norm": 804.9745483398438, "learning_rate": 3.987783622317161e-05, "loss": 66.1093, "step": 32920 }, { "epoch": 0.1330413668556099, "grad_norm": 1595.5555419921875, "learning_rate": 3.987752784946368e-05, "loss": 130.9591, "step": 32930 }, { "epoch": 0.13308176812097755, "grad_norm": 724.1141357421875, "learning_rate": 3.9877219088233115e-05, "loss": 111.874, "step": 32940 }, { "epoch": 0.1331221693863452, "grad_norm": 1259.160400390625, "learning_rate": 3.987690993948594e-05, "loss": 110.3312, "step": 32950 }, { "epoch": 0.1331625706517128, "grad_norm": 468.20782470703125, "learning_rate": 3.9876600403228154e-05, "loss": 84.0473, "step": 32960 }, { "epoch": 0.13320297191708044, "grad_norm": 525.40087890625, "learning_rate": 3.987629047946582e-05, "loss": 88.113, "step": 32970 }, { "epoch": 0.13324337318244808, "grad_norm": 728.6627197265625, "learning_rate": 3.9875980168204976e-05, "loss": 94.1612, "step": 32980 }, { "epoch": 0.1332837744478157, "grad_norm": 685.3392333984375, "learning_rate": 3.987566946945166e-05, "loss": 82.9419, "step": 32990 }, { "epoch": 0.13332417571318333, "grad_norm": 1111.914306640625, "learning_rate": 3.987535838321193e-05, "loss": 101.6496, "step": 33000 }, { "epoch": 0.13336457697855097, "grad_norm": 488.5706481933594, "learning_rate": 3.987504690949186e-05, "loss": 93.6909, "step": 33010 }, { "epoch": 0.1334049782439186, "grad_norm": 513.7379150390625, "learning_rate": 3.987473504829752e-05, "loss": 75.1137, "step": 33020 }, { "epoch": 0.13344537950928623, "grad_norm": 913.1249389648438, "learning_rate": 3.987442279963499e-05, "loss": 70.1443, "step": 33030 }, { "epoch": 0.13348578077465387, "grad_norm": 517.8516845703125, "learning_rate": 3.9874110163510345e-05, "loss": 109.9103, "step": 33040 }, { "epoch": 0.1335261820400215, "grad_norm": 1112.8758544921875, "learning_rate": 3.98737971399297e-05, "loss": 80.9284, "step": 33050 }, { "epoch": 0.13356658330538912, "grad_norm": 909.37451171875, "learning_rate": 3.987348372889915e-05, "loss": 109.8047, "step": 33060 }, { "epoch": 0.13360698457075676, "grad_norm": 1172.3194580078125, "learning_rate": 3.9873169930424796e-05, "loss": 82.3379, "step": 33070 }, { "epoch": 0.1336473858361244, "grad_norm": 1014.2901000976562, "learning_rate": 3.987285574451276e-05, "loss": 114.6765, "step": 33080 }, { "epoch": 0.133687787101492, "grad_norm": 282.9718322753906, "learning_rate": 3.987254117116918e-05, "loss": 105.4184, "step": 33090 }, { "epoch": 0.13372818836685965, "grad_norm": 702.3818359375, "learning_rate": 3.987222621040017e-05, "loss": 124.3384, "step": 33100 }, { "epoch": 0.1337685896322273, "grad_norm": 706.9837036132812, "learning_rate": 3.987191086221189e-05, "loss": 101.213, "step": 33110 }, { "epoch": 0.1338089908975949, "grad_norm": 1250.2989501953125, "learning_rate": 3.9871595126610466e-05, "loss": 90.6203, "step": 33120 }, { "epoch": 0.13384939216296254, "grad_norm": 2235.422607421875, "learning_rate": 3.987127900360207e-05, "loss": 96.2993, "step": 33130 }, { "epoch": 0.13388979342833018, "grad_norm": 960.5076904296875, "learning_rate": 3.9870962493192856e-05, "loss": 93.8741, "step": 33140 }, { "epoch": 0.1339301946936978, "grad_norm": 998.1218872070312, "learning_rate": 3.9870645595389e-05, "loss": 75.5594, "step": 33150 }, { "epoch": 0.13397059595906544, "grad_norm": 516.9995727539062, "learning_rate": 3.987032831019668e-05, "loss": 113.7387, "step": 33160 }, { "epoch": 0.13401099722443308, "grad_norm": 1901.6185302734375, "learning_rate": 3.987001063762208e-05, "loss": 86.161, "step": 33170 }, { "epoch": 0.1340513984898007, "grad_norm": 753.5205078125, "learning_rate": 3.986969257767139e-05, "loss": 73.1547, "step": 33180 }, { "epoch": 0.13409179975516833, "grad_norm": 1136.2593994140625, "learning_rate": 3.9869374130350805e-05, "loss": 81.2451, "step": 33190 }, { "epoch": 0.13413220102053597, "grad_norm": 1040.9864501953125, "learning_rate": 3.986905529566655e-05, "loss": 81.6566, "step": 33200 }, { "epoch": 0.1341726022859036, "grad_norm": 828.9725952148438, "learning_rate": 3.986873607362484e-05, "loss": 65.9397, "step": 33210 }, { "epoch": 0.13421300355127122, "grad_norm": 596.030517578125, "learning_rate": 3.986841646423188e-05, "loss": 64.6862, "step": 33220 }, { "epoch": 0.13425340481663886, "grad_norm": 326.7821350097656, "learning_rate": 3.9868096467493924e-05, "loss": 99.5571, "step": 33230 }, { "epoch": 0.1342938060820065, "grad_norm": 1642.4222412109375, "learning_rate": 3.9867776083417186e-05, "loss": 84.424, "step": 33240 }, { "epoch": 0.1343342073473741, "grad_norm": 1301.66552734375, "learning_rate": 3.986745531200793e-05, "loss": 90.8854, "step": 33250 }, { "epoch": 0.13437460861274175, "grad_norm": 1154.9903564453125, "learning_rate": 3.9867134153272404e-05, "loss": 81.8859, "step": 33260 }, { "epoch": 0.1344150098781094, "grad_norm": 660.017578125, "learning_rate": 3.9866812607216875e-05, "loss": 104.3724, "step": 33270 }, { "epoch": 0.134455411143477, "grad_norm": 671.1018676757812, "learning_rate": 3.9866490673847596e-05, "loss": 96.5107, "step": 33280 }, { "epoch": 0.13449581240884464, "grad_norm": 594.5077514648438, "learning_rate": 3.9866168353170855e-05, "loss": 87.6706, "step": 33290 }, { "epoch": 0.13453621367421228, "grad_norm": 764.1627197265625, "learning_rate": 3.986584564519294e-05, "loss": 94.7507, "step": 33300 }, { "epoch": 0.1345766149395799, "grad_norm": 1440.6705322265625, "learning_rate": 3.9865522549920135e-05, "loss": 89.43, "step": 33310 }, { "epoch": 0.13461701620494754, "grad_norm": 509.2161560058594, "learning_rate": 3.986519906735874e-05, "loss": 127.8514, "step": 33320 }, { "epoch": 0.13465741747031518, "grad_norm": 330.7436218261719, "learning_rate": 3.986487519751506e-05, "loss": 71.8705, "step": 33330 }, { "epoch": 0.1346978187356828, "grad_norm": 3115.505859375, "learning_rate": 3.9864550940395413e-05, "loss": 168.864, "step": 33340 }, { "epoch": 0.13473822000105043, "grad_norm": 708.8053588867188, "learning_rate": 3.9864226296006114e-05, "loss": 99.1782, "step": 33350 }, { "epoch": 0.13477862126641807, "grad_norm": 827.0519409179688, "learning_rate": 3.986390126435351e-05, "loss": 88.3666, "step": 33360 }, { "epoch": 0.1348190225317857, "grad_norm": 348.74884033203125, "learning_rate": 3.986357584544391e-05, "loss": 79.9038, "step": 33370 }, { "epoch": 0.13485942379715332, "grad_norm": 809.9948120117188, "learning_rate": 3.986325003928367e-05, "loss": 75.5851, "step": 33380 }, { "epoch": 0.13489982506252096, "grad_norm": 530.9696044921875, "learning_rate": 3.986292384587916e-05, "loss": 74.4206, "step": 33390 }, { "epoch": 0.1349402263278886, "grad_norm": 840.42236328125, "learning_rate": 3.986259726523671e-05, "loss": 98.9562, "step": 33400 }, { "epoch": 0.1349806275932562, "grad_norm": 362.3224182128906, "learning_rate": 3.9862270297362704e-05, "loss": 74.7036, "step": 33410 }, { "epoch": 0.13502102885862385, "grad_norm": 4263.97314453125, "learning_rate": 3.986194294226351e-05, "loss": 132.8398, "step": 33420 }, { "epoch": 0.1350614301239915, "grad_norm": 982.0714111328125, "learning_rate": 3.986161519994552e-05, "loss": 112.7037, "step": 33430 }, { "epoch": 0.1351018313893591, "grad_norm": 961.3419799804688, "learning_rate": 3.986128707041511e-05, "loss": 95.7062, "step": 33440 }, { "epoch": 0.13514223265472675, "grad_norm": 749.6942138671875, "learning_rate": 3.9860958553678694e-05, "loss": 115.0838, "step": 33450 }, { "epoch": 0.13518263392009439, "grad_norm": 0.0, "learning_rate": 3.986062964974265e-05, "loss": 68.6302, "step": 33460 }, { "epoch": 0.135223035185462, "grad_norm": 1634.578125, "learning_rate": 3.9860300358613416e-05, "loss": 94.7061, "step": 33470 }, { "epoch": 0.13526343645082964, "grad_norm": 504.92449951171875, "learning_rate": 3.98599706802974e-05, "loss": 69.5252, "step": 33480 }, { "epoch": 0.13530383771619728, "grad_norm": 2182.119140625, "learning_rate": 3.985964061480103e-05, "loss": 91.9078, "step": 33490 }, { "epoch": 0.1353442389815649, "grad_norm": 669.2144775390625, "learning_rate": 3.985931016213074e-05, "loss": 65.255, "step": 33500 }, { "epoch": 0.13538464024693253, "grad_norm": 678.09521484375, "learning_rate": 3.985897932229298e-05, "loss": 130.8218, "step": 33510 }, { "epoch": 0.13542504151230017, "grad_norm": 860.627685546875, "learning_rate": 3.985864809529419e-05, "loss": 136.829, "step": 33520 }, { "epoch": 0.13546544277766778, "grad_norm": 691.2222290039062, "learning_rate": 3.985831648114083e-05, "loss": 106.527, "step": 33530 }, { "epoch": 0.13550584404303542, "grad_norm": 677.6774291992188, "learning_rate": 3.985798447983937e-05, "loss": 84.5683, "step": 33540 }, { "epoch": 0.13554624530840306, "grad_norm": 463.0068359375, "learning_rate": 3.9857652091396276e-05, "loss": 71.5883, "step": 33550 }, { "epoch": 0.1355866465737707, "grad_norm": 1355.86083984375, "learning_rate": 3.985731931581804e-05, "loss": 75.3525, "step": 33560 }, { "epoch": 0.13562704783913831, "grad_norm": 595.2366333007812, "learning_rate": 3.985698615311113e-05, "loss": 72.4136, "step": 33570 }, { "epoch": 0.13566744910450595, "grad_norm": 838.0628662109375, "learning_rate": 3.985665260328205e-05, "loss": 111.8475, "step": 33580 }, { "epoch": 0.1357078503698736, "grad_norm": 607.656005859375, "learning_rate": 3.985631866633731e-05, "loss": 92.9903, "step": 33590 }, { "epoch": 0.1357482516352412, "grad_norm": 802.7066650390625, "learning_rate": 3.9855984342283414e-05, "loss": 97.0624, "step": 33600 }, { "epoch": 0.13578865290060885, "grad_norm": 1092.105712890625, "learning_rate": 3.9855649631126884e-05, "loss": 117.6566, "step": 33610 }, { "epoch": 0.1358290541659765, "grad_norm": 450.5210266113281, "learning_rate": 3.985531453287424e-05, "loss": 92.6769, "step": 33620 }, { "epoch": 0.1358694554313441, "grad_norm": 439.0806884765625, "learning_rate": 3.985497904753201e-05, "loss": 84.2845, "step": 33630 }, { "epoch": 0.13590985669671174, "grad_norm": 1899.7283935546875, "learning_rate": 3.9854643175106756e-05, "loss": 102.3803, "step": 33640 }, { "epoch": 0.13595025796207938, "grad_norm": 767.927490234375, "learning_rate": 3.9854306915605e-05, "loss": 146.8131, "step": 33650 }, { "epoch": 0.135990659227447, "grad_norm": 864.8515014648438, "learning_rate": 3.985397026903331e-05, "loss": 94.1728, "step": 33660 }, { "epoch": 0.13603106049281463, "grad_norm": 1093.5802001953125, "learning_rate": 3.985363323539825e-05, "loss": 107.6846, "step": 33670 }, { "epoch": 0.13607146175818227, "grad_norm": 1755.8154296875, "learning_rate": 3.9853295814706395e-05, "loss": 113.3186, "step": 33680 }, { "epoch": 0.13611186302354988, "grad_norm": 992.8015747070312, "learning_rate": 3.98529580069643e-05, "loss": 107.5117, "step": 33690 }, { "epoch": 0.13615226428891752, "grad_norm": 536.5291137695312, "learning_rate": 3.985261981217858e-05, "loss": 72.3382, "step": 33700 }, { "epoch": 0.13619266555428516, "grad_norm": 1494.7418212890625, "learning_rate": 3.985228123035582e-05, "loss": 69.5633, "step": 33710 }, { "epoch": 0.1362330668196528, "grad_norm": 362.628173828125, "learning_rate": 3.98519422615026e-05, "loss": 127.147, "step": 33720 }, { "epoch": 0.13627346808502042, "grad_norm": 915.39208984375, "learning_rate": 3.985160290562556e-05, "loss": 134.0353, "step": 33730 }, { "epoch": 0.13631386935038806, "grad_norm": 1054.449462890625, "learning_rate": 3.98512631627313e-05, "loss": 89.7184, "step": 33740 }, { "epoch": 0.1363542706157557, "grad_norm": 892.0343017578125, "learning_rate": 3.985092303282645e-05, "loss": 68.4411, "step": 33750 }, { "epoch": 0.1363946718811233, "grad_norm": 1595.01953125, "learning_rate": 3.985058251591762e-05, "loss": 92.7033, "step": 33760 }, { "epoch": 0.13643507314649095, "grad_norm": 1104.977783203125, "learning_rate": 3.985024161201147e-05, "loss": 91.0335, "step": 33770 }, { "epoch": 0.1364754744118586, "grad_norm": 0.0, "learning_rate": 3.984990032111465e-05, "loss": 58.1744, "step": 33780 }, { "epoch": 0.1365158756772262, "grad_norm": 1564.0908203125, "learning_rate": 3.984955864323379e-05, "loss": 75.7117, "step": 33790 }, { "epoch": 0.13655627694259384, "grad_norm": 700.7978515625, "learning_rate": 3.984921657837557e-05, "loss": 108.2256, "step": 33800 }, { "epoch": 0.13659667820796148, "grad_norm": 1223.709716796875, "learning_rate": 3.9848874126546654e-05, "loss": 150.9498, "step": 33810 }, { "epoch": 0.1366370794733291, "grad_norm": 732.4114379882812, "learning_rate": 3.984853128775372e-05, "loss": 60.9993, "step": 33820 }, { "epoch": 0.13667748073869673, "grad_norm": 1338.5892333984375, "learning_rate": 3.9848188062003445e-05, "loss": 123.3062, "step": 33830 }, { "epoch": 0.13671788200406437, "grad_norm": 696.5016479492188, "learning_rate": 3.984784444930253e-05, "loss": 101.0902, "step": 33840 }, { "epoch": 0.13675828326943198, "grad_norm": 565.2360229492188, "learning_rate": 3.9847500449657666e-05, "loss": 88.9729, "step": 33850 }, { "epoch": 0.13679868453479962, "grad_norm": 788.9739379882812, "learning_rate": 3.984715606307556e-05, "loss": 79.289, "step": 33860 }, { "epoch": 0.13683908580016726, "grad_norm": 531.5402221679688, "learning_rate": 3.984681128956294e-05, "loss": 70.041, "step": 33870 }, { "epoch": 0.1368794870655349, "grad_norm": 541.3567504882812, "learning_rate": 3.9846466129126506e-05, "loss": 142.128, "step": 33880 }, { "epoch": 0.13691988833090252, "grad_norm": 640.6295166015625, "learning_rate": 3.9846120581773e-05, "loss": 108.9354, "step": 33890 }, { "epoch": 0.13696028959627016, "grad_norm": 474.4267578125, "learning_rate": 3.984577464750916e-05, "loss": 83.6102, "step": 33900 }, { "epoch": 0.1370006908616378, "grad_norm": 769.0298461914062, "learning_rate": 3.984542832634172e-05, "loss": 86.5954, "step": 33910 }, { "epoch": 0.1370410921270054, "grad_norm": 1042.3206787109375, "learning_rate": 3.984508161827743e-05, "loss": 131.0708, "step": 33920 }, { "epoch": 0.13708149339237305, "grad_norm": 1229.37158203125, "learning_rate": 3.984473452332307e-05, "loss": 72.523, "step": 33930 }, { "epoch": 0.1371218946577407, "grad_norm": 1179.4063720703125, "learning_rate": 3.9844387041485396e-05, "loss": 121.1243, "step": 33940 }, { "epoch": 0.1371622959231083, "grad_norm": 616.3158569335938, "learning_rate": 3.9844039172771174e-05, "loss": 74.466, "step": 33950 }, { "epoch": 0.13720269718847594, "grad_norm": 1129.1138916015625, "learning_rate": 3.984369091718719e-05, "loss": 115.7123, "step": 33960 }, { "epoch": 0.13724309845384358, "grad_norm": 907.6840209960938, "learning_rate": 3.984334227474023e-05, "loss": 114.4695, "step": 33970 }, { "epoch": 0.1372834997192112, "grad_norm": 2617.643798828125, "learning_rate": 3.984299324543711e-05, "loss": 113.5119, "step": 33980 }, { "epoch": 0.13732390098457883, "grad_norm": 1317.3724365234375, "learning_rate": 3.984264382928461e-05, "loss": 111.3625, "step": 33990 }, { "epoch": 0.13736430224994647, "grad_norm": 1482.2108154296875, "learning_rate": 3.9842294026289565e-05, "loss": 123.0276, "step": 34000 }, { "epoch": 0.13740470351531409, "grad_norm": 471.9527282714844, "learning_rate": 3.984194383645877e-05, "loss": 71.2953, "step": 34010 }, { "epoch": 0.13744510478068173, "grad_norm": 645.5573120117188, "learning_rate": 3.984159325979907e-05, "loss": 117.5256, "step": 34020 }, { "epoch": 0.13748550604604937, "grad_norm": 644.1488037109375, "learning_rate": 3.9841242296317294e-05, "loss": 54.6307, "step": 34030 }, { "epoch": 0.137525907311417, "grad_norm": 1674.2548828125, "learning_rate": 3.984089094602028e-05, "loss": 61.2714, "step": 34040 }, { "epoch": 0.13756630857678462, "grad_norm": 1002.239501953125, "learning_rate": 3.984053920891489e-05, "loss": 78.2601, "step": 34050 }, { "epoch": 0.13760670984215226, "grad_norm": 785.2839965820312, "learning_rate": 3.9840187085007965e-05, "loss": 103.6112, "step": 34060 }, { "epoch": 0.1376471111075199, "grad_norm": 914.4097290039062, "learning_rate": 3.983983457430639e-05, "loss": 66.841, "step": 34070 }, { "epoch": 0.1376875123728875, "grad_norm": 681.90234375, "learning_rate": 3.983948167681701e-05, "loss": 68.6914, "step": 34080 }, { "epoch": 0.13772791363825515, "grad_norm": 2546.71337890625, "learning_rate": 3.983912839254673e-05, "loss": 108.741, "step": 34090 }, { "epoch": 0.1377683149036228, "grad_norm": 1170.43212890625, "learning_rate": 3.983877472150243e-05, "loss": 73.4186, "step": 34100 }, { "epoch": 0.1378087161689904, "grad_norm": 921.9115600585938, "learning_rate": 3.9838420663691e-05, "loss": 100.3894, "step": 34110 }, { "epoch": 0.13784911743435804, "grad_norm": 1105.57421875, "learning_rate": 3.9838066219119354e-05, "loss": 90.6614, "step": 34120 }, { "epoch": 0.13788951869972568, "grad_norm": 884.2128295898438, "learning_rate": 3.983771138779438e-05, "loss": 80.733, "step": 34130 }, { "epoch": 0.1379299199650933, "grad_norm": 4304.98828125, "learning_rate": 3.983735616972301e-05, "loss": 152.8355, "step": 34140 }, { "epoch": 0.13797032123046093, "grad_norm": 831.826904296875, "learning_rate": 3.983700056491218e-05, "loss": 66.2968, "step": 34150 }, { "epoch": 0.13801072249582857, "grad_norm": 691.4739990234375, "learning_rate": 3.9836644573368804e-05, "loss": 133.1252, "step": 34160 }, { "epoch": 0.1380511237611962, "grad_norm": 926.5187377929688, "learning_rate": 3.9836288195099834e-05, "loss": 113.0467, "step": 34170 }, { "epoch": 0.13809152502656383, "grad_norm": 808.3482055664062, "learning_rate": 3.983593143011221e-05, "loss": 97.4411, "step": 34180 }, { "epoch": 0.13813192629193147, "grad_norm": 1823.265869140625, "learning_rate": 3.983557427841289e-05, "loss": 136.7448, "step": 34190 }, { "epoch": 0.1381723275572991, "grad_norm": 1061.7825927734375, "learning_rate": 3.9835216740008835e-05, "loss": 81.9278, "step": 34200 }, { "epoch": 0.13821272882266672, "grad_norm": 1279.1239013671875, "learning_rate": 3.983485881490702e-05, "loss": 125.9145, "step": 34210 }, { "epoch": 0.13825313008803436, "grad_norm": 713.9881591796875, "learning_rate": 3.983450050311442e-05, "loss": 72.8081, "step": 34220 }, { "epoch": 0.138293531353402, "grad_norm": 902.9114990234375, "learning_rate": 3.9834141804638024e-05, "loss": 95.5154, "step": 34230 }, { "epoch": 0.1383339326187696, "grad_norm": 675.9998779296875, "learning_rate": 3.983378271948482e-05, "loss": 70.6422, "step": 34240 }, { "epoch": 0.13837433388413725, "grad_norm": 1091.7891845703125, "learning_rate": 3.983342324766181e-05, "loss": 88.8903, "step": 34250 }, { "epoch": 0.1384147351495049, "grad_norm": 647.6904907226562, "learning_rate": 3.9833063389176005e-05, "loss": 136.1893, "step": 34260 }, { "epoch": 0.1384551364148725, "grad_norm": 544.426025390625, "learning_rate": 3.983270314403442e-05, "loss": 87.8933, "step": 34270 }, { "epoch": 0.13849553768024014, "grad_norm": 845.7183227539062, "learning_rate": 3.983234251224407e-05, "loss": 89.7384, "step": 34280 }, { "epoch": 0.13853593894560778, "grad_norm": 0.0, "learning_rate": 3.9831981493812e-05, "loss": 63.4833, "step": 34290 }, { "epoch": 0.1385763402109754, "grad_norm": 1032.6282958984375, "learning_rate": 3.9831620088745236e-05, "loss": 120.7541, "step": 34300 }, { "epoch": 0.13861674147634304, "grad_norm": 1177.107421875, "learning_rate": 3.983125829705083e-05, "loss": 93.1693, "step": 34310 }, { "epoch": 0.13865714274171068, "grad_norm": 1217.580322265625, "learning_rate": 3.983089611873583e-05, "loss": 79.4667, "step": 34320 }, { "epoch": 0.1386975440070783, "grad_norm": 5233.41015625, "learning_rate": 3.9830533553807306e-05, "loss": 117.9349, "step": 34330 }, { "epoch": 0.13873794527244593, "grad_norm": 536.6962890625, "learning_rate": 3.9830170602272316e-05, "loss": 94.761, "step": 34340 }, { "epoch": 0.13877834653781357, "grad_norm": 567.7264404296875, "learning_rate": 3.982980726413795e-05, "loss": 79.9903, "step": 34350 }, { "epoch": 0.1388187478031812, "grad_norm": 514.4133911132812, "learning_rate": 3.9829443539411275e-05, "loss": 106.1553, "step": 34360 }, { "epoch": 0.13885914906854882, "grad_norm": 724.4990234375, "learning_rate": 3.9829079428099386e-05, "loss": 101.8466, "step": 34370 }, { "epoch": 0.13889955033391646, "grad_norm": 776.9714965820312, "learning_rate": 3.982871493020939e-05, "loss": 89.8777, "step": 34380 }, { "epoch": 0.1389399515992841, "grad_norm": 986.380126953125, "learning_rate": 3.9828350045748395e-05, "loss": 99.2931, "step": 34390 }, { "epoch": 0.1389803528646517, "grad_norm": 1103.84228515625, "learning_rate": 3.9827984774723495e-05, "loss": 84.1699, "step": 34400 }, { "epoch": 0.13902075413001935, "grad_norm": 1649.4866943359375, "learning_rate": 3.982761911714183e-05, "loss": 86.1248, "step": 34410 }, { "epoch": 0.139061155395387, "grad_norm": 1313.41015625, "learning_rate": 3.982725307301052e-05, "loss": 109.6458, "step": 34420 }, { "epoch": 0.1391015566607546, "grad_norm": 1034.6004638671875, "learning_rate": 3.98268866423367e-05, "loss": 95.0798, "step": 34430 }, { "epoch": 0.13914195792612225, "grad_norm": 1234.4898681640625, "learning_rate": 3.9826519825127524e-05, "loss": 119.6586, "step": 34440 }, { "epoch": 0.13918235919148988, "grad_norm": 1234.220947265625, "learning_rate": 3.9826152621390137e-05, "loss": 64.2341, "step": 34450 }, { "epoch": 0.1392227604568575, "grad_norm": 948.7850952148438, "learning_rate": 3.982578503113169e-05, "loss": 82.6322, "step": 34460 }, { "epoch": 0.13926316172222514, "grad_norm": 552.3663330078125, "learning_rate": 3.982541705435936e-05, "loss": 134.0263, "step": 34470 }, { "epoch": 0.13930356298759278, "grad_norm": 1042.3890380859375, "learning_rate": 3.982504869108033e-05, "loss": 77.6071, "step": 34480 }, { "epoch": 0.1393439642529604, "grad_norm": 883.0326538085938, "learning_rate": 3.982467994130175e-05, "loss": 84.257, "step": 34490 }, { "epoch": 0.13938436551832803, "grad_norm": 917.8054809570312, "learning_rate": 3.982431080503084e-05, "loss": 82.6933, "step": 34500 }, { "epoch": 0.13942476678369567, "grad_norm": 594.1129760742188, "learning_rate": 3.982394128227478e-05, "loss": 93.3724, "step": 34510 }, { "epoch": 0.1394651680490633, "grad_norm": 554.779541015625, "learning_rate": 3.982357137304078e-05, "loss": 118.062, "step": 34520 }, { "epoch": 0.13950556931443092, "grad_norm": 2090.916259765625, "learning_rate": 3.982320107733605e-05, "loss": 92.5961, "step": 34530 }, { "epoch": 0.13954597057979856, "grad_norm": 888.49951171875, "learning_rate": 3.982283039516781e-05, "loss": 88.7859, "step": 34540 }, { "epoch": 0.1395863718451662, "grad_norm": 580.6609497070312, "learning_rate": 3.982245932654328e-05, "loss": 127.9387, "step": 34550 }, { "epoch": 0.13962677311053381, "grad_norm": 1474.858154296875, "learning_rate": 3.982208787146971e-05, "loss": 109.5851, "step": 34560 }, { "epoch": 0.13966717437590145, "grad_norm": 432.7795104980469, "learning_rate": 3.9821716029954326e-05, "loss": 76.8793, "step": 34570 }, { "epoch": 0.1397075756412691, "grad_norm": 563.7490844726562, "learning_rate": 3.9821343802004386e-05, "loss": 71.9206, "step": 34580 }, { "epoch": 0.1397479769066367, "grad_norm": 649.1707763671875, "learning_rate": 3.982097118762714e-05, "loss": 88.6161, "step": 34590 }, { "epoch": 0.13978837817200435, "grad_norm": 858.9306030273438, "learning_rate": 3.982059818682986e-05, "loss": 102.4193, "step": 34600 }, { "epoch": 0.13982877943737199, "grad_norm": 1089.40673828125, "learning_rate": 3.982022479961981e-05, "loss": 74.4667, "step": 34610 }, { "epoch": 0.1398691807027396, "grad_norm": 1060.2073974609375, "learning_rate": 3.981985102600428e-05, "loss": 106.4542, "step": 34620 }, { "epoch": 0.13990958196810724, "grad_norm": 1195.9647216796875, "learning_rate": 3.981947686599054e-05, "loss": 58.7589, "step": 34630 }, { "epoch": 0.13994998323347488, "grad_norm": 1027.3399658203125, "learning_rate": 3.98191023195859e-05, "loss": 110.5383, "step": 34640 }, { "epoch": 0.1399903844988425, "grad_norm": 598.2379150390625, "learning_rate": 3.9818727386797656e-05, "loss": 58.4869, "step": 34650 }, { "epoch": 0.14003078576421013, "grad_norm": 811.7263793945312, "learning_rate": 3.981835206763312e-05, "loss": 112.3262, "step": 34660 }, { "epoch": 0.14007118702957777, "grad_norm": 686.4317016601562, "learning_rate": 3.981797636209961e-05, "loss": 99.8144, "step": 34670 }, { "epoch": 0.1401115882949454, "grad_norm": 1130.1160888671875, "learning_rate": 3.981760027020444e-05, "loss": 106.6619, "step": 34680 }, { "epoch": 0.14015198956031302, "grad_norm": 1811.7294921875, "learning_rate": 3.981722379195496e-05, "loss": 137.9515, "step": 34690 }, { "epoch": 0.14019239082568066, "grad_norm": 2056.862060546875, "learning_rate": 3.981684692735849e-05, "loss": 127.8664, "step": 34700 }, { "epoch": 0.1402327920910483, "grad_norm": 2816.014892578125, "learning_rate": 3.9816469676422395e-05, "loss": 112.5819, "step": 34710 }, { "epoch": 0.14027319335641592, "grad_norm": 587.19580078125, "learning_rate": 3.981609203915402e-05, "loss": 79.4191, "step": 34720 }, { "epoch": 0.14031359462178356, "grad_norm": 1270.5882568359375, "learning_rate": 3.9815714015560724e-05, "loss": 84.8151, "step": 34730 }, { "epoch": 0.1403539958871512, "grad_norm": 717.61767578125, "learning_rate": 3.981533560564988e-05, "loss": 101.6747, "step": 34740 }, { "epoch": 0.1403943971525188, "grad_norm": 1222.3599853515625, "learning_rate": 3.9814956809428876e-05, "loss": 117.1959, "step": 34750 }, { "epoch": 0.14043479841788645, "grad_norm": 1309.3331298828125, "learning_rate": 3.981457762690508e-05, "loss": 93.242, "step": 34760 }, { "epoch": 0.1404751996832541, "grad_norm": 981.174560546875, "learning_rate": 3.98141980580859e-05, "loss": 97.2162, "step": 34770 }, { "epoch": 0.1405156009486217, "grad_norm": 1109.3221435546875, "learning_rate": 3.981381810297872e-05, "loss": 99.3464, "step": 34780 }, { "epoch": 0.14055600221398934, "grad_norm": 1016.7904663085938, "learning_rate": 3.9813437761590954e-05, "loss": 100.2344, "step": 34790 }, { "epoch": 0.14059640347935698, "grad_norm": 461.59075927734375, "learning_rate": 3.981305703393002e-05, "loss": 77.6536, "step": 34800 }, { "epoch": 0.1406368047447246, "grad_norm": 806.1221923828125, "learning_rate": 3.981267592000334e-05, "loss": 84.1041, "step": 34810 }, { "epoch": 0.14067720601009223, "grad_norm": 1290.621337890625, "learning_rate": 3.981229441981834e-05, "loss": 115.2158, "step": 34820 }, { "epoch": 0.14071760727545987, "grad_norm": 1207.3558349609375, "learning_rate": 3.9811912533382465e-05, "loss": 94.0456, "step": 34830 }, { "epoch": 0.1407580085408275, "grad_norm": 618.889892578125, "learning_rate": 3.981153026070315e-05, "loss": 103.8425, "step": 34840 }, { "epoch": 0.14079840980619512, "grad_norm": 1754.47265625, "learning_rate": 3.981114760178786e-05, "loss": 101.4771, "step": 34850 }, { "epoch": 0.14083881107156276, "grad_norm": 1202.9140625, "learning_rate": 3.981076455664405e-05, "loss": 92.0938, "step": 34860 }, { "epoch": 0.1408792123369304, "grad_norm": 505.58685302734375, "learning_rate": 3.981038112527918e-05, "loss": 71.7284, "step": 34870 }, { "epoch": 0.14091961360229802, "grad_norm": 1087.3023681640625, "learning_rate": 3.980999730770072e-05, "loss": 72.5711, "step": 34880 }, { "epoch": 0.14096001486766566, "grad_norm": 437.0929260253906, "learning_rate": 3.9809613103916185e-05, "loss": 105.24, "step": 34890 }, { "epoch": 0.1410004161330333, "grad_norm": 569.6231689453125, "learning_rate": 3.980922851393303e-05, "loss": 88.6582, "step": 34900 }, { "epoch": 0.1410408173984009, "grad_norm": 1146.248779296875, "learning_rate": 3.980884353775877e-05, "loss": 104.9999, "step": 34910 }, { "epoch": 0.14108121866376855, "grad_norm": 3283.828857421875, "learning_rate": 3.980845817540091e-05, "loss": 80.5895, "step": 34920 }, { "epoch": 0.1411216199291362, "grad_norm": 629.7263793945312, "learning_rate": 3.980807242686695e-05, "loss": 58.2123, "step": 34930 }, { "epoch": 0.1411620211945038, "grad_norm": 594.664794921875, "learning_rate": 3.980768629216443e-05, "loss": 148.3967, "step": 34940 }, { "epoch": 0.14120242245987144, "grad_norm": 1153.6336669921875, "learning_rate": 3.980729977130086e-05, "loss": 120.5844, "step": 34950 }, { "epoch": 0.14124282372523908, "grad_norm": 2773.668701171875, "learning_rate": 3.9806912864283796e-05, "loss": 91.5852, "step": 34960 }, { "epoch": 0.1412832249906067, "grad_norm": 590.5272216796875, "learning_rate": 3.9806525571120754e-05, "loss": 91.9213, "step": 34970 }, { "epoch": 0.14132362625597433, "grad_norm": 951.1868286132812, "learning_rate": 3.9806137891819306e-05, "loss": 118.9212, "step": 34980 }, { "epoch": 0.14136402752134197, "grad_norm": 761.1874389648438, "learning_rate": 3.9805749826387005e-05, "loss": 89.8664, "step": 34990 }, { "epoch": 0.1414044287867096, "grad_norm": 1326.7030029296875, "learning_rate": 3.980536137483141e-05, "loss": 220.9611, "step": 35000 }, { "epoch": 0.14144483005207723, "grad_norm": 385.3703918457031, "learning_rate": 3.98049725371601e-05, "loss": 79.5324, "step": 35010 }, { "epoch": 0.14148523131744487, "grad_norm": 1560.7586669921875, "learning_rate": 3.980458331338065e-05, "loss": 100.8703, "step": 35020 }, { "epoch": 0.1415256325828125, "grad_norm": 949.5518188476562, "learning_rate": 3.980419370350066e-05, "loss": 106.5335, "step": 35030 }, { "epoch": 0.14156603384818012, "grad_norm": 1331.5462646484375, "learning_rate": 3.980380370752771e-05, "loss": 92.9983, "step": 35040 }, { "epoch": 0.14160643511354776, "grad_norm": 1191.44140625, "learning_rate": 3.980341332546942e-05, "loss": 92.3872, "step": 35050 }, { "epoch": 0.1416468363789154, "grad_norm": 1959.1849365234375, "learning_rate": 3.9803022557333387e-05, "loss": 102.082, "step": 35060 }, { "epoch": 0.141687237644283, "grad_norm": 559.7910766601562, "learning_rate": 3.9802631403127234e-05, "loss": 74.2511, "step": 35070 }, { "epoch": 0.14172763890965065, "grad_norm": 1271.74169921875, "learning_rate": 3.980223986285859e-05, "loss": 138.9416, "step": 35080 }, { "epoch": 0.1417680401750183, "grad_norm": 900.1123046875, "learning_rate": 3.980184793653508e-05, "loss": 78.2254, "step": 35090 }, { "epoch": 0.1418084414403859, "grad_norm": 0.0, "learning_rate": 3.9801455624164354e-05, "loss": 103.8371, "step": 35100 }, { "epoch": 0.14184884270575354, "grad_norm": 4405.7216796875, "learning_rate": 3.980106292575405e-05, "loss": 95.1343, "step": 35110 }, { "epoch": 0.14188924397112118, "grad_norm": 2287.119384765625, "learning_rate": 3.980066984131184e-05, "loss": 156.3899, "step": 35120 }, { "epoch": 0.1419296452364888, "grad_norm": 915.5543823242188, "learning_rate": 3.980027637084537e-05, "loss": 110.9924, "step": 35130 }, { "epoch": 0.14197004650185643, "grad_norm": 299.7638244628906, "learning_rate": 3.979988251436232e-05, "loss": 101.3539, "step": 35140 }, { "epoch": 0.14201044776722407, "grad_norm": 467.0106506347656, "learning_rate": 3.979948827187036e-05, "loss": 89.7661, "step": 35150 }, { "epoch": 0.14205084903259171, "grad_norm": 916.4078369140625, "learning_rate": 3.9799093643377195e-05, "loss": 102.0699, "step": 35160 }, { "epoch": 0.14209125029795933, "grad_norm": 946.13232421875, "learning_rate": 3.97986986288905e-05, "loss": 108.2325, "step": 35170 }, { "epoch": 0.14213165156332697, "grad_norm": 1187.6005859375, "learning_rate": 3.9798303228417975e-05, "loss": 77.135, "step": 35180 }, { "epoch": 0.1421720528286946, "grad_norm": 761.8079833984375, "learning_rate": 3.9797907441967345e-05, "loss": 191.0544, "step": 35190 }, { "epoch": 0.14221245409406222, "grad_norm": 527.9906005859375, "learning_rate": 3.979751126954632e-05, "loss": 84.1937, "step": 35200 }, { "epoch": 0.14225285535942986, "grad_norm": 433.1168212890625, "learning_rate": 3.979711471116261e-05, "loss": 63.9944, "step": 35210 }, { "epoch": 0.1422932566247975, "grad_norm": 771.2758178710938, "learning_rate": 3.9796717766823966e-05, "loss": 102.1717, "step": 35220 }, { "epoch": 0.1423336578901651, "grad_norm": 1156.9407958984375, "learning_rate": 3.979632043653811e-05, "loss": 82.1883, "step": 35230 }, { "epoch": 0.14237405915553275, "grad_norm": 1199.1309814453125, "learning_rate": 3.9795922720312796e-05, "loss": 99.0348, "step": 35240 }, { "epoch": 0.1424144604209004, "grad_norm": 2005.9031982421875, "learning_rate": 3.979552461815578e-05, "loss": 94.9322, "step": 35250 }, { "epoch": 0.142454861686268, "grad_norm": 781.2155151367188, "learning_rate": 3.9795126130074824e-05, "loss": 80.2976, "step": 35260 }, { "epoch": 0.14249526295163564, "grad_norm": 900.551025390625, "learning_rate": 3.9794727256077685e-05, "loss": 111.1041, "step": 35270 }, { "epoch": 0.14253566421700328, "grad_norm": 2165.27294921875, "learning_rate": 3.979432799617215e-05, "loss": 122.0848, "step": 35280 }, { "epoch": 0.1425760654823709, "grad_norm": 2192.3740234375, "learning_rate": 3.9793928350366e-05, "loss": 77.1153, "step": 35290 }, { "epoch": 0.14261646674773854, "grad_norm": 1193.210205078125, "learning_rate": 3.979352831866702e-05, "loss": 63.2352, "step": 35300 }, { "epoch": 0.14265686801310618, "grad_norm": 679.7105712890625, "learning_rate": 3.979312790108303e-05, "loss": 108.7417, "step": 35310 }, { "epoch": 0.14269726927847382, "grad_norm": 3321.63671875, "learning_rate": 3.979272709762181e-05, "loss": 98.3625, "step": 35320 }, { "epoch": 0.14273767054384143, "grad_norm": 520.6390380859375, "learning_rate": 3.979232590829119e-05, "loss": 88.9054, "step": 35330 }, { "epoch": 0.14277807180920907, "grad_norm": 755.5736083984375, "learning_rate": 3.9791924333098986e-05, "loss": 118.9642, "step": 35340 }, { "epoch": 0.1428184730745767, "grad_norm": 1435.6326904296875, "learning_rate": 3.979152237205302e-05, "loss": 60.0594, "step": 35350 }, { "epoch": 0.14285887433994432, "grad_norm": 687.8348388671875, "learning_rate": 3.979112002516115e-05, "loss": 99.3451, "step": 35360 }, { "epoch": 0.14289927560531196, "grad_norm": 3126.065673828125, "learning_rate": 3.979071729243119e-05, "loss": 94.5208, "step": 35370 }, { "epoch": 0.1429396768706796, "grad_norm": 627.322265625, "learning_rate": 3.979031417387102e-05, "loss": 96.7639, "step": 35380 }, { "epoch": 0.1429800781360472, "grad_norm": 1383.9814453125, "learning_rate": 3.978991066948848e-05, "loss": 121.3856, "step": 35390 }, { "epoch": 0.14302047940141485, "grad_norm": 1195.60498046875, "learning_rate": 3.9789506779291445e-05, "loss": 100.726, "step": 35400 }, { "epoch": 0.1430608806667825, "grad_norm": 1437.9139404296875, "learning_rate": 3.978910250328779e-05, "loss": 97.5714, "step": 35410 }, { "epoch": 0.1431012819321501, "grad_norm": 1709.1278076171875, "learning_rate": 3.978869784148539e-05, "loss": 90.859, "step": 35420 }, { "epoch": 0.14314168319751774, "grad_norm": 724.835693359375, "learning_rate": 3.978829279389214e-05, "loss": 110.8027, "step": 35430 }, { "epoch": 0.14318208446288538, "grad_norm": 3041.685791015625, "learning_rate": 3.978788736051593e-05, "loss": 77.8094, "step": 35440 }, { "epoch": 0.143222485728253, "grad_norm": 1632.168701171875, "learning_rate": 3.978748154136467e-05, "loss": 101.8852, "step": 35450 }, { "epoch": 0.14326288699362064, "grad_norm": 0.0, "learning_rate": 3.978707533644627e-05, "loss": 79.8863, "step": 35460 }, { "epoch": 0.14330328825898828, "grad_norm": 1186.8582763671875, "learning_rate": 3.978666874576865e-05, "loss": 83.3348, "step": 35470 }, { "epoch": 0.14334368952435592, "grad_norm": 1080.42919921875, "learning_rate": 3.978626176933974e-05, "loss": 82.4089, "step": 35480 }, { "epoch": 0.14338409078972353, "grad_norm": 658.2520751953125, "learning_rate": 3.978585440716746e-05, "loss": 102.263, "step": 35490 }, { "epoch": 0.14342449205509117, "grad_norm": 924.6647338867188, "learning_rate": 3.978544665925977e-05, "loss": 90.3248, "step": 35500 }, { "epoch": 0.1434648933204588, "grad_norm": 1277.648681640625, "learning_rate": 3.978503852562461e-05, "loss": 60.2407, "step": 35510 }, { "epoch": 0.14350529458582642, "grad_norm": 1398.44775390625, "learning_rate": 3.9784630006269935e-05, "loss": 91.2831, "step": 35520 }, { "epoch": 0.14354569585119406, "grad_norm": 637.0263671875, "learning_rate": 3.9784221101203715e-05, "loss": 85.0435, "step": 35530 }, { "epoch": 0.1435860971165617, "grad_norm": 2696.323486328125, "learning_rate": 3.978381181043392e-05, "loss": 95.1324, "step": 35540 }, { "epoch": 0.1436264983819293, "grad_norm": 914.337890625, "learning_rate": 3.978340213396853e-05, "loss": 89.1903, "step": 35550 }, { "epoch": 0.14366689964729695, "grad_norm": 569.9886474609375, "learning_rate": 3.978299207181552e-05, "loss": 146.3416, "step": 35560 }, { "epoch": 0.1437073009126646, "grad_norm": 2463.093017578125, "learning_rate": 3.97825816239829e-05, "loss": 85.1585, "step": 35570 }, { "epoch": 0.1437477021780322, "grad_norm": 888.9117431640625, "learning_rate": 3.978217079047867e-05, "loss": 77.3447, "step": 35580 }, { "epoch": 0.14378810344339985, "grad_norm": 860.64013671875, "learning_rate": 3.9781759571310835e-05, "loss": 87.8478, "step": 35590 }, { "epoch": 0.14382850470876749, "grad_norm": 1036.18994140625, "learning_rate": 3.9781347966487415e-05, "loss": 111.4597, "step": 35600 }, { "epoch": 0.1438689059741351, "grad_norm": 547.6618041992188, "learning_rate": 3.978093597601643e-05, "loss": 76.2303, "step": 35610 }, { "epoch": 0.14390930723950274, "grad_norm": 1933.931640625, "learning_rate": 3.978052359990591e-05, "loss": 89.3041, "step": 35620 }, { "epoch": 0.14394970850487038, "grad_norm": 360.5538330078125, "learning_rate": 3.97801108381639e-05, "loss": 85.9847, "step": 35630 }, { "epoch": 0.14399010977023802, "grad_norm": 1371.674560546875, "learning_rate": 3.977969769079845e-05, "loss": 92.9651, "step": 35640 }, { "epoch": 0.14403051103560563, "grad_norm": 3026.696044921875, "learning_rate": 3.977928415781761e-05, "loss": 86.0992, "step": 35650 }, { "epoch": 0.14407091230097327, "grad_norm": 1090.5291748046875, "learning_rate": 3.977887023922944e-05, "loss": 98.6011, "step": 35660 }, { "epoch": 0.1441113135663409, "grad_norm": 886.7594604492188, "learning_rate": 3.977845593504201e-05, "loss": 128.8346, "step": 35670 }, { "epoch": 0.14415171483170852, "grad_norm": 372.66937255859375, "learning_rate": 3.97780412452634e-05, "loss": 80.6952, "step": 35680 }, { "epoch": 0.14419211609707616, "grad_norm": 569.3165283203125, "learning_rate": 3.97776261699017e-05, "loss": 82.1124, "step": 35690 }, { "epoch": 0.1442325173624438, "grad_norm": 495.3749694824219, "learning_rate": 3.977721070896499e-05, "loss": 63.9636, "step": 35700 }, { "epoch": 0.14427291862781141, "grad_norm": 1659.83544921875, "learning_rate": 3.977679486246137e-05, "loss": 150.3122, "step": 35710 }, { "epoch": 0.14431331989317905, "grad_norm": 933.7442626953125, "learning_rate": 3.977637863039896e-05, "loss": 85.469, "step": 35720 }, { "epoch": 0.1443537211585467, "grad_norm": 2418.2099609375, "learning_rate": 3.9775962012785863e-05, "loss": 76.3617, "step": 35730 }, { "epoch": 0.1443941224239143, "grad_norm": 908.3840942382812, "learning_rate": 3.9775545009630204e-05, "loss": 98.9815, "step": 35740 }, { "epoch": 0.14443452368928195, "grad_norm": 593.0587158203125, "learning_rate": 3.977512762094012e-05, "loss": 106.191, "step": 35750 }, { "epoch": 0.1444749249546496, "grad_norm": 1349.186279296875, "learning_rate": 3.977470984672374e-05, "loss": 124.571, "step": 35760 }, { "epoch": 0.1445153262200172, "grad_norm": 399.5314025878906, "learning_rate": 3.977429168698921e-05, "loss": 107.339, "step": 35770 }, { "epoch": 0.14455572748538484, "grad_norm": 867.3482055664062, "learning_rate": 3.9773873141744684e-05, "loss": 123.2388, "step": 35780 }, { "epoch": 0.14459612875075248, "grad_norm": 377.1903991699219, "learning_rate": 3.9773454210998315e-05, "loss": 116.9524, "step": 35790 }, { "epoch": 0.14463653001612012, "grad_norm": 851.285888671875, "learning_rate": 3.977303489475828e-05, "loss": 108.5521, "step": 35800 }, { "epoch": 0.14467693128148773, "grad_norm": 1076.890869140625, "learning_rate": 3.977261519303275e-05, "loss": 128.3607, "step": 35810 }, { "epoch": 0.14471733254685537, "grad_norm": 412.141845703125, "learning_rate": 3.9772195105829914e-05, "loss": 90.6862, "step": 35820 }, { "epoch": 0.144757733812223, "grad_norm": 959.1841430664062, "learning_rate": 3.977177463315795e-05, "loss": 77.2501, "step": 35830 }, { "epoch": 0.14479813507759062, "grad_norm": 839.197509765625, "learning_rate": 3.977135377502506e-05, "loss": 81.871, "step": 35840 }, { "epoch": 0.14483853634295826, "grad_norm": 977.5892944335938, "learning_rate": 3.9770932531439445e-05, "loss": 97.2153, "step": 35850 }, { "epoch": 0.1448789376083259, "grad_norm": 3265.607177734375, "learning_rate": 3.977051090240933e-05, "loss": 85.7729, "step": 35860 }, { "epoch": 0.14491933887369352, "grad_norm": 535.1461791992188, "learning_rate": 3.977008888794291e-05, "loss": 81.5642, "step": 35870 }, { "epoch": 0.14495974013906116, "grad_norm": 624.2498779296875, "learning_rate": 3.976966648804845e-05, "loss": 149.5731, "step": 35880 }, { "epoch": 0.1450001414044288, "grad_norm": 1168.02734375, "learning_rate": 3.976924370273415e-05, "loss": 98.4266, "step": 35890 }, { "epoch": 0.1450405426697964, "grad_norm": 1551.1861572265625, "learning_rate": 3.976882053200827e-05, "loss": 79.1619, "step": 35900 }, { "epoch": 0.14508094393516405, "grad_norm": 634.694580078125, "learning_rate": 3.976839697587906e-05, "loss": 67.8042, "step": 35910 }, { "epoch": 0.1451213452005317, "grad_norm": 602.9379272460938, "learning_rate": 3.9767973034354764e-05, "loss": 83.8792, "step": 35920 }, { "epoch": 0.1451617464658993, "grad_norm": 2905.12451171875, "learning_rate": 3.9767548707443665e-05, "loss": 120.9742, "step": 35930 }, { "epoch": 0.14520214773126694, "grad_norm": 910.666015625, "learning_rate": 3.976712399515402e-05, "loss": 111.3549, "step": 35940 }, { "epoch": 0.14524254899663458, "grad_norm": 1786.91796875, "learning_rate": 3.976669889749412e-05, "loss": 72.8603, "step": 35950 }, { "epoch": 0.14528295026200222, "grad_norm": 1702.6033935546875, "learning_rate": 3.9766273414472254e-05, "loss": 72.2166, "step": 35960 }, { "epoch": 0.14532335152736983, "grad_norm": 1203.12548828125, "learning_rate": 3.97658475460967e-05, "loss": 85.1111, "step": 35970 }, { "epoch": 0.14536375279273747, "grad_norm": 1354.2686767578125, "learning_rate": 3.976542129237578e-05, "loss": 82.6227, "step": 35980 }, { "epoch": 0.1454041540581051, "grad_norm": 646.3790893554688, "learning_rate": 3.976499465331779e-05, "loss": 83.9092, "step": 35990 }, { "epoch": 0.14544455532347272, "grad_norm": 1774.5269775390625, "learning_rate": 3.976456762893106e-05, "loss": 147.7749, "step": 36000 }, { "epoch": 0.14548495658884036, "grad_norm": 484.8998718261719, "learning_rate": 3.976414021922391e-05, "loss": 69.3422, "step": 36010 }, { "epoch": 0.145525357854208, "grad_norm": 803.2476806640625, "learning_rate": 3.9763712424204664e-05, "loss": 81.9522, "step": 36020 }, { "epoch": 0.14556575911957562, "grad_norm": 390.46014404296875, "learning_rate": 3.976328424388167e-05, "loss": 58.7185, "step": 36030 }, { "epoch": 0.14560616038494326, "grad_norm": 552.03076171875, "learning_rate": 3.9762855678263274e-05, "loss": 125.9961, "step": 36040 }, { "epoch": 0.1456465616503109, "grad_norm": 849.45361328125, "learning_rate": 3.976242672735784e-05, "loss": 96.4432, "step": 36050 }, { "epoch": 0.1456869629156785, "grad_norm": 995.2199096679688, "learning_rate": 3.9761997391173715e-05, "loss": 121.193, "step": 36060 }, { "epoch": 0.14572736418104615, "grad_norm": 645.1380004882812, "learning_rate": 3.976156766971928e-05, "loss": 99.5118, "step": 36070 }, { "epoch": 0.1457677654464138, "grad_norm": 494.75238037109375, "learning_rate": 3.976113756300291e-05, "loss": 77.7827, "step": 36080 }, { "epoch": 0.1458081667117814, "grad_norm": 616.9432983398438, "learning_rate": 3.976070707103299e-05, "loss": 99.2099, "step": 36090 }, { "epoch": 0.14584856797714904, "grad_norm": 894.1851196289062, "learning_rate": 3.976027619381791e-05, "loss": 80.5238, "step": 36100 }, { "epoch": 0.14588896924251668, "grad_norm": 1238.346923828125, "learning_rate": 3.975984493136607e-05, "loss": 75.3991, "step": 36110 }, { "epoch": 0.14592937050788432, "grad_norm": 1124.619873046875, "learning_rate": 3.975941328368588e-05, "loss": 139.1967, "step": 36120 }, { "epoch": 0.14596977177325193, "grad_norm": 0.0, "learning_rate": 3.975898125078576e-05, "loss": 86.8455, "step": 36130 }, { "epoch": 0.14601017303861957, "grad_norm": 999.146728515625, "learning_rate": 3.9758548832674126e-05, "loss": 98.2229, "step": 36140 }, { "epoch": 0.1460505743039872, "grad_norm": 921.8599243164062, "learning_rate": 3.975811602935941e-05, "loss": 65.2062, "step": 36150 }, { "epoch": 0.14609097556935483, "grad_norm": 1060.4775390625, "learning_rate": 3.9757682840850045e-05, "loss": 73.0853, "step": 36160 }, { "epoch": 0.14613137683472247, "grad_norm": 1437.3382568359375, "learning_rate": 3.975724926715449e-05, "loss": 103.5526, "step": 36170 }, { "epoch": 0.1461717781000901, "grad_norm": 1258.572509765625, "learning_rate": 3.9756815308281185e-05, "loss": 97.9677, "step": 36180 }, { "epoch": 0.14621217936545772, "grad_norm": 767.1698608398438, "learning_rate": 3.975638096423859e-05, "loss": 148.626, "step": 36190 }, { "epoch": 0.14625258063082536, "grad_norm": 1137.6505126953125, "learning_rate": 3.975594623503518e-05, "loss": 72.6446, "step": 36200 }, { "epoch": 0.146292981896193, "grad_norm": 733.9754638671875, "learning_rate": 3.975551112067943e-05, "loss": 76.0892, "step": 36210 }, { "epoch": 0.1463333831615606, "grad_norm": 0.0, "learning_rate": 3.975507562117982e-05, "loss": 79.5784, "step": 36220 }, { "epoch": 0.14637378442692825, "grad_norm": 1067.524169921875, "learning_rate": 3.975463973654483e-05, "loss": 95.0418, "step": 36230 }, { "epoch": 0.1464141856922959, "grad_norm": 1821.6085205078125, "learning_rate": 3.975420346678298e-05, "loss": 85.3464, "step": 36240 }, { "epoch": 0.1464545869576635, "grad_norm": 477.71600341796875, "learning_rate": 3.9753766811902756e-05, "loss": 79.2371, "step": 36250 }, { "epoch": 0.14649498822303114, "grad_norm": 668.6638793945312, "learning_rate": 3.975332977191268e-05, "loss": 92.1396, "step": 36260 }, { "epoch": 0.14653538948839878, "grad_norm": 0.0, "learning_rate": 3.9752892346821274e-05, "loss": 166.9924, "step": 36270 }, { "epoch": 0.14657579075376642, "grad_norm": 1038.5552978515625, "learning_rate": 3.975245453663706e-05, "loss": 95.1147, "step": 36280 }, { "epoch": 0.14661619201913403, "grad_norm": 1050.910400390625, "learning_rate": 3.9752016341368574e-05, "loss": 92.2121, "step": 36290 }, { "epoch": 0.14665659328450167, "grad_norm": 1757.861572265625, "learning_rate": 3.9751577761024366e-05, "loss": 72.9227, "step": 36300 }, { "epoch": 0.14669699454986931, "grad_norm": 1091.1439208984375, "learning_rate": 3.9751138795612976e-05, "loss": 114.0233, "step": 36310 }, { "epoch": 0.14673739581523693, "grad_norm": 253.02093505859375, "learning_rate": 3.975069944514296e-05, "loss": 84.1495, "step": 36320 }, { "epoch": 0.14677779708060457, "grad_norm": 1279.52490234375, "learning_rate": 3.97502597096229e-05, "loss": 110.2362, "step": 36330 }, { "epoch": 0.1468181983459722, "grad_norm": 856.653076171875, "learning_rate": 3.9749819589061354e-05, "loss": 75.3621, "step": 36340 }, { "epoch": 0.14685859961133982, "grad_norm": 726.4605712890625, "learning_rate": 3.974937908346691e-05, "loss": 84.8673, "step": 36350 }, { "epoch": 0.14689900087670746, "grad_norm": 1209.525634765625, "learning_rate": 3.974893819284815e-05, "loss": 77.0584, "step": 36360 }, { "epoch": 0.1469394021420751, "grad_norm": 1527.44580078125, "learning_rate": 3.9748496917213675e-05, "loss": 119.0866, "step": 36370 }, { "epoch": 0.1469798034074427, "grad_norm": 665.8195190429688, "learning_rate": 3.9748055256572084e-05, "loss": 76.9333, "step": 36380 }, { "epoch": 0.14702020467281035, "grad_norm": 408.0297546386719, "learning_rate": 3.974761321093199e-05, "loss": 99.8797, "step": 36390 }, { "epoch": 0.147060605938178, "grad_norm": 617.4054565429688, "learning_rate": 3.974717078030201e-05, "loss": 71.3116, "step": 36400 }, { "epoch": 0.1471010072035456, "grad_norm": 715.342529296875, "learning_rate": 3.974672796469077e-05, "loss": 107.3487, "step": 36410 }, { "epoch": 0.14714140846891324, "grad_norm": 639.6741943359375, "learning_rate": 3.97462847641069e-05, "loss": 65.2551, "step": 36420 }, { "epoch": 0.14718180973428088, "grad_norm": 0.0, "learning_rate": 3.974584117855904e-05, "loss": 106.0524, "step": 36430 }, { "epoch": 0.14722221099964852, "grad_norm": 1352.6854248046875, "learning_rate": 3.974539720805585e-05, "loss": 86.2377, "step": 36440 }, { "epoch": 0.14726261226501614, "grad_norm": 1535.055419921875, "learning_rate": 3.9744952852605965e-05, "loss": 90.6992, "step": 36450 }, { "epoch": 0.14730301353038378, "grad_norm": 986.7384643554688, "learning_rate": 3.974450811221806e-05, "loss": 92.4026, "step": 36460 }, { "epoch": 0.14734341479575142, "grad_norm": 887.9044799804688, "learning_rate": 3.974406298690081e-05, "loss": 74.2204, "step": 36470 }, { "epoch": 0.14738381606111903, "grad_norm": 392.92437744140625, "learning_rate": 3.974361747666288e-05, "loss": 79.7651, "step": 36480 }, { "epoch": 0.14742421732648667, "grad_norm": 1977.6578369140625, "learning_rate": 3.974317158151297e-05, "loss": 88.8437, "step": 36490 }, { "epoch": 0.1474646185918543, "grad_norm": 650.1098022460938, "learning_rate": 3.974272530145976e-05, "loss": 64.0652, "step": 36500 }, { "epoch": 0.14750501985722192, "grad_norm": 555.5321044921875, "learning_rate": 3.974227863651196e-05, "loss": 95.1656, "step": 36510 }, { "epoch": 0.14754542112258956, "grad_norm": 1186.1986083984375, "learning_rate": 3.974183158667827e-05, "loss": 104.5347, "step": 36520 }, { "epoch": 0.1475858223879572, "grad_norm": 804.430908203125, "learning_rate": 3.9741384151967416e-05, "loss": 78.5228, "step": 36530 }, { "epoch": 0.1476262236533248, "grad_norm": 724.3908081054688, "learning_rate": 3.974093633238811e-05, "loss": 95.4657, "step": 36540 }, { "epoch": 0.14766662491869245, "grad_norm": 657.1284790039062, "learning_rate": 3.974048812794908e-05, "loss": 95.8921, "step": 36550 }, { "epoch": 0.1477070261840601, "grad_norm": 813.1570434570312, "learning_rate": 3.974003953865908e-05, "loss": 87.4508, "step": 36560 }, { "epoch": 0.1477474274494277, "grad_norm": 1045.587890625, "learning_rate": 3.973959056452685e-05, "loss": 116.7263, "step": 36570 }, { "epoch": 0.14778782871479534, "grad_norm": 680.8571166992188, "learning_rate": 3.9739141205561126e-05, "loss": 117.5743, "step": 36580 }, { "epoch": 0.14782822998016298, "grad_norm": 584.37158203125, "learning_rate": 3.973869146177069e-05, "loss": 79.1421, "step": 36590 }, { "epoch": 0.1478686312455306, "grad_norm": 1341.7860107421875, "learning_rate": 3.973824133316431e-05, "loss": 97.5215, "step": 36600 }, { "epoch": 0.14790903251089824, "grad_norm": 829.0403442382812, "learning_rate": 3.973779081975074e-05, "loss": 109.4106, "step": 36610 }, { "epoch": 0.14794943377626588, "grad_norm": 435.8904724121094, "learning_rate": 3.9737339921538775e-05, "loss": 94.5021, "step": 36620 }, { "epoch": 0.14798983504163352, "grad_norm": 1048.1324462890625, "learning_rate": 3.973688863853721e-05, "loss": 105.4422, "step": 36630 }, { "epoch": 0.14803023630700113, "grad_norm": 1104.1217041015625, "learning_rate": 3.973643697075485e-05, "loss": 86.1049, "step": 36640 }, { "epoch": 0.14807063757236877, "grad_norm": 2705.364501953125, "learning_rate": 3.973598491820048e-05, "loss": 125.3173, "step": 36650 }, { "epoch": 0.1481110388377364, "grad_norm": 1945.0257568359375, "learning_rate": 3.9735532480882925e-05, "loss": 110.4482, "step": 36660 }, { "epoch": 0.14815144010310402, "grad_norm": 1239.58056640625, "learning_rate": 3.9735079658811006e-05, "loss": 102.1776, "step": 36670 }, { "epoch": 0.14819184136847166, "grad_norm": 751.5692138671875, "learning_rate": 3.973462645199355e-05, "loss": 88.416, "step": 36680 }, { "epoch": 0.1482322426338393, "grad_norm": 738.1787719726562, "learning_rate": 3.9734172860439385e-05, "loss": 72.5949, "step": 36690 }, { "epoch": 0.1482726438992069, "grad_norm": 442.69232177734375, "learning_rate": 3.973371888415736e-05, "loss": 91.564, "step": 36700 }, { "epoch": 0.14831304516457455, "grad_norm": 2007.3953857421875, "learning_rate": 3.9733264523156337e-05, "loss": 136.281, "step": 36710 }, { "epoch": 0.1483534464299422, "grad_norm": 453.4363708496094, "learning_rate": 3.973280977744515e-05, "loss": 111.2205, "step": 36720 }, { "epoch": 0.1483938476953098, "grad_norm": 912.5247192382812, "learning_rate": 3.9732354647032687e-05, "loss": 86.6152, "step": 36730 }, { "epoch": 0.14843424896067745, "grad_norm": 412.4578857421875, "learning_rate": 3.97318991319278e-05, "loss": 80.3192, "step": 36740 }, { "epoch": 0.14847465022604509, "grad_norm": 879.3831176757812, "learning_rate": 3.973144323213939e-05, "loss": 64.8996, "step": 36750 }, { "epoch": 0.1485150514914127, "grad_norm": 697.5357666015625, "learning_rate": 3.9730986947676336e-05, "loss": 152.6258, "step": 36760 }, { "epoch": 0.14855545275678034, "grad_norm": 546.4622802734375, "learning_rate": 3.973053027854753e-05, "loss": 78.2268, "step": 36770 }, { "epoch": 0.14859585402214798, "grad_norm": 0.0, "learning_rate": 3.973007322476188e-05, "loss": 75.756, "step": 36780 }, { "epoch": 0.14863625528751562, "grad_norm": 1019.0073852539062, "learning_rate": 3.9729615786328286e-05, "loss": 110.6772, "step": 36790 }, { "epoch": 0.14867665655288323, "grad_norm": 548.605224609375, "learning_rate": 3.972915796325569e-05, "loss": 66.7377, "step": 36800 }, { "epoch": 0.14871705781825087, "grad_norm": 877.7902221679688, "learning_rate": 3.972869975555299e-05, "loss": 82.7605, "step": 36810 }, { "epoch": 0.1487574590836185, "grad_norm": 1418.376708984375, "learning_rate": 3.972824116322913e-05, "loss": 96.2335, "step": 36820 }, { "epoch": 0.14879786034898612, "grad_norm": 2107.466064453125, "learning_rate": 3.9727782186293066e-05, "loss": 99.8524, "step": 36830 }, { "epoch": 0.14883826161435376, "grad_norm": 863.6719360351562, "learning_rate": 3.972732282475372e-05, "loss": 100.959, "step": 36840 }, { "epoch": 0.1488786628797214, "grad_norm": 400.9438171386719, "learning_rate": 3.9726863078620066e-05, "loss": 70.2841, "step": 36850 }, { "epoch": 0.14891906414508901, "grad_norm": 588.9529418945312, "learning_rate": 3.972640294790106e-05, "loss": 118.53, "step": 36860 }, { "epoch": 0.14895946541045665, "grad_norm": 764.5951538085938, "learning_rate": 3.9725942432605666e-05, "loss": 110.678, "step": 36870 }, { "epoch": 0.1489998666758243, "grad_norm": 999.9136352539062, "learning_rate": 3.972548153274287e-05, "loss": 86.9159, "step": 36880 }, { "epoch": 0.1490402679411919, "grad_norm": 739.2926635742188, "learning_rate": 3.972502024832166e-05, "loss": 117.9995, "step": 36890 }, { "epoch": 0.14908066920655955, "grad_norm": 1673.72900390625, "learning_rate": 3.972455857935102e-05, "loss": 57.6239, "step": 36900 }, { "epoch": 0.1491210704719272, "grad_norm": 721.6724243164062, "learning_rate": 3.972409652583996e-05, "loss": 66.6377, "step": 36910 }, { "epoch": 0.1491614717372948, "grad_norm": 580.5721435546875, "learning_rate": 3.9723634087797486e-05, "loss": 85.9561, "step": 36920 }, { "epoch": 0.14920187300266244, "grad_norm": 416.51556396484375, "learning_rate": 3.9723171265232606e-05, "loss": 111.8571, "step": 36930 }, { "epoch": 0.14924227426803008, "grad_norm": 809.1591186523438, "learning_rate": 3.972270805815435e-05, "loss": 93.3519, "step": 36940 }, { "epoch": 0.14928267553339772, "grad_norm": 522.8436279296875, "learning_rate": 3.9722244466571745e-05, "loss": 102.2965, "step": 36950 }, { "epoch": 0.14932307679876533, "grad_norm": 1699.3441162109375, "learning_rate": 3.972178049049383e-05, "loss": 125.5619, "step": 36960 }, { "epoch": 0.14936347806413297, "grad_norm": 1413.349609375, "learning_rate": 3.972131612992965e-05, "loss": 119.3396, "step": 36970 }, { "epoch": 0.1494038793295006, "grad_norm": 520.0859375, "learning_rate": 3.972085138488826e-05, "loss": 101.6701, "step": 36980 }, { "epoch": 0.14944428059486822, "grad_norm": 3968.024658203125, "learning_rate": 3.972038625537873e-05, "loss": 122.6143, "step": 36990 }, { "epoch": 0.14948468186023586, "grad_norm": 579.20751953125, "learning_rate": 3.97199207414101e-05, "loss": 85.4002, "step": 37000 }, { "epoch": 0.1495250831256035, "grad_norm": 578.19140625, "learning_rate": 3.9719454842991474e-05, "loss": 81.768, "step": 37010 }, { "epoch": 0.14956548439097112, "grad_norm": 1352.609619140625, "learning_rate": 3.9718988560131915e-05, "loss": 94.6862, "step": 37020 }, { "epoch": 0.14960588565633876, "grad_norm": 1129.244140625, "learning_rate": 3.971852189284053e-05, "loss": 98.3099, "step": 37030 }, { "epoch": 0.1496462869217064, "grad_norm": 886.0817260742188, "learning_rate": 3.9718054841126406e-05, "loss": 74.3526, "step": 37040 }, { "epoch": 0.149686688187074, "grad_norm": 792.4911499023438, "learning_rate": 3.9717587404998645e-05, "loss": 82.4121, "step": 37050 }, { "epoch": 0.14972708945244165, "grad_norm": 1291.1729736328125, "learning_rate": 3.971711958446638e-05, "loss": 125.0775, "step": 37060 }, { "epoch": 0.1497674907178093, "grad_norm": 312.7253723144531, "learning_rate": 3.9716651379538704e-05, "loss": 94.9803, "step": 37070 }, { "epoch": 0.1498078919831769, "grad_norm": 459.3520812988281, "learning_rate": 3.971618279022477e-05, "loss": 71.5689, "step": 37080 }, { "epoch": 0.14984829324854454, "grad_norm": 508.1872863769531, "learning_rate": 3.9715713816533695e-05, "loss": 55.3603, "step": 37090 }, { "epoch": 0.14988869451391218, "grad_norm": 1485.495849609375, "learning_rate": 3.971524445847463e-05, "loss": 112.9238, "step": 37100 }, { "epoch": 0.14992909577927982, "grad_norm": 974.6215209960938, "learning_rate": 3.971477471605672e-05, "loss": 102.8172, "step": 37110 }, { "epoch": 0.14996949704464743, "grad_norm": 3471.12646484375, "learning_rate": 3.9714304589289134e-05, "loss": 99.5517, "step": 37120 }, { "epoch": 0.15000989831001507, "grad_norm": 1056.742919921875, "learning_rate": 3.971383407818103e-05, "loss": 131.3219, "step": 37130 }, { "epoch": 0.1500502995753827, "grad_norm": 3554.474609375, "learning_rate": 3.971336318274158e-05, "loss": 134.7771, "step": 37140 }, { "epoch": 0.15009070084075032, "grad_norm": 358.5300598144531, "learning_rate": 3.971289190297997e-05, "loss": 95.0274, "step": 37150 }, { "epoch": 0.15013110210611796, "grad_norm": 1012.35888671875, "learning_rate": 3.971242023890537e-05, "loss": 99.5953, "step": 37160 }, { "epoch": 0.1501715033714856, "grad_norm": 862.522705078125, "learning_rate": 3.9711948190526997e-05, "loss": 89.4212, "step": 37170 }, { "epoch": 0.15021190463685322, "grad_norm": 744.9771118164062, "learning_rate": 3.9711475757854045e-05, "loss": 80.1172, "step": 37180 }, { "epoch": 0.15025230590222086, "grad_norm": 971.1306762695312, "learning_rate": 3.971100294089573e-05, "loss": 92.3551, "step": 37190 }, { "epoch": 0.1502927071675885, "grad_norm": 667.2974853515625, "learning_rate": 3.971052973966126e-05, "loss": 119.2706, "step": 37200 }, { "epoch": 0.1503331084329561, "grad_norm": 988.1494750976562, "learning_rate": 3.9710056154159865e-05, "loss": 112.7863, "step": 37210 }, { "epoch": 0.15037350969832375, "grad_norm": 602.7747192382812, "learning_rate": 3.970958218440078e-05, "loss": 109.6781, "step": 37220 }, { "epoch": 0.1504139109636914, "grad_norm": 591.7127685546875, "learning_rate": 3.970910783039324e-05, "loss": 63.6022, "step": 37230 }, { "epoch": 0.150454312229059, "grad_norm": 439.0905456542969, "learning_rate": 3.97086330921465e-05, "loss": 59.3322, "step": 37240 }, { "epoch": 0.15049471349442664, "grad_norm": 605.6318969726562, "learning_rate": 3.970815796966981e-05, "loss": 112.7581, "step": 37250 }, { "epoch": 0.15053511475979428, "grad_norm": 692.8350830078125, "learning_rate": 3.970768246297243e-05, "loss": 97.8905, "step": 37260 }, { "epoch": 0.15057551602516192, "grad_norm": 881.8389282226562, "learning_rate": 3.970720657206363e-05, "loss": 111.7944, "step": 37270 }, { "epoch": 0.15061591729052953, "grad_norm": 799.2363891601562, "learning_rate": 3.97067302969527e-05, "loss": 95.6336, "step": 37280 }, { "epoch": 0.15065631855589717, "grad_norm": 828.990478515625, "learning_rate": 3.970625363764892e-05, "loss": 90.0103, "step": 37290 }, { "epoch": 0.1506967198212648, "grad_norm": 943.7299194335938, "learning_rate": 3.970577659416158e-05, "loss": 60.9479, "step": 37300 }, { "epoch": 0.15073712108663243, "grad_norm": 560.2476196289062, "learning_rate": 3.970529916649997e-05, "loss": 61.7219, "step": 37310 }, { "epoch": 0.15077752235200007, "grad_norm": 773.2211303710938, "learning_rate": 3.9704821354673414e-05, "loss": 47.1061, "step": 37320 }, { "epoch": 0.1508179236173677, "grad_norm": 1033.5302734375, "learning_rate": 3.9704343158691226e-05, "loss": 112.5848, "step": 37330 }, { "epoch": 0.15085832488273532, "grad_norm": 778.7824096679688, "learning_rate": 3.970386457856271e-05, "loss": 58.9093, "step": 37340 }, { "epoch": 0.15089872614810296, "grad_norm": 431.2569274902344, "learning_rate": 3.9703385614297224e-05, "loss": 47.7874, "step": 37350 }, { "epoch": 0.1509391274134706, "grad_norm": 799.5265502929688, "learning_rate": 3.9702906265904085e-05, "loss": 70.1433, "step": 37360 }, { "epoch": 0.1509795286788382, "grad_norm": 407.71075439453125, "learning_rate": 3.9702426533392645e-05, "loss": 100.7932, "step": 37370 }, { "epoch": 0.15101992994420585, "grad_norm": 1554.0538330078125, "learning_rate": 3.9701946416772254e-05, "loss": 100.1079, "step": 37380 }, { "epoch": 0.1510603312095735, "grad_norm": 790.8369140625, "learning_rate": 3.970146591605228e-05, "loss": 77.1758, "step": 37390 }, { "epoch": 0.1511007324749411, "grad_norm": 796.0833129882812, "learning_rate": 3.9700985031242076e-05, "loss": 89.7363, "step": 37400 }, { "epoch": 0.15114113374030874, "grad_norm": 359.7783203125, "learning_rate": 3.970050376235104e-05, "loss": 80.3151, "step": 37410 }, { "epoch": 0.15118153500567638, "grad_norm": 904.0203857421875, "learning_rate": 3.970002210938853e-05, "loss": 121.1745, "step": 37420 }, { "epoch": 0.15122193627104402, "grad_norm": 702.6942138671875, "learning_rate": 3.969954007236395e-05, "loss": 53.5863, "step": 37430 }, { "epoch": 0.15126233753641163, "grad_norm": 1143.8529052734375, "learning_rate": 3.969905765128669e-05, "loss": 74.2955, "step": 37440 }, { "epoch": 0.15130273880177927, "grad_norm": 795.322021484375, "learning_rate": 3.969857484616616e-05, "loss": 77.7138, "step": 37450 }, { "epoch": 0.15134314006714691, "grad_norm": 1021.1033325195312, "learning_rate": 3.969809165701177e-05, "loss": 111.0508, "step": 37460 }, { "epoch": 0.15138354133251453, "grad_norm": 521.082275390625, "learning_rate": 3.969760808383295e-05, "loss": 83.8422, "step": 37470 }, { "epoch": 0.15142394259788217, "grad_norm": 778.8450927734375, "learning_rate": 3.969712412663912e-05, "loss": 60.9541, "step": 37480 }, { "epoch": 0.1514643438632498, "grad_norm": 593.9297485351562, "learning_rate": 3.969663978543971e-05, "loss": 69.2896, "step": 37490 }, { "epoch": 0.15150474512861742, "grad_norm": 609.1443481445312, "learning_rate": 3.9696155060244166e-05, "loss": 111.5514, "step": 37500 }, { "epoch": 0.15154514639398506, "grad_norm": 730.1421508789062, "learning_rate": 3.969566995106194e-05, "loss": 81.852, "step": 37510 }, { "epoch": 0.1515855476593527, "grad_norm": 831.3905029296875, "learning_rate": 3.969518445790248e-05, "loss": 80.2276, "step": 37520 }, { "epoch": 0.1516259489247203, "grad_norm": 1291.9847412109375, "learning_rate": 3.9694698580775275e-05, "loss": 99.0928, "step": 37530 }, { "epoch": 0.15166635019008795, "grad_norm": 809.3699340820312, "learning_rate": 3.969421231968977e-05, "loss": 78.3106, "step": 37540 }, { "epoch": 0.1517067514554556, "grad_norm": 2072.676025390625, "learning_rate": 3.9693725674655465e-05, "loss": 85.1074, "step": 37550 }, { "epoch": 0.1517471527208232, "grad_norm": 1137.106689453125, "learning_rate": 3.969323864568183e-05, "loss": 93.6903, "step": 37560 }, { "epoch": 0.15178755398619084, "grad_norm": 426.4228515625, "learning_rate": 3.9692751232778376e-05, "loss": 83.9971, "step": 37570 }, { "epoch": 0.15182795525155848, "grad_norm": 567.2477416992188, "learning_rate": 3.96922634359546e-05, "loss": 73.9459, "step": 37580 }, { "epoch": 0.15186835651692612, "grad_norm": 965.7251586914062, "learning_rate": 3.969177525522e-05, "loss": 67.762, "step": 37590 }, { "epoch": 0.15190875778229374, "grad_norm": 1557.0831298828125, "learning_rate": 3.969128669058411e-05, "loss": 105.5141, "step": 37600 }, { "epoch": 0.15194915904766138, "grad_norm": 730.3154907226562, "learning_rate": 3.969079774205645e-05, "loss": 52.5085, "step": 37610 }, { "epoch": 0.15198956031302902, "grad_norm": 424.4344177246094, "learning_rate": 3.969030840964655e-05, "loss": 81.0125, "step": 37620 }, { "epoch": 0.15202996157839663, "grad_norm": 1289.1795654296875, "learning_rate": 3.968981869336395e-05, "loss": 118.6801, "step": 37630 }, { "epoch": 0.15207036284376427, "grad_norm": 768.481201171875, "learning_rate": 3.968932859321819e-05, "loss": 116.7715, "step": 37640 }, { "epoch": 0.1521107641091319, "grad_norm": 1433.2896728515625, "learning_rate": 3.9688838109218836e-05, "loss": 99.1868, "step": 37650 }, { "epoch": 0.15215116537449952, "grad_norm": 1053.1134033203125, "learning_rate": 3.968834724137545e-05, "loss": 64.6867, "step": 37660 }, { "epoch": 0.15219156663986716, "grad_norm": 1071.5689697265625, "learning_rate": 3.96878559896976e-05, "loss": 77.4557, "step": 37670 }, { "epoch": 0.1522319679052348, "grad_norm": 763.2518920898438, "learning_rate": 3.9687364354194854e-05, "loss": 102.5508, "step": 37680 }, { "epoch": 0.1522723691706024, "grad_norm": 698.8189697265625, "learning_rate": 3.968687233487681e-05, "loss": 58.931, "step": 37690 }, { "epoch": 0.15231277043597005, "grad_norm": 1018.3699951171875, "learning_rate": 3.968637993175305e-05, "loss": 118.7861, "step": 37700 }, { "epoch": 0.1523531717013377, "grad_norm": 557.1495361328125, "learning_rate": 3.9685887144833185e-05, "loss": 75.3155, "step": 37710 }, { "epoch": 0.1523935729667053, "grad_norm": 0.0, "learning_rate": 3.9685393974126804e-05, "loss": 75.7861, "step": 37720 }, { "epoch": 0.15243397423207294, "grad_norm": 697.90185546875, "learning_rate": 3.9684900419643544e-05, "loss": 46.2014, "step": 37730 }, { "epoch": 0.15247437549744058, "grad_norm": 1075.672119140625, "learning_rate": 3.968440648139301e-05, "loss": 83.1796, "step": 37740 }, { "epoch": 0.15251477676280822, "grad_norm": 786.6807861328125, "learning_rate": 3.968391215938484e-05, "loss": 68.6527, "step": 37750 }, { "epoch": 0.15255517802817584, "grad_norm": 923.9205932617188, "learning_rate": 3.968341745362867e-05, "loss": 108.3222, "step": 37760 }, { "epoch": 0.15259557929354348, "grad_norm": 702.1288452148438, "learning_rate": 3.968292236413414e-05, "loss": 140.3841, "step": 37770 }, { "epoch": 0.15263598055891112, "grad_norm": 916.8606567382812, "learning_rate": 3.96824268909109e-05, "loss": 76.2463, "step": 37780 }, { "epoch": 0.15267638182427873, "grad_norm": 715.5382080078125, "learning_rate": 3.9681931033968625e-05, "loss": 83.2818, "step": 37790 }, { "epoch": 0.15271678308964637, "grad_norm": 668.6615600585938, "learning_rate": 3.9681434793316966e-05, "loss": 57.9715, "step": 37800 }, { "epoch": 0.152757184355014, "grad_norm": 879.0608520507812, "learning_rate": 3.96809381689656e-05, "loss": 108.9632, "step": 37810 }, { "epoch": 0.15279758562038162, "grad_norm": 1151.613037109375, "learning_rate": 3.9680441160924225e-05, "loss": 104.5107, "step": 37820 }, { "epoch": 0.15283798688574926, "grad_norm": 1173.372314453125, "learning_rate": 3.96799437692025e-05, "loss": 104.231, "step": 37830 }, { "epoch": 0.1528783881511169, "grad_norm": 2017.4984130859375, "learning_rate": 3.967944599381015e-05, "loss": 105.15, "step": 37840 }, { "epoch": 0.1529187894164845, "grad_norm": 1668.8818359375, "learning_rate": 3.9678947834756864e-05, "loss": 104.0688, "step": 37850 }, { "epoch": 0.15295919068185215, "grad_norm": 597.96337890625, "learning_rate": 3.967844929205236e-05, "loss": 78.81, "step": 37860 }, { "epoch": 0.1529995919472198, "grad_norm": 646.5809936523438, "learning_rate": 3.9677950365706365e-05, "loss": 72.663, "step": 37870 }, { "epoch": 0.1530399932125874, "grad_norm": 1005.3717651367188, "learning_rate": 3.967745105572858e-05, "loss": 123.4354, "step": 37880 }, { "epoch": 0.15308039447795505, "grad_norm": 674.1689453125, "learning_rate": 3.967695136212877e-05, "loss": 74.8123, "step": 37890 }, { "epoch": 0.15312079574332269, "grad_norm": 919.642822265625, "learning_rate": 3.967645128491666e-05, "loss": 55.2663, "step": 37900 }, { "epoch": 0.15316119700869033, "grad_norm": 559.356689453125, "learning_rate": 3.967595082410199e-05, "loss": 81.8104, "step": 37910 }, { "epoch": 0.15320159827405794, "grad_norm": 852.5032958984375, "learning_rate": 3.967544997969454e-05, "loss": 72.4133, "step": 37920 }, { "epoch": 0.15324199953942558, "grad_norm": 760.2708129882812, "learning_rate": 3.967494875170406e-05, "loss": 113.9829, "step": 37930 }, { "epoch": 0.15328240080479322, "grad_norm": 472.9176025390625, "learning_rate": 3.967444714014032e-05, "loss": 96.0468, "step": 37940 }, { "epoch": 0.15332280207016083, "grad_norm": 771.8394165039062, "learning_rate": 3.9673945145013114e-05, "loss": 81.0025, "step": 37950 }, { "epoch": 0.15336320333552847, "grad_norm": 472.99627685546875, "learning_rate": 3.967344276633222e-05, "loss": 81.9558, "step": 37960 }, { "epoch": 0.1534036046008961, "grad_norm": 555.433837890625, "learning_rate": 3.9672940004107426e-05, "loss": 103.9633, "step": 37970 }, { "epoch": 0.15344400586626372, "grad_norm": 860.8916625976562, "learning_rate": 3.967243685834854e-05, "loss": 104.1778, "step": 37980 }, { "epoch": 0.15348440713163136, "grad_norm": 648.06787109375, "learning_rate": 3.967193332906537e-05, "loss": 76.7259, "step": 37990 }, { "epoch": 0.153524808396999, "grad_norm": 1496.262451171875, "learning_rate": 3.967142941626772e-05, "loss": 76.2057, "step": 38000 }, { "epoch": 0.15356520966236661, "grad_norm": 1186.5738525390625, "learning_rate": 3.9670925119965434e-05, "loss": 90.1337, "step": 38010 }, { "epoch": 0.15360561092773425, "grad_norm": 628.0262451171875, "learning_rate": 3.9670420440168335e-05, "loss": 96.4808, "step": 38020 }, { "epoch": 0.1536460121931019, "grad_norm": 1244.613037109375, "learning_rate": 3.9669915376886265e-05, "loss": 83.5602, "step": 38030 }, { "epoch": 0.1536864134584695, "grad_norm": 1034.5682373046875, "learning_rate": 3.966940993012907e-05, "loss": 84.4719, "step": 38040 }, { "epoch": 0.15372681472383715, "grad_norm": 630.8580322265625, "learning_rate": 3.96689040999066e-05, "loss": 73.0619, "step": 38050 }, { "epoch": 0.1537672159892048, "grad_norm": 765.167724609375, "learning_rate": 3.966839788622872e-05, "loss": 106.9954, "step": 38060 }, { "epoch": 0.15380761725457243, "grad_norm": 0.0, "learning_rate": 3.966789128910529e-05, "loss": 49.2205, "step": 38070 }, { "epoch": 0.15384801851994004, "grad_norm": 869.7047729492188, "learning_rate": 3.966738430854619e-05, "loss": 94.2237, "step": 38080 }, { "epoch": 0.15388841978530768, "grad_norm": 412.8327941894531, "learning_rate": 3.966687694456132e-05, "loss": 92.6531, "step": 38090 }, { "epoch": 0.15392882105067532, "grad_norm": 994.3251342773438, "learning_rate": 3.966636919716056e-05, "loss": 97.2214, "step": 38100 }, { "epoch": 0.15396922231604293, "grad_norm": 1117.16650390625, "learning_rate": 3.966586106635379e-05, "loss": 86.1158, "step": 38110 }, { "epoch": 0.15400962358141057, "grad_norm": 568.3110961914062, "learning_rate": 3.966535255215095e-05, "loss": 111.1136, "step": 38120 }, { "epoch": 0.1540500248467782, "grad_norm": 1117.4755859375, "learning_rate": 3.966484365456193e-05, "loss": 82.1834, "step": 38130 }, { "epoch": 0.15409042611214582, "grad_norm": 926.09326171875, "learning_rate": 3.966433437359667e-05, "loss": 92.5847, "step": 38140 }, { "epoch": 0.15413082737751346, "grad_norm": 760.3319702148438, "learning_rate": 3.9663824709265075e-05, "loss": 81.4818, "step": 38150 }, { "epoch": 0.1541712286428811, "grad_norm": 694.4853515625, "learning_rate": 3.96633146615771e-05, "loss": 83.545, "step": 38160 }, { "epoch": 0.15421162990824872, "grad_norm": 606.398193359375, "learning_rate": 3.9662804230542676e-05, "loss": 83.9181, "step": 38170 }, { "epoch": 0.15425203117361636, "grad_norm": 901.7685546875, "learning_rate": 3.966229341617177e-05, "loss": 77.0491, "step": 38180 }, { "epoch": 0.154292432438984, "grad_norm": 830.6808471679688, "learning_rate": 3.9661782218474325e-05, "loss": 70.1705, "step": 38190 }, { "epoch": 0.1543328337043516, "grad_norm": 1575.902099609375, "learning_rate": 3.966127063746031e-05, "loss": 61.3311, "step": 38200 }, { "epoch": 0.15437323496971925, "grad_norm": 844.6079711914062, "learning_rate": 3.966075867313971e-05, "loss": 88.1307, "step": 38210 }, { "epoch": 0.1544136362350869, "grad_norm": 413.93865966796875, "learning_rate": 3.966024632552249e-05, "loss": 95.2208, "step": 38220 }, { "epoch": 0.15445403750045453, "grad_norm": 789.9360961914062, "learning_rate": 3.965973359461865e-05, "loss": 105.475, "step": 38230 }, { "epoch": 0.15449443876582214, "grad_norm": 0.0, "learning_rate": 3.965922048043818e-05, "loss": 127.8588, "step": 38240 }, { "epoch": 0.15453484003118978, "grad_norm": 3267.522216796875, "learning_rate": 3.965870698299109e-05, "loss": 126.6062, "step": 38250 }, { "epoch": 0.15457524129655742, "grad_norm": 1106.333251953125, "learning_rate": 3.965819310228738e-05, "loss": 93.0975, "step": 38260 }, { "epoch": 0.15461564256192503, "grad_norm": 1589.9356689453125, "learning_rate": 3.965767883833708e-05, "loss": 113.15, "step": 38270 }, { "epoch": 0.15465604382729267, "grad_norm": 1526.2000732421875, "learning_rate": 3.965716419115021e-05, "loss": 96.7691, "step": 38280 }, { "epoch": 0.1546964450926603, "grad_norm": 736.6803588867188, "learning_rate": 3.96566491607368e-05, "loss": 96.5812, "step": 38290 }, { "epoch": 0.15473684635802792, "grad_norm": 495.99993896484375, "learning_rate": 3.96561337471069e-05, "loss": 91.3958, "step": 38300 }, { "epoch": 0.15477724762339556, "grad_norm": 1220.4888916015625, "learning_rate": 3.965561795027054e-05, "loss": 86.7831, "step": 38310 }, { "epoch": 0.1548176488887632, "grad_norm": 511.2290344238281, "learning_rate": 3.9655101770237805e-05, "loss": 126.2356, "step": 38320 }, { "epoch": 0.15485805015413082, "grad_norm": 993.008544921875, "learning_rate": 3.965458520701874e-05, "loss": 82.0525, "step": 38330 }, { "epoch": 0.15489845141949846, "grad_norm": 491.7477722167969, "learning_rate": 3.965406826062341e-05, "loss": 73.5542, "step": 38340 }, { "epoch": 0.1549388526848661, "grad_norm": 975.9268188476562, "learning_rate": 3.96535509310619e-05, "loss": 62.1843, "step": 38350 }, { "epoch": 0.1549792539502337, "grad_norm": 1564.1531982421875, "learning_rate": 3.96530332183443e-05, "loss": 76.3985, "step": 38360 }, { "epoch": 0.15501965521560135, "grad_norm": 1053.3935546875, "learning_rate": 3.96525151224807e-05, "loss": 70.3877, "step": 38370 }, { "epoch": 0.155060056480969, "grad_norm": 820.1409912109375, "learning_rate": 3.96519966434812e-05, "loss": 93.3208, "step": 38380 }, { "epoch": 0.15510045774633663, "grad_norm": 428.53887939453125, "learning_rate": 3.965147778135591e-05, "loss": 78.4831, "step": 38390 }, { "epoch": 0.15514085901170424, "grad_norm": 626.5733032226562, "learning_rate": 3.965095853611494e-05, "loss": 100.7439, "step": 38400 }, { "epoch": 0.15518126027707188, "grad_norm": 709.6735229492188, "learning_rate": 3.9650438907768414e-05, "loss": 148.4095, "step": 38410 }, { "epoch": 0.15522166154243952, "grad_norm": 899.0986938476562, "learning_rate": 3.964991889632647e-05, "loss": 86.0226, "step": 38420 }, { "epoch": 0.15526206280780713, "grad_norm": 1556.7742919921875, "learning_rate": 3.964939850179923e-05, "loss": 74.1751, "step": 38430 }, { "epoch": 0.15530246407317477, "grad_norm": 298.58319091796875, "learning_rate": 3.964887772419687e-05, "loss": 107.9748, "step": 38440 }, { "epoch": 0.1553428653385424, "grad_norm": 656.981689453125, "learning_rate": 3.9648356563529506e-05, "loss": 115.6758, "step": 38450 }, { "epoch": 0.15538326660391003, "grad_norm": 917.2551879882812, "learning_rate": 3.964783501980732e-05, "loss": 72.7824, "step": 38460 }, { "epoch": 0.15542366786927767, "grad_norm": 312.0767822265625, "learning_rate": 3.9647313093040475e-05, "loss": 93.3208, "step": 38470 }, { "epoch": 0.1554640691346453, "grad_norm": 919.5922241210938, "learning_rate": 3.964679078323915e-05, "loss": 116.9953, "step": 38480 }, { "epoch": 0.15550447040001292, "grad_norm": 898.2864379882812, "learning_rate": 3.9646268090413516e-05, "loss": 89.4504, "step": 38490 }, { "epoch": 0.15554487166538056, "grad_norm": 948.1696166992188, "learning_rate": 3.964574501457378e-05, "loss": 128.4789, "step": 38500 }, { "epoch": 0.1555852729307482, "grad_norm": 2223.70166015625, "learning_rate": 3.964522155573012e-05, "loss": 104.3688, "step": 38510 }, { "epoch": 0.1556256741961158, "grad_norm": 705.4492797851562, "learning_rate": 3.964469771389276e-05, "loss": 97.0524, "step": 38520 }, { "epoch": 0.15566607546148345, "grad_norm": 1810.4759521484375, "learning_rate": 3.96441734890719e-05, "loss": 72.1592, "step": 38530 }, { "epoch": 0.1557064767268511, "grad_norm": 500.4573974609375, "learning_rate": 3.964364888127777e-05, "loss": 98.4273, "step": 38540 }, { "epoch": 0.15574687799221873, "grad_norm": 335.0638122558594, "learning_rate": 3.9643123890520584e-05, "loss": 70.7148, "step": 38550 }, { "epoch": 0.15578727925758634, "grad_norm": 1567.4873046875, "learning_rate": 3.9642598516810593e-05, "loss": 90.5238, "step": 38560 }, { "epoch": 0.15582768052295398, "grad_norm": 552.8950805664062, "learning_rate": 3.9642072760158024e-05, "loss": 108.9904, "step": 38570 }, { "epoch": 0.15586808178832162, "grad_norm": 634.5554809570312, "learning_rate": 3.964154662057314e-05, "loss": 60.6461, "step": 38580 }, { "epoch": 0.15590848305368923, "grad_norm": 1294.1488037109375, "learning_rate": 3.9641020098066185e-05, "loss": 93.5097, "step": 38590 }, { "epoch": 0.15594888431905687, "grad_norm": 1212.355712890625, "learning_rate": 3.964049319264744e-05, "loss": 108.3358, "step": 38600 }, { "epoch": 0.15598928558442451, "grad_norm": 1027.73974609375, "learning_rate": 3.963996590432716e-05, "loss": 101.4828, "step": 38610 }, { "epoch": 0.15602968684979213, "grad_norm": 816.5595703125, "learning_rate": 3.963943823311564e-05, "loss": 77.334, "step": 38620 }, { "epoch": 0.15607008811515977, "grad_norm": 497.77679443359375, "learning_rate": 3.9638910179023156e-05, "loss": 114.117, "step": 38630 }, { "epoch": 0.1561104893805274, "grad_norm": 731.276123046875, "learning_rate": 3.963838174206001e-05, "loss": 91.6992, "step": 38640 }, { "epoch": 0.15615089064589502, "grad_norm": 533.4052124023438, "learning_rate": 3.963785292223651e-05, "loss": 96.2654, "step": 38650 }, { "epoch": 0.15619129191126266, "grad_norm": 421.8385009765625, "learning_rate": 3.9637323719562936e-05, "loss": 66.5563, "step": 38660 }, { "epoch": 0.1562316931766303, "grad_norm": 3177.8974609375, "learning_rate": 3.963679413404964e-05, "loss": 131.4963, "step": 38670 }, { "epoch": 0.1562720944419979, "grad_norm": 771.8431396484375, "learning_rate": 3.963626416570693e-05, "loss": 102.0437, "step": 38680 }, { "epoch": 0.15631249570736555, "grad_norm": 965.1478881835938, "learning_rate": 3.963573381454515e-05, "loss": 93.6955, "step": 38690 }, { "epoch": 0.1563528969727332, "grad_norm": 706.6978149414062, "learning_rate": 3.963520308057462e-05, "loss": 96.3812, "step": 38700 }, { "epoch": 0.15639329823810083, "grad_norm": 1077.8919677734375, "learning_rate": 3.96346719638057e-05, "loss": 55.8834, "step": 38710 }, { "epoch": 0.15643369950346844, "grad_norm": 2407.916748046875, "learning_rate": 3.963414046424874e-05, "loss": 59.8068, "step": 38720 }, { "epoch": 0.15647410076883608, "grad_norm": 1492.0802001953125, "learning_rate": 3.96336085819141e-05, "loss": 88.5377, "step": 38730 }, { "epoch": 0.15651450203420372, "grad_norm": 1077.629150390625, "learning_rate": 3.9633076316812155e-05, "loss": 86.7247, "step": 38740 }, { "epoch": 0.15655490329957134, "grad_norm": 468.01129150390625, "learning_rate": 3.9632543668953284e-05, "loss": 93.5541, "step": 38750 }, { "epoch": 0.15659530456493898, "grad_norm": 1229.2720947265625, "learning_rate": 3.9632010638347865e-05, "loss": 70.9075, "step": 38760 }, { "epoch": 0.15663570583030662, "grad_norm": 1041.6185302734375, "learning_rate": 3.9631477225006285e-05, "loss": 112.2093, "step": 38770 }, { "epoch": 0.15667610709567423, "grad_norm": 811.9631958007812, "learning_rate": 3.963094342893896e-05, "loss": 61.5546, "step": 38780 }, { "epoch": 0.15671650836104187, "grad_norm": 959.6410522460938, "learning_rate": 3.963040925015628e-05, "loss": 96.6481, "step": 38790 }, { "epoch": 0.1567569096264095, "grad_norm": 364.67559814453125, "learning_rate": 3.962987468866866e-05, "loss": 65.4956, "step": 38800 }, { "epoch": 0.15679731089177712, "grad_norm": 1041.04150390625, "learning_rate": 3.9629339744486534e-05, "loss": 81.0789, "step": 38810 }, { "epoch": 0.15683771215714476, "grad_norm": 1525.4169921875, "learning_rate": 3.962880441762032e-05, "loss": 116.3364, "step": 38820 }, { "epoch": 0.1568781134225124, "grad_norm": 820.9378662109375, "learning_rate": 3.962826870808046e-05, "loss": 102.4911, "step": 38830 }, { "epoch": 0.15691851468788, "grad_norm": 564.9578247070312, "learning_rate": 3.96277326158774e-05, "loss": 71.7791, "step": 38840 }, { "epoch": 0.15695891595324765, "grad_norm": 426.0314025878906, "learning_rate": 3.962719614102158e-05, "loss": 87.1656, "step": 38850 }, { "epoch": 0.1569993172186153, "grad_norm": 1478.522705078125, "learning_rate": 3.9626659283523475e-05, "loss": 75.4472, "step": 38860 }, { "epoch": 0.15703971848398293, "grad_norm": 519.1206665039062, "learning_rate": 3.9626122043393535e-05, "loss": 90.5628, "step": 38870 }, { "epoch": 0.15708011974935054, "grad_norm": 369.3790283203125, "learning_rate": 3.9625584420642245e-05, "loss": 83.9716, "step": 38880 }, { "epoch": 0.15712052101471818, "grad_norm": 951.3013916015625, "learning_rate": 3.962504641528009e-05, "loss": 65.3877, "step": 38890 }, { "epoch": 0.15716092228008582, "grad_norm": 523.937744140625, "learning_rate": 3.962450802731754e-05, "loss": 60.1187, "step": 38900 }, { "epoch": 0.15720132354545344, "grad_norm": 1172.847900390625, "learning_rate": 3.962396925676511e-05, "loss": 85.6999, "step": 38910 }, { "epoch": 0.15724172481082108, "grad_norm": 676.1847534179688, "learning_rate": 3.962343010363329e-05, "loss": 98.4259, "step": 38920 }, { "epoch": 0.15728212607618872, "grad_norm": 1541.6544189453125, "learning_rate": 3.9622890567932605e-05, "loss": 85.6831, "step": 38930 }, { "epoch": 0.15732252734155633, "grad_norm": 1251.5394287109375, "learning_rate": 3.962235064967356e-05, "loss": 114.6008, "step": 38940 }, { "epoch": 0.15736292860692397, "grad_norm": 858.481201171875, "learning_rate": 3.962181034886668e-05, "loss": 80.0586, "step": 38950 }, { "epoch": 0.1574033298722916, "grad_norm": 737.1397705078125, "learning_rate": 3.9621269665522516e-05, "loss": 85.137, "step": 38960 }, { "epoch": 0.15744373113765922, "grad_norm": 771.252685546875, "learning_rate": 3.9620728599651596e-05, "loss": 107.0303, "step": 38970 }, { "epoch": 0.15748413240302686, "grad_norm": 555.4755249023438, "learning_rate": 3.9620187151264474e-05, "loss": 94.9436, "step": 38980 }, { "epoch": 0.1575245336683945, "grad_norm": 2882.533203125, "learning_rate": 3.961964532037169e-05, "loss": 102.9361, "step": 38990 }, { "epoch": 0.1575649349337621, "grad_norm": 562.4039306640625, "learning_rate": 3.9619103106983835e-05, "loss": 123.3879, "step": 39000 }, { "epoch": 0.15760533619912975, "grad_norm": 1785.08740234375, "learning_rate": 3.961856051111146e-05, "loss": 98.0155, "step": 39010 }, { "epoch": 0.1576457374644974, "grad_norm": 763.029296875, "learning_rate": 3.961801753276514e-05, "loss": 41.8298, "step": 39020 }, { "epoch": 0.15768613872986503, "grad_norm": 1000.0222778320312, "learning_rate": 3.9617474171955475e-05, "loss": 94.5578, "step": 39030 }, { "epoch": 0.15772653999523265, "grad_norm": 334.7162780761719, "learning_rate": 3.961693042869305e-05, "loss": 86.058, "step": 39040 }, { "epoch": 0.15776694126060029, "grad_norm": 1190.070556640625, "learning_rate": 3.961638630298847e-05, "loss": 98.6637, "step": 39050 }, { "epoch": 0.15780734252596793, "grad_norm": 629.3197021484375, "learning_rate": 3.9615841794852336e-05, "loss": 83.3572, "step": 39060 }, { "epoch": 0.15784774379133554, "grad_norm": 460.8150329589844, "learning_rate": 3.9615296904295264e-05, "loss": 122.8573, "step": 39070 }, { "epoch": 0.15788814505670318, "grad_norm": 604.5160522460938, "learning_rate": 3.961475163132789e-05, "loss": 55.7113, "step": 39080 }, { "epoch": 0.15792854632207082, "grad_norm": 1380.890380859375, "learning_rate": 3.961420597596082e-05, "loss": 121.6424, "step": 39090 }, { "epoch": 0.15796894758743843, "grad_norm": 808.0025634765625, "learning_rate": 3.961365993820471e-05, "loss": 66.9209, "step": 39100 }, { "epoch": 0.15800934885280607, "grad_norm": 3276.390625, "learning_rate": 3.961311351807022e-05, "loss": 82.583, "step": 39110 }, { "epoch": 0.1580497501181737, "grad_norm": 2217.059326171875, "learning_rate": 3.961256671556796e-05, "loss": 89.6204, "step": 39120 }, { "epoch": 0.15809015138354132, "grad_norm": 1654.140380859375, "learning_rate": 3.961201953070863e-05, "loss": 64.8732, "step": 39130 }, { "epoch": 0.15813055264890896, "grad_norm": 532.6248168945312, "learning_rate": 3.961147196350288e-05, "loss": 98.7978, "step": 39140 }, { "epoch": 0.1581709539142766, "grad_norm": 809.4042358398438, "learning_rate": 3.9610924013961376e-05, "loss": 69.8393, "step": 39150 }, { "epoch": 0.15821135517964421, "grad_norm": 918.3334350585938, "learning_rate": 3.9610375682094824e-05, "loss": 77.7073, "step": 39160 }, { "epoch": 0.15825175644501185, "grad_norm": 0.0, "learning_rate": 3.96098269679139e-05, "loss": 61.9318, "step": 39170 }, { "epoch": 0.1582921577103795, "grad_norm": 401.17279052734375, "learning_rate": 3.96092778714293e-05, "loss": 114.9751, "step": 39180 }, { "epoch": 0.15833255897574713, "grad_norm": 731.6635131835938, "learning_rate": 3.9608728392651734e-05, "loss": 53.0921, "step": 39190 }, { "epoch": 0.15837296024111475, "grad_norm": 1872.025390625, "learning_rate": 3.960817853159192e-05, "loss": 80.1266, "step": 39200 }, { "epoch": 0.1584133615064824, "grad_norm": 764.140869140625, "learning_rate": 3.960762828826056e-05, "loss": 84.0326, "step": 39210 }, { "epoch": 0.15845376277185003, "grad_norm": 877.96484375, "learning_rate": 3.96070776626684e-05, "loss": 82.8266, "step": 39220 }, { "epoch": 0.15849416403721764, "grad_norm": 363.5792541503906, "learning_rate": 3.9606526654826154e-05, "loss": 79.819, "step": 39230 }, { "epoch": 0.15853456530258528, "grad_norm": 555.1956787109375, "learning_rate": 3.960597526474459e-05, "loss": 68.749, "step": 39240 }, { "epoch": 0.15857496656795292, "grad_norm": 543.0640869140625, "learning_rate": 3.9605423492434444e-05, "loss": 53.4244, "step": 39250 }, { "epoch": 0.15861536783332053, "grad_norm": 538.7412109375, "learning_rate": 3.9604871337906466e-05, "loss": 79.1591, "step": 39260 }, { "epoch": 0.15865576909868817, "grad_norm": 1329.620849609375, "learning_rate": 3.960431880117143e-05, "loss": 79.2483, "step": 39270 }, { "epoch": 0.1586961703640558, "grad_norm": 521.2180786132812, "learning_rate": 3.96037658822401e-05, "loss": 84.4006, "step": 39280 }, { "epoch": 0.15873657162942342, "grad_norm": 1234.4501953125, "learning_rate": 3.960321258112328e-05, "loss": 56.2984, "step": 39290 }, { "epoch": 0.15877697289479106, "grad_norm": 1980.7930908203125, "learning_rate": 3.960265889783173e-05, "loss": 89.2887, "step": 39300 }, { "epoch": 0.1588173741601587, "grad_norm": 1109.7982177734375, "learning_rate": 3.9602104832376244e-05, "loss": 67.8054, "step": 39310 }, { "epoch": 0.15885777542552632, "grad_norm": 1189.190185546875, "learning_rate": 3.960155038476764e-05, "loss": 72.3154, "step": 39320 }, { "epoch": 0.15889817669089396, "grad_norm": 687.5368041992188, "learning_rate": 3.9600995555016715e-05, "loss": 102.9218, "step": 39330 }, { "epoch": 0.1589385779562616, "grad_norm": 1048.0631103515625, "learning_rate": 3.960044034313429e-05, "loss": 79.893, "step": 39340 }, { "epoch": 0.15897897922162924, "grad_norm": 1082.3621826171875, "learning_rate": 3.9599884749131195e-05, "loss": 114.1062, "step": 39350 }, { "epoch": 0.15901938048699685, "grad_norm": 1691.8948974609375, "learning_rate": 3.9599328773018255e-05, "loss": 106.8196, "step": 39360 }, { "epoch": 0.1590597817523645, "grad_norm": 1052.7442626953125, "learning_rate": 3.9598772414806306e-05, "loss": 109.5173, "step": 39370 }, { "epoch": 0.15910018301773213, "grad_norm": 1856.539306640625, "learning_rate": 3.95982156745062e-05, "loss": 64.213, "step": 39380 }, { "epoch": 0.15914058428309974, "grad_norm": 652.4766845703125, "learning_rate": 3.95976585521288e-05, "loss": 99.9336, "step": 39390 }, { "epoch": 0.15918098554846738, "grad_norm": 677.7192993164062, "learning_rate": 3.959710104768494e-05, "loss": 85.7536, "step": 39400 }, { "epoch": 0.15922138681383502, "grad_norm": 232.5833740234375, "learning_rate": 3.9596543161185515e-05, "loss": 93.3603, "step": 39410 }, { "epoch": 0.15926178807920263, "grad_norm": 1655.010986328125, "learning_rate": 3.959598489264139e-05, "loss": 82.8985, "step": 39420 }, { "epoch": 0.15930218934457027, "grad_norm": 1230.93603515625, "learning_rate": 3.959542624206346e-05, "loss": 121.071, "step": 39430 }, { "epoch": 0.1593425906099379, "grad_norm": 397.9216003417969, "learning_rate": 3.9594867209462594e-05, "loss": 82.405, "step": 39440 }, { "epoch": 0.15938299187530552, "grad_norm": 816.2092895507812, "learning_rate": 3.959430779484971e-05, "loss": 98.675, "step": 39450 }, { "epoch": 0.15942339314067316, "grad_norm": 953.708740234375, "learning_rate": 3.9593747998235696e-05, "loss": 118.3295, "step": 39460 }, { "epoch": 0.1594637944060408, "grad_norm": 1187.2166748046875, "learning_rate": 3.9593187819631496e-05, "loss": 77.116, "step": 39470 }, { "epoch": 0.15950419567140842, "grad_norm": 1949.8765869140625, "learning_rate": 3.9592627259048e-05, "loss": 94.8703, "step": 39480 }, { "epoch": 0.15954459693677606, "grad_norm": 745.1921997070312, "learning_rate": 3.9592066316496155e-05, "loss": 76.3544, "step": 39490 }, { "epoch": 0.1595849982021437, "grad_norm": 570.1115112304688, "learning_rate": 3.959150499198688e-05, "loss": 69.0455, "step": 39500 }, { "epoch": 0.1596253994675113, "grad_norm": 1056.903076171875, "learning_rate": 3.9590943285531146e-05, "loss": 80.1547, "step": 39510 }, { "epoch": 0.15966580073287895, "grad_norm": 982.5983276367188, "learning_rate": 3.959038119713987e-05, "loss": 63.0331, "step": 39520 }, { "epoch": 0.1597062019982466, "grad_norm": 689.6868896484375, "learning_rate": 3.958981872682404e-05, "loss": 103.1534, "step": 39530 }, { "epoch": 0.15974660326361423, "grad_norm": 688.78564453125, "learning_rate": 3.95892558745946e-05, "loss": 77.986, "step": 39540 }, { "epoch": 0.15978700452898184, "grad_norm": 590.9727783203125, "learning_rate": 3.958869264046253e-05, "loss": 100.1267, "step": 39550 }, { "epoch": 0.15982740579434948, "grad_norm": 833.3731079101562, "learning_rate": 3.958812902443882e-05, "loss": 88.936, "step": 39560 }, { "epoch": 0.15986780705971712, "grad_norm": 529.066650390625, "learning_rate": 3.958756502653444e-05, "loss": 86.5306, "step": 39570 }, { "epoch": 0.15990820832508473, "grad_norm": 2491.70166015625, "learning_rate": 3.95870006467604e-05, "loss": 113.7662, "step": 39580 }, { "epoch": 0.15994860959045237, "grad_norm": 890.7277221679688, "learning_rate": 3.9586435885127705e-05, "loss": 109.6661, "step": 39590 }, { "epoch": 0.15998901085582, "grad_norm": 2090.965576171875, "learning_rate": 3.958587074164735e-05, "loss": 66.3457, "step": 39600 }, { "epoch": 0.16002941212118763, "grad_norm": 971.6329956054688, "learning_rate": 3.958530521633036e-05, "loss": 92.5265, "step": 39610 }, { "epoch": 0.16006981338655527, "grad_norm": 377.7248840332031, "learning_rate": 3.958473930918777e-05, "loss": 96.4538, "step": 39620 }, { "epoch": 0.1601102146519229, "grad_norm": 472.4529724121094, "learning_rate": 3.95841730202306e-05, "loss": 117.1553, "step": 39630 }, { "epoch": 0.16015061591729052, "grad_norm": 5134.74853515625, "learning_rate": 3.958360634946989e-05, "loss": 120.4135, "step": 39640 }, { "epoch": 0.16019101718265816, "grad_norm": 1356.9932861328125, "learning_rate": 3.9583039296916704e-05, "loss": 89.7805, "step": 39650 }, { "epoch": 0.1602314184480258, "grad_norm": 4443.1279296875, "learning_rate": 3.958247186258208e-05, "loss": 108.7569, "step": 39660 }, { "epoch": 0.1602718197133934, "grad_norm": 782.8939208984375, "learning_rate": 3.9581904046477076e-05, "loss": 78.7764, "step": 39670 }, { "epoch": 0.16031222097876105, "grad_norm": 736.0662841796875, "learning_rate": 3.958133584861278e-05, "loss": 76.8847, "step": 39680 }, { "epoch": 0.1603526222441287, "grad_norm": 458.4825744628906, "learning_rate": 3.958076726900026e-05, "loss": 77.1281, "step": 39690 }, { "epoch": 0.16039302350949633, "grad_norm": 786.83740234375, "learning_rate": 3.95801983076506e-05, "loss": 54.9982, "step": 39700 }, { "epoch": 0.16043342477486394, "grad_norm": 1158.18017578125, "learning_rate": 3.957962896457489e-05, "loss": 71.0925, "step": 39710 }, { "epoch": 0.16047382604023158, "grad_norm": 844.1268310546875, "learning_rate": 3.957905923978424e-05, "loss": 118.5867, "step": 39720 }, { "epoch": 0.16051422730559922, "grad_norm": 1144.5777587890625, "learning_rate": 3.9578489133289745e-05, "loss": 75.6427, "step": 39730 }, { "epoch": 0.16055462857096683, "grad_norm": 2612.0439453125, "learning_rate": 3.9577918645102524e-05, "loss": 96.4201, "step": 39740 }, { "epoch": 0.16059502983633447, "grad_norm": 1410.67724609375, "learning_rate": 3.9577347775233705e-05, "loss": 108.2059, "step": 39750 }, { "epoch": 0.16063543110170211, "grad_norm": 730.7840576171875, "learning_rate": 3.957677652369441e-05, "loss": 75.2652, "step": 39760 }, { "epoch": 0.16067583236706973, "grad_norm": 1091.61376953125, "learning_rate": 3.957620489049577e-05, "loss": 109.8948, "step": 39770 }, { "epoch": 0.16071623363243737, "grad_norm": 3437.56884765625, "learning_rate": 3.957563287564895e-05, "loss": 105.7026, "step": 39780 }, { "epoch": 0.160756634897805, "grad_norm": 1045.112548828125, "learning_rate": 3.957506047916508e-05, "loss": 54.9407, "step": 39790 }, { "epoch": 0.16079703616317262, "grad_norm": 2067.13623046875, "learning_rate": 3.9574487701055326e-05, "loss": 84.4046, "step": 39800 }, { "epoch": 0.16083743742854026, "grad_norm": 868.0408935546875, "learning_rate": 3.9573914541330865e-05, "loss": 75.3392, "step": 39810 }, { "epoch": 0.1608778386939079, "grad_norm": 946.1988525390625, "learning_rate": 3.957334100000286e-05, "loss": 79.963, "step": 39820 }, { "epoch": 0.1609182399592755, "grad_norm": 951.0245361328125, "learning_rate": 3.95727670770825e-05, "loss": 88.2257, "step": 39830 }, { "epoch": 0.16095864122464315, "grad_norm": 780.5066528320312, "learning_rate": 3.957219277258096e-05, "loss": 111.4322, "step": 39840 }, { "epoch": 0.1609990424900108, "grad_norm": 1591.3392333984375, "learning_rate": 3.957161808650944e-05, "loss": 66.2167, "step": 39850 }, { "epoch": 0.16103944375537843, "grad_norm": 658.369140625, "learning_rate": 3.957104301887916e-05, "loss": 58.5095, "step": 39860 }, { "epoch": 0.16107984502074604, "grad_norm": 904.9728393554688, "learning_rate": 3.957046756970132e-05, "loss": 98.7982, "step": 39870 }, { "epoch": 0.16112024628611368, "grad_norm": 531.2388916015625, "learning_rate": 3.9569891738987136e-05, "loss": 74.5442, "step": 39880 }, { "epoch": 0.16116064755148132, "grad_norm": 551.3512573242188, "learning_rate": 3.9569315526747843e-05, "loss": 86.7883, "step": 39890 }, { "epoch": 0.16120104881684894, "grad_norm": 638.0982055664062, "learning_rate": 3.9568738932994665e-05, "loss": 89.8119, "step": 39900 }, { "epoch": 0.16124145008221658, "grad_norm": 642.2186279296875, "learning_rate": 3.9568161957738844e-05, "loss": 107.5101, "step": 39910 }, { "epoch": 0.16128185134758422, "grad_norm": 563.9918212890625, "learning_rate": 3.9567584600991635e-05, "loss": 82.3985, "step": 39920 }, { "epoch": 0.16132225261295183, "grad_norm": 1653.6661376953125, "learning_rate": 3.9567006862764286e-05, "loss": 126.2073, "step": 39930 }, { "epoch": 0.16136265387831947, "grad_norm": 491.9596862792969, "learning_rate": 3.9566428743068074e-05, "loss": 78.9053, "step": 39940 }, { "epoch": 0.1614030551436871, "grad_norm": 1005.4078979492188, "learning_rate": 3.9565850241914246e-05, "loss": 117.193, "step": 39950 }, { "epoch": 0.16144345640905472, "grad_norm": 2084.00634765625, "learning_rate": 3.9565271359314107e-05, "loss": 105.7904, "step": 39960 }, { "epoch": 0.16148385767442236, "grad_norm": 1149.814453125, "learning_rate": 3.9564692095278924e-05, "loss": 133.7218, "step": 39970 }, { "epoch": 0.16152425893979, "grad_norm": 1500.60107421875, "learning_rate": 3.956411244981999e-05, "loss": 106.1676, "step": 39980 }, { "epoch": 0.1615646602051576, "grad_norm": 530.439208984375, "learning_rate": 3.9563532422948625e-05, "loss": 114.8235, "step": 39990 }, { "epoch": 0.16160506147052525, "grad_norm": 766.109619140625, "learning_rate": 3.9562952014676116e-05, "loss": 86.8458, "step": 40000 }, { "epoch": 0.1616454627358929, "grad_norm": 495.8179016113281, "learning_rate": 3.956237122501379e-05, "loss": 62.4111, "step": 40010 }, { "epoch": 0.16168586400126053, "grad_norm": 634.97412109375, "learning_rate": 3.956179005397296e-05, "loss": 82.6757, "step": 40020 }, { "epoch": 0.16172626526662814, "grad_norm": 928.5135498046875, "learning_rate": 3.956120850156496e-05, "loss": 76.7512, "step": 40030 }, { "epoch": 0.16176666653199578, "grad_norm": 909.52294921875, "learning_rate": 3.9560626567801136e-05, "loss": 79.9613, "step": 40040 }, { "epoch": 0.16180706779736342, "grad_norm": 980.2033081054688, "learning_rate": 3.9560044252692826e-05, "loss": 104.3447, "step": 40050 }, { "epoch": 0.16184746906273104, "grad_norm": 2851.51611328125, "learning_rate": 3.955946155625138e-05, "loss": 88.5099, "step": 40060 }, { "epoch": 0.16188787032809868, "grad_norm": 445.9934387207031, "learning_rate": 3.955887847848816e-05, "loss": 66.0215, "step": 40070 }, { "epoch": 0.16192827159346632, "grad_norm": 1758.5360107421875, "learning_rate": 3.9558295019414534e-05, "loss": 82.5284, "step": 40080 }, { "epoch": 0.16196867285883393, "grad_norm": 831.5187377929688, "learning_rate": 3.9557711179041887e-05, "loss": 70.7222, "step": 40090 }, { "epoch": 0.16200907412420157, "grad_norm": 2261.695556640625, "learning_rate": 3.955712695738158e-05, "loss": 78.2804, "step": 40100 }, { "epoch": 0.1620494753895692, "grad_norm": 720.5111694335938, "learning_rate": 3.955654235444502e-05, "loss": 74.9033, "step": 40110 }, { "epoch": 0.16208987665493682, "grad_norm": 484.18115234375, "learning_rate": 3.95559573702436e-05, "loss": 96.4327, "step": 40120 }, { "epoch": 0.16213027792030446, "grad_norm": 1393.7919921875, "learning_rate": 3.955537200478872e-05, "loss": 101.8992, "step": 40130 }, { "epoch": 0.1621706791856721, "grad_norm": 627.5079345703125, "learning_rate": 3.955478625809179e-05, "loss": 122.9893, "step": 40140 }, { "epoch": 0.1622110804510397, "grad_norm": 695.4521484375, "learning_rate": 3.955420013016424e-05, "loss": 81.3099, "step": 40150 }, { "epoch": 0.16225148171640735, "grad_norm": 1384.3963623046875, "learning_rate": 3.9553613621017495e-05, "loss": 112.9681, "step": 40160 }, { "epoch": 0.162291882981775, "grad_norm": 823.0811767578125, "learning_rate": 3.955302673066298e-05, "loss": 100.5564, "step": 40170 }, { "epoch": 0.16233228424714263, "grad_norm": 690.856201171875, "learning_rate": 3.955243945911214e-05, "loss": 89.7016, "step": 40180 }, { "epoch": 0.16237268551251025, "grad_norm": 1174.603759765625, "learning_rate": 3.955185180637643e-05, "loss": 79.1691, "step": 40190 }, { "epoch": 0.16241308677787789, "grad_norm": 641.5059814453125, "learning_rate": 3.955126377246731e-05, "loss": 106.7659, "step": 40200 }, { "epoch": 0.16245348804324553, "grad_norm": 3953.796630859375, "learning_rate": 3.955067535739623e-05, "loss": 97.4286, "step": 40210 }, { "epoch": 0.16249388930861314, "grad_norm": 560.7224731445312, "learning_rate": 3.955008656117467e-05, "loss": 83.4753, "step": 40220 }, { "epoch": 0.16253429057398078, "grad_norm": 577.6102294921875, "learning_rate": 3.9549497383814105e-05, "loss": 64.1252, "step": 40230 }, { "epoch": 0.16257469183934842, "grad_norm": 825.94677734375, "learning_rate": 3.954890782532602e-05, "loss": 80.1885, "step": 40240 }, { "epoch": 0.16261509310471603, "grad_norm": 973.9566040039062, "learning_rate": 3.9548317885721925e-05, "loss": 60.6026, "step": 40250 }, { "epoch": 0.16265549437008367, "grad_norm": 384.3602600097656, "learning_rate": 3.9547727565013295e-05, "loss": 78.6402, "step": 40260 }, { "epoch": 0.1626958956354513, "grad_norm": 817.73388671875, "learning_rate": 3.954713686321166e-05, "loss": 165.5563, "step": 40270 }, { "epoch": 0.16273629690081892, "grad_norm": 450.3658447265625, "learning_rate": 3.954654578032853e-05, "loss": 120.2511, "step": 40280 }, { "epoch": 0.16277669816618656, "grad_norm": 239.74362182617188, "learning_rate": 3.954595431637542e-05, "loss": 64.2798, "step": 40290 }, { "epoch": 0.1628170994315542, "grad_norm": 980.2670288085938, "learning_rate": 3.954536247136387e-05, "loss": 77.4414, "step": 40300 }, { "epoch": 0.16285750069692181, "grad_norm": 2538.709228515625, "learning_rate": 3.954477024530542e-05, "loss": 121.8563, "step": 40310 }, { "epoch": 0.16289790196228945, "grad_norm": 1247.4947509765625, "learning_rate": 3.954417763821161e-05, "loss": 80.4332, "step": 40320 }, { "epoch": 0.1629383032276571, "grad_norm": 730.9105834960938, "learning_rate": 3.9543584650093994e-05, "loss": 140.6749, "step": 40330 }, { "epoch": 0.16297870449302473, "grad_norm": 1664.7508544921875, "learning_rate": 3.954299128096413e-05, "loss": 109.0711, "step": 40340 }, { "epoch": 0.16301910575839235, "grad_norm": 770.0411987304688, "learning_rate": 3.95423975308336e-05, "loss": 62.409, "step": 40350 }, { "epoch": 0.16305950702376, "grad_norm": 733.1558227539062, "learning_rate": 3.9541803399713956e-05, "loss": 97.6732, "step": 40360 }, { "epoch": 0.16309990828912763, "grad_norm": 1060.8912353515625, "learning_rate": 3.9541208887616805e-05, "loss": 69.0565, "step": 40370 }, { "epoch": 0.16314030955449524, "grad_norm": 356.4098815917969, "learning_rate": 3.954061399455372e-05, "loss": 105.4898, "step": 40380 }, { "epoch": 0.16318071081986288, "grad_norm": 504.13482666015625, "learning_rate": 3.95400187205363e-05, "loss": 61.4338, "step": 40390 }, { "epoch": 0.16322111208523052, "grad_norm": 682.7866821289062, "learning_rate": 3.9539423065576165e-05, "loss": 115.8884, "step": 40400 }, { "epoch": 0.16326151335059813, "grad_norm": 737.2675170898438, "learning_rate": 3.9538827029684916e-05, "loss": 55.9793, "step": 40410 }, { "epoch": 0.16330191461596577, "grad_norm": 924.0701293945312, "learning_rate": 3.9538230612874174e-05, "loss": 103.0369, "step": 40420 }, { "epoch": 0.1633423158813334, "grad_norm": 1058.978759765625, "learning_rate": 3.953763381515556e-05, "loss": 103.6735, "step": 40430 }, { "epoch": 0.16338271714670102, "grad_norm": 1186.2822265625, "learning_rate": 3.953703663654072e-05, "loss": 106.1324, "step": 40440 }, { "epoch": 0.16342311841206866, "grad_norm": 825.0392456054688, "learning_rate": 3.95364390770413e-05, "loss": 81.3327, "step": 40450 }, { "epoch": 0.1634635196774363, "grad_norm": 584.6072998046875, "learning_rate": 3.9535841136668936e-05, "loss": 76.001, "step": 40460 }, { "epoch": 0.16350392094280392, "grad_norm": 915.310791015625, "learning_rate": 3.953524281543529e-05, "loss": 82.7482, "step": 40470 }, { "epoch": 0.16354432220817156, "grad_norm": 1052.021240234375, "learning_rate": 3.9534644113352036e-05, "loss": 86.3224, "step": 40480 }, { "epoch": 0.1635847234735392, "grad_norm": 688.9476928710938, "learning_rate": 3.953404503043083e-05, "loss": 79.8042, "step": 40490 }, { "epoch": 0.16362512473890684, "grad_norm": 2522.06787109375, "learning_rate": 3.9533445566683364e-05, "loss": 120.3851, "step": 40500 }, { "epoch": 0.16366552600427445, "grad_norm": 531.6077880859375, "learning_rate": 3.9532845722121315e-05, "loss": 41.9876, "step": 40510 }, { "epoch": 0.1637059272696421, "grad_norm": 696.3869018554688, "learning_rate": 3.953224549675638e-05, "loss": 94.6229, "step": 40520 }, { "epoch": 0.16374632853500973, "grad_norm": 724.3345336914062, "learning_rate": 3.9531644890600276e-05, "loss": 99.2432, "step": 40530 }, { "epoch": 0.16378672980037734, "grad_norm": 1059.6839599609375, "learning_rate": 3.953104390366469e-05, "loss": 71.4181, "step": 40540 }, { "epoch": 0.16382713106574498, "grad_norm": 774.9991455078125, "learning_rate": 3.953044253596135e-05, "loss": 80.0983, "step": 40550 }, { "epoch": 0.16386753233111262, "grad_norm": 491.90277099609375, "learning_rate": 3.952984078750198e-05, "loss": 90.0716, "step": 40560 }, { "epoch": 0.16390793359648023, "grad_norm": 814.5154418945312, "learning_rate": 3.9529238658298304e-05, "loss": 81.0831, "step": 40570 }, { "epoch": 0.16394833486184787, "grad_norm": 980.4241943359375, "learning_rate": 3.952863614836207e-05, "loss": 62.4222, "step": 40580 }, { "epoch": 0.1639887361272155, "grad_norm": 2784.486572265625, "learning_rate": 3.952803325770501e-05, "loss": 108.2839, "step": 40590 }, { "epoch": 0.16402913739258312, "grad_norm": 1452.8868408203125, "learning_rate": 3.95274299863389e-05, "loss": 82.7274, "step": 40600 }, { "epoch": 0.16406953865795076, "grad_norm": 523.1763916015625, "learning_rate": 3.952682633427548e-05, "loss": 72.2552, "step": 40610 }, { "epoch": 0.1641099399233184, "grad_norm": 1814.376220703125, "learning_rate": 3.952622230152654e-05, "loss": 104.2498, "step": 40620 }, { "epoch": 0.16415034118868602, "grad_norm": 1456.19140625, "learning_rate": 3.952561788810384e-05, "loss": 111.9931, "step": 40630 }, { "epoch": 0.16419074245405366, "grad_norm": 418.4021911621094, "learning_rate": 3.952501309401916e-05, "loss": 82.0589, "step": 40640 }, { "epoch": 0.1642311437194213, "grad_norm": 1160.2156982421875, "learning_rate": 3.95244079192843e-05, "loss": 131.2082, "step": 40650 }, { "epoch": 0.16427154498478894, "grad_norm": 979.8446044921875, "learning_rate": 3.952380236391106e-05, "loss": 89.2304, "step": 40660 }, { "epoch": 0.16431194625015655, "grad_norm": 1009.5330200195312, "learning_rate": 3.952319642791124e-05, "loss": 54.3505, "step": 40670 }, { "epoch": 0.1643523475155242, "grad_norm": 515.9607543945312, "learning_rate": 3.9522590111296646e-05, "loss": 86.7991, "step": 40680 }, { "epoch": 0.16439274878089183, "grad_norm": 958.9117431640625, "learning_rate": 3.952198341407911e-05, "loss": 94.9694, "step": 40690 }, { "epoch": 0.16443315004625944, "grad_norm": 973.7232055664062, "learning_rate": 3.9521376336270466e-05, "loss": 101.5517, "step": 40700 }, { "epoch": 0.16447355131162708, "grad_norm": 2472.23193359375, "learning_rate": 3.952076887788253e-05, "loss": 113.2867, "step": 40710 }, { "epoch": 0.16451395257699472, "grad_norm": 1117.849609375, "learning_rate": 3.952016103892716e-05, "loss": 63.5003, "step": 40720 }, { "epoch": 0.16455435384236233, "grad_norm": 427.4268493652344, "learning_rate": 3.95195528194162e-05, "loss": 112.6997, "step": 40730 }, { "epoch": 0.16459475510772997, "grad_norm": 227.0601806640625, "learning_rate": 3.951894421936151e-05, "loss": 82.3097, "step": 40740 }, { "epoch": 0.1646351563730976, "grad_norm": 3456.140625, "learning_rate": 3.951833523877495e-05, "loss": 101.9074, "step": 40750 }, { "epoch": 0.16467555763846523, "grad_norm": 1029.93603515625, "learning_rate": 3.95177258776684e-05, "loss": 80.9789, "step": 40760 }, { "epoch": 0.16471595890383287, "grad_norm": 983.2781372070312, "learning_rate": 3.951711613605374e-05, "loss": 66.6682, "step": 40770 }, { "epoch": 0.1647563601692005, "grad_norm": 1014.9578247070312, "learning_rate": 3.9516506013942836e-05, "loss": 89.8433, "step": 40780 }, { "epoch": 0.16479676143456812, "grad_norm": 900.0442504882812, "learning_rate": 3.951589551134761e-05, "loss": 98.0265, "step": 40790 }, { "epoch": 0.16483716269993576, "grad_norm": 759.8152465820312, "learning_rate": 3.9515284628279954e-05, "loss": 50.9888, "step": 40800 }, { "epoch": 0.1648775639653034, "grad_norm": 822.179443359375, "learning_rate": 3.9514673364751776e-05, "loss": 107.9123, "step": 40810 }, { "epoch": 0.16491796523067104, "grad_norm": 710.6921997070312, "learning_rate": 3.9514061720775e-05, "loss": 117.4759, "step": 40820 }, { "epoch": 0.16495836649603865, "grad_norm": 818.1751098632812, "learning_rate": 3.9513449696361535e-05, "loss": 80.6732, "step": 40830 }, { "epoch": 0.1649987677614063, "grad_norm": 1064.0673828125, "learning_rate": 3.951283729152332e-05, "loss": 76.4911, "step": 40840 }, { "epoch": 0.16503916902677393, "grad_norm": 662.7578735351562, "learning_rate": 3.951222450627231e-05, "loss": 48.1669, "step": 40850 }, { "epoch": 0.16507957029214154, "grad_norm": 545.164794921875, "learning_rate": 3.951161134062042e-05, "loss": 90.6871, "step": 40860 }, { "epoch": 0.16511997155750918, "grad_norm": 1219.935302734375, "learning_rate": 3.951099779457963e-05, "loss": 56.5449, "step": 40870 }, { "epoch": 0.16516037282287682, "grad_norm": 988.223876953125, "learning_rate": 3.95103838681619e-05, "loss": 90.2754, "step": 40880 }, { "epoch": 0.16520077408824443, "grad_norm": 719.2081298828125, "learning_rate": 3.9509769561379184e-05, "loss": 89.6676, "step": 40890 }, { "epoch": 0.16524117535361207, "grad_norm": 874.89404296875, "learning_rate": 3.9509154874243466e-05, "loss": 59.4812, "step": 40900 }, { "epoch": 0.16528157661897971, "grad_norm": 701.1567993164062, "learning_rate": 3.950853980676673e-05, "loss": 98.7121, "step": 40910 }, { "epoch": 0.16532197788434733, "grad_norm": 1197.994140625, "learning_rate": 3.950792435896097e-05, "loss": 112.145, "step": 40920 }, { "epoch": 0.16536237914971497, "grad_norm": 4444.24267578125, "learning_rate": 3.950730853083818e-05, "loss": 104.277, "step": 40930 }, { "epoch": 0.1654027804150826, "grad_norm": 1147.2232666015625, "learning_rate": 3.950669232241036e-05, "loss": 132.6901, "step": 40940 }, { "epoch": 0.16544318168045022, "grad_norm": 1110.708984375, "learning_rate": 3.950607573368954e-05, "loss": 59.1241, "step": 40950 }, { "epoch": 0.16548358294581786, "grad_norm": 1008.7105712890625, "learning_rate": 3.950545876468773e-05, "loss": 75.2638, "step": 40960 }, { "epoch": 0.1655239842111855, "grad_norm": 2469.8623046875, "learning_rate": 3.9504841415416955e-05, "loss": 85.3266, "step": 40970 }, { "epoch": 0.16556438547655314, "grad_norm": 1274.08447265625, "learning_rate": 3.950422368588926e-05, "loss": 73.8704, "step": 40980 }, { "epoch": 0.16560478674192075, "grad_norm": 1128.9522705078125, "learning_rate": 3.950360557611668e-05, "loss": 73.6916, "step": 40990 }, { "epoch": 0.1656451880072884, "grad_norm": 491.25958251953125, "learning_rate": 3.950298708611127e-05, "loss": 76.6751, "step": 41000 }, { "epoch": 0.16568558927265603, "grad_norm": 625.8621826171875, "learning_rate": 3.950236821588508e-05, "loss": 80.7306, "step": 41010 }, { "epoch": 0.16572599053802364, "grad_norm": 629.7647094726562, "learning_rate": 3.9501748965450186e-05, "loss": 75.1716, "step": 41020 }, { "epoch": 0.16576639180339128, "grad_norm": 1378.2105712890625, "learning_rate": 3.950112933481866e-05, "loss": 73.2715, "step": 41030 }, { "epoch": 0.16580679306875892, "grad_norm": 571.427734375, "learning_rate": 3.950050932400257e-05, "loss": 91.563, "step": 41040 }, { "epoch": 0.16584719433412654, "grad_norm": 890.5443115234375, "learning_rate": 3.949988893301401e-05, "loss": 125.3012, "step": 41050 }, { "epoch": 0.16588759559949418, "grad_norm": 1563.917236328125, "learning_rate": 3.9499268161865085e-05, "loss": 65.5101, "step": 41060 }, { "epoch": 0.16592799686486182, "grad_norm": 614.747802734375, "learning_rate": 3.949864701056788e-05, "loss": 87.1772, "step": 41070 }, { "epoch": 0.16596839813022943, "grad_norm": 642.1237182617188, "learning_rate": 3.9498025479134516e-05, "loss": 77.2061, "step": 41080 }, { "epoch": 0.16600879939559707, "grad_norm": 613.6666259765625, "learning_rate": 3.9497403567577114e-05, "loss": 115.0317, "step": 41090 }, { "epoch": 0.1660492006609647, "grad_norm": 468.08660888671875, "learning_rate": 3.949678127590778e-05, "loss": 74.2069, "step": 41100 }, { "epoch": 0.16608960192633232, "grad_norm": 513.4039306640625, "learning_rate": 3.949615860413866e-05, "loss": 60.0802, "step": 41110 }, { "epoch": 0.16613000319169996, "grad_norm": 931.3645629882812, "learning_rate": 3.94955355522819e-05, "loss": 143.4764, "step": 41120 }, { "epoch": 0.1661704044570676, "grad_norm": 2047.0240478515625, "learning_rate": 3.9494912120349626e-05, "loss": 55.3324, "step": 41130 }, { "epoch": 0.16621080572243524, "grad_norm": 533.386474609375, "learning_rate": 3.949428830835401e-05, "loss": 55.0412, "step": 41140 }, { "epoch": 0.16625120698780285, "grad_norm": 620.4298095703125, "learning_rate": 3.9493664116307204e-05, "loss": 95.4496, "step": 41150 }, { "epoch": 0.1662916082531705, "grad_norm": 641.894287109375, "learning_rate": 3.9493039544221375e-05, "loss": 103.0947, "step": 41160 }, { "epoch": 0.16633200951853813, "grad_norm": 677.8861694335938, "learning_rate": 3.949241459210871e-05, "loss": 66.4296, "step": 41170 }, { "epoch": 0.16637241078390574, "grad_norm": 683.670654296875, "learning_rate": 3.949178925998139e-05, "loss": 123.1877, "step": 41180 }, { "epoch": 0.16641281204927338, "grad_norm": 448.2669372558594, "learning_rate": 3.9491163547851604e-05, "loss": 79.1134, "step": 41190 }, { "epoch": 0.16645321331464102, "grad_norm": 6343.84619140625, "learning_rate": 3.949053745573155e-05, "loss": 127.7989, "step": 41200 }, { "epoch": 0.16649361458000864, "grad_norm": 1064.6607666015625, "learning_rate": 3.9489910983633426e-05, "loss": 114.2296, "step": 41210 }, { "epoch": 0.16653401584537628, "grad_norm": 980.7378540039062, "learning_rate": 3.9489284131569456e-05, "loss": 85.9728, "step": 41220 }, { "epoch": 0.16657441711074392, "grad_norm": 480.8191833496094, "learning_rate": 3.948865689955186e-05, "loss": 71.3556, "step": 41230 }, { "epoch": 0.16661481837611153, "grad_norm": 1177.986083984375, "learning_rate": 3.948802928759287e-05, "loss": 83.3197, "step": 41240 }, { "epoch": 0.16665521964147917, "grad_norm": 865.0421752929688, "learning_rate": 3.948740129570471e-05, "loss": 76.694, "step": 41250 }, { "epoch": 0.1666956209068468, "grad_norm": 835.8969116210938, "learning_rate": 3.948677292389963e-05, "loss": 74.6189, "step": 41260 }, { "epoch": 0.16673602217221442, "grad_norm": 474.4497375488281, "learning_rate": 3.948614417218988e-05, "loss": 81.0856, "step": 41270 }, { "epoch": 0.16677642343758206, "grad_norm": 2981.888671875, "learning_rate": 3.948551504058771e-05, "loss": 175.6042, "step": 41280 }, { "epoch": 0.1668168247029497, "grad_norm": 615.6951904296875, "learning_rate": 3.94848855291054e-05, "loss": 89.9799, "step": 41290 }, { "epoch": 0.16685722596831734, "grad_norm": 1236.52587890625, "learning_rate": 3.948425563775521e-05, "loss": 110.5213, "step": 41300 }, { "epoch": 0.16689762723368495, "grad_norm": 926.8430786132812, "learning_rate": 3.948362536654943e-05, "loss": 81.6385, "step": 41310 }, { "epoch": 0.1669380284990526, "grad_norm": 606.0703125, "learning_rate": 3.948299471550034e-05, "loss": 82.6857, "step": 41320 }, { "epoch": 0.16697842976442023, "grad_norm": 1497.1026611328125, "learning_rate": 3.9482363684620247e-05, "loss": 109.4494, "step": 41330 }, { "epoch": 0.16701883102978785, "grad_norm": 1270.447021484375, "learning_rate": 3.9481732273921435e-05, "loss": 101.7404, "step": 41340 }, { "epoch": 0.16705923229515549, "grad_norm": 1176.3333740234375, "learning_rate": 3.948110048341622e-05, "loss": 92.2997, "step": 41350 }, { "epoch": 0.16709963356052313, "grad_norm": 2200.978759765625, "learning_rate": 3.9480468313116925e-05, "loss": 121.3717, "step": 41360 }, { "epoch": 0.16714003482589074, "grad_norm": 577.7105712890625, "learning_rate": 3.947983576303587e-05, "loss": 74.5596, "step": 41370 }, { "epoch": 0.16718043609125838, "grad_norm": 949.1942749023438, "learning_rate": 3.947920283318539e-05, "loss": 83.1184, "step": 41380 }, { "epoch": 0.16722083735662602, "grad_norm": 1375.0670166015625, "learning_rate": 3.947856952357782e-05, "loss": 116.7619, "step": 41390 }, { "epoch": 0.16726123862199363, "grad_norm": 467.704345703125, "learning_rate": 3.9477935834225503e-05, "loss": 96.9709, "step": 41400 }, { "epoch": 0.16730163988736127, "grad_norm": 944.2130126953125, "learning_rate": 3.947730176514081e-05, "loss": 82.2528, "step": 41410 }, { "epoch": 0.1673420411527289, "grad_norm": 791.8900756835938, "learning_rate": 3.947666731633609e-05, "loss": 71.4929, "step": 41420 }, { "epoch": 0.16738244241809652, "grad_norm": 557.81884765625, "learning_rate": 3.947603248782371e-05, "loss": 76.7843, "step": 41430 }, { "epoch": 0.16742284368346416, "grad_norm": 681.4508666992188, "learning_rate": 3.947539727961605e-05, "loss": 91.5363, "step": 41440 }, { "epoch": 0.1674632449488318, "grad_norm": 1442.1290283203125, "learning_rate": 3.947476169172549e-05, "loss": 93.9871, "step": 41450 }, { "epoch": 0.16750364621419944, "grad_norm": 483.75927734375, "learning_rate": 3.947412572416443e-05, "loss": 65.8295, "step": 41460 }, { "epoch": 0.16754404747956705, "grad_norm": 5170.841796875, "learning_rate": 3.947348937694526e-05, "loss": 119.9971, "step": 41470 }, { "epoch": 0.1675844487449347, "grad_norm": 0.0, "learning_rate": 3.947285265008039e-05, "loss": 72.5246, "step": 41480 }, { "epoch": 0.16762485001030233, "grad_norm": 832.8370361328125, "learning_rate": 3.9472215543582234e-05, "loss": 66.7058, "step": 41490 }, { "epoch": 0.16766525127566995, "grad_norm": 1178.047607421875, "learning_rate": 3.9471578057463206e-05, "loss": 82.1243, "step": 41500 }, { "epoch": 0.1677056525410376, "grad_norm": 1412.372314453125, "learning_rate": 3.9470940191735745e-05, "loss": 69.2213, "step": 41510 }, { "epoch": 0.16774605380640523, "grad_norm": 945.0978393554688, "learning_rate": 3.947030194641228e-05, "loss": 79.4089, "step": 41520 }, { "epoch": 0.16778645507177284, "grad_norm": 1217.2481689453125, "learning_rate": 3.946966332150525e-05, "loss": 116.153, "step": 41530 }, { "epoch": 0.16782685633714048, "grad_norm": 883.7046508789062, "learning_rate": 3.9469024317027115e-05, "loss": 78.1752, "step": 41540 }, { "epoch": 0.16786725760250812, "grad_norm": 528.1052856445312, "learning_rate": 3.9468384932990324e-05, "loss": 93.4256, "step": 41550 }, { "epoch": 0.16790765886787573, "grad_norm": 972.721923828125, "learning_rate": 3.9467745169407346e-05, "loss": 104.534, "step": 41560 }, { "epoch": 0.16794806013324337, "grad_norm": 2014.378173828125, "learning_rate": 3.946710502629065e-05, "loss": 113.3028, "step": 41570 }, { "epoch": 0.167988461398611, "grad_norm": 585.617431640625, "learning_rate": 3.946646450365273e-05, "loss": 97.4658, "step": 41580 }, { "epoch": 0.16802886266397862, "grad_norm": 1128.84521484375, "learning_rate": 3.9465823601506055e-05, "loss": 83.0689, "step": 41590 }, { "epoch": 0.16806926392934626, "grad_norm": 1114.514404296875, "learning_rate": 3.946518231986313e-05, "loss": 78.9807, "step": 41600 }, { "epoch": 0.1681096651947139, "grad_norm": 738.8482055664062, "learning_rate": 3.946454065873645e-05, "loss": 112.8025, "step": 41610 }, { "epoch": 0.16815006646008154, "grad_norm": 786.415771484375, "learning_rate": 3.946389861813854e-05, "loss": 84.9417, "step": 41620 }, { "epoch": 0.16819046772544916, "grad_norm": 293.24774169921875, "learning_rate": 3.946325619808189e-05, "loss": 89.6034, "step": 41630 }, { "epoch": 0.1682308689908168, "grad_norm": 860.4002075195312, "learning_rate": 3.9462613398579044e-05, "loss": 70.1078, "step": 41640 }, { "epoch": 0.16827127025618444, "grad_norm": 470.266357421875, "learning_rate": 3.9461970219642535e-05, "loss": 98.0849, "step": 41650 }, { "epoch": 0.16831167152155205, "grad_norm": 240.69097900390625, "learning_rate": 3.946132666128489e-05, "loss": 78.9933, "step": 41660 }, { "epoch": 0.1683520727869197, "grad_norm": 681.0687866210938, "learning_rate": 3.946068272351867e-05, "loss": 108.567, "step": 41670 }, { "epoch": 0.16839247405228733, "grad_norm": 1583.38330078125, "learning_rate": 3.946003840635642e-05, "loss": 100.0662, "step": 41680 }, { "epoch": 0.16843287531765494, "grad_norm": 1083.94677734375, "learning_rate": 3.94593937098107e-05, "loss": 86.575, "step": 41690 }, { "epoch": 0.16847327658302258, "grad_norm": 556.242431640625, "learning_rate": 3.945874863389408e-05, "loss": 69.5942, "step": 41700 }, { "epoch": 0.16851367784839022, "grad_norm": 749.9948120117188, "learning_rate": 3.9458103178619146e-05, "loss": 91.4834, "step": 41710 }, { "epoch": 0.16855407911375783, "grad_norm": 626.6065063476562, "learning_rate": 3.945745734399846e-05, "loss": 103.6504, "step": 41720 }, { "epoch": 0.16859448037912547, "grad_norm": 762.7034301757812, "learning_rate": 3.945681113004463e-05, "loss": 129.9065, "step": 41730 }, { "epoch": 0.1686348816444931, "grad_norm": 456.9073486328125, "learning_rate": 3.945616453677025e-05, "loss": 71.9278, "step": 41740 }, { "epoch": 0.16867528290986072, "grad_norm": 1217.7987060546875, "learning_rate": 3.945551756418794e-05, "loss": 99.0016, "step": 41750 }, { "epoch": 0.16871568417522836, "grad_norm": 983.9970092773438, "learning_rate": 3.945487021231028e-05, "loss": 82.6868, "step": 41760 }, { "epoch": 0.168756085440596, "grad_norm": 614.2739868164062, "learning_rate": 3.9454222481149916e-05, "loss": 69.2636, "step": 41770 }, { "epoch": 0.16879648670596364, "grad_norm": 797.482421875, "learning_rate": 3.945357437071947e-05, "loss": 88.3623, "step": 41780 }, { "epoch": 0.16883688797133126, "grad_norm": 1216.2550048828125, "learning_rate": 3.9452925881031574e-05, "loss": 69.5762, "step": 41790 }, { "epoch": 0.1688772892366989, "grad_norm": 706.0321044921875, "learning_rate": 3.9452277012098875e-05, "loss": 83.3594, "step": 41800 }, { "epoch": 0.16891769050206654, "grad_norm": 888.8679809570312, "learning_rate": 3.945162776393402e-05, "loss": 84.315, "step": 41810 }, { "epoch": 0.16895809176743415, "grad_norm": 846.63037109375, "learning_rate": 3.9450978136549665e-05, "loss": 62.6609, "step": 41820 }, { "epoch": 0.1689984930328018, "grad_norm": 588.7283935546875, "learning_rate": 3.9450328129958484e-05, "loss": 88.6878, "step": 41830 }, { "epoch": 0.16903889429816943, "grad_norm": 510.836669921875, "learning_rate": 3.9449677744173135e-05, "loss": 80.6699, "step": 41840 }, { "epoch": 0.16907929556353704, "grad_norm": 524.7201538085938, "learning_rate": 3.9449026979206305e-05, "loss": 58.7747, "step": 41850 }, { "epoch": 0.16911969682890468, "grad_norm": 9646.47265625, "learning_rate": 3.9448375835070685e-05, "loss": 143.0488, "step": 41860 }, { "epoch": 0.16916009809427232, "grad_norm": 1948.435302734375, "learning_rate": 3.944772431177896e-05, "loss": 85.102, "step": 41870 }, { "epoch": 0.16920049935963993, "grad_norm": 809.4441528320312, "learning_rate": 3.9447072409343844e-05, "loss": 82.9807, "step": 41880 }, { "epoch": 0.16924090062500757, "grad_norm": 711.6243896484375, "learning_rate": 3.944642012777804e-05, "loss": 77.7331, "step": 41890 }, { "epoch": 0.1692813018903752, "grad_norm": 450.212646484375, "learning_rate": 3.9445767467094256e-05, "loss": 77.5103, "step": 41900 }, { "epoch": 0.16932170315574283, "grad_norm": 1139.9300537109375, "learning_rate": 3.944511442730523e-05, "loss": 127.2431, "step": 41910 }, { "epoch": 0.16936210442111047, "grad_norm": 681.1736450195312, "learning_rate": 3.9444461008423687e-05, "loss": 63.0341, "step": 41920 }, { "epoch": 0.1694025056864781, "grad_norm": 794.4819946289062, "learning_rate": 3.944380721046236e-05, "loss": 75.632, "step": 41930 }, { "epoch": 0.16944290695184575, "grad_norm": 1029.8173828125, "learning_rate": 3.944315303343401e-05, "loss": 76.0773, "step": 41940 }, { "epoch": 0.16948330821721336, "grad_norm": 661.2575073242188, "learning_rate": 3.9442498477351376e-05, "loss": 117.2325, "step": 41950 }, { "epoch": 0.169523709482581, "grad_norm": 383.8749084472656, "learning_rate": 3.944184354222722e-05, "loss": 80.0125, "step": 41960 }, { "epoch": 0.16956411074794864, "grad_norm": 3768.85986328125, "learning_rate": 3.9441188228074326e-05, "loss": 113.0336, "step": 41970 }, { "epoch": 0.16960451201331625, "grad_norm": 969.9005737304688, "learning_rate": 3.944053253490546e-05, "loss": 75.1191, "step": 41980 }, { "epoch": 0.1696449132786839, "grad_norm": 759.4833984375, "learning_rate": 3.943987646273339e-05, "loss": 120.5944, "step": 41990 }, { "epoch": 0.16968531454405153, "grad_norm": 414.0032958984375, "learning_rate": 3.943922001157093e-05, "loss": 61.2086, "step": 42000 }, { "epoch": 0.16972571580941914, "grad_norm": 577.0447387695312, "learning_rate": 3.9438563181430863e-05, "loss": 66.7768, "step": 42010 }, { "epoch": 0.16976611707478678, "grad_norm": 462.7989501953125, "learning_rate": 3.9437905972326e-05, "loss": 49.0065, "step": 42020 }, { "epoch": 0.16980651834015442, "grad_norm": 2454.510986328125, "learning_rate": 3.9437248384269155e-05, "loss": 74.5437, "step": 42030 }, { "epoch": 0.16984691960552203, "grad_norm": 0.0, "learning_rate": 3.943659041727314e-05, "loss": 63.6807, "step": 42040 }, { "epoch": 0.16988732087088967, "grad_norm": 507.4505920410156, "learning_rate": 3.94359320713508e-05, "loss": 84.9203, "step": 42050 }, { "epoch": 0.16992772213625731, "grad_norm": 490.3003234863281, "learning_rate": 3.943527334651495e-05, "loss": 50.9804, "step": 42060 }, { "epoch": 0.16996812340162493, "grad_norm": 1047.548095703125, "learning_rate": 3.9434614242778435e-05, "loss": 82.9078, "step": 42070 }, { "epoch": 0.17000852466699257, "grad_norm": 1060.2462158203125, "learning_rate": 3.9433954760154116e-05, "loss": 101.4438, "step": 42080 }, { "epoch": 0.1700489259323602, "grad_norm": 1283.112060546875, "learning_rate": 3.9433294898654846e-05, "loss": 90.8724, "step": 42090 }, { "epoch": 0.17008932719772785, "grad_norm": 701.982177734375, "learning_rate": 3.943263465829348e-05, "loss": 64.7551, "step": 42100 }, { "epoch": 0.17012972846309546, "grad_norm": 505.5760498046875, "learning_rate": 3.94319740390829e-05, "loss": 48.8886, "step": 42110 }, { "epoch": 0.1701701297284631, "grad_norm": 1101.0406494140625, "learning_rate": 3.943131304103599e-05, "loss": 87.4703, "step": 42120 }, { "epoch": 0.17021053099383074, "grad_norm": 1292.386474609375, "learning_rate": 3.9430651664165616e-05, "loss": 113.6191, "step": 42130 }, { "epoch": 0.17025093225919835, "grad_norm": 374.6224060058594, "learning_rate": 3.942998990848469e-05, "loss": 76.7611, "step": 42140 }, { "epoch": 0.170291333524566, "grad_norm": 1148.2353515625, "learning_rate": 3.942932777400611e-05, "loss": 78.2513, "step": 42150 }, { "epoch": 0.17033173478993363, "grad_norm": 652.7380981445312, "learning_rate": 3.942866526074277e-05, "loss": 68.5083, "step": 42160 }, { "epoch": 0.17037213605530124, "grad_norm": 1101.5650634765625, "learning_rate": 3.942800236870761e-05, "loss": 83.1009, "step": 42170 }, { "epoch": 0.17041253732066888, "grad_norm": 765.6564331054688, "learning_rate": 3.942733909791354e-05, "loss": 81.3225, "step": 42180 }, { "epoch": 0.17045293858603652, "grad_norm": 647.0240478515625, "learning_rate": 3.942667544837349e-05, "loss": 113.6144, "step": 42190 }, { "epoch": 0.17049333985140414, "grad_norm": 644.6279296875, "learning_rate": 3.9426011420100405e-05, "loss": 82.8448, "step": 42200 }, { "epoch": 0.17053374111677178, "grad_norm": 849.5108642578125, "learning_rate": 3.942534701310722e-05, "loss": 75.6162, "step": 42210 }, { "epoch": 0.17057414238213942, "grad_norm": 869.8896484375, "learning_rate": 3.94246822274069e-05, "loss": 107.6029, "step": 42220 }, { "epoch": 0.17061454364750703, "grad_norm": 1466.9735107421875, "learning_rate": 3.9424017063012394e-05, "loss": 73.4565, "step": 42230 }, { "epoch": 0.17065494491287467, "grad_norm": 934.4044799804688, "learning_rate": 3.942335151993668e-05, "loss": 86.7369, "step": 42240 }, { "epoch": 0.1706953461782423, "grad_norm": 1067.3001708984375, "learning_rate": 3.942268559819272e-05, "loss": 82.3458, "step": 42250 }, { "epoch": 0.17073574744360995, "grad_norm": 1136.6522216796875, "learning_rate": 3.9422019297793516e-05, "loss": 77.6294, "step": 42260 }, { "epoch": 0.17077614870897756, "grad_norm": 696.1351928710938, "learning_rate": 3.942135261875204e-05, "loss": 118.3667, "step": 42270 }, { "epoch": 0.1708165499743452, "grad_norm": 593.81103515625, "learning_rate": 3.94206855610813e-05, "loss": 88.9259, "step": 42280 }, { "epoch": 0.17085695123971284, "grad_norm": 1247.7071533203125, "learning_rate": 3.9420018124794294e-05, "loss": 79.2447, "step": 42290 }, { "epoch": 0.17089735250508045, "grad_norm": 1543.30322265625, "learning_rate": 3.941935030990403e-05, "loss": 88.1879, "step": 42300 }, { "epoch": 0.1709377537704481, "grad_norm": 616.7864379882812, "learning_rate": 3.941868211642355e-05, "loss": 81.0888, "step": 42310 }, { "epoch": 0.17097815503581573, "grad_norm": 504.2239685058594, "learning_rate": 3.941801354436585e-05, "loss": 79.8196, "step": 42320 }, { "epoch": 0.17101855630118334, "grad_norm": 646.478271484375, "learning_rate": 3.941734459374399e-05, "loss": 79.9383, "step": 42330 }, { "epoch": 0.17105895756655098, "grad_norm": 304.4427490234375, "learning_rate": 3.941667526457099e-05, "loss": 79.6843, "step": 42340 }, { "epoch": 0.17109935883191862, "grad_norm": 2277.253662109375, "learning_rate": 3.9416005556859914e-05, "loss": 91.1325, "step": 42350 }, { "epoch": 0.17113976009728624, "grad_norm": 596.6697387695312, "learning_rate": 3.9415335470623816e-05, "loss": 100.1752, "step": 42360 }, { "epoch": 0.17118016136265388, "grad_norm": 2080.851806640625, "learning_rate": 3.941466500587575e-05, "loss": 111.6894, "step": 42370 }, { "epoch": 0.17122056262802152, "grad_norm": 497.0038146972656, "learning_rate": 3.9413994162628804e-05, "loss": 117.063, "step": 42380 }, { "epoch": 0.17126096389338913, "grad_norm": 1601.8934326171875, "learning_rate": 3.941332294089604e-05, "loss": 79.9856, "step": 42390 }, { "epoch": 0.17130136515875677, "grad_norm": 638.1102294921875, "learning_rate": 3.941265134069055e-05, "loss": 89.9926, "step": 42400 }, { "epoch": 0.1713417664241244, "grad_norm": 731.0360107421875, "learning_rate": 3.941197936202543e-05, "loss": 96.968, "step": 42410 }, { "epoch": 0.17138216768949205, "grad_norm": 696.0232543945312, "learning_rate": 3.941130700491378e-05, "loss": 69.7123, "step": 42420 }, { "epoch": 0.17142256895485966, "grad_norm": 1154.012939453125, "learning_rate": 3.9410634269368706e-05, "loss": 98.8974, "step": 42430 }, { "epoch": 0.1714629702202273, "grad_norm": 548.6651000976562, "learning_rate": 3.940996115540332e-05, "loss": 90.9087, "step": 42440 }, { "epoch": 0.17150337148559494, "grad_norm": 1186.6876220703125, "learning_rate": 3.9409287663030754e-05, "loss": 98.7666, "step": 42450 }, { "epoch": 0.17154377275096255, "grad_norm": 1163.623291015625, "learning_rate": 3.9408613792264125e-05, "loss": 111.3392, "step": 42460 }, { "epoch": 0.1715841740163302, "grad_norm": 2000.774658203125, "learning_rate": 3.940793954311659e-05, "loss": 96.1054, "step": 42470 }, { "epoch": 0.17162457528169783, "grad_norm": 1090.0098876953125, "learning_rate": 3.940726491560127e-05, "loss": 81.3527, "step": 42480 }, { "epoch": 0.17166497654706545, "grad_norm": 414.6957092285156, "learning_rate": 3.9406589909731335e-05, "loss": 50.2307, "step": 42490 }, { "epoch": 0.17170537781243309, "grad_norm": 707.9476318359375, "learning_rate": 3.940591452551993e-05, "loss": 112.7422, "step": 42500 }, { "epoch": 0.17174577907780073, "grad_norm": 499.1845703125, "learning_rate": 3.940523876298024e-05, "loss": 79.7069, "step": 42510 }, { "epoch": 0.17178618034316834, "grad_norm": 876.81298828125, "learning_rate": 3.940456262212543e-05, "loss": 93.4067, "step": 42520 }, { "epoch": 0.17182658160853598, "grad_norm": 732.292236328125, "learning_rate": 3.940388610296868e-05, "loss": 66.0287, "step": 42530 }, { "epoch": 0.17186698287390362, "grad_norm": 875.480712890625, "learning_rate": 3.9403209205523173e-05, "loss": 78.4949, "step": 42540 }, { "epoch": 0.17190738413927123, "grad_norm": 677.6840209960938, "learning_rate": 3.940253192980212e-05, "loss": 92.2969, "step": 42550 }, { "epoch": 0.17194778540463887, "grad_norm": 1275.6361083984375, "learning_rate": 3.9401854275818716e-05, "loss": 86.797, "step": 42560 }, { "epoch": 0.1719881866700065, "grad_norm": 857.1155395507812, "learning_rate": 3.9401176243586177e-05, "loss": 74.4845, "step": 42570 }, { "epoch": 0.17202858793537412, "grad_norm": 3165.652587890625, "learning_rate": 3.9400497833117716e-05, "loss": 116.9501, "step": 42580 }, { "epoch": 0.17206898920074176, "grad_norm": 486.62353515625, "learning_rate": 3.939981904442657e-05, "loss": 76.6197, "step": 42590 }, { "epoch": 0.1721093904661094, "grad_norm": 891.3754272460938, "learning_rate": 3.939913987752595e-05, "loss": 89.9513, "step": 42600 }, { "epoch": 0.17214979173147704, "grad_norm": 594.1891479492188, "learning_rate": 3.9398460332429115e-05, "loss": 91.0488, "step": 42610 }, { "epoch": 0.17219019299684465, "grad_norm": 429.0885009765625, "learning_rate": 3.9397780409149314e-05, "loss": 49.7918, "step": 42620 }, { "epoch": 0.1722305942622123, "grad_norm": 1666.359375, "learning_rate": 3.9397100107699795e-05, "loss": 97.0718, "step": 42630 }, { "epoch": 0.17227099552757993, "grad_norm": 603.205322265625, "learning_rate": 3.939641942809382e-05, "loss": 88.019, "step": 42640 }, { "epoch": 0.17231139679294755, "grad_norm": 716.9115600585938, "learning_rate": 3.939573837034466e-05, "loss": 104.4141, "step": 42650 }, { "epoch": 0.1723517980583152, "grad_norm": 755.2896118164062, "learning_rate": 3.9395056934465604e-05, "loss": 104.734, "step": 42660 }, { "epoch": 0.17239219932368283, "grad_norm": 438.7043151855469, "learning_rate": 3.939437512046993e-05, "loss": 83.8911, "step": 42670 }, { "epoch": 0.17243260058905044, "grad_norm": 1080.4783935546875, "learning_rate": 3.939369292837092e-05, "loss": 80.1764, "step": 42680 }, { "epoch": 0.17247300185441808, "grad_norm": 1368.8602294921875, "learning_rate": 3.939301035818188e-05, "loss": 76.1026, "step": 42690 }, { "epoch": 0.17251340311978572, "grad_norm": 1755.056640625, "learning_rate": 3.939232740991612e-05, "loss": 102.755, "step": 42700 }, { "epoch": 0.17255380438515333, "grad_norm": 1074.890869140625, "learning_rate": 3.939164408358696e-05, "loss": 110.8715, "step": 42710 }, { "epoch": 0.17259420565052097, "grad_norm": 749.5791015625, "learning_rate": 3.939096037920771e-05, "loss": 83.5525, "step": 42720 }, { "epoch": 0.1726346069158886, "grad_norm": 882.516357421875, "learning_rate": 3.939027629679171e-05, "loss": 90.7301, "step": 42730 }, { "epoch": 0.17267500818125622, "grad_norm": 6177.97705078125, "learning_rate": 3.938959183635228e-05, "loss": 101.8573, "step": 42740 }, { "epoch": 0.17271540944662386, "grad_norm": 1139.997314453125, "learning_rate": 3.9388906997902784e-05, "loss": 85.6628, "step": 42750 }, { "epoch": 0.1727558107119915, "grad_norm": 1481.9365234375, "learning_rate": 3.938822178145656e-05, "loss": 114.4302, "step": 42760 }, { "epoch": 0.17279621197735914, "grad_norm": 1003.7747192382812, "learning_rate": 3.938753618702697e-05, "loss": 107.0744, "step": 42770 }, { "epoch": 0.17283661324272676, "grad_norm": 778.5792236328125, "learning_rate": 3.938685021462738e-05, "loss": 75.9139, "step": 42780 }, { "epoch": 0.1728770145080944, "grad_norm": 596.6504516601562, "learning_rate": 3.938616386427117e-05, "loss": 95.3925, "step": 42790 }, { "epoch": 0.17291741577346204, "grad_norm": 346.0478820800781, "learning_rate": 3.938547713597171e-05, "loss": 64.3553, "step": 42800 }, { "epoch": 0.17295781703882965, "grad_norm": 659.998291015625, "learning_rate": 3.938479002974239e-05, "loss": 79.2351, "step": 42810 }, { "epoch": 0.1729982183041973, "grad_norm": 1590.90283203125, "learning_rate": 3.938410254559661e-05, "loss": 111.3108, "step": 42820 }, { "epoch": 0.17303861956956493, "grad_norm": 580.7548217773438, "learning_rate": 3.938341468354778e-05, "loss": 78.8128, "step": 42830 }, { "epoch": 0.17307902083493254, "grad_norm": 1085.07373046875, "learning_rate": 3.938272644360929e-05, "loss": 142.478, "step": 42840 }, { "epoch": 0.17311942210030018, "grad_norm": 446.0859375, "learning_rate": 3.938203782579457e-05, "loss": 73.5577, "step": 42850 }, { "epoch": 0.17315982336566782, "grad_norm": 452.1142578125, "learning_rate": 3.9381348830117055e-05, "loss": 93.8503, "step": 42860 }, { "epoch": 0.17320022463103543, "grad_norm": 1228.706298828125, "learning_rate": 3.938065945659016e-05, "loss": 89.0013, "step": 42870 }, { "epoch": 0.17324062589640307, "grad_norm": 830.8073120117188, "learning_rate": 3.937996970522733e-05, "loss": 82.8595, "step": 42880 }, { "epoch": 0.1732810271617707, "grad_norm": 730.0189208984375, "learning_rate": 3.937927957604201e-05, "loss": 50.0603, "step": 42890 }, { "epoch": 0.17332142842713832, "grad_norm": 706.650634765625, "learning_rate": 3.937858906904766e-05, "loss": 44.7563, "step": 42900 }, { "epoch": 0.17336182969250596, "grad_norm": 1158.515380859375, "learning_rate": 3.937789818425774e-05, "loss": 125.2809, "step": 42910 }, { "epoch": 0.1734022309578736, "grad_norm": 834.2371215820312, "learning_rate": 3.9377206921685725e-05, "loss": 93.5468, "step": 42920 }, { "epoch": 0.17344263222324124, "grad_norm": 609.667236328125, "learning_rate": 3.9376515281345076e-05, "loss": 92.9755, "step": 42930 }, { "epoch": 0.17348303348860886, "grad_norm": 701.4984130859375, "learning_rate": 3.937582326324929e-05, "loss": 89.169, "step": 42940 }, { "epoch": 0.1735234347539765, "grad_norm": 0.0, "learning_rate": 3.937513086741185e-05, "loss": 84.4199, "step": 42950 }, { "epoch": 0.17356383601934414, "grad_norm": 755.2521362304688, "learning_rate": 3.937443809384626e-05, "loss": 87.8608, "step": 42960 }, { "epoch": 0.17360423728471175, "grad_norm": 240.83729553222656, "learning_rate": 3.9373744942566025e-05, "loss": 59.1297, "step": 42970 }, { "epoch": 0.1736446385500794, "grad_norm": 513.7739868164062, "learning_rate": 3.937305141358466e-05, "loss": 75.2643, "step": 42980 }, { "epoch": 0.17368503981544703, "grad_norm": 667.5625610351562, "learning_rate": 3.937235750691569e-05, "loss": 98.8688, "step": 42990 }, { "epoch": 0.17372544108081464, "grad_norm": 540.6917724609375, "learning_rate": 3.9371663222572625e-05, "loss": 91.6689, "step": 43000 }, { "epoch": 0.17376584234618228, "grad_norm": 1611.7796630859375, "learning_rate": 3.937096856056902e-05, "loss": 77.2374, "step": 43010 }, { "epoch": 0.17380624361154992, "grad_norm": 1174.986572265625, "learning_rate": 3.93702735209184e-05, "loss": 88.8681, "step": 43020 }, { "epoch": 0.17384664487691753, "grad_norm": 1794.541259765625, "learning_rate": 3.936957810363434e-05, "loss": 123.8373, "step": 43030 }, { "epoch": 0.17388704614228517, "grad_norm": 643.811279296875, "learning_rate": 3.936888230873037e-05, "loss": 79.1278, "step": 43040 }, { "epoch": 0.1739274474076528, "grad_norm": 1467.1026611328125, "learning_rate": 3.936818613622008e-05, "loss": 112.4248, "step": 43050 }, { "epoch": 0.17396784867302043, "grad_norm": 595.2394409179688, "learning_rate": 3.936748958611702e-05, "loss": 94.1707, "step": 43060 }, { "epoch": 0.17400824993838807, "grad_norm": 2667.404052734375, "learning_rate": 3.936679265843478e-05, "loss": 115.8994, "step": 43070 }, { "epoch": 0.1740486512037557, "grad_norm": 1556.4005126953125, "learning_rate": 3.936609535318695e-05, "loss": 94.7888, "step": 43080 }, { "epoch": 0.17408905246912335, "grad_norm": 621.27099609375, "learning_rate": 3.936539767038712e-05, "loss": 77.0604, "step": 43090 }, { "epoch": 0.17412945373449096, "grad_norm": 711.2188720703125, "learning_rate": 3.9364699610048894e-05, "loss": 89.3644, "step": 43100 }, { "epoch": 0.1741698549998586, "grad_norm": 662.9762573242188, "learning_rate": 3.9364001172185875e-05, "loss": 57.3206, "step": 43110 }, { "epoch": 0.17421025626522624, "grad_norm": 1898.1923828125, "learning_rate": 3.936330235681169e-05, "loss": 103.0097, "step": 43120 }, { "epoch": 0.17425065753059385, "grad_norm": 682.8529052734375, "learning_rate": 3.936260316393995e-05, "loss": 108.5161, "step": 43130 }, { "epoch": 0.1742910587959615, "grad_norm": 369.9712829589844, "learning_rate": 3.936190359358429e-05, "loss": 92.947, "step": 43140 }, { "epoch": 0.17433146006132913, "grad_norm": 757.6825561523438, "learning_rate": 3.936120364575836e-05, "loss": 70.8851, "step": 43150 }, { "epoch": 0.17437186132669674, "grad_norm": 719.6384887695312, "learning_rate": 3.9360503320475795e-05, "loss": 94.8329, "step": 43160 }, { "epoch": 0.17441226259206438, "grad_norm": 917.0863647460938, "learning_rate": 3.935980261775025e-05, "loss": 95.4134, "step": 43170 }, { "epoch": 0.17445266385743202, "grad_norm": 795.0466918945312, "learning_rate": 3.935910153759538e-05, "loss": 75.6606, "step": 43180 }, { "epoch": 0.17449306512279963, "grad_norm": 618.247802734375, "learning_rate": 3.9358400080024867e-05, "loss": 86.7528, "step": 43190 }, { "epoch": 0.17453346638816727, "grad_norm": 850.3892211914062, "learning_rate": 3.935769824505238e-05, "loss": 118.1322, "step": 43200 }, { "epoch": 0.17457386765353491, "grad_norm": 578.2403564453125, "learning_rate": 3.935699603269159e-05, "loss": 81.528, "step": 43210 }, { "epoch": 0.17461426891890253, "grad_norm": 625.4942626953125, "learning_rate": 3.935629344295621e-05, "loss": 87.5598, "step": 43220 }, { "epoch": 0.17465467018427017, "grad_norm": 2594.657470703125, "learning_rate": 3.935559047585991e-05, "loss": 75.9269, "step": 43230 }, { "epoch": 0.1746950714496378, "grad_norm": 841.3003540039062, "learning_rate": 3.935488713141641e-05, "loss": 95.3032, "step": 43240 }, { "epoch": 0.17473547271500545, "grad_norm": 807.5640258789062, "learning_rate": 3.935418340963943e-05, "loss": 95.9208, "step": 43250 }, { "epoch": 0.17477587398037306, "grad_norm": 1481.22509765625, "learning_rate": 3.935347931054267e-05, "loss": 71.7565, "step": 43260 }, { "epoch": 0.1748162752457407, "grad_norm": 1714.5614013671875, "learning_rate": 3.9352774834139875e-05, "loss": 98.7609, "step": 43270 }, { "epoch": 0.17485667651110834, "grad_norm": 801.7985229492188, "learning_rate": 3.9352069980444764e-05, "loss": 103.7962, "step": 43280 }, { "epoch": 0.17489707777647595, "grad_norm": 970.6455078125, "learning_rate": 3.9351364749471095e-05, "loss": 66.2405, "step": 43290 }, { "epoch": 0.1749374790418436, "grad_norm": 665.2471313476562, "learning_rate": 3.93506591412326e-05, "loss": 62.2794, "step": 43300 }, { "epoch": 0.17497788030721123, "grad_norm": 1091.2325439453125, "learning_rate": 3.9349953155743046e-05, "loss": 75.6549, "step": 43310 }, { "epoch": 0.17501828157257884, "grad_norm": 819.3821411132812, "learning_rate": 3.934924679301619e-05, "loss": 78.266, "step": 43320 }, { "epoch": 0.17505868283794648, "grad_norm": 0.0, "learning_rate": 3.934854005306581e-05, "loss": 108.1789, "step": 43330 }, { "epoch": 0.17509908410331412, "grad_norm": 283.76165771484375, "learning_rate": 3.934783293590568e-05, "loss": 67.2186, "step": 43340 }, { "epoch": 0.17513948536868174, "grad_norm": 577.4725341796875, "learning_rate": 3.9347125441549585e-05, "loss": 72.1054, "step": 43350 }, { "epoch": 0.17517988663404938, "grad_norm": 737.1785278320312, "learning_rate": 3.9346417570011316e-05, "loss": 112.302, "step": 43360 }, { "epoch": 0.17522028789941702, "grad_norm": 3394.2783203125, "learning_rate": 3.934570932130468e-05, "loss": 117.2829, "step": 43370 }, { "epoch": 0.17526068916478463, "grad_norm": 893.6383666992188, "learning_rate": 3.934500069544348e-05, "loss": 79.7919, "step": 43380 }, { "epoch": 0.17530109043015227, "grad_norm": 835.4736938476562, "learning_rate": 3.934429169244154e-05, "loss": 87.5427, "step": 43390 }, { "epoch": 0.1753414916955199, "grad_norm": 416.30816650390625, "learning_rate": 3.934358231231266e-05, "loss": 95.4932, "step": 43400 }, { "epoch": 0.17538189296088755, "grad_norm": 764.5811157226562, "learning_rate": 3.934287255507069e-05, "loss": 68.0534, "step": 43410 }, { "epoch": 0.17542229422625516, "grad_norm": 404.3819274902344, "learning_rate": 3.9342162420729464e-05, "loss": 65.4451, "step": 43420 }, { "epoch": 0.1754626954916228, "grad_norm": 862.0521240234375, "learning_rate": 3.9341451909302814e-05, "loss": 96.0394, "step": 43430 }, { "epoch": 0.17550309675699044, "grad_norm": 1187.9178466796875, "learning_rate": 3.934074102080461e-05, "loss": 85.2422, "step": 43440 }, { "epoch": 0.17554349802235805, "grad_norm": 914.9650268554688, "learning_rate": 3.9340029755248707e-05, "loss": 85.6274, "step": 43450 }, { "epoch": 0.1755838992877257, "grad_norm": 528.726318359375, "learning_rate": 3.933931811264896e-05, "loss": 87.7742, "step": 43460 }, { "epoch": 0.17562430055309333, "grad_norm": 575.98095703125, "learning_rate": 3.933860609301925e-05, "loss": 92.3105, "step": 43470 }, { "epoch": 0.17566470181846094, "grad_norm": 656.6376953125, "learning_rate": 3.9337893696373456e-05, "loss": 50.5727, "step": 43480 }, { "epoch": 0.17570510308382858, "grad_norm": 599.2984619140625, "learning_rate": 3.933718092272547e-05, "loss": 80.5968, "step": 43490 }, { "epoch": 0.17574550434919622, "grad_norm": 794.0865478515625, "learning_rate": 3.9336467772089195e-05, "loss": 77.5349, "step": 43500 }, { "epoch": 0.17578590561456384, "grad_norm": 1055.6077880859375, "learning_rate": 3.9335754244478514e-05, "loss": 96.4366, "step": 43510 }, { "epoch": 0.17582630687993148, "grad_norm": 378.260009765625, "learning_rate": 3.9335040339907354e-05, "loss": 57.8697, "step": 43520 }, { "epoch": 0.17586670814529912, "grad_norm": 536.3444213867188, "learning_rate": 3.933432605838963e-05, "loss": 72.2509, "step": 43530 }, { "epoch": 0.17590710941066673, "grad_norm": 2800.318603515625, "learning_rate": 3.933361139993926e-05, "loss": 102.6901, "step": 43540 }, { "epoch": 0.17594751067603437, "grad_norm": 637.18701171875, "learning_rate": 3.933289636457019e-05, "loss": 53.6399, "step": 43550 }, { "epoch": 0.175987911941402, "grad_norm": 835.831787109375, "learning_rate": 3.933218095229634e-05, "loss": 109.055, "step": 43560 }, { "epoch": 0.17602831320676965, "grad_norm": 671.2997436523438, "learning_rate": 3.933146516313169e-05, "loss": 73.9181, "step": 43570 }, { "epoch": 0.17606871447213726, "grad_norm": 916.1385498046875, "learning_rate": 3.933074899709016e-05, "loss": 137.5969, "step": 43580 }, { "epoch": 0.1761091157375049, "grad_norm": 548.91943359375, "learning_rate": 3.933003245418572e-05, "loss": 77.7896, "step": 43590 }, { "epoch": 0.17614951700287254, "grad_norm": 1265.389404296875, "learning_rate": 3.932931553443235e-05, "loss": 82.4361, "step": 43600 }, { "epoch": 0.17618991826824015, "grad_norm": 1766.3031005859375, "learning_rate": 3.9328598237844035e-05, "loss": 82.3127, "step": 43610 }, { "epoch": 0.1762303195336078, "grad_norm": 833.5023803710938, "learning_rate": 3.9327880564434735e-05, "loss": 115.4179, "step": 43620 }, { "epoch": 0.17627072079897543, "grad_norm": 1055.803955078125, "learning_rate": 3.932716251421845e-05, "loss": 67.2918, "step": 43630 }, { "epoch": 0.17631112206434305, "grad_norm": 898.137939453125, "learning_rate": 3.932644408720919e-05, "loss": 99.6032, "step": 43640 }, { "epoch": 0.17635152332971069, "grad_norm": 2444.6591796875, "learning_rate": 3.932572528342094e-05, "loss": 100.4514, "step": 43650 }, { "epoch": 0.17639192459507833, "grad_norm": 1445.8916015625, "learning_rate": 3.932500610286774e-05, "loss": 70.1387, "step": 43660 }, { "epoch": 0.17643232586044594, "grad_norm": 927.367919921875, "learning_rate": 3.9324286545563595e-05, "loss": 93.5744, "step": 43670 }, { "epoch": 0.17647272712581358, "grad_norm": 459.9096984863281, "learning_rate": 3.932356661152253e-05, "loss": 80.8082, "step": 43680 }, { "epoch": 0.17651312839118122, "grad_norm": 2630.177490234375, "learning_rate": 3.9322846300758585e-05, "loss": 79.219, "step": 43690 }, { "epoch": 0.17655352965654883, "grad_norm": 0.0, "learning_rate": 3.9322125613285805e-05, "loss": 60.184, "step": 43700 }, { "epoch": 0.17659393092191647, "grad_norm": 883.2347412109375, "learning_rate": 3.9321404549118236e-05, "loss": 74.5576, "step": 43710 }, { "epoch": 0.1766343321872841, "grad_norm": 650.3308715820312, "learning_rate": 3.9320683108269945e-05, "loss": 103.2459, "step": 43720 }, { "epoch": 0.17667473345265175, "grad_norm": 555.4349365234375, "learning_rate": 3.9319961290754985e-05, "loss": 64.6827, "step": 43730 }, { "epoch": 0.17671513471801936, "grad_norm": 310.9591064453125, "learning_rate": 3.931923909658744e-05, "loss": 77.393, "step": 43740 }, { "epoch": 0.176755535983387, "grad_norm": 919.4443969726562, "learning_rate": 3.931851652578137e-05, "loss": 100.588, "step": 43750 }, { "epoch": 0.17679593724875464, "grad_norm": 673.5975341796875, "learning_rate": 3.931779357835088e-05, "loss": 92.4325, "step": 43760 }, { "epoch": 0.17683633851412225, "grad_norm": 1112.824951171875, "learning_rate": 3.9317070254310056e-05, "loss": 74.7196, "step": 43770 }, { "epoch": 0.1768767397794899, "grad_norm": 1260.9169921875, "learning_rate": 3.931634655367301e-05, "loss": 73.1931, "step": 43780 }, { "epoch": 0.17691714104485753, "grad_norm": 0.0, "learning_rate": 3.931562247645384e-05, "loss": 89.5991, "step": 43790 }, { "epoch": 0.17695754231022515, "grad_norm": 335.5714111328125, "learning_rate": 3.9314898022666656e-05, "loss": 76.3316, "step": 43800 }, { "epoch": 0.1769979435755928, "grad_norm": 840.8833618164062, "learning_rate": 3.9314173192325603e-05, "loss": 131.7811, "step": 43810 }, { "epoch": 0.17703834484096043, "grad_norm": 823.3009643554688, "learning_rate": 3.9313447985444796e-05, "loss": 86.1308, "step": 43820 }, { "epoch": 0.17707874610632804, "grad_norm": 627.9525146484375, "learning_rate": 3.931272240203838e-05, "loss": 70.9252, "step": 43830 }, { "epoch": 0.17711914737169568, "grad_norm": 781.281982421875, "learning_rate": 3.93119964421205e-05, "loss": 66.6196, "step": 43840 }, { "epoch": 0.17715954863706332, "grad_norm": 878.00048828125, "learning_rate": 3.9311270105705296e-05, "loss": 96.4722, "step": 43850 }, { "epoch": 0.17719994990243093, "grad_norm": 634.25244140625, "learning_rate": 3.931054339280695e-05, "loss": 83.8569, "step": 43860 }, { "epoch": 0.17724035116779857, "grad_norm": 3098.12255859375, "learning_rate": 3.930981630343962e-05, "loss": 103.4742, "step": 43870 }, { "epoch": 0.1772807524331662, "grad_norm": 655.8944091796875, "learning_rate": 3.9309088837617464e-05, "loss": 97.8886, "step": 43880 }, { "epoch": 0.17732115369853385, "grad_norm": 849.6776733398438, "learning_rate": 3.930836099535469e-05, "loss": 75.4539, "step": 43890 }, { "epoch": 0.17736155496390146, "grad_norm": 593.6189575195312, "learning_rate": 3.930763277666548e-05, "loss": 96.333, "step": 43900 }, { "epoch": 0.1774019562292691, "grad_norm": 0.0, "learning_rate": 3.9306904181564025e-05, "loss": 46.8365, "step": 43910 }, { "epoch": 0.17744235749463674, "grad_norm": 654.2417602539062, "learning_rate": 3.930617521006454e-05, "loss": 70.6252, "step": 43920 }, { "epoch": 0.17748275876000436, "grad_norm": 984.5786743164062, "learning_rate": 3.9305445862181225e-05, "loss": 79.7624, "step": 43930 }, { "epoch": 0.177523160025372, "grad_norm": 1487.3980712890625, "learning_rate": 3.93047161379283e-05, "loss": 100.9878, "step": 43940 }, { "epoch": 0.17756356129073964, "grad_norm": 725.6891479492188, "learning_rate": 3.9303986037320004e-05, "loss": 75.8188, "step": 43950 }, { "epoch": 0.17760396255610725, "grad_norm": 410.4052734375, "learning_rate": 3.9303255560370554e-05, "loss": 93.6783, "step": 43960 }, { "epoch": 0.1776443638214749, "grad_norm": 746.7178344726562, "learning_rate": 3.93025247070942e-05, "loss": 67.6668, "step": 43970 }, { "epoch": 0.17768476508684253, "grad_norm": 586.8406982421875, "learning_rate": 3.930179347750519e-05, "loss": 74.0758, "step": 43980 }, { "epoch": 0.17772516635221014, "grad_norm": 555.885009765625, "learning_rate": 3.930106187161778e-05, "loss": 79.881, "step": 43990 }, { "epoch": 0.17776556761757778, "grad_norm": 1578.247314453125, "learning_rate": 3.930032988944623e-05, "loss": 101.9592, "step": 44000 }, { "epoch": 0.17780596888294542, "grad_norm": 890.3057861328125, "learning_rate": 3.9299597531004816e-05, "loss": 72.5262, "step": 44010 }, { "epoch": 0.17784637014831303, "grad_norm": 896.8656616210938, "learning_rate": 3.929886479630781e-05, "loss": 113.3441, "step": 44020 }, { "epoch": 0.17788677141368067, "grad_norm": 355.8651123046875, "learning_rate": 3.92981316853695e-05, "loss": 70.6483, "step": 44030 }, { "epoch": 0.1779271726790483, "grad_norm": 547.337890625, "learning_rate": 3.929739819820418e-05, "loss": 131.6473, "step": 44040 }, { "epoch": 0.17796757394441595, "grad_norm": 1019.341064453125, "learning_rate": 3.9296664334826135e-05, "loss": 51.182, "step": 44050 }, { "epoch": 0.17800797520978356, "grad_norm": 1067.612548828125, "learning_rate": 3.9295930095249695e-05, "loss": 117.0978, "step": 44060 }, { "epoch": 0.1780483764751512, "grad_norm": 832.2264404296875, "learning_rate": 3.9295195479489165e-05, "loss": 63.1411, "step": 44070 }, { "epoch": 0.17808877774051884, "grad_norm": 605.8679809570312, "learning_rate": 3.9294460487558865e-05, "loss": 97.1942, "step": 44080 }, { "epoch": 0.17812917900588646, "grad_norm": 546.1611938476562, "learning_rate": 3.929372511947311e-05, "loss": 77.0141, "step": 44090 }, { "epoch": 0.1781695802712541, "grad_norm": 1067.419921875, "learning_rate": 3.929298937524627e-05, "loss": 266.8003, "step": 44100 }, { "epoch": 0.17820998153662174, "grad_norm": 428.70965576171875, "learning_rate": 3.929225325489266e-05, "loss": 76.3916, "step": 44110 }, { "epoch": 0.17825038280198935, "grad_norm": 632.5858154296875, "learning_rate": 3.929151675842664e-05, "loss": 58.2997, "step": 44120 }, { "epoch": 0.178290784067357, "grad_norm": 895.70654296875, "learning_rate": 3.929077988586257e-05, "loss": 93.6626, "step": 44130 }, { "epoch": 0.17833118533272463, "grad_norm": 776.591552734375, "learning_rate": 3.9290042637214815e-05, "loss": 81.59, "step": 44140 }, { "epoch": 0.17837158659809224, "grad_norm": 1873.4609375, "learning_rate": 3.9289305012497745e-05, "loss": 100.9556, "step": 44150 }, { "epoch": 0.17841198786345988, "grad_norm": 761.1587524414062, "learning_rate": 3.928856701172575e-05, "loss": 80.4297, "step": 44160 }, { "epoch": 0.17845238912882752, "grad_norm": 805.0286254882812, "learning_rate": 3.928782863491321e-05, "loss": 76.5413, "step": 44170 }, { "epoch": 0.17849279039419513, "grad_norm": 711.6513671875, "learning_rate": 3.928708988207452e-05, "loss": 51.0569, "step": 44180 }, { "epoch": 0.17853319165956277, "grad_norm": 1271.82568359375, "learning_rate": 3.928635075322407e-05, "loss": 73.9333, "step": 44190 }, { "epoch": 0.1785735929249304, "grad_norm": 864.5879516601562, "learning_rate": 3.92856112483763e-05, "loss": 50.6767, "step": 44200 }, { "epoch": 0.17861399419029805, "grad_norm": 958.3281860351562, "learning_rate": 3.9284871367545595e-05, "loss": 88.4344, "step": 44210 }, { "epoch": 0.17865439545566567, "grad_norm": 1121.1851806640625, "learning_rate": 3.9284131110746404e-05, "loss": 100.7003, "step": 44220 }, { "epoch": 0.1786947967210333, "grad_norm": 685.8429565429688, "learning_rate": 3.928339047799315e-05, "loss": 73.3415, "step": 44230 }, { "epoch": 0.17873519798640095, "grad_norm": 2012.408935546875, "learning_rate": 3.928264946930027e-05, "loss": 81.0713, "step": 44240 }, { "epoch": 0.17877559925176856, "grad_norm": 1126.882568359375, "learning_rate": 3.92819080846822e-05, "loss": 70.0383, "step": 44250 }, { "epoch": 0.1788160005171362, "grad_norm": 861.8523559570312, "learning_rate": 3.928116632415342e-05, "loss": 91.0342, "step": 44260 }, { "epoch": 0.17885640178250384, "grad_norm": 1251.7584228515625, "learning_rate": 3.928042418772837e-05, "loss": 138.0804, "step": 44270 }, { "epoch": 0.17889680304787145, "grad_norm": 1022.703857421875, "learning_rate": 3.927968167542152e-05, "loss": 82.487, "step": 44280 }, { "epoch": 0.1789372043132391, "grad_norm": 1013.7217407226562, "learning_rate": 3.9278938787247354e-05, "loss": 71.4098, "step": 44290 }, { "epoch": 0.17897760557860673, "grad_norm": 1512.89990234375, "learning_rate": 3.927819552322035e-05, "loss": 90.1114, "step": 44300 }, { "epoch": 0.17901800684397434, "grad_norm": 365.7040100097656, "learning_rate": 3.9277451883355005e-05, "loss": 59.6052, "step": 44310 }, { "epoch": 0.17905840810934198, "grad_norm": 4800.70263671875, "learning_rate": 3.9276707867665805e-05, "loss": 81.1484, "step": 44320 }, { "epoch": 0.17909880937470962, "grad_norm": 537.82568359375, "learning_rate": 3.927596347616726e-05, "loss": 85.3282, "step": 44330 }, { "epoch": 0.17913921064007723, "grad_norm": 815.4710693359375, "learning_rate": 3.9275218708873887e-05, "loss": 72.1017, "step": 44340 }, { "epoch": 0.17917961190544487, "grad_norm": 1025.384765625, "learning_rate": 3.92744735658002e-05, "loss": 132.5589, "step": 44350 }, { "epoch": 0.17922001317081251, "grad_norm": 359.1304016113281, "learning_rate": 3.9273728046960726e-05, "loss": 82.7631, "step": 44360 }, { "epoch": 0.17926041443618015, "grad_norm": 1203.0955810546875, "learning_rate": 3.927298215237e-05, "loss": 66.8553, "step": 44370 }, { "epoch": 0.17930081570154777, "grad_norm": 689.65478515625, "learning_rate": 3.927223588204257e-05, "loss": 66.0106, "step": 44380 }, { "epoch": 0.1793412169669154, "grad_norm": 832.0293579101562, "learning_rate": 3.927148923599297e-05, "loss": 67.1883, "step": 44390 }, { "epoch": 0.17938161823228305, "grad_norm": 1041.6201171875, "learning_rate": 3.927074221423578e-05, "loss": 67.549, "step": 44400 }, { "epoch": 0.17942201949765066, "grad_norm": 566.5820922851562, "learning_rate": 3.9269994816785535e-05, "loss": 75.9646, "step": 44410 }, { "epoch": 0.1794624207630183, "grad_norm": 1437.30029296875, "learning_rate": 3.9269247043656825e-05, "loss": 107.2695, "step": 44420 }, { "epoch": 0.17950282202838594, "grad_norm": 663.2697143554688, "learning_rate": 3.926849889486423e-05, "loss": 72.5427, "step": 44430 }, { "epoch": 0.17954322329375355, "grad_norm": 1476.98974609375, "learning_rate": 3.926775037042232e-05, "loss": 84.6121, "step": 44440 }, { "epoch": 0.1795836245591212, "grad_norm": 629.34130859375, "learning_rate": 3.92670014703457e-05, "loss": 86.1632, "step": 44450 }, { "epoch": 0.17962402582448883, "grad_norm": 900.3147583007812, "learning_rate": 3.926625219464897e-05, "loss": 61.9779, "step": 44460 }, { "epoch": 0.17966442708985644, "grad_norm": 444.7907409667969, "learning_rate": 3.926550254334673e-05, "loss": 71.5317, "step": 44470 }, { "epoch": 0.17970482835522408, "grad_norm": 1078.9156494140625, "learning_rate": 3.92647525164536e-05, "loss": 74.6889, "step": 44480 }, { "epoch": 0.17974522962059172, "grad_norm": 1081.789794921875, "learning_rate": 3.92640021139842e-05, "loss": 104.8707, "step": 44490 }, { "epoch": 0.17978563088595934, "grad_norm": 642.1022338867188, "learning_rate": 3.926325133595317e-05, "loss": 65.5858, "step": 44500 }, { "epoch": 0.17982603215132698, "grad_norm": 1025.814208984375, "learning_rate": 3.926250018237513e-05, "loss": 86.6868, "step": 44510 }, { "epoch": 0.17986643341669462, "grad_norm": 2212.697021484375, "learning_rate": 3.926174865326473e-05, "loss": 110.0421, "step": 44520 }, { "epoch": 0.17990683468206226, "grad_norm": 890.4210815429688, "learning_rate": 3.926099674863663e-05, "loss": 58.2174, "step": 44530 }, { "epoch": 0.17994723594742987, "grad_norm": 1108.4072265625, "learning_rate": 3.926024446850548e-05, "loss": 82.1017, "step": 44540 }, { "epoch": 0.1799876372127975, "grad_norm": 1117.5262451171875, "learning_rate": 3.925949181288595e-05, "loss": 48.2448, "step": 44550 }, { "epoch": 0.18002803847816515, "grad_norm": 665.634521484375, "learning_rate": 3.92587387817927e-05, "loss": 67.9298, "step": 44560 }, { "epoch": 0.18006843974353276, "grad_norm": 1632.61328125, "learning_rate": 3.9257985375240435e-05, "loss": 108.7696, "step": 44570 }, { "epoch": 0.1801088410089004, "grad_norm": 790.5283813476562, "learning_rate": 3.925723159324383e-05, "loss": 98.9906, "step": 44580 }, { "epoch": 0.18014924227426804, "grad_norm": 500.9121398925781, "learning_rate": 3.925647743581758e-05, "loss": 68.065, "step": 44590 }, { "epoch": 0.18018964353963565, "grad_norm": 327.4957580566406, "learning_rate": 3.925572290297638e-05, "loss": 79.8086, "step": 44600 }, { "epoch": 0.1802300448050033, "grad_norm": 786.4631958007812, "learning_rate": 3.9254967994734954e-05, "loss": 87.2137, "step": 44610 }, { "epoch": 0.18027044607037093, "grad_norm": 743.5513305664062, "learning_rate": 3.925421271110802e-05, "loss": 96.5469, "step": 44620 }, { "epoch": 0.18031084733573854, "grad_norm": 1273.8321533203125, "learning_rate": 3.925345705211029e-05, "loss": 109.8698, "step": 44630 }, { "epoch": 0.18035124860110618, "grad_norm": 876.0645751953125, "learning_rate": 3.92527010177565e-05, "loss": 73.0859, "step": 44640 }, { "epoch": 0.18039164986647382, "grad_norm": 811.0771484375, "learning_rate": 3.9251944608061395e-05, "loss": 54.302, "step": 44650 }, { "epoch": 0.18043205113184144, "grad_norm": 1616.77685546875, "learning_rate": 3.9251187823039715e-05, "loss": 101.0898, "step": 44660 }, { "epoch": 0.18047245239720908, "grad_norm": 517.669189453125, "learning_rate": 3.925043066270622e-05, "loss": 55.0952, "step": 44670 }, { "epoch": 0.18051285366257672, "grad_norm": 445.7159118652344, "learning_rate": 3.9249673127075675e-05, "loss": 76.503, "step": 44680 }, { "epoch": 0.18055325492794436, "grad_norm": 1248.5576171875, "learning_rate": 3.924891521616283e-05, "loss": 91.7174, "step": 44690 }, { "epoch": 0.18059365619331197, "grad_norm": 721.5814819335938, "learning_rate": 3.924815692998248e-05, "loss": 82.0018, "step": 44700 }, { "epoch": 0.1806340574586796, "grad_norm": 2136.960693359375, "learning_rate": 3.9247398268549405e-05, "loss": 85.2536, "step": 44710 }, { "epoch": 0.18067445872404725, "grad_norm": 509.6750183105469, "learning_rate": 3.9246639231878384e-05, "loss": 102.8554, "step": 44720 }, { "epoch": 0.18071485998941486, "grad_norm": 865.4503173828125, "learning_rate": 3.924587981998422e-05, "loss": 83.089, "step": 44730 }, { "epoch": 0.1807552612547825, "grad_norm": 921.2974243164062, "learning_rate": 3.924512003288173e-05, "loss": 85.8162, "step": 44740 }, { "epoch": 0.18079566252015014, "grad_norm": 1070.1435546875, "learning_rate": 3.924435987058571e-05, "loss": 64.5532, "step": 44750 }, { "epoch": 0.18083606378551775, "grad_norm": 1231.571044921875, "learning_rate": 3.924359933311099e-05, "loss": 77.4203, "step": 44760 }, { "epoch": 0.1808764650508854, "grad_norm": 943.9061279296875, "learning_rate": 3.9242838420472393e-05, "loss": 104.9452, "step": 44770 }, { "epoch": 0.18091686631625303, "grad_norm": 963.1873779296875, "learning_rate": 3.9242077132684755e-05, "loss": 47.5799, "step": 44780 }, { "epoch": 0.18095726758162065, "grad_norm": 648.9423828125, "learning_rate": 3.9241315469762915e-05, "loss": 64.1059, "step": 44790 }, { "epoch": 0.18099766884698829, "grad_norm": 454.11474609375, "learning_rate": 3.924055343172172e-05, "loss": 94.2031, "step": 44800 }, { "epoch": 0.18103807011235593, "grad_norm": 713.5806274414062, "learning_rate": 3.9239791018576034e-05, "loss": 82.6018, "step": 44810 }, { "epoch": 0.18107847137772354, "grad_norm": 497.6768798828125, "learning_rate": 3.923902823034072e-05, "loss": 60.9297, "step": 44820 }, { "epoch": 0.18111887264309118, "grad_norm": 369.3504333496094, "learning_rate": 3.923826506703064e-05, "loss": 77.6983, "step": 44830 }, { "epoch": 0.18115927390845882, "grad_norm": 633.0171508789062, "learning_rate": 3.923750152866068e-05, "loss": 65.9964, "step": 44840 }, { "epoch": 0.18119967517382646, "grad_norm": 1098.6373291015625, "learning_rate": 3.9236737615245726e-05, "loss": 82.9418, "step": 44850 }, { "epoch": 0.18124007643919407, "grad_norm": 819.2840576171875, "learning_rate": 3.923597332680067e-05, "loss": 84.5484, "step": 44860 }, { "epoch": 0.1812804777045617, "grad_norm": 656.0801391601562, "learning_rate": 3.9235208663340404e-05, "loss": 93.1484, "step": 44870 }, { "epoch": 0.18132087896992935, "grad_norm": 2753.8642578125, "learning_rate": 3.923444362487985e-05, "loss": 54.1425, "step": 44880 }, { "epoch": 0.18136128023529696, "grad_norm": 1034.2335205078125, "learning_rate": 3.923367821143391e-05, "loss": 85.2575, "step": 44890 }, { "epoch": 0.1814016815006646, "grad_norm": 650.7776489257812, "learning_rate": 3.923291242301751e-05, "loss": 83.4574, "step": 44900 }, { "epoch": 0.18144208276603224, "grad_norm": 1090.718505859375, "learning_rate": 3.923214625964558e-05, "loss": 48.8431, "step": 44910 }, { "epoch": 0.18148248403139985, "grad_norm": 678.7628784179688, "learning_rate": 3.923137972133306e-05, "loss": 75.1513, "step": 44920 }, { "epoch": 0.1815228852967675, "grad_norm": 852.8782348632812, "learning_rate": 3.923061280809489e-05, "loss": 70.6103, "step": 44930 }, { "epoch": 0.18156328656213513, "grad_norm": 637.3016357421875, "learning_rate": 3.922984551994602e-05, "loss": 93.514, "step": 44940 }, { "epoch": 0.18160368782750275, "grad_norm": 600.9539184570312, "learning_rate": 3.922907785690142e-05, "loss": 43.0151, "step": 44950 }, { "epoch": 0.1816440890928704, "grad_norm": 1205.2518310546875, "learning_rate": 3.922830981897604e-05, "loss": 82.0866, "step": 44960 }, { "epoch": 0.18168449035823803, "grad_norm": 1370.759765625, "learning_rate": 3.922754140618486e-05, "loss": 63.6987, "step": 44970 }, { "epoch": 0.18172489162360564, "grad_norm": 664.2175903320312, "learning_rate": 3.9226772618542874e-05, "loss": 85.1669, "step": 44980 }, { "epoch": 0.18176529288897328, "grad_norm": 473.1387634277344, "learning_rate": 3.9226003456065045e-05, "loss": 64.7944, "step": 44990 }, { "epoch": 0.18180569415434092, "grad_norm": 971.3717651367188, "learning_rate": 3.922523391876638e-05, "loss": 82.8255, "step": 45000 }, { "epoch": 0.18184609541970856, "grad_norm": 791.2940063476562, "learning_rate": 3.9224464006661885e-05, "loss": 92.3966, "step": 45010 }, { "epoch": 0.18188649668507617, "grad_norm": 2711.322265625, "learning_rate": 3.922369371976657e-05, "loss": 97.3031, "step": 45020 }, { "epoch": 0.1819268979504438, "grad_norm": 500.21954345703125, "learning_rate": 3.9222923058095445e-05, "loss": 87.3782, "step": 45030 }, { "epoch": 0.18196729921581145, "grad_norm": 977.1084594726562, "learning_rate": 3.922215202166354e-05, "loss": 61.2811, "step": 45040 }, { "epoch": 0.18200770048117906, "grad_norm": 710.3567504882812, "learning_rate": 3.922138061048588e-05, "loss": 94.9242, "step": 45050 }, { "epoch": 0.1820481017465467, "grad_norm": 866.1929931640625, "learning_rate": 3.922060882457751e-05, "loss": 95.5972, "step": 45060 }, { "epoch": 0.18208850301191434, "grad_norm": 495.1076965332031, "learning_rate": 3.921983666395348e-05, "loss": 112.3219, "step": 45070 }, { "epoch": 0.18212890427728196, "grad_norm": 582.1326293945312, "learning_rate": 3.921906412862884e-05, "loss": 86.3041, "step": 45080 }, { "epoch": 0.1821693055426496, "grad_norm": 728.2086791992188, "learning_rate": 3.921829121861865e-05, "loss": 78.3341, "step": 45090 }, { "epoch": 0.18220970680801724, "grad_norm": 956.2645263671875, "learning_rate": 3.9217517933937974e-05, "loss": 94.1187, "step": 45100 }, { "epoch": 0.18225010807338485, "grad_norm": 1114.2762451171875, "learning_rate": 3.9216744274601895e-05, "loss": 84.1699, "step": 45110 }, { "epoch": 0.1822905093387525, "grad_norm": 477.52532958984375, "learning_rate": 3.921597024062549e-05, "loss": 110.3044, "step": 45120 }, { "epoch": 0.18233091060412013, "grad_norm": 801.48828125, "learning_rate": 3.921519583202386e-05, "loss": 65.7045, "step": 45130 }, { "epoch": 0.18237131186948774, "grad_norm": 406.8154602050781, "learning_rate": 3.921442104881209e-05, "loss": 65.1984, "step": 45140 }, { "epoch": 0.18241171313485538, "grad_norm": 597.41162109375, "learning_rate": 3.9213645891005284e-05, "loss": 74.5257, "step": 45150 }, { "epoch": 0.18245211440022302, "grad_norm": 1153.83837890625, "learning_rate": 3.921287035861857e-05, "loss": 94.8159, "step": 45160 }, { "epoch": 0.18249251566559066, "grad_norm": 621.4681396484375, "learning_rate": 3.921209445166705e-05, "loss": 58.4845, "step": 45170 }, { "epoch": 0.18253291693095827, "grad_norm": 1161.484130859375, "learning_rate": 3.9211318170165854e-05, "loss": 77.8721, "step": 45180 }, { "epoch": 0.1825733181963259, "grad_norm": 1004.8511352539062, "learning_rate": 3.9210541514130126e-05, "loss": 80.6697, "step": 45190 }, { "epoch": 0.18261371946169355, "grad_norm": 974.8493041992188, "learning_rate": 3.9209764483575e-05, "loss": 111.7471, "step": 45200 }, { "epoch": 0.18265412072706116, "grad_norm": 810.5325927734375, "learning_rate": 3.9208987078515625e-05, "loss": 81.0349, "step": 45210 }, { "epoch": 0.1826945219924288, "grad_norm": 514.47998046875, "learning_rate": 3.9208209298967156e-05, "loss": 49.2597, "step": 45220 }, { "epoch": 0.18273492325779644, "grad_norm": 864.6641235351562, "learning_rate": 3.920743114494476e-05, "loss": 81.931, "step": 45230 }, { "epoch": 0.18277532452316406, "grad_norm": 1110.948974609375, "learning_rate": 3.92066526164636e-05, "loss": 70.5063, "step": 45240 }, { "epoch": 0.1828157257885317, "grad_norm": 775.122314453125, "learning_rate": 3.9205873713538864e-05, "loss": 74.6872, "step": 45250 }, { "epoch": 0.18285612705389934, "grad_norm": 1156.91064453125, "learning_rate": 3.9205094436185734e-05, "loss": 76.9457, "step": 45260 }, { "epoch": 0.18289652831926695, "grad_norm": 1151.217041015625, "learning_rate": 3.9204314784419396e-05, "loss": 82.7603, "step": 45270 }, { "epoch": 0.1829369295846346, "grad_norm": 1386.2657470703125, "learning_rate": 3.9203534758255056e-05, "loss": 96.6438, "step": 45280 }, { "epoch": 0.18297733085000223, "grad_norm": 991.702880859375, "learning_rate": 3.920275435770791e-05, "loss": 75.2325, "step": 45290 }, { "epoch": 0.18301773211536984, "grad_norm": 782.2591552734375, "learning_rate": 3.92019735827932e-05, "loss": 73.0687, "step": 45300 }, { "epoch": 0.18305813338073748, "grad_norm": 850.5282592773438, "learning_rate": 3.920119243352612e-05, "loss": 60.5605, "step": 45310 }, { "epoch": 0.18309853464610512, "grad_norm": 1552.738037109375, "learning_rate": 3.9200410909921903e-05, "loss": 65.0465, "step": 45320 }, { "epoch": 0.18313893591147276, "grad_norm": 1523.2933349609375, "learning_rate": 3.91996290119958e-05, "loss": 82.3612, "step": 45330 }, { "epoch": 0.18317933717684037, "grad_norm": 498.4686279296875, "learning_rate": 3.919884673976304e-05, "loss": 79.3921, "step": 45340 }, { "epoch": 0.183219738442208, "grad_norm": 1048.4293212890625, "learning_rate": 3.9198064093238874e-05, "loss": 109.0177, "step": 45350 }, { "epoch": 0.18326013970757565, "grad_norm": 1083.177734375, "learning_rate": 3.9197281072438576e-05, "loss": 112.6941, "step": 45360 }, { "epoch": 0.18330054097294327, "grad_norm": 726.718505859375, "learning_rate": 3.919649767737739e-05, "loss": 70.6048, "step": 45370 }, { "epoch": 0.1833409422383109, "grad_norm": 895.0009765625, "learning_rate": 3.9195713908070606e-05, "loss": 73.5802, "step": 45380 }, { "epoch": 0.18338134350367855, "grad_norm": 878.7771606445312, "learning_rate": 3.919492976453349e-05, "loss": 114.3825, "step": 45390 }, { "epoch": 0.18342174476904616, "grad_norm": 1377.43896484375, "learning_rate": 3.9194145246781336e-05, "loss": 103.3703, "step": 45400 }, { "epoch": 0.1834621460344138, "grad_norm": 650.5206909179688, "learning_rate": 3.919336035482944e-05, "loss": 86.7826, "step": 45410 }, { "epoch": 0.18350254729978144, "grad_norm": 680.419189453125, "learning_rate": 3.9192575088693104e-05, "loss": 89.4733, "step": 45420 }, { "epoch": 0.18354294856514905, "grad_norm": 864.0675048828125, "learning_rate": 3.9191789448387634e-05, "loss": 74.6113, "step": 45430 }, { "epoch": 0.1835833498305167, "grad_norm": 359.46331787109375, "learning_rate": 3.919100343392835e-05, "loss": 61.1938, "step": 45440 }, { "epoch": 0.18362375109588433, "grad_norm": 260.6347351074219, "learning_rate": 3.919021704533058e-05, "loss": 101.0921, "step": 45450 }, { "epoch": 0.18366415236125194, "grad_norm": 714.068603515625, "learning_rate": 3.918943028260964e-05, "loss": 91.7542, "step": 45460 }, { "epoch": 0.18370455362661958, "grad_norm": 1035.927490234375, "learning_rate": 3.918864314578087e-05, "loss": 93.5492, "step": 45470 }, { "epoch": 0.18374495489198722, "grad_norm": 1542.9630126953125, "learning_rate": 3.918785563485964e-05, "loss": 106.8872, "step": 45480 }, { "epoch": 0.18378535615735486, "grad_norm": 806.6215209960938, "learning_rate": 3.918706774986128e-05, "loss": 92.174, "step": 45490 }, { "epoch": 0.18382575742272247, "grad_norm": 764.0255737304688, "learning_rate": 3.918627949080115e-05, "loss": 100.8685, "step": 45500 }, { "epoch": 0.18386615868809011, "grad_norm": 836.418212890625, "learning_rate": 3.918549085769464e-05, "loss": 75.9819, "step": 45510 }, { "epoch": 0.18390655995345775, "grad_norm": 670.6646728515625, "learning_rate": 3.918470185055709e-05, "loss": 63.5554, "step": 45520 }, { "epoch": 0.18394696121882537, "grad_norm": 1930.96435546875, "learning_rate": 3.91839124694039e-05, "loss": 109.9029, "step": 45530 }, { "epoch": 0.183987362484193, "grad_norm": 1163.8868408203125, "learning_rate": 3.918312271425047e-05, "loss": 85.8055, "step": 45540 }, { "epoch": 0.18402776374956065, "grad_norm": 786.4589233398438, "learning_rate": 3.9182332585112186e-05, "loss": 82.8965, "step": 45550 }, { "epoch": 0.18406816501492826, "grad_norm": 845.56298828125, "learning_rate": 3.918154208200445e-05, "loss": 123.4536, "step": 45560 }, { "epoch": 0.1841085662802959, "grad_norm": 677.166259765625, "learning_rate": 3.918075120494267e-05, "loss": 87.6609, "step": 45570 }, { "epoch": 0.18414896754566354, "grad_norm": 1088.3443603515625, "learning_rate": 3.9179959953942283e-05, "loss": 119.1445, "step": 45580 }, { "epoch": 0.18418936881103115, "grad_norm": 1323.2698974609375, "learning_rate": 3.917916832901869e-05, "loss": 88.3157, "step": 45590 }, { "epoch": 0.1842297700763988, "grad_norm": 1136.53759765625, "learning_rate": 3.917837633018734e-05, "loss": 93.4334, "step": 45600 }, { "epoch": 0.18427017134176643, "grad_norm": 808.8739624023438, "learning_rate": 3.917758395746368e-05, "loss": 65.8987, "step": 45610 }, { "epoch": 0.18431057260713404, "grad_norm": 850.0430908203125, "learning_rate": 3.9176791210863136e-05, "loss": 154.3409, "step": 45620 }, { "epoch": 0.18435097387250168, "grad_norm": 1013.5833740234375, "learning_rate": 3.917599809040117e-05, "loss": 119.8127, "step": 45630 }, { "epoch": 0.18439137513786932, "grad_norm": 1381.6162109375, "learning_rate": 3.917520459609326e-05, "loss": 86.2685, "step": 45640 }, { "epoch": 0.18443177640323694, "grad_norm": 307.11041259765625, "learning_rate": 3.9174410727954856e-05, "loss": 66.6993, "step": 45650 }, { "epoch": 0.18447217766860458, "grad_norm": 1673.486083984375, "learning_rate": 3.917361648600145e-05, "loss": 88.8105, "step": 45660 }, { "epoch": 0.18451257893397222, "grad_norm": 1405.7908935546875, "learning_rate": 3.9172821870248524e-05, "loss": 88.505, "step": 45670 }, { "epoch": 0.18455298019933986, "grad_norm": 954.478271484375, "learning_rate": 3.917202688071155e-05, "loss": 58.216, "step": 45680 }, { "epoch": 0.18459338146470747, "grad_norm": 1570.70703125, "learning_rate": 3.917123151740605e-05, "loss": 102.5098, "step": 45690 }, { "epoch": 0.1846337827300751, "grad_norm": 806.4921264648438, "learning_rate": 3.917043578034752e-05, "loss": 60.7017, "step": 45700 }, { "epoch": 0.18467418399544275, "grad_norm": 2194.05908203125, "learning_rate": 3.9169639669551476e-05, "loss": 67.0707, "step": 45710 }, { "epoch": 0.18471458526081036, "grad_norm": 1100.316162109375, "learning_rate": 3.9168843185033434e-05, "loss": 59.0874, "step": 45720 }, { "epoch": 0.184754986526178, "grad_norm": 1363.7559814453125, "learning_rate": 3.9168046326808934e-05, "loss": 65.7012, "step": 45730 }, { "epoch": 0.18479538779154564, "grad_norm": 704.1640014648438, "learning_rate": 3.9167249094893495e-05, "loss": 60.0356, "step": 45740 }, { "epoch": 0.18483578905691325, "grad_norm": 275.0506591796875, "learning_rate": 3.916645148930267e-05, "loss": 92.7959, "step": 45750 }, { "epoch": 0.1848761903222809, "grad_norm": 422.1753845214844, "learning_rate": 3.9165653510052004e-05, "loss": 83.3863, "step": 45760 }, { "epoch": 0.18491659158764853, "grad_norm": 612.3901977539062, "learning_rate": 3.9164855157157055e-05, "loss": 64.9848, "step": 45770 }, { "epoch": 0.18495699285301614, "grad_norm": 1202.4676513671875, "learning_rate": 3.916405643063339e-05, "loss": 80.2309, "step": 45780 }, { "epoch": 0.18499739411838378, "grad_norm": 452.2706298828125, "learning_rate": 3.9163257330496576e-05, "loss": 53.2638, "step": 45790 }, { "epoch": 0.18503779538375142, "grad_norm": 466.4120178222656, "learning_rate": 3.91624578567622e-05, "loss": 72.9452, "step": 45800 }, { "epoch": 0.18507819664911904, "grad_norm": 1306.1180419921875, "learning_rate": 3.9161658009445834e-05, "loss": 110.0645, "step": 45810 }, { "epoch": 0.18511859791448668, "grad_norm": 980.823974609375, "learning_rate": 3.9160857788563085e-05, "loss": 105.6, "step": 45820 }, { "epoch": 0.18515899917985432, "grad_norm": 313.7354431152344, "learning_rate": 3.9160057194129544e-05, "loss": 57.1493, "step": 45830 }, { "epoch": 0.18519940044522196, "grad_norm": 578.3187866210938, "learning_rate": 3.915925622616083e-05, "loss": 78.0657, "step": 45840 }, { "epoch": 0.18523980171058957, "grad_norm": 960.7958984375, "learning_rate": 3.915845488467255e-05, "loss": 80.249, "step": 45850 }, { "epoch": 0.1852802029759572, "grad_norm": 739.0011596679688, "learning_rate": 3.915765316968033e-05, "loss": 96.2227, "step": 45860 }, { "epoch": 0.18532060424132485, "grad_norm": 924.9442749023438, "learning_rate": 3.91568510811998e-05, "loss": 47.8373, "step": 45870 }, { "epoch": 0.18536100550669246, "grad_norm": 935.7515258789062, "learning_rate": 3.915604861924659e-05, "loss": 65.6091, "step": 45880 }, { "epoch": 0.1854014067720601, "grad_norm": 1007.1822509765625, "learning_rate": 3.915524578383635e-05, "loss": 62.2943, "step": 45890 }, { "epoch": 0.18544180803742774, "grad_norm": 569.2777099609375, "learning_rate": 3.9154442574984735e-05, "loss": 72.1652, "step": 45900 }, { "epoch": 0.18548220930279535, "grad_norm": 564.3727416992188, "learning_rate": 3.91536389927074e-05, "loss": 83.1148, "step": 45910 }, { "epoch": 0.185522610568163, "grad_norm": 1044.73779296875, "learning_rate": 3.9152835037020013e-05, "loss": 79.5553, "step": 45920 }, { "epoch": 0.18556301183353063, "grad_norm": 1104.0501708984375, "learning_rate": 3.915203070793825e-05, "loss": 75.241, "step": 45930 }, { "epoch": 0.18560341309889825, "grad_norm": 1001.4999389648438, "learning_rate": 3.9151226005477786e-05, "loss": 100.4497, "step": 45940 }, { "epoch": 0.18564381436426589, "grad_norm": 1332.5491943359375, "learning_rate": 3.91504209296543e-05, "loss": 103.7096, "step": 45950 }, { "epoch": 0.18568421562963353, "grad_norm": 1234.172119140625, "learning_rate": 3.914961548048351e-05, "loss": 74.3093, "step": 45960 }, { "epoch": 0.18572461689500114, "grad_norm": 1303.2974853515625, "learning_rate": 3.91488096579811e-05, "loss": 74.6654, "step": 45970 }, { "epoch": 0.18576501816036878, "grad_norm": 2073.360107421875, "learning_rate": 3.914800346216279e-05, "loss": 68.3085, "step": 45980 }, { "epoch": 0.18580541942573642, "grad_norm": 485.2785339355469, "learning_rate": 3.9147196893044305e-05, "loss": 68.9047, "step": 45990 }, { "epoch": 0.18584582069110406, "grad_norm": 1112.5963134765625, "learning_rate": 3.9146389950641345e-05, "loss": 69.142, "step": 46000 }, { "epoch": 0.18588622195647167, "grad_norm": 1303.632080078125, "learning_rate": 3.914558263496966e-05, "loss": 65.932, "step": 46010 }, { "epoch": 0.1859266232218393, "grad_norm": 855.09423828125, "learning_rate": 3.914477494604499e-05, "loss": 48.3135, "step": 46020 }, { "epoch": 0.18596702448720695, "grad_norm": 1216.570556640625, "learning_rate": 3.914396688388308e-05, "loss": 48.3435, "step": 46030 }, { "epoch": 0.18600742575257456, "grad_norm": 711.9722900390625, "learning_rate": 3.914315844849966e-05, "loss": 86.1562, "step": 46040 }, { "epoch": 0.1860478270179422, "grad_norm": 703.8426513671875, "learning_rate": 3.914234963991053e-05, "loss": 102.8623, "step": 46050 }, { "epoch": 0.18608822828330984, "grad_norm": 1274.6202392578125, "learning_rate": 3.914154045813143e-05, "loss": 79.2652, "step": 46060 }, { "epoch": 0.18612862954867745, "grad_norm": 709.7684936523438, "learning_rate": 3.914073090317814e-05, "loss": 117.7384, "step": 46070 }, { "epoch": 0.1861690308140451, "grad_norm": 422.33416748046875, "learning_rate": 3.913992097506645e-05, "loss": 82.4315, "step": 46080 }, { "epoch": 0.18620943207941273, "grad_norm": 0.0, "learning_rate": 3.9139110673812155e-05, "loss": 64.9715, "step": 46090 }, { "epoch": 0.18624983334478035, "grad_norm": 1187.5250244140625, "learning_rate": 3.913829999943103e-05, "loss": 66.9666, "step": 46100 }, { "epoch": 0.186290234610148, "grad_norm": 1188.3201904296875, "learning_rate": 3.91374889519389e-05, "loss": 121.7604, "step": 46110 }, { "epoch": 0.18633063587551563, "grad_norm": 1243.7998046875, "learning_rate": 3.913667753135157e-05, "loss": 106.2775, "step": 46120 }, { "epoch": 0.18637103714088324, "grad_norm": 1100.300048828125, "learning_rate": 3.913586573768486e-05, "loss": 67.3994, "step": 46130 }, { "epoch": 0.18641143840625088, "grad_norm": 580.4454345703125, "learning_rate": 3.91350535709546e-05, "loss": 79.131, "step": 46140 }, { "epoch": 0.18645183967161852, "grad_norm": 1536.91650390625, "learning_rate": 3.9134241031176614e-05, "loss": 74.0721, "step": 46150 }, { "epoch": 0.18649224093698616, "grad_norm": 1414.6435546875, "learning_rate": 3.9133428118366746e-05, "loss": 91.2891, "step": 46160 }, { "epoch": 0.18653264220235377, "grad_norm": 4572.24169921875, "learning_rate": 3.9132614832540854e-05, "loss": 122.7679, "step": 46170 }, { "epoch": 0.1865730434677214, "grad_norm": 670.1581420898438, "learning_rate": 3.913180117371478e-05, "loss": 69.168, "step": 46180 }, { "epoch": 0.18661344473308905, "grad_norm": 1101.5228271484375, "learning_rate": 3.913098714190439e-05, "loss": 86.6942, "step": 46190 }, { "epoch": 0.18665384599845666, "grad_norm": 890.9598999023438, "learning_rate": 3.913017273712556e-05, "loss": 94.4553, "step": 46200 }, { "epoch": 0.1866942472638243, "grad_norm": 1160.6763916015625, "learning_rate": 3.9129357959394166e-05, "loss": 175.9972, "step": 46210 }, { "epoch": 0.18673464852919194, "grad_norm": 561.4642333984375, "learning_rate": 3.9128542808726084e-05, "loss": 61.6344, "step": 46220 }, { "epoch": 0.18677504979455956, "grad_norm": 2253.88818359375, "learning_rate": 3.912772728513722e-05, "loss": 80.0333, "step": 46230 }, { "epoch": 0.1868154510599272, "grad_norm": 0.0, "learning_rate": 3.912691138864346e-05, "loss": 88.1769, "step": 46240 }, { "epoch": 0.18685585232529484, "grad_norm": 0.0, "learning_rate": 3.912609511926071e-05, "loss": 75.8361, "step": 46250 }, { "epoch": 0.18689625359066245, "grad_norm": 1239.348876953125, "learning_rate": 3.912527847700489e-05, "loss": 103.7301, "step": 46260 }, { "epoch": 0.1869366548560301, "grad_norm": 754.636474609375, "learning_rate": 3.912446146189193e-05, "loss": 81.214, "step": 46270 }, { "epoch": 0.18697705612139773, "grad_norm": 602.5750732421875, "learning_rate": 3.912364407393774e-05, "loss": 68.1354, "step": 46280 }, { "epoch": 0.18701745738676534, "grad_norm": 1123.705322265625, "learning_rate": 3.912282631315827e-05, "loss": 87.4514, "step": 46290 }, { "epoch": 0.18705785865213298, "grad_norm": 913.4268188476562, "learning_rate": 3.912200817956945e-05, "loss": 77.5673, "step": 46300 }, { "epoch": 0.18709825991750062, "grad_norm": 1332.4677734375, "learning_rate": 3.912118967318724e-05, "loss": 96.7064, "step": 46310 }, { "epoch": 0.18713866118286826, "grad_norm": 861.162841796875, "learning_rate": 3.912037079402759e-05, "loss": 65.3486, "step": 46320 }, { "epoch": 0.18717906244823587, "grad_norm": 568.9047241210938, "learning_rate": 3.911955154210646e-05, "loss": 47.261, "step": 46330 }, { "epoch": 0.1872194637136035, "grad_norm": 523.025634765625, "learning_rate": 3.9118731917439844e-05, "loss": 52.6651, "step": 46340 }, { "epoch": 0.18725986497897115, "grad_norm": 1224.9736328125, "learning_rate": 3.9117911920043695e-05, "loss": 92.0736, "step": 46350 }, { "epoch": 0.18730026624433876, "grad_norm": 824.1813354492188, "learning_rate": 3.9117091549934014e-05, "loss": 75.8925, "step": 46360 }, { "epoch": 0.1873406675097064, "grad_norm": 2099.2509765625, "learning_rate": 3.9116270807126794e-05, "loss": 88.4477, "step": 46370 }, { "epoch": 0.18738106877507404, "grad_norm": 724.2691650390625, "learning_rate": 3.9115449691638027e-05, "loss": 94.5531, "step": 46380 }, { "epoch": 0.18742147004044166, "grad_norm": 922.3722534179688, "learning_rate": 3.911462820348374e-05, "loss": 75.4537, "step": 46390 }, { "epoch": 0.1874618713058093, "grad_norm": 829.753173828125, "learning_rate": 3.9113806342679926e-05, "loss": 73.5001, "step": 46400 }, { "epoch": 0.18750227257117694, "grad_norm": 1990.830078125, "learning_rate": 3.911298410924262e-05, "loss": 136.7985, "step": 46410 }, { "epoch": 0.18754267383654455, "grad_norm": 400.29083251953125, "learning_rate": 3.911216150318784e-05, "loss": 77.2953, "step": 46420 }, { "epoch": 0.1875830751019122, "grad_norm": 1748.0518798828125, "learning_rate": 3.911133852453164e-05, "loss": 74.3119, "step": 46430 }, { "epoch": 0.18762347636727983, "grad_norm": 828.0217895507812, "learning_rate": 3.911051517329005e-05, "loss": 65.2073, "step": 46440 }, { "epoch": 0.18766387763264744, "grad_norm": 853.9519653320312, "learning_rate": 3.910969144947913e-05, "loss": 94.6754, "step": 46450 }, { "epoch": 0.18770427889801508, "grad_norm": 508.40576171875, "learning_rate": 3.910886735311494e-05, "loss": 50.4667, "step": 46460 }, { "epoch": 0.18774468016338272, "grad_norm": 441.7326965332031, "learning_rate": 3.910804288421355e-05, "loss": 108.5341, "step": 46470 }, { "epoch": 0.18778508142875036, "grad_norm": 795.6704711914062, "learning_rate": 3.910721804279101e-05, "loss": 103.7186, "step": 46480 }, { "epoch": 0.18782548269411797, "grad_norm": 386.6521301269531, "learning_rate": 3.910639282886343e-05, "loss": 82.726, "step": 46490 }, { "epoch": 0.1878658839594856, "grad_norm": 823.4021606445312, "learning_rate": 3.9105567242446874e-05, "loss": 56.3026, "step": 46500 }, { "epoch": 0.18790628522485325, "grad_norm": 1041.70361328125, "learning_rate": 3.9104741283557456e-05, "loss": 49.9035, "step": 46510 }, { "epoch": 0.18794668649022087, "grad_norm": 1130.276611328125, "learning_rate": 3.910391495221127e-05, "loss": 56.8048, "step": 46520 }, { "epoch": 0.1879870877555885, "grad_norm": 1045.17822265625, "learning_rate": 3.9103088248424425e-05, "loss": 80.3857, "step": 46530 }, { "epoch": 0.18802748902095615, "grad_norm": 702.6853637695312, "learning_rate": 3.910226117221305e-05, "loss": 64.5383, "step": 46540 }, { "epoch": 0.18806789028632376, "grad_norm": 615.02197265625, "learning_rate": 3.9101433723593245e-05, "loss": 66.2929, "step": 46550 }, { "epoch": 0.1881082915516914, "grad_norm": 585.1364135742188, "learning_rate": 3.910060590258116e-05, "loss": 60.8131, "step": 46560 }, { "epoch": 0.18814869281705904, "grad_norm": 1662.8502197265625, "learning_rate": 3.909977770919293e-05, "loss": 59.843, "step": 46570 }, { "epoch": 0.18818909408242665, "grad_norm": 641.4120483398438, "learning_rate": 3.9098949143444704e-05, "loss": 146.1808, "step": 46580 }, { "epoch": 0.1882294953477943, "grad_norm": 924.935791015625, "learning_rate": 3.909812020535262e-05, "loss": 68.6732, "step": 46590 }, { "epoch": 0.18826989661316193, "grad_norm": 573.886962890625, "learning_rate": 3.9097290894932866e-05, "loss": 97.9553, "step": 46600 }, { "epoch": 0.18831029787852954, "grad_norm": 476.7160339355469, "learning_rate": 3.909646121220158e-05, "loss": 103.4015, "step": 46610 }, { "epoch": 0.18835069914389718, "grad_norm": 693.3080444335938, "learning_rate": 3.9095631157174956e-05, "loss": 58.9891, "step": 46620 }, { "epoch": 0.18839110040926482, "grad_norm": 1162.9171142578125, "learning_rate": 3.909480072986918e-05, "loss": 71.2949, "step": 46630 }, { "epoch": 0.18843150167463246, "grad_norm": 395.7523193359375, "learning_rate": 3.9093969930300424e-05, "loss": 102.4105, "step": 46640 }, { "epoch": 0.18847190294000007, "grad_norm": 1078.971435546875, "learning_rate": 3.909313875848489e-05, "loss": 85.173, "step": 46650 }, { "epoch": 0.18851230420536771, "grad_norm": 75.84223175048828, "learning_rate": 3.9092307214438795e-05, "loss": 72.504, "step": 46660 }, { "epoch": 0.18855270547073535, "grad_norm": 657.8626708984375, "learning_rate": 3.909147529817834e-05, "loss": 56.7639, "step": 46670 }, { "epoch": 0.18859310673610297, "grad_norm": 825.9349365234375, "learning_rate": 3.909064300971975e-05, "loss": 93.713, "step": 46680 }, { "epoch": 0.1886335080014706, "grad_norm": 847.5474243164062, "learning_rate": 3.908981034907923e-05, "loss": 61.4545, "step": 46690 }, { "epoch": 0.18867390926683825, "grad_norm": 549.8899536132812, "learning_rate": 3.9088977316273044e-05, "loss": 75.5487, "step": 46700 }, { "epoch": 0.18871431053220586, "grad_norm": 408.15753173828125, "learning_rate": 3.908814391131741e-05, "loss": 132.707, "step": 46710 }, { "epoch": 0.1887547117975735, "grad_norm": 572.0899658203125, "learning_rate": 3.908731013422859e-05, "loss": 107.9505, "step": 46720 }, { "epoch": 0.18879511306294114, "grad_norm": 1054.216064453125, "learning_rate": 3.908647598502282e-05, "loss": 78.4994, "step": 46730 }, { "epoch": 0.18883551432830875, "grad_norm": 869.3137817382812, "learning_rate": 3.908564146371639e-05, "loss": 64.2145, "step": 46740 }, { "epoch": 0.1888759155936764, "grad_norm": 4045.420654296875, "learning_rate": 3.908480657032554e-05, "loss": 80.4099, "step": 46750 }, { "epoch": 0.18891631685904403, "grad_norm": 1174.1817626953125, "learning_rate": 3.9083971304866566e-05, "loss": 63.7517, "step": 46760 }, { "epoch": 0.18895671812441164, "grad_norm": 1713.68896484375, "learning_rate": 3.908313566735575e-05, "loss": 104.0484, "step": 46770 }, { "epoch": 0.18899711938977928, "grad_norm": 3929.40087890625, "learning_rate": 3.908229965780937e-05, "loss": 114.1248, "step": 46780 }, { "epoch": 0.18903752065514692, "grad_norm": 1123.20263671875, "learning_rate": 3.908146327624374e-05, "loss": 97.4944, "step": 46790 }, { "epoch": 0.18907792192051456, "grad_norm": 893.2198486328125, "learning_rate": 3.908062652267516e-05, "loss": 62.3123, "step": 46800 }, { "epoch": 0.18911832318588218, "grad_norm": 1673.565185546875, "learning_rate": 3.907978939711995e-05, "loss": 70.2366, "step": 46810 }, { "epoch": 0.18915872445124982, "grad_norm": 944.0204467773438, "learning_rate": 3.907895189959441e-05, "loss": 65.0089, "step": 46820 }, { "epoch": 0.18919912571661746, "grad_norm": 491.1232604980469, "learning_rate": 3.907811403011488e-05, "loss": 71.3438, "step": 46830 }, { "epoch": 0.18923952698198507, "grad_norm": 865.6963500976562, "learning_rate": 3.9077275788697704e-05, "loss": 95.7543, "step": 46840 }, { "epoch": 0.1892799282473527, "grad_norm": 251.933349609375, "learning_rate": 3.907643717535921e-05, "loss": 68.4161, "step": 46850 }, { "epoch": 0.18932032951272035, "grad_norm": 1765.4664306640625, "learning_rate": 3.9075598190115756e-05, "loss": 68.8602, "step": 46860 }, { "epoch": 0.18936073077808796, "grad_norm": 1550.0150146484375, "learning_rate": 3.9074758832983685e-05, "loss": 93.8396, "step": 46870 }, { "epoch": 0.1894011320434556, "grad_norm": 1457.9483642578125, "learning_rate": 3.9073919103979383e-05, "loss": 79.1825, "step": 46880 }, { "epoch": 0.18944153330882324, "grad_norm": 624.1560668945312, "learning_rate": 3.90730790031192e-05, "loss": 91.3734, "step": 46890 }, { "epoch": 0.18948193457419085, "grad_norm": 1067.4862060546875, "learning_rate": 3.9072238530419525e-05, "loss": 103.1383, "step": 46900 }, { "epoch": 0.1895223358395585, "grad_norm": 426.91864013671875, "learning_rate": 3.907139768589674e-05, "loss": 75.1453, "step": 46910 }, { "epoch": 0.18956273710492613, "grad_norm": 1307.3526611328125, "learning_rate": 3.907055646956724e-05, "loss": 80.5333, "step": 46920 }, { "epoch": 0.18960313837029374, "grad_norm": 818.2450561523438, "learning_rate": 3.9069714881447416e-05, "loss": 66.8826, "step": 46930 }, { "epoch": 0.18964353963566138, "grad_norm": 452.9537353515625, "learning_rate": 3.906887292155369e-05, "loss": 92.7734, "step": 46940 }, { "epoch": 0.18968394090102902, "grad_norm": 1140.84765625, "learning_rate": 3.906803058990247e-05, "loss": 84.7196, "step": 46950 }, { "epoch": 0.18972434216639666, "grad_norm": 1345.89453125, "learning_rate": 3.9067187886510173e-05, "loss": 77.4215, "step": 46960 }, { "epoch": 0.18976474343176428, "grad_norm": 636.783203125, "learning_rate": 3.9066344811393226e-05, "loss": 66.3102, "step": 46970 }, { "epoch": 0.18980514469713192, "grad_norm": 902.4142456054688, "learning_rate": 3.906550136456808e-05, "loss": 69.7095, "step": 46980 }, { "epoch": 0.18984554596249956, "grad_norm": 1144.4429931640625, "learning_rate": 3.9064657546051175e-05, "loss": 71.0341, "step": 46990 }, { "epoch": 0.18988594722786717, "grad_norm": 1247.4844970703125, "learning_rate": 3.9063813355858944e-05, "loss": 89.9072, "step": 47000 }, { "epoch": 0.1899263484932348, "grad_norm": 1052.9495849609375, "learning_rate": 3.906296879400786e-05, "loss": 97.3577, "step": 47010 }, { "epoch": 0.18996674975860245, "grad_norm": 1629.934814453125, "learning_rate": 3.906212386051439e-05, "loss": 81.06, "step": 47020 }, { "epoch": 0.19000715102397006, "grad_norm": 238.38658142089844, "learning_rate": 3.9061278555394995e-05, "loss": 60.9792, "step": 47030 }, { "epoch": 0.1900475522893377, "grad_norm": 767.5965576171875, "learning_rate": 3.906043287866617e-05, "loss": 92.079, "step": 47040 }, { "epoch": 0.19008795355470534, "grad_norm": 880.5455322265625, "learning_rate": 3.905958683034438e-05, "loss": 71.8466, "step": 47050 }, { "epoch": 0.19012835482007295, "grad_norm": 425.3398132324219, "learning_rate": 3.905874041044614e-05, "loss": 77.9913, "step": 47060 }, { "epoch": 0.1901687560854406, "grad_norm": 2035.0830078125, "learning_rate": 3.9057893618987946e-05, "loss": 72.4353, "step": 47070 }, { "epoch": 0.19020915735080823, "grad_norm": 568.90478515625, "learning_rate": 3.90570464559863e-05, "loss": 72.8722, "step": 47080 }, { "epoch": 0.19024955861617585, "grad_norm": 818.4541625976562, "learning_rate": 3.905619892145772e-05, "loss": 75.254, "step": 47090 }, { "epoch": 0.19028995988154349, "grad_norm": 646.7007446289062, "learning_rate": 3.9055351015418735e-05, "loss": 84.8752, "step": 47100 }, { "epoch": 0.19033036114691113, "grad_norm": 1430.896240234375, "learning_rate": 3.905450273788587e-05, "loss": 65.986, "step": 47110 }, { "epoch": 0.19037076241227877, "grad_norm": 610.7198486328125, "learning_rate": 3.9053654088875667e-05, "loss": 77.2479, "step": 47120 }, { "epoch": 0.19041116367764638, "grad_norm": 946.6858520507812, "learning_rate": 3.905280506840466e-05, "loss": 137.4436, "step": 47130 }, { "epoch": 0.19045156494301402, "grad_norm": 927.581787109375, "learning_rate": 3.905195567648942e-05, "loss": 71.1932, "step": 47140 }, { "epoch": 0.19049196620838166, "grad_norm": 871.3401489257812, "learning_rate": 3.905110591314649e-05, "loss": 89.0064, "step": 47150 }, { "epoch": 0.19053236747374927, "grad_norm": 585.0065307617188, "learning_rate": 3.905025577839244e-05, "loss": 101.181, "step": 47160 }, { "epoch": 0.1905727687391169, "grad_norm": 587.296142578125, "learning_rate": 3.904940527224385e-05, "loss": 53.9529, "step": 47170 }, { "epoch": 0.19061317000448455, "grad_norm": 580.9244995117188, "learning_rate": 3.90485543947173e-05, "loss": 92.6486, "step": 47180 }, { "epoch": 0.19065357126985216, "grad_norm": 904.3273315429688, "learning_rate": 3.904770314582937e-05, "loss": 78.1508, "step": 47190 }, { "epoch": 0.1906939725352198, "grad_norm": 950.0264892578125, "learning_rate": 3.9046851525596656e-05, "loss": 64.4661, "step": 47200 }, { "epoch": 0.19073437380058744, "grad_norm": 1081.5543212890625, "learning_rate": 3.904599953403577e-05, "loss": 120.9151, "step": 47210 }, { "epoch": 0.19077477506595505, "grad_norm": 700.6988525390625, "learning_rate": 3.904514717116332e-05, "loss": 85.0513, "step": 47220 }, { "epoch": 0.1908151763313227, "grad_norm": 793.6326293945312, "learning_rate": 3.9044294436995924e-05, "loss": 103.1733, "step": 47230 }, { "epoch": 0.19085557759669033, "grad_norm": 952.3103637695312, "learning_rate": 3.9043441331550195e-05, "loss": 106.587, "step": 47240 }, { "epoch": 0.19089597886205795, "grad_norm": 1237.8389892578125, "learning_rate": 3.9042587854842776e-05, "loss": 71.8429, "step": 47250 }, { "epoch": 0.1909363801274256, "grad_norm": 876.2874755859375, "learning_rate": 3.9041734006890304e-05, "loss": 52.6092, "step": 47260 }, { "epoch": 0.19097678139279323, "grad_norm": 847.1505126953125, "learning_rate": 3.9040879787709426e-05, "loss": 106.2469, "step": 47270 }, { "epoch": 0.19101718265816087, "grad_norm": 514.6716918945312, "learning_rate": 3.904002519731679e-05, "loss": 71.6665, "step": 47280 }, { "epoch": 0.19105758392352848, "grad_norm": 1077.2454833984375, "learning_rate": 3.903917023572907e-05, "loss": 92.8363, "step": 47290 }, { "epoch": 0.19109798518889612, "grad_norm": 825.1358032226562, "learning_rate": 3.9038314902962906e-05, "loss": 69.5302, "step": 47300 }, { "epoch": 0.19113838645426376, "grad_norm": 415.63623046875, "learning_rate": 3.9037459199035004e-05, "loss": 85.427, "step": 47310 }, { "epoch": 0.19117878771963137, "grad_norm": 665.2693481445312, "learning_rate": 3.903660312396203e-05, "loss": 71.0723, "step": 47320 }, { "epoch": 0.191219188984999, "grad_norm": 913.37548828125, "learning_rate": 3.903574667776068e-05, "loss": 60.3247, "step": 47330 }, { "epoch": 0.19125959025036665, "grad_norm": 963.267822265625, "learning_rate": 3.903488986044765e-05, "loss": 81.5899, "step": 47340 }, { "epoch": 0.19129999151573426, "grad_norm": 1512.7403564453125, "learning_rate": 3.9034032672039634e-05, "loss": 85.3047, "step": 47350 }, { "epoch": 0.1913403927811019, "grad_norm": 689.649658203125, "learning_rate": 3.9033175112553354e-05, "loss": 90.7539, "step": 47360 }, { "epoch": 0.19138079404646954, "grad_norm": 303.6385192871094, "learning_rate": 3.9032317182005536e-05, "loss": 73.4, "step": 47370 }, { "epoch": 0.19142119531183716, "grad_norm": 780.4526977539062, "learning_rate": 3.903145888041289e-05, "loss": 80.5795, "step": 47380 }, { "epoch": 0.1914615965772048, "grad_norm": 737.2525634765625, "learning_rate": 3.903060020779215e-05, "loss": 55.1407, "step": 47390 }, { "epoch": 0.19150199784257244, "grad_norm": 616.6204833984375, "learning_rate": 3.902974116416006e-05, "loss": 71.6661, "step": 47400 }, { "epoch": 0.19154239910794005, "grad_norm": 385.3418884277344, "learning_rate": 3.902888174953338e-05, "loss": 66.3275, "step": 47410 }, { "epoch": 0.1915828003733077, "grad_norm": 641.3906860351562, "learning_rate": 3.902802196392885e-05, "loss": 51.0823, "step": 47420 }, { "epoch": 0.19162320163867533, "grad_norm": 1668.456787109375, "learning_rate": 3.902716180736323e-05, "loss": 74.4394, "step": 47430 }, { "epoch": 0.19166360290404297, "grad_norm": 634.2432250976562, "learning_rate": 3.90263012798533e-05, "loss": 73.6734, "step": 47440 }, { "epoch": 0.19170400416941058, "grad_norm": 1996.33056640625, "learning_rate": 3.902544038141583e-05, "loss": 99.414, "step": 47450 }, { "epoch": 0.19174440543477822, "grad_norm": 622.3128662109375, "learning_rate": 3.9024579112067604e-05, "loss": 52.0503, "step": 47460 }, { "epoch": 0.19178480670014586, "grad_norm": 1247.3145751953125, "learning_rate": 3.9023717471825416e-05, "loss": 100.0393, "step": 47470 }, { "epoch": 0.19182520796551347, "grad_norm": 629.420166015625, "learning_rate": 3.902285546070606e-05, "loss": 111.175, "step": 47480 }, { "epoch": 0.1918656092308811, "grad_norm": 1420.0347900390625, "learning_rate": 3.902199307872634e-05, "loss": 104.5429, "step": 47490 }, { "epoch": 0.19190601049624875, "grad_norm": 521.70703125, "learning_rate": 3.9021130325903076e-05, "loss": 61.8919, "step": 47500 }, { "epoch": 0.19194641176161636, "grad_norm": 991.524169921875, "learning_rate": 3.902026720225308e-05, "loss": 117.9224, "step": 47510 }, { "epoch": 0.191986813026984, "grad_norm": 791.923583984375, "learning_rate": 3.901940370779318e-05, "loss": 77.5792, "step": 47520 }, { "epoch": 0.19202721429235164, "grad_norm": 540.6439819335938, "learning_rate": 3.901853984254023e-05, "loss": 92.2672, "step": 47530 }, { "epoch": 0.19206761555771926, "grad_norm": 461.5845947265625, "learning_rate": 3.901767560651104e-05, "loss": 80.2478, "step": 47540 }, { "epoch": 0.1921080168230869, "grad_norm": 662.9263916015625, "learning_rate": 3.901681099972247e-05, "loss": 63.0723, "step": 47550 }, { "epoch": 0.19214841808845454, "grad_norm": 779.2219848632812, "learning_rate": 3.901594602219138e-05, "loss": 82.2809, "step": 47560 }, { "epoch": 0.19218881935382215, "grad_norm": 1538.7470703125, "learning_rate": 3.901508067393464e-05, "loss": 75.5992, "step": 47570 }, { "epoch": 0.1922292206191898, "grad_norm": 1130.8165283203125, "learning_rate": 3.901421495496911e-05, "loss": 84.9875, "step": 47580 }, { "epoch": 0.19226962188455743, "grad_norm": 2149.62744140625, "learning_rate": 3.901334886531167e-05, "loss": 93.2036, "step": 47590 }, { "epoch": 0.19231002314992507, "grad_norm": 395.6761169433594, "learning_rate": 3.90124824049792e-05, "loss": 97.5525, "step": 47600 }, { "epoch": 0.19235042441529268, "grad_norm": 1826.5126953125, "learning_rate": 3.901161557398861e-05, "loss": 118.2228, "step": 47610 }, { "epoch": 0.19239082568066032, "grad_norm": 1124.4610595703125, "learning_rate": 3.901074837235677e-05, "loss": 100.9521, "step": 47620 }, { "epoch": 0.19243122694602796, "grad_norm": 840.7826538085938, "learning_rate": 3.900988080010061e-05, "loss": 86.1119, "step": 47630 }, { "epoch": 0.19247162821139557, "grad_norm": 1439.2591552734375, "learning_rate": 3.900901285723704e-05, "loss": 46.7816, "step": 47640 }, { "epoch": 0.1925120294767632, "grad_norm": 1832.960205078125, "learning_rate": 3.900814454378297e-05, "loss": 96.7867, "step": 47650 }, { "epoch": 0.19255243074213085, "grad_norm": 1689.2100830078125, "learning_rate": 3.900727585975535e-05, "loss": 75.5526, "step": 47660 }, { "epoch": 0.19259283200749847, "grad_norm": 1058.6866455078125, "learning_rate": 3.900640680517109e-05, "loss": 65.278, "step": 47670 }, { "epoch": 0.1926332332728661, "grad_norm": 466.8570556640625, "learning_rate": 3.900553738004715e-05, "loss": 78.7198, "step": 47680 }, { "epoch": 0.19267363453823375, "grad_norm": 451.8070373535156, "learning_rate": 3.9004667584400474e-05, "loss": 76.8182, "step": 47690 }, { "epoch": 0.19271403580360136, "grad_norm": 1532.7535400390625, "learning_rate": 3.900379741824802e-05, "loss": 81.1931, "step": 47700 }, { "epoch": 0.192754437068969, "grad_norm": 942.5155639648438, "learning_rate": 3.900292688160674e-05, "loss": 66.7955, "step": 47710 }, { "epoch": 0.19279483833433664, "grad_norm": 1023.271484375, "learning_rate": 3.9002055974493626e-05, "loss": 64.7008, "step": 47720 }, { "epoch": 0.19283523959970425, "grad_norm": 926.4970092773438, "learning_rate": 3.900118469692565e-05, "loss": 132.3741, "step": 47730 }, { "epoch": 0.1928756408650719, "grad_norm": 573.7406616210938, "learning_rate": 3.90003130489198e-05, "loss": 81.473, "step": 47740 }, { "epoch": 0.19291604213043953, "grad_norm": 1699.2728271484375, "learning_rate": 3.899944103049305e-05, "loss": 84.5563, "step": 47750 }, { "epoch": 0.19295644339580717, "grad_norm": 796.8876342773438, "learning_rate": 3.8998568641662425e-05, "loss": 70.3057, "step": 47760 }, { "epoch": 0.19299684466117478, "grad_norm": 348.61224365234375, "learning_rate": 3.899769588244493e-05, "loss": 89.7806, "step": 47770 }, { "epoch": 0.19303724592654242, "grad_norm": 1488.8883056640625, "learning_rate": 3.899682275285756e-05, "loss": 60.286, "step": 47780 }, { "epoch": 0.19307764719191006, "grad_norm": 616.420654296875, "learning_rate": 3.8995949252917354e-05, "loss": 70.8484, "step": 47790 }, { "epoch": 0.19311804845727767, "grad_norm": 524.6080932617188, "learning_rate": 3.899507538264134e-05, "loss": 59.4811, "step": 47800 }, { "epoch": 0.19315844972264531, "grad_norm": 822.7127075195312, "learning_rate": 3.899420114204655e-05, "loss": 75.0543, "step": 47810 }, { "epoch": 0.19319885098801295, "grad_norm": 2325.68408203125, "learning_rate": 3.899332653115004e-05, "loss": 46.1322, "step": 47820 }, { "epoch": 0.19323925225338057, "grad_norm": 577.6055908203125, "learning_rate": 3.899245154996884e-05, "loss": 53.3111, "step": 47830 }, { "epoch": 0.1932796535187482, "grad_norm": 592.7323608398438, "learning_rate": 3.8991576198520015e-05, "loss": 141.9589, "step": 47840 }, { "epoch": 0.19332005478411585, "grad_norm": 1331.01220703125, "learning_rate": 3.899070047682064e-05, "loss": 91.7902, "step": 47850 }, { "epoch": 0.19336045604948346, "grad_norm": 2341.49755859375, "learning_rate": 3.898982438488778e-05, "loss": 98.7416, "step": 47860 }, { "epoch": 0.1934008573148511, "grad_norm": 689.9047241210938, "learning_rate": 3.898894792273853e-05, "loss": 83.9316, "step": 47870 }, { "epoch": 0.19344125858021874, "grad_norm": 684.3204956054688, "learning_rate": 3.898807109038995e-05, "loss": 67.1209, "step": 47880 }, { "epoch": 0.19348165984558635, "grad_norm": 866.631103515625, "learning_rate": 3.898719388785915e-05, "loss": 76.8427, "step": 47890 }, { "epoch": 0.193522061110954, "grad_norm": 2371.7421875, "learning_rate": 3.898631631516323e-05, "loss": 84.4593, "step": 47900 }, { "epoch": 0.19356246237632163, "grad_norm": 1004.8216552734375, "learning_rate": 3.89854383723193e-05, "loss": 113.4156, "step": 47910 }, { "epoch": 0.19360286364168927, "grad_norm": 895.5128784179688, "learning_rate": 3.898456005934447e-05, "loss": 72.0463, "step": 47920 }, { "epoch": 0.19364326490705688, "grad_norm": 2553.209228515625, "learning_rate": 3.8983681376255876e-05, "loss": 116.5803, "step": 47930 }, { "epoch": 0.19368366617242452, "grad_norm": 514.206298828125, "learning_rate": 3.898280232307064e-05, "loss": 62.542, "step": 47940 }, { "epoch": 0.19372406743779216, "grad_norm": 3847.76806640625, "learning_rate": 3.898192289980589e-05, "loss": 100.5998, "step": 47950 }, { "epoch": 0.19376446870315978, "grad_norm": 710.794677734375, "learning_rate": 3.898104310647879e-05, "loss": 84.6892, "step": 47960 }, { "epoch": 0.19380486996852742, "grad_norm": 244.08517456054688, "learning_rate": 3.8980162943106486e-05, "loss": 66.8276, "step": 47970 }, { "epoch": 0.19384527123389506, "grad_norm": 1008.19677734375, "learning_rate": 3.897928240970612e-05, "loss": 83.5675, "step": 47980 }, { "epoch": 0.19388567249926267, "grad_norm": 1166.093505859375, "learning_rate": 3.8978401506294885e-05, "loss": 105.6205, "step": 47990 }, { "epoch": 0.1939260737646303, "grad_norm": 645.5625, "learning_rate": 3.897752023288993e-05, "loss": 105.2789, "step": 48000 }, { "epoch": 0.19396647502999795, "grad_norm": 679.8927612304688, "learning_rate": 3.8976638589508456e-05, "loss": 51.4738, "step": 48010 }, { "epoch": 0.19400687629536556, "grad_norm": 599.9944458007812, "learning_rate": 3.897575657616764e-05, "loss": 76.4371, "step": 48020 }, { "epoch": 0.1940472775607332, "grad_norm": 4461.6572265625, "learning_rate": 3.897487419288468e-05, "loss": 124.2049, "step": 48030 }, { "epoch": 0.19408767882610084, "grad_norm": 1515.529296875, "learning_rate": 3.8973991439676783e-05, "loss": 87.721, "step": 48040 }, { "epoch": 0.19412808009146845, "grad_norm": 669.8505249023438, "learning_rate": 3.897310831656115e-05, "loss": 60.2474, "step": 48050 }, { "epoch": 0.1941684813568361, "grad_norm": 697.6897583007812, "learning_rate": 3.897222482355501e-05, "loss": 96.5461, "step": 48060 }, { "epoch": 0.19420888262220373, "grad_norm": 517.4088134765625, "learning_rate": 3.897134096067557e-05, "loss": 70.281, "step": 48070 }, { "epoch": 0.19424928388757137, "grad_norm": 645.1104736328125, "learning_rate": 3.8970456727940076e-05, "loss": 89.9621, "step": 48080 }, { "epoch": 0.19428968515293898, "grad_norm": 534.7449340820312, "learning_rate": 3.896957212536576e-05, "loss": 81.962, "step": 48090 }, { "epoch": 0.19433008641830662, "grad_norm": 1291.22119140625, "learning_rate": 3.896868715296987e-05, "loss": 108.5878, "step": 48100 }, { "epoch": 0.19437048768367426, "grad_norm": 1313.7838134765625, "learning_rate": 3.896780181076966e-05, "loss": 57.1638, "step": 48110 }, { "epoch": 0.19441088894904188, "grad_norm": 1689.31640625, "learning_rate": 3.8966916098782377e-05, "loss": 71.2753, "step": 48120 }, { "epoch": 0.19445129021440952, "grad_norm": 628.2205810546875, "learning_rate": 3.896603001702531e-05, "loss": 64.059, "step": 48130 }, { "epoch": 0.19449169147977716, "grad_norm": 1219.47509765625, "learning_rate": 3.896514356551572e-05, "loss": 95.7702, "step": 48140 }, { "epoch": 0.19453209274514477, "grad_norm": 1684.8138427734375, "learning_rate": 3.896425674427089e-05, "loss": 63.9552, "step": 48150 }, { "epoch": 0.1945724940105124, "grad_norm": 926.5546264648438, "learning_rate": 3.896336955330812e-05, "loss": 115.7368, "step": 48160 }, { "epoch": 0.19461289527588005, "grad_norm": 2358.445556640625, "learning_rate": 3.8962481992644694e-05, "loss": 122.3302, "step": 48170 }, { "epoch": 0.19465329654124766, "grad_norm": 1648.923583984375, "learning_rate": 3.896159406229792e-05, "loss": 62.5087, "step": 48180 }, { "epoch": 0.1946936978066153, "grad_norm": 637.2569580078125, "learning_rate": 3.896070576228511e-05, "loss": 77.8548, "step": 48190 }, { "epoch": 0.19473409907198294, "grad_norm": 1143.2896728515625, "learning_rate": 3.895981709262357e-05, "loss": 60.9295, "step": 48200 }, { "epoch": 0.19477450033735055, "grad_norm": 2155.3466796875, "learning_rate": 3.895892805333064e-05, "loss": 89.5178, "step": 48210 }, { "epoch": 0.1948149016027182, "grad_norm": 1476.8765869140625, "learning_rate": 3.895803864442365e-05, "loss": 95.1555, "step": 48220 }, { "epoch": 0.19485530286808583, "grad_norm": 601.9876708984375, "learning_rate": 3.895714886591993e-05, "loss": 86.8787, "step": 48230 }, { "epoch": 0.19489570413345347, "grad_norm": 701.3627319335938, "learning_rate": 3.895625871783684e-05, "loss": 68.1065, "step": 48240 }, { "epoch": 0.19493610539882109, "grad_norm": 1250.448486328125, "learning_rate": 3.895536820019172e-05, "loss": 91.6428, "step": 48250 }, { "epoch": 0.19497650666418873, "grad_norm": 993.2598876953125, "learning_rate": 3.895447731300194e-05, "loss": 61.6161, "step": 48260 }, { "epoch": 0.19501690792955637, "grad_norm": 542.6673583984375, "learning_rate": 3.8953586056284866e-05, "loss": 49.7327, "step": 48270 }, { "epoch": 0.19505730919492398, "grad_norm": 1102.58642578125, "learning_rate": 3.895269443005788e-05, "loss": 70.2154, "step": 48280 }, { "epoch": 0.19509771046029162, "grad_norm": 1155.5128173828125, "learning_rate": 3.895180243433835e-05, "loss": 77.5229, "step": 48290 }, { "epoch": 0.19513811172565926, "grad_norm": 856.4344482421875, "learning_rate": 3.895091006914368e-05, "loss": 79.453, "step": 48300 }, { "epoch": 0.19517851299102687, "grad_norm": 1338.512451171875, "learning_rate": 3.8950017334491256e-05, "loss": 102.0866, "step": 48310 }, { "epoch": 0.1952189142563945, "grad_norm": 943.7992553710938, "learning_rate": 3.894912423039849e-05, "loss": 106.8494, "step": 48320 }, { "epoch": 0.19525931552176215, "grad_norm": 477.94769287109375, "learning_rate": 3.89482307568828e-05, "loss": 74.899, "step": 48330 }, { "epoch": 0.19529971678712976, "grad_norm": 2688.3466796875, "learning_rate": 3.894733691396159e-05, "loss": 68.6513, "step": 48340 }, { "epoch": 0.1953401180524974, "grad_norm": 783.2159423828125, "learning_rate": 3.894644270165229e-05, "loss": 111.6442, "step": 48350 }, { "epoch": 0.19538051931786504, "grad_norm": 667.3715209960938, "learning_rate": 3.894554811997234e-05, "loss": 70.8076, "step": 48360 }, { "epoch": 0.19542092058323265, "grad_norm": 912.3576049804688, "learning_rate": 3.894465316893917e-05, "loss": 83.1459, "step": 48370 }, { "epoch": 0.1954613218486003, "grad_norm": 830.3079833984375, "learning_rate": 3.894375784857023e-05, "loss": 106.7063, "step": 48380 }, { "epoch": 0.19550172311396793, "grad_norm": 1187.99560546875, "learning_rate": 3.894286215888299e-05, "loss": 92.7125, "step": 48390 }, { "epoch": 0.19554212437933557, "grad_norm": 2235.258056640625, "learning_rate": 3.894196609989489e-05, "loss": 77.8964, "step": 48400 }, { "epoch": 0.1955825256447032, "grad_norm": 1120.1141357421875, "learning_rate": 3.894106967162341e-05, "loss": 75.0723, "step": 48410 }, { "epoch": 0.19562292691007083, "grad_norm": 1820.933837890625, "learning_rate": 3.894017287408603e-05, "loss": 74.4507, "step": 48420 }, { "epoch": 0.19566332817543847, "grad_norm": 911.1453247070312, "learning_rate": 3.8939275707300226e-05, "loss": 59.5158, "step": 48430 }, { "epoch": 0.19570372944080608, "grad_norm": 1145.5992431640625, "learning_rate": 3.8938378171283485e-05, "loss": 66.2683, "step": 48440 }, { "epoch": 0.19574413070617372, "grad_norm": 1104.505859375, "learning_rate": 3.893748026605332e-05, "loss": 64.669, "step": 48450 }, { "epoch": 0.19578453197154136, "grad_norm": 1642.31396484375, "learning_rate": 3.893658199162723e-05, "loss": 86.1956, "step": 48460 }, { "epoch": 0.19582493323690897, "grad_norm": 2475.31787109375, "learning_rate": 3.893568334802272e-05, "loss": 104.0921, "step": 48470 }, { "epoch": 0.1958653345022766, "grad_norm": 952.88134765625, "learning_rate": 3.893478433525731e-05, "loss": 87.7983, "step": 48480 }, { "epoch": 0.19590573576764425, "grad_norm": 1892.2713623046875, "learning_rate": 3.8933884953348545e-05, "loss": 88.2686, "step": 48490 }, { "epoch": 0.19594613703301186, "grad_norm": 908.8019409179688, "learning_rate": 3.893298520231393e-05, "loss": 101.3182, "step": 48500 }, { "epoch": 0.1959865382983795, "grad_norm": 1779.0821533203125, "learning_rate": 3.893208508217103e-05, "loss": 91.2895, "step": 48510 }, { "epoch": 0.19602693956374714, "grad_norm": 823.1393432617188, "learning_rate": 3.893118459293738e-05, "loss": 79.2318, "step": 48520 }, { "epoch": 0.19606734082911476, "grad_norm": 1146.0291748046875, "learning_rate": 3.893028373463054e-05, "loss": 60.2765, "step": 48530 }, { "epoch": 0.1961077420944824, "grad_norm": 907.8261108398438, "learning_rate": 3.8929382507268074e-05, "loss": 81.0746, "step": 48540 }, { "epoch": 0.19614814335985004, "grad_norm": 542.8344116210938, "learning_rate": 3.892848091086755e-05, "loss": 71.9454, "step": 48550 }, { "epoch": 0.19618854462521768, "grad_norm": 735.962890625, "learning_rate": 3.8927578945446544e-05, "loss": 81.1646, "step": 48560 }, { "epoch": 0.1962289458905853, "grad_norm": 608.4637451171875, "learning_rate": 3.892667661102265e-05, "loss": 43.2689, "step": 48570 }, { "epoch": 0.19626934715595293, "grad_norm": 494.1917419433594, "learning_rate": 3.892577390761344e-05, "loss": 119.9042, "step": 48580 }, { "epoch": 0.19630974842132057, "grad_norm": 1613.7349853515625, "learning_rate": 3.892487083523653e-05, "loss": 134.0927, "step": 48590 }, { "epoch": 0.19635014968668818, "grad_norm": 570.557373046875, "learning_rate": 3.892396739390952e-05, "loss": 70.8616, "step": 48600 }, { "epoch": 0.19639055095205582, "grad_norm": 1168.809326171875, "learning_rate": 3.892306358365002e-05, "loss": 106.0381, "step": 48610 }, { "epoch": 0.19643095221742346, "grad_norm": 1149.137451171875, "learning_rate": 3.8922159404475653e-05, "loss": 74.9864, "step": 48620 }, { "epoch": 0.19647135348279107, "grad_norm": 830.5051879882812, "learning_rate": 3.8921254856404056e-05, "loss": 91.8877, "step": 48630 }, { "epoch": 0.1965117547481587, "grad_norm": 1049.21142578125, "learning_rate": 3.892034993945284e-05, "loss": 65.3357, "step": 48640 }, { "epoch": 0.19655215601352635, "grad_norm": 885.1357421875, "learning_rate": 3.891944465363968e-05, "loss": 74.8494, "step": 48650 }, { "epoch": 0.19659255727889396, "grad_norm": 1449.2069091796875, "learning_rate": 3.891853899898219e-05, "loss": 106.2978, "step": 48660 }, { "epoch": 0.1966329585442616, "grad_norm": 1177.130615234375, "learning_rate": 3.8917632975498046e-05, "loss": 59.5166, "step": 48670 }, { "epoch": 0.19667335980962924, "grad_norm": 903.5010986328125, "learning_rate": 3.891672658320491e-05, "loss": 47.0828, "step": 48680 }, { "epoch": 0.19671376107499686, "grad_norm": 527.488037109375, "learning_rate": 3.891581982212045e-05, "loss": 44.1152, "step": 48690 }, { "epoch": 0.1967541623403645, "grad_norm": 555.2778930664062, "learning_rate": 3.891491269226234e-05, "loss": 58.6444, "step": 48700 }, { "epoch": 0.19679456360573214, "grad_norm": 1518.626953125, "learning_rate": 3.891400519364827e-05, "loss": 98.2571, "step": 48710 }, { "epoch": 0.19683496487109975, "grad_norm": 1145.939697265625, "learning_rate": 3.891309732629593e-05, "loss": 83.2956, "step": 48720 }, { "epoch": 0.1968753661364674, "grad_norm": 509.7560729980469, "learning_rate": 3.891218909022302e-05, "loss": 57.1039, "step": 48730 }, { "epoch": 0.19691576740183503, "grad_norm": 1032.6080322265625, "learning_rate": 3.891128048544726e-05, "loss": 82.644, "step": 48740 }, { "epoch": 0.19695616866720267, "grad_norm": 1114.884033203125, "learning_rate": 3.891037151198634e-05, "loss": 130.455, "step": 48750 }, { "epoch": 0.19699656993257028, "grad_norm": 848.599365234375, "learning_rate": 3.890946216985799e-05, "loss": 107.0122, "step": 48760 }, { "epoch": 0.19703697119793792, "grad_norm": 635.4305419921875, "learning_rate": 3.890855245907994e-05, "loss": 95.4124, "step": 48770 }, { "epoch": 0.19707737246330556, "grad_norm": 744.4996948242188, "learning_rate": 3.890764237966993e-05, "loss": 63.5169, "step": 48780 }, { "epoch": 0.19711777372867317, "grad_norm": 1917.751708984375, "learning_rate": 3.8906731931645694e-05, "loss": 121.8446, "step": 48790 }, { "epoch": 0.1971581749940408, "grad_norm": 2296.08544921875, "learning_rate": 3.8905821115024994e-05, "loss": 91.3989, "step": 48800 }, { "epoch": 0.19719857625940845, "grad_norm": 571.256591796875, "learning_rate": 3.8904909929825565e-05, "loss": 54.8591, "step": 48810 }, { "epoch": 0.19723897752477607, "grad_norm": 582.6764526367188, "learning_rate": 3.890399837606519e-05, "loss": 88.5333, "step": 48820 }, { "epoch": 0.1972793787901437, "grad_norm": 1239.0958251953125, "learning_rate": 3.8903086453761634e-05, "loss": 96.0011, "step": 48830 }, { "epoch": 0.19731978005551135, "grad_norm": 1710.357421875, "learning_rate": 3.890217416293267e-05, "loss": 91.3379, "step": 48840 }, { "epoch": 0.19736018132087896, "grad_norm": 791.1911010742188, "learning_rate": 3.89012615035961e-05, "loss": 37.7967, "step": 48850 }, { "epoch": 0.1974005825862466, "grad_norm": 636.7476196289062, "learning_rate": 3.89003484757697e-05, "loss": 50.4115, "step": 48860 }, { "epoch": 0.19744098385161424, "grad_norm": 665.1348876953125, "learning_rate": 3.889943507947128e-05, "loss": 62.8492, "step": 48870 }, { "epoch": 0.19748138511698185, "grad_norm": 565.6461181640625, "learning_rate": 3.889852131471863e-05, "loss": 81.9738, "step": 48880 }, { "epoch": 0.1975217863823495, "grad_norm": 439.4663391113281, "learning_rate": 3.8897607181529594e-05, "loss": 56.1482, "step": 48890 }, { "epoch": 0.19756218764771713, "grad_norm": 701.4642333984375, "learning_rate": 3.889669267992197e-05, "loss": 74.9441, "step": 48900 }, { "epoch": 0.19760258891308477, "grad_norm": 519.2825927734375, "learning_rate": 3.889577780991359e-05, "loss": 64.2358, "step": 48910 }, { "epoch": 0.19764299017845238, "grad_norm": 998.2957153320312, "learning_rate": 3.8894862571522297e-05, "loss": 78.8985, "step": 48920 }, { "epoch": 0.19768339144382002, "grad_norm": 772.8052978515625, "learning_rate": 3.889394696476593e-05, "loss": 70.7561, "step": 48930 }, { "epoch": 0.19772379270918766, "grad_norm": 1149.341064453125, "learning_rate": 3.8893030989662336e-05, "loss": 83.4886, "step": 48940 }, { "epoch": 0.19776419397455527, "grad_norm": 473.2572021484375, "learning_rate": 3.889211464622938e-05, "loss": 82.0341, "step": 48950 }, { "epoch": 0.19780459523992291, "grad_norm": 1460.015869140625, "learning_rate": 3.889119793448493e-05, "loss": 88.2184, "step": 48960 }, { "epoch": 0.19784499650529055, "grad_norm": 542.736328125, "learning_rate": 3.889028085444684e-05, "loss": 59.861, "step": 48970 }, { "epoch": 0.19788539777065817, "grad_norm": 1630.9713134765625, "learning_rate": 3.8889363406133e-05, "loss": 92.3278, "step": 48980 }, { "epoch": 0.1979257990360258, "grad_norm": 891.9635620117188, "learning_rate": 3.88884455895613e-05, "loss": 101.8944, "step": 48990 }, { "epoch": 0.19796620030139345, "grad_norm": 1021.063720703125, "learning_rate": 3.888752740474962e-05, "loss": 103.379, "step": 49000 }, { "epoch": 0.19800660156676106, "grad_norm": 1191.407470703125, "learning_rate": 3.888660885171588e-05, "loss": 87.3679, "step": 49010 }, { "epoch": 0.1980470028321287, "grad_norm": 1310.1839599609375, "learning_rate": 3.8885689930477974e-05, "loss": 89.4196, "step": 49020 }, { "epoch": 0.19808740409749634, "grad_norm": 476.1018981933594, "learning_rate": 3.8884770641053815e-05, "loss": 80.5733, "step": 49030 }, { "epoch": 0.19812780536286395, "grad_norm": 905.9243774414062, "learning_rate": 3.888385098346134e-05, "loss": 70.9268, "step": 49040 }, { "epoch": 0.1981682066282316, "grad_norm": 411.78668212890625, "learning_rate": 3.888293095771846e-05, "loss": 87.6161, "step": 49050 }, { "epoch": 0.19820860789359923, "grad_norm": 1124.540771484375, "learning_rate": 3.888201056384312e-05, "loss": 106.0178, "step": 49060 }, { "epoch": 0.19824900915896687, "grad_norm": 481.7277526855469, "learning_rate": 3.888108980185326e-05, "loss": 99.5968, "step": 49070 }, { "epoch": 0.19828941042433448, "grad_norm": 1168.4268798828125, "learning_rate": 3.888016867176684e-05, "loss": 81.1295, "step": 49080 }, { "epoch": 0.19832981168970212, "grad_norm": 179.3350830078125, "learning_rate": 3.88792471736018e-05, "loss": 78.5757, "step": 49090 }, { "epoch": 0.19837021295506976, "grad_norm": 554.9774169921875, "learning_rate": 3.887832530737613e-05, "loss": 72.1782, "step": 49100 }, { "epoch": 0.19841061422043738, "grad_norm": 495.8739318847656, "learning_rate": 3.887740307310779e-05, "loss": 54.6817, "step": 49110 }, { "epoch": 0.19845101548580502, "grad_norm": 920.6334838867188, "learning_rate": 3.887648047081475e-05, "loss": 69.6979, "step": 49120 }, { "epoch": 0.19849141675117266, "grad_norm": 3227.0537109375, "learning_rate": 3.887555750051501e-05, "loss": 100.7972, "step": 49130 }, { "epoch": 0.19853181801654027, "grad_norm": 1286.3974609375, "learning_rate": 3.887463416222655e-05, "loss": 66.7063, "step": 49140 }, { "epoch": 0.1985722192819079, "grad_norm": 890.7499389648438, "learning_rate": 3.887371045596739e-05, "loss": 100.4346, "step": 49150 }, { "epoch": 0.19861262054727555, "grad_norm": 679.1954956054688, "learning_rate": 3.887278638175552e-05, "loss": 95.4674, "step": 49160 }, { "epoch": 0.19865302181264316, "grad_norm": 778.8389892578125, "learning_rate": 3.8871861939608976e-05, "loss": 93.5247, "step": 49170 }, { "epoch": 0.1986934230780108, "grad_norm": 3962.55322265625, "learning_rate": 3.8870937129545756e-05, "loss": 94.9488, "step": 49180 }, { "epoch": 0.19873382434337844, "grad_norm": 627.3102416992188, "learning_rate": 3.88700119515839e-05, "loss": 43.7665, "step": 49190 }, { "epoch": 0.19877422560874605, "grad_norm": 1436.988037109375, "learning_rate": 3.886908640574145e-05, "loss": 110.0446, "step": 49200 }, { "epoch": 0.1988146268741137, "grad_norm": 733.1195068359375, "learning_rate": 3.8868160492036444e-05, "loss": 66.3317, "step": 49210 }, { "epoch": 0.19885502813948133, "grad_norm": 710.966552734375, "learning_rate": 3.886723421048694e-05, "loss": 69.9744, "step": 49220 }, { "epoch": 0.19889542940484897, "grad_norm": 542.9668579101562, "learning_rate": 3.886630756111099e-05, "loss": 112.02, "step": 49230 }, { "epoch": 0.19893583067021658, "grad_norm": 813.4812622070312, "learning_rate": 3.886538054392665e-05, "loss": 149.1125, "step": 49240 }, { "epoch": 0.19897623193558422, "grad_norm": 286.7843933105469, "learning_rate": 3.886445315895202e-05, "loss": 70.4966, "step": 49250 }, { "epoch": 0.19901663320095186, "grad_norm": 605.0004272460938, "learning_rate": 3.886352540620516e-05, "loss": 62.9089, "step": 49260 }, { "epoch": 0.19905703446631948, "grad_norm": 495.3343200683594, "learning_rate": 3.886259728570416e-05, "loss": 68.2199, "step": 49270 }, { "epoch": 0.19909743573168712, "grad_norm": 1497.2340087890625, "learning_rate": 3.8861668797467114e-05, "loss": 87.3006, "step": 49280 }, { "epoch": 0.19913783699705476, "grad_norm": 599.7035522460938, "learning_rate": 3.886073994151212e-05, "loss": 54.236, "step": 49290 }, { "epoch": 0.19917823826242237, "grad_norm": 568.5770874023438, "learning_rate": 3.8859810717857296e-05, "loss": 56.1212, "step": 49300 }, { "epoch": 0.19921863952779, "grad_norm": 638.6181640625, "learning_rate": 3.8858881126520745e-05, "loss": 65.3909, "step": 49310 }, { "epoch": 0.19925904079315765, "grad_norm": 1189.181884765625, "learning_rate": 3.8857951167520605e-05, "loss": 69.2821, "step": 49320 }, { "epoch": 0.19929944205852526, "grad_norm": 1266.6331787109375, "learning_rate": 3.885702084087499e-05, "loss": 82.3138, "step": 49330 }, { "epoch": 0.1993398433238929, "grad_norm": 579.02685546875, "learning_rate": 3.885609014660205e-05, "loss": 99.8087, "step": 49340 }, { "epoch": 0.19938024458926054, "grad_norm": 665.1373901367188, "learning_rate": 3.885515908471993e-05, "loss": 97.9025, "step": 49350 }, { "epoch": 0.19942064585462815, "grad_norm": 461.8253173828125, "learning_rate": 3.885422765524677e-05, "loss": 59.6303, "step": 49360 }, { "epoch": 0.1994610471199958, "grad_norm": 512.667236328125, "learning_rate": 3.885329585820074e-05, "loss": 52.7883, "step": 49370 }, { "epoch": 0.19950144838536343, "grad_norm": 549.1116943359375, "learning_rate": 3.8852363693599996e-05, "loss": 71.6235, "step": 49380 }, { "epoch": 0.19954184965073107, "grad_norm": 2068.1806640625, "learning_rate": 3.885143116146272e-05, "loss": 162.5312, "step": 49390 }, { "epoch": 0.19958225091609869, "grad_norm": 1925.5687255859375, "learning_rate": 3.885049826180709e-05, "loss": 86.9825, "step": 49400 }, { "epoch": 0.19962265218146633, "grad_norm": 804.568359375, "learning_rate": 3.8849564994651284e-05, "loss": 51.5701, "step": 49410 }, { "epoch": 0.19966305344683397, "grad_norm": 709.6029052734375, "learning_rate": 3.884863136001351e-05, "loss": 57.2898, "step": 49420 }, { "epoch": 0.19970345471220158, "grad_norm": 1091.1815185546875, "learning_rate": 3.884769735791196e-05, "loss": 138.4531, "step": 49430 }, { "epoch": 0.19974385597756922, "grad_norm": 366.7632141113281, "learning_rate": 3.8846762988364845e-05, "loss": 71.2259, "step": 49440 }, { "epoch": 0.19978425724293686, "grad_norm": 247.80860900878906, "learning_rate": 3.884582825139038e-05, "loss": 67.1933, "step": 49450 }, { "epoch": 0.19982465850830447, "grad_norm": 1035.06103515625, "learning_rate": 3.88448931470068e-05, "loss": 74.0027, "step": 49460 }, { "epoch": 0.1998650597736721, "grad_norm": 1263.3199462890625, "learning_rate": 3.884395767523232e-05, "loss": 49.4541, "step": 49470 }, { "epoch": 0.19990546103903975, "grad_norm": 681.6302490234375, "learning_rate": 3.884302183608519e-05, "loss": 63.8364, "step": 49480 }, { "epoch": 0.19994586230440736, "grad_norm": 604.8681640625, "learning_rate": 3.884208562958364e-05, "loss": 61.3758, "step": 49490 }, { "epoch": 0.199986263569775, "grad_norm": 838.427490234375, "learning_rate": 3.884114905574594e-05, "loss": 63.9567, "step": 49500 }, { "epoch": 0.20002666483514264, "grad_norm": 2336.60693359375, "learning_rate": 3.884021211459033e-05, "loss": 102.7997, "step": 49510 }, { "epoch": 0.20006706610051025, "grad_norm": 590.4444580078125, "learning_rate": 3.8839274806135095e-05, "loss": 90.1219, "step": 49520 }, { "epoch": 0.2001074673658779, "grad_norm": 924.6959228515625, "learning_rate": 3.883833713039849e-05, "loss": 50.8415, "step": 49530 }, { "epoch": 0.20014786863124553, "grad_norm": 677.3268432617188, "learning_rate": 3.8837399087398803e-05, "loss": 75.5059, "step": 49540 }, { "epoch": 0.20018826989661317, "grad_norm": 402.6632385253906, "learning_rate": 3.883646067715433e-05, "loss": 76.6058, "step": 49550 }, { "epoch": 0.2002286711619808, "grad_norm": 1494.4140625, "learning_rate": 3.8835521899683345e-05, "loss": 115.2886, "step": 49560 }, { "epoch": 0.20026907242734843, "grad_norm": 1487.2633056640625, "learning_rate": 3.883458275500417e-05, "loss": 89.6929, "step": 49570 }, { "epoch": 0.20030947369271607, "grad_norm": 500.047119140625, "learning_rate": 3.883364324313512e-05, "loss": 72.081, "step": 49580 }, { "epoch": 0.20034987495808368, "grad_norm": 827.5897827148438, "learning_rate": 3.883270336409448e-05, "loss": 64.2101, "step": 49590 }, { "epoch": 0.20039027622345132, "grad_norm": 878.098388671875, "learning_rate": 3.8831763117900605e-05, "loss": 90.874, "step": 49600 }, { "epoch": 0.20043067748881896, "grad_norm": 701.7119140625, "learning_rate": 3.883082250457181e-05, "loss": 85.7707, "step": 49610 }, { "epoch": 0.20047107875418657, "grad_norm": 750.2167358398438, "learning_rate": 3.882988152412643e-05, "loss": 105.379, "step": 49620 }, { "epoch": 0.2005114800195542, "grad_norm": 1229.0809326171875, "learning_rate": 3.882894017658283e-05, "loss": 82.8475, "step": 49630 }, { "epoch": 0.20055188128492185, "grad_norm": 745.6799926757812, "learning_rate": 3.882799846195933e-05, "loss": 66.1066, "step": 49640 }, { "epoch": 0.20059228255028946, "grad_norm": 866.1677856445312, "learning_rate": 3.8827056380274316e-05, "loss": 68.2047, "step": 49650 }, { "epoch": 0.2006326838156571, "grad_norm": 1317.9329833984375, "learning_rate": 3.882611393154614e-05, "loss": 89.9241, "step": 49660 }, { "epoch": 0.20067308508102474, "grad_norm": 607.7474365234375, "learning_rate": 3.882517111579319e-05, "loss": 81.1579, "step": 49670 }, { "epoch": 0.20071348634639236, "grad_norm": 1269.857666015625, "learning_rate": 3.882422793303382e-05, "loss": 70.4856, "step": 49680 }, { "epoch": 0.20075388761176, "grad_norm": 800.6294555664062, "learning_rate": 3.882328438328645e-05, "loss": 73.7126, "step": 49690 }, { "epoch": 0.20079428887712764, "grad_norm": 1065.0087890625, "learning_rate": 3.8822340466569456e-05, "loss": 84.4078, "step": 49700 }, { "epoch": 0.20083469014249528, "grad_norm": 393.25213623046875, "learning_rate": 3.882139618290125e-05, "loss": 82.6332, "step": 49710 }, { "epoch": 0.2008750914078629, "grad_norm": 671.2138671875, "learning_rate": 3.882045153230022e-05, "loss": 75.602, "step": 49720 }, { "epoch": 0.20091549267323053, "grad_norm": 2600.48828125, "learning_rate": 3.881950651478481e-05, "loss": 101.0363, "step": 49730 }, { "epoch": 0.20095589393859817, "grad_norm": 997.9617919921875, "learning_rate": 3.8818561130373436e-05, "loss": 77.1079, "step": 49740 }, { "epoch": 0.20099629520396578, "grad_norm": 725.0342407226562, "learning_rate": 3.8817615379084514e-05, "loss": 86.4621, "step": 49750 }, { "epoch": 0.20103669646933342, "grad_norm": 812.4722900390625, "learning_rate": 3.8816669260936494e-05, "loss": 109.4252, "step": 49760 }, { "epoch": 0.20107709773470106, "grad_norm": 610.0919799804688, "learning_rate": 3.881572277594782e-05, "loss": 116.1996, "step": 49770 }, { "epoch": 0.20111749900006867, "grad_norm": 841.7056884765625, "learning_rate": 3.8814775924136945e-05, "loss": 100.2897, "step": 49780 }, { "epoch": 0.2011579002654363, "grad_norm": 1134.255615234375, "learning_rate": 3.8813828705522325e-05, "loss": 66.0785, "step": 49790 }, { "epoch": 0.20119830153080395, "grad_norm": 805.7269897460938, "learning_rate": 3.881288112012243e-05, "loss": 97.9667, "step": 49800 }, { "epoch": 0.20123870279617156, "grad_norm": 1975.1326904296875, "learning_rate": 3.8811933167955734e-05, "loss": 128.7955, "step": 49810 }, { "epoch": 0.2012791040615392, "grad_norm": 1214.29345703125, "learning_rate": 3.8810984849040715e-05, "loss": 88.5912, "step": 49820 }, { "epoch": 0.20131950532690684, "grad_norm": 1285.6263427734375, "learning_rate": 3.8810036163395855e-05, "loss": 56.1671, "step": 49830 }, { "epoch": 0.20135990659227446, "grad_norm": 748.7781982421875, "learning_rate": 3.880908711103966e-05, "loss": 56.235, "step": 49840 }, { "epoch": 0.2014003078576421, "grad_norm": 453.6629638671875, "learning_rate": 3.8808137691990634e-05, "loss": 119.4733, "step": 49850 }, { "epoch": 0.20144070912300974, "grad_norm": 251.61802673339844, "learning_rate": 3.880718790626728e-05, "loss": 81.0512, "step": 49860 }, { "epoch": 0.20148111038837738, "grad_norm": 583.4518432617188, "learning_rate": 3.8806237753888104e-05, "loss": 47.1915, "step": 49870 }, { "epoch": 0.201521511653745, "grad_norm": 886.7020874023438, "learning_rate": 3.880528723487165e-05, "loss": 59.9802, "step": 49880 }, { "epoch": 0.20156191291911263, "grad_norm": 621.6359252929688, "learning_rate": 3.880433634923644e-05, "loss": 77.426, "step": 49890 }, { "epoch": 0.20160231418448027, "grad_norm": 517.2048950195312, "learning_rate": 3.880338509700101e-05, "loss": 57.5223, "step": 49900 }, { "epoch": 0.20164271544984788, "grad_norm": 603.9962768554688, "learning_rate": 3.88024334781839e-05, "loss": 68.5799, "step": 49910 }, { "epoch": 0.20168311671521552, "grad_norm": 367.4395446777344, "learning_rate": 3.880148149280368e-05, "loss": 128.3774, "step": 49920 }, { "epoch": 0.20172351798058316, "grad_norm": 506.17791748046875, "learning_rate": 3.8800529140878894e-05, "loss": 66.3896, "step": 49930 }, { "epoch": 0.20176391924595077, "grad_norm": 600.4085083007812, "learning_rate": 3.879957642242811e-05, "loss": 84.5182, "step": 49940 }, { "epoch": 0.2018043205113184, "grad_norm": 424.294921875, "learning_rate": 3.8798623337469914e-05, "loss": 104.3975, "step": 49950 }, { "epoch": 0.20184472177668605, "grad_norm": 1428.9373779296875, "learning_rate": 3.879766988602287e-05, "loss": 81.3248, "step": 49960 }, { "epoch": 0.20188512304205367, "grad_norm": 449.1412048339844, "learning_rate": 3.879671606810558e-05, "loss": 69.7736, "step": 49970 }, { "epoch": 0.2019255243074213, "grad_norm": 695.0242309570312, "learning_rate": 3.879576188373663e-05, "loss": 89.6673, "step": 49980 }, { "epoch": 0.20196592557278895, "grad_norm": 1224.8419189453125, "learning_rate": 3.879480733293462e-05, "loss": 143.0869, "step": 49990 }, { "epoch": 0.20200632683815656, "grad_norm": 499.9820556640625, "learning_rate": 3.879385241571817e-05, "loss": 68.873, "step": 50000 }, { "epoch": 0.2020467281035242, "grad_norm": 908.8736572265625, "learning_rate": 3.879289713210589e-05, "loss": 99.5502, "step": 50010 }, { "epoch": 0.20208712936889184, "grad_norm": 424.8169250488281, "learning_rate": 3.8791941482116404e-05, "loss": 86.1843, "step": 50020 }, { "epoch": 0.20212753063425948, "grad_norm": 1957.55029296875, "learning_rate": 3.879098546576835e-05, "loss": 87.1757, "step": 50030 }, { "epoch": 0.2021679318996271, "grad_norm": 1173.882568359375, "learning_rate": 3.879002908308036e-05, "loss": 114.1178, "step": 50040 }, { "epoch": 0.20220833316499473, "grad_norm": 2099.638916015625, "learning_rate": 3.878907233407107e-05, "loss": 84.3291, "step": 50050 }, { "epoch": 0.20224873443036237, "grad_norm": 460.4996643066406, "learning_rate": 3.878811521875915e-05, "loss": 61.1567, "step": 50060 }, { "epoch": 0.20228913569572998, "grad_norm": 400.74761962890625, "learning_rate": 3.878715773716325e-05, "loss": 79.2012, "step": 50070 }, { "epoch": 0.20232953696109762, "grad_norm": 1148.2376708984375, "learning_rate": 3.8786199889302035e-05, "loss": 73.1642, "step": 50080 }, { "epoch": 0.20236993822646526, "grad_norm": 1028.424072265625, "learning_rate": 3.8785241675194175e-05, "loss": 73.1485, "step": 50090 }, { "epoch": 0.20241033949183287, "grad_norm": 832.64697265625, "learning_rate": 3.878428309485837e-05, "loss": 70.0815, "step": 50100 }, { "epoch": 0.20245074075720051, "grad_norm": 1009.1151733398438, "learning_rate": 3.878332414831329e-05, "loss": 74.1212, "step": 50110 }, { "epoch": 0.20249114202256815, "grad_norm": 651.4840698242188, "learning_rate": 3.878236483557763e-05, "loss": 101.4476, "step": 50120 }, { "epoch": 0.20253154328793577, "grad_norm": 852.27978515625, "learning_rate": 3.878140515667011e-05, "loss": 122.2347, "step": 50130 }, { "epoch": 0.2025719445533034, "grad_norm": 675.4700927734375, "learning_rate": 3.8780445111609414e-05, "loss": 87.2724, "step": 50140 }, { "epoch": 0.20261234581867105, "grad_norm": 901.0217895507812, "learning_rate": 3.877948470041428e-05, "loss": 86.2449, "step": 50150 }, { "epoch": 0.20265274708403866, "grad_norm": 1145.986083984375, "learning_rate": 3.877852392310342e-05, "loss": 90.7556, "step": 50160 }, { "epoch": 0.2026931483494063, "grad_norm": 373.2372131347656, "learning_rate": 3.877756277969557e-05, "loss": 81.8044, "step": 50170 }, { "epoch": 0.20273354961477394, "grad_norm": 822.2389526367188, "learning_rate": 3.877660127020947e-05, "loss": 72.069, "step": 50180 }, { "epoch": 0.20277395088014158, "grad_norm": 687.273193359375, "learning_rate": 3.877563939466385e-05, "loss": 56.9589, "step": 50190 }, { "epoch": 0.2028143521455092, "grad_norm": 887.560791015625, "learning_rate": 3.8774677153077485e-05, "loss": 67.9246, "step": 50200 }, { "epoch": 0.20285475341087683, "grad_norm": 526.19287109375, "learning_rate": 3.8773714545469116e-05, "loss": 90.1091, "step": 50210 }, { "epoch": 0.20289515467624447, "grad_norm": 1176.3887939453125, "learning_rate": 3.8772751571857525e-05, "loss": 91.4007, "step": 50220 }, { "epoch": 0.20293555594161208, "grad_norm": 1239.13037109375, "learning_rate": 3.877178823226147e-05, "loss": 117.7525, "step": 50230 }, { "epoch": 0.20297595720697972, "grad_norm": 521.9329833984375, "learning_rate": 3.877082452669974e-05, "loss": 103.9068, "step": 50240 }, { "epoch": 0.20301635847234736, "grad_norm": 807.4822998046875, "learning_rate": 3.876986045519112e-05, "loss": 62.8522, "step": 50250 }, { "epoch": 0.20305675973771498, "grad_norm": 386.10101318359375, "learning_rate": 3.876889601775441e-05, "loss": 68.9867, "step": 50260 }, { "epoch": 0.20309716100308262, "grad_norm": 1182.9283447265625, "learning_rate": 3.876793121440841e-05, "loss": 88.6812, "step": 50270 }, { "epoch": 0.20313756226845026, "grad_norm": 698.7742309570312, "learning_rate": 3.876696604517193e-05, "loss": 77.0877, "step": 50280 }, { "epoch": 0.20317796353381787, "grad_norm": 366.74517822265625, "learning_rate": 3.876600051006378e-05, "loss": 51.5957, "step": 50290 }, { "epoch": 0.2032183647991855, "grad_norm": 834.828857421875, "learning_rate": 3.8765034609102794e-05, "loss": 64.3455, "step": 50300 }, { "epoch": 0.20325876606455315, "grad_norm": 1612.7303466796875, "learning_rate": 3.876406834230779e-05, "loss": 90.6349, "step": 50310 }, { "epoch": 0.20329916732992076, "grad_norm": 986.4638671875, "learning_rate": 3.8763101709697625e-05, "loss": 92.7768, "step": 50320 }, { "epoch": 0.2033395685952884, "grad_norm": 1258.14404296875, "learning_rate": 3.876213471129112e-05, "loss": 98.2655, "step": 50330 }, { "epoch": 0.20337996986065604, "grad_norm": 480.3702087402344, "learning_rate": 3.876116734710714e-05, "loss": 46.751, "step": 50340 }, { "epoch": 0.20342037112602368, "grad_norm": 128.0701141357422, "learning_rate": 3.8760199617164556e-05, "loss": 87.2124, "step": 50350 }, { "epoch": 0.2034607723913913, "grad_norm": 753.6339721679688, "learning_rate": 3.8759231521482215e-05, "loss": 52.4494, "step": 50360 }, { "epoch": 0.20350117365675893, "grad_norm": 717.4784545898438, "learning_rate": 3.875826306007899e-05, "loss": 82.5807, "step": 50370 }, { "epoch": 0.20354157492212657, "grad_norm": 817.7188110351562, "learning_rate": 3.875729423297378e-05, "loss": 61.2956, "step": 50380 }, { "epoch": 0.20358197618749418, "grad_norm": 794.8400268554688, "learning_rate": 3.875632504018546e-05, "loss": 75.8191, "step": 50390 }, { "epoch": 0.20362237745286182, "grad_norm": 1094.506591796875, "learning_rate": 3.875535548173292e-05, "loss": 61.9088, "step": 50400 }, { "epoch": 0.20366277871822946, "grad_norm": 1352.447998046875, "learning_rate": 3.8754385557635076e-05, "loss": 91.2527, "step": 50410 }, { "epoch": 0.20370317998359708, "grad_norm": 474.88079833984375, "learning_rate": 3.875341526791083e-05, "loss": 66.049, "step": 50420 }, { "epoch": 0.20374358124896472, "grad_norm": 1338.16748046875, "learning_rate": 3.87524446125791e-05, "loss": 70.1112, "step": 50430 }, { "epoch": 0.20378398251433236, "grad_norm": 1756.7215576171875, "learning_rate": 3.8751473591658805e-05, "loss": 91.4397, "step": 50440 }, { "epoch": 0.20382438377969997, "grad_norm": 1858.53173828125, "learning_rate": 3.8750502205168876e-05, "loss": 70.961, "step": 50450 }, { "epoch": 0.2038647850450676, "grad_norm": 637.2379150390625, "learning_rate": 3.874953045312825e-05, "loss": 73.8202, "step": 50460 }, { "epoch": 0.20390518631043525, "grad_norm": 1144.32763671875, "learning_rate": 3.8748558335555885e-05, "loss": 129.5399, "step": 50470 }, { "epoch": 0.20394558757580286, "grad_norm": 1619.4119873046875, "learning_rate": 3.8747585852470716e-05, "loss": 100.1211, "step": 50480 }, { "epoch": 0.2039859888411705, "grad_norm": 742.9066162109375, "learning_rate": 3.874661300389171e-05, "loss": 78.3759, "step": 50490 }, { "epoch": 0.20402639010653814, "grad_norm": 663.3787841796875, "learning_rate": 3.874563978983784e-05, "loss": 70.6641, "step": 50500 }, { "epoch": 0.20406679137190578, "grad_norm": 792.5663452148438, "learning_rate": 3.874466621032806e-05, "loss": 74.8681, "step": 50510 }, { "epoch": 0.2041071926372734, "grad_norm": 356.4153747558594, "learning_rate": 3.874369226538136e-05, "loss": 48.0686, "step": 50520 }, { "epoch": 0.20414759390264103, "grad_norm": 848.6477661132812, "learning_rate": 3.874271795501674e-05, "loss": 75.6694, "step": 50530 }, { "epoch": 0.20418799516800867, "grad_norm": 968.4424438476562, "learning_rate": 3.8741743279253176e-05, "loss": 85.1814, "step": 50540 }, { "epoch": 0.20422839643337629, "grad_norm": 906.9072265625, "learning_rate": 3.874076823810968e-05, "loss": 102.1449, "step": 50550 }, { "epoch": 0.20426879769874393, "grad_norm": 1824.4039306640625, "learning_rate": 3.873979283160526e-05, "loss": 78.7749, "step": 50560 }, { "epoch": 0.20430919896411157, "grad_norm": 2528.183349609375, "learning_rate": 3.8738817059758936e-05, "loss": 76.1273, "step": 50570 }, { "epoch": 0.20434960022947918, "grad_norm": 552.7752685546875, "learning_rate": 3.873784092258972e-05, "loss": 55.3903, "step": 50580 }, { "epoch": 0.20439000149484682, "grad_norm": 845.9926147460938, "learning_rate": 3.873686442011665e-05, "loss": 66.7958, "step": 50590 }, { "epoch": 0.20443040276021446, "grad_norm": 1158.8406982421875, "learning_rate": 3.873588755235876e-05, "loss": 71.1778, "step": 50600 }, { "epoch": 0.20447080402558207, "grad_norm": 640.7486572265625, "learning_rate": 3.87349103193351e-05, "loss": 91.4142, "step": 50610 }, { "epoch": 0.2045112052909497, "grad_norm": 1400.6976318359375, "learning_rate": 3.8733932721064714e-05, "loss": 87.4391, "step": 50620 }, { "epoch": 0.20455160655631735, "grad_norm": 552.7567749023438, "learning_rate": 3.873295475756667e-05, "loss": 78.1331, "step": 50630 }, { "epoch": 0.20459200782168496, "grad_norm": 564.9442138671875, "learning_rate": 3.873197642886002e-05, "loss": 85.3637, "step": 50640 }, { "epoch": 0.2046324090870526, "grad_norm": 626.0609130859375, "learning_rate": 3.873099773496385e-05, "loss": 99.6655, "step": 50650 }, { "epoch": 0.20467281035242024, "grad_norm": 425.89886474609375, "learning_rate": 3.873001867589723e-05, "loss": 58.2747, "step": 50660 }, { "epoch": 0.20471321161778788, "grad_norm": 699.8375854492188, "learning_rate": 3.872903925167927e-05, "loss": 70.8648, "step": 50670 }, { "epoch": 0.2047536128831555, "grad_norm": 1484.742919921875, "learning_rate": 3.8728059462329035e-05, "loss": 55.6887, "step": 50680 }, { "epoch": 0.20479401414852313, "grad_norm": 1168.312255859375, "learning_rate": 3.872707930786564e-05, "loss": 105.1602, "step": 50690 }, { "epoch": 0.20483441541389077, "grad_norm": 942.3574829101562, "learning_rate": 3.872609878830819e-05, "loss": 88.1229, "step": 50700 }, { "epoch": 0.2048748166792584, "grad_norm": 527.11572265625, "learning_rate": 3.872511790367581e-05, "loss": 73.1579, "step": 50710 }, { "epoch": 0.20491521794462603, "grad_norm": 569.9839477539062, "learning_rate": 3.872413665398761e-05, "loss": 100.4503, "step": 50720 }, { "epoch": 0.20495561920999367, "grad_norm": 712.02880859375, "learning_rate": 3.872315503926273e-05, "loss": 67.1564, "step": 50730 }, { "epoch": 0.20499602047536128, "grad_norm": 894.4396362304688, "learning_rate": 3.87221730595203e-05, "loss": 61.3889, "step": 50740 }, { "epoch": 0.20503642174072892, "grad_norm": 1530.17724609375, "learning_rate": 3.872119071477947e-05, "loss": 82.5945, "step": 50750 }, { "epoch": 0.20507682300609656, "grad_norm": 529.53515625, "learning_rate": 3.8720208005059386e-05, "loss": 68.7454, "step": 50760 }, { "epoch": 0.20511722427146417, "grad_norm": 674.3864135742188, "learning_rate": 3.871922493037921e-05, "loss": 86.0123, "step": 50770 }, { "epoch": 0.2051576255368318, "grad_norm": 1044.22021484375, "learning_rate": 3.8718241490758106e-05, "loss": 92.6389, "step": 50780 }, { "epoch": 0.20519802680219945, "grad_norm": 853.1271362304688, "learning_rate": 3.871725768621526e-05, "loss": 66.9134, "step": 50790 }, { "epoch": 0.20523842806756706, "grad_norm": 583.3541259765625, "learning_rate": 3.871627351676982e-05, "loss": 64.4168, "step": 50800 }, { "epoch": 0.2052788293329347, "grad_norm": 573.4091796875, "learning_rate": 3.8715288982441e-05, "loss": 76.0434, "step": 50810 }, { "epoch": 0.20531923059830234, "grad_norm": 379.55615234375, "learning_rate": 3.8714304083247984e-05, "loss": 51.8694, "step": 50820 }, { "epoch": 0.20535963186366998, "grad_norm": 832.7188110351562, "learning_rate": 3.871331881920998e-05, "loss": 58.693, "step": 50830 }, { "epoch": 0.2054000331290376, "grad_norm": 2003.7796630859375, "learning_rate": 3.871233319034619e-05, "loss": 75.3883, "step": 50840 }, { "epoch": 0.20544043439440524, "grad_norm": 882.434814453125, "learning_rate": 3.871134719667583e-05, "loss": 103.4826, "step": 50850 }, { "epoch": 0.20548083565977288, "grad_norm": 796.6503295898438, "learning_rate": 3.8710360838218125e-05, "loss": 103.9689, "step": 50860 }, { "epoch": 0.2055212369251405, "grad_norm": 844.0625610351562, "learning_rate": 3.870937411499229e-05, "loss": 85.5435, "step": 50870 }, { "epoch": 0.20556163819050813, "grad_norm": 1352.2198486328125, "learning_rate": 3.870838702701759e-05, "loss": 89.8294, "step": 50880 }, { "epoch": 0.20560203945587577, "grad_norm": 1182.203369140625, "learning_rate": 3.8707399574313246e-05, "loss": 72.2934, "step": 50890 }, { "epoch": 0.20564244072124338, "grad_norm": 541.3485107421875, "learning_rate": 3.870641175689852e-05, "loss": 72.552, "step": 50900 }, { "epoch": 0.20568284198661102, "grad_norm": 773.8545532226562, "learning_rate": 3.870542357479266e-05, "loss": 99.4841, "step": 50910 }, { "epoch": 0.20572324325197866, "grad_norm": 521.749755859375, "learning_rate": 3.870443502801494e-05, "loss": 71.8828, "step": 50920 }, { "epoch": 0.20576364451734627, "grad_norm": 651.1212768554688, "learning_rate": 3.870344611658464e-05, "loss": 113.8711, "step": 50930 }, { "epoch": 0.2058040457827139, "grad_norm": 718.744140625, "learning_rate": 3.870245684052101e-05, "loss": 98.3473, "step": 50940 }, { "epoch": 0.20584444704808155, "grad_norm": 568.1864013671875, "learning_rate": 3.870146719984337e-05, "loss": 99.5937, "step": 50950 }, { "epoch": 0.20588484831344916, "grad_norm": 5688.7666015625, "learning_rate": 3.870047719457099e-05, "loss": 114.7975, "step": 50960 }, { "epoch": 0.2059252495788168, "grad_norm": 1301.0987548828125, "learning_rate": 3.869948682472318e-05, "loss": 65.0882, "step": 50970 }, { "epoch": 0.20596565084418444, "grad_norm": 1446.244873046875, "learning_rate": 3.8698496090319257e-05, "loss": 77.6484, "step": 50980 }, { "epoch": 0.20600605210955208, "grad_norm": 682.64306640625, "learning_rate": 3.869750499137851e-05, "loss": 59.3151, "step": 50990 }, { "epoch": 0.2060464533749197, "grad_norm": 1040.8548583984375, "learning_rate": 3.869651352792029e-05, "loss": 89.0893, "step": 51000 }, { "epoch": 0.20608685464028734, "grad_norm": 894.2227172851562, "learning_rate": 3.869552169996391e-05, "loss": 104.3016, "step": 51010 }, { "epoch": 0.20612725590565498, "grad_norm": 1589.9327392578125, "learning_rate": 3.869452950752871e-05, "loss": 64.4769, "step": 51020 }, { "epoch": 0.2061676571710226, "grad_norm": 2216.80126953125, "learning_rate": 3.869353695063403e-05, "loss": 75.212, "step": 51030 }, { "epoch": 0.20620805843639023, "grad_norm": 1083.3333740234375, "learning_rate": 3.8692544029299225e-05, "loss": 67.3579, "step": 51040 }, { "epoch": 0.20624845970175787, "grad_norm": 1293.1751708984375, "learning_rate": 3.869155074354365e-05, "loss": 83.4344, "step": 51050 }, { "epoch": 0.20628886096712548, "grad_norm": 1822.27587890625, "learning_rate": 3.869055709338667e-05, "loss": 114.4531, "step": 51060 }, { "epoch": 0.20632926223249312, "grad_norm": 767.5370483398438, "learning_rate": 3.8689563078847656e-05, "loss": 71.1274, "step": 51070 }, { "epoch": 0.20636966349786076, "grad_norm": 781.1602783203125, "learning_rate": 3.868856869994599e-05, "loss": 75.8399, "step": 51080 }, { "epoch": 0.20641006476322837, "grad_norm": 947.2127075195312, "learning_rate": 3.868757395670105e-05, "loss": 116.0168, "step": 51090 }, { "epoch": 0.206450466028596, "grad_norm": 1369.5260009765625, "learning_rate": 3.8686578849132244e-05, "loss": 93.0958, "step": 51100 }, { "epoch": 0.20649086729396365, "grad_norm": 949.6033935546875, "learning_rate": 3.8685583377258955e-05, "loss": 59.0235, "step": 51110 }, { "epoch": 0.20653126855933127, "grad_norm": 528.279296875, "learning_rate": 3.86845875411006e-05, "loss": 78.9101, "step": 51120 }, { "epoch": 0.2065716698246989, "grad_norm": 1002.0960083007812, "learning_rate": 3.8683591340676596e-05, "loss": 93.5453, "step": 51130 }, { "epoch": 0.20661207109006655, "grad_norm": 651.449462890625, "learning_rate": 3.868259477600636e-05, "loss": 86.5347, "step": 51140 }, { "epoch": 0.20665247235543419, "grad_norm": 851.9552001953125, "learning_rate": 3.868159784710931e-05, "loss": 110.314, "step": 51150 }, { "epoch": 0.2066928736208018, "grad_norm": 632.49951171875, "learning_rate": 3.8680600554004905e-05, "loss": 70.093, "step": 51160 }, { "epoch": 0.20673327488616944, "grad_norm": 987.5447998046875, "learning_rate": 3.867960289671257e-05, "loss": 73.0326, "step": 51170 }, { "epoch": 0.20677367615153708, "grad_norm": 1574.3292236328125, "learning_rate": 3.867860487525175e-05, "loss": 109.6682, "step": 51180 }, { "epoch": 0.2068140774169047, "grad_norm": 530.6597900390625, "learning_rate": 3.8677606489641924e-05, "loss": 69.3783, "step": 51190 }, { "epoch": 0.20685447868227233, "grad_norm": 1183.2138671875, "learning_rate": 3.867660773990254e-05, "loss": 102.358, "step": 51200 }, { "epoch": 0.20689487994763997, "grad_norm": 623.4737548828125, "learning_rate": 3.867560862605307e-05, "loss": 63.161, "step": 51210 }, { "epoch": 0.20693528121300758, "grad_norm": 302.43426513671875, "learning_rate": 3.8674609148112996e-05, "loss": 76.0289, "step": 51220 }, { "epoch": 0.20697568247837522, "grad_norm": 532.8798217773438, "learning_rate": 3.86736093061018e-05, "loss": 115.1544, "step": 51230 }, { "epoch": 0.20701608374374286, "grad_norm": 871.456787109375, "learning_rate": 3.8672609100038986e-05, "loss": 78.027, "step": 51240 }, { "epoch": 0.20705648500911047, "grad_norm": 733.9722290039062, "learning_rate": 3.8671608529944035e-05, "loss": 89.2671, "step": 51250 }, { "epoch": 0.20709688627447811, "grad_norm": 589.8917236328125, "learning_rate": 3.867060759583647e-05, "loss": 61.1345, "step": 51260 }, { "epoch": 0.20713728753984575, "grad_norm": 550.5474243164062, "learning_rate": 3.866960629773579e-05, "loss": 76.382, "step": 51270 }, { "epoch": 0.20717768880521337, "grad_norm": 612.1178588867188, "learning_rate": 3.866860463566153e-05, "loss": 70.8993, "step": 51280 }, { "epoch": 0.207218090070581, "grad_norm": 1227.650634765625, "learning_rate": 3.866760260963321e-05, "loss": 54.2812, "step": 51290 }, { "epoch": 0.20725849133594865, "grad_norm": 629.446533203125, "learning_rate": 3.8666600219670365e-05, "loss": 86.5742, "step": 51300 }, { "epoch": 0.2072988926013163, "grad_norm": 643.613525390625, "learning_rate": 3.866559746579254e-05, "loss": 62.8, "step": 51310 }, { "epoch": 0.2073392938666839, "grad_norm": 1438.744140625, "learning_rate": 3.866459434801928e-05, "loss": 68.6836, "step": 51320 }, { "epoch": 0.20737969513205154, "grad_norm": 446.6057434082031, "learning_rate": 3.8663590866370147e-05, "loss": 67.867, "step": 51330 }, { "epoch": 0.20742009639741918, "grad_norm": 733.7489624023438, "learning_rate": 3.86625870208647e-05, "loss": 88.8869, "step": 51340 }, { "epoch": 0.2074604976627868, "grad_norm": 1379.1614990234375, "learning_rate": 3.866158281152251e-05, "loss": 61.0902, "step": 51350 }, { "epoch": 0.20750089892815443, "grad_norm": 1295.2818603515625, "learning_rate": 3.8660578238363156e-05, "loss": 55.145, "step": 51360 }, { "epoch": 0.20754130019352207, "grad_norm": 1555.087158203125, "learning_rate": 3.865957330140622e-05, "loss": 85.2966, "step": 51370 }, { "epoch": 0.20758170145888968, "grad_norm": 1380.683837890625, "learning_rate": 3.86585680006713e-05, "loss": 81.9897, "step": 51380 }, { "epoch": 0.20762210272425732, "grad_norm": 670.623291015625, "learning_rate": 3.865756233617799e-05, "loss": 101.2558, "step": 51390 }, { "epoch": 0.20766250398962496, "grad_norm": 602.7833251953125, "learning_rate": 3.8656556307945894e-05, "loss": 57.1718, "step": 51400 }, { "epoch": 0.20770290525499258, "grad_norm": 1327.6904296875, "learning_rate": 3.8655549915994626e-05, "loss": 82.1553, "step": 51410 }, { "epoch": 0.20774330652036022, "grad_norm": 1156.4627685546875, "learning_rate": 3.8654543160343816e-05, "loss": 101.3851, "step": 51420 }, { "epoch": 0.20778370778572786, "grad_norm": 1305.48974609375, "learning_rate": 3.8653536041013075e-05, "loss": 82.6139, "step": 51430 }, { "epoch": 0.20782410905109547, "grad_norm": 755.2139892578125, "learning_rate": 3.865252855802205e-05, "loss": 49.8036, "step": 51440 }, { "epoch": 0.2078645103164631, "grad_norm": 902.1483764648438, "learning_rate": 3.865152071139038e-05, "loss": 74.7251, "step": 51450 }, { "epoch": 0.20790491158183075, "grad_norm": 910.2382202148438, "learning_rate": 3.8650512501137704e-05, "loss": 59.3921, "step": 51460 }, { "epoch": 0.2079453128471984, "grad_norm": 1980.9892578125, "learning_rate": 3.8649503927283686e-05, "loss": 114.7764, "step": 51470 }, { "epoch": 0.207985714112566, "grad_norm": 922.23583984375, "learning_rate": 3.864849498984799e-05, "loss": 60.0026, "step": 51480 }, { "epoch": 0.20802611537793364, "grad_norm": 565.62548828125, "learning_rate": 3.864748568885029e-05, "loss": 101.4692, "step": 51490 }, { "epoch": 0.20806651664330128, "grad_norm": 476.4505615234375, "learning_rate": 3.864647602431025e-05, "loss": 59.0585, "step": 51500 }, { "epoch": 0.2081069179086689, "grad_norm": 794.0309448242188, "learning_rate": 3.864546599624756e-05, "loss": 75.9052, "step": 51510 }, { "epoch": 0.20814731917403653, "grad_norm": 659.0484619140625, "learning_rate": 3.864445560468191e-05, "loss": 94.3838, "step": 51520 }, { "epoch": 0.20818772043940417, "grad_norm": 1586.227294921875, "learning_rate": 3.8643444849633e-05, "loss": 109.3399, "step": 51530 }, { "epoch": 0.20822812170477178, "grad_norm": 415.051513671875, "learning_rate": 3.864243373112053e-05, "loss": 52.1959, "step": 51540 }, { "epoch": 0.20826852297013942, "grad_norm": 1006.6847534179688, "learning_rate": 3.864142224916422e-05, "loss": 77.8924, "step": 51550 }, { "epoch": 0.20830892423550706, "grad_norm": 642.0609130859375, "learning_rate": 3.864041040378379e-05, "loss": 48.1212, "step": 51560 }, { "epoch": 0.20834932550087468, "grad_norm": 704.0679931640625, "learning_rate": 3.863939819499896e-05, "loss": 59.1892, "step": 51570 }, { "epoch": 0.20838972676624232, "grad_norm": 866.2320556640625, "learning_rate": 3.8638385622829464e-05, "loss": 109.2826, "step": 51580 }, { "epoch": 0.20843012803160996, "grad_norm": 560.8289794921875, "learning_rate": 3.863737268729504e-05, "loss": 59.0756, "step": 51590 }, { "epoch": 0.20847052929697757, "grad_norm": 1119.5093994140625, "learning_rate": 3.863635938841545e-05, "loss": 63.5763, "step": 51600 }, { "epoch": 0.2085109305623452, "grad_norm": 484.5401306152344, "learning_rate": 3.8635345726210435e-05, "loss": 100.063, "step": 51610 }, { "epoch": 0.20855133182771285, "grad_norm": 1919.9112548828125, "learning_rate": 3.863433170069976e-05, "loss": 65.146, "step": 51620 }, { "epoch": 0.20859173309308046, "grad_norm": 329.201904296875, "learning_rate": 3.86333173119032e-05, "loss": 79.1359, "step": 51630 }, { "epoch": 0.2086321343584481, "grad_norm": 1020.9974365234375, "learning_rate": 3.863230255984052e-05, "loss": 43.7593, "step": 51640 }, { "epoch": 0.20867253562381574, "grad_norm": 394.7405700683594, "learning_rate": 3.863128744453152e-05, "loss": 59.4796, "step": 51650 }, { "epoch": 0.20871293688918338, "grad_norm": 674.1074829101562, "learning_rate": 3.863027196599596e-05, "loss": 76.5259, "step": 51660 }, { "epoch": 0.208753338154551, "grad_norm": 1158.9613037109375, "learning_rate": 3.8629256124253675e-05, "loss": 83.799, "step": 51670 }, { "epoch": 0.20879373941991863, "grad_norm": 998.66064453125, "learning_rate": 3.8628239919324435e-05, "loss": 50.7926, "step": 51680 }, { "epoch": 0.20883414068528627, "grad_norm": 1256.56396484375, "learning_rate": 3.862722335122808e-05, "loss": 94.6407, "step": 51690 }, { "epoch": 0.20887454195065389, "grad_norm": 0.0, "learning_rate": 3.862620641998441e-05, "loss": 70.0929, "step": 51700 }, { "epoch": 0.20891494321602153, "grad_norm": 926.8525390625, "learning_rate": 3.862518912561326e-05, "loss": 67.6057, "step": 51710 }, { "epoch": 0.20895534448138917, "grad_norm": 326.4804382324219, "learning_rate": 3.862417146813445e-05, "loss": 61.7977, "step": 51720 }, { "epoch": 0.20899574574675678, "grad_norm": 477.6756896972656, "learning_rate": 3.862315344756784e-05, "loss": 92.2266, "step": 51730 }, { "epoch": 0.20903614701212442, "grad_norm": 405.753173828125, "learning_rate": 3.862213506393326e-05, "loss": 70.3351, "step": 51740 }, { "epoch": 0.20907654827749206, "grad_norm": 1080.689453125, "learning_rate": 3.862111631725057e-05, "loss": 96.0838, "step": 51750 }, { "epoch": 0.20911694954285967, "grad_norm": 627.3723754882812, "learning_rate": 3.862009720753963e-05, "loss": 93.8915, "step": 51760 }, { "epoch": 0.2091573508082273, "grad_norm": 820.82763671875, "learning_rate": 3.8619077734820314e-05, "loss": 61.6034, "step": 51770 }, { "epoch": 0.20919775207359495, "grad_norm": 842.6414184570312, "learning_rate": 3.861805789911248e-05, "loss": 153.1677, "step": 51780 }, { "epoch": 0.20923815333896256, "grad_norm": 714.5660400390625, "learning_rate": 3.8617037700436034e-05, "loss": 57.6857, "step": 51790 }, { "epoch": 0.2092785546043302, "grad_norm": 627.9758911132812, "learning_rate": 3.861601713881085e-05, "loss": 55.2468, "step": 51800 }, { "epoch": 0.20931895586969784, "grad_norm": 1113.80712890625, "learning_rate": 3.8614996214256826e-05, "loss": 89.2621, "step": 51810 }, { "epoch": 0.20935935713506548, "grad_norm": 531.0305786132812, "learning_rate": 3.861397492679387e-05, "loss": 61.7609, "step": 51820 }, { "epoch": 0.2093997584004331, "grad_norm": 1602.581787109375, "learning_rate": 3.861295327644189e-05, "loss": 76.7713, "step": 51830 }, { "epoch": 0.20944015966580073, "grad_norm": 1647.34130859375, "learning_rate": 3.8611931263220794e-05, "loss": 88.0155, "step": 51840 }, { "epoch": 0.20948056093116837, "grad_norm": 1438.4656982421875, "learning_rate": 3.861090888715052e-05, "loss": 94.1094, "step": 51850 }, { "epoch": 0.209520962196536, "grad_norm": 1173.360107421875, "learning_rate": 3.8609886148251006e-05, "loss": 135.0007, "step": 51860 }, { "epoch": 0.20956136346190363, "grad_norm": 1216.895751953125, "learning_rate": 3.8608863046542164e-05, "loss": 61.3545, "step": 51870 }, { "epoch": 0.20960176472727127, "grad_norm": 852.8516845703125, "learning_rate": 3.860783958204397e-05, "loss": 90.4901, "step": 51880 }, { "epoch": 0.20964216599263888, "grad_norm": 452.8900146484375, "learning_rate": 3.860681575477636e-05, "loss": 62.5983, "step": 51890 }, { "epoch": 0.20968256725800652, "grad_norm": 377.25396728515625, "learning_rate": 3.8605791564759296e-05, "loss": 59.1545, "step": 51900 }, { "epoch": 0.20972296852337416, "grad_norm": 1327.0433349609375, "learning_rate": 3.8604767012012746e-05, "loss": 90.5913, "step": 51910 }, { "epoch": 0.20976336978874177, "grad_norm": 653.3995971679688, "learning_rate": 3.8603742096556687e-05, "loss": 86.1637, "step": 51920 }, { "epoch": 0.2098037710541094, "grad_norm": 574.94873046875, "learning_rate": 3.8602716818411093e-05, "loss": 74.9849, "step": 51930 }, { "epoch": 0.20984417231947705, "grad_norm": 793.8121337890625, "learning_rate": 3.8601691177595964e-05, "loss": 71.8904, "step": 51940 }, { "epoch": 0.20988457358484466, "grad_norm": 1037.7066650390625, "learning_rate": 3.860066517413129e-05, "loss": 84.2814, "step": 51950 }, { "epoch": 0.2099249748502123, "grad_norm": 623.3878784179688, "learning_rate": 3.859963880803706e-05, "loss": 63.5703, "step": 51960 }, { "epoch": 0.20996537611557994, "grad_norm": 418.7212219238281, "learning_rate": 3.859861207933331e-05, "loss": 64.8058, "step": 51970 }, { "epoch": 0.21000577738094758, "grad_norm": 913.1371459960938, "learning_rate": 3.8597584988040034e-05, "loss": 55.0011, "step": 51980 }, { "epoch": 0.2100461786463152, "grad_norm": 1014.57861328125, "learning_rate": 3.859655753417726e-05, "loss": 82.6761, "step": 51990 }, { "epoch": 0.21008657991168284, "grad_norm": 930.0344848632812, "learning_rate": 3.859552971776503e-05, "loss": 86.5342, "step": 52000 }, { "epoch": 0.21012698117705048, "grad_norm": 1351.761474609375, "learning_rate": 3.8594501538823374e-05, "loss": 62.6794, "step": 52010 }, { "epoch": 0.2101673824424181, "grad_norm": 786.5313720703125, "learning_rate": 3.8593472997372336e-05, "loss": 77.6322, "step": 52020 }, { "epoch": 0.21020778370778573, "grad_norm": 1234.1259765625, "learning_rate": 3.8592444093431976e-05, "loss": 62.3295, "step": 52030 }, { "epoch": 0.21024818497315337, "grad_norm": 829.6130981445312, "learning_rate": 3.859141482702233e-05, "loss": 84.1906, "step": 52040 }, { "epoch": 0.21028858623852098, "grad_norm": 488.3553771972656, "learning_rate": 3.8590385198163495e-05, "loss": 65.9482, "step": 52050 }, { "epoch": 0.21032898750388862, "grad_norm": 1383.439208984375, "learning_rate": 3.858935520687553e-05, "loss": 84.0547, "step": 52060 }, { "epoch": 0.21036938876925626, "grad_norm": 555.8527221679688, "learning_rate": 3.858832485317851e-05, "loss": 82.3044, "step": 52070 }, { "epoch": 0.21040979003462387, "grad_norm": 410.3157958984375, "learning_rate": 3.8587294137092526e-05, "loss": 68.6609, "step": 52080 }, { "epoch": 0.2104501912999915, "grad_norm": 867.0938720703125, "learning_rate": 3.858626305863767e-05, "loss": 86.5969, "step": 52090 }, { "epoch": 0.21049059256535915, "grad_norm": 1007.9744262695312, "learning_rate": 3.8585231617834054e-05, "loss": 87.7424, "step": 52100 }, { "epoch": 0.21053099383072676, "grad_norm": 855.5218505859375, "learning_rate": 3.8584199814701774e-05, "loss": 67.6049, "step": 52110 }, { "epoch": 0.2105713950960944, "grad_norm": 1032.388427734375, "learning_rate": 3.8583167649260956e-05, "loss": 81.8633, "step": 52120 }, { "epoch": 0.21061179636146204, "grad_norm": 846.8680419921875, "learning_rate": 3.858213512153171e-05, "loss": 122.2433, "step": 52130 }, { "epoch": 0.21065219762682968, "grad_norm": 658.4915161132812, "learning_rate": 3.858110223153418e-05, "loss": 43.3111, "step": 52140 }, { "epoch": 0.2106925988921973, "grad_norm": 494.8357238769531, "learning_rate": 3.8580068979288495e-05, "loss": 81.8783, "step": 52150 }, { "epoch": 0.21073300015756494, "grad_norm": 657.0929565429688, "learning_rate": 3.8579035364814793e-05, "loss": 95.437, "step": 52160 }, { "epoch": 0.21077340142293258, "grad_norm": 607.5389404296875, "learning_rate": 3.857800138813324e-05, "loss": 99.3467, "step": 52170 }, { "epoch": 0.2108138026883002, "grad_norm": 1181.9508056640625, "learning_rate": 3.857696704926398e-05, "loss": 71.005, "step": 52180 }, { "epoch": 0.21085420395366783, "grad_norm": 888.6665649414062, "learning_rate": 3.857593234822718e-05, "loss": 97.3609, "step": 52190 }, { "epoch": 0.21089460521903547, "grad_norm": 0.0, "learning_rate": 3.857489728504303e-05, "loss": 54.3101, "step": 52200 }, { "epoch": 0.21093500648440308, "grad_norm": 316.5389099121094, "learning_rate": 3.857386185973168e-05, "loss": 108.8267, "step": 52210 }, { "epoch": 0.21097540774977072, "grad_norm": 838.7566528320312, "learning_rate": 3.857282607231334e-05, "loss": 69.4257, "step": 52220 }, { "epoch": 0.21101580901513836, "grad_norm": 485.1976623535156, "learning_rate": 3.857178992280819e-05, "loss": 60.2769, "step": 52230 }, { "epoch": 0.21105621028050597, "grad_norm": 528.6210327148438, "learning_rate": 3.857075341123643e-05, "loss": 63.1721, "step": 52240 }, { "epoch": 0.2110966115458736, "grad_norm": 664.318115234375, "learning_rate": 3.856971653761827e-05, "loss": 81.4066, "step": 52250 }, { "epoch": 0.21113701281124125, "grad_norm": 818.0326538085938, "learning_rate": 3.856867930197393e-05, "loss": 106.0535, "step": 52260 }, { "epoch": 0.21117741407660887, "grad_norm": 646.04638671875, "learning_rate": 3.8567641704323636e-05, "loss": 82.7102, "step": 52270 }, { "epoch": 0.2112178153419765, "grad_norm": 515.3419799804688, "learning_rate": 3.8566603744687595e-05, "loss": 60.0256, "step": 52280 }, { "epoch": 0.21125821660734415, "grad_norm": 570.6306762695312, "learning_rate": 3.856556542308606e-05, "loss": 118.7118, "step": 52290 }, { "epoch": 0.21129861787271179, "grad_norm": 588.5262451171875, "learning_rate": 3.8564526739539266e-05, "loss": 87.5939, "step": 52300 }, { "epoch": 0.2113390191380794, "grad_norm": 433.0246887207031, "learning_rate": 3.856348769406747e-05, "loss": 57.9482, "step": 52310 }, { "epoch": 0.21137942040344704, "grad_norm": 856.47216796875, "learning_rate": 3.856244828669092e-05, "loss": 78.945, "step": 52320 }, { "epoch": 0.21141982166881468, "grad_norm": 381.0574951171875, "learning_rate": 3.856140851742989e-05, "loss": 48.9479, "step": 52330 }, { "epoch": 0.2114602229341823, "grad_norm": 1054.4591064453125, "learning_rate": 3.856036838630464e-05, "loss": 101.4416, "step": 52340 }, { "epoch": 0.21150062419954993, "grad_norm": 488.54656982421875, "learning_rate": 3.855932789333546e-05, "loss": 94.751, "step": 52350 }, { "epoch": 0.21154102546491757, "grad_norm": 1185.2191162109375, "learning_rate": 3.855828703854262e-05, "loss": 103.5967, "step": 52360 }, { "epoch": 0.21158142673028518, "grad_norm": 958.7376708984375, "learning_rate": 3.8557245821946414e-05, "loss": 84.2619, "step": 52370 }, { "epoch": 0.21162182799565282, "grad_norm": 808.8856811523438, "learning_rate": 3.8556204243567156e-05, "loss": 138.5501, "step": 52380 }, { "epoch": 0.21166222926102046, "grad_norm": 712.3560791015625, "learning_rate": 3.855516230342514e-05, "loss": 71.5041, "step": 52390 }, { "epoch": 0.21170263052638807, "grad_norm": 976.9653930664062, "learning_rate": 3.8554120001540684e-05, "loss": 64.854, "step": 52400 }, { "epoch": 0.21174303179175571, "grad_norm": 1233.3726806640625, "learning_rate": 3.8553077337934114e-05, "loss": 65.5629, "step": 52410 }, { "epoch": 0.21178343305712335, "grad_norm": 502.90521240234375, "learning_rate": 3.855203431262574e-05, "loss": 73.8202, "step": 52420 }, { "epoch": 0.21182383432249097, "grad_norm": 975.7722778320312, "learning_rate": 3.85509909256359e-05, "loss": 103.7028, "step": 52430 }, { "epoch": 0.2118642355878586, "grad_norm": 1887.445068359375, "learning_rate": 3.854994717698495e-05, "loss": 108.6903, "step": 52440 }, { "epoch": 0.21190463685322625, "grad_norm": 1207.58203125, "learning_rate": 3.8548903066693234e-05, "loss": 60.712, "step": 52450 }, { "epoch": 0.2119450381185939, "grad_norm": 774.343017578125, "learning_rate": 3.854785859478109e-05, "loss": 68.7223, "step": 52460 }, { "epoch": 0.2119854393839615, "grad_norm": 659.7357788085938, "learning_rate": 3.85468137612689e-05, "loss": 50.9767, "step": 52470 }, { "epoch": 0.21202584064932914, "grad_norm": 601.4818115234375, "learning_rate": 3.8545768566177034e-05, "loss": 63.6934, "step": 52480 }, { "epoch": 0.21206624191469678, "grad_norm": 1886.767333984375, "learning_rate": 3.8544723009525855e-05, "loss": 75.3949, "step": 52490 }, { "epoch": 0.2121066431800644, "grad_norm": 621.9034423828125, "learning_rate": 3.854367709133575e-05, "loss": 67.8432, "step": 52500 }, { "epoch": 0.21214704444543203, "grad_norm": 966.6736450195312, "learning_rate": 3.854263081162712e-05, "loss": 94.2898, "step": 52510 }, { "epoch": 0.21218744571079967, "grad_norm": 649.41162109375, "learning_rate": 3.854158417042035e-05, "loss": 80.9697, "step": 52520 }, { "epoch": 0.21222784697616728, "grad_norm": 616.9508666992188, "learning_rate": 3.854053716773586e-05, "loss": 43.8091, "step": 52530 }, { "epoch": 0.21226824824153492, "grad_norm": 714.7644653320312, "learning_rate": 3.8539489803594044e-05, "loss": 62.8353, "step": 52540 }, { "epoch": 0.21230864950690256, "grad_norm": 2239.86865234375, "learning_rate": 3.853844207801533e-05, "loss": 59.3144, "step": 52550 }, { "epoch": 0.21234905077227018, "grad_norm": 999.2901611328125, "learning_rate": 3.853739399102014e-05, "loss": 67.6813, "step": 52560 }, { "epoch": 0.21238945203763782, "grad_norm": 983.7887573242188, "learning_rate": 3.8536345542628925e-05, "loss": 72.0771, "step": 52570 }, { "epoch": 0.21242985330300546, "grad_norm": 2161.25634765625, "learning_rate": 3.85352967328621e-05, "loss": 97.4594, "step": 52580 }, { "epoch": 0.21247025456837307, "grad_norm": 323.06915283203125, "learning_rate": 3.8534247561740124e-05, "loss": 79.5232, "step": 52590 }, { "epoch": 0.2125106558337407, "grad_norm": 605.979248046875, "learning_rate": 3.853319802928345e-05, "loss": 87.567, "step": 52600 }, { "epoch": 0.21255105709910835, "grad_norm": 1341.1494140625, "learning_rate": 3.853214813551254e-05, "loss": 83.6259, "step": 52610 }, { "epoch": 0.212591458364476, "grad_norm": 652.4749755859375, "learning_rate": 3.8531097880447854e-05, "loss": 61.8694, "step": 52620 }, { "epoch": 0.2126318596298436, "grad_norm": 1503.4381103515625, "learning_rate": 3.853004726410988e-05, "loss": 101.6983, "step": 52630 }, { "epoch": 0.21267226089521124, "grad_norm": 1674.4990234375, "learning_rate": 3.852899628651909e-05, "loss": 95.5688, "step": 52640 }, { "epoch": 0.21271266216057888, "grad_norm": 196.89166259765625, "learning_rate": 3.852794494769599e-05, "loss": 41.1336, "step": 52650 }, { "epoch": 0.2127530634259465, "grad_norm": 1766.521240234375, "learning_rate": 3.8526893247661056e-05, "loss": 82.6849, "step": 52660 }, { "epoch": 0.21279346469131413, "grad_norm": 744.296142578125, "learning_rate": 3.85258411864348e-05, "loss": 101.108, "step": 52670 }, { "epoch": 0.21283386595668177, "grad_norm": 627.980712890625, "learning_rate": 3.852478876403773e-05, "loss": 103.0711, "step": 52680 }, { "epoch": 0.21287426722204938, "grad_norm": 1504.383056640625, "learning_rate": 3.8523735980490375e-05, "loss": 93.4536, "step": 52690 }, { "epoch": 0.21291466848741702, "grad_norm": 1216.134033203125, "learning_rate": 3.8522682835813244e-05, "loss": 71.4415, "step": 52700 }, { "epoch": 0.21295506975278466, "grad_norm": 537.6961059570312, "learning_rate": 3.852162933002687e-05, "loss": 61.1766, "step": 52710 }, { "epoch": 0.21299547101815228, "grad_norm": 490.43096923828125, "learning_rate": 3.8520575463151805e-05, "loss": 54.5532, "step": 52720 }, { "epoch": 0.21303587228351992, "grad_norm": 859.5098876953125, "learning_rate": 3.851952123520859e-05, "loss": 90.5423, "step": 52730 }, { "epoch": 0.21307627354888756, "grad_norm": 1086.8525390625, "learning_rate": 3.851846664621776e-05, "loss": 58.3408, "step": 52740 }, { "epoch": 0.21311667481425517, "grad_norm": 696.736328125, "learning_rate": 3.85174116961999e-05, "loss": 113.3828, "step": 52750 }, { "epoch": 0.2131570760796228, "grad_norm": 2076.949462890625, "learning_rate": 3.851635638517556e-05, "loss": 63.6123, "step": 52760 }, { "epoch": 0.21319747734499045, "grad_norm": 498.77569580078125, "learning_rate": 3.851530071316532e-05, "loss": 80.4989, "step": 52770 }, { "epoch": 0.2132378786103581, "grad_norm": 845.3126831054688, "learning_rate": 3.851424468018976e-05, "loss": 72.9964, "step": 52780 }, { "epoch": 0.2132782798757257, "grad_norm": 2801.711181640625, "learning_rate": 3.851318828626947e-05, "loss": 144.1869, "step": 52790 }, { "epoch": 0.21331868114109334, "grad_norm": 325.9803161621094, "learning_rate": 3.851213153142505e-05, "loss": 58.3193, "step": 52800 }, { "epoch": 0.21335908240646098, "grad_norm": 1257.360107421875, "learning_rate": 3.851107441567708e-05, "loss": 62.4607, "step": 52810 }, { "epoch": 0.2133994836718286, "grad_norm": 1192.9422607421875, "learning_rate": 3.85100169390462e-05, "loss": 77.8515, "step": 52820 }, { "epoch": 0.21343988493719623, "grad_norm": 911.9278564453125, "learning_rate": 3.8508959101553e-05, "loss": 87.0119, "step": 52830 }, { "epoch": 0.21348028620256387, "grad_norm": 663.7138671875, "learning_rate": 3.850790090321812e-05, "loss": 78.5091, "step": 52840 }, { "epoch": 0.21352068746793149, "grad_norm": 660.0030517578125, "learning_rate": 3.850684234406217e-05, "loss": 45.0362, "step": 52850 }, { "epoch": 0.21356108873329913, "grad_norm": 905.1046142578125, "learning_rate": 3.850578342410581e-05, "loss": 45.9832, "step": 52860 }, { "epoch": 0.21360148999866677, "grad_norm": 867.9483642578125, "learning_rate": 3.8504724143369674e-05, "loss": 53.6359, "step": 52870 }, { "epoch": 0.21364189126403438, "grad_norm": 476.4436340332031, "learning_rate": 3.850366450187442e-05, "loss": 91.5648, "step": 52880 }, { "epoch": 0.21368229252940202, "grad_norm": 839.8441162109375, "learning_rate": 3.850260449964069e-05, "loss": 85.5399, "step": 52890 }, { "epoch": 0.21372269379476966, "grad_norm": 611.8558349609375, "learning_rate": 3.850154413668916e-05, "loss": 73.8346, "step": 52900 }, { "epoch": 0.21376309506013727, "grad_norm": 688.4009399414062, "learning_rate": 3.8500483413040514e-05, "loss": 66.8995, "step": 52910 }, { "epoch": 0.2138034963255049, "grad_norm": 1049.5130615234375, "learning_rate": 3.849942232871541e-05, "loss": 104.3818, "step": 52920 }, { "epoch": 0.21384389759087255, "grad_norm": 533.0537109375, "learning_rate": 3.849836088373454e-05, "loss": 70.7031, "step": 52930 }, { "epoch": 0.2138842988562402, "grad_norm": 533.7456665039062, "learning_rate": 3.849729907811861e-05, "loss": 81.8711, "step": 52940 }, { "epoch": 0.2139247001216078, "grad_norm": 1017.4571533203125, "learning_rate": 3.8496236911888306e-05, "loss": 71.7599, "step": 52950 }, { "epoch": 0.21396510138697544, "grad_norm": 2414.6064453125, "learning_rate": 3.8495174385064335e-05, "loss": 107.8708, "step": 52960 }, { "epoch": 0.21400550265234308, "grad_norm": 415.21685791015625, "learning_rate": 3.849411149766743e-05, "loss": 87.1992, "step": 52970 }, { "epoch": 0.2140459039177107, "grad_norm": 1234.713134765625, "learning_rate": 3.849304824971829e-05, "loss": 62.1045, "step": 52980 }, { "epoch": 0.21408630518307833, "grad_norm": 842.6171264648438, "learning_rate": 3.849198464123766e-05, "loss": 113.1145, "step": 52990 }, { "epoch": 0.21412670644844597, "grad_norm": 841.9878540039062, "learning_rate": 3.849092067224627e-05, "loss": 56.3479, "step": 53000 }, { "epoch": 0.2141671077138136, "grad_norm": 1213.228271484375, "learning_rate": 3.848985634276486e-05, "loss": 87.3512, "step": 53010 }, { "epoch": 0.21420750897918123, "grad_norm": 1443.3297119140625, "learning_rate": 3.848879165281417e-05, "loss": 52.2816, "step": 53020 }, { "epoch": 0.21424791024454887, "grad_norm": 1061.485107421875, "learning_rate": 3.848772660241499e-05, "loss": 86.4943, "step": 53030 }, { "epoch": 0.21428831150991648, "grad_norm": 624.9254150390625, "learning_rate": 3.848666119158804e-05, "loss": 63.9003, "step": 53040 }, { "epoch": 0.21432871277528412, "grad_norm": 1313.3116455078125, "learning_rate": 3.848559542035412e-05, "loss": 69.6492, "step": 53050 }, { "epoch": 0.21436911404065176, "grad_norm": 1044.481689453125, "learning_rate": 3.848452928873401e-05, "loss": 79.7042, "step": 53060 }, { "epoch": 0.21440951530601937, "grad_norm": 989.2664184570312, "learning_rate": 3.848346279674847e-05, "loss": 105.1888, "step": 53070 }, { "epoch": 0.214449916571387, "grad_norm": 902.6455688476562, "learning_rate": 3.848239594441831e-05, "loss": 90.2427, "step": 53080 }, { "epoch": 0.21449031783675465, "grad_norm": 2429.779541015625, "learning_rate": 3.848132873176434e-05, "loss": 85.9649, "step": 53090 }, { "epoch": 0.2145307191021223, "grad_norm": 658.7740478515625, "learning_rate": 3.848026115880734e-05, "loss": 52.3369, "step": 53100 }, { "epoch": 0.2145711203674899, "grad_norm": 743.6819458007812, "learning_rate": 3.847919322556814e-05, "loss": 112.591, "step": 53110 }, { "epoch": 0.21461152163285754, "grad_norm": 0.0, "learning_rate": 3.847812493206754e-05, "loss": 59.0467, "step": 53120 }, { "epoch": 0.21465192289822518, "grad_norm": 4304.6552734375, "learning_rate": 3.84770562783264e-05, "loss": 78.0041, "step": 53130 }, { "epoch": 0.2146923241635928, "grad_norm": 735.5576782226562, "learning_rate": 3.847598726436553e-05, "loss": 63.3402, "step": 53140 }, { "epoch": 0.21473272542896044, "grad_norm": 766.4822998046875, "learning_rate": 3.847491789020576e-05, "loss": 149.0328, "step": 53150 }, { "epoch": 0.21477312669432808, "grad_norm": 682.111572265625, "learning_rate": 3.8473848155867976e-05, "loss": 84.1817, "step": 53160 }, { "epoch": 0.2148135279596957, "grad_norm": 791.5773315429688, "learning_rate": 3.8472778061373e-05, "loss": 74.4928, "step": 53170 }, { "epoch": 0.21485392922506333, "grad_norm": 898.5239868164062, "learning_rate": 3.8471707606741706e-05, "loss": 66.1085, "step": 53180 }, { "epoch": 0.21489433049043097, "grad_norm": 621.9130249023438, "learning_rate": 3.8470636791994965e-05, "loss": 75.2301, "step": 53190 }, { "epoch": 0.21493473175579858, "grad_norm": 1194.8358154296875, "learning_rate": 3.8469565617153646e-05, "loss": 95.7897, "step": 53200 }, { "epoch": 0.21497513302116622, "grad_norm": 3260.6416015625, "learning_rate": 3.846849408223865e-05, "loss": 61.8074, "step": 53210 }, { "epoch": 0.21501553428653386, "grad_norm": 1330.064208984375, "learning_rate": 3.846742218727084e-05, "loss": 80.6778, "step": 53220 }, { "epoch": 0.21505593555190147, "grad_norm": 768.6731567382812, "learning_rate": 3.846634993227113e-05, "loss": 98.0611, "step": 53230 }, { "epoch": 0.2150963368172691, "grad_norm": 726.4506225585938, "learning_rate": 3.846527731726042e-05, "loss": 56.8013, "step": 53240 }, { "epoch": 0.21513673808263675, "grad_norm": 595.484130859375, "learning_rate": 3.846420434225962e-05, "loss": 68.5684, "step": 53250 }, { "epoch": 0.2151771393480044, "grad_norm": 2421.9501953125, "learning_rate": 3.846313100728965e-05, "loss": 90.2925, "step": 53260 }, { "epoch": 0.215217540613372, "grad_norm": 1195.77880859375, "learning_rate": 3.846205731237144e-05, "loss": 55.6472, "step": 53270 }, { "epoch": 0.21525794187873964, "grad_norm": 556.8364868164062, "learning_rate": 3.846098325752592e-05, "loss": 67.6011, "step": 53280 }, { "epoch": 0.21529834314410728, "grad_norm": 688.4712524414062, "learning_rate": 3.845990884277402e-05, "loss": 79.6667, "step": 53290 }, { "epoch": 0.2153387444094749, "grad_norm": 906.529541015625, "learning_rate": 3.8458834068136704e-05, "loss": 70.9961, "step": 53300 }, { "epoch": 0.21537914567484254, "grad_norm": 5431.75146484375, "learning_rate": 3.8457758933634905e-05, "loss": 139.834, "step": 53310 }, { "epoch": 0.21541954694021018, "grad_norm": 556.4915771484375, "learning_rate": 3.84566834392896e-05, "loss": 69.6828, "step": 53320 }, { "epoch": 0.2154599482055778, "grad_norm": 783.346923828125, "learning_rate": 3.8455607585121745e-05, "loss": 91.7018, "step": 53330 }, { "epoch": 0.21550034947094543, "grad_norm": 444.45556640625, "learning_rate": 3.8454531371152315e-05, "loss": 64.6532, "step": 53340 }, { "epoch": 0.21554075073631307, "grad_norm": 1354.439453125, "learning_rate": 3.84534547974023e-05, "loss": 92.3306, "step": 53350 }, { "epoch": 0.21558115200168068, "grad_norm": 593.5985717773438, "learning_rate": 3.8452377863892686e-05, "loss": 63.3151, "step": 53360 }, { "epoch": 0.21562155326704832, "grad_norm": 868.5457763671875, "learning_rate": 3.845130057064446e-05, "loss": 64.5144, "step": 53370 }, { "epoch": 0.21566195453241596, "grad_norm": 708.3560180664062, "learning_rate": 3.8450222917678626e-05, "loss": 77.0622, "step": 53380 }, { "epoch": 0.21570235579778357, "grad_norm": 1146.61376953125, "learning_rate": 3.84491449050162e-05, "loss": 96.9242, "step": 53390 }, { "epoch": 0.2157427570631512, "grad_norm": 501.0816345214844, "learning_rate": 3.8448066532678206e-05, "loss": 72.2287, "step": 53400 }, { "epoch": 0.21578315832851885, "grad_norm": 1105.1387939453125, "learning_rate": 3.8446987800685646e-05, "loss": 120.5814, "step": 53410 }, { "epoch": 0.2158235595938865, "grad_norm": 696.0546875, "learning_rate": 3.844590870905957e-05, "loss": 85.6093, "step": 53420 }, { "epoch": 0.2158639608592541, "grad_norm": 1029.6751708984375, "learning_rate": 3.8444829257821e-05, "loss": 106.5893, "step": 53430 }, { "epoch": 0.21590436212462175, "grad_norm": 1049.6455078125, "learning_rate": 3.844374944699099e-05, "loss": 57.9386, "step": 53440 }, { "epoch": 0.21594476338998939, "grad_norm": 322.9332580566406, "learning_rate": 3.844266927659059e-05, "loss": 80.7071, "step": 53450 }, { "epoch": 0.215985164655357, "grad_norm": 649.258056640625, "learning_rate": 3.8441588746640856e-05, "loss": 60.9057, "step": 53460 }, { "epoch": 0.21602556592072464, "grad_norm": 703.34326171875, "learning_rate": 3.8440507857162856e-05, "loss": 65.4921, "step": 53470 }, { "epoch": 0.21606596718609228, "grad_norm": 451.3043518066406, "learning_rate": 3.843942660817767e-05, "loss": 73.565, "step": 53480 }, { "epoch": 0.2161063684514599, "grad_norm": 1561.3681640625, "learning_rate": 3.8438344999706355e-05, "loss": 77.2143, "step": 53490 }, { "epoch": 0.21614676971682753, "grad_norm": 613.5264282226562, "learning_rate": 3.8437263031770015e-05, "loss": 67.6038, "step": 53500 }, { "epoch": 0.21618717098219517, "grad_norm": 580.0654296875, "learning_rate": 3.843618070438974e-05, "loss": 80.3659, "step": 53510 }, { "epoch": 0.21622757224756278, "grad_norm": 411.0973205566406, "learning_rate": 3.8435098017586635e-05, "loss": 57.7491, "step": 53520 }, { "epoch": 0.21626797351293042, "grad_norm": 515.7644653320312, "learning_rate": 3.84340149713818e-05, "loss": 96.419, "step": 53530 }, { "epoch": 0.21630837477829806, "grad_norm": 1984.08740234375, "learning_rate": 3.843293156579635e-05, "loss": 80.8797, "step": 53540 }, { "epoch": 0.21634877604366567, "grad_norm": 536.7109375, "learning_rate": 3.843184780085142e-05, "loss": 88.4045, "step": 53550 }, { "epoch": 0.21638917730903331, "grad_norm": 916.5641479492188, "learning_rate": 3.843076367656811e-05, "loss": 85.3823, "step": 53560 }, { "epoch": 0.21642957857440095, "grad_norm": 573.3203735351562, "learning_rate": 3.842967919296759e-05, "loss": 62.3376, "step": 53570 }, { "epoch": 0.2164699798397686, "grad_norm": 717.7938842773438, "learning_rate": 3.842859435007098e-05, "loss": 63.3645, "step": 53580 }, { "epoch": 0.2165103811051362, "grad_norm": 641.4676513671875, "learning_rate": 3.8427509147899436e-05, "loss": 81.8669, "step": 53590 }, { "epoch": 0.21655078237050385, "grad_norm": 790.5074462890625, "learning_rate": 3.842642358647411e-05, "loss": 67.9639, "step": 53600 }, { "epoch": 0.2165911836358715, "grad_norm": 1648.721435546875, "learning_rate": 3.842533766581618e-05, "loss": 83.7934, "step": 53610 }, { "epoch": 0.2166315849012391, "grad_norm": 722.0618896484375, "learning_rate": 3.842425138594679e-05, "loss": 64.1934, "step": 53620 }, { "epoch": 0.21667198616660674, "grad_norm": 702.053955078125, "learning_rate": 3.842316474688715e-05, "loss": 174.5111, "step": 53630 }, { "epoch": 0.21671238743197438, "grad_norm": 868.510986328125, "learning_rate": 3.8422077748658415e-05, "loss": 55.1778, "step": 53640 }, { "epoch": 0.216752788697342, "grad_norm": 644.6915893554688, "learning_rate": 3.842099039128179e-05, "loss": 74.3898, "step": 53650 }, { "epoch": 0.21679318996270963, "grad_norm": 1687.4537353515625, "learning_rate": 3.8419902674778484e-05, "loss": 79.454, "step": 53660 }, { "epoch": 0.21683359122807727, "grad_norm": 581.4046020507812, "learning_rate": 3.8418814599169684e-05, "loss": 80.527, "step": 53670 }, { "epoch": 0.21687399249344488, "grad_norm": 0.0, "learning_rate": 3.841772616447662e-05, "loss": 59.3345, "step": 53680 }, { "epoch": 0.21691439375881252, "grad_norm": 747.6123046875, "learning_rate": 3.84166373707205e-05, "loss": 64.1358, "step": 53690 }, { "epoch": 0.21695479502418016, "grad_norm": 893.1629638671875, "learning_rate": 3.8415548217922546e-05, "loss": 96.7892, "step": 53700 }, { "epoch": 0.21699519628954778, "grad_norm": 815.9344482421875, "learning_rate": 3.8414458706104e-05, "loss": 76.8955, "step": 53710 }, { "epoch": 0.21703559755491542, "grad_norm": 977.8809204101562, "learning_rate": 3.8413368835286104e-05, "loss": 110.5061, "step": 53720 }, { "epoch": 0.21707599882028306, "grad_norm": 925.52392578125, "learning_rate": 3.84122786054901e-05, "loss": 98.8299, "step": 53730 }, { "epoch": 0.2171164000856507, "grad_norm": 865.4520263671875, "learning_rate": 3.8411188016737245e-05, "loss": 79.2513, "step": 53740 }, { "epoch": 0.2171568013510183, "grad_norm": 981.2018432617188, "learning_rate": 3.841009706904881e-05, "loss": 65.9173, "step": 53750 }, { "epoch": 0.21719720261638595, "grad_norm": 531.883056640625, "learning_rate": 3.8409005762446046e-05, "loss": 53.9927, "step": 53760 }, { "epoch": 0.2172376038817536, "grad_norm": 1539.730712890625, "learning_rate": 3.840791409695024e-05, "loss": 87.3034, "step": 53770 }, { "epoch": 0.2172780051471212, "grad_norm": 846.1365356445312, "learning_rate": 3.840682207258267e-05, "loss": 57.685, "step": 53780 }, { "epoch": 0.21731840641248884, "grad_norm": 906.4349365234375, "learning_rate": 3.8405729689364625e-05, "loss": 94.1517, "step": 53790 }, { "epoch": 0.21735880767785648, "grad_norm": 1619.25634765625, "learning_rate": 3.840463694731741e-05, "loss": 102.3085, "step": 53800 }, { "epoch": 0.2173992089432241, "grad_norm": 700.124267578125, "learning_rate": 3.840354384646232e-05, "loss": 81.1631, "step": 53810 }, { "epoch": 0.21743961020859173, "grad_norm": 759.2460327148438, "learning_rate": 3.840245038682067e-05, "loss": 63.1743, "step": 53820 }, { "epoch": 0.21748001147395937, "grad_norm": 474.08856201171875, "learning_rate": 3.8401356568413766e-05, "loss": 86.7374, "step": 53830 }, { "epoch": 0.21752041273932698, "grad_norm": 1223.6085205078125, "learning_rate": 3.840026239126295e-05, "loss": 98.9601, "step": 53840 }, { "epoch": 0.21756081400469462, "grad_norm": 701.7997436523438, "learning_rate": 3.839916785538955e-05, "loss": 113.6616, "step": 53850 }, { "epoch": 0.21760121527006226, "grad_norm": 535.2505493164062, "learning_rate": 3.839807296081489e-05, "loss": 53.6258, "step": 53860 }, { "epoch": 0.21764161653542988, "grad_norm": 498.7857360839844, "learning_rate": 3.839697770756033e-05, "loss": 45.3512, "step": 53870 }, { "epoch": 0.21768201780079752, "grad_norm": 1155.1322021484375, "learning_rate": 3.839588209564721e-05, "loss": 91.6865, "step": 53880 }, { "epoch": 0.21772241906616516, "grad_norm": 509.37603759765625, "learning_rate": 3.839478612509691e-05, "loss": 68.8964, "step": 53890 }, { "epoch": 0.2177628203315328, "grad_norm": 986.0476684570312, "learning_rate": 3.839368979593079e-05, "loss": 77.7923, "step": 53900 }, { "epoch": 0.2178032215969004, "grad_norm": 503.35791015625, "learning_rate": 3.83925931081702e-05, "loss": 45.5724, "step": 53910 }, { "epoch": 0.21784362286226805, "grad_norm": 612.2771606445312, "learning_rate": 3.8391496061836545e-05, "loss": 64.179, "step": 53920 }, { "epoch": 0.2178840241276357, "grad_norm": 693.4111938476562, "learning_rate": 3.8390398656951204e-05, "loss": 59.0524, "step": 53930 }, { "epoch": 0.2179244253930033, "grad_norm": 3350.630126953125, "learning_rate": 3.838930089353558e-05, "loss": 124.8092, "step": 53940 }, { "epoch": 0.21796482665837094, "grad_norm": 388.133056640625, "learning_rate": 3.838820277161106e-05, "loss": 57.8169, "step": 53950 }, { "epoch": 0.21800522792373858, "grad_norm": 512.2550659179688, "learning_rate": 3.8387104291199066e-05, "loss": 58.9273, "step": 53960 }, { "epoch": 0.2180456291891062, "grad_norm": 429.9571228027344, "learning_rate": 3.8386005452321e-05, "loss": 67.8202, "step": 53970 }, { "epoch": 0.21808603045447383, "grad_norm": 1184.2342529296875, "learning_rate": 3.83849062549983e-05, "loss": 95.5747, "step": 53980 }, { "epoch": 0.21812643171984147, "grad_norm": 1229.4407958984375, "learning_rate": 3.838380669925238e-05, "loss": 64.7428, "step": 53990 }, { "epoch": 0.21816683298520909, "grad_norm": 926.0938110351562, "learning_rate": 3.838270678510469e-05, "loss": 56.7303, "step": 54000 }, { "epoch": 0.21820723425057673, "grad_norm": 776.5170288085938, "learning_rate": 3.8381606512576664e-05, "loss": 67.6247, "step": 54010 }, { "epoch": 0.21824763551594437, "grad_norm": 756.7637939453125, "learning_rate": 3.838050588168976e-05, "loss": 74.2062, "step": 54020 }, { "epoch": 0.21828803678131198, "grad_norm": 1093.98291015625, "learning_rate": 3.837940489246543e-05, "loss": 67.1495, "step": 54030 }, { "epoch": 0.21832843804667962, "grad_norm": 1240.094970703125, "learning_rate": 3.8378303544925134e-05, "loss": 61.8741, "step": 54040 }, { "epoch": 0.21836883931204726, "grad_norm": 696.2560424804688, "learning_rate": 3.837720183909035e-05, "loss": 103.6368, "step": 54050 }, { "epoch": 0.2184092405774149, "grad_norm": 632.3446044921875, "learning_rate": 3.8376099774982553e-05, "loss": 80.5212, "step": 54060 }, { "epoch": 0.2184496418427825, "grad_norm": 665.3270874023438, "learning_rate": 3.8374997352623234e-05, "loss": 55.7393, "step": 54070 }, { "epoch": 0.21849004310815015, "grad_norm": 1013.0740966796875, "learning_rate": 3.837389457203388e-05, "loss": 78.6451, "step": 54080 }, { "epoch": 0.2185304443735178, "grad_norm": 814.3826293945312, "learning_rate": 3.8372791433236e-05, "loss": 87.6565, "step": 54090 }, { "epoch": 0.2185708456388854, "grad_norm": 1004.903564453125, "learning_rate": 3.8371687936251085e-05, "loss": 74.4121, "step": 54100 }, { "epoch": 0.21861124690425304, "grad_norm": 609.3934326171875, "learning_rate": 3.837058408110066e-05, "loss": 92.1348, "step": 54110 }, { "epoch": 0.21865164816962068, "grad_norm": 498.446044921875, "learning_rate": 3.836947986780623e-05, "loss": 140.8018, "step": 54120 }, { "epoch": 0.2186920494349883, "grad_norm": 1687.71435546875, "learning_rate": 3.836837529638934e-05, "loss": 105.1114, "step": 54130 }, { "epoch": 0.21873245070035593, "grad_norm": 759.4395751953125, "learning_rate": 3.836727036687151e-05, "loss": 73.0021, "step": 54140 }, { "epoch": 0.21877285196572357, "grad_norm": 866.75341796875, "learning_rate": 3.83661650792743e-05, "loss": 78.3296, "step": 54150 }, { "epoch": 0.2188132532310912, "grad_norm": 1064.7254638671875, "learning_rate": 3.8365059433619236e-05, "loss": 70.7086, "step": 54160 }, { "epoch": 0.21885365449645883, "grad_norm": 480.29156494140625, "learning_rate": 3.836395342992789e-05, "loss": 52.3391, "step": 54170 }, { "epoch": 0.21889405576182647, "grad_norm": 465.5809326171875, "learning_rate": 3.8362847068221816e-05, "loss": 77.8524, "step": 54180 }, { "epoch": 0.21893445702719408, "grad_norm": 853.2993774414062, "learning_rate": 3.836174034852258e-05, "loss": 79.3692, "step": 54190 }, { "epoch": 0.21897485829256172, "grad_norm": 781.8079223632812, "learning_rate": 3.8360633270851765e-05, "loss": 64.2943, "step": 54200 }, { "epoch": 0.21901525955792936, "grad_norm": 704.2810668945312, "learning_rate": 3.835952583523095e-05, "loss": 76.673, "step": 54210 }, { "epoch": 0.219055660823297, "grad_norm": 1497.0550537109375, "learning_rate": 3.835841804168174e-05, "loss": 75.4406, "step": 54220 }, { "epoch": 0.2190960620886646, "grad_norm": 1838.40869140625, "learning_rate": 3.8357309890225696e-05, "loss": 102.6016, "step": 54230 }, { "epoch": 0.21913646335403225, "grad_norm": 469.5345153808594, "learning_rate": 3.8356201380884465e-05, "loss": 63.3835, "step": 54240 }, { "epoch": 0.2191768646193999, "grad_norm": 1066.0787353515625, "learning_rate": 3.835509251367963e-05, "loss": 77.8217, "step": 54250 }, { "epoch": 0.2192172658847675, "grad_norm": 1482.1439208984375, "learning_rate": 3.835398328863281e-05, "loss": 77.4048, "step": 54260 }, { "epoch": 0.21925766715013514, "grad_norm": 811.32080078125, "learning_rate": 3.835287370576564e-05, "loss": 76.4093, "step": 54270 }, { "epoch": 0.21929806841550278, "grad_norm": 643.56787109375, "learning_rate": 3.8351763765099755e-05, "loss": 101.0972, "step": 54280 }, { "epoch": 0.2193384696808704, "grad_norm": 552.455810546875, "learning_rate": 3.835065346665679e-05, "loss": 77.881, "step": 54290 }, { "epoch": 0.21937887094623804, "grad_norm": 1201.8634033203125, "learning_rate": 3.8349542810458374e-05, "loss": 65.2546, "step": 54300 }, { "epoch": 0.21941927221160568, "grad_norm": 160.37318420410156, "learning_rate": 3.834843179652618e-05, "loss": 63.4198, "step": 54310 }, { "epoch": 0.2194596734769733, "grad_norm": 1110.407470703125, "learning_rate": 3.834732042488186e-05, "loss": 56.3331, "step": 54320 }, { "epoch": 0.21950007474234093, "grad_norm": 863.4337768554688, "learning_rate": 3.834620869554709e-05, "loss": 66.6363, "step": 54330 }, { "epoch": 0.21954047600770857, "grad_norm": 1122.90576171875, "learning_rate": 3.834509660854353e-05, "loss": 68.35, "step": 54340 }, { "epoch": 0.21958087727307618, "grad_norm": 1227.4989013671875, "learning_rate": 3.834398416389287e-05, "loss": 75.4078, "step": 54350 }, { "epoch": 0.21962127853844382, "grad_norm": 855.5853271484375, "learning_rate": 3.834287136161679e-05, "loss": 87.4391, "step": 54360 }, { "epoch": 0.21966167980381146, "grad_norm": 1111.5548095703125, "learning_rate": 3.8341758201736995e-05, "loss": 82.4853, "step": 54370 }, { "epoch": 0.2197020810691791, "grad_norm": 639.7208862304688, "learning_rate": 3.8340644684275184e-05, "loss": 76.4388, "step": 54380 }, { "epoch": 0.2197424823345467, "grad_norm": 661.3689575195312, "learning_rate": 3.833953080925305e-05, "loss": 43.4275, "step": 54390 }, { "epoch": 0.21978288359991435, "grad_norm": 515.0191040039062, "learning_rate": 3.8338416576692335e-05, "loss": 58.5762, "step": 54400 }, { "epoch": 0.219823284865282, "grad_norm": 2175.806396484375, "learning_rate": 3.833730198661473e-05, "loss": 102.4807, "step": 54410 }, { "epoch": 0.2198636861306496, "grad_norm": 1177.308837890625, "learning_rate": 3.8336187039042e-05, "loss": 103.9685, "step": 54420 }, { "epoch": 0.21990408739601724, "grad_norm": 2354.74072265625, "learning_rate": 3.833507173399586e-05, "loss": 83.4062, "step": 54430 }, { "epoch": 0.21994448866138488, "grad_norm": 483.1558532714844, "learning_rate": 3.8333956071498047e-05, "loss": 81.7757, "step": 54440 }, { "epoch": 0.2199848899267525, "grad_norm": 1119.9464111328125, "learning_rate": 3.833284005157033e-05, "loss": 62.2444, "step": 54450 }, { "epoch": 0.22002529119212014, "grad_norm": 1096.0882568359375, "learning_rate": 3.833172367423445e-05, "loss": 76.2725, "step": 54460 }, { "epoch": 0.22006569245748778, "grad_norm": 853.37646484375, "learning_rate": 3.833060693951219e-05, "loss": 60.5453, "step": 54470 }, { "epoch": 0.2201060937228554, "grad_norm": 874.2672729492188, "learning_rate": 3.83294898474253e-05, "loss": 86.5318, "step": 54480 }, { "epoch": 0.22014649498822303, "grad_norm": 2054.371826171875, "learning_rate": 3.832837239799558e-05, "loss": 135.8652, "step": 54490 }, { "epoch": 0.22018689625359067, "grad_norm": 506.741943359375, "learning_rate": 3.8327254591244795e-05, "loss": 68.7912, "step": 54500 }, { "epoch": 0.22022729751895828, "grad_norm": 524.73193359375, "learning_rate": 3.832613642719475e-05, "loss": 36.3728, "step": 54510 }, { "epoch": 0.22026769878432592, "grad_norm": 1817.7291259765625, "learning_rate": 3.832501790586724e-05, "loss": 54.225, "step": 54520 }, { "epoch": 0.22030810004969356, "grad_norm": 1189.2894287109375, "learning_rate": 3.832389902728407e-05, "loss": 100.2454, "step": 54530 }, { "epoch": 0.2203485013150612, "grad_norm": 280.9716491699219, "learning_rate": 3.832277979146706e-05, "loss": 47.7974, "step": 54540 }, { "epoch": 0.2203889025804288, "grad_norm": 985.0027465820312, "learning_rate": 3.8321660198438013e-05, "loss": 94.2698, "step": 54550 }, { "epoch": 0.22042930384579645, "grad_norm": 1097.96875, "learning_rate": 3.832054024821877e-05, "loss": 62.8399, "step": 54560 }, { "epoch": 0.2204697051111641, "grad_norm": 904.7685546875, "learning_rate": 3.831941994083118e-05, "loss": 62.1448, "step": 54570 }, { "epoch": 0.2205101063765317, "grad_norm": 569.3748779296875, "learning_rate": 3.831829927629705e-05, "loss": 47.2207, "step": 54580 }, { "epoch": 0.22055050764189935, "grad_norm": 888.6369018554688, "learning_rate": 3.831717825463825e-05, "loss": 117.8203, "step": 54590 }, { "epoch": 0.22059090890726699, "grad_norm": 813.579833984375, "learning_rate": 3.831605687587663e-05, "loss": 107.6004, "step": 54600 }, { "epoch": 0.2206313101726346, "grad_norm": 686.66845703125, "learning_rate": 3.831493514003405e-05, "loss": 53.9199, "step": 54610 }, { "epoch": 0.22067171143800224, "grad_norm": 1741.1114501953125, "learning_rate": 3.8313813047132384e-05, "loss": 61.756, "step": 54620 }, { "epoch": 0.22071211270336988, "grad_norm": 1898.2445068359375, "learning_rate": 3.8312690597193494e-05, "loss": 75.3093, "step": 54630 }, { "epoch": 0.2207525139687375, "grad_norm": 1430.97509765625, "learning_rate": 3.8311567790239284e-05, "loss": 78.689, "step": 54640 }, { "epoch": 0.22079291523410513, "grad_norm": 453.10400390625, "learning_rate": 3.831044462629163e-05, "loss": 72.2116, "step": 54650 }, { "epoch": 0.22083331649947277, "grad_norm": 977.4793090820312, "learning_rate": 3.830932110537242e-05, "loss": 82.8747, "step": 54660 }, { "epoch": 0.22087371776484038, "grad_norm": 523.4397583007812, "learning_rate": 3.830819722750358e-05, "loss": 127.7064, "step": 54670 }, { "epoch": 0.22091411903020802, "grad_norm": 1129.9034423828125, "learning_rate": 3.830707299270701e-05, "loss": 69.922, "step": 54680 }, { "epoch": 0.22095452029557566, "grad_norm": 657.69580078125, "learning_rate": 3.830594840100463e-05, "loss": 63.3711, "step": 54690 }, { "epoch": 0.22099492156094327, "grad_norm": 1762.107177734375, "learning_rate": 3.830482345241835e-05, "loss": 61.1276, "step": 54700 }, { "epoch": 0.22103532282631091, "grad_norm": 1589.8001708984375, "learning_rate": 3.830369814697013e-05, "loss": 99.1144, "step": 54710 }, { "epoch": 0.22107572409167855, "grad_norm": 764.2601928710938, "learning_rate": 3.830257248468187e-05, "loss": 72.5749, "step": 54720 }, { "epoch": 0.2211161253570462, "grad_norm": 1804.06640625, "learning_rate": 3.8301446465575554e-05, "loss": 85.9519, "step": 54730 }, { "epoch": 0.2211565266224138, "grad_norm": 561.2105102539062, "learning_rate": 3.8300320089673106e-05, "loss": 57.1239, "step": 54740 }, { "epoch": 0.22119692788778145, "grad_norm": 545.8154907226562, "learning_rate": 3.82991933569965e-05, "loss": 67.4139, "step": 54750 }, { "epoch": 0.2212373291531491, "grad_norm": 1868.6494140625, "learning_rate": 3.82980662675677e-05, "loss": 115.0834, "step": 54760 }, { "epoch": 0.2212777304185167, "grad_norm": 856.7229614257812, "learning_rate": 3.829693882140867e-05, "loss": 84.5909, "step": 54770 }, { "epoch": 0.22131813168388434, "grad_norm": 547.6751098632812, "learning_rate": 3.8295811018541406e-05, "loss": 54.9399, "step": 54780 }, { "epoch": 0.22135853294925198, "grad_norm": 805.74072265625, "learning_rate": 3.829468285898789e-05, "loss": 91.6851, "step": 54790 }, { "epoch": 0.2213989342146196, "grad_norm": 781.6884155273438, "learning_rate": 3.82935543427701e-05, "loss": 66.7251, "step": 54800 }, { "epoch": 0.22143933547998723, "grad_norm": 1233.2275390625, "learning_rate": 3.829242546991006e-05, "loss": 81.143, "step": 54810 }, { "epoch": 0.22147973674535487, "grad_norm": 951.1732177734375, "learning_rate": 3.8291296240429766e-05, "loss": 63.9419, "step": 54820 }, { "epoch": 0.22152013801072248, "grad_norm": 814.5828857421875, "learning_rate": 3.8290166654351236e-05, "loss": 63.3376, "step": 54830 }, { "epoch": 0.22156053927609012, "grad_norm": 456.1623840332031, "learning_rate": 3.828903671169649e-05, "loss": 84.4635, "step": 54840 }, { "epoch": 0.22160094054145776, "grad_norm": 1096.0460205078125, "learning_rate": 3.8287906412487555e-05, "loss": 104.485, "step": 54850 }, { "epoch": 0.22164134180682538, "grad_norm": 625.3572998046875, "learning_rate": 3.828677575674647e-05, "loss": 65.4626, "step": 54860 }, { "epoch": 0.22168174307219302, "grad_norm": 1885.492919921875, "learning_rate": 3.828564474449527e-05, "loss": 68.5083, "step": 54870 }, { "epoch": 0.22172214433756066, "grad_norm": 558.2794799804688, "learning_rate": 3.8284513375756024e-05, "loss": 61.8752, "step": 54880 }, { "epoch": 0.2217625456029283, "grad_norm": 529.7760009765625, "learning_rate": 3.828338165055077e-05, "loss": 86.1901, "step": 54890 }, { "epoch": 0.2218029468682959, "grad_norm": 726.2923583984375, "learning_rate": 3.828224956890158e-05, "loss": 66.4456, "step": 54900 }, { "epoch": 0.22184334813366355, "grad_norm": 1140.76416015625, "learning_rate": 3.828111713083052e-05, "loss": 68.4928, "step": 54910 }, { "epoch": 0.2218837493990312, "grad_norm": 774.8322143554688, "learning_rate": 3.827998433635967e-05, "loss": 63.8003, "step": 54920 }, { "epoch": 0.2219241506643988, "grad_norm": 929.5682373046875, "learning_rate": 3.8278851185511115e-05, "loss": 112.3734, "step": 54930 }, { "epoch": 0.22196455192976644, "grad_norm": 825.6917724609375, "learning_rate": 3.8277717678306946e-05, "loss": 94.8121, "step": 54940 }, { "epoch": 0.22200495319513408, "grad_norm": 762.4610595703125, "learning_rate": 3.8276583814769265e-05, "loss": 55.5008, "step": 54950 }, { "epoch": 0.2220453544605017, "grad_norm": 577.0433959960938, "learning_rate": 3.827544959492017e-05, "loss": 80.0855, "step": 54960 }, { "epoch": 0.22208575572586933, "grad_norm": 1125.5518798828125, "learning_rate": 3.827431501878177e-05, "loss": 85.5211, "step": 54970 }, { "epoch": 0.22212615699123697, "grad_norm": 797.3460083007812, "learning_rate": 3.8273180086376195e-05, "loss": 91.1367, "step": 54980 }, { "epoch": 0.22216655825660458, "grad_norm": 1483.161865234375, "learning_rate": 3.827204479772557e-05, "loss": 51.6119, "step": 54990 }, { "epoch": 0.22220695952197222, "grad_norm": 1471.6492919921875, "learning_rate": 3.827090915285202e-05, "loss": 137.4107, "step": 55000 }, { "epoch": 0.22224736078733986, "grad_norm": 593.1961669921875, "learning_rate": 3.826977315177769e-05, "loss": 58.0393, "step": 55010 }, { "epoch": 0.22228776205270748, "grad_norm": 1406.79541015625, "learning_rate": 3.8268636794524724e-05, "loss": 81.1842, "step": 55020 }, { "epoch": 0.22232816331807512, "grad_norm": 1183.783203125, "learning_rate": 3.826750008111529e-05, "loss": 73.7566, "step": 55030 }, { "epoch": 0.22236856458344276, "grad_norm": 718.0703735351562, "learning_rate": 3.826636301157152e-05, "loss": 70.2362, "step": 55040 }, { "epoch": 0.2224089658488104, "grad_norm": 381.4212646484375, "learning_rate": 3.8265225585915616e-05, "loss": 63.0713, "step": 55050 }, { "epoch": 0.222449367114178, "grad_norm": 0.0, "learning_rate": 3.826408780416973e-05, "loss": 61.6525, "step": 55060 }, { "epoch": 0.22248976837954565, "grad_norm": 458.7480773925781, "learning_rate": 3.8262949666356046e-05, "loss": 48.9388, "step": 55070 }, { "epoch": 0.2225301696449133, "grad_norm": 785.8255004882812, "learning_rate": 3.826181117249676e-05, "loss": 61.7339, "step": 55080 }, { "epoch": 0.2225705709102809, "grad_norm": 790.076171875, "learning_rate": 3.8260672322614054e-05, "loss": 82.4103, "step": 55090 }, { "epoch": 0.22261097217564854, "grad_norm": 998.904296875, "learning_rate": 3.825953311673015e-05, "loss": 86.9125, "step": 55100 }, { "epoch": 0.22265137344101618, "grad_norm": 661.4468994140625, "learning_rate": 3.825839355486724e-05, "loss": 73.2599, "step": 55110 }, { "epoch": 0.2226917747063838, "grad_norm": 419.6131286621094, "learning_rate": 3.825725363704755e-05, "loss": 85.9492, "step": 55120 }, { "epoch": 0.22273217597175143, "grad_norm": 1006.8649291992188, "learning_rate": 3.82561133632933e-05, "loss": 76.4517, "step": 55130 }, { "epoch": 0.22277257723711907, "grad_norm": 595.6953125, "learning_rate": 3.825497273362673e-05, "loss": 67.5577, "step": 55140 }, { "epoch": 0.22281297850248669, "grad_norm": 559.351806640625, "learning_rate": 3.825383174807006e-05, "loss": 89.7158, "step": 55150 }, { "epoch": 0.22285337976785433, "grad_norm": 1137.574462890625, "learning_rate": 3.825269040664554e-05, "loss": 123.4338, "step": 55160 }, { "epoch": 0.22289378103322197, "grad_norm": 800.9525756835938, "learning_rate": 3.825154870937543e-05, "loss": 63.4805, "step": 55170 }, { "epoch": 0.22293418229858958, "grad_norm": 0.0, "learning_rate": 3.825040665628198e-05, "loss": 67.7657, "step": 55180 }, { "epoch": 0.22297458356395722, "grad_norm": 667.6600952148438, "learning_rate": 3.824926424738745e-05, "loss": 74.2919, "step": 55190 }, { "epoch": 0.22301498482932486, "grad_norm": 668.782958984375, "learning_rate": 3.824812148271413e-05, "loss": 60.5619, "step": 55200 }, { "epoch": 0.2230553860946925, "grad_norm": 883.674560546875, "learning_rate": 3.824697836228428e-05, "loss": 61.6603, "step": 55210 }, { "epoch": 0.2230957873600601, "grad_norm": 635.1038818359375, "learning_rate": 3.824583488612019e-05, "loss": 72.0887, "step": 55220 }, { "epoch": 0.22313618862542775, "grad_norm": 1303.10595703125, "learning_rate": 3.824469105424416e-05, "loss": 59.0078, "step": 55230 }, { "epoch": 0.2231765898907954, "grad_norm": 611.309814453125, "learning_rate": 3.824354686667848e-05, "loss": 64.6639, "step": 55240 }, { "epoch": 0.223216991156163, "grad_norm": 1070.7742919921875, "learning_rate": 3.8242402323445464e-05, "loss": 77.4441, "step": 55250 }, { "epoch": 0.22325739242153064, "grad_norm": 694.8192138671875, "learning_rate": 3.824125742456742e-05, "loss": 69.6013, "step": 55260 }, { "epoch": 0.22329779368689828, "grad_norm": 752.5867919921875, "learning_rate": 3.824011217006668e-05, "loss": 81.2646, "step": 55270 }, { "epoch": 0.2233381949522659, "grad_norm": 547.7276611328125, "learning_rate": 3.823896655996556e-05, "loss": 96.0991, "step": 55280 }, { "epoch": 0.22337859621763353, "grad_norm": 612.2052612304688, "learning_rate": 3.823782059428639e-05, "loss": 66.8349, "step": 55290 }, { "epoch": 0.22341899748300117, "grad_norm": 1166.7867431640625, "learning_rate": 3.823667427305152e-05, "loss": 79.9102, "step": 55300 }, { "epoch": 0.2234593987483688, "grad_norm": 985.7227172851562, "learning_rate": 3.8235527596283294e-05, "loss": 76.2051, "step": 55310 }, { "epoch": 0.22349980001373643, "grad_norm": 1459.9608154296875, "learning_rate": 3.823438056400408e-05, "loss": 96.8804, "step": 55320 }, { "epoch": 0.22354020127910407, "grad_norm": 584.8526000976562, "learning_rate": 3.823323317623622e-05, "loss": 37.4984, "step": 55330 }, { "epoch": 0.22358060254447168, "grad_norm": 1119.935546875, "learning_rate": 3.823208543300209e-05, "loss": 101.0891, "step": 55340 }, { "epoch": 0.22362100380983932, "grad_norm": 2631.894775390625, "learning_rate": 3.8230937334324075e-05, "loss": 102.7161, "step": 55350 }, { "epoch": 0.22366140507520696, "grad_norm": 1560.7054443359375, "learning_rate": 3.822978888022455e-05, "loss": 106.3137, "step": 55360 }, { "epoch": 0.2237018063405746, "grad_norm": 715.1233520507812, "learning_rate": 3.82286400707259e-05, "loss": 67.4711, "step": 55370 }, { "epoch": 0.2237422076059422, "grad_norm": 993.5302734375, "learning_rate": 3.8227490905850534e-05, "loss": 80.1368, "step": 55380 }, { "epoch": 0.22378260887130985, "grad_norm": 548.71484375, "learning_rate": 3.8226341385620845e-05, "loss": 68.046, "step": 55390 }, { "epoch": 0.2238230101366775, "grad_norm": 819.5445556640625, "learning_rate": 3.822519151005925e-05, "loss": 81.1588, "step": 55400 }, { "epoch": 0.2238634114020451, "grad_norm": 516.8148193359375, "learning_rate": 3.822404127918816e-05, "loss": 78.6651, "step": 55410 }, { "epoch": 0.22390381266741274, "grad_norm": 489.1547546386719, "learning_rate": 3.8222890693030007e-05, "loss": 104.5522, "step": 55420 }, { "epoch": 0.22394421393278038, "grad_norm": 440.1009216308594, "learning_rate": 3.8221739751607205e-05, "loss": 79.4987, "step": 55430 }, { "epoch": 0.223984615198148, "grad_norm": 583.6029663085938, "learning_rate": 3.822058845494222e-05, "loss": 69.6008, "step": 55440 }, { "epoch": 0.22402501646351564, "grad_norm": 528.2589111328125, "learning_rate": 3.8219436803057484e-05, "loss": 79.5607, "step": 55450 }, { "epoch": 0.22406541772888328, "grad_norm": 413.80364990234375, "learning_rate": 3.821828479597543e-05, "loss": 47.3397, "step": 55460 }, { "epoch": 0.2241058189942509, "grad_norm": 461.31805419921875, "learning_rate": 3.8217132433718556e-05, "loss": 79.1027, "step": 55470 }, { "epoch": 0.22414622025961853, "grad_norm": 686.8890991210938, "learning_rate": 3.821597971630929e-05, "loss": 77.6598, "step": 55480 }, { "epoch": 0.22418662152498617, "grad_norm": 711.459716796875, "learning_rate": 3.821482664377013e-05, "loss": 67.9017, "step": 55490 }, { "epoch": 0.22422702279035378, "grad_norm": 5779.49951171875, "learning_rate": 3.821367321612354e-05, "loss": 112.8159, "step": 55500 }, { "epoch": 0.22426742405572142, "grad_norm": 1230.7401123046875, "learning_rate": 3.821251943339202e-05, "loss": 87.1288, "step": 55510 }, { "epoch": 0.22430782532108906, "grad_norm": 412.5945129394531, "learning_rate": 3.821136529559806e-05, "loss": 78.4689, "step": 55520 }, { "epoch": 0.2243482265864567, "grad_norm": 644.5408325195312, "learning_rate": 3.821021080276415e-05, "loss": 69.6584, "step": 55530 }, { "epoch": 0.2243886278518243, "grad_norm": 530.8232421875, "learning_rate": 3.8209055954912815e-05, "loss": 80.4994, "step": 55540 }, { "epoch": 0.22442902911719195, "grad_norm": 754.1405639648438, "learning_rate": 3.820790075206655e-05, "loss": 96.8612, "step": 55550 }, { "epoch": 0.2244694303825596, "grad_norm": 706.6307373046875, "learning_rate": 3.820674519424789e-05, "loss": 62.1917, "step": 55560 }, { "epoch": 0.2245098316479272, "grad_norm": 949.218505859375, "learning_rate": 3.820558928147935e-05, "loss": 75.9506, "step": 55570 }, { "epoch": 0.22455023291329484, "grad_norm": 817.1376342773438, "learning_rate": 3.820443301378348e-05, "loss": 43.3707, "step": 55580 }, { "epoch": 0.22459063417866248, "grad_norm": 702.3113403320312, "learning_rate": 3.820327639118282e-05, "loss": 94.1257, "step": 55590 }, { "epoch": 0.2246310354440301, "grad_norm": 1354.1820068359375, "learning_rate": 3.8202119413699914e-05, "loss": 51.7388, "step": 55600 }, { "epoch": 0.22467143670939774, "grad_norm": 1198.4644775390625, "learning_rate": 3.820096208135732e-05, "loss": 91.0793, "step": 55610 }, { "epoch": 0.22471183797476538, "grad_norm": 521.9857788085938, "learning_rate": 3.8199804394177594e-05, "loss": 48.2309, "step": 55620 }, { "epoch": 0.224752239240133, "grad_norm": 500.62518310546875, "learning_rate": 3.819864635218332e-05, "loss": 91.5175, "step": 55630 }, { "epoch": 0.22479264050550063, "grad_norm": 592.45361328125, "learning_rate": 3.819748795539706e-05, "loss": 86.7297, "step": 55640 }, { "epoch": 0.22483304177086827, "grad_norm": 719.0573120117188, "learning_rate": 3.8196329203841404e-05, "loss": 102.5109, "step": 55650 }, { "epoch": 0.22487344303623588, "grad_norm": 1039.817626953125, "learning_rate": 3.819517009753894e-05, "loss": 82.0633, "step": 55660 }, { "epoch": 0.22491384430160352, "grad_norm": 584.3021240234375, "learning_rate": 3.8194010636512274e-05, "loss": 67.5605, "step": 55670 }, { "epoch": 0.22495424556697116, "grad_norm": 365.3063659667969, "learning_rate": 3.8192850820784e-05, "loss": 76.2773, "step": 55680 }, { "epoch": 0.2249946468323388, "grad_norm": 683.2793579101562, "learning_rate": 3.8191690650376734e-05, "loss": 111.8646, "step": 55690 }, { "epoch": 0.2250350480977064, "grad_norm": 5094.06884765625, "learning_rate": 3.819053012531309e-05, "loss": 98.7676, "step": 55700 }, { "epoch": 0.22507544936307405, "grad_norm": 1392.592041015625, "learning_rate": 3.8189369245615695e-05, "loss": 97.9336, "step": 55710 }, { "epoch": 0.2251158506284417, "grad_norm": 1905.1131591796875, "learning_rate": 3.818820801130719e-05, "loss": 76.434, "step": 55720 }, { "epoch": 0.2251562518938093, "grad_norm": 1060.8299560546875, "learning_rate": 3.81870464224102e-05, "loss": 76.4275, "step": 55730 }, { "epoch": 0.22519665315917695, "grad_norm": 697.6439819335938, "learning_rate": 3.818588447894738e-05, "loss": 74.6944, "step": 55740 }, { "epoch": 0.22523705442454459, "grad_norm": 1250.4781494140625, "learning_rate": 3.8184722180941376e-05, "loss": 85.4563, "step": 55750 }, { "epoch": 0.2252774556899122, "grad_norm": 889.2294311523438, "learning_rate": 3.818355952841485e-05, "loss": 86.7714, "step": 55760 }, { "epoch": 0.22531785695527984, "grad_norm": 442.0536804199219, "learning_rate": 3.818239652139047e-05, "loss": 64.0727, "step": 55770 }, { "epoch": 0.22535825822064748, "grad_norm": 449.8352966308594, "learning_rate": 3.818123315989091e-05, "loss": 92.5837, "step": 55780 }, { "epoch": 0.2253986594860151, "grad_norm": 450.0088195800781, "learning_rate": 3.818006944393885e-05, "loss": 78.8138, "step": 55790 }, { "epoch": 0.22543906075138273, "grad_norm": 486.0877685546875, "learning_rate": 3.817890537355698e-05, "loss": 58.394, "step": 55800 }, { "epoch": 0.22547946201675037, "grad_norm": 472.1658630371094, "learning_rate": 3.817774094876798e-05, "loss": 48.9125, "step": 55810 }, { "epoch": 0.22551986328211798, "grad_norm": 475.9318542480469, "learning_rate": 3.8176576169594574e-05, "loss": 84.1555, "step": 55820 }, { "epoch": 0.22556026454748562, "grad_norm": 847.432861328125, "learning_rate": 3.817541103605945e-05, "loss": 80.2289, "step": 55830 }, { "epoch": 0.22560066581285326, "grad_norm": 814.4562377929688, "learning_rate": 3.817424554818533e-05, "loss": 56.8917, "step": 55840 }, { "epoch": 0.2256410670782209, "grad_norm": 1087.533203125, "learning_rate": 3.817307970599494e-05, "loss": 61.4748, "step": 55850 }, { "epoch": 0.22568146834358851, "grad_norm": 1206.31005859375, "learning_rate": 3.817191350951101e-05, "loss": 81.1293, "step": 55860 }, { "epoch": 0.22572186960895615, "grad_norm": 1083.26904296875, "learning_rate": 3.817074695875626e-05, "loss": 83.8608, "step": 55870 }, { "epoch": 0.2257622708743238, "grad_norm": 584.4603271484375, "learning_rate": 3.816958005375344e-05, "loss": 67.0806, "step": 55880 }, { "epoch": 0.2258026721396914, "grad_norm": 1291.0843505859375, "learning_rate": 3.816841279452532e-05, "loss": 90.554, "step": 55890 }, { "epoch": 0.22584307340505905, "grad_norm": 831.9024047851562, "learning_rate": 3.816724518109463e-05, "loss": 65.832, "step": 55900 }, { "epoch": 0.2258834746704267, "grad_norm": 407.05072021484375, "learning_rate": 3.8166077213484135e-05, "loss": 58.9891, "step": 55910 }, { "epoch": 0.2259238759357943, "grad_norm": 1658.80908203125, "learning_rate": 3.816490889171662e-05, "loss": 65.7529, "step": 55920 }, { "epoch": 0.22596427720116194, "grad_norm": 1227.1304931640625, "learning_rate": 3.816374021581485e-05, "loss": 92.7391, "step": 55930 }, { "epoch": 0.22600467846652958, "grad_norm": 703.5914916992188, "learning_rate": 3.816257118580161e-05, "loss": 82.3, "step": 55940 }, { "epoch": 0.2260450797318972, "grad_norm": 529.1463012695312, "learning_rate": 3.816140180169971e-05, "loss": 62.4928, "step": 55950 }, { "epoch": 0.22608548099726483, "grad_norm": 769.5684814453125, "learning_rate": 3.816023206353192e-05, "loss": 74.4622, "step": 55960 }, { "epoch": 0.22612588226263247, "grad_norm": 784.1138916015625, "learning_rate": 3.815906197132106e-05, "loss": 58.6365, "step": 55970 }, { "epoch": 0.22616628352800008, "grad_norm": 1663.0970458984375, "learning_rate": 3.815789152508993e-05, "loss": 58.3804, "step": 55980 }, { "epoch": 0.22620668479336772, "grad_norm": 806.343505859375, "learning_rate": 3.815672072486136e-05, "loss": 51.7544, "step": 55990 }, { "epoch": 0.22624708605873536, "grad_norm": 295.89154052734375, "learning_rate": 3.815554957065818e-05, "loss": 68.6234, "step": 56000 }, { "epoch": 0.226287487324103, "grad_norm": 492.88128662109375, "learning_rate": 3.8154378062503207e-05, "loss": 83.6571, "step": 56010 }, { "epoch": 0.22632788858947062, "grad_norm": 598.7699584960938, "learning_rate": 3.815320620041929e-05, "loss": 63.2561, "step": 56020 }, { "epoch": 0.22636828985483826, "grad_norm": 1166.81201171875, "learning_rate": 3.815203398442927e-05, "loss": 66.7785, "step": 56030 }, { "epoch": 0.2264086911202059, "grad_norm": 1060.6319580078125, "learning_rate": 3.8150861414556e-05, "loss": 65.2264, "step": 56040 }, { "epoch": 0.2264490923855735, "grad_norm": 738.6588745117188, "learning_rate": 3.814968849082234e-05, "loss": 81.1179, "step": 56050 }, { "epoch": 0.22648949365094115, "grad_norm": 1409.0303955078125, "learning_rate": 3.8148515213251166e-05, "loss": 93.0806, "step": 56060 }, { "epoch": 0.2265298949163088, "grad_norm": 389.2867736816406, "learning_rate": 3.814734158186534e-05, "loss": 69.5129, "step": 56070 }, { "epoch": 0.2265702961816764, "grad_norm": 873.1412963867188, "learning_rate": 3.8146167596687746e-05, "loss": 87.3668, "step": 56080 }, { "epoch": 0.22661069744704404, "grad_norm": 988.9115600585938, "learning_rate": 3.814499325774128e-05, "loss": 70.7732, "step": 56090 }, { "epoch": 0.22665109871241168, "grad_norm": 1032.6561279296875, "learning_rate": 3.814381856504882e-05, "loss": 73.4399, "step": 56100 }, { "epoch": 0.2266914999777793, "grad_norm": 689.4199829101562, "learning_rate": 3.814264351863328e-05, "loss": 53.0507, "step": 56110 }, { "epoch": 0.22673190124314693, "grad_norm": 363.88037109375, "learning_rate": 3.814146811851757e-05, "loss": 54.2716, "step": 56120 }, { "epoch": 0.22677230250851457, "grad_norm": 979.6790771484375, "learning_rate": 3.814029236472459e-05, "loss": 68.8041, "step": 56130 }, { "epoch": 0.22681270377388218, "grad_norm": 1053.1512451171875, "learning_rate": 3.813911625727727e-05, "loss": 65.6413, "step": 56140 }, { "epoch": 0.22685310503924982, "grad_norm": 832.77099609375, "learning_rate": 3.813793979619854e-05, "loss": 56.1802, "step": 56150 }, { "epoch": 0.22689350630461746, "grad_norm": 738.0537719726562, "learning_rate": 3.813676298151135e-05, "loss": 97.4371, "step": 56160 }, { "epoch": 0.2269339075699851, "grad_norm": 1048.9244384765625, "learning_rate": 3.8135585813238616e-05, "loss": 101.326, "step": 56170 }, { "epoch": 0.22697430883535272, "grad_norm": 1091.7122802734375, "learning_rate": 3.81344082914033e-05, "loss": 62.9711, "step": 56180 }, { "epoch": 0.22701471010072036, "grad_norm": 656.7578735351562, "learning_rate": 3.8133230416028355e-05, "loss": 67.8833, "step": 56190 }, { "epoch": 0.227055111366088, "grad_norm": 1725.5526123046875, "learning_rate": 3.813205218713676e-05, "loss": 106.0663, "step": 56200 }, { "epoch": 0.2270955126314556, "grad_norm": 558.1640625, "learning_rate": 3.813087360475146e-05, "loss": 49.1143, "step": 56210 }, { "epoch": 0.22713591389682325, "grad_norm": 1560.0738525390625, "learning_rate": 3.812969466889545e-05, "loss": 69.2833, "step": 56220 }, { "epoch": 0.2271763151621909, "grad_norm": 1940.1322021484375, "learning_rate": 3.812851537959171e-05, "loss": 77.8127, "step": 56230 }, { "epoch": 0.2272167164275585, "grad_norm": 1342.005615234375, "learning_rate": 3.8127335736863227e-05, "loss": 69.5098, "step": 56240 }, { "epoch": 0.22725711769292614, "grad_norm": 768.77490234375, "learning_rate": 3.812615574073301e-05, "loss": 63.5844, "step": 56250 }, { "epoch": 0.22729751895829378, "grad_norm": 1894.3037109375, "learning_rate": 3.812497539122404e-05, "loss": 92.5692, "step": 56260 }, { "epoch": 0.2273379202236614, "grad_norm": 886.0888061523438, "learning_rate": 3.812379468835935e-05, "loss": 66.3142, "step": 56270 }, { "epoch": 0.22737832148902903, "grad_norm": 2326.173095703125, "learning_rate": 3.812261363216195e-05, "loss": 81.2702, "step": 56280 }, { "epoch": 0.22741872275439667, "grad_norm": 601.2302856445312, "learning_rate": 3.812143222265487e-05, "loss": 83.3885, "step": 56290 }, { "epoch": 0.22745912401976429, "grad_norm": 715.7938842773438, "learning_rate": 3.8120250459861144e-05, "loss": 60.7751, "step": 56300 }, { "epoch": 0.22749952528513193, "grad_norm": 1360.266845703125, "learning_rate": 3.81190683438038e-05, "loss": 111.9645, "step": 56310 }, { "epoch": 0.22753992655049957, "grad_norm": 874.0000610351562, "learning_rate": 3.811788587450589e-05, "loss": 61.2237, "step": 56320 }, { "epoch": 0.2275803278158672, "grad_norm": 531.2874755859375, "learning_rate": 3.8116703051990464e-05, "loss": 84.936, "step": 56330 }, { "epoch": 0.22762072908123482, "grad_norm": 1022.2213745117188, "learning_rate": 3.811551987628059e-05, "loss": 52.4177, "step": 56340 }, { "epoch": 0.22766113034660246, "grad_norm": 813.2898559570312, "learning_rate": 3.811433634739933e-05, "loss": 49.6749, "step": 56350 }, { "epoch": 0.2277015316119701, "grad_norm": 661.9342651367188, "learning_rate": 3.8113152465369756e-05, "loss": 67.0093, "step": 56360 }, { "epoch": 0.2277419328773377, "grad_norm": 1208.7891845703125, "learning_rate": 3.8111968230214945e-05, "loss": 74.5439, "step": 56370 }, { "epoch": 0.22778233414270535, "grad_norm": 859.9384155273438, "learning_rate": 3.811078364195799e-05, "loss": 43.7246, "step": 56380 }, { "epoch": 0.227822735408073, "grad_norm": 825.8929443359375, "learning_rate": 3.810959870062199e-05, "loss": 79.4211, "step": 56390 }, { "epoch": 0.2278631366734406, "grad_norm": 1221.9461669921875, "learning_rate": 3.810841340623004e-05, "loss": 95.7748, "step": 56400 }, { "epoch": 0.22790353793880824, "grad_norm": 1074.3507080078125, "learning_rate": 3.810722775880524e-05, "loss": 67.8669, "step": 56410 }, { "epoch": 0.22794393920417588, "grad_norm": 706.3327026367188, "learning_rate": 3.810604175837072e-05, "loss": 93.8343, "step": 56420 }, { "epoch": 0.2279843404695435, "grad_norm": 1404.4757080078125, "learning_rate": 3.8104855404949585e-05, "loss": 68.0535, "step": 56430 }, { "epoch": 0.22802474173491113, "grad_norm": 732.0487670898438, "learning_rate": 3.810366869856498e-05, "loss": 96.2141, "step": 56440 }, { "epoch": 0.22806514300027877, "grad_norm": 1142.020751953125, "learning_rate": 3.8102481639240035e-05, "loss": 67.8034, "step": 56450 }, { "epoch": 0.2281055442656464, "grad_norm": 646.2639770507812, "learning_rate": 3.810129422699789e-05, "loss": 91.0126, "step": 56460 }, { "epoch": 0.22814594553101403, "grad_norm": 1303.6097412109375, "learning_rate": 3.810010646186169e-05, "loss": 97.9156, "step": 56470 }, { "epoch": 0.22818634679638167, "grad_norm": 1019.5330810546875, "learning_rate": 3.80989183438546e-05, "loss": 69.5221, "step": 56480 }, { "epoch": 0.2282267480617493, "grad_norm": 389.3905029296875, "learning_rate": 3.809772987299978e-05, "loss": 125.0458, "step": 56490 }, { "epoch": 0.22826714932711692, "grad_norm": 720.518310546875, "learning_rate": 3.809654104932039e-05, "loss": 121.4447, "step": 56500 }, { "epoch": 0.22830755059248456, "grad_norm": 901.7028198242188, "learning_rate": 3.809535187283962e-05, "loss": 49.731, "step": 56510 }, { "epoch": 0.2283479518578522, "grad_norm": 469.4713134765625, "learning_rate": 3.8094162343580654e-05, "loss": 98.3911, "step": 56520 }, { "epoch": 0.2283883531232198, "grad_norm": 1055.3323974609375, "learning_rate": 3.809297246156668e-05, "loss": 75.417, "step": 56530 }, { "epoch": 0.22842875438858745, "grad_norm": 632.37109375, "learning_rate": 3.8091782226820886e-05, "loss": 95.9724, "step": 56540 }, { "epoch": 0.2284691556539551, "grad_norm": 1114.0428466796875, "learning_rate": 3.809059163936648e-05, "loss": 50.7466, "step": 56550 }, { "epoch": 0.2285095569193227, "grad_norm": 1918.535400390625, "learning_rate": 3.808940069922669e-05, "loss": 91.4348, "step": 56560 }, { "epoch": 0.22854995818469034, "grad_norm": 453.5932922363281, "learning_rate": 3.808820940642471e-05, "loss": 65.904, "step": 56570 }, { "epoch": 0.22859035945005798, "grad_norm": 699.6660766601562, "learning_rate": 3.8087017760983774e-05, "loss": 67.6307, "step": 56580 }, { "epoch": 0.2286307607154256, "grad_norm": 954.3672485351562, "learning_rate": 3.808582576292712e-05, "loss": 78.6105, "step": 56590 }, { "epoch": 0.22867116198079324, "grad_norm": 991.3224487304688, "learning_rate": 3.8084633412277974e-05, "loss": 72.0767, "step": 56600 }, { "epoch": 0.22871156324616088, "grad_norm": 896.9384765625, "learning_rate": 3.808344070905959e-05, "loss": 39.0718, "step": 56610 }, { "epoch": 0.2287519645115285, "grad_norm": 1196.9996337890625, "learning_rate": 3.808224765329523e-05, "loss": 99.872, "step": 56620 }, { "epoch": 0.22879236577689613, "grad_norm": 1548.3662109375, "learning_rate": 3.808105424500814e-05, "loss": 107.1882, "step": 56630 }, { "epoch": 0.22883276704226377, "grad_norm": 1136.67822265625, "learning_rate": 3.807986048422158e-05, "loss": 84.6685, "step": 56640 }, { "epoch": 0.2288731683076314, "grad_norm": 1998.874267578125, "learning_rate": 3.807866637095884e-05, "loss": 92.7216, "step": 56650 }, { "epoch": 0.22891356957299902, "grad_norm": 973.369873046875, "learning_rate": 3.807747190524318e-05, "loss": 84.9332, "step": 56660 }, { "epoch": 0.22895397083836666, "grad_norm": 684.3551025390625, "learning_rate": 3.8076277087097915e-05, "loss": 56.3043, "step": 56670 }, { "epoch": 0.2289943721037343, "grad_norm": 929.8072509765625, "learning_rate": 3.807508191654632e-05, "loss": 100.4173, "step": 56680 }, { "epoch": 0.2290347733691019, "grad_norm": 375.4898986816406, "learning_rate": 3.807388639361168e-05, "loss": 70.284, "step": 56690 }, { "epoch": 0.22907517463446955, "grad_norm": 1525.9503173828125, "learning_rate": 3.8072690518317334e-05, "loss": 55.3537, "step": 56700 }, { "epoch": 0.2291155758998372, "grad_norm": 792.93896484375, "learning_rate": 3.807149429068658e-05, "loss": 71.9927, "step": 56710 }, { "epoch": 0.2291559771652048, "grad_norm": 931.7693481445312, "learning_rate": 3.807029771074274e-05, "loss": 96.6875, "step": 56720 }, { "epoch": 0.22919637843057244, "grad_norm": 1130.6885986328125, "learning_rate": 3.806910077850914e-05, "loss": 80.8834, "step": 56730 }, { "epoch": 0.22923677969594008, "grad_norm": 365.8529052734375, "learning_rate": 3.806790349400912e-05, "loss": 64.6014, "step": 56740 }, { "epoch": 0.2292771809613077, "grad_norm": 5867.74560546875, "learning_rate": 3.806670585726602e-05, "loss": 98.2985, "step": 56750 }, { "epoch": 0.22931758222667534, "grad_norm": 962.7188110351562, "learning_rate": 3.806550786830319e-05, "loss": 99.5331, "step": 56760 }, { "epoch": 0.22935798349204298, "grad_norm": 413.5105895996094, "learning_rate": 3.806430952714398e-05, "loss": 63.9609, "step": 56770 }, { "epoch": 0.2293983847574106, "grad_norm": 979.9683837890625, "learning_rate": 3.806311083381175e-05, "loss": 92.7886, "step": 56780 }, { "epoch": 0.22943878602277823, "grad_norm": 1042.4696044921875, "learning_rate": 3.806191178832989e-05, "loss": 52.4567, "step": 56790 }, { "epoch": 0.22947918728814587, "grad_norm": 1003.2159423828125, "learning_rate": 3.806071239072175e-05, "loss": 44.2825, "step": 56800 }, { "epoch": 0.2295195885535135, "grad_norm": 1484.490966796875, "learning_rate": 3.8059512641010726e-05, "loss": 89.2549, "step": 56810 }, { "epoch": 0.22955998981888112, "grad_norm": 1329.6519775390625, "learning_rate": 3.80583125392202e-05, "loss": 63.9567, "step": 56820 }, { "epoch": 0.22960039108424876, "grad_norm": 667.831298828125, "learning_rate": 3.805711208537358e-05, "loss": 66.5759, "step": 56830 }, { "epoch": 0.2296407923496164, "grad_norm": 1251.3240966796875, "learning_rate": 3.805591127949426e-05, "loss": 95.0565, "step": 56840 }, { "epoch": 0.229681193614984, "grad_norm": 832.2625122070312, "learning_rate": 3.805471012160566e-05, "loss": 93.1982, "step": 56850 }, { "epoch": 0.22972159488035165, "grad_norm": 501.2583312988281, "learning_rate": 3.8053508611731185e-05, "loss": 55.0099, "step": 56860 }, { "epoch": 0.2297619961457193, "grad_norm": 766.9327392578125, "learning_rate": 3.805230674989427e-05, "loss": 85.5932, "step": 56870 }, { "epoch": 0.2298023974110869, "grad_norm": 712.4401245117188, "learning_rate": 3.805110453611834e-05, "loss": 86.5868, "step": 56880 }, { "epoch": 0.22984279867645455, "grad_norm": 1190.645263671875, "learning_rate": 3.804990197042683e-05, "loss": 86.9735, "step": 56890 }, { "epoch": 0.22988319994182219, "grad_norm": 908.5149536132812, "learning_rate": 3.804869905284319e-05, "loss": 66.2249, "step": 56900 }, { "epoch": 0.2299236012071898, "grad_norm": 739.5358276367188, "learning_rate": 3.8047495783390874e-05, "loss": 74.1746, "step": 56910 }, { "epoch": 0.22996400247255744, "grad_norm": 414.0364990234375, "learning_rate": 3.804629216209333e-05, "loss": 65.4793, "step": 56920 }, { "epoch": 0.23000440373792508, "grad_norm": 1083.235595703125, "learning_rate": 3.8045088188974026e-05, "loss": 118.2313, "step": 56930 }, { "epoch": 0.2300448050032927, "grad_norm": 969.8745727539062, "learning_rate": 3.804388386405645e-05, "loss": 81.8473, "step": 56940 }, { "epoch": 0.23008520626866033, "grad_norm": 727.8637084960938, "learning_rate": 3.804267918736406e-05, "loss": 83.6049, "step": 56950 }, { "epoch": 0.23012560753402797, "grad_norm": 1320.610595703125, "learning_rate": 3.8041474158920356e-05, "loss": 95.4228, "step": 56960 }, { "epoch": 0.2301660087993956, "grad_norm": 699.66796875, "learning_rate": 3.804026877874882e-05, "loss": 83.4832, "step": 56970 }, { "epoch": 0.23020641006476322, "grad_norm": 968.6633911132812, "learning_rate": 3.803906304687296e-05, "loss": 78.6762, "step": 56980 }, { "epoch": 0.23024681133013086, "grad_norm": 408.0656433105469, "learning_rate": 3.803785696331627e-05, "loss": 79.3933, "step": 56990 }, { "epoch": 0.2302872125954985, "grad_norm": 360.78814697265625, "learning_rate": 3.803665052810228e-05, "loss": 64.7716, "step": 57000 }, { "epoch": 0.23032761386086611, "grad_norm": 607.16748046875, "learning_rate": 3.803544374125449e-05, "loss": 60.6444, "step": 57010 }, { "epoch": 0.23036801512623375, "grad_norm": 761.7095947265625, "learning_rate": 3.803423660279646e-05, "loss": 66.7402, "step": 57020 }, { "epoch": 0.2304084163916014, "grad_norm": 603.35009765625, "learning_rate": 3.803302911275169e-05, "loss": 84.5132, "step": 57030 }, { "epoch": 0.230448817656969, "grad_norm": 1848.899169921875, "learning_rate": 3.803182127114374e-05, "loss": 61.8791, "step": 57040 }, { "epoch": 0.23048921892233665, "grad_norm": 559.4889526367188, "learning_rate": 3.803061307799614e-05, "loss": 90.565, "step": 57050 }, { "epoch": 0.2305296201877043, "grad_norm": 487.5803527832031, "learning_rate": 3.802940453333246e-05, "loss": 83.0407, "step": 57060 }, { "epoch": 0.2305700214530719, "grad_norm": 913.366455078125, "learning_rate": 3.802819563717626e-05, "loss": 112.3049, "step": 57070 }, { "epoch": 0.23061042271843954, "grad_norm": 1263.8419189453125, "learning_rate": 3.8026986389551105e-05, "loss": 85.9298, "step": 57080 }, { "epoch": 0.23065082398380718, "grad_norm": 305.4476623535156, "learning_rate": 3.802577679048057e-05, "loss": 45.9334, "step": 57090 }, { "epoch": 0.2306912252491748, "grad_norm": 593.847900390625, "learning_rate": 3.802456683998823e-05, "loss": 53.0649, "step": 57100 }, { "epoch": 0.23073162651454243, "grad_norm": 1953.02001953125, "learning_rate": 3.802335653809768e-05, "loss": 81.2339, "step": 57110 }, { "epoch": 0.23077202777991007, "grad_norm": 1080.8931884765625, "learning_rate": 3.802214588483252e-05, "loss": 81.1776, "step": 57120 }, { "epoch": 0.2308124290452777, "grad_norm": 396.26373291015625, "learning_rate": 3.8020934880216344e-05, "loss": 71.585, "step": 57130 }, { "epoch": 0.23085283031064532, "grad_norm": 1105.419189453125, "learning_rate": 3.801972352427277e-05, "loss": 102.6832, "step": 57140 }, { "epoch": 0.23089323157601296, "grad_norm": 795.7288818359375, "learning_rate": 3.801851181702541e-05, "loss": 48.1124, "step": 57150 }, { "epoch": 0.2309336328413806, "grad_norm": 1058.8551025390625, "learning_rate": 3.8017299758497875e-05, "loss": 79.2187, "step": 57160 }, { "epoch": 0.23097403410674822, "grad_norm": 1670.0687255859375, "learning_rate": 3.801608734871381e-05, "loss": 56.1381, "step": 57170 }, { "epoch": 0.23101443537211586, "grad_norm": 427.2680358886719, "learning_rate": 3.8014874587696846e-05, "loss": 96.5046, "step": 57180 }, { "epoch": 0.2310548366374835, "grad_norm": 677.8304443359375, "learning_rate": 3.8013661475470634e-05, "loss": 64.1499, "step": 57190 }, { "epoch": 0.2310952379028511, "grad_norm": 1529.733642578125, "learning_rate": 3.801244801205881e-05, "loss": 87.2802, "step": 57200 }, { "epoch": 0.23113563916821875, "grad_norm": 0.0, "learning_rate": 3.801123419748504e-05, "loss": 58.6589, "step": 57210 }, { "epoch": 0.2311760404335864, "grad_norm": 1265.4381103515625, "learning_rate": 3.801002003177299e-05, "loss": 71.0298, "step": 57220 }, { "epoch": 0.231216441698954, "grad_norm": 1491.0811767578125, "learning_rate": 3.800880551494632e-05, "loss": 86.0877, "step": 57230 }, { "epoch": 0.23125684296432164, "grad_norm": 557.4287719726562, "learning_rate": 3.800759064702873e-05, "loss": 93.9744, "step": 57240 }, { "epoch": 0.23129724422968928, "grad_norm": 326.94873046875, "learning_rate": 3.800637542804387e-05, "loss": 56.5683, "step": 57250 }, { "epoch": 0.2313376454950569, "grad_norm": 733.343505859375, "learning_rate": 3.800515985801546e-05, "loss": 60.8453, "step": 57260 }, { "epoch": 0.23137804676042453, "grad_norm": 883.8818359375, "learning_rate": 3.800394393696719e-05, "loss": 102.1983, "step": 57270 }, { "epoch": 0.23141844802579217, "grad_norm": 553.3629150390625, "learning_rate": 3.800272766492277e-05, "loss": 59.822, "step": 57280 }, { "epoch": 0.2314588492911598, "grad_norm": 1296.9984130859375, "learning_rate": 3.800151104190589e-05, "loss": 96.5139, "step": 57290 }, { "epoch": 0.23149925055652743, "grad_norm": 1746.950439453125, "learning_rate": 3.8000294067940295e-05, "loss": 84.3049, "step": 57300 }, { "epoch": 0.23153965182189506, "grad_norm": 372.6632080078125, "learning_rate": 3.79990767430497e-05, "loss": 115.2568, "step": 57310 }, { "epoch": 0.2315800530872627, "grad_norm": 1053.0244140625, "learning_rate": 3.799785906725784e-05, "loss": 109.0187, "step": 57320 }, { "epoch": 0.23162045435263032, "grad_norm": 0.0, "learning_rate": 3.7996641040588443e-05, "loss": 50.7161, "step": 57330 }, { "epoch": 0.23166085561799796, "grad_norm": 487.5233459472656, "learning_rate": 3.7995422663065264e-05, "loss": 65.7552, "step": 57340 }, { "epoch": 0.2317012568833656, "grad_norm": 549.9506225585938, "learning_rate": 3.799420393471206e-05, "loss": 94.2068, "step": 57350 }, { "epoch": 0.2317416581487332, "grad_norm": 2816.931884765625, "learning_rate": 3.799298485555259e-05, "loss": 76.761, "step": 57360 }, { "epoch": 0.23178205941410085, "grad_norm": 794.5421752929688, "learning_rate": 3.799176542561061e-05, "loss": 72.8375, "step": 57370 }, { "epoch": 0.2318224606794685, "grad_norm": 614.9068603515625, "learning_rate": 3.7990545644909905e-05, "loss": 60.9909, "step": 57380 }, { "epoch": 0.2318628619448361, "grad_norm": 961.1927490234375, "learning_rate": 3.798932551347424e-05, "loss": 83.393, "step": 57390 }, { "epoch": 0.23190326321020374, "grad_norm": 672.9813232421875, "learning_rate": 3.798810503132742e-05, "loss": 78.9836, "step": 57400 }, { "epoch": 0.23194366447557138, "grad_norm": 821.4522705078125, "learning_rate": 3.798688419849324e-05, "loss": 94.6433, "step": 57410 }, { "epoch": 0.231984065740939, "grad_norm": 415.84783935546875, "learning_rate": 3.798566301499548e-05, "loss": 98.24, "step": 57420 }, { "epoch": 0.23202446700630663, "grad_norm": 792.7560424804688, "learning_rate": 3.798444148085796e-05, "loss": 55.6949, "step": 57430 }, { "epoch": 0.23206486827167427, "grad_norm": 419.20770263671875, "learning_rate": 3.79832195961045e-05, "loss": 77.6947, "step": 57440 }, { "epoch": 0.23210526953704191, "grad_norm": 1155.12451171875, "learning_rate": 3.7981997360758917e-05, "loss": 93.3328, "step": 57450 }, { "epoch": 0.23214567080240953, "grad_norm": 1187.1175537109375, "learning_rate": 3.7980774774845035e-05, "loss": 81.4386, "step": 57460 }, { "epoch": 0.23218607206777717, "grad_norm": 660.607421875, "learning_rate": 3.797955183838669e-05, "loss": 105.6275, "step": 57470 }, { "epoch": 0.2322264733331448, "grad_norm": 649.1986694335938, "learning_rate": 3.797832855140773e-05, "loss": 73.3256, "step": 57480 }, { "epoch": 0.23226687459851242, "grad_norm": 415.9481201171875, "learning_rate": 3.797710491393199e-05, "loss": 72.9341, "step": 57490 }, { "epoch": 0.23230727586388006, "grad_norm": 1390.0479736328125, "learning_rate": 3.7975880925983345e-05, "loss": 86.1443, "step": 57500 }, { "epoch": 0.2323476771292477, "grad_norm": 721.8440551757812, "learning_rate": 3.7974656587585645e-05, "loss": 86.9662, "step": 57510 }, { "epoch": 0.2323880783946153, "grad_norm": 1014.0465698242188, "learning_rate": 3.7973431898762757e-05, "loss": 89.9118, "step": 57520 }, { "epoch": 0.23242847965998295, "grad_norm": 499.45050048828125, "learning_rate": 3.797220685953856e-05, "loss": 66.7644, "step": 57530 }, { "epoch": 0.2324688809253506, "grad_norm": 544.9583129882812, "learning_rate": 3.7970981469936936e-05, "loss": 39.842, "step": 57540 }, { "epoch": 0.2325092821907182, "grad_norm": 752.9443969726562, "learning_rate": 3.7969755729981776e-05, "loss": 52.3351, "step": 57550 }, { "epoch": 0.23254968345608584, "grad_norm": 666.3173217773438, "learning_rate": 3.796852963969699e-05, "loss": 86.4459, "step": 57560 }, { "epoch": 0.23259008472145348, "grad_norm": 882.573974609375, "learning_rate": 3.796730319910645e-05, "loss": 93.6017, "step": 57570 }, { "epoch": 0.2326304859868211, "grad_norm": 556.37109375, "learning_rate": 3.796607640823409e-05, "loss": 97.7774, "step": 57580 }, { "epoch": 0.23267088725218874, "grad_norm": 1168.2757568359375, "learning_rate": 3.7964849267103824e-05, "loss": 62.6684, "step": 57590 }, { "epoch": 0.23271128851755638, "grad_norm": 601.9468383789062, "learning_rate": 3.796362177573957e-05, "loss": 92.6433, "step": 57600 }, { "epoch": 0.23275168978292402, "grad_norm": 1131.87353515625, "learning_rate": 3.796239393416526e-05, "loss": 80.5643, "step": 57610 }, { "epoch": 0.23279209104829163, "grad_norm": 459.4023742675781, "learning_rate": 3.796116574240484e-05, "loss": 62.8203, "step": 57620 }, { "epoch": 0.23283249231365927, "grad_norm": 1018.473388671875, "learning_rate": 3.795993720048224e-05, "loss": 61.0011, "step": 57630 }, { "epoch": 0.2328728935790269, "grad_norm": 1023.8245849609375, "learning_rate": 3.795870830842142e-05, "loss": 76.0714, "step": 57640 }, { "epoch": 0.23291329484439452, "grad_norm": 667.9993286132812, "learning_rate": 3.795747906624634e-05, "loss": 81.131, "step": 57650 }, { "epoch": 0.23295369610976216, "grad_norm": 939.4752807617188, "learning_rate": 3.795624947398096e-05, "loss": 65.7513, "step": 57660 }, { "epoch": 0.2329940973751298, "grad_norm": 811.994384765625, "learning_rate": 3.795501953164924e-05, "loss": 69.7315, "step": 57670 }, { "epoch": 0.2330344986404974, "grad_norm": 863.0499877929688, "learning_rate": 3.795378923927518e-05, "loss": 110.3658, "step": 57680 }, { "epoch": 0.23307489990586505, "grad_norm": 885.8748779296875, "learning_rate": 3.795255859688276e-05, "loss": 93.2206, "step": 57690 }, { "epoch": 0.2331153011712327, "grad_norm": 708.9866943359375, "learning_rate": 3.7951327604495957e-05, "loss": 62.6562, "step": 57700 }, { "epoch": 0.2331557024366003, "grad_norm": 640.904052734375, "learning_rate": 3.7950096262138784e-05, "loss": 51.269, "step": 57710 }, { "epoch": 0.23319610370196794, "grad_norm": 695.4147338867188, "learning_rate": 3.794886456983524e-05, "loss": 101.4676, "step": 57720 }, { "epoch": 0.23323650496733558, "grad_norm": 817.9987182617188, "learning_rate": 3.7947632527609345e-05, "loss": 83.4514, "step": 57730 }, { "epoch": 0.2332769062327032, "grad_norm": 2216.619873046875, "learning_rate": 3.7946400135485115e-05, "loss": 76.9581, "step": 57740 }, { "epoch": 0.23331730749807084, "grad_norm": 1862.0076904296875, "learning_rate": 3.794516739348657e-05, "loss": 77.3212, "step": 57750 }, { "epoch": 0.23335770876343848, "grad_norm": 510.77276611328125, "learning_rate": 3.794393430163776e-05, "loss": 51.2993, "step": 57760 }, { "epoch": 0.2333981100288061, "grad_norm": 278.53564453125, "learning_rate": 3.7942700859962694e-05, "loss": 87.068, "step": 57770 }, { "epoch": 0.23343851129417373, "grad_norm": 1577.158935546875, "learning_rate": 3.794146706848545e-05, "loss": 114.5216, "step": 57780 }, { "epoch": 0.23347891255954137, "grad_norm": 1093.2125244140625, "learning_rate": 3.794023292723006e-05, "loss": 72.6221, "step": 57790 }, { "epoch": 0.233519313824909, "grad_norm": 1251.1124267578125, "learning_rate": 3.7938998436220604e-05, "loss": 86.3804, "step": 57800 }, { "epoch": 0.23355971509027662, "grad_norm": 3054.21240234375, "learning_rate": 3.793776359548113e-05, "loss": 87.1267, "step": 57810 }, { "epoch": 0.23360011635564426, "grad_norm": 1642.425537109375, "learning_rate": 3.793652840503572e-05, "loss": 53.5206, "step": 57820 }, { "epoch": 0.2336405176210119, "grad_norm": 997.0510864257812, "learning_rate": 3.793529286490846e-05, "loss": 80.6299, "step": 57830 }, { "epoch": 0.2336809188863795, "grad_norm": 304.63812255859375, "learning_rate": 3.7934056975123424e-05, "loss": 68.2548, "step": 57840 }, { "epoch": 0.23372132015174715, "grad_norm": 576.9412231445312, "learning_rate": 3.793282073570472e-05, "loss": 100.3076, "step": 57850 }, { "epoch": 0.2337617214171148, "grad_norm": 967.371826171875, "learning_rate": 3.7931584146676444e-05, "loss": 103.107, "step": 57860 }, { "epoch": 0.2338021226824824, "grad_norm": 851.3161010742188, "learning_rate": 3.793034720806269e-05, "loss": 59.3635, "step": 57870 }, { "epoch": 0.23384252394785005, "grad_norm": 748.4859008789062, "learning_rate": 3.792910991988761e-05, "loss": 77.2267, "step": 57880 }, { "epoch": 0.23388292521321769, "grad_norm": 1159.9666748046875, "learning_rate": 3.792787228217529e-05, "loss": 96.6831, "step": 57890 }, { "epoch": 0.2339233264785853, "grad_norm": 1524.46240234375, "learning_rate": 3.792663429494986e-05, "loss": 71.0455, "step": 57900 }, { "epoch": 0.23396372774395294, "grad_norm": 2358.887939453125, "learning_rate": 3.792539595823548e-05, "loss": 70.4084, "step": 57910 }, { "epoch": 0.23400412900932058, "grad_norm": 779.4794921875, "learning_rate": 3.792415727205627e-05, "loss": 73.8759, "step": 57920 }, { "epoch": 0.2340445302746882, "grad_norm": 1307.2642822265625, "learning_rate": 3.7922918236436384e-05, "loss": 107.4841, "step": 57930 }, { "epoch": 0.23408493154005583, "grad_norm": 1434.2913818359375, "learning_rate": 3.7921678851399984e-05, "loss": 56.1901, "step": 57940 }, { "epoch": 0.23412533280542347, "grad_norm": 524.6749877929688, "learning_rate": 3.792043911697122e-05, "loss": 66.964, "step": 57950 }, { "epoch": 0.2341657340707911, "grad_norm": 756.17822265625, "learning_rate": 3.791919903317428e-05, "loss": 55.6833, "step": 57960 }, { "epoch": 0.23420613533615872, "grad_norm": 943.431396484375, "learning_rate": 3.7917958600033326e-05, "loss": 85.9044, "step": 57970 }, { "epoch": 0.23424653660152636, "grad_norm": 856.8779907226562, "learning_rate": 3.791671781757254e-05, "loss": 53.0595, "step": 57980 }, { "epoch": 0.234286937866894, "grad_norm": 432.1573181152344, "learning_rate": 3.7915476685816124e-05, "loss": 55.1902, "step": 57990 }, { "epoch": 0.23432733913226161, "grad_norm": 1029.0245361328125, "learning_rate": 3.791423520478826e-05, "loss": 79.4546, "step": 58000 }, { "epoch": 0.23436774039762925, "grad_norm": 949.74609375, "learning_rate": 3.791299337451316e-05, "loss": 62.7363, "step": 58010 }, { "epoch": 0.2344081416629969, "grad_norm": 2479.85888671875, "learning_rate": 3.791175119501503e-05, "loss": 100.7351, "step": 58020 }, { "epoch": 0.2344485429283645, "grad_norm": 876.0036010742188, "learning_rate": 3.791050866631809e-05, "loss": 57.7869, "step": 58030 }, { "epoch": 0.23448894419373215, "grad_norm": 1590.4725341796875, "learning_rate": 3.790926578844657e-05, "loss": 109.7307, "step": 58040 }, { "epoch": 0.2345293454590998, "grad_norm": 633.0686645507812, "learning_rate": 3.790802256142468e-05, "loss": 72.1402, "step": 58050 }, { "epoch": 0.2345697467244674, "grad_norm": 700.5449829101562, "learning_rate": 3.790677898527668e-05, "loss": 106.0299, "step": 58060 }, { "epoch": 0.23461014798983504, "grad_norm": 851.0189208984375, "learning_rate": 3.79055350600268e-05, "loss": 84.5764, "step": 58070 }, { "epoch": 0.23465054925520268, "grad_norm": 615.2695922851562, "learning_rate": 3.79042907856993e-05, "loss": 88.8549, "step": 58080 }, { "epoch": 0.2346909505205703, "grad_norm": 552.6085205078125, "learning_rate": 3.790304616231843e-05, "loss": 81.1972, "step": 58090 }, { "epoch": 0.23473135178593793, "grad_norm": 307.0838928222656, "learning_rate": 3.790180118990845e-05, "loss": 48.328, "step": 58100 }, { "epoch": 0.23477175305130557, "grad_norm": 529.787841796875, "learning_rate": 3.7900555868493656e-05, "loss": 42.8784, "step": 58110 }, { "epoch": 0.2348121543166732, "grad_norm": 378.6830139160156, "learning_rate": 3.78993101980983e-05, "loss": 50.0545, "step": 58120 }, { "epoch": 0.23485255558204082, "grad_norm": 1031.341064453125, "learning_rate": 3.789806417874668e-05, "loss": 55.2613, "step": 58130 }, { "epoch": 0.23489295684740846, "grad_norm": 585.3533325195312, "learning_rate": 3.789681781046308e-05, "loss": 74.903, "step": 58140 }, { "epoch": 0.2349333581127761, "grad_norm": 5360.59130859375, "learning_rate": 3.789557109327181e-05, "loss": 74.3554, "step": 58150 }, { "epoch": 0.23497375937814372, "grad_norm": 848.1822509765625, "learning_rate": 3.789432402719716e-05, "loss": 66.7466, "step": 58160 }, { "epoch": 0.23501416064351136, "grad_norm": 967.7154541015625, "learning_rate": 3.7893076612263454e-05, "loss": 100.5138, "step": 58170 }, { "epoch": 0.235054561908879, "grad_norm": 565.66845703125, "learning_rate": 3.7891828848495006e-05, "loss": 49.7677, "step": 58180 }, { "epoch": 0.2350949631742466, "grad_norm": 437.8297424316406, "learning_rate": 3.789058073591615e-05, "loss": 60.5386, "step": 58190 }, { "epoch": 0.23513536443961425, "grad_norm": 997.5803833007812, "learning_rate": 3.78893322745512e-05, "loss": 66.6851, "step": 58200 }, { "epoch": 0.2351757657049819, "grad_norm": 1354.269775390625, "learning_rate": 3.7888083464424513e-05, "loss": 81.9496, "step": 58210 }, { "epoch": 0.2352161669703495, "grad_norm": 885.1353149414062, "learning_rate": 3.788683430556043e-05, "loss": 64.9704, "step": 58220 }, { "epoch": 0.23525656823571714, "grad_norm": 1185.8641357421875, "learning_rate": 3.78855847979833e-05, "loss": 71.7671, "step": 58230 }, { "epoch": 0.23529696950108478, "grad_norm": 1009.7171630859375, "learning_rate": 3.7884334941717494e-05, "loss": 113.77, "step": 58240 }, { "epoch": 0.2353373707664524, "grad_norm": 5088.71240234375, "learning_rate": 3.788308473678737e-05, "loss": 168.055, "step": 58250 }, { "epoch": 0.23537777203182003, "grad_norm": 506.412109375, "learning_rate": 3.78818341832173e-05, "loss": 90.3233, "step": 58260 }, { "epoch": 0.23541817329718767, "grad_norm": 486.355712890625, "learning_rate": 3.788058328103166e-05, "loss": 96.4967, "step": 58270 }, { "epoch": 0.2354585745625553, "grad_norm": 651.5347290039062, "learning_rate": 3.787933203025485e-05, "loss": 43.4667, "step": 58280 }, { "epoch": 0.23549897582792292, "grad_norm": 678.38330078125, "learning_rate": 3.787808043091126e-05, "loss": 79.4518, "step": 58290 }, { "epoch": 0.23553937709329056, "grad_norm": 248.24075317382812, "learning_rate": 3.787682848302528e-05, "loss": 80.3003, "step": 58300 }, { "epoch": 0.2355797783586582, "grad_norm": 1245.1475830078125, "learning_rate": 3.787557618662133e-05, "loss": 61.9618, "step": 58310 }, { "epoch": 0.23562017962402582, "grad_norm": 1014.1390380859375, "learning_rate": 3.787432354172381e-05, "loss": 67.9585, "step": 58320 }, { "epoch": 0.23566058088939346, "grad_norm": 3150.63330078125, "learning_rate": 3.787307054835716e-05, "loss": 93.1764, "step": 58330 }, { "epoch": 0.2357009821547611, "grad_norm": 898.7249145507812, "learning_rate": 3.7871817206545805e-05, "loss": 77.5884, "step": 58340 }, { "epoch": 0.2357413834201287, "grad_norm": 635.70166015625, "learning_rate": 3.787056351631416e-05, "loss": 66.2552, "step": 58350 }, { "epoch": 0.23578178468549635, "grad_norm": 3590.01318359375, "learning_rate": 3.786930947768668e-05, "loss": 104.977, "step": 58360 }, { "epoch": 0.235822185950864, "grad_norm": 490.37164306640625, "learning_rate": 3.7868055090687814e-05, "loss": 74.0476, "step": 58370 }, { "epoch": 0.2358625872162316, "grad_norm": 920.75146484375, "learning_rate": 3.786680035534202e-05, "loss": 65.383, "step": 58380 }, { "epoch": 0.23590298848159924, "grad_norm": 548.8573608398438, "learning_rate": 3.7865545271673744e-05, "loss": 66.729, "step": 58390 }, { "epoch": 0.23594338974696688, "grad_norm": 436.5902099609375, "learning_rate": 3.786428983970748e-05, "loss": 81.463, "step": 58400 }, { "epoch": 0.2359837910123345, "grad_norm": 1367.1328125, "learning_rate": 3.7863034059467676e-05, "loss": 92.1821, "step": 58410 }, { "epoch": 0.23602419227770213, "grad_norm": 478.23712158203125, "learning_rate": 3.786177793097883e-05, "loss": 71.7802, "step": 58420 }, { "epoch": 0.23606459354306977, "grad_norm": 1330.28076171875, "learning_rate": 3.786052145426543e-05, "loss": 66.2717, "step": 58430 }, { "epoch": 0.2361049948084374, "grad_norm": 682.5220947265625, "learning_rate": 3.7859264629351965e-05, "loss": 69.2333, "step": 58440 }, { "epoch": 0.23614539607380503, "grad_norm": 290.62176513671875, "learning_rate": 3.785800745626294e-05, "loss": 68.7359, "step": 58450 }, { "epoch": 0.23618579733917267, "grad_norm": 1322.16552734375, "learning_rate": 3.785674993502287e-05, "loss": 92.5002, "step": 58460 }, { "epoch": 0.2362261986045403, "grad_norm": 464.6538391113281, "learning_rate": 3.785549206565626e-05, "loss": 64.4816, "step": 58470 }, { "epoch": 0.23626659986990792, "grad_norm": 493.19158935546875, "learning_rate": 3.785423384818765e-05, "loss": 65.7234, "step": 58480 }, { "epoch": 0.23630700113527556, "grad_norm": 1342.987060546875, "learning_rate": 3.7852975282641555e-05, "loss": 72.9235, "step": 58490 }, { "epoch": 0.2363474024006432, "grad_norm": 1095.0755615234375, "learning_rate": 3.785171636904252e-05, "loss": 67.7989, "step": 58500 }, { "epoch": 0.2363878036660108, "grad_norm": 581.5772094726562, "learning_rate": 3.785045710741507e-05, "loss": 71.6452, "step": 58510 }, { "epoch": 0.23642820493137845, "grad_norm": 900.9180908203125, "learning_rate": 3.7849197497783775e-05, "loss": 72.144, "step": 58520 }, { "epoch": 0.2364686061967461, "grad_norm": 350.960693359375, "learning_rate": 3.784793754017319e-05, "loss": 89.0088, "step": 58530 }, { "epoch": 0.2365090074621137, "grad_norm": 0.0, "learning_rate": 3.7846677234607874e-05, "loss": 96.5052, "step": 58540 }, { "epoch": 0.23654940872748134, "grad_norm": 0.0, "learning_rate": 3.7845416581112394e-05, "loss": 51.9279, "step": 58550 }, { "epoch": 0.23658980999284898, "grad_norm": 3187.671142578125, "learning_rate": 3.784415557971133e-05, "loss": 93.1097, "step": 58560 }, { "epoch": 0.2366302112582166, "grad_norm": 1416.762939453125, "learning_rate": 3.784289423042927e-05, "loss": 94.1914, "step": 58570 }, { "epoch": 0.23667061252358423, "grad_norm": 884.7224731445312, "learning_rate": 3.784163253329079e-05, "loss": 98.9293, "step": 58580 }, { "epoch": 0.23671101378895187, "grad_norm": 1644.8016357421875, "learning_rate": 3.7840370488320514e-05, "loss": 139.2957, "step": 58590 }, { "epoch": 0.23675141505431951, "grad_norm": 820.919921875, "learning_rate": 3.7839108095543016e-05, "loss": 71.7127, "step": 58600 }, { "epoch": 0.23679181631968713, "grad_norm": 708.4010009765625, "learning_rate": 3.783784535498293e-05, "loss": 55.7633, "step": 58610 }, { "epoch": 0.23683221758505477, "grad_norm": 1582.913818359375, "learning_rate": 3.7836582266664864e-05, "loss": 92.6639, "step": 58620 }, { "epoch": 0.2368726188504224, "grad_norm": 834.7432861328125, "learning_rate": 3.783531883061345e-05, "loss": 82.2903, "step": 58630 }, { "epoch": 0.23691302011579002, "grad_norm": 605.028076171875, "learning_rate": 3.7834055046853297e-05, "loss": 65.3869, "step": 58640 }, { "epoch": 0.23695342138115766, "grad_norm": 2071.533447265625, "learning_rate": 3.783279091540907e-05, "loss": 76.7022, "step": 58650 }, { "epoch": 0.2369938226465253, "grad_norm": 768.4856567382812, "learning_rate": 3.783152643630541e-05, "loss": 56.1115, "step": 58660 }, { "epoch": 0.2370342239118929, "grad_norm": 617.2097778320312, "learning_rate": 3.783026160956695e-05, "loss": 56.9971, "step": 58670 }, { "epoch": 0.23707462517726055, "grad_norm": 665.7985229492188, "learning_rate": 3.782899643521836e-05, "loss": 64.8416, "step": 58680 }, { "epoch": 0.2371150264426282, "grad_norm": 860.6461181640625, "learning_rate": 3.782773091328431e-05, "loss": 54.3539, "step": 58690 }, { "epoch": 0.2371554277079958, "grad_norm": 1088.1025390625, "learning_rate": 3.782646504378947e-05, "loss": 55.3993, "step": 58700 }, { "epoch": 0.23719582897336344, "grad_norm": 916.010498046875, "learning_rate": 3.782519882675851e-05, "loss": 72.0945, "step": 58710 }, { "epoch": 0.23723623023873108, "grad_norm": 428.01141357421875, "learning_rate": 3.782393226221613e-05, "loss": 52.974, "step": 58720 }, { "epoch": 0.2372766315040987, "grad_norm": 1858.70703125, "learning_rate": 3.7822665350187006e-05, "loss": 111.1884, "step": 58730 }, { "epoch": 0.23731703276946634, "grad_norm": 1709.9642333984375, "learning_rate": 3.782139809069585e-05, "loss": 92.5917, "step": 58740 }, { "epoch": 0.23735743403483398, "grad_norm": 1273.9083251953125, "learning_rate": 3.782013048376736e-05, "loss": 75.8697, "step": 58750 }, { "epoch": 0.23739783530020162, "grad_norm": 816.621826171875, "learning_rate": 3.7818862529426255e-05, "loss": 91.2924, "step": 58760 }, { "epoch": 0.23743823656556923, "grad_norm": 566.1441650390625, "learning_rate": 3.781759422769725e-05, "loss": 61.1575, "step": 58770 }, { "epoch": 0.23747863783093687, "grad_norm": 368.1184997558594, "learning_rate": 3.781632557860507e-05, "loss": 55.6892, "step": 58780 }, { "epoch": 0.2375190390963045, "grad_norm": 1110.0902099609375, "learning_rate": 3.7815056582174455e-05, "loss": 83.4707, "step": 58790 }, { "epoch": 0.23755944036167212, "grad_norm": 765.1774291992188, "learning_rate": 3.781378723843014e-05, "loss": 77.6408, "step": 58800 }, { "epoch": 0.23759984162703976, "grad_norm": 1125.1318359375, "learning_rate": 3.781251754739686e-05, "loss": 83.5344, "step": 58810 }, { "epoch": 0.2376402428924074, "grad_norm": 615.6629028320312, "learning_rate": 3.781124750909939e-05, "loss": 51.5137, "step": 58820 }, { "epoch": 0.237680644157775, "grad_norm": 702.6609497070312, "learning_rate": 3.7809977123562486e-05, "loss": 82.915, "step": 58830 }, { "epoch": 0.23772104542314265, "grad_norm": 962.4164428710938, "learning_rate": 3.78087063908109e-05, "loss": 80.9654, "step": 58840 }, { "epoch": 0.2377614466885103, "grad_norm": 205.25384521484375, "learning_rate": 3.7807435310869415e-05, "loss": 87.7431, "step": 58850 }, { "epoch": 0.2378018479538779, "grad_norm": 573.6884765625, "learning_rate": 3.780616388376281e-05, "loss": 75.8221, "step": 58860 }, { "epoch": 0.23784224921924554, "grad_norm": 1432.1715087890625, "learning_rate": 3.780489210951588e-05, "loss": 77.8596, "step": 58870 }, { "epoch": 0.23788265048461318, "grad_norm": 437.041015625, "learning_rate": 3.7803619988153404e-05, "loss": 73.7691, "step": 58880 }, { "epoch": 0.2379230517499808, "grad_norm": 1854.2607421875, "learning_rate": 3.780234751970019e-05, "loss": 54.8822, "step": 58890 }, { "epoch": 0.23796345301534844, "grad_norm": 1220.783203125, "learning_rate": 3.7801074704181046e-05, "loss": 89.3865, "step": 58900 }, { "epoch": 0.23800385428071608, "grad_norm": 1539.205322265625, "learning_rate": 3.77998015416208e-05, "loss": 88.4137, "step": 58910 }, { "epoch": 0.23804425554608372, "grad_norm": 572.49609375, "learning_rate": 3.779852803204424e-05, "loss": 48.2826, "step": 58920 }, { "epoch": 0.23808465681145133, "grad_norm": 812.40966796875, "learning_rate": 3.779725417547622e-05, "loss": 79.3678, "step": 58930 }, { "epoch": 0.23812505807681897, "grad_norm": 553.4939575195312, "learning_rate": 3.7795979971941573e-05, "loss": 59.0748, "step": 58940 }, { "epoch": 0.2381654593421866, "grad_norm": 650.33251953125, "learning_rate": 3.779470542146513e-05, "loss": 61.1547, "step": 58950 }, { "epoch": 0.23820586060755422, "grad_norm": 1057.7850341796875, "learning_rate": 3.779343052407174e-05, "loss": 55.8551, "step": 58960 }, { "epoch": 0.23824626187292186, "grad_norm": 616.528076171875, "learning_rate": 3.779215527978626e-05, "loss": 77.092, "step": 58970 }, { "epoch": 0.2382866631382895, "grad_norm": 733.9642333984375, "learning_rate": 3.7790879688633565e-05, "loss": 81.81, "step": 58980 }, { "epoch": 0.2383270644036571, "grad_norm": 751.3402099609375, "learning_rate": 3.77896037506385e-05, "loss": 142.6629, "step": 58990 }, { "epoch": 0.23836746566902475, "grad_norm": 1286.316162109375, "learning_rate": 3.778832746582596e-05, "loss": 99.7319, "step": 59000 }, { "epoch": 0.2384078669343924, "grad_norm": 672.05712890625, "learning_rate": 3.778705083422081e-05, "loss": 77.2239, "step": 59010 }, { "epoch": 0.23844826819976, "grad_norm": 1207.39892578125, "learning_rate": 3.7785773855847944e-05, "loss": 64.5921, "step": 59020 }, { "epoch": 0.23848866946512765, "grad_norm": 1469.94580078125, "learning_rate": 3.7784496530732264e-05, "loss": 66.1509, "step": 59030 }, { "epoch": 0.23852907073049529, "grad_norm": 630.2105102539062, "learning_rate": 3.778321885889867e-05, "loss": 75.3852, "step": 59040 }, { "epoch": 0.2385694719958629, "grad_norm": 601.9887084960938, "learning_rate": 3.778194084037207e-05, "loss": 74.5009, "step": 59050 }, { "epoch": 0.23860987326123054, "grad_norm": 940.4525756835938, "learning_rate": 3.778066247517737e-05, "loss": 57.7681, "step": 59060 }, { "epoch": 0.23865027452659818, "grad_norm": 361.8607482910156, "learning_rate": 3.7779383763339505e-05, "loss": 57.5467, "step": 59070 }, { "epoch": 0.23869067579196582, "grad_norm": 383.443359375, "learning_rate": 3.7778104704883405e-05, "loss": 107.8757, "step": 59080 }, { "epoch": 0.23873107705733343, "grad_norm": 575.1685180664062, "learning_rate": 3.7776825299834e-05, "loss": 63.5811, "step": 59090 }, { "epoch": 0.23877147832270107, "grad_norm": 978.732177734375, "learning_rate": 3.777554554821623e-05, "loss": 88.5463, "step": 59100 }, { "epoch": 0.2388118795880687, "grad_norm": 629.1954956054688, "learning_rate": 3.777426545005505e-05, "loss": 57.9253, "step": 59110 }, { "epoch": 0.23885228085343632, "grad_norm": 402.6628723144531, "learning_rate": 3.777298500537542e-05, "loss": 104.7018, "step": 59120 }, { "epoch": 0.23889268211880396, "grad_norm": 416.7928161621094, "learning_rate": 3.7771704214202287e-05, "loss": 80.7156, "step": 59130 }, { "epoch": 0.2389330833841716, "grad_norm": 612.1146240234375, "learning_rate": 3.7770423076560635e-05, "loss": 63.2797, "step": 59140 }, { "epoch": 0.23897348464953921, "grad_norm": 779.5694580078125, "learning_rate": 3.776914159247544e-05, "loss": 79.3714, "step": 59150 }, { "epoch": 0.23901388591490685, "grad_norm": 450.3630676269531, "learning_rate": 3.776785976197168e-05, "loss": 64.3583, "step": 59160 }, { "epoch": 0.2390542871802745, "grad_norm": 236.486328125, "learning_rate": 3.776657758507434e-05, "loss": 66.5476, "step": 59170 }, { "epoch": 0.2390946884456421, "grad_norm": 1080.7608642578125, "learning_rate": 3.776529506180843e-05, "loss": 39.3434, "step": 59180 }, { "epoch": 0.23913508971100975, "grad_norm": 1653.2723388671875, "learning_rate": 3.776401219219894e-05, "loss": 84.2639, "step": 59190 }, { "epoch": 0.2391754909763774, "grad_norm": 595.5156860351562, "learning_rate": 3.7762728976270897e-05, "loss": 73.5355, "step": 59200 }, { "epoch": 0.239215892241745, "grad_norm": 1844.938232421875, "learning_rate": 3.77614454140493e-05, "loss": 73.5855, "step": 59210 }, { "epoch": 0.23925629350711264, "grad_norm": 553.8212890625, "learning_rate": 3.776016150555918e-05, "loss": 61.2807, "step": 59220 }, { "epoch": 0.23929669477248028, "grad_norm": 887.1659545898438, "learning_rate": 3.775887725082557e-05, "loss": 56.4401, "step": 59230 }, { "epoch": 0.23933709603784792, "grad_norm": 925.3824462890625, "learning_rate": 3.7757592649873503e-05, "loss": 97.8124, "step": 59240 }, { "epoch": 0.23937749730321553, "grad_norm": 669.0916748046875, "learning_rate": 3.7756307702728026e-05, "loss": 83.3682, "step": 59250 }, { "epoch": 0.23941789856858317, "grad_norm": 510.4156799316406, "learning_rate": 3.7755022409414195e-05, "loss": 74.0489, "step": 59260 }, { "epoch": 0.2394582998339508, "grad_norm": 832.69580078125, "learning_rate": 3.775373676995705e-05, "loss": 55.0958, "step": 59270 }, { "epoch": 0.23949870109931842, "grad_norm": 1248.660888671875, "learning_rate": 3.775245078438168e-05, "loss": 59.6337, "step": 59280 }, { "epoch": 0.23953910236468606, "grad_norm": 962.0632934570312, "learning_rate": 3.775116445271313e-05, "loss": 54.9472, "step": 59290 }, { "epoch": 0.2395795036300537, "grad_norm": 1186.34130859375, "learning_rate": 3.77498777749765e-05, "loss": 73.6589, "step": 59300 }, { "epoch": 0.23961990489542132, "grad_norm": 554.897705078125, "learning_rate": 3.7748590751196854e-05, "loss": 77.572, "step": 59310 }, { "epoch": 0.23966030616078896, "grad_norm": 1017.129638671875, "learning_rate": 3.7747303381399304e-05, "loss": 76.4657, "step": 59320 }, { "epoch": 0.2397007074261566, "grad_norm": 3422.4931640625, "learning_rate": 3.7746015665608934e-05, "loss": 89.2775, "step": 59330 }, { "epoch": 0.2397411086915242, "grad_norm": 2242.440185546875, "learning_rate": 3.774472760385085e-05, "loss": 99.2422, "step": 59340 }, { "epoch": 0.23978150995689185, "grad_norm": 636.38134765625, "learning_rate": 3.7743439196150166e-05, "loss": 74.0525, "step": 59350 }, { "epoch": 0.2398219112222595, "grad_norm": 733.93701171875, "learning_rate": 3.774215044253201e-05, "loss": 69.8146, "step": 59360 }, { "epoch": 0.2398623124876271, "grad_norm": 1365.994140625, "learning_rate": 3.774086134302148e-05, "loss": 96.8566, "step": 59370 }, { "epoch": 0.23990271375299474, "grad_norm": 828.5872192382812, "learning_rate": 3.773957189764373e-05, "loss": 66.3401, "step": 59380 }, { "epoch": 0.23994311501836238, "grad_norm": 607.1014404296875, "learning_rate": 3.77382821064239e-05, "loss": 93.6759, "step": 59390 }, { "epoch": 0.23998351628373002, "grad_norm": 1687.7762451171875, "learning_rate": 3.773699196938712e-05, "loss": 89.2844, "step": 59400 }, { "epoch": 0.24002391754909763, "grad_norm": 822.9414672851562, "learning_rate": 3.7735701486558555e-05, "loss": 45.2697, "step": 59410 }, { "epoch": 0.24006431881446527, "grad_norm": 473.8529357910156, "learning_rate": 3.773441065796335e-05, "loss": 57.0944, "step": 59420 }, { "epoch": 0.2401047200798329, "grad_norm": 1023.8804321289062, "learning_rate": 3.7733119483626694e-05, "loss": 62.7734, "step": 59430 }, { "epoch": 0.24014512134520052, "grad_norm": 525.4328002929688, "learning_rate": 3.7731827963573734e-05, "loss": 83.7604, "step": 59440 }, { "epoch": 0.24018552261056816, "grad_norm": 706.7175903320312, "learning_rate": 3.7730536097829655e-05, "loss": 54.1134, "step": 59450 }, { "epoch": 0.2402259238759358, "grad_norm": 956.0654907226562, "learning_rate": 3.7729243886419656e-05, "loss": 57.8804, "step": 59460 }, { "epoch": 0.24026632514130342, "grad_norm": 444.929931640625, "learning_rate": 3.772795132936891e-05, "loss": 42.6263, "step": 59470 }, { "epoch": 0.24030672640667106, "grad_norm": 1101.1414794921875, "learning_rate": 3.7726658426702636e-05, "loss": 91.3343, "step": 59480 }, { "epoch": 0.2403471276720387, "grad_norm": 1087.0360107421875, "learning_rate": 3.7725365178446024e-05, "loss": 68.0614, "step": 59490 }, { "epoch": 0.2403875289374063, "grad_norm": 806.5647583007812, "learning_rate": 3.7724071584624296e-05, "loss": 70.7971, "step": 59500 }, { "epoch": 0.24042793020277395, "grad_norm": 911.4544067382812, "learning_rate": 3.772277764526267e-05, "loss": 85.7331, "step": 59510 }, { "epoch": 0.2404683314681416, "grad_norm": 469.47430419921875, "learning_rate": 3.772148336038636e-05, "loss": 51.1957, "step": 59520 }, { "epoch": 0.2405087327335092, "grad_norm": 324.8692932128906, "learning_rate": 3.772018873002061e-05, "loss": 67.4201, "step": 59530 }, { "epoch": 0.24054913399887684, "grad_norm": 585.5924682617188, "learning_rate": 3.7718893754190665e-05, "loss": 80.5849, "step": 59540 }, { "epoch": 0.24058953526424448, "grad_norm": 538.8850708007812, "learning_rate": 3.7717598432921766e-05, "loss": 61.0507, "step": 59550 }, { "epoch": 0.24062993652961212, "grad_norm": 716.21044921875, "learning_rate": 3.771630276623915e-05, "loss": 49.5787, "step": 59560 }, { "epoch": 0.24067033779497973, "grad_norm": 728.6638793945312, "learning_rate": 3.77150067541681e-05, "loss": 65.5644, "step": 59570 }, { "epoch": 0.24071073906034737, "grad_norm": 380.37115478515625, "learning_rate": 3.7713710396733866e-05, "loss": 53.3074, "step": 59580 }, { "epoch": 0.240751140325715, "grad_norm": 418.3934326171875, "learning_rate": 3.771241369396174e-05, "loss": 61.8226, "step": 59590 }, { "epoch": 0.24079154159108263, "grad_norm": 672.7009887695312, "learning_rate": 3.7711116645876984e-05, "loss": 57.1527, "step": 59600 }, { "epoch": 0.24083194285645027, "grad_norm": 681.2256469726562, "learning_rate": 3.770981925250489e-05, "loss": 98.6313, "step": 59610 }, { "epoch": 0.2408723441218179, "grad_norm": 658.5431518554688, "learning_rate": 3.7708521513870756e-05, "loss": 74.5584, "step": 59620 }, { "epoch": 0.24091274538718552, "grad_norm": 426.46783447265625, "learning_rate": 3.7707223429999874e-05, "loss": 67.2407, "step": 59630 }, { "epoch": 0.24095314665255316, "grad_norm": 1107.43310546875, "learning_rate": 3.770592500091755e-05, "loss": 66.4551, "step": 59640 }, { "epoch": 0.2409935479179208, "grad_norm": 346.4035949707031, "learning_rate": 3.7704626226649106e-05, "loss": 80.4863, "step": 59650 }, { "epoch": 0.2410339491832884, "grad_norm": 631.7588500976562, "learning_rate": 3.7703327107219866e-05, "loss": 79.9864, "step": 59660 }, { "epoch": 0.24107435044865605, "grad_norm": 560.8602294921875, "learning_rate": 3.770202764265514e-05, "loss": 64.0211, "step": 59670 }, { "epoch": 0.2411147517140237, "grad_norm": 664.188232421875, "learning_rate": 3.7700727832980275e-05, "loss": 56.0255, "step": 59680 }, { "epoch": 0.2411551529793913, "grad_norm": 456.9137268066406, "learning_rate": 3.769942767822061e-05, "loss": 69.6144, "step": 59690 }, { "epoch": 0.24119555424475894, "grad_norm": 1441.332763671875, "learning_rate": 3.769812717840149e-05, "loss": 49.5474, "step": 59700 }, { "epoch": 0.24123595551012658, "grad_norm": 893.2605590820312, "learning_rate": 3.7696826333548265e-05, "loss": 83.8505, "step": 59710 }, { "epoch": 0.24127635677549422, "grad_norm": 1629.607421875, "learning_rate": 3.7695525143686305e-05, "loss": 84.8785, "step": 59720 }, { "epoch": 0.24131675804086183, "grad_norm": 472.76495361328125, "learning_rate": 3.7694223608840966e-05, "loss": 75.3418, "step": 59730 }, { "epoch": 0.24135715930622947, "grad_norm": 1648.9901123046875, "learning_rate": 3.7692921729037636e-05, "loss": 71.5154, "step": 59740 }, { "epoch": 0.24139756057159711, "grad_norm": 602.0360717773438, "learning_rate": 3.769161950430168e-05, "loss": 67.145, "step": 59750 }, { "epoch": 0.24143796183696473, "grad_norm": 299.7599182128906, "learning_rate": 3.7690316934658497e-05, "loss": 53.0463, "step": 59760 }, { "epoch": 0.24147836310233237, "grad_norm": 681.124755859375, "learning_rate": 3.768901402013348e-05, "loss": 85.4751, "step": 59770 }, { "epoch": 0.2415187643677, "grad_norm": 942.6168212890625, "learning_rate": 3.7687710760752026e-05, "loss": 92.2869, "step": 59780 }, { "epoch": 0.24155916563306762, "grad_norm": 723.7786865234375, "learning_rate": 3.768640715653955e-05, "loss": 85.5628, "step": 59790 }, { "epoch": 0.24159956689843526, "grad_norm": 0.0, "learning_rate": 3.768510320752145e-05, "loss": 59.9159, "step": 59800 }, { "epoch": 0.2416399681638029, "grad_norm": 2249.366455078125, "learning_rate": 3.768379891372316e-05, "loss": 72.3002, "step": 59810 }, { "epoch": 0.2416803694291705, "grad_norm": 956.3267822265625, "learning_rate": 3.768249427517011e-05, "loss": 44.7645, "step": 59820 }, { "epoch": 0.24172077069453815, "grad_norm": 4947.322265625, "learning_rate": 3.7681189291887734e-05, "loss": 111.1965, "step": 59830 }, { "epoch": 0.2417611719599058, "grad_norm": 1276.848876953125, "learning_rate": 3.767988396390146e-05, "loss": 64.6901, "step": 59840 }, { "epoch": 0.2418015732252734, "grad_norm": 1012.7622680664062, "learning_rate": 3.7678578291236756e-05, "loss": 79.9848, "step": 59850 }, { "epoch": 0.24184197449064104, "grad_norm": 789.6127319335938, "learning_rate": 3.767727227391906e-05, "loss": 89.3822, "step": 59860 }, { "epoch": 0.24188237575600868, "grad_norm": 1364.601318359375, "learning_rate": 3.7675965911973846e-05, "loss": 84.0104, "step": 59870 }, { "epoch": 0.24192277702137632, "grad_norm": 1051.7496337890625, "learning_rate": 3.767465920542657e-05, "loss": 95.6532, "step": 59880 }, { "epoch": 0.24196317828674394, "grad_norm": 976.9489135742188, "learning_rate": 3.767335215430271e-05, "loss": 83.0921, "step": 59890 }, { "epoch": 0.24200357955211158, "grad_norm": 846.85107421875, "learning_rate": 3.767204475862777e-05, "loss": 79.4891, "step": 59900 }, { "epoch": 0.24204398081747922, "grad_norm": 667.676025390625, "learning_rate": 3.76707370184272e-05, "loss": 75.6804, "step": 59910 }, { "epoch": 0.24208438208284683, "grad_norm": 1053.187255859375, "learning_rate": 3.766942893372652e-05, "loss": 96.0024, "step": 59920 }, { "epoch": 0.24212478334821447, "grad_norm": 2702.04736328125, "learning_rate": 3.766812050455123e-05, "loss": 77.5523, "step": 59930 }, { "epoch": 0.2421651846135821, "grad_norm": 1904.9150390625, "learning_rate": 3.766681173092683e-05, "loss": 107.33, "step": 59940 }, { "epoch": 0.24220558587894972, "grad_norm": 3805.7177734375, "learning_rate": 3.766550261287884e-05, "loss": 91.3345, "step": 59950 }, { "epoch": 0.24224598714431736, "grad_norm": 913.11328125, "learning_rate": 3.766419315043278e-05, "loss": 84.3102, "step": 59960 }, { "epoch": 0.242286388409685, "grad_norm": 679.9208374023438, "learning_rate": 3.7662883343614184e-05, "loss": 68.6057, "step": 59970 }, { "epoch": 0.2423267896750526, "grad_norm": 1311.968505859375, "learning_rate": 3.766157319244858e-05, "loss": 107.963, "step": 59980 }, { "epoch": 0.24236719094042025, "grad_norm": 674.0853271484375, "learning_rate": 3.766026269696152e-05, "loss": 83.4591, "step": 59990 }, { "epoch": 0.2424075922057879, "grad_norm": 759.6544189453125, "learning_rate": 3.7658951857178544e-05, "loss": 91.7942, "step": 60000 }, { "epoch": 0.2424479934711555, "grad_norm": 4039.81005859375, "learning_rate": 3.765764067312521e-05, "loss": 82.161, "step": 60010 }, { "epoch": 0.24248839473652314, "grad_norm": 800.8172607421875, "learning_rate": 3.7656329144827076e-05, "loss": 68.165, "step": 60020 }, { "epoch": 0.24252879600189078, "grad_norm": 579.8431396484375, "learning_rate": 3.765501727230972e-05, "loss": 88.252, "step": 60030 }, { "epoch": 0.24256919726725842, "grad_norm": 625.5407104492188, "learning_rate": 3.765370505559871e-05, "loss": 57.5803, "step": 60040 }, { "epoch": 0.24260959853262604, "grad_norm": 679.8961791992188, "learning_rate": 3.765239249471964e-05, "loss": 65.6697, "step": 60050 }, { "epoch": 0.24264999979799368, "grad_norm": 380.233154296875, "learning_rate": 3.7651079589698075e-05, "loss": 66.8145, "step": 60060 }, { "epoch": 0.24269040106336132, "grad_norm": 1168.4925537109375, "learning_rate": 3.764976634055963e-05, "loss": 91.2174, "step": 60070 }, { "epoch": 0.24273080232872893, "grad_norm": 979.0671997070312, "learning_rate": 3.764845274732992e-05, "loss": 86.8945, "step": 60080 }, { "epoch": 0.24277120359409657, "grad_norm": 587.3710327148438, "learning_rate": 3.7647138810034526e-05, "loss": 88.6826, "step": 60090 }, { "epoch": 0.2428116048594642, "grad_norm": 796.7279663085938, "learning_rate": 3.764582452869907e-05, "loss": 78.2054, "step": 60100 }, { "epoch": 0.24285200612483182, "grad_norm": 1938.2862548828125, "learning_rate": 3.7644509903349186e-05, "loss": 69.0388, "step": 60110 }, { "epoch": 0.24289240739019946, "grad_norm": 934.1618041992188, "learning_rate": 3.7643194934010494e-05, "loss": 87.1884, "step": 60120 }, { "epoch": 0.2429328086555671, "grad_norm": 586.398193359375, "learning_rate": 3.7641879620708636e-05, "loss": 80.0338, "step": 60130 }, { "epoch": 0.2429732099209347, "grad_norm": 738.1567993164062, "learning_rate": 3.764056396346925e-05, "loss": 67.8395, "step": 60140 }, { "epoch": 0.24301361118630235, "grad_norm": 1113.212890625, "learning_rate": 3.763924796231799e-05, "loss": 73.3919, "step": 60150 }, { "epoch": 0.24305401245167, "grad_norm": 586.4918823242188, "learning_rate": 3.763793161728051e-05, "loss": 53.713, "step": 60160 }, { "epoch": 0.2430944137170376, "grad_norm": 1516.4075927734375, "learning_rate": 3.763661492838247e-05, "loss": 58.0344, "step": 60170 }, { "epoch": 0.24313481498240525, "grad_norm": 791.6630249023438, "learning_rate": 3.763529789564955e-05, "loss": 62.3867, "step": 60180 }, { "epoch": 0.24317521624777289, "grad_norm": 966.2651977539062, "learning_rate": 3.7633980519107406e-05, "loss": 79.9166, "step": 60190 }, { "epoch": 0.24321561751314053, "grad_norm": 635.9515380859375, "learning_rate": 3.763266279878174e-05, "loss": 92.245, "step": 60200 }, { "epoch": 0.24325601877850814, "grad_norm": 775.6006469726562, "learning_rate": 3.763134473469824e-05, "loss": 55.5144, "step": 60210 }, { "epoch": 0.24329642004387578, "grad_norm": 683.8140258789062, "learning_rate": 3.7630026326882586e-05, "loss": 46.8155, "step": 60220 }, { "epoch": 0.24333682130924342, "grad_norm": 348.5704345703125, "learning_rate": 3.76287075753605e-05, "loss": 97.6181, "step": 60230 }, { "epoch": 0.24337722257461103, "grad_norm": 3307.7119140625, "learning_rate": 3.762738848015768e-05, "loss": 58.3651, "step": 60240 }, { "epoch": 0.24341762383997867, "grad_norm": 851.7776489257812, "learning_rate": 3.7626069041299847e-05, "loss": 71.8492, "step": 60250 }, { "epoch": 0.2434580251053463, "grad_norm": 983.8527221679688, "learning_rate": 3.7624749258812726e-05, "loss": 99.8106, "step": 60260 }, { "epoch": 0.24349842637071392, "grad_norm": 529.5291748046875, "learning_rate": 3.762342913272204e-05, "loss": 58.98, "step": 60270 }, { "epoch": 0.24353882763608156, "grad_norm": 689.0316772460938, "learning_rate": 3.762210866305354e-05, "loss": 78.8336, "step": 60280 }, { "epoch": 0.2435792289014492, "grad_norm": 468.4165344238281, "learning_rate": 3.762078784983294e-05, "loss": 52.1385, "step": 60290 }, { "epoch": 0.24361963016681681, "grad_norm": 794.3234252929688, "learning_rate": 3.7619466693086025e-05, "loss": 70.7858, "step": 60300 }, { "epoch": 0.24366003143218445, "grad_norm": 526.5389404296875, "learning_rate": 3.761814519283853e-05, "loss": 52.2118, "step": 60310 }, { "epoch": 0.2437004326975521, "grad_norm": 689.9909057617188, "learning_rate": 3.761682334911623e-05, "loss": 52.7487, "step": 60320 }, { "epoch": 0.2437408339629197, "grad_norm": 543.2650146484375, "learning_rate": 3.761550116194488e-05, "loss": 72.3932, "step": 60330 }, { "epoch": 0.24378123522828735, "grad_norm": 622.240966796875, "learning_rate": 3.7614178631350274e-05, "loss": 56.8141, "step": 60340 }, { "epoch": 0.243821636493655, "grad_norm": 674.28369140625, "learning_rate": 3.761285575735818e-05, "loss": 63.2067, "step": 60350 }, { "epoch": 0.24386203775902263, "grad_norm": 1530.883544921875, "learning_rate": 3.7611532539994405e-05, "loss": 91.0302, "step": 60360 }, { "epoch": 0.24390243902439024, "grad_norm": 751.9327392578125, "learning_rate": 3.7610208979284724e-05, "loss": 61.8245, "step": 60370 }, { "epoch": 0.24394284028975788, "grad_norm": 934.640869140625, "learning_rate": 3.7608885075254965e-05, "loss": 61.9268, "step": 60380 }, { "epoch": 0.24398324155512552, "grad_norm": 917.923583984375, "learning_rate": 3.760756082793092e-05, "loss": 55.6999, "step": 60390 }, { "epoch": 0.24402364282049313, "grad_norm": 518.9017944335938, "learning_rate": 3.7606236237338406e-05, "loss": 57.7457, "step": 60400 }, { "epoch": 0.24406404408586077, "grad_norm": 589.4769287109375, "learning_rate": 3.7604911303503255e-05, "loss": 45.9069, "step": 60410 }, { "epoch": 0.2441044453512284, "grad_norm": 930.5416870117188, "learning_rate": 3.7603586026451296e-05, "loss": 77.5556, "step": 60420 }, { "epoch": 0.24414484661659602, "grad_norm": 1381.807373046875, "learning_rate": 3.760226040620837e-05, "loss": 113.4836, "step": 60430 }, { "epoch": 0.24418524788196366, "grad_norm": 783.2221069335938, "learning_rate": 3.760093444280031e-05, "loss": 62.2471, "step": 60440 }, { "epoch": 0.2442256491473313, "grad_norm": 676.7279663085938, "learning_rate": 3.7599608136252975e-05, "loss": 82.4463, "step": 60450 }, { "epoch": 0.24426605041269892, "grad_norm": 639.694091796875, "learning_rate": 3.759828148659221e-05, "loss": 74.3016, "step": 60460 }, { "epoch": 0.24430645167806656, "grad_norm": 464.94781494140625, "learning_rate": 3.759695449384389e-05, "loss": 50.5467, "step": 60470 }, { "epoch": 0.2443468529434342, "grad_norm": 1204.313720703125, "learning_rate": 3.7595627158033895e-05, "loss": 68.3696, "step": 60480 }, { "epoch": 0.2443872542088018, "grad_norm": 702.4818725585938, "learning_rate": 3.759429947918808e-05, "loss": 89.8446, "step": 60490 }, { "epoch": 0.24442765547416945, "grad_norm": 854.7379760742188, "learning_rate": 3.759297145733234e-05, "loss": 65.2083, "step": 60500 }, { "epoch": 0.2444680567395371, "grad_norm": 435.7986145019531, "learning_rate": 3.7591643092492554e-05, "loss": 49.3518, "step": 60510 }, { "epoch": 0.24450845800490473, "grad_norm": 1312.701171875, "learning_rate": 3.759031438469464e-05, "loss": 85.8196, "step": 60520 }, { "epoch": 0.24454885927027234, "grad_norm": 1406.1134033203125, "learning_rate": 3.7588985333964486e-05, "loss": 64.5632, "step": 60530 }, { "epoch": 0.24458926053563998, "grad_norm": 985.60400390625, "learning_rate": 3.758765594032801e-05, "loss": 117.3803, "step": 60540 }, { "epoch": 0.24462966180100762, "grad_norm": 1442.556396484375, "learning_rate": 3.7586326203811124e-05, "loss": 108.1851, "step": 60550 }, { "epoch": 0.24467006306637523, "grad_norm": 749.6279296875, "learning_rate": 3.758499612443976e-05, "loss": 72.7907, "step": 60560 }, { "epoch": 0.24471046433174287, "grad_norm": 360.9629821777344, "learning_rate": 3.758366570223984e-05, "loss": 41.274, "step": 60570 }, { "epoch": 0.2447508655971105, "grad_norm": 1088.8790283203125, "learning_rate": 3.758233493723731e-05, "loss": 52.9848, "step": 60580 }, { "epoch": 0.24479126686247812, "grad_norm": 662.2022705078125, "learning_rate": 3.7581003829458104e-05, "loss": 78.0779, "step": 60590 }, { "epoch": 0.24483166812784576, "grad_norm": 636.2974853515625, "learning_rate": 3.757967237892818e-05, "loss": 55.419, "step": 60600 }, { "epoch": 0.2448720693932134, "grad_norm": 622.9697265625, "learning_rate": 3.757834058567348e-05, "loss": 63.7335, "step": 60610 }, { "epoch": 0.24491247065858102, "grad_norm": 291.7939758300781, "learning_rate": 3.757700844971999e-05, "loss": 32.439, "step": 60620 }, { "epoch": 0.24495287192394866, "grad_norm": 421.67889404296875, "learning_rate": 3.7575675971093674e-05, "loss": 107.8974, "step": 60630 }, { "epoch": 0.2449932731893163, "grad_norm": 681.85595703125, "learning_rate": 3.75743431498205e-05, "loss": 92.9533, "step": 60640 }, { "epoch": 0.2450336744546839, "grad_norm": 633.4293823242188, "learning_rate": 3.757300998592646e-05, "loss": 51.9228, "step": 60650 }, { "epoch": 0.24507407572005155, "grad_norm": 600.5732421875, "learning_rate": 3.757167647943755e-05, "loss": 38.8882, "step": 60660 }, { "epoch": 0.2451144769854192, "grad_norm": 597.346435546875, "learning_rate": 3.757034263037975e-05, "loss": 79.2042, "step": 60670 }, { "epoch": 0.24515487825078683, "grad_norm": 1310.50537109375, "learning_rate": 3.756900843877908e-05, "loss": 79.4892, "step": 60680 }, { "epoch": 0.24519527951615444, "grad_norm": 1101.57861328125, "learning_rate": 3.756767390466154e-05, "loss": 72.6483, "step": 60690 }, { "epoch": 0.24523568078152208, "grad_norm": 1607.89599609375, "learning_rate": 3.756633902805316e-05, "loss": 89.7742, "step": 60700 }, { "epoch": 0.24527608204688972, "grad_norm": 901.9148559570312, "learning_rate": 3.756500380897995e-05, "loss": 69.8062, "step": 60710 }, { "epoch": 0.24531648331225733, "grad_norm": 1015.1190795898438, "learning_rate": 3.756366824746795e-05, "loss": 97.6566, "step": 60720 }, { "epoch": 0.24535688457762497, "grad_norm": 1168.988037109375, "learning_rate": 3.75623323435432e-05, "loss": 83.2587, "step": 60730 }, { "epoch": 0.2453972858429926, "grad_norm": 1274.6507568359375, "learning_rate": 3.7560996097231736e-05, "loss": 75.8513, "step": 60740 }, { "epoch": 0.24543768710836023, "grad_norm": 1174.125, "learning_rate": 3.755965950855961e-05, "loss": 68.9883, "step": 60750 }, { "epoch": 0.24547808837372787, "grad_norm": 1353.937744140625, "learning_rate": 3.755832257755289e-05, "loss": 57.9202, "step": 60760 }, { "epoch": 0.2455184896390955, "grad_norm": 1133.187744140625, "learning_rate": 3.7556985304237625e-05, "loss": 108.1759, "step": 60770 }, { "epoch": 0.24555889090446312, "grad_norm": 664.1755981445312, "learning_rate": 3.755564768863989e-05, "loss": 71.9331, "step": 60780 }, { "epoch": 0.24559929216983076, "grad_norm": 812.7107543945312, "learning_rate": 3.7554309730785765e-05, "loss": 59.1399, "step": 60790 }, { "epoch": 0.2456396934351984, "grad_norm": 2424.87744140625, "learning_rate": 3.7552971430701344e-05, "loss": 72.2935, "step": 60800 }, { "epoch": 0.245680094700566, "grad_norm": 613.9686889648438, "learning_rate": 3.75516327884127e-05, "loss": 58.8767, "step": 60810 }, { "epoch": 0.24572049596593365, "grad_norm": 1093.13525390625, "learning_rate": 3.755029380394594e-05, "loss": 84.119, "step": 60820 }, { "epoch": 0.2457608972313013, "grad_norm": 906.2931518554688, "learning_rate": 3.7548954477327166e-05, "loss": 96.3142, "step": 60830 }, { "epoch": 0.2458012984966689, "grad_norm": 1076.640380859375, "learning_rate": 3.754761480858249e-05, "loss": 77.1947, "step": 60840 }, { "epoch": 0.24584169976203654, "grad_norm": 558.1048583984375, "learning_rate": 3.7546274797738034e-05, "loss": 57.5041, "step": 60850 }, { "epoch": 0.24588210102740418, "grad_norm": 1044.107177734375, "learning_rate": 3.7544934444819915e-05, "loss": 67.853, "step": 60860 }, { "epoch": 0.24592250229277182, "grad_norm": 317.490966796875, "learning_rate": 3.754359374985426e-05, "loss": 80.8347, "step": 60870 }, { "epoch": 0.24596290355813943, "grad_norm": 1043.4903564453125, "learning_rate": 3.7542252712867214e-05, "loss": 104.5762, "step": 60880 }, { "epoch": 0.24600330482350707, "grad_norm": 795.8134765625, "learning_rate": 3.7540911333884926e-05, "loss": 78.7376, "step": 60890 }, { "epoch": 0.24604370608887471, "grad_norm": 1080.662841796875, "learning_rate": 3.7539569612933536e-05, "loss": 105.4763, "step": 60900 }, { "epoch": 0.24608410735424233, "grad_norm": 568.9183349609375, "learning_rate": 3.753822755003921e-05, "loss": 94.6521, "step": 60910 }, { "epoch": 0.24612450861960997, "grad_norm": 805.295654296875, "learning_rate": 3.753688514522811e-05, "loss": 50.2088, "step": 60920 }, { "epoch": 0.2461649098849776, "grad_norm": 735.4624633789062, "learning_rate": 3.75355423985264e-05, "loss": 82.6926, "step": 60930 }, { "epoch": 0.24620531115034522, "grad_norm": 3377.027587890625, "learning_rate": 3.753419930996026e-05, "loss": 88.3906, "step": 60940 }, { "epoch": 0.24624571241571286, "grad_norm": 636.9912109375, "learning_rate": 3.7532855879555887e-05, "loss": 77.4918, "step": 60950 }, { "epoch": 0.2462861136810805, "grad_norm": 547.3155517578125, "learning_rate": 3.753151210733946e-05, "loss": 65.1002, "step": 60960 }, { "epoch": 0.2463265149464481, "grad_norm": 957.2113647460938, "learning_rate": 3.753016799333717e-05, "loss": 65.5449, "step": 60970 }, { "epoch": 0.24636691621181575, "grad_norm": 656.7042236328125, "learning_rate": 3.752882353757524e-05, "loss": 65.8079, "step": 60980 }, { "epoch": 0.2464073174771834, "grad_norm": 594.960205078125, "learning_rate": 3.752747874007987e-05, "loss": 64.1638, "step": 60990 }, { "epoch": 0.246447718742551, "grad_norm": 309.1339111328125, "learning_rate": 3.7526133600877275e-05, "loss": 65.9514, "step": 61000 }, { "epoch": 0.24648812000791864, "grad_norm": 1143.3817138671875, "learning_rate": 3.7524788119993687e-05, "loss": 138.5115, "step": 61010 }, { "epoch": 0.24652852127328628, "grad_norm": 833.0191650390625, "learning_rate": 3.752344229745532e-05, "loss": 106.2841, "step": 61020 }, { "epoch": 0.24656892253865392, "grad_norm": 537.9629516601562, "learning_rate": 3.7522096133288434e-05, "loss": 65.1042, "step": 61030 }, { "epoch": 0.24660932380402154, "grad_norm": 1437.601806640625, "learning_rate": 3.752074962751926e-05, "loss": 111.3172, "step": 61040 }, { "epoch": 0.24664972506938918, "grad_norm": 746.3345336914062, "learning_rate": 3.751940278017405e-05, "loss": 48.527, "step": 61050 }, { "epoch": 0.24669012633475682, "grad_norm": 455.0274353027344, "learning_rate": 3.751805559127907e-05, "loss": 59.7064, "step": 61060 }, { "epoch": 0.24673052760012443, "grad_norm": 928.3031616210938, "learning_rate": 3.751670806086058e-05, "loss": 52.3294, "step": 61070 }, { "epoch": 0.24677092886549207, "grad_norm": 766.8455810546875, "learning_rate": 3.7515360188944835e-05, "loss": 67.7144, "step": 61080 }, { "epoch": 0.2468113301308597, "grad_norm": 1098.980224609375, "learning_rate": 3.751401197555813e-05, "loss": 76.7141, "step": 61090 }, { "epoch": 0.24685173139622732, "grad_norm": 522.4241943359375, "learning_rate": 3.751266342072675e-05, "loss": 72.6551, "step": 61100 }, { "epoch": 0.24689213266159496, "grad_norm": 1027.0843505859375, "learning_rate": 3.751131452447697e-05, "loss": 96.9705, "step": 61110 }, { "epoch": 0.2469325339269626, "grad_norm": 1297.1356201171875, "learning_rate": 3.7509965286835106e-05, "loss": 97.9248, "step": 61120 }, { "epoch": 0.2469729351923302, "grad_norm": 740.5809326171875, "learning_rate": 3.7508615707827456e-05, "loss": 77.4702, "step": 61130 }, { "epoch": 0.24701333645769785, "grad_norm": 187.4073486328125, "learning_rate": 3.750726578748032e-05, "loss": 75.4316, "step": 61140 }, { "epoch": 0.2470537377230655, "grad_norm": 769.25634765625, "learning_rate": 3.750591552582003e-05, "loss": 100.9645, "step": 61150 }, { "epoch": 0.2470941389884331, "grad_norm": 582.0955810546875, "learning_rate": 3.750456492287291e-05, "loss": 94.5516, "step": 61160 }, { "epoch": 0.24713454025380074, "grad_norm": 518.8898315429688, "learning_rate": 3.7503213978665275e-05, "loss": 107.41, "step": 61170 }, { "epoch": 0.24717494151916838, "grad_norm": 808.5994873046875, "learning_rate": 3.750186269322348e-05, "loss": 56.5193, "step": 61180 }, { "epoch": 0.24721534278453602, "grad_norm": 818.251953125, "learning_rate": 3.750051106657386e-05, "loss": 64.9534, "step": 61190 }, { "epoch": 0.24725574404990364, "grad_norm": 1727.6558837890625, "learning_rate": 3.749915909874276e-05, "loss": 67.6505, "step": 61200 }, { "epoch": 0.24729614531527128, "grad_norm": 482.91314697265625, "learning_rate": 3.749780678975655e-05, "loss": 66.262, "step": 61210 }, { "epoch": 0.24733654658063892, "grad_norm": 445.8804626464844, "learning_rate": 3.7496454139641584e-05, "loss": 59.1876, "step": 61220 }, { "epoch": 0.24737694784600653, "grad_norm": 817.4906005859375, "learning_rate": 3.749510114842424e-05, "loss": 79.4554, "step": 61230 }, { "epoch": 0.24741734911137417, "grad_norm": 244.64735412597656, "learning_rate": 3.7493747816130887e-05, "loss": 66.7083, "step": 61240 }, { "epoch": 0.2474577503767418, "grad_norm": 790.3543701171875, "learning_rate": 3.749239414278792e-05, "loss": 71.0745, "step": 61250 }, { "epoch": 0.24749815164210942, "grad_norm": 1867.8568115234375, "learning_rate": 3.749104012842172e-05, "loss": 87.3792, "step": 61260 }, { "epoch": 0.24753855290747706, "grad_norm": 1511.1171875, "learning_rate": 3.748968577305869e-05, "loss": 89.4993, "step": 61270 }, { "epoch": 0.2475789541728447, "grad_norm": 1624.3121337890625, "learning_rate": 3.748833107672523e-05, "loss": 77.4677, "step": 61280 }, { "epoch": 0.2476193554382123, "grad_norm": 602.2093505859375, "learning_rate": 3.748697603944775e-05, "loss": 52.443, "step": 61290 }, { "epoch": 0.24765975670357995, "grad_norm": 270.013671875, "learning_rate": 3.7485620661252676e-05, "loss": 47.6834, "step": 61300 }, { "epoch": 0.2477001579689476, "grad_norm": 1774.5087890625, "learning_rate": 3.748426494216643e-05, "loss": 78.0292, "step": 61310 }, { "epoch": 0.2477405592343152, "grad_norm": 1068.1976318359375, "learning_rate": 3.748290888221542e-05, "loss": 95.4075, "step": 61320 }, { "epoch": 0.24778096049968285, "grad_norm": 737.3121337890625, "learning_rate": 3.748155248142611e-05, "loss": 83.9512, "step": 61330 }, { "epoch": 0.24782136176505049, "grad_norm": 760.8573608398438, "learning_rate": 3.748019573982493e-05, "loss": 73.3414, "step": 61340 }, { "epoch": 0.24786176303041813, "grad_norm": 665.1573486328125, "learning_rate": 3.747883865743834e-05, "loss": 106.5665, "step": 61350 }, { "epoch": 0.24790216429578574, "grad_norm": 657.4424438476562, "learning_rate": 3.747748123429279e-05, "loss": 57.0647, "step": 61360 }, { "epoch": 0.24794256556115338, "grad_norm": 946.54248046875, "learning_rate": 3.7476123470414745e-05, "loss": 69.0039, "step": 61370 }, { "epoch": 0.24798296682652102, "grad_norm": 1425.1268310546875, "learning_rate": 3.747476536583068e-05, "loss": 80.416, "step": 61380 }, { "epoch": 0.24802336809188863, "grad_norm": 659.6389770507812, "learning_rate": 3.747340692056706e-05, "loss": 82.6614, "step": 61390 }, { "epoch": 0.24806376935725627, "grad_norm": 908.4437866210938, "learning_rate": 3.7472048134650376e-05, "loss": 105.7427, "step": 61400 }, { "epoch": 0.2481041706226239, "grad_norm": 577.42724609375, "learning_rate": 3.747068900810712e-05, "loss": 67.8526, "step": 61410 }, { "epoch": 0.24814457188799152, "grad_norm": 1340.310791015625, "learning_rate": 3.7469329540963795e-05, "loss": 57.7734, "step": 61420 }, { "epoch": 0.24818497315335916, "grad_norm": 2040.929443359375, "learning_rate": 3.746796973324689e-05, "loss": 87.0531, "step": 61430 }, { "epoch": 0.2482253744187268, "grad_norm": 1341.835693359375, "learning_rate": 3.746660958498292e-05, "loss": 88.1157, "step": 61440 }, { "epoch": 0.24826577568409441, "grad_norm": 682.9309692382812, "learning_rate": 3.746524909619841e-05, "loss": 68.3139, "step": 61450 }, { "epoch": 0.24830617694946205, "grad_norm": 875.2620849609375, "learning_rate": 3.746388826691987e-05, "loss": 65.5418, "step": 61460 }, { "epoch": 0.2483465782148297, "grad_norm": 0.0, "learning_rate": 3.746252709717384e-05, "loss": 51.6307, "step": 61470 }, { "epoch": 0.2483869794801973, "grad_norm": 624.052978515625, "learning_rate": 3.746116558698686e-05, "loss": 56.3622, "step": 61480 }, { "epoch": 0.24842738074556495, "grad_norm": 633.302001953125, "learning_rate": 3.745980373638546e-05, "loss": 52.7323, "step": 61490 }, { "epoch": 0.2484677820109326, "grad_norm": 1113.2095947265625, "learning_rate": 3.745844154539619e-05, "loss": 96.0454, "step": 61500 }, { "epoch": 0.24850818327630023, "grad_norm": 766.4423217773438, "learning_rate": 3.745707901404563e-05, "loss": 87.217, "step": 61510 }, { "epoch": 0.24854858454166784, "grad_norm": 1302.5953369140625, "learning_rate": 3.7455716142360324e-05, "loss": 100.6767, "step": 61520 }, { "epoch": 0.24858898580703548, "grad_norm": 322.5316467285156, "learning_rate": 3.7454352930366834e-05, "loss": 85.5547, "step": 61530 }, { "epoch": 0.24862938707240312, "grad_norm": 809.63916015625, "learning_rate": 3.7452989378091755e-05, "loss": 67.3807, "step": 61540 }, { "epoch": 0.24866978833777073, "grad_norm": 513.2466430664062, "learning_rate": 3.7451625485561665e-05, "loss": 50.81, "step": 61550 }, { "epoch": 0.24871018960313837, "grad_norm": 3258.237060546875, "learning_rate": 3.7450261252803146e-05, "loss": 105.0272, "step": 61560 }, { "epoch": 0.248750590868506, "grad_norm": 745.373046875, "learning_rate": 3.74488966798428e-05, "loss": 40.7696, "step": 61570 }, { "epoch": 0.24879099213387362, "grad_norm": 652.2501831054688, "learning_rate": 3.744753176670724e-05, "loss": 70.7613, "step": 61580 }, { "epoch": 0.24883139339924126, "grad_norm": 619.707763671875, "learning_rate": 3.744616651342305e-05, "loss": 86.4871, "step": 61590 }, { "epoch": 0.2488717946646089, "grad_norm": 2102.8876953125, "learning_rate": 3.7444800920016875e-05, "loss": 79.4103, "step": 61600 }, { "epoch": 0.24891219592997652, "grad_norm": 1047.6876220703125, "learning_rate": 3.744343498651532e-05, "loss": 60.9901, "step": 61610 }, { "epoch": 0.24895259719534416, "grad_norm": 762.4063110351562, "learning_rate": 3.744206871294502e-05, "loss": 84.9273, "step": 61620 }, { "epoch": 0.2489929984607118, "grad_norm": 595.7750244140625, "learning_rate": 3.744070209933261e-05, "loss": 78.6703, "step": 61630 }, { "epoch": 0.2490333997260794, "grad_norm": 920.8694458007812, "learning_rate": 3.743933514570473e-05, "loss": 74.1275, "step": 61640 }, { "epoch": 0.24907380099144705, "grad_norm": 802.4329223632812, "learning_rate": 3.743796785208804e-05, "loss": 51.6497, "step": 61650 }, { "epoch": 0.2491142022568147, "grad_norm": 921.7340087890625, "learning_rate": 3.743660021850919e-05, "loss": 53.6544, "step": 61660 }, { "epoch": 0.24915460352218233, "grad_norm": 992.1342163085938, "learning_rate": 3.743523224499483e-05, "loss": 69.4302, "step": 61670 }, { "epoch": 0.24919500478754994, "grad_norm": 1299.7554931640625, "learning_rate": 3.743386393157165e-05, "loss": 64.1878, "step": 61680 }, { "epoch": 0.24923540605291758, "grad_norm": 500.22796630859375, "learning_rate": 3.743249527826632e-05, "loss": 36.1513, "step": 61690 }, { "epoch": 0.24927580731828522, "grad_norm": 325.031005859375, "learning_rate": 3.7431126285105516e-05, "loss": 138.1334, "step": 61700 }, { "epoch": 0.24931620858365283, "grad_norm": 630.5838623046875, "learning_rate": 3.742975695211593e-05, "loss": 88.5154, "step": 61710 }, { "epoch": 0.24935660984902047, "grad_norm": 534.2533569335938, "learning_rate": 3.742838727932426e-05, "loss": 61.4114, "step": 61720 }, { "epoch": 0.2493970111143881, "grad_norm": 437.1034851074219, "learning_rate": 3.74270172667572e-05, "loss": 93.259, "step": 61730 }, { "epoch": 0.24943741237975572, "grad_norm": 530.0656127929688, "learning_rate": 3.742564691444147e-05, "loss": 81.5553, "step": 61740 }, { "epoch": 0.24947781364512336, "grad_norm": 577.9969482421875, "learning_rate": 3.7424276222403795e-05, "loss": 70.7233, "step": 61750 }, { "epoch": 0.249518214910491, "grad_norm": 1053.4208984375, "learning_rate": 3.742290519067087e-05, "loss": 90.1125, "step": 61760 }, { "epoch": 0.24955861617585862, "grad_norm": 633.0479736328125, "learning_rate": 3.742153381926945e-05, "loss": 60.1169, "step": 61770 }, { "epoch": 0.24959901744122626, "grad_norm": 548.5878295898438, "learning_rate": 3.742016210822624e-05, "loss": 88.1457, "step": 61780 }, { "epoch": 0.2496394187065939, "grad_norm": 368.5179748535156, "learning_rate": 3.741879005756802e-05, "loss": 101.9935, "step": 61790 }, { "epoch": 0.2496798199719615, "grad_norm": 2975.427978515625, "learning_rate": 3.741741766732151e-05, "loss": 93.7819, "step": 61800 }, { "epoch": 0.24972022123732915, "grad_norm": 767.5460205078125, "learning_rate": 3.741604493751348e-05, "loss": 76.1625, "step": 61810 }, { "epoch": 0.2497606225026968, "grad_norm": 704.7868041992188, "learning_rate": 3.741467186817068e-05, "loss": 133.6862, "step": 61820 }, { "epoch": 0.24980102376806443, "grad_norm": 1289.1790771484375, "learning_rate": 3.7413298459319897e-05, "loss": 91.6314, "step": 61830 }, { "epoch": 0.24984142503343204, "grad_norm": 815.9835815429688, "learning_rate": 3.741192471098789e-05, "loss": 66.9851, "step": 61840 }, { "epoch": 0.24988182629879968, "grad_norm": 683.07763671875, "learning_rate": 3.741055062320145e-05, "loss": 36.704, "step": 61850 }, { "epoch": 0.24992222756416732, "grad_norm": 1541.2674560546875, "learning_rate": 3.740917619598736e-05, "loss": 79.5865, "step": 61860 }, { "epoch": 0.24996262882953493, "grad_norm": 738.2477416992188, "learning_rate": 3.740780142937241e-05, "loss": 57.1532, "step": 61870 }, { "epoch": 0.25000303009490255, "grad_norm": 403.5694885253906, "learning_rate": 3.7406426323383416e-05, "loss": 87.1524, "step": 61880 }, { "epoch": 0.2500434313602702, "grad_norm": 779.46533203125, "learning_rate": 3.740505087804718e-05, "loss": 53.8653, "step": 61890 }, { "epoch": 0.2500838326256378, "grad_norm": 359.40716552734375, "learning_rate": 3.740367509339052e-05, "loss": 87.3346, "step": 61900 }, { "epoch": 0.25012423389100547, "grad_norm": 1014.7713012695312, "learning_rate": 3.740229896944025e-05, "loss": 61.4974, "step": 61910 }, { "epoch": 0.2501646351563731, "grad_norm": 870.2494506835938, "learning_rate": 3.74009225062232e-05, "loss": 93.1416, "step": 61920 }, { "epoch": 0.25020503642174075, "grad_norm": 578.4398193359375, "learning_rate": 3.739954570376621e-05, "loss": 50.1696, "step": 61930 }, { "epoch": 0.2502454376871084, "grad_norm": 1939.190185546875, "learning_rate": 3.7398168562096115e-05, "loss": 128.2365, "step": 61940 }, { "epoch": 0.25028583895247597, "grad_norm": 838.7545776367188, "learning_rate": 3.7396791081239766e-05, "loss": 55.7641, "step": 61950 }, { "epoch": 0.2503262402178436, "grad_norm": 1380.188232421875, "learning_rate": 3.7395413261224026e-05, "loss": 58.8139, "step": 61960 }, { "epoch": 0.25036664148321125, "grad_norm": 657.556884765625, "learning_rate": 3.739403510207574e-05, "loss": 40.2833, "step": 61970 }, { "epoch": 0.2504070427485789, "grad_norm": 827.7333984375, "learning_rate": 3.7392656603821794e-05, "loss": 62.6232, "step": 61980 }, { "epoch": 0.25044744401394653, "grad_norm": 933.279541015625, "learning_rate": 3.7391277766489044e-05, "loss": 63.5383, "step": 61990 }, { "epoch": 0.25048784527931417, "grad_norm": 1004.0429077148438, "learning_rate": 3.738989859010438e-05, "loss": 101.3109, "step": 62000 }, { "epoch": 0.25052824654468175, "grad_norm": 0.0, "learning_rate": 3.73885190746947e-05, "loss": 65.8957, "step": 62010 }, { "epoch": 0.2505686478100494, "grad_norm": 806.5186767578125, "learning_rate": 3.738713922028688e-05, "loss": 53.8241, "step": 62020 }, { "epoch": 0.25060904907541703, "grad_norm": 2066.307861328125, "learning_rate": 3.7385759026907836e-05, "loss": 77.2466, "step": 62030 }, { "epoch": 0.2506494503407847, "grad_norm": 1385.8570556640625, "learning_rate": 3.738437849458446e-05, "loss": 93.8371, "step": 62040 }, { "epoch": 0.2506898516061523, "grad_norm": 1187.6527099609375, "learning_rate": 3.738299762334368e-05, "loss": 66.5861, "step": 62050 }, { "epoch": 0.25073025287151995, "grad_norm": 567.3513793945312, "learning_rate": 3.738161641321242e-05, "loss": 59.9558, "step": 62060 }, { "epoch": 0.2507706541368876, "grad_norm": 611.0079345703125, "learning_rate": 3.7380234864217584e-05, "loss": 84.0864, "step": 62070 }, { "epoch": 0.2508110554022552, "grad_norm": 646.5040283203125, "learning_rate": 3.737885297638613e-05, "loss": 70.7648, "step": 62080 }, { "epoch": 0.2508514566676228, "grad_norm": 855.0535278320312, "learning_rate": 3.737747074974498e-05, "loss": 76.4832, "step": 62090 }, { "epoch": 0.25089185793299046, "grad_norm": 560.4104614257812, "learning_rate": 3.737608818432111e-05, "loss": 61.4092, "step": 62100 }, { "epoch": 0.2509322591983581, "grad_norm": 695.6069946289062, "learning_rate": 3.737470528014145e-05, "loss": 62.4705, "step": 62110 }, { "epoch": 0.25097266046372574, "grad_norm": 646.8819580078125, "learning_rate": 3.7373322037232956e-05, "loss": 53.1848, "step": 62120 }, { "epoch": 0.2510130617290934, "grad_norm": 1300.523681640625, "learning_rate": 3.737193845562261e-05, "loss": 79.2234, "step": 62130 }, { "epoch": 0.25105346299446096, "grad_norm": 917.3502197265625, "learning_rate": 3.737055453533738e-05, "loss": 49.7496, "step": 62140 }, { "epoch": 0.2510938642598286, "grad_norm": 1223.5814208984375, "learning_rate": 3.7369170276404245e-05, "loss": 79.9229, "step": 62150 }, { "epoch": 0.25113426552519624, "grad_norm": 823.5051879882812, "learning_rate": 3.7367785678850196e-05, "loss": 65.1247, "step": 62160 }, { "epoch": 0.2511746667905639, "grad_norm": 1042.2567138671875, "learning_rate": 3.736640074270222e-05, "loss": 62.8723, "step": 62170 }, { "epoch": 0.2512150680559315, "grad_norm": 1149.8233642578125, "learning_rate": 3.736501546798732e-05, "loss": 69.4781, "step": 62180 }, { "epoch": 0.25125546932129916, "grad_norm": 905.4100341796875, "learning_rate": 3.7363629854732506e-05, "loss": 66.374, "step": 62190 }, { "epoch": 0.25129587058666675, "grad_norm": 1982.888916015625, "learning_rate": 3.736224390296479e-05, "loss": 89.6081, "step": 62200 }, { "epoch": 0.2513362718520344, "grad_norm": 725.7110595703125, "learning_rate": 3.736085761271119e-05, "loss": 64.456, "step": 62210 }, { "epoch": 0.251376673117402, "grad_norm": 1051.466796875, "learning_rate": 3.735947098399873e-05, "loss": 60.7936, "step": 62220 }, { "epoch": 0.25141707438276967, "grad_norm": 702.5640258789062, "learning_rate": 3.7358084016854445e-05, "loss": 44.4488, "step": 62230 }, { "epoch": 0.2514574756481373, "grad_norm": 1065.532958984375, "learning_rate": 3.735669671130538e-05, "loss": 60.1263, "step": 62240 }, { "epoch": 0.25149787691350495, "grad_norm": 872.7839965820312, "learning_rate": 3.735530906737857e-05, "loss": 67.0821, "step": 62250 }, { "epoch": 0.2515382781788726, "grad_norm": 906.0625, "learning_rate": 3.735392108510108e-05, "loss": 65.652, "step": 62260 }, { "epoch": 0.25157867944424017, "grad_norm": 697.1912841796875, "learning_rate": 3.735253276449997e-05, "loss": 100.2691, "step": 62270 }, { "epoch": 0.2516190807096078, "grad_norm": 1175.5914306640625, "learning_rate": 3.735114410560229e-05, "loss": 69.7755, "step": 62280 }, { "epoch": 0.25165948197497545, "grad_norm": 1274.5322265625, "learning_rate": 3.734975510843512e-05, "loss": 74.5015, "step": 62290 }, { "epoch": 0.2516998832403431, "grad_norm": 442.2524108886719, "learning_rate": 3.734836577302555e-05, "loss": 66.8449, "step": 62300 }, { "epoch": 0.25174028450571073, "grad_norm": 466.80865478515625, "learning_rate": 3.734697609940066e-05, "loss": 79.3407, "step": 62310 }, { "epoch": 0.25178068577107837, "grad_norm": 192.1374969482422, "learning_rate": 3.7345586087587535e-05, "loss": 61.5628, "step": 62320 }, { "epoch": 0.25182108703644596, "grad_norm": 2281.552978515625, "learning_rate": 3.734419573761328e-05, "loss": 72.6444, "step": 62330 }, { "epoch": 0.2518614883018136, "grad_norm": 821.1915893554688, "learning_rate": 3.7342805049504994e-05, "loss": 111.8539, "step": 62340 }, { "epoch": 0.25190188956718124, "grad_norm": 576.8593139648438, "learning_rate": 3.73414140232898e-05, "loss": 63.514, "step": 62350 }, { "epoch": 0.2519422908325489, "grad_norm": 285.91815185546875, "learning_rate": 3.7340022658994816e-05, "loss": 74.2678, "step": 62360 }, { "epoch": 0.2519826920979165, "grad_norm": 779.8338012695312, "learning_rate": 3.733863095664716e-05, "loss": 55.4725, "step": 62370 }, { "epoch": 0.25202309336328416, "grad_norm": 601.2800903320312, "learning_rate": 3.733723891627396e-05, "loss": 57.6946, "step": 62380 }, { "epoch": 0.2520634946286518, "grad_norm": 1605.4139404296875, "learning_rate": 3.733584653790237e-05, "loss": 88.7472, "step": 62390 }, { "epoch": 0.2521038958940194, "grad_norm": 755.4089965820312, "learning_rate": 3.7334453821559534e-05, "loss": 52.6565, "step": 62400 }, { "epoch": 0.252144297159387, "grad_norm": 696.624755859375, "learning_rate": 3.733306076727258e-05, "loss": 65.9678, "step": 62410 }, { "epoch": 0.25218469842475466, "grad_norm": 1728.781982421875, "learning_rate": 3.733166737506869e-05, "loss": 65.9936, "step": 62420 }, { "epoch": 0.2522250996901223, "grad_norm": 329.3301086425781, "learning_rate": 3.733027364497502e-05, "loss": 93.0544, "step": 62430 }, { "epoch": 0.25226550095548994, "grad_norm": 928.8203735351562, "learning_rate": 3.732887957701874e-05, "loss": 76.0958, "step": 62440 }, { "epoch": 0.2523059022208576, "grad_norm": 765.6958618164062, "learning_rate": 3.732748517122704e-05, "loss": 53.0726, "step": 62450 }, { "epoch": 0.25234630348622517, "grad_norm": 510.4883117675781, "learning_rate": 3.732609042762709e-05, "loss": 58.0227, "step": 62460 }, { "epoch": 0.2523867047515928, "grad_norm": 980.6634521484375, "learning_rate": 3.732469534624609e-05, "loss": 104.429, "step": 62470 }, { "epoch": 0.25242710601696045, "grad_norm": 1157.6436767578125, "learning_rate": 3.732329992711123e-05, "loss": 91.0166, "step": 62480 }, { "epoch": 0.2524675072823281, "grad_norm": 946.9705200195312, "learning_rate": 3.732190417024972e-05, "loss": 89.0631, "step": 62490 }, { "epoch": 0.2525079085476957, "grad_norm": 719.9625244140625, "learning_rate": 3.732050807568878e-05, "loss": 80.3477, "step": 62500 }, { "epoch": 0.25254830981306337, "grad_norm": 636.4144897460938, "learning_rate": 3.731911164345561e-05, "loss": 56.9953, "step": 62510 }, { "epoch": 0.25258871107843095, "grad_norm": 370.7101135253906, "learning_rate": 3.731771487357744e-05, "loss": 66.2618, "step": 62520 }, { "epoch": 0.2526291123437986, "grad_norm": 892.8953247070312, "learning_rate": 3.731631776608151e-05, "loss": 74.3697, "step": 62530 }, { "epoch": 0.25266951360916623, "grad_norm": 421.4278259277344, "learning_rate": 3.731492032099504e-05, "loss": 100.9044, "step": 62540 }, { "epoch": 0.25270991487453387, "grad_norm": 969.6364135742188, "learning_rate": 3.731352253834529e-05, "loss": 94.6507, "step": 62550 }, { "epoch": 0.2527503161399015, "grad_norm": 855.2815551757812, "learning_rate": 3.73121244181595e-05, "loss": 120.8158, "step": 62560 }, { "epoch": 0.25279071740526915, "grad_norm": 3310.03515625, "learning_rate": 3.731072596046493e-05, "loss": 86.2226, "step": 62570 }, { "epoch": 0.2528311186706368, "grad_norm": 393.9659423828125, "learning_rate": 3.7309327165288857e-05, "loss": 77.2755, "step": 62580 }, { "epoch": 0.2528715199360044, "grad_norm": 702.1710815429688, "learning_rate": 3.730792803265853e-05, "loss": 47.471, "step": 62590 }, { "epoch": 0.252911921201372, "grad_norm": 2308.98095703125, "learning_rate": 3.7306528562601245e-05, "loss": 64.0197, "step": 62600 }, { "epoch": 0.25295232246673965, "grad_norm": 1097.486083984375, "learning_rate": 3.7305128755144265e-05, "loss": 120.0064, "step": 62610 }, { "epoch": 0.2529927237321073, "grad_norm": 750.9093627929688, "learning_rate": 3.73037286103149e-05, "loss": 101.2599, "step": 62620 }, { "epoch": 0.25303312499747493, "grad_norm": 502.05645751953125, "learning_rate": 3.730232812814043e-05, "loss": 68.1188, "step": 62630 }, { "epoch": 0.2530735262628426, "grad_norm": 1803.06005859375, "learning_rate": 3.730092730864816e-05, "loss": 70.3783, "step": 62640 }, { "epoch": 0.25311392752821016, "grad_norm": 1725.7425537109375, "learning_rate": 3.729952615186542e-05, "loss": 69.9886, "step": 62650 }, { "epoch": 0.2531543287935778, "grad_norm": 656.0050048828125, "learning_rate": 3.72981246578195e-05, "loss": 116.2143, "step": 62660 }, { "epoch": 0.25319473005894544, "grad_norm": 1667.094970703125, "learning_rate": 3.729672282653774e-05, "loss": 81.6159, "step": 62670 }, { "epoch": 0.2532351313243131, "grad_norm": 1342.62890625, "learning_rate": 3.729532065804746e-05, "loss": 102.0702, "step": 62680 }, { "epoch": 0.2532755325896807, "grad_norm": 648.2364501953125, "learning_rate": 3.7293918152376004e-05, "loss": 67.3058, "step": 62690 }, { "epoch": 0.25331593385504836, "grad_norm": 773.669921875, "learning_rate": 3.72925153095507e-05, "loss": 69.3494, "step": 62700 }, { "epoch": 0.253356335120416, "grad_norm": 616.6019897460938, "learning_rate": 3.729111212959891e-05, "loss": 49.2931, "step": 62710 }, { "epoch": 0.2533967363857836, "grad_norm": 1789.956787109375, "learning_rate": 3.7289708612547995e-05, "loss": 44.2154, "step": 62720 }, { "epoch": 0.2534371376511512, "grad_norm": 603.3187255859375, "learning_rate": 3.72883047584253e-05, "loss": 65.0651, "step": 62730 }, { "epoch": 0.25347753891651886, "grad_norm": 669.8085327148438, "learning_rate": 3.728690056725821e-05, "loss": 99.6179, "step": 62740 }, { "epoch": 0.2535179401818865, "grad_norm": 1055.7017822265625, "learning_rate": 3.72854960390741e-05, "loss": 86.5876, "step": 62750 }, { "epoch": 0.25355834144725414, "grad_norm": 323.4556884765625, "learning_rate": 3.728409117390033e-05, "loss": 72.2166, "step": 62760 }, { "epoch": 0.2535987427126218, "grad_norm": 3452.567626953125, "learning_rate": 3.7282685971764316e-05, "loss": 188.2608, "step": 62770 }, { "epoch": 0.25363914397798937, "grad_norm": 684.7014770507812, "learning_rate": 3.7281280432693436e-05, "loss": 58.8837, "step": 62780 }, { "epoch": 0.253679545243357, "grad_norm": 240.87582397460938, "learning_rate": 3.72798745567151e-05, "loss": 63.6725, "step": 62790 }, { "epoch": 0.25371994650872465, "grad_norm": 1372.149658203125, "learning_rate": 3.727846834385671e-05, "loss": 104.2431, "step": 62800 }, { "epoch": 0.2537603477740923, "grad_norm": 1499.7823486328125, "learning_rate": 3.727706179414568e-05, "loss": 102.7563, "step": 62810 }, { "epoch": 0.2538007490394599, "grad_norm": 661.4194946289062, "learning_rate": 3.7275654907609444e-05, "loss": 45.8049, "step": 62820 }, { "epoch": 0.25384115030482757, "grad_norm": 399.1147766113281, "learning_rate": 3.727424768427542e-05, "loss": 74.2699, "step": 62830 }, { "epoch": 0.25388155157019515, "grad_norm": 908.5316772460938, "learning_rate": 3.727284012417104e-05, "loss": 71.9168, "step": 62840 }, { "epoch": 0.2539219528355628, "grad_norm": 636.9518432617188, "learning_rate": 3.727143222732375e-05, "loss": 71.2355, "step": 62850 }, { "epoch": 0.25396235410093043, "grad_norm": 1124.2080078125, "learning_rate": 3.7270023993761e-05, "loss": 104.8447, "step": 62860 }, { "epoch": 0.25400275536629807, "grad_norm": 1469.802001953125, "learning_rate": 3.726861542351024e-05, "loss": 56.2755, "step": 62870 }, { "epoch": 0.2540431566316657, "grad_norm": 532.6864624023438, "learning_rate": 3.726720651659893e-05, "loss": 70.7993, "step": 62880 }, { "epoch": 0.25408355789703335, "grad_norm": 783.446044921875, "learning_rate": 3.726579727305454e-05, "loss": 47.7729, "step": 62890 }, { "epoch": 0.254123959162401, "grad_norm": 1531.939697265625, "learning_rate": 3.726438769290454e-05, "loss": 85.5388, "step": 62900 }, { "epoch": 0.2541643604277686, "grad_norm": 571.3388061523438, "learning_rate": 3.726297777617642e-05, "loss": 94.1963, "step": 62910 }, { "epoch": 0.2542047616931362, "grad_norm": 1117.3424072265625, "learning_rate": 3.7261567522897656e-05, "loss": 80.618, "step": 62920 }, { "epoch": 0.25424516295850386, "grad_norm": 613.401611328125, "learning_rate": 3.726015693309575e-05, "loss": 67.6146, "step": 62930 }, { "epoch": 0.2542855642238715, "grad_norm": 194.6875762939453, "learning_rate": 3.7258746006798194e-05, "loss": 70.3354, "step": 62940 }, { "epoch": 0.25432596548923914, "grad_norm": 586.9131469726562, "learning_rate": 3.72573347440325e-05, "loss": 91.6146, "step": 62950 }, { "epoch": 0.2543663667546068, "grad_norm": 886.0733032226562, "learning_rate": 3.725592314482619e-05, "loss": 69.5728, "step": 62960 }, { "epoch": 0.25440676801997436, "grad_norm": 887.9442138671875, "learning_rate": 3.725451120920676e-05, "loss": 77.3229, "step": 62970 }, { "epoch": 0.254447169285342, "grad_norm": 617.3834228515625, "learning_rate": 3.725309893720176e-05, "loss": 56.8363, "step": 62980 }, { "epoch": 0.25448757055070964, "grad_norm": 105.37464141845703, "learning_rate": 3.725168632883871e-05, "loss": 45.1374, "step": 62990 }, { "epoch": 0.2545279718160773, "grad_norm": 1526.1341552734375, "learning_rate": 3.725027338414515e-05, "loss": 105.5693, "step": 63000 }, { "epoch": 0.2545683730814449, "grad_norm": 1037.0675048828125, "learning_rate": 3.7248860103148636e-05, "loss": 74.4302, "step": 63010 }, { "epoch": 0.25460877434681256, "grad_norm": 436.3997802734375, "learning_rate": 3.724744648587671e-05, "loss": 47.4108, "step": 63020 }, { "epoch": 0.2546491756121802, "grad_norm": 812.2733764648438, "learning_rate": 3.724603253235694e-05, "loss": 58.7146, "step": 63030 }, { "epoch": 0.2546895768775478, "grad_norm": 764.3171997070312, "learning_rate": 3.724461824261688e-05, "loss": 84.978, "step": 63040 }, { "epoch": 0.2547299781429154, "grad_norm": 605.2657470703125, "learning_rate": 3.724320361668412e-05, "loss": 63.5626, "step": 63050 }, { "epoch": 0.25477037940828307, "grad_norm": 607.5908813476562, "learning_rate": 3.7241788654586215e-05, "loss": 72.101, "step": 63060 }, { "epoch": 0.2548107806736507, "grad_norm": 550.0535888671875, "learning_rate": 3.724037335635076e-05, "loss": 58.6045, "step": 63070 }, { "epoch": 0.25485118193901835, "grad_norm": 756.3518676757812, "learning_rate": 3.7238957722005364e-05, "loss": 79.5881, "step": 63080 }, { "epoch": 0.254891583204386, "grad_norm": 736.576904296875, "learning_rate": 3.723754175157761e-05, "loss": 43.176, "step": 63090 }, { "epoch": 0.25493198446975357, "grad_norm": 1498.8287353515625, "learning_rate": 3.72361254450951e-05, "loss": 71.7575, "step": 63100 }, { "epoch": 0.2549723857351212, "grad_norm": 989.7176513671875, "learning_rate": 3.7234708802585455e-05, "loss": 62.0116, "step": 63110 }, { "epoch": 0.25501278700048885, "grad_norm": 663.2655029296875, "learning_rate": 3.723329182407629e-05, "loss": 64.7273, "step": 63120 }, { "epoch": 0.2550531882658565, "grad_norm": 528.9407958984375, "learning_rate": 3.723187450959523e-05, "loss": 60.0449, "step": 63130 }, { "epoch": 0.25509358953122413, "grad_norm": 1840.953857421875, "learning_rate": 3.7230456859169894e-05, "loss": 86.6429, "step": 63140 }, { "epoch": 0.25513399079659177, "grad_norm": 707.001708984375, "learning_rate": 3.7229038872827936e-05, "loss": 65.0906, "step": 63150 }, { "epoch": 0.25517439206195935, "grad_norm": 584.7863159179688, "learning_rate": 3.7227620550597e-05, "loss": 44.3554, "step": 63160 }, { "epoch": 0.255214793327327, "grad_norm": 316.122314453125, "learning_rate": 3.722620189250473e-05, "loss": 84.5047, "step": 63170 }, { "epoch": 0.25525519459269463, "grad_norm": 710.6033935546875, "learning_rate": 3.722478289857878e-05, "loss": 48.4606, "step": 63180 }, { "epoch": 0.2552955958580623, "grad_norm": 1002.5296020507812, "learning_rate": 3.722336356884682e-05, "loss": 79.9188, "step": 63190 }, { "epoch": 0.2553359971234299, "grad_norm": 868.291748046875, "learning_rate": 3.722194390333653e-05, "loss": 82.2959, "step": 63200 }, { "epoch": 0.25537639838879755, "grad_norm": 528.7097778320312, "learning_rate": 3.722052390207557e-05, "loss": 69.8529, "step": 63210 }, { "epoch": 0.2554167996541652, "grad_norm": 429.8751525878906, "learning_rate": 3.721910356509163e-05, "loss": 99.49, "step": 63220 }, { "epoch": 0.2554572009195328, "grad_norm": 500.3701477050781, "learning_rate": 3.7217682892412404e-05, "loss": 66.9053, "step": 63230 }, { "epoch": 0.2554976021849004, "grad_norm": 415.3155212402344, "learning_rate": 3.7216261884065585e-05, "loss": 75.5311, "step": 63240 }, { "epoch": 0.25553800345026806, "grad_norm": 934.2700805664062, "learning_rate": 3.721484054007888e-05, "loss": 74.0346, "step": 63250 }, { "epoch": 0.2555784047156357, "grad_norm": 1153.2728271484375, "learning_rate": 3.721341886047999e-05, "loss": 78.737, "step": 63260 }, { "epoch": 0.25561880598100334, "grad_norm": 1125.8875732421875, "learning_rate": 3.7211996845296636e-05, "loss": 74.579, "step": 63270 }, { "epoch": 0.255659207246371, "grad_norm": 561.3126220703125, "learning_rate": 3.7210574494556546e-05, "loss": 111.5302, "step": 63280 }, { "epoch": 0.25569960851173856, "grad_norm": 967.9246215820312, "learning_rate": 3.7209151808287447e-05, "loss": 70.2826, "step": 63290 }, { "epoch": 0.2557400097771062, "grad_norm": 702.910400390625, "learning_rate": 3.720772878651707e-05, "loss": 87.1197, "step": 63300 }, { "epoch": 0.25578041104247384, "grad_norm": 1672.2962646484375, "learning_rate": 3.7206305429273164e-05, "loss": 77.6456, "step": 63310 }, { "epoch": 0.2558208123078415, "grad_norm": 980.3726806640625, "learning_rate": 3.720488173658347e-05, "loss": 80.2127, "step": 63320 }, { "epoch": 0.2558612135732091, "grad_norm": 1012.27880859375, "learning_rate": 3.720345770847575e-05, "loss": 60.8711, "step": 63330 }, { "epoch": 0.25590161483857676, "grad_norm": 1901.3438720703125, "learning_rate": 3.7202033344977774e-05, "loss": 74.989, "step": 63340 }, { "epoch": 0.2559420161039444, "grad_norm": 1224.521484375, "learning_rate": 3.72006086461173e-05, "loss": 45.6287, "step": 63350 }, { "epoch": 0.255982417369312, "grad_norm": 586.5023193359375, "learning_rate": 3.7199183611922095e-05, "loss": 83.5081, "step": 63360 }, { "epoch": 0.2560228186346796, "grad_norm": 1021.5157470703125, "learning_rate": 3.719775824241996e-05, "loss": 55.7175, "step": 63370 }, { "epoch": 0.25606321990004727, "grad_norm": 1121.025146484375, "learning_rate": 3.719633253763867e-05, "loss": 92.7057, "step": 63380 }, { "epoch": 0.2561036211654149, "grad_norm": 721.4158935546875, "learning_rate": 3.719490649760603e-05, "loss": 77.5612, "step": 63390 }, { "epoch": 0.25614402243078255, "grad_norm": 0.0, "learning_rate": 3.7193480122349824e-05, "loss": 66.09, "step": 63400 }, { "epoch": 0.2561844236961502, "grad_norm": 998.5385131835938, "learning_rate": 3.7192053411897876e-05, "loss": 77.9643, "step": 63410 }, { "epoch": 0.25622482496151777, "grad_norm": 1532.927001953125, "learning_rate": 3.7190626366278e-05, "loss": 105.0879, "step": 63420 }, { "epoch": 0.2562652262268854, "grad_norm": 1900.2274169921875, "learning_rate": 3.718919898551801e-05, "loss": 70.104, "step": 63430 }, { "epoch": 0.25630562749225305, "grad_norm": 675.3201293945312, "learning_rate": 3.718777126964573e-05, "loss": 80.4306, "step": 63440 }, { "epoch": 0.2563460287576207, "grad_norm": 599.4599609375, "learning_rate": 3.718634321868901e-05, "loss": 77.3058, "step": 63450 }, { "epoch": 0.25638643002298833, "grad_norm": 2024.2564697265625, "learning_rate": 3.718491483267568e-05, "loss": 126.2976, "step": 63460 }, { "epoch": 0.25642683128835597, "grad_norm": 451.1012268066406, "learning_rate": 3.7183486111633585e-05, "loss": 80.9007, "step": 63470 }, { "epoch": 0.25646723255372356, "grad_norm": 560.759765625, "learning_rate": 3.7182057055590576e-05, "loss": 107.118, "step": 63480 }, { "epoch": 0.2565076338190912, "grad_norm": 898.7105102539062, "learning_rate": 3.7180627664574524e-05, "loss": 90.2963, "step": 63490 }, { "epoch": 0.25654803508445884, "grad_norm": 579.5004272460938, "learning_rate": 3.717919793861329e-05, "loss": 63.7845, "step": 63500 }, { "epoch": 0.2565884363498265, "grad_norm": 809.5593872070312, "learning_rate": 3.717776787773475e-05, "loss": 76.0838, "step": 63510 }, { "epoch": 0.2566288376151941, "grad_norm": 491.0308532714844, "learning_rate": 3.717633748196678e-05, "loss": 71.3151, "step": 63520 }, { "epoch": 0.25666923888056176, "grad_norm": 513.8741455078125, "learning_rate": 3.717490675133727e-05, "loss": 82.5305, "step": 63530 }, { "epoch": 0.2567096401459294, "grad_norm": 682.1354370117188, "learning_rate": 3.7173475685874105e-05, "loss": 58.0706, "step": 63540 }, { "epoch": 0.256750041411297, "grad_norm": 783.206298828125, "learning_rate": 3.7172044285605195e-05, "loss": 53.3748, "step": 63550 }, { "epoch": 0.2567904426766646, "grad_norm": 755.7714233398438, "learning_rate": 3.717061255055844e-05, "loss": 76.3537, "step": 63560 }, { "epoch": 0.25683084394203226, "grad_norm": 1231.5157470703125, "learning_rate": 3.7169180480761754e-05, "loss": 78.1751, "step": 63570 }, { "epoch": 0.2568712452073999, "grad_norm": 1083.4071044921875, "learning_rate": 3.7167748076243046e-05, "loss": 70.2778, "step": 63580 }, { "epoch": 0.25691164647276754, "grad_norm": 594.28564453125, "learning_rate": 3.716631533703026e-05, "loss": 64.8809, "step": 63590 }, { "epoch": 0.2569520477381352, "grad_norm": 930.3058471679688, "learning_rate": 3.7164882263151315e-05, "loss": 79.9638, "step": 63600 }, { "epoch": 0.25699244900350277, "grad_norm": 496.72998046875, "learning_rate": 3.716344885463416e-05, "loss": 80.5776, "step": 63610 }, { "epoch": 0.2570328502688704, "grad_norm": 641.936279296875, "learning_rate": 3.716201511150673e-05, "loss": 66.4667, "step": 63620 }, { "epoch": 0.25707325153423805, "grad_norm": 541.667236328125, "learning_rate": 3.716058103379697e-05, "loss": 72.8316, "step": 63630 }, { "epoch": 0.2571136527996057, "grad_norm": 522.942138671875, "learning_rate": 3.715914662153286e-05, "loss": 81.5492, "step": 63640 }, { "epoch": 0.2571540540649733, "grad_norm": 813.4447631835938, "learning_rate": 3.715771187474235e-05, "loss": 64.2315, "step": 63650 }, { "epoch": 0.25719445533034097, "grad_norm": 543.0736083984375, "learning_rate": 3.7156276793453414e-05, "loss": 56.0146, "step": 63660 }, { "epoch": 0.2572348565957086, "grad_norm": 622.9854125976562, "learning_rate": 3.715484137769402e-05, "loss": 72.0535, "step": 63670 }, { "epoch": 0.2572752578610762, "grad_norm": 382.42767333984375, "learning_rate": 3.7153405627492175e-05, "loss": 78.4168, "step": 63680 }, { "epoch": 0.25731565912644383, "grad_norm": 746.149658203125, "learning_rate": 3.715196954287585e-05, "loss": 47.0814, "step": 63690 }, { "epoch": 0.25735606039181147, "grad_norm": 828.6617431640625, "learning_rate": 3.715053312387305e-05, "loss": 83.7009, "step": 63700 }, { "epoch": 0.2573964616571791, "grad_norm": 1059.0740966796875, "learning_rate": 3.7149096370511776e-05, "loss": 81.3558, "step": 63710 }, { "epoch": 0.25743686292254675, "grad_norm": 2411.71142578125, "learning_rate": 3.714765928282004e-05, "loss": 96.2945, "step": 63720 }, { "epoch": 0.2574772641879144, "grad_norm": 867.219482421875, "learning_rate": 3.714622186082585e-05, "loss": 82.7037, "step": 63730 }, { "epoch": 0.257517665453282, "grad_norm": 865.6116333007812, "learning_rate": 3.7144784104557246e-05, "loss": 77.5183, "step": 63740 }, { "epoch": 0.2575580667186496, "grad_norm": 736.4535522460938, "learning_rate": 3.714334601404225e-05, "loss": 58.0193, "step": 63750 }, { "epoch": 0.25759846798401725, "grad_norm": 792.9779052734375, "learning_rate": 3.714190758930889e-05, "loss": 79.0416, "step": 63760 }, { "epoch": 0.2576388692493849, "grad_norm": 539.277099609375, "learning_rate": 3.7140468830385227e-05, "loss": 64.7736, "step": 63770 }, { "epoch": 0.25767927051475253, "grad_norm": 1489.8095703125, "learning_rate": 3.713902973729929e-05, "loss": 93.2199, "step": 63780 }, { "epoch": 0.2577196717801202, "grad_norm": 295.87420654296875, "learning_rate": 3.713759031007915e-05, "loss": 85.7172, "step": 63790 }, { "epoch": 0.25776007304548776, "grad_norm": 1169.1015625, "learning_rate": 3.713615054875286e-05, "loss": 70.2371, "step": 63800 }, { "epoch": 0.2578004743108554, "grad_norm": 1383.2265625, "learning_rate": 3.7134710453348496e-05, "loss": 74.2921, "step": 63810 }, { "epoch": 0.25784087557622304, "grad_norm": 633.1953735351562, "learning_rate": 3.713327002389413e-05, "loss": 53.2052, "step": 63820 }, { "epoch": 0.2578812768415907, "grad_norm": 1091.2930908203125, "learning_rate": 3.713182926041785e-05, "loss": 97.7233, "step": 63830 }, { "epoch": 0.2579216781069583, "grad_norm": 835.3092651367188, "learning_rate": 3.7130388162947726e-05, "loss": 68.7806, "step": 63840 }, { "epoch": 0.25796207937232596, "grad_norm": 811.8614501953125, "learning_rate": 3.712894673151187e-05, "loss": 76.9277, "step": 63850 }, { "epoch": 0.2580024806376936, "grad_norm": 873.2691040039062, "learning_rate": 3.712750496613838e-05, "loss": 56.6611, "step": 63860 }, { "epoch": 0.2580428819030612, "grad_norm": 534.2119140625, "learning_rate": 3.712606286685536e-05, "loss": 91.2972, "step": 63870 }, { "epoch": 0.2580832831684288, "grad_norm": 2092.973876953125, "learning_rate": 3.712462043369093e-05, "loss": 65.7608, "step": 63880 }, { "epoch": 0.25812368443379646, "grad_norm": 1082.7713623046875, "learning_rate": 3.712317766667321e-05, "loss": 61.7201, "step": 63890 }, { "epoch": 0.2581640856991641, "grad_norm": 540.9574584960938, "learning_rate": 3.712173456583033e-05, "loss": 43.2988, "step": 63900 }, { "epoch": 0.25820448696453174, "grad_norm": 719.4511108398438, "learning_rate": 3.712029113119041e-05, "loss": 43.7259, "step": 63910 }, { "epoch": 0.2582448882298994, "grad_norm": 837.5140380859375, "learning_rate": 3.7118847362781605e-05, "loss": 116.4185, "step": 63920 }, { "epoch": 0.25828528949526697, "grad_norm": 1147.0135498046875, "learning_rate": 3.711740326063206e-05, "loss": 68.8039, "step": 63930 }, { "epoch": 0.2583256907606346, "grad_norm": 322.7746887207031, "learning_rate": 3.711595882476992e-05, "loss": 56.783, "step": 63940 }, { "epoch": 0.25836609202600225, "grad_norm": 561.376953125, "learning_rate": 3.711451405522335e-05, "loss": 56.7755, "step": 63950 }, { "epoch": 0.2584064932913699, "grad_norm": 1156.663818359375, "learning_rate": 3.711306895202052e-05, "loss": 94.3347, "step": 63960 }, { "epoch": 0.2584468945567375, "grad_norm": 511.6494445800781, "learning_rate": 3.7111623515189605e-05, "loss": 53.324, "step": 63970 }, { "epoch": 0.25848729582210517, "grad_norm": 855.0453491210938, "learning_rate": 3.7110177744758774e-05, "loss": 77.657, "step": 63980 }, { "epoch": 0.25852769708747275, "grad_norm": 664.17578125, "learning_rate": 3.7108731640756224e-05, "loss": 57.8758, "step": 63990 }, { "epoch": 0.2585680983528404, "grad_norm": 785.47607421875, "learning_rate": 3.710728520321014e-05, "loss": 56.7535, "step": 64000 }, { "epoch": 0.25860849961820803, "grad_norm": 868.9013061523438, "learning_rate": 3.710583843214872e-05, "loss": 72.5036, "step": 64010 }, { "epoch": 0.25864890088357567, "grad_norm": 1320.7911376953125, "learning_rate": 3.7104391327600176e-05, "loss": 77.4738, "step": 64020 }, { "epoch": 0.2586893021489433, "grad_norm": 2751.525390625, "learning_rate": 3.710294388959272e-05, "loss": 64.6033, "step": 64030 }, { "epoch": 0.25872970341431095, "grad_norm": 518.192138671875, "learning_rate": 3.7101496118154557e-05, "loss": 63.9527, "step": 64040 }, { "epoch": 0.2587701046796786, "grad_norm": 556.1539306640625, "learning_rate": 3.7100048013313933e-05, "loss": 69.788, "step": 64050 }, { "epoch": 0.2588105059450462, "grad_norm": 910.7564086914062, "learning_rate": 3.709859957509907e-05, "loss": 83.9675, "step": 64060 }, { "epoch": 0.2588509072104138, "grad_norm": 799.0169677734375, "learning_rate": 3.70971508035382e-05, "loss": 81.879, "step": 64070 }, { "epoch": 0.25889130847578146, "grad_norm": 344.5417785644531, "learning_rate": 3.7095701698659576e-05, "loss": 62.578, "step": 64080 }, { "epoch": 0.2589317097411491, "grad_norm": 988.5333251953125, "learning_rate": 3.709425226049145e-05, "loss": 70.7378, "step": 64090 }, { "epoch": 0.25897211100651674, "grad_norm": 1221.0657958984375, "learning_rate": 3.709280248906206e-05, "loss": 99.6785, "step": 64100 }, { "epoch": 0.2590125122718844, "grad_norm": 850.9677124023438, "learning_rate": 3.70913523843997e-05, "loss": 103.1617, "step": 64110 }, { "epoch": 0.25905291353725196, "grad_norm": 689.6639404296875, "learning_rate": 3.708990194653262e-05, "loss": 61.4117, "step": 64120 }, { "epoch": 0.2590933148026196, "grad_norm": 577.7689208984375, "learning_rate": 3.708845117548911e-05, "loss": 97.5373, "step": 64130 }, { "epoch": 0.25913371606798724, "grad_norm": 963.0814208984375, "learning_rate": 3.708700007129744e-05, "loss": 74.6624, "step": 64140 }, { "epoch": 0.2591741173333549, "grad_norm": 586.4309692382812, "learning_rate": 3.7085548633985906e-05, "loss": 66.786, "step": 64150 }, { "epoch": 0.2592145185987225, "grad_norm": 1598.16455078125, "learning_rate": 3.708409686358281e-05, "loss": 58.5813, "step": 64160 }, { "epoch": 0.25925491986409016, "grad_norm": 395.3716735839844, "learning_rate": 3.708264476011645e-05, "loss": 65.7176, "step": 64170 }, { "epoch": 0.2592953211294578, "grad_norm": 951.0369873046875, "learning_rate": 3.708119232361513e-05, "loss": 83.8157, "step": 64180 }, { "epoch": 0.2593357223948254, "grad_norm": 868.982421875, "learning_rate": 3.707973955410717e-05, "loss": 110.5201, "step": 64190 }, { "epoch": 0.259376123660193, "grad_norm": 1175.660400390625, "learning_rate": 3.707828645162091e-05, "loss": 59.3471, "step": 64200 }, { "epoch": 0.25941652492556067, "grad_norm": 587.8705444335938, "learning_rate": 3.7076833016184646e-05, "loss": 66.6157, "step": 64210 }, { "epoch": 0.2594569261909283, "grad_norm": 882.6228637695312, "learning_rate": 3.707537924782673e-05, "loss": 49.927, "step": 64220 }, { "epoch": 0.25949732745629595, "grad_norm": 404.48907470703125, "learning_rate": 3.7073925146575514e-05, "loss": 100.524, "step": 64230 }, { "epoch": 0.2595377287216636, "grad_norm": 3163.03076171875, "learning_rate": 3.707247071245933e-05, "loss": 71.4904, "step": 64240 }, { "epoch": 0.25957812998703117, "grad_norm": 982.10400390625, "learning_rate": 3.707101594550655e-05, "loss": 77.9145, "step": 64250 }, { "epoch": 0.2596185312523988, "grad_norm": 720.5392456054688, "learning_rate": 3.706956084574552e-05, "loss": 75.0282, "step": 64260 }, { "epoch": 0.25965893251776645, "grad_norm": 754.7142333984375, "learning_rate": 3.706810541320462e-05, "loss": 53.8455, "step": 64270 }, { "epoch": 0.2596993337831341, "grad_norm": 1012.333984375, "learning_rate": 3.706664964791221e-05, "loss": 75.8129, "step": 64280 }, { "epoch": 0.25973973504850173, "grad_norm": 1253.0430908203125, "learning_rate": 3.7065193549896676e-05, "loss": 87.2013, "step": 64290 }, { "epoch": 0.25978013631386937, "grad_norm": 384.3373718261719, "learning_rate": 3.706373711918641e-05, "loss": 53.5726, "step": 64300 }, { "epoch": 0.25982053757923695, "grad_norm": 0.0, "learning_rate": 3.706228035580981e-05, "loss": 53.8202, "step": 64310 }, { "epoch": 0.2598609388446046, "grad_norm": 452.86566162109375, "learning_rate": 3.7060823259795256e-05, "loss": 88.486, "step": 64320 }, { "epoch": 0.25990134010997223, "grad_norm": 1422.01025390625, "learning_rate": 3.705936583117118e-05, "loss": 94.5883, "step": 64330 }, { "epoch": 0.2599417413753399, "grad_norm": 410.7811584472656, "learning_rate": 3.7057908069965984e-05, "loss": 73.8817, "step": 64340 }, { "epoch": 0.2599821426407075, "grad_norm": 943.582763671875, "learning_rate": 3.705644997620809e-05, "loss": 65.7515, "step": 64350 }, { "epoch": 0.26002254390607515, "grad_norm": 836.2688598632812, "learning_rate": 3.705499154992592e-05, "loss": 93.7108, "step": 64360 }, { "epoch": 0.2600629451714428, "grad_norm": 915.242919921875, "learning_rate": 3.705353279114791e-05, "loss": 76.5149, "step": 64370 }, { "epoch": 0.2601033464368104, "grad_norm": 964.8082885742188, "learning_rate": 3.7052073699902494e-05, "loss": 69.2267, "step": 64380 }, { "epoch": 0.260143747702178, "grad_norm": 715.5172119140625, "learning_rate": 3.705061427621813e-05, "loss": 64.467, "step": 64390 }, { "epoch": 0.26018414896754566, "grad_norm": 868.9583129882812, "learning_rate": 3.7049154520123256e-05, "loss": 77.0933, "step": 64400 }, { "epoch": 0.2602245502329133, "grad_norm": 485.1432800292969, "learning_rate": 3.7047694431646334e-05, "loss": 68.6032, "step": 64410 }, { "epoch": 0.26026495149828094, "grad_norm": 731.9483642578125, "learning_rate": 3.704623401081584e-05, "loss": 44.2681, "step": 64420 }, { "epoch": 0.2603053527636486, "grad_norm": 504.07818603515625, "learning_rate": 3.7044773257660234e-05, "loss": 59.9306, "step": 64430 }, { "epoch": 0.26034575402901616, "grad_norm": 872.7208862304688, "learning_rate": 3.7043312172207996e-05, "loss": 93.4973, "step": 64440 }, { "epoch": 0.2603861552943838, "grad_norm": 1535.181640625, "learning_rate": 3.7041850754487623e-05, "loss": 58.0155, "step": 64450 }, { "epoch": 0.26042655655975144, "grad_norm": 554.6906127929688, "learning_rate": 3.704038900452758e-05, "loss": 68.4291, "step": 64460 }, { "epoch": 0.2604669578251191, "grad_norm": 561.7522583007812, "learning_rate": 3.7038926922356395e-05, "loss": 70.7031, "step": 64470 }, { "epoch": 0.2605073590904867, "grad_norm": 1062.995361328125, "learning_rate": 3.703746450800255e-05, "loss": 55.4427, "step": 64480 }, { "epoch": 0.26054776035585436, "grad_norm": 796.6412963867188, "learning_rate": 3.703600176149457e-05, "loss": 93.2851, "step": 64490 }, { "epoch": 0.260588161621222, "grad_norm": 961.7485961914062, "learning_rate": 3.703453868286096e-05, "loss": 60.0274, "step": 64500 }, { "epoch": 0.2606285628865896, "grad_norm": 638.6123657226562, "learning_rate": 3.703307527213024e-05, "loss": 85.6026, "step": 64510 }, { "epoch": 0.2606689641519572, "grad_norm": 1373.725830078125, "learning_rate": 3.7031611529330956e-05, "loss": 82.0446, "step": 64520 }, { "epoch": 0.26070936541732487, "grad_norm": 1274.551513671875, "learning_rate": 3.703014745449164e-05, "loss": 91.4852, "step": 64530 }, { "epoch": 0.2607497666826925, "grad_norm": 805.552001953125, "learning_rate": 3.702868304764083e-05, "loss": 67.7787, "step": 64540 }, { "epoch": 0.26079016794806015, "grad_norm": 1122.33837890625, "learning_rate": 3.702721830880707e-05, "loss": 140.0442, "step": 64550 }, { "epoch": 0.2608305692134278, "grad_norm": 1279.6610107421875, "learning_rate": 3.702575323801893e-05, "loss": 102.1003, "step": 64560 }, { "epoch": 0.26087097047879537, "grad_norm": 540.7999267578125, "learning_rate": 3.702428783530497e-05, "loss": 65.1975, "step": 64570 }, { "epoch": 0.260911371744163, "grad_norm": 576.8917846679688, "learning_rate": 3.7022822100693746e-05, "loss": 82.8855, "step": 64580 }, { "epoch": 0.26095177300953065, "grad_norm": 469.16571044921875, "learning_rate": 3.702135603421385e-05, "loss": 67.3132, "step": 64590 }, { "epoch": 0.2609921742748983, "grad_norm": 452.0544738769531, "learning_rate": 3.701988963589384e-05, "loss": 104.5678, "step": 64600 }, { "epoch": 0.26103257554026593, "grad_norm": 476.0960998535156, "learning_rate": 3.701842290576233e-05, "loss": 82.0795, "step": 64610 }, { "epoch": 0.26107297680563357, "grad_norm": 1001.22314453125, "learning_rate": 3.70169558438479e-05, "loss": 97.6005, "step": 64620 }, { "epoch": 0.26111337807100116, "grad_norm": 1013.3632202148438, "learning_rate": 3.701548845017915e-05, "loss": 77.4689, "step": 64630 }, { "epoch": 0.2611537793363688, "grad_norm": 640.3134765625, "learning_rate": 3.7014020724784703e-05, "loss": 103.3352, "step": 64640 }, { "epoch": 0.26119418060173644, "grad_norm": 870.0180053710938, "learning_rate": 3.701255266769316e-05, "loss": 63.6067, "step": 64650 }, { "epoch": 0.2612345818671041, "grad_norm": 224.58753967285156, "learning_rate": 3.701108427893314e-05, "loss": 50.96, "step": 64660 }, { "epoch": 0.2612749831324717, "grad_norm": 1057.7576904296875, "learning_rate": 3.7009615558533275e-05, "loss": 49.2088, "step": 64670 }, { "epoch": 0.26131538439783936, "grad_norm": 899.7709350585938, "learning_rate": 3.70081465065222e-05, "loss": 88.2398, "step": 64680 }, { "epoch": 0.261355785663207, "grad_norm": 400.64068603515625, "learning_rate": 3.7006677122928546e-05, "loss": 86.5793, "step": 64690 }, { "epoch": 0.2613961869285746, "grad_norm": 534.3073120117188, "learning_rate": 3.7005207407780975e-05, "loss": 86.3469, "step": 64700 }, { "epoch": 0.2614365881939422, "grad_norm": 836.5368041992188, "learning_rate": 3.7003737361108124e-05, "loss": 67.9904, "step": 64710 }, { "epoch": 0.26147698945930986, "grad_norm": 614.4078369140625, "learning_rate": 3.700226698293866e-05, "loss": 38.8319, "step": 64720 }, { "epoch": 0.2615173907246775, "grad_norm": 999.4403686523438, "learning_rate": 3.7000796273301254e-05, "loss": 76.9349, "step": 64730 }, { "epoch": 0.26155779199004514, "grad_norm": 1237.0614013671875, "learning_rate": 3.6999325232224564e-05, "loss": 61.3102, "step": 64740 }, { "epoch": 0.2615981932554128, "grad_norm": 1526.6424560546875, "learning_rate": 3.699785385973728e-05, "loss": 75.9351, "step": 64750 }, { "epoch": 0.26163859452078037, "grad_norm": 1123.9716796875, "learning_rate": 3.6996382155868084e-05, "loss": 53.3472, "step": 64760 }, { "epoch": 0.261678995786148, "grad_norm": 681.2954711914062, "learning_rate": 3.699491012064567e-05, "loss": 80.2444, "step": 64770 }, { "epoch": 0.26171939705151565, "grad_norm": 490.217529296875, "learning_rate": 3.6993437754098734e-05, "loss": 74.4671, "step": 64780 }, { "epoch": 0.2617597983168833, "grad_norm": 731.5593872070312, "learning_rate": 3.6991965056255976e-05, "loss": 76.7609, "step": 64790 }, { "epoch": 0.2618001995822509, "grad_norm": 443.29791259765625, "learning_rate": 3.6990492027146114e-05, "loss": 53.8047, "step": 64800 }, { "epoch": 0.26184060084761857, "grad_norm": 1002.4287109375, "learning_rate": 3.698901866679786e-05, "loss": 75.2404, "step": 64810 }, { "epoch": 0.2618810021129862, "grad_norm": 0.0, "learning_rate": 3.698754497523994e-05, "loss": 73.0152, "step": 64820 }, { "epoch": 0.2619214033783538, "grad_norm": 466.6201171875, "learning_rate": 3.6986070952501085e-05, "loss": 58.1316, "step": 64830 }, { "epoch": 0.26196180464372143, "grad_norm": 515.5542602539062, "learning_rate": 3.698459659861003e-05, "loss": 84.0784, "step": 64840 }, { "epoch": 0.26200220590908907, "grad_norm": 540.0292358398438, "learning_rate": 3.698312191359553e-05, "loss": 76.5521, "step": 64850 }, { "epoch": 0.2620426071744567, "grad_norm": 611.6865844726562, "learning_rate": 3.698164689748631e-05, "loss": 57.7677, "step": 64860 }, { "epoch": 0.26208300843982435, "grad_norm": 545.26416015625, "learning_rate": 3.6980171550311156e-05, "loss": 58.3426, "step": 64870 }, { "epoch": 0.262123409705192, "grad_norm": 954.208984375, "learning_rate": 3.6978695872098806e-05, "loss": 115.1971, "step": 64880 }, { "epoch": 0.2621638109705596, "grad_norm": 882.8229370117188, "learning_rate": 3.697721986287804e-05, "loss": 54.8908, "step": 64890 }, { "epoch": 0.2622042122359272, "grad_norm": 857.4375610351562, "learning_rate": 3.697574352267764e-05, "loss": 78.1034, "step": 64900 }, { "epoch": 0.26224461350129485, "grad_norm": 549.2964477539062, "learning_rate": 3.697426685152637e-05, "loss": 57.651, "step": 64910 }, { "epoch": 0.2622850147666625, "grad_norm": 983.4130249023438, "learning_rate": 3.697278984945304e-05, "loss": 68.4534, "step": 64920 }, { "epoch": 0.26232541603203013, "grad_norm": 612.8244018554688, "learning_rate": 3.6971312516486426e-05, "loss": 95.7964, "step": 64930 }, { "epoch": 0.2623658172973978, "grad_norm": 713.4525146484375, "learning_rate": 3.6969834852655334e-05, "loss": 64.7911, "step": 64940 }, { "epoch": 0.26240621856276536, "grad_norm": 3631.888671875, "learning_rate": 3.696835685798858e-05, "loss": 86.5216, "step": 64950 }, { "epoch": 0.262446619828133, "grad_norm": 1668.7647705078125, "learning_rate": 3.696687853251497e-05, "loss": 108.1082, "step": 64960 }, { "epoch": 0.26248702109350064, "grad_norm": 621.1637573242188, "learning_rate": 3.696539987626334e-05, "loss": 69.7315, "step": 64970 }, { "epoch": 0.2625274223588683, "grad_norm": 670.1963500976562, "learning_rate": 3.696392088926248e-05, "loss": 94.8227, "step": 64980 }, { "epoch": 0.2625678236242359, "grad_norm": 395.003662109375, "learning_rate": 3.696244157154128e-05, "loss": 55.8034, "step": 64990 }, { "epoch": 0.26260822488960356, "grad_norm": 789.5413208007812, "learning_rate": 3.696096192312852e-05, "loss": 90.5435, "step": 65000 }, { "epoch": 0.2626486261549712, "grad_norm": 598.6563720703125, "learning_rate": 3.695948194405309e-05, "loss": 60.1081, "step": 65010 }, { "epoch": 0.2626890274203388, "grad_norm": 290.1271057128906, "learning_rate": 3.6958001634343825e-05, "loss": 80.8175, "step": 65020 }, { "epoch": 0.2627294286857064, "grad_norm": 553.1530151367188, "learning_rate": 3.695652099402959e-05, "loss": 40.2118, "step": 65030 }, { "epoch": 0.26276982995107406, "grad_norm": 2150.581298828125, "learning_rate": 3.695504002313924e-05, "loss": 82.7208, "step": 65040 }, { "epoch": 0.2628102312164417, "grad_norm": 837.5015258789062, "learning_rate": 3.6953558721701666e-05, "loss": 78.5257, "step": 65050 }, { "epoch": 0.26285063248180934, "grad_norm": 428.619140625, "learning_rate": 3.6952077089745735e-05, "loss": 84.7003, "step": 65060 }, { "epoch": 0.262891033747177, "grad_norm": 990.3151245117188, "learning_rate": 3.695059512730033e-05, "loss": 78.2993, "step": 65070 }, { "epoch": 0.26293143501254457, "grad_norm": 502.7200927734375, "learning_rate": 3.694911283439435e-05, "loss": 88.1456, "step": 65080 }, { "epoch": 0.2629718362779122, "grad_norm": 1459.866943359375, "learning_rate": 3.6947630211056684e-05, "loss": 58.0296, "step": 65090 }, { "epoch": 0.26301223754327985, "grad_norm": 583.2791748046875, "learning_rate": 3.6946147257316247e-05, "loss": 63.4767, "step": 65100 }, { "epoch": 0.2630526388086475, "grad_norm": 572.3054809570312, "learning_rate": 3.6944663973201945e-05, "loss": 50.8657, "step": 65110 }, { "epoch": 0.2630930400740151, "grad_norm": 576.8928833007812, "learning_rate": 3.694318035874269e-05, "loss": 58.8146, "step": 65120 }, { "epoch": 0.26313344133938277, "grad_norm": 699.8406982421875, "learning_rate": 3.694169641396741e-05, "loss": 65.0462, "step": 65130 }, { "epoch": 0.2631738426047504, "grad_norm": 545.7052001953125, "learning_rate": 3.6940212138905043e-05, "loss": 64.3889, "step": 65140 }, { "epoch": 0.263214243870118, "grad_norm": 966.1185302734375, "learning_rate": 3.6938727533584515e-05, "loss": 56.8199, "step": 65150 }, { "epoch": 0.26325464513548563, "grad_norm": 418.309326171875, "learning_rate": 3.6937242598034776e-05, "loss": 53.9059, "step": 65160 }, { "epoch": 0.26329504640085327, "grad_norm": 0.0, "learning_rate": 3.693575733228477e-05, "loss": 71.7491, "step": 65170 }, { "epoch": 0.2633354476662209, "grad_norm": 626.2059326171875, "learning_rate": 3.6934271736363455e-05, "loss": 43.8876, "step": 65180 }, { "epoch": 0.26337584893158855, "grad_norm": 472.5262756347656, "learning_rate": 3.69327858102998e-05, "loss": 93.9019, "step": 65190 }, { "epoch": 0.2634162501969562, "grad_norm": 1224.383544921875, "learning_rate": 3.6931299554122754e-05, "loss": 64.856, "step": 65200 }, { "epoch": 0.2634566514623238, "grad_norm": 1208.1915283203125, "learning_rate": 3.692981296786132e-05, "loss": 88.3372, "step": 65210 }, { "epoch": 0.2634970527276914, "grad_norm": 838.8875122070312, "learning_rate": 3.692832605154446e-05, "loss": 68.6252, "step": 65220 }, { "epoch": 0.26353745399305906, "grad_norm": 2221.710693359375, "learning_rate": 3.692683880520117e-05, "loss": 79.5289, "step": 65230 }, { "epoch": 0.2635778552584267, "grad_norm": 1237.591064453125, "learning_rate": 3.6925351228860445e-05, "loss": 77.699, "step": 65240 }, { "epoch": 0.26361825652379434, "grad_norm": 549.4052734375, "learning_rate": 3.6923863322551284e-05, "loss": 67.4263, "step": 65250 }, { "epoch": 0.263658657789162, "grad_norm": 614.0408935546875, "learning_rate": 3.692237508630269e-05, "loss": 55.3787, "step": 65260 }, { "epoch": 0.26369905905452956, "grad_norm": 1487.8175048828125, "learning_rate": 3.6920886520143684e-05, "loss": 56.1122, "step": 65270 }, { "epoch": 0.2637394603198972, "grad_norm": 468.63720703125, "learning_rate": 3.6919397624103284e-05, "loss": 60.9861, "step": 65280 }, { "epoch": 0.26377986158526484, "grad_norm": 1053.509033203125, "learning_rate": 3.691790839821052e-05, "loss": 47.5022, "step": 65290 }, { "epoch": 0.2638202628506325, "grad_norm": 731.3139038085938, "learning_rate": 3.6916418842494416e-05, "loss": 56.3728, "step": 65300 }, { "epoch": 0.2638606641160001, "grad_norm": 652.5587768554688, "learning_rate": 3.691492895698402e-05, "loss": 57.3569, "step": 65310 }, { "epoch": 0.26390106538136776, "grad_norm": 672.2579345703125, "learning_rate": 3.691343874170838e-05, "loss": 87.096, "step": 65320 }, { "epoch": 0.2639414666467354, "grad_norm": 789.9293212890625, "learning_rate": 3.6911948196696533e-05, "loss": 58.2702, "step": 65330 }, { "epoch": 0.263981867912103, "grad_norm": 682.2620239257812, "learning_rate": 3.691045732197756e-05, "loss": 77.8977, "step": 65340 }, { "epoch": 0.2640222691774706, "grad_norm": 985.931884765625, "learning_rate": 3.690896611758051e-05, "loss": 65.8317, "step": 65350 }, { "epoch": 0.26406267044283827, "grad_norm": 732.6767578125, "learning_rate": 3.690747458353446e-05, "loss": 88.3303, "step": 65360 }, { "epoch": 0.2641030717082059, "grad_norm": 416.494140625, "learning_rate": 3.6905982719868493e-05, "loss": 82.3161, "step": 65370 }, { "epoch": 0.26414347297357355, "grad_norm": 656.120361328125, "learning_rate": 3.6904490526611684e-05, "loss": 89.1459, "step": 65380 }, { "epoch": 0.2641838742389412, "grad_norm": 381.7622985839844, "learning_rate": 3.690299800379313e-05, "loss": 47.3332, "step": 65390 }, { "epoch": 0.26422427550430877, "grad_norm": 578.4981079101562, "learning_rate": 3.6901505151441935e-05, "loss": 96.7956, "step": 65400 }, { "epoch": 0.2642646767696764, "grad_norm": 952.101318359375, "learning_rate": 3.690001196958719e-05, "loss": 92.0501, "step": 65410 }, { "epoch": 0.26430507803504405, "grad_norm": 681.7398071289062, "learning_rate": 3.6898518458258006e-05, "loss": 47.1945, "step": 65420 }, { "epoch": 0.2643454793004117, "grad_norm": 132.1608123779297, "learning_rate": 3.689702461748351e-05, "loss": 81.5882, "step": 65430 }, { "epoch": 0.26438588056577933, "grad_norm": 1409.0716552734375, "learning_rate": 3.689553044729282e-05, "loss": 96.7558, "step": 65440 }, { "epoch": 0.26442628183114697, "grad_norm": 580.634521484375, "learning_rate": 3.689403594771506e-05, "loss": 65.5121, "step": 65450 }, { "epoch": 0.2644666830965146, "grad_norm": 1463.0916748046875, "learning_rate": 3.689254111877938e-05, "loss": 69.6791, "step": 65460 }, { "epoch": 0.2645070843618822, "grad_norm": 1450.8865966796875, "learning_rate": 3.6891045960514904e-05, "loss": 67.0916, "step": 65470 }, { "epoch": 0.26454748562724983, "grad_norm": 592.7374267578125, "learning_rate": 3.6889550472950804e-05, "loss": 79.5903, "step": 65480 }, { "epoch": 0.2645878868926175, "grad_norm": 473.9434509277344, "learning_rate": 3.688805465611621e-05, "loss": 62.0096, "step": 65490 }, { "epoch": 0.2646282881579851, "grad_norm": 664.6771240234375, "learning_rate": 3.6886558510040305e-05, "loss": 47.8189, "step": 65500 }, { "epoch": 0.26466868942335275, "grad_norm": 791.1522216796875, "learning_rate": 3.6885062034752244e-05, "loss": 67.1695, "step": 65510 }, { "epoch": 0.2647090906887204, "grad_norm": 638.1292114257812, "learning_rate": 3.6883565230281205e-05, "loss": 51.381, "step": 65520 }, { "epoch": 0.264749491954088, "grad_norm": 521.1875, "learning_rate": 3.688206809665637e-05, "loss": 51.432, "step": 65530 }, { "epoch": 0.2647898932194556, "grad_norm": 658.73095703125, "learning_rate": 3.688057063390693e-05, "loss": 56.7711, "step": 65540 }, { "epoch": 0.26483029448482326, "grad_norm": 562.1188354492188, "learning_rate": 3.687907284206207e-05, "loss": 54.7807, "step": 65550 }, { "epoch": 0.2648706957501909, "grad_norm": 474.15313720703125, "learning_rate": 3.6877574721151e-05, "loss": 86.3629, "step": 65560 }, { "epoch": 0.26491109701555854, "grad_norm": 870.3119506835938, "learning_rate": 3.687607627120291e-05, "loss": 76.91, "step": 65570 }, { "epoch": 0.2649514982809262, "grad_norm": 495.1028747558594, "learning_rate": 3.6874577492247036e-05, "loss": 103.5563, "step": 65580 }, { "epoch": 0.26499189954629376, "grad_norm": 775.1270751953125, "learning_rate": 3.687307838431258e-05, "loss": 63.2456, "step": 65590 }, { "epoch": 0.2650323008116614, "grad_norm": 761.67333984375, "learning_rate": 3.687157894742878e-05, "loss": 92.0111, "step": 65600 }, { "epoch": 0.26507270207702904, "grad_norm": 579.744873046875, "learning_rate": 3.687007918162486e-05, "loss": 52.8847, "step": 65610 }, { "epoch": 0.2651131033423967, "grad_norm": 929.838134765625, "learning_rate": 3.686857908693006e-05, "loss": 52.4094, "step": 65620 }, { "epoch": 0.2651535046077643, "grad_norm": 833.6460571289062, "learning_rate": 3.6867078663373624e-05, "loss": 86.709, "step": 65630 }, { "epoch": 0.26519390587313196, "grad_norm": 485.7236633300781, "learning_rate": 3.686557791098481e-05, "loss": 71.4412, "step": 65640 }, { "epoch": 0.2652343071384996, "grad_norm": 1216.905029296875, "learning_rate": 3.6864076829792865e-05, "loss": 87.901, "step": 65650 }, { "epoch": 0.2652747084038672, "grad_norm": 923.685546875, "learning_rate": 3.686257541982706e-05, "loss": 85.9243, "step": 65660 }, { "epoch": 0.2653151096692348, "grad_norm": 951.8165283203125, "learning_rate": 3.6861073681116674e-05, "loss": 81.3739, "step": 65670 }, { "epoch": 0.26535551093460247, "grad_norm": 342.7435607910156, "learning_rate": 3.685957161369098e-05, "loss": 68.2152, "step": 65680 }, { "epoch": 0.2653959121999701, "grad_norm": 2444.807373046875, "learning_rate": 3.685806921757925e-05, "loss": 86.507, "step": 65690 }, { "epoch": 0.26543631346533775, "grad_norm": 1393.3995361328125, "learning_rate": 3.685656649281078e-05, "loss": 67.6843, "step": 65700 }, { "epoch": 0.2654767147307054, "grad_norm": 1030.308837890625, "learning_rate": 3.6855063439414866e-05, "loss": 49.6981, "step": 65710 }, { "epoch": 0.26551711599607297, "grad_norm": 722.55810546875, "learning_rate": 3.685356005742082e-05, "loss": 86.8199, "step": 65720 }, { "epoch": 0.2655575172614406, "grad_norm": 2299.416259765625, "learning_rate": 3.685205634685794e-05, "loss": 115.0299, "step": 65730 }, { "epoch": 0.26559791852680825, "grad_norm": 761.451904296875, "learning_rate": 3.685055230775554e-05, "loss": 57.6311, "step": 65740 }, { "epoch": 0.2656383197921759, "grad_norm": 679.90625, "learning_rate": 3.684904794014296e-05, "loss": 71.5767, "step": 65750 }, { "epoch": 0.26567872105754353, "grad_norm": 1391.4635009765625, "learning_rate": 3.6847543244049505e-05, "loss": 48.7501, "step": 65760 }, { "epoch": 0.26571912232291117, "grad_norm": 581.217041015625, "learning_rate": 3.684603821950452e-05, "loss": 80.6224, "step": 65770 }, { "epoch": 0.2657595235882788, "grad_norm": 960.2913818359375, "learning_rate": 3.6844532866537355e-05, "loss": 106.6197, "step": 65780 }, { "epoch": 0.2657999248536464, "grad_norm": 1181.9364013671875, "learning_rate": 3.684302718517734e-05, "loss": 74.9867, "step": 65790 }, { "epoch": 0.26584032611901404, "grad_norm": 480.9149475097656, "learning_rate": 3.684152117545385e-05, "loss": 91.1819, "step": 65800 }, { "epoch": 0.2658807273843817, "grad_norm": 1295.970458984375, "learning_rate": 3.684001483739623e-05, "loss": 88.7823, "step": 65810 }, { "epoch": 0.2659211286497493, "grad_norm": 447.7242431640625, "learning_rate": 3.6838508171033846e-05, "loss": 68.8692, "step": 65820 }, { "epoch": 0.26596152991511696, "grad_norm": 556.4803466796875, "learning_rate": 3.6837001176396084e-05, "loss": 69.6128, "step": 65830 }, { "epoch": 0.2660019311804846, "grad_norm": 856.51513671875, "learning_rate": 3.683549385351231e-05, "loss": 55.587, "step": 65840 }, { "epoch": 0.2660423324458522, "grad_norm": 908.120849609375, "learning_rate": 3.6833986202411914e-05, "loss": 69.7311, "step": 65850 }, { "epoch": 0.2660827337112198, "grad_norm": 640.2660522460938, "learning_rate": 3.68324782231243e-05, "loss": 70.6052, "step": 65860 }, { "epoch": 0.26612313497658746, "grad_norm": 524.6282348632812, "learning_rate": 3.683096991567885e-05, "loss": 49.6814, "step": 65870 }, { "epoch": 0.2661635362419551, "grad_norm": 1259.0841064453125, "learning_rate": 3.682946128010498e-05, "loss": 66.3145, "step": 65880 }, { "epoch": 0.26620393750732274, "grad_norm": 1364.9793701171875, "learning_rate": 3.682795231643209e-05, "loss": 84.0134, "step": 65890 }, { "epoch": 0.2662443387726904, "grad_norm": 1976.0228271484375, "learning_rate": 3.682644302468961e-05, "loss": 84.8792, "step": 65900 }, { "epoch": 0.26628474003805797, "grad_norm": 616.8114013671875, "learning_rate": 3.682493340490697e-05, "loss": 98.148, "step": 65910 }, { "epoch": 0.2663251413034256, "grad_norm": 1352.3936767578125, "learning_rate": 3.6823423457113575e-05, "loss": 70.8769, "step": 65920 }, { "epoch": 0.26636554256879325, "grad_norm": 833.4444580078125, "learning_rate": 3.682191318133889e-05, "loss": 83.6237, "step": 65930 }, { "epoch": 0.2664059438341609, "grad_norm": 276.592529296875, "learning_rate": 3.6820402577612336e-05, "loss": 72.8676, "step": 65940 }, { "epoch": 0.2664463450995285, "grad_norm": 521.146728515625, "learning_rate": 3.681889164596339e-05, "loss": 38.219, "step": 65950 }, { "epoch": 0.26648674636489617, "grad_norm": 1227.583740234375, "learning_rate": 3.6817380386421475e-05, "loss": 56.357, "step": 65960 }, { "epoch": 0.2665271476302638, "grad_norm": 928.7647094726562, "learning_rate": 3.681586879901608e-05, "loss": 72.9631, "step": 65970 }, { "epoch": 0.2665675488956314, "grad_norm": 645.942138671875, "learning_rate": 3.6814356883776665e-05, "loss": 83.9413, "step": 65980 }, { "epoch": 0.26660795016099903, "grad_norm": 1118.24365234375, "learning_rate": 3.681284464073271e-05, "loss": 78.2679, "step": 65990 }, { "epoch": 0.26664835142636667, "grad_norm": 427.3819580078125, "learning_rate": 3.6811332069913685e-05, "loss": 47.7537, "step": 66000 }, { "epoch": 0.2666887526917343, "grad_norm": 1441.3486328125, "learning_rate": 3.680981917134909e-05, "loss": 66.2791, "step": 66010 }, { "epoch": 0.26672915395710195, "grad_norm": 621.1433715820312, "learning_rate": 3.6808305945068424e-05, "loss": 113.461, "step": 66020 }, { "epoch": 0.2667695552224696, "grad_norm": 705.9483642578125, "learning_rate": 3.6806792391101166e-05, "loss": 81.4384, "step": 66030 }, { "epoch": 0.2668099564878372, "grad_norm": 2083.895751953125, "learning_rate": 3.6805278509476844e-05, "loss": 65.3315, "step": 66040 }, { "epoch": 0.2668503577532048, "grad_norm": 1001.9884033203125, "learning_rate": 3.680376430022497e-05, "loss": 50.324, "step": 66050 }, { "epoch": 0.26689075901857245, "grad_norm": 618.3958740234375, "learning_rate": 3.680224976337505e-05, "loss": 66.6406, "step": 66060 }, { "epoch": 0.2669311602839401, "grad_norm": 531.05712890625, "learning_rate": 3.680073489895663e-05, "loss": 69.8654, "step": 66070 }, { "epoch": 0.26697156154930773, "grad_norm": 780.5286254882812, "learning_rate": 3.679921970699923e-05, "loss": 63.6973, "step": 66080 }, { "epoch": 0.2670119628146754, "grad_norm": 1168.5731201171875, "learning_rate": 3.679770418753239e-05, "loss": 64.4696, "step": 66090 }, { "epoch": 0.267052364080043, "grad_norm": 461.4474182128906, "learning_rate": 3.679618834058566e-05, "loss": 63.9347, "step": 66100 }, { "epoch": 0.2670927653454106, "grad_norm": 317.3716125488281, "learning_rate": 3.6794672166188595e-05, "loss": 74.5739, "step": 66110 }, { "epoch": 0.26713316661077824, "grad_norm": 397.1957092285156, "learning_rate": 3.679315566437074e-05, "loss": 56.8725, "step": 66120 }, { "epoch": 0.2671735678761459, "grad_norm": 505.0995788574219, "learning_rate": 3.679163883516168e-05, "loss": 84.3939, "step": 66130 }, { "epoch": 0.2672139691415135, "grad_norm": 3382.34033203125, "learning_rate": 3.6790121678590975e-05, "loss": 71.4125, "step": 66140 }, { "epoch": 0.26725437040688116, "grad_norm": 391.44189453125, "learning_rate": 3.6788604194688205e-05, "loss": 55.7094, "step": 66150 }, { "epoch": 0.2672947716722488, "grad_norm": 562.6239013671875, "learning_rate": 3.6787086383482946e-05, "loss": 60.2443, "step": 66160 }, { "epoch": 0.2673351729376164, "grad_norm": 754.8357543945312, "learning_rate": 3.6785568245004796e-05, "loss": 69.7856, "step": 66170 }, { "epoch": 0.267375574202984, "grad_norm": 623.0692138671875, "learning_rate": 3.6784049779283355e-05, "loss": 56.0616, "step": 66180 }, { "epoch": 0.26741597546835166, "grad_norm": 1186.1983642578125, "learning_rate": 3.678253098634822e-05, "loss": 78.3309, "step": 66190 }, { "epoch": 0.2674563767337193, "grad_norm": 398.3062438964844, "learning_rate": 3.678101186622901e-05, "loss": 64.1084, "step": 66200 }, { "epoch": 0.26749677799908694, "grad_norm": 1136.1226806640625, "learning_rate": 3.6779492418955324e-05, "loss": 57.2094, "step": 66210 }, { "epoch": 0.2675371792644546, "grad_norm": 658.5238647460938, "learning_rate": 3.67779726445568e-05, "loss": 96.4897, "step": 66220 }, { "epoch": 0.26757758052982217, "grad_norm": 653.9174194335938, "learning_rate": 3.677645254306306e-05, "loss": 61.6924, "step": 66230 }, { "epoch": 0.2676179817951898, "grad_norm": 707.5989990234375, "learning_rate": 3.6774932114503743e-05, "loss": 71.5184, "step": 66240 }, { "epoch": 0.26765838306055745, "grad_norm": 1067.349609375, "learning_rate": 3.6773411358908486e-05, "loss": 86.1329, "step": 66250 }, { "epoch": 0.2676987843259251, "grad_norm": 1056.942138671875, "learning_rate": 3.677189027630694e-05, "loss": 57.0639, "step": 66260 }, { "epoch": 0.2677391855912927, "grad_norm": 604.6203002929688, "learning_rate": 3.6770368866728756e-05, "loss": 70.5156, "step": 66270 }, { "epoch": 0.26777958685666037, "grad_norm": 1239.1727294921875, "learning_rate": 3.6768847130203595e-05, "loss": 68.9191, "step": 66280 }, { "epoch": 0.267819988122028, "grad_norm": 1437.4583740234375, "learning_rate": 3.6767325066761136e-05, "loss": 98.3323, "step": 66290 }, { "epoch": 0.2678603893873956, "grad_norm": 696.0053100585938, "learning_rate": 3.676580267643103e-05, "loss": 123.7425, "step": 66300 }, { "epoch": 0.26790079065276323, "grad_norm": 660.8330688476562, "learning_rate": 3.676427995924297e-05, "loss": 44.3089, "step": 66310 }, { "epoch": 0.26794119191813087, "grad_norm": 0.0, "learning_rate": 3.6762756915226654e-05, "loss": 60.4803, "step": 66320 }, { "epoch": 0.2679815931834985, "grad_norm": 326.1693115234375, "learning_rate": 3.676123354441175e-05, "loss": 61.5102, "step": 66330 }, { "epoch": 0.26802199444886615, "grad_norm": 409.2984924316406, "learning_rate": 3.6759709846827977e-05, "loss": 91.4111, "step": 66340 }, { "epoch": 0.2680623957142338, "grad_norm": 866.605224609375, "learning_rate": 3.675818582250503e-05, "loss": 72.9242, "step": 66350 }, { "epoch": 0.2681027969796014, "grad_norm": 1267.880615234375, "learning_rate": 3.6756661471472626e-05, "loss": 95.2002, "step": 66360 }, { "epoch": 0.268143198244969, "grad_norm": 591.977783203125, "learning_rate": 3.675513679376047e-05, "loss": 60.2687, "step": 66370 }, { "epoch": 0.26818359951033666, "grad_norm": 1351.8251953125, "learning_rate": 3.67536117893983e-05, "loss": 62.6659, "step": 66380 }, { "epoch": 0.2682240007757043, "grad_norm": 874.5169677734375, "learning_rate": 3.675208645841584e-05, "loss": 66.4748, "step": 66390 }, { "epoch": 0.26826440204107194, "grad_norm": 4129.86474609375, "learning_rate": 3.675056080084284e-05, "loss": 109.5643, "step": 66400 }, { "epoch": 0.2683048033064396, "grad_norm": 681.3414916992188, "learning_rate": 3.674903481670902e-05, "loss": 76.4744, "step": 66410 }, { "epoch": 0.2683452045718072, "grad_norm": 989.4423828125, "learning_rate": 3.6747508506044156e-05, "loss": 56.9486, "step": 66420 }, { "epoch": 0.2683856058371748, "grad_norm": 644.98681640625, "learning_rate": 3.6745981868877986e-05, "loss": 107.4632, "step": 66430 }, { "epoch": 0.26842600710254244, "grad_norm": 709.885009765625, "learning_rate": 3.674445490524027e-05, "loss": 83.2144, "step": 66440 }, { "epoch": 0.2684664083679101, "grad_norm": 268.4350280761719, "learning_rate": 3.67429276151608e-05, "loss": 44.282, "step": 66450 }, { "epoch": 0.2685068096332777, "grad_norm": 972.099365234375, "learning_rate": 3.674139999866933e-05, "loss": 68.6298, "step": 66460 }, { "epoch": 0.26854721089864536, "grad_norm": 1137.8095703125, "learning_rate": 3.6739872055795646e-05, "loss": 49.5315, "step": 66470 }, { "epoch": 0.268587612164013, "grad_norm": 1797.35595703125, "learning_rate": 3.673834378656954e-05, "loss": 54.9943, "step": 66480 }, { "epoch": 0.2686280134293806, "grad_norm": 668.4421997070312, "learning_rate": 3.67368151910208e-05, "loss": 92.2195, "step": 66490 }, { "epoch": 0.2686684146947482, "grad_norm": 848.138671875, "learning_rate": 3.673528626917924e-05, "loss": 105.8378, "step": 66500 }, { "epoch": 0.26870881596011587, "grad_norm": 599.9146728515625, "learning_rate": 3.673375702107465e-05, "loss": 71.8781, "step": 66510 }, { "epoch": 0.2687492172254835, "grad_norm": 665.2034301757812, "learning_rate": 3.673222744673686e-05, "loss": 66.1195, "step": 66520 }, { "epoch": 0.26878961849085115, "grad_norm": 1226.78515625, "learning_rate": 3.673069754619567e-05, "loss": 81.691, "step": 66530 }, { "epoch": 0.2688300197562188, "grad_norm": 676.3750610351562, "learning_rate": 3.672916731948092e-05, "loss": 66.9422, "step": 66540 }, { "epoch": 0.26887042102158637, "grad_norm": 990.93994140625, "learning_rate": 3.672763676662245e-05, "loss": 80.2395, "step": 66550 }, { "epoch": 0.268910822286954, "grad_norm": 2785.474853515625, "learning_rate": 3.672610588765008e-05, "loss": 85.2976, "step": 66560 }, { "epoch": 0.26895122355232165, "grad_norm": 777.316162109375, "learning_rate": 3.672457468259367e-05, "loss": 69.1201, "step": 66570 }, { "epoch": 0.2689916248176893, "grad_norm": 911.9228515625, "learning_rate": 3.6723043151483066e-05, "loss": 73.0671, "step": 66580 }, { "epoch": 0.26903202608305693, "grad_norm": 1047.5543212890625, "learning_rate": 3.6721511294348124e-05, "loss": 96.2293, "step": 66590 }, { "epoch": 0.26907242734842457, "grad_norm": 0.0, "learning_rate": 3.671997911121871e-05, "loss": 67.165, "step": 66600 }, { "epoch": 0.2691128286137922, "grad_norm": 1073.5423583984375, "learning_rate": 3.67184466021247e-05, "loss": 83.9561, "step": 66610 }, { "epoch": 0.2691532298791598, "grad_norm": 599.5601806640625, "learning_rate": 3.6716913767095964e-05, "loss": 60.168, "step": 66620 }, { "epoch": 0.26919363114452743, "grad_norm": 626.68701171875, "learning_rate": 3.671538060616239e-05, "loss": 50.7169, "step": 66630 }, { "epoch": 0.2692340324098951, "grad_norm": 2009.712158203125, "learning_rate": 3.671384711935386e-05, "loss": 71.0345, "step": 66640 }, { "epoch": 0.2692744336752627, "grad_norm": 805.1333618164062, "learning_rate": 3.6712313306700276e-05, "loss": 102.1495, "step": 66650 }, { "epoch": 0.26931483494063035, "grad_norm": 950.0916137695312, "learning_rate": 3.6710779168231535e-05, "loss": 79.8175, "step": 66660 }, { "epoch": 0.269355236205998, "grad_norm": 436.6391906738281, "learning_rate": 3.670924470397756e-05, "loss": 56.7998, "step": 66670 }, { "epoch": 0.2693956374713656, "grad_norm": 601.5894165039062, "learning_rate": 3.6707709913968254e-05, "loss": 69.9057, "step": 66680 }, { "epoch": 0.2694360387367332, "grad_norm": 910.7144165039062, "learning_rate": 3.6706174798233536e-05, "loss": 62.8246, "step": 66690 }, { "epoch": 0.26947644000210086, "grad_norm": 683.0995483398438, "learning_rate": 3.670463935680335e-05, "loss": 82.9362, "step": 66700 }, { "epoch": 0.2695168412674685, "grad_norm": 510.84930419921875, "learning_rate": 3.670310358970762e-05, "loss": 50.4376, "step": 66710 }, { "epoch": 0.26955724253283614, "grad_norm": 555.572998046875, "learning_rate": 3.670156749697627e-05, "loss": 65.4187, "step": 66720 }, { "epoch": 0.2695976437982038, "grad_norm": 564.907470703125, "learning_rate": 3.670003107863928e-05, "loss": 84.8135, "step": 66730 }, { "epoch": 0.2696380450635714, "grad_norm": 1052.8568115234375, "learning_rate": 3.6698494334726575e-05, "loss": 101.7041, "step": 66740 }, { "epoch": 0.269678446328939, "grad_norm": 246.42845153808594, "learning_rate": 3.6696957265268134e-05, "loss": 48.1932, "step": 66750 }, { "epoch": 0.26971884759430664, "grad_norm": 1020.048583984375, "learning_rate": 3.6695419870293915e-05, "loss": 100.3386, "step": 66760 }, { "epoch": 0.2697592488596743, "grad_norm": 1148.8536376953125, "learning_rate": 3.669388214983388e-05, "loss": 59.7867, "step": 66770 }, { "epoch": 0.2697996501250419, "grad_norm": 382.64501953125, "learning_rate": 3.669234410391803e-05, "loss": 44.2975, "step": 66780 }, { "epoch": 0.26984005139040956, "grad_norm": 1493.79296875, "learning_rate": 3.669080573257633e-05, "loss": 62.6379, "step": 66790 }, { "epoch": 0.2698804526557772, "grad_norm": 369.2339172363281, "learning_rate": 3.668926703583878e-05, "loss": 54.9538, "step": 66800 }, { "epoch": 0.2699208539211448, "grad_norm": 716.9329223632812, "learning_rate": 3.668772801373538e-05, "loss": 82.7792, "step": 66810 }, { "epoch": 0.2699612551865124, "grad_norm": 882.6861572265625, "learning_rate": 3.6686188666296135e-05, "loss": 75.0362, "step": 66820 }, { "epoch": 0.27000165645188007, "grad_norm": 359.2427978515625, "learning_rate": 3.668464899355105e-05, "loss": 88.4607, "step": 66830 }, { "epoch": 0.2700420577172477, "grad_norm": 941.4803466796875, "learning_rate": 3.668310899553014e-05, "loss": 63.2962, "step": 66840 }, { "epoch": 0.27008245898261535, "grad_norm": 839.5689697265625, "learning_rate": 3.668156867226343e-05, "loss": 51.2695, "step": 66850 }, { "epoch": 0.270122860247983, "grad_norm": 950.5919189453125, "learning_rate": 3.668002802378094e-05, "loss": 79.6808, "step": 66860 }, { "epoch": 0.27016326151335057, "grad_norm": 794.5095825195312, "learning_rate": 3.6678487050112735e-05, "loss": 60.239, "step": 66870 }, { "epoch": 0.2702036627787182, "grad_norm": 300.521240234375, "learning_rate": 3.667694575128883e-05, "loss": 78.1244, "step": 66880 }, { "epoch": 0.27024406404408585, "grad_norm": 1067.5, "learning_rate": 3.667540412733928e-05, "loss": 56.125, "step": 66890 }, { "epoch": 0.2702844653094535, "grad_norm": 496.9468688964844, "learning_rate": 3.667386217829415e-05, "loss": 46.4149, "step": 66900 }, { "epoch": 0.27032486657482113, "grad_norm": 670.4450073242188, "learning_rate": 3.667231990418348e-05, "loss": 70.8287, "step": 66910 }, { "epoch": 0.27036526784018877, "grad_norm": 787.8795776367188, "learning_rate": 3.667077730503736e-05, "loss": 57.6352, "step": 66920 }, { "epoch": 0.2704056691055564, "grad_norm": 542.7733764648438, "learning_rate": 3.666923438088585e-05, "loss": 55.8081, "step": 66930 }, { "epoch": 0.270446070370924, "grad_norm": 568.5580444335938, "learning_rate": 3.6667691131759034e-05, "loss": 87.649, "step": 66940 }, { "epoch": 0.27048647163629164, "grad_norm": 1132.9769287109375, "learning_rate": 3.6666147557687e-05, "loss": 102.21, "step": 66950 }, { "epoch": 0.2705268729016593, "grad_norm": 1040.704345703125, "learning_rate": 3.6664603658699836e-05, "loss": 74.1205, "step": 66960 }, { "epoch": 0.2705672741670269, "grad_norm": 593.2798461914062, "learning_rate": 3.666305943482765e-05, "loss": 64.604, "step": 66970 }, { "epoch": 0.27060767543239456, "grad_norm": 1937.970458984375, "learning_rate": 3.666151488610053e-05, "loss": 87.1729, "step": 66980 }, { "epoch": 0.2706480766977622, "grad_norm": 746.2176513671875, "learning_rate": 3.6659970012548606e-05, "loss": 72.0054, "step": 66990 }, { "epoch": 0.2706884779631298, "grad_norm": 0.0, "learning_rate": 3.665842481420199e-05, "loss": 66.9999, "step": 67000 }, { "epoch": 0.2707288792284974, "grad_norm": 725.7628784179688, "learning_rate": 3.665687929109081e-05, "loss": 87.2939, "step": 67010 }, { "epoch": 0.27076928049386506, "grad_norm": 1472.5765380859375, "learning_rate": 3.6655333443245184e-05, "loss": 61.8437, "step": 67020 }, { "epoch": 0.2708096817592327, "grad_norm": 2046.2059326171875, "learning_rate": 3.6653787270695266e-05, "loss": 67.9492, "step": 67030 }, { "epoch": 0.27085008302460034, "grad_norm": 1022.05517578125, "learning_rate": 3.665224077347119e-05, "loss": 65.6357, "step": 67040 }, { "epoch": 0.270890484289968, "grad_norm": 1938.1826171875, "learning_rate": 3.66506939516031e-05, "loss": 51.326, "step": 67050 }, { "epoch": 0.27093088555533557, "grad_norm": 1422.065185546875, "learning_rate": 3.664914680512117e-05, "loss": 56.8821, "step": 67060 }, { "epoch": 0.2709712868207032, "grad_norm": 644.83349609375, "learning_rate": 3.664759933405554e-05, "loss": 59.517, "step": 67070 }, { "epoch": 0.27101168808607085, "grad_norm": 537.67578125, "learning_rate": 3.6646051538436395e-05, "loss": 43.3435, "step": 67080 }, { "epoch": 0.2710520893514385, "grad_norm": 485.49444580078125, "learning_rate": 3.6644503418293905e-05, "loss": 80.7018, "step": 67090 }, { "epoch": 0.2710924906168061, "grad_norm": 975.3925170898438, "learning_rate": 3.6642954973658256e-05, "loss": 99.1135, "step": 67100 }, { "epoch": 0.27113289188217377, "grad_norm": 980.0559692382812, "learning_rate": 3.664140620455962e-05, "loss": 77.6215, "step": 67110 }, { "epoch": 0.2711732931475414, "grad_norm": 532.5707397460938, "learning_rate": 3.6639857111028215e-05, "loss": 60.3496, "step": 67120 }, { "epoch": 0.271213694412909, "grad_norm": 524.780517578125, "learning_rate": 3.663830769309423e-05, "loss": 88.2819, "step": 67130 }, { "epoch": 0.27125409567827663, "grad_norm": 691.8131713867188, "learning_rate": 3.663675795078786e-05, "loss": 103.3954, "step": 67140 }, { "epoch": 0.27129449694364427, "grad_norm": 1298.9471435546875, "learning_rate": 3.663520788413933e-05, "loss": 91.5792, "step": 67150 }, { "epoch": 0.2713348982090119, "grad_norm": 783.9149169921875, "learning_rate": 3.663365749317886e-05, "loss": 47.3599, "step": 67160 }, { "epoch": 0.27137529947437955, "grad_norm": 2600.474853515625, "learning_rate": 3.663210677793668e-05, "loss": 72.9183, "step": 67170 }, { "epoch": 0.2714157007397472, "grad_norm": 728.5089111328125, "learning_rate": 3.6630555738443e-05, "loss": 58.8554, "step": 67180 }, { "epoch": 0.2714561020051148, "grad_norm": 1014.21435546875, "learning_rate": 3.662900437472809e-05, "loss": 61.5755, "step": 67190 }, { "epoch": 0.2714965032704824, "grad_norm": 557.588623046875, "learning_rate": 3.662745268682217e-05, "loss": 71.5919, "step": 67200 }, { "epoch": 0.27153690453585005, "grad_norm": 539.9105834960938, "learning_rate": 3.6625900674755503e-05, "loss": 70.7316, "step": 67210 }, { "epoch": 0.2715773058012177, "grad_norm": 980.9306030273438, "learning_rate": 3.662434833855835e-05, "loss": 62.9494, "step": 67220 }, { "epoch": 0.27161770706658533, "grad_norm": 646.4301147460938, "learning_rate": 3.662279567826096e-05, "loss": 56.7494, "step": 67230 }, { "epoch": 0.271658108331953, "grad_norm": 573.99072265625, "learning_rate": 3.66212426938936e-05, "loss": 84.0111, "step": 67240 }, { "epoch": 0.2716985095973206, "grad_norm": 1240.050048828125, "learning_rate": 3.6619689385486566e-05, "loss": 74.4962, "step": 67250 }, { "epoch": 0.2717389108626882, "grad_norm": 776.9219970703125, "learning_rate": 3.6618135753070136e-05, "loss": 65.1254, "step": 67260 }, { "epoch": 0.27177931212805584, "grad_norm": 828.0167846679688, "learning_rate": 3.661658179667459e-05, "loss": 74.3977, "step": 67270 }, { "epoch": 0.2718197133934235, "grad_norm": 785.6934814453125, "learning_rate": 3.6615027516330226e-05, "loss": 87.0982, "step": 67280 }, { "epoch": 0.2718601146587911, "grad_norm": 464.1484680175781, "learning_rate": 3.6613472912067345e-05, "loss": 45.3541, "step": 67290 }, { "epoch": 0.27190051592415876, "grad_norm": 463.8283386230469, "learning_rate": 3.661191798391626e-05, "loss": 77.3246, "step": 67300 }, { "epoch": 0.2719409171895264, "grad_norm": 1396.08984375, "learning_rate": 3.661036273190727e-05, "loss": 70.516, "step": 67310 }, { "epoch": 0.271981318454894, "grad_norm": 665.4253540039062, "learning_rate": 3.660880715607072e-05, "loss": 83.0413, "step": 67320 }, { "epoch": 0.2720217197202616, "grad_norm": 501.6424560546875, "learning_rate": 3.6607251256436925e-05, "loss": 68.0622, "step": 67330 }, { "epoch": 0.27206212098562926, "grad_norm": 608.7156982421875, "learning_rate": 3.660569503303621e-05, "loss": 56.7109, "step": 67340 }, { "epoch": 0.2721025222509969, "grad_norm": 482.20233154296875, "learning_rate": 3.660413848589892e-05, "loss": 68.6953, "step": 67350 }, { "epoch": 0.27214292351636454, "grad_norm": 738.3167114257812, "learning_rate": 3.6602581615055406e-05, "loss": 65.3163, "step": 67360 }, { "epoch": 0.2721833247817322, "grad_norm": 357.5082092285156, "learning_rate": 3.660102442053601e-05, "loss": 73.2594, "step": 67370 }, { "epoch": 0.27222372604709977, "grad_norm": 775.5992431640625, "learning_rate": 3.659946690237111e-05, "loss": 74.1652, "step": 67380 }, { "epoch": 0.2722641273124674, "grad_norm": 926.7460327148438, "learning_rate": 3.659790906059105e-05, "loss": 67.9021, "step": 67390 }, { "epoch": 0.27230452857783505, "grad_norm": 614.5941162109375, "learning_rate": 3.65963508952262e-05, "loss": 89.038, "step": 67400 }, { "epoch": 0.2723449298432027, "grad_norm": 503.3794250488281, "learning_rate": 3.659479240630695e-05, "loss": 123.8151, "step": 67410 }, { "epoch": 0.2723853311085703, "grad_norm": 680.7290649414062, "learning_rate": 3.659323359386368e-05, "loss": 68.3755, "step": 67420 }, { "epoch": 0.27242573237393797, "grad_norm": 876.765869140625, "learning_rate": 3.659167445792677e-05, "loss": 69.3434, "step": 67430 }, { "epoch": 0.2724661336393056, "grad_norm": 466.19525146484375, "learning_rate": 3.659011499852664e-05, "loss": 70.4111, "step": 67440 }, { "epoch": 0.2725065349046732, "grad_norm": 1058.6571044921875, "learning_rate": 3.658855521569367e-05, "loss": 60.2211, "step": 67450 }, { "epoch": 0.27254693617004083, "grad_norm": 516.0714111328125, "learning_rate": 3.6586995109458266e-05, "loss": 70.7459, "step": 67460 }, { "epoch": 0.27258733743540847, "grad_norm": 452.1062927246094, "learning_rate": 3.6585434679850866e-05, "loss": 44.2226, "step": 67470 }, { "epoch": 0.2726277387007761, "grad_norm": 719.1903076171875, "learning_rate": 3.6583873926901866e-05, "loss": 64.644, "step": 67480 }, { "epoch": 0.27266813996614375, "grad_norm": 807.54052734375, "learning_rate": 3.658231285064172e-05, "loss": 65.3155, "step": 67490 }, { "epoch": 0.2727085412315114, "grad_norm": 656.8856201171875, "learning_rate": 3.658075145110083e-05, "loss": 58.9538, "step": 67500 }, { "epoch": 0.272748942496879, "grad_norm": 619.9434814453125, "learning_rate": 3.657918972830967e-05, "loss": 75.0069, "step": 67510 }, { "epoch": 0.2727893437622466, "grad_norm": 970.5076293945312, "learning_rate": 3.657762768229867e-05, "loss": 48.6334, "step": 67520 }, { "epoch": 0.27282974502761426, "grad_norm": 243.0670928955078, "learning_rate": 3.6576065313098276e-05, "loss": 77.6509, "step": 67530 }, { "epoch": 0.2728701462929819, "grad_norm": 420.6954345703125, "learning_rate": 3.657450262073896e-05, "loss": 51.6488, "step": 67540 }, { "epoch": 0.27291054755834954, "grad_norm": 1002.0394287109375, "learning_rate": 3.657293960525118e-05, "loss": 68.1972, "step": 67550 }, { "epoch": 0.2729509488237172, "grad_norm": 814.0970458984375, "learning_rate": 3.657137626666541e-05, "loss": 48.7616, "step": 67560 }, { "epoch": 0.2729913500890848, "grad_norm": 674.52978515625, "learning_rate": 3.656981260501213e-05, "loss": 67.1653, "step": 67570 }, { "epoch": 0.2730317513544524, "grad_norm": 740.9395751953125, "learning_rate": 3.656824862032182e-05, "loss": 67.1278, "step": 67580 }, { "epoch": 0.27307215261982004, "grad_norm": 467.32293701171875, "learning_rate": 3.6566684312624966e-05, "loss": 66.5664, "step": 67590 }, { "epoch": 0.2731125538851877, "grad_norm": 1745.7122802734375, "learning_rate": 3.6565119681952086e-05, "loss": 79.5672, "step": 67600 }, { "epoch": 0.2731529551505553, "grad_norm": 1980.6866455078125, "learning_rate": 3.6563554728333664e-05, "loss": 61.6905, "step": 67610 }, { "epoch": 0.27319335641592296, "grad_norm": 650.4952392578125, "learning_rate": 3.6561989451800215e-05, "loss": 66.1603, "step": 67620 }, { "epoch": 0.2732337576812906, "grad_norm": 501.155517578125, "learning_rate": 3.656042385238225e-05, "loss": 60.0304, "step": 67630 }, { "epoch": 0.2732741589466582, "grad_norm": 437.7528991699219, "learning_rate": 3.655885793011031e-05, "loss": 118.2731, "step": 67640 }, { "epoch": 0.2733145602120258, "grad_norm": 601.8849487304688, "learning_rate": 3.6557291685014896e-05, "loss": 73.234, "step": 67650 }, { "epoch": 0.27335496147739347, "grad_norm": 1878.159912109375, "learning_rate": 3.655572511712656e-05, "loss": 96.2345, "step": 67660 }, { "epoch": 0.2733953627427611, "grad_norm": 1368.6416015625, "learning_rate": 3.655415822647584e-05, "loss": 87.2413, "step": 67670 }, { "epoch": 0.27343576400812875, "grad_norm": 760.669677734375, "learning_rate": 3.655259101309328e-05, "loss": 70.731, "step": 67680 }, { "epoch": 0.2734761652734964, "grad_norm": 915.2348022460938, "learning_rate": 3.655102347700944e-05, "loss": 87.2867, "step": 67690 }, { "epoch": 0.27351656653886397, "grad_norm": 710.090087890625, "learning_rate": 3.654945561825488e-05, "loss": 74.8702, "step": 67700 }, { "epoch": 0.2735569678042316, "grad_norm": 1369.951171875, "learning_rate": 3.6547887436860164e-05, "loss": 81.5384, "step": 67710 }, { "epoch": 0.27359736906959925, "grad_norm": 1007.9266967773438, "learning_rate": 3.654631893285585e-05, "loss": 56.8456, "step": 67720 }, { "epoch": 0.2736377703349669, "grad_norm": 447.4447326660156, "learning_rate": 3.6544750106272534e-05, "loss": 42.0591, "step": 67730 }, { "epoch": 0.27367817160033453, "grad_norm": 568.894287109375, "learning_rate": 3.6543180957140804e-05, "loss": 80.5745, "step": 67740 }, { "epoch": 0.27371857286570217, "grad_norm": 843.6287231445312, "learning_rate": 3.654161148549124e-05, "loss": 90.9321, "step": 67750 }, { "epoch": 0.2737589741310698, "grad_norm": 1305.992919921875, "learning_rate": 3.654004169135444e-05, "loss": 71.9361, "step": 67760 }, { "epoch": 0.2737993753964374, "grad_norm": 798.0366821289062, "learning_rate": 3.653847157476101e-05, "loss": 49.0851, "step": 67770 }, { "epoch": 0.27383977666180503, "grad_norm": 863.2374877929688, "learning_rate": 3.653690113574158e-05, "loss": 84.5428, "step": 67780 }, { "epoch": 0.2738801779271727, "grad_norm": 584.3214111328125, "learning_rate": 3.6535330374326734e-05, "loss": 59.1151, "step": 67790 }, { "epoch": 0.2739205791925403, "grad_norm": 805.32568359375, "learning_rate": 3.6533759290547114e-05, "loss": 90.4222, "step": 67800 }, { "epoch": 0.27396098045790795, "grad_norm": 531.2085571289062, "learning_rate": 3.653218788443334e-05, "loss": 41.2134, "step": 67810 }, { "epoch": 0.2740013817232756, "grad_norm": 496.3345642089844, "learning_rate": 3.653061615601606e-05, "loss": 52.7027, "step": 67820 }, { "epoch": 0.2740417829886432, "grad_norm": 987.3363647460938, "learning_rate": 3.652904410532589e-05, "loss": 62.067, "step": 67830 }, { "epoch": 0.2740821842540108, "grad_norm": 778.4385986328125, "learning_rate": 3.6527471732393515e-05, "loss": 69.8173, "step": 67840 }, { "epoch": 0.27412258551937846, "grad_norm": 565.9769897460938, "learning_rate": 3.652589903724956e-05, "loss": 66.5563, "step": 67850 }, { "epoch": 0.2741629867847461, "grad_norm": 1377.9952392578125, "learning_rate": 3.65243260199247e-05, "loss": 58.3849, "step": 67860 }, { "epoch": 0.27420338805011374, "grad_norm": 641.1226196289062, "learning_rate": 3.652275268044959e-05, "loss": 106.0223, "step": 67870 }, { "epoch": 0.2742437893154814, "grad_norm": 1087.67236328125, "learning_rate": 3.6521179018854914e-05, "loss": 64.6192, "step": 67880 }, { "epoch": 0.274284190580849, "grad_norm": 1090.8638916015625, "learning_rate": 3.651960503517135e-05, "loss": 99.0607, "step": 67890 }, { "epoch": 0.2743245918462166, "grad_norm": 299.8224792480469, "learning_rate": 3.651803072942957e-05, "loss": 45.6526, "step": 67900 }, { "epoch": 0.27436499311158424, "grad_norm": 1958.9107666015625, "learning_rate": 3.6516456101660286e-05, "loss": 119.4297, "step": 67910 }, { "epoch": 0.2744053943769519, "grad_norm": 1904.2401123046875, "learning_rate": 3.651488115189419e-05, "loss": 86.1005, "step": 67920 }, { "epoch": 0.2744457956423195, "grad_norm": 1208.603271484375, "learning_rate": 3.651330588016198e-05, "loss": 75.8416, "step": 67930 }, { "epoch": 0.27448619690768716, "grad_norm": 868.4407958984375, "learning_rate": 3.6511730286494366e-05, "loss": 51.1043, "step": 67940 }, { "epoch": 0.2745265981730548, "grad_norm": 997.5648193359375, "learning_rate": 3.651015437092208e-05, "loss": 54.7892, "step": 67950 }, { "epoch": 0.2745669994384224, "grad_norm": 651.9278564453125, "learning_rate": 3.650857813347582e-05, "loss": 70.1957, "step": 67960 }, { "epoch": 0.27460740070379, "grad_norm": 788.9302368164062, "learning_rate": 3.6507001574186346e-05, "loss": 110.4476, "step": 67970 }, { "epoch": 0.27464780196915767, "grad_norm": 737.9695434570312, "learning_rate": 3.650542469308437e-05, "loss": 75.0885, "step": 67980 }, { "epoch": 0.2746882032345253, "grad_norm": 0.0, "learning_rate": 3.6503847490200636e-05, "loss": 76.1971, "step": 67990 }, { "epoch": 0.27472860449989295, "grad_norm": 494.2500915527344, "learning_rate": 3.6502269965565904e-05, "loss": 51.3936, "step": 68000 }, { "epoch": 0.2747690057652606, "grad_norm": 775.4039916992188, "learning_rate": 3.650069211921093e-05, "loss": 51.5934, "step": 68010 }, { "epoch": 0.27480940703062817, "grad_norm": 541.7156982421875, "learning_rate": 3.649911395116646e-05, "loss": 51.5515, "step": 68020 }, { "epoch": 0.2748498082959958, "grad_norm": 745.6041870117188, "learning_rate": 3.649753546146327e-05, "loss": 48.3755, "step": 68030 }, { "epoch": 0.27489020956136345, "grad_norm": 969.8783569335938, "learning_rate": 3.6495956650132126e-05, "loss": 95.7821, "step": 68040 }, { "epoch": 0.2749306108267311, "grad_norm": 513.9328002929688, "learning_rate": 3.6494377517203825e-05, "loss": 77.0404, "step": 68050 }, { "epoch": 0.27497101209209873, "grad_norm": 396.3102111816406, "learning_rate": 3.649279806270914e-05, "loss": 63.6906, "step": 68060 }, { "epoch": 0.27501141335746637, "grad_norm": 519.8453979492188, "learning_rate": 3.6491218286678867e-05, "loss": 66.4816, "step": 68070 }, { "epoch": 0.275051814622834, "grad_norm": 1306.8505859375, "learning_rate": 3.648963818914379e-05, "loss": 55.9493, "step": 68080 }, { "epoch": 0.2750922158882016, "grad_norm": 610.3496704101562, "learning_rate": 3.6488057770134746e-05, "loss": 54.7979, "step": 68090 }, { "epoch": 0.27513261715356924, "grad_norm": 779.8862915039062, "learning_rate": 3.648647702968252e-05, "loss": 81.0502, "step": 68100 }, { "epoch": 0.2751730184189369, "grad_norm": 871.0150146484375, "learning_rate": 3.6484895967817935e-05, "loss": 66.1679, "step": 68110 }, { "epoch": 0.2752134196843045, "grad_norm": 635.9258422851562, "learning_rate": 3.6483314584571815e-05, "loss": 41.4136, "step": 68120 }, { "epoch": 0.27525382094967216, "grad_norm": 360.8929138183594, "learning_rate": 3.6481732879974995e-05, "loss": 73.421, "step": 68130 }, { "epoch": 0.2752942222150398, "grad_norm": 674.0567016601562, "learning_rate": 3.64801508540583e-05, "loss": 68.4341, "step": 68140 }, { "epoch": 0.2753346234804074, "grad_norm": 957.514892578125, "learning_rate": 3.647856850685259e-05, "loss": 80.5406, "step": 68150 }, { "epoch": 0.275375024745775, "grad_norm": 2177.1552734375, "learning_rate": 3.6476985838388694e-05, "loss": 76.5557, "step": 68160 }, { "epoch": 0.27541542601114266, "grad_norm": 749.5941162109375, "learning_rate": 3.647540284869748e-05, "loss": 72.2992, "step": 68170 }, { "epoch": 0.2754558272765103, "grad_norm": 736.7278442382812, "learning_rate": 3.647381953780981e-05, "loss": 94.8278, "step": 68180 }, { "epoch": 0.27549622854187794, "grad_norm": 895.4473876953125, "learning_rate": 3.647223590575654e-05, "loss": 63.1256, "step": 68190 }, { "epoch": 0.2755366298072456, "grad_norm": 361.90374755859375, "learning_rate": 3.647065195256855e-05, "loss": 82.1848, "step": 68200 }, { "epoch": 0.2755770310726132, "grad_norm": 746.0802001953125, "learning_rate": 3.6469067678276726e-05, "loss": 78.7891, "step": 68210 }, { "epoch": 0.2756174323379808, "grad_norm": 260.87823486328125, "learning_rate": 3.6467483082911945e-05, "loss": 53.1604, "step": 68220 }, { "epoch": 0.27565783360334845, "grad_norm": 544.7657470703125, "learning_rate": 3.646589816650511e-05, "loss": 79.9751, "step": 68230 }, { "epoch": 0.2756982348687161, "grad_norm": 709.5982666015625, "learning_rate": 3.64643129290871e-05, "loss": 64.3202, "step": 68240 }, { "epoch": 0.2757386361340837, "grad_norm": 650.8275756835938, "learning_rate": 3.6462727370688844e-05, "loss": 55.9401, "step": 68250 }, { "epoch": 0.27577903739945137, "grad_norm": 303.5444030761719, "learning_rate": 3.6461141491341235e-05, "loss": 100.8449, "step": 68260 }, { "epoch": 0.275819438664819, "grad_norm": 456.7483825683594, "learning_rate": 3.6459555291075204e-05, "loss": 78.0596, "step": 68270 }, { "epoch": 0.2758598399301866, "grad_norm": 457.44207763671875, "learning_rate": 3.6457968769921664e-05, "loss": 59.3378, "step": 68280 }, { "epoch": 0.27590024119555423, "grad_norm": 1555.9522705078125, "learning_rate": 3.645638192791155e-05, "loss": 79.8157, "step": 68290 }, { "epoch": 0.27594064246092187, "grad_norm": 822.7843627929688, "learning_rate": 3.64547947650758e-05, "loss": 71.4093, "step": 68300 }, { "epoch": 0.2759810437262895, "grad_norm": 948.1829833984375, "learning_rate": 3.6453207281445356e-05, "loss": 98.2951, "step": 68310 }, { "epoch": 0.27602144499165715, "grad_norm": 1063.466796875, "learning_rate": 3.6451619477051165e-05, "loss": 81.6967, "step": 68320 }, { "epoch": 0.2760618462570248, "grad_norm": 790.401123046875, "learning_rate": 3.645003135192418e-05, "loss": 107.049, "step": 68330 }, { "epoch": 0.2761022475223924, "grad_norm": 1264.2486572265625, "learning_rate": 3.6448442906095365e-05, "loss": 91.9635, "step": 68340 }, { "epoch": 0.27614264878776, "grad_norm": 358.43896484375, "learning_rate": 3.644685413959569e-05, "loss": 62.9964, "step": 68350 }, { "epoch": 0.27618305005312765, "grad_norm": 1053.30615234375, "learning_rate": 3.644526505245612e-05, "loss": 105.667, "step": 68360 }, { "epoch": 0.2762234513184953, "grad_norm": 741.2691040039062, "learning_rate": 3.6443675644707645e-05, "loss": 65.3842, "step": 68370 }, { "epoch": 0.27626385258386293, "grad_norm": 614.4652709960938, "learning_rate": 3.6442085916381256e-05, "loss": 63.0738, "step": 68380 }, { "epoch": 0.2763042538492306, "grad_norm": 545.9593505859375, "learning_rate": 3.644049586750792e-05, "loss": 76.6601, "step": 68390 }, { "epoch": 0.2763446551145982, "grad_norm": 351.7081298828125, "learning_rate": 3.6438905498118665e-05, "loss": 80.7119, "step": 68400 }, { "epoch": 0.2763850563799658, "grad_norm": 450.5742492675781, "learning_rate": 3.643731480824448e-05, "loss": 56.7873, "step": 68410 }, { "epoch": 0.27642545764533344, "grad_norm": 833.8705444335938, "learning_rate": 3.6435723797916384e-05, "loss": 52.5591, "step": 68420 }, { "epoch": 0.2764658589107011, "grad_norm": 304.5091857910156, "learning_rate": 3.643413246716539e-05, "loss": 35.8782, "step": 68430 }, { "epoch": 0.2765062601760687, "grad_norm": 386.6527404785156, "learning_rate": 3.643254081602252e-05, "loss": 66.0746, "step": 68440 }, { "epoch": 0.27654666144143636, "grad_norm": 1705.518798828125, "learning_rate": 3.6430948844518806e-05, "loss": 92.2069, "step": 68450 }, { "epoch": 0.276587062706804, "grad_norm": 807.954833984375, "learning_rate": 3.6429356552685285e-05, "loss": 53.035, "step": 68460 }, { "epoch": 0.2766274639721716, "grad_norm": 818.7935180664062, "learning_rate": 3.6427763940553004e-05, "loss": 57.0737, "step": 68470 }, { "epoch": 0.2766678652375392, "grad_norm": 510.9322204589844, "learning_rate": 3.6426171008153e-05, "loss": 71.6756, "step": 68480 }, { "epoch": 0.27670826650290686, "grad_norm": 898.836181640625, "learning_rate": 3.6424577755516344e-05, "loss": 73.5406, "step": 68490 }, { "epoch": 0.2767486677682745, "grad_norm": 512.6793823242188, "learning_rate": 3.642298418267408e-05, "loss": 81.9066, "step": 68500 }, { "epoch": 0.27678906903364214, "grad_norm": 597.5086669921875, "learning_rate": 3.642139028965729e-05, "loss": 43.1931, "step": 68510 }, { "epoch": 0.2768294702990098, "grad_norm": 1361.612548828125, "learning_rate": 3.6419796076497045e-05, "loss": 69.7019, "step": 68520 }, { "epoch": 0.2768698715643774, "grad_norm": 8374.60546875, "learning_rate": 3.6418201543224414e-05, "loss": 79.9838, "step": 68530 }, { "epoch": 0.276910272829745, "grad_norm": 1369.9517822265625, "learning_rate": 3.64166066898705e-05, "loss": 96.7081, "step": 68540 }, { "epoch": 0.27695067409511265, "grad_norm": 564.1013793945312, "learning_rate": 3.641501151646638e-05, "loss": 82.5782, "step": 68550 }, { "epoch": 0.2769910753604803, "grad_norm": 1302.3148193359375, "learning_rate": 3.641341602304316e-05, "loss": 66.468, "step": 68560 }, { "epoch": 0.27703147662584793, "grad_norm": 1007.4530639648438, "learning_rate": 3.6411820209631944e-05, "loss": 61.811, "step": 68570 }, { "epoch": 0.27707187789121557, "grad_norm": 2957.66455078125, "learning_rate": 3.641022407626385e-05, "loss": 69.4076, "step": 68580 }, { "epoch": 0.2771122791565832, "grad_norm": 1141.3900146484375, "learning_rate": 3.640862762296999e-05, "loss": 55.4128, "step": 68590 }, { "epoch": 0.2771526804219508, "grad_norm": 1451.926513671875, "learning_rate": 3.6407030849781475e-05, "loss": 75.5169, "step": 68600 }, { "epoch": 0.27719308168731843, "grad_norm": 824.8275146484375, "learning_rate": 3.6405433756729456e-05, "loss": 50.1131, "step": 68610 }, { "epoch": 0.27723348295268607, "grad_norm": 549.1823120117188, "learning_rate": 3.640383634384505e-05, "loss": 50.6561, "step": 68620 }, { "epoch": 0.2772738842180537, "grad_norm": 459.22216796875, "learning_rate": 3.6402238611159424e-05, "loss": 92.3009, "step": 68630 }, { "epoch": 0.27731428548342135, "grad_norm": 651.8291625976562, "learning_rate": 3.64006405587037e-05, "loss": 63.6321, "step": 68640 }, { "epoch": 0.277354686748789, "grad_norm": 1424.467041015625, "learning_rate": 3.639904218650905e-05, "loss": 72.4273, "step": 68650 }, { "epoch": 0.2773950880141566, "grad_norm": 768.332763671875, "learning_rate": 3.639744349460663e-05, "loss": 57.5873, "step": 68660 }, { "epoch": 0.2774354892795242, "grad_norm": 657.2222290039062, "learning_rate": 3.639584448302761e-05, "loss": 52.1681, "step": 68670 }, { "epoch": 0.27747589054489186, "grad_norm": 692.5430297851562, "learning_rate": 3.639424515180315e-05, "loss": 61.6016, "step": 68680 }, { "epoch": 0.2775162918102595, "grad_norm": 700.7813110351562, "learning_rate": 3.639264550096445e-05, "loss": 86.2401, "step": 68690 }, { "epoch": 0.27755669307562714, "grad_norm": 354.1077575683594, "learning_rate": 3.639104553054268e-05, "loss": 84.5295, "step": 68700 }, { "epoch": 0.2775970943409948, "grad_norm": 1245.1407470703125, "learning_rate": 3.6389445240569043e-05, "loss": 67.8237, "step": 68710 }, { "epoch": 0.2776374956063624, "grad_norm": 736.7760620117188, "learning_rate": 3.6387844631074726e-05, "loss": 61.6653, "step": 68720 }, { "epoch": 0.27767789687173, "grad_norm": 282.4181213378906, "learning_rate": 3.638624370209095e-05, "loss": 59.5905, "step": 68730 }, { "epoch": 0.27771829813709764, "grad_norm": 407.526123046875, "learning_rate": 3.638464245364891e-05, "loss": 70.27, "step": 68740 }, { "epoch": 0.2777586994024653, "grad_norm": 1361.0655517578125, "learning_rate": 3.638304088577984e-05, "loss": 68.5818, "step": 68750 }, { "epoch": 0.2777991006678329, "grad_norm": 749.9307250976562, "learning_rate": 3.6381438998514945e-05, "loss": 58.5423, "step": 68760 }, { "epoch": 0.27783950193320056, "grad_norm": 986.7581787109375, "learning_rate": 3.637983679188547e-05, "loss": 89.6241, "step": 68770 }, { "epoch": 0.2778799031985682, "grad_norm": 441.9813232421875, "learning_rate": 3.6378234265922636e-05, "loss": 52.9904, "step": 68780 }, { "epoch": 0.2779203044639358, "grad_norm": 871.7626342773438, "learning_rate": 3.6376631420657695e-05, "loss": 85.0174, "step": 68790 }, { "epoch": 0.2779607057293034, "grad_norm": 1010.308837890625, "learning_rate": 3.637502825612189e-05, "loss": 70.3216, "step": 68800 }, { "epoch": 0.27800110699467107, "grad_norm": 394.0765380859375, "learning_rate": 3.637342477234649e-05, "loss": 54.1732, "step": 68810 }, { "epoch": 0.2780415082600387, "grad_norm": 1378.3243408203125, "learning_rate": 3.637182096936274e-05, "loss": 78.4019, "step": 68820 }, { "epoch": 0.27808190952540635, "grad_norm": 807.8096313476562, "learning_rate": 3.63702168472019e-05, "loss": 70.5899, "step": 68830 }, { "epoch": 0.278122310790774, "grad_norm": 748.4146728515625, "learning_rate": 3.636861240589527e-05, "loss": 62.313, "step": 68840 }, { "epoch": 0.2781627120561416, "grad_norm": 1081.0704345703125, "learning_rate": 3.63670076454741e-05, "loss": 77.3886, "step": 68850 }, { "epoch": 0.2782031133215092, "grad_norm": 1138.8304443359375, "learning_rate": 3.63654025659697e-05, "loss": 89.0992, "step": 68860 }, { "epoch": 0.27824351458687685, "grad_norm": 1212.7421875, "learning_rate": 3.636379716741335e-05, "loss": 53.7292, "step": 68870 }, { "epoch": 0.2782839158522445, "grad_norm": 656.9420776367188, "learning_rate": 3.6362191449836346e-05, "loss": 68.1364, "step": 68880 }, { "epoch": 0.27832431711761213, "grad_norm": 702.285400390625, "learning_rate": 3.6360585413269995e-05, "loss": 54.8042, "step": 68890 }, { "epoch": 0.27836471838297977, "grad_norm": 1127.06591796875, "learning_rate": 3.6358979057745615e-05, "loss": 59.6537, "step": 68900 }, { "epoch": 0.2784051196483474, "grad_norm": 1014.2935180664062, "learning_rate": 3.635737238329451e-05, "loss": 58.5084, "step": 68910 }, { "epoch": 0.278445520913715, "grad_norm": 724.390625, "learning_rate": 3.6355765389948015e-05, "loss": 85.7793, "step": 68920 }, { "epoch": 0.27848592217908263, "grad_norm": 1114.0927734375, "learning_rate": 3.635415807773745e-05, "loss": 84.4643, "step": 68930 }, { "epoch": 0.2785263234444503, "grad_norm": 676.5823364257812, "learning_rate": 3.635255044669415e-05, "loss": 70.0983, "step": 68940 }, { "epoch": 0.2785667247098179, "grad_norm": 1191.34619140625, "learning_rate": 3.635094249684946e-05, "loss": 57.6529, "step": 68950 }, { "epoch": 0.27860712597518555, "grad_norm": 1170.3385009765625, "learning_rate": 3.6349334228234736e-05, "loss": 65.0031, "step": 68960 }, { "epoch": 0.2786475272405532, "grad_norm": 862.3743896484375, "learning_rate": 3.634772564088132e-05, "loss": 50.383, "step": 68970 }, { "epoch": 0.2786879285059208, "grad_norm": 211.3525848388672, "learning_rate": 3.6346116734820575e-05, "loss": 59.9885, "step": 68980 }, { "epoch": 0.2787283297712884, "grad_norm": 1631.1240234375, "learning_rate": 3.634450751008387e-05, "loss": 49.5971, "step": 68990 }, { "epoch": 0.27876873103665606, "grad_norm": 648.0172729492188, "learning_rate": 3.634289796670257e-05, "loss": 68.3455, "step": 69000 }, { "epoch": 0.2788091323020237, "grad_norm": 1328.7872314453125, "learning_rate": 3.634128810470807e-05, "loss": 78.5994, "step": 69010 }, { "epoch": 0.27884953356739134, "grad_norm": 1701.229248046875, "learning_rate": 3.633967792413174e-05, "loss": 78.8696, "step": 69020 }, { "epoch": 0.278889934832759, "grad_norm": 937.852783203125, "learning_rate": 3.6338067425004975e-05, "loss": 90.5087, "step": 69030 }, { "epoch": 0.2789303360981266, "grad_norm": 506.93182373046875, "learning_rate": 3.633645660735918e-05, "loss": 88.4482, "step": 69040 }, { "epoch": 0.2789707373634942, "grad_norm": 837.9589233398438, "learning_rate": 3.633484547122575e-05, "loss": 67.4671, "step": 69050 }, { "epoch": 0.27901113862886184, "grad_norm": 354.0655822753906, "learning_rate": 3.63332340166361e-05, "loss": 61.7967, "step": 69060 }, { "epoch": 0.2790515398942295, "grad_norm": 471.1148681640625, "learning_rate": 3.633162224362164e-05, "loss": 38.5641, "step": 69070 }, { "epoch": 0.2790919411595971, "grad_norm": 1305.4991455078125, "learning_rate": 3.63300101522138e-05, "loss": 109.4982, "step": 69080 }, { "epoch": 0.27913234242496476, "grad_norm": 1326.2255859375, "learning_rate": 3.6328397742444003e-05, "loss": 60.515, "step": 69090 }, { "epoch": 0.2791727436903324, "grad_norm": 674.3216552734375, "learning_rate": 3.632678501434368e-05, "loss": 77.833, "step": 69100 }, { "epoch": 0.2792131449557, "grad_norm": 798.8903198242188, "learning_rate": 3.632517196794429e-05, "loss": 67.0676, "step": 69110 }, { "epoch": 0.27925354622106763, "grad_norm": 662.2749633789062, "learning_rate": 3.632355860327725e-05, "loss": 70.8592, "step": 69120 }, { "epoch": 0.27929394748643527, "grad_norm": 559.8394165039062, "learning_rate": 3.632194492037404e-05, "loss": 55.1575, "step": 69130 }, { "epoch": 0.2793343487518029, "grad_norm": 406.7459716796875, "learning_rate": 3.632033091926612e-05, "loss": 67.0519, "step": 69140 }, { "epoch": 0.27937475001717055, "grad_norm": 1365.2215576171875, "learning_rate": 3.631871659998494e-05, "loss": 78.6769, "step": 69150 }, { "epoch": 0.2794151512825382, "grad_norm": 2076.132080078125, "learning_rate": 3.6317101962561974e-05, "loss": 114.1054, "step": 69160 }, { "epoch": 0.27945555254790583, "grad_norm": 582.463623046875, "learning_rate": 3.6315487007028706e-05, "loss": 79.3306, "step": 69170 }, { "epoch": 0.2794959538132734, "grad_norm": 803.3694458007812, "learning_rate": 3.631387173341662e-05, "loss": 88.376, "step": 69180 }, { "epoch": 0.27953635507864105, "grad_norm": 403.2090148925781, "learning_rate": 3.631225614175721e-05, "loss": 84.4055, "step": 69190 }, { "epoch": 0.2795767563440087, "grad_norm": 682.4560546875, "learning_rate": 3.6310640232081966e-05, "loss": 112.4025, "step": 69200 }, { "epoch": 0.27961715760937633, "grad_norm": 549.6954345703125, "learning_rate": 3.630902400442239e-05, "loss": 70.8671, "step": 69210 }, { "epoch": 0.27965755887474397, "grad_norm": 880.340087890625, "learning_rate": 3.630740745881e-05, "loss": 78.0043, "step": 69220 }, { "epoch": 0.2796979601401116, "grad_norm": 924.3558349609375, "learning_rate": 3.6305790595276303e-05, "loss": 59.7715, "step": 69230 }, { "epoch": 0.2797383614054792, "grad_norm": 0.0, "learning_rate": 3.630417341385282e-05, "loss": 47.4674, "step": 69240 }, { "epoch": 0.27977876267084684, "grad_norm": 1742.8243408203125, "learning_rate": 3.630255591457108e-05, "loss": 101.035, "step": 69250 }, { "epoch": 0.2798191639362145, "grad_norm": 2068.00048828125, "learning_rate": 3.6300938097462634e-05, "loss": 60.5279, "step": 69260 }, { "epoch": 0.2798595652015821, "grad_norm": 984.5831298828125, "learning_rate": 3.6299319962559e-05, "loss": 87.9492, "step": 69270 }, { "epoch": 0.27989996646694976, "grad_norm": 775.4241333007812, "learning_rate": 3.629770150989173e-05, "loss": 105.0423, "step": 69280 }, { "epoch": 0.2799403677323174, "grad_norm": 941.29248046875, "learning_rate": 3.629608273949238e-05, "loss": 77.3444, "step": 69290 }, { "epoch": 0.279980768997685, "grad_norm": 809.008544921875, "learning_rate": 3.62944636513925e-05, "loss": 54.6458, "step": 69300 }, { "epoch": 0.2800211702630526, "grad_norm": 848.7319946289062, "learning_rate": 3.629284424562367e-05, "loss": 82.872, "step": 69310 }, { "epoch": 0.28006157152842026, "grad_norm": 483.484130859375, "learning_rate": 3.629122452221746e-05, "loss": 79.1142, "step": 69320 }, { "epoch": 0.2801019727937879, "grad_norm": 295.2940979003906, "learning_rate": 3.6289604481205424e-05, "loss": 71.1907, "step": 69330 }, { "epoch": 0.28014237405915554, "grad_norm": 685.3955688476562, "learning_rate": 3.628798412261917e-05, "loss": 65.1816, "step": 69340 }, { "epoch": 0.2801827753245232, "grad_norm": 611.7601318359375, "learning_rate": 3.628636344649028e-05, "loss": 104.939, "step": 69350 }, { "epoch": 0.2802231765898908, "grad_norm": 1643.69677734375, "learning_rate": 3.628474245285035e-05, "loss": 66.5051, "step": 69360 }, { "epoch": 0.2802635778552584, "grad_norm": 894.060546875, "learning_rate": 3.628312114173098e-05, "loss": 57.6963, "step": 69370 }, { "epoch": 0.28030397912062605, "grad_norm": 570.6958618164062, "learning_rate": 3.628149951316378e-05, "loss": 56.4009, "step": 69380 }, { "epoch": 0.2803443803859937, "grad_norm": 662.741455078125, "learning_rate": 3.627987756718037e-05, "loss": 64.0986, "step": 69390 }, { "epoch": 0.2803847816513613, "grad_norm": 932.7979125976562, "learning_rate": 3.6278255303812366e-05, "loss": 83.0091, "step": 69400 }, { "epoch": 0.28042518291672897, "grad_norm": 541.6932983398438, "learning_rate": 3.627663272309139e-05, "loss": 72.5464, "step": 69410 }, { "epoch": 0.2804655841820966, "grad_norm": 1677.179443359375, "learning_rate": 3.627500982504908e-05, "loss": 79.6893, "step": 69420 }, { "epoch": 0.2805059854474642, "grad_norm": 921.6911010742188, "learning_rate": 3.6273386609717076e-05, "loss": 89.6619, "step": 69430 }, { "epoch": 0.28054638671283183, "grad_norm": 1072.14111328125, "learning_rate": 3.627176307712702e-05, "loss": 56.191, "step": 69440 }, { "epoch": 0.28058678797819947, "grad_norm": 885.6234130859375, "learning_rate": 3.627013922731056e-05, "loss": 70.9728, "step": 69450 }, { "epoch": 0.2806271892435671, "grad_norm": 1424.7335205078125, "learning_rate": 3.626851506029937e-05, "loss": 60.7801, "step": 69460 }, { "epoch": 0.28066759050893475, "grad_norm": 1208.6298828125, "learning_rate": 3.6266890576125095e-05, "loss": 68.1118, "step": 69470 }, { "epoch": 0.2807079917743024, "grad_norm": 2981.10107421875, "learning_rate": 3.6265265774819415e-05, "loss": 83.2845, "step": 69480 }, { "epoch": 0.28074839303967003, "grad_norm": 630.0430297851562, "learning_rate": 3.626364065641401e-05, "loss": 49.1042, "step": 69490 }, { "epoch": 0.2807887943050376, "grad_norm": 621.1134643554688, "learning_rate": 3.6262015220940556e-05, "loss": 118.1, "step": 69500 }, { "epoch": 0.28082919557040525, "grad_norm": 781.4208984375, "learning_rate": 3.626038946843074e-05, "loss": 64.6514, "step": 69510 }, { "epoch": 0.2808695968357729, "grad_norm": 446.54052734375, "learning_rate": 3.625876339891626e-05, "loss": 90.1646, "step": 69520 }, { "epoch": 0.28090999810114053, "grad_norm": 615.22900390625, "learning_rate": 3.625713701242882e-05, "loss": 69.1315, "step": 69530 }, { "epoch": 0.2809503993665082, "grad_norm": 791.39501953125, "learning_rate": 3.6255510309000126e-05, "loss": 74.8812, "step": 69540 }, { "epoch": 0.2809908006318758, "grad_norm": 995.6783447265625, "learning_rate": 3.625388328866188e-05, "loss": 68.0451, "step": 69550 }, { "epoch": 0.2810312018972434, "grad_norm": 277.51214599609375, "learning_rate": 3.625225595144582e-05, "loss": 85.4179, "step": 69560 }, { "epoch": 0.28107160316261104, "grad_norm": 1276.77685546875, "learning_rate": 3.6250628297383664e-05, "loss": 65.1418, "step": 69570 }, { "epoch": 0.2811120044279787, "grad_norm": 551.7674560546875, "learning_rate": 3.624900032650714e-05, "loss": 73.1631, "step": 69580 }, { "epoch": 0.2811524056933463, "grad_norm": 577.58544921875, "learning_rate": 3.624737203884798e-05, "loss": 50.5589, "step": 69590 }, { "epoch": 0.28119280695871396, "grad_norm": 1459.6531982421875, "learning_rate": 3.624574343443794e-05, "loss": 108.8682, "step": 69600 }, { "epoch": 0.2812332082240816, "grad_norm": 866.807861328125, "learning_rate": 3.624411451330878e-05, "loss": 69.1802, "step": 69610 }, { "epoch": 0.2812736094894492, "grad_norm": 476.4073791503906, "learning_rate": 3.624248527549224e-05, "loss": 53.6986, "step": 69620 }, { "epoch": 0.2813140107548168, "grad_norm": 829.8703002929688, "learning_rate": 3.624085572102009e-05, "loss": 69.2045, "step": 69630 }, { "epoch": 0.28135441202018446, "grad_norm": 836.7081298828125, "learning_rate": 3.6239225849924086e-05, "loss": 75.2816, "step": 69640 }, { "epoch": 0.2813948132855521, "grad_norm": 838.9620971679688, "learning_rate": 3.623759566223602e-05, "loss": 79.1252, "step": 69650 }, { "epoch": 0.28143521455091974, "grad_norm": 1362.0731201171875, "learning_rate": 3.6235965157987665e-05, "loss": 72.6706, "step": 69660 }, { "epoch": 0.2814756158162874, "grad_norm": 597.159912109375, "learning_rate": 3.623433433721081e-05, "loss": 72.3076, "step": 69670 }, { "epoch": 0.281516017081655, "grad_norm": 471.72637939453125, "learning_rate": 3.6232703199937246e-05, "loss": 39.1326, "step": 69680 }, { "epoch": 0.2815564183470226, "grad_norm": 645.277099609375, "learning_rate": 3.6231071746198784e-05, "loss": 97.0042, "step": 69690 }, { "epoch": 0.28159681961239025, "grad_norm": 985.9708862304688, "learning_rate": 3.6229439976027225e-05, "loss": 62.6094, "step": 69700 }, { "epoch": 0.2816372208777579, "grad_norm": 535.4151611328125, "learning_rate": 3.622780788945437e-05, "loss": 75.3714, "step": 69710 }, { "epoch": 0.28167762214312553, "grad_norm": 381.4093017578125, "learning_rate": 3.622617548651205e-05, "loss": 93.5277, "step": 69720 }, { "epoch": 0.28171802340849317, "grad_norm": 681.9583740234375, "learning_rate": 3.6224542767232076e-05, "loss": 66.7195, "step": 69730 }, { "epoch": 0.2817584246738608, "grad_norm": 748.0338745117188, "learning_rate": 3.62229097316463e-05, "loss": 112.8168, "step": 69740 }, { "epoch": 0.2817988259392284, "grad_norm": 588.4625854492188, "learning_rate": 3.6221276379786534e-05, "loss": 85.1326, "step": 69750 }, { "epoch": 0.28183922720459603, "grad_norm": 489.44464111328125, "learning_rate": 3.621964271168464e-05, "loss": 84.1481, "step": 69760 }, { "epoch": 0.2818796284699637, "grad_norm": 875.048828125, "learning_rate": 3.621800872737247e-05, "loss": 62.7425, "step": 69770 }, { "epoch": 0.2819200297353313, "grad_norm": 923.087646484375, "learning_rate": 3.6216374426881854e-05, "loss": 67.2313, "step": 69780 }, { "epoch": 0.28196043100069895, "grad_norm": 1275.936279296875, "learning_rate": 3.6214739810244684e-05, "loss": 69.6002, "step": 69790 }, { "epoch": 0.2820008322660666, "grad_norm": 778.4428100585938, "learning_rate": 3.62131048774928e-05, "loss": 58.0094, "step": 69800 }, { "epoch": 0.28204123353143423, "grad_norm": 868.2858276367188, "learning_rate": 3.62114696286581e-05, "loss": 86.843, "step": 69810 }, { "epoch": 0.2820816347968018, "grad_norm": 390.36212158203125, "learning_rate": 3.6209834063772446e-05, "loss": 36.7542, "step": 69820 }, { "epoch": 0.28212203606216946, "grad_norm": 660.957763671875, "learning_rate": 3.6208198182867737e-05, "loss": 66.1943, "step": 69830 }, { "epoch": 0.2821624373275371, "grad_norm": 882.1265869140625, "learning_rate": 3.620656198597586e-05, "loss": 115.4422, "step": 69840 }, { "epoch": 0.28220283859290474, "grad_norm": 762.0680541992188, "learning_rate": 3.620492547312871e-05, "loss": 62.1258, "step": 69850 }, { "epoch": 0.2822432398582724, "grad_norm": 1419.7911376953125, "learning_rate": 3.62032886443582e-05, "loss": 59.704, "step": 69860 }, { "epoch": 0.28228364112364, "grad_norm": 1616.5609130859375, "learning_rate": 3.620165149969623e-05, "loss": 102.7321, "step": 69870 }, { "epoch": 0.2823240423890076, "grad_norm": 544.9016723632812, "learning_rate": 3.620001403917472e-05, "loss": 105.496, "step": 69880 }, { "epoch": 0.28236444365437524, "grad_norm": 887.2398071289062, "learning_rate": 3.6198376262825606e-05, "loss": 88.13, "step": 69890 }, { "epoch": 0.2824048449197429, "grad_norm": 1018.4188842773438, "learning_rate": 3.61967381706808e-05, "loss": 75.7496, "step": 69900 }, { "epoch": 0.2824452461851105, "grad_norm": 677.3698120117188, "learning_rate": 3.619509976277225e-05, "loss": 83.6201, "step": 69910 }, { "epoch": 0.28248564745047816, "grad_norm": 816.182861328125, "learning_rate": 3.619346103913189e-05, "loss": 51.5256, "step": 69920 }, { "epoch": 0.2825260487158458, "grad_norm": 779.9092407226562, "learning_rate": 3.6191821999791665e-05, "loss": 46.279, "step": 69930 }, { "epoch": 0.2825664499812134, "grad_norm": 862.4857788085938, "learning_rate": 3.619018264478354e-05, "loss": 64.6215, "step": 69940 }, { "epoch": 0.282606851246581, "grad_norm": 1061.5059814453125, "learning_rate": 3.6188542974139466e-05, "loss": 70.1019, "step": 69950 }, { "epoch": 0.28264725251194867, "grad_norm": 980.0887451171875, "learning_rate": 3.618690298789142e-05, "loss": 57.2568, "step": 69960 }, { "epoch": 0.2826876537773163, "grad_norm": 683.5441284179688, "learning_rate": 3.618526268607136e-05, "loss": 103.2784, "step": 69970 }, { "epoch": 0.28272805504268395, "grad_norm": 731.2699584960938, "learning_rate": 3.618362206871127e-05, "loss": 72.0942, "step": 69980 }, { "epoch": 0.2827684563080516, "grad_norm": 914.0678100585938, "learning_rate": 3.6181981135843134e-05, "loss": 45.8803, "step": 69990 }, { "epoch": 0.2828088575734192, "grad_norm": 1745.98486328125, "learning_rate": 3.6180339887498953e-05, "loss": 70.0135, "step": 70000 }, { "epoch": 0.2828492588387868, "grad_norm": 538.3794555664062, "learning_rate": 3.617869832371071e-05, "loss": 87.8444, "step": 70010 }, { "epoch": 0.28288966010415445, "grad_norm": 1619.2493896484375, "learning_rate": 3.617705644451041e-05, "loss": 60.7661, "step": 70020 }, { "epoch": 0.2829300613695221, "grad_norm": 870.5992431640625, "learning_rate": 3.617541424993007e-05, "loss": 61.3241, "step": 70030 }, { "epoch": 0.28297046263488973, "grad_norm": 293.04705810546875, "learning_rate": 3.617377174000171e-05, "loss": 77.9364, "step": 70040 }, { "epoch": 0.28301086390025737, "grad_norm": 715.2342529296875, "learning_rate": 3.617212891475733e-05, "loss": 102.4682, "step": 70050 }, { "epoch": 0.283051265165625, "grad_norm": 621.2023315429688, "learning_rate": 3.6170485774228976e-05, "loss": 67.3741, "step": 70060 }, { "epoch": 0.2830916664309926, "grad_norm": 952.3382568359375, "learning_rate": 3.6168842318448684e-05, "loss": 83.3982, "step": 70070 }, { "epoch": 0.28313206769636023, "grad_norm": 489.0035705566406, "learning_rate": 3.6167198547448476e-05, "loss": 67.6127, "step": 70080 }, { "epoch": 0.2831724689617279, "grad_norm": 1169.779296875, "learning_rate": 3.616555446126041e-05, "loss": 62.9801, "step": 70090 }, { "epoch": 0.2832128702270955, "grad_norm": 601.5595092773438, "learning_rate": 3.6163910059916544e-05, "loss": 77.4872, "step": 70100 }, { "epoch": 0.28325327149246315, "grad_norm": 621.1578369140625, "learning_rate": 3.6162265343448924e-05, "loss": 68.0312, "step": 70110 }, { "epoch": 0.2832936727578308, "grad_norm": 530.8627319335938, "learning_rate": 3.616062031188962e-05, "loss": 56.4951, "step": 70120 }, { "epoch": 0.2833340740231984, "grad_norm": 786.9353637695312, "learning_rate": 3.615897496527071e-05, "loss": 80.8605, "step": 70130 }, { "epoch": 0.283374475288566, "grad_norm": 1017.2835083007812, "learning_rate": 3.615732930362426e-05, "loss": 80.876, "step": 70140 }, { "epoch": 0.28341487655393366, "grad_norm": 886.339111328125, "learning_rate": 3.615568332698235e-05, "loss": 83.4627, "step": 70150 }, { "epoch": 0.2834552778193013, "grad_norm": 617.3817749023438, "learning_rate": 3.6154037035377084e-05, "loss": 73.36, "step": 70160 }, { "epoch": 0.28349567908466894, "grad_norm": 974.203125, "learning_rate": 3.6152390428840546e-05, "loss": 63.3565, "step": 70170 }, { "epoch": 0.2835360803500366, "grad_norm": 1039.22216796875, "learning_rate": 3.6150743507404845e-05, "loss": 65.9822, "step": 70180 }, { "epoch": 0.2835764816154042, "grad_norm": 760.4146118164062, "learning_rate": 3.614909627110208e-05, "loss": 45.3543, "step": 70190 }, { "epoch": 0.2836168828807718, "grad_norm": 377.31842041015625, "learning_rate": 3.614744871996437e-05, "loss": 45.3849, "step": 70200 }, { "epoch": 0.28365728414613944, "grad_norm": 1098.9931640625, "learning_rate": 3.614580085402383e-05, "loss": 63.57, "step": 70210 }, { "epoch": 0.2836976854115071, "grad_norm": 1463.590576171875, "learning_rate": 3.6144152673312596e-05, "loss": 59.9706, "step": 70220 }, { "epoch": 0.2837380866768747, "grad_norm": 609.68896484375, "learning_rate": 3.6142504177862796e-05, "loss": 72.5341, "step": 70230 }, { "epoch": 0.28377848794224236, "grad_norm": 559.6856079101562, "learning_rate": 3.614085536770656e-05, "loss": 52.6119, "step": 70240 }, { "epoch": 0.28381888920761, "grad_norm": 695.3104248046875, "learning_rate": 3.613920624287604e-05, "loss": 63.0411, "step": 70250 }, { "epoch": 0.2838592904729776, "grad_norm": 281.5528259277344, "learning_rate": 3.613755680340339e-05, "loss": 71.9128, "step": 70260 }, { "epoch": 0.28389969173834523, "grad_norm": 812.212890625, "learning_rate": 3.6135907049320757e-05, "loss": 54.3615, "step": 70270 }, { "epoch": 0.28394009300371287, "grad_norm": 205.86573791503906, "learning_rate": 3.6134256980660306e-05, "loss": 54.9781, "step": 70280 }, { "epoch": 0.2839804942690805, "grad_norm": 707.3431396484375, "learning_rate": 3.613260659745422e-05, "loss": 90.469, "step": 70290 }, { "epoch": 0.28402089553444815, "grad_norm": 2262.424072265625, "learning_rate": 3.613095589973465e-05, "loss": 77.3084, "step": 70300 }, { "epoch": 0.2840612967998158, "grad_norm": 675.3787841796875, "learning_rate": 3.61293048875338e-05, "loss": 75.9326, "step": 70310 }, { "epoch": 0.28410169806518343, "grad_norm": 767.5772094726562, "learning_rate": 3.612765356088384e-05, "loss": 73.561, "step": 70320 }, { "epoch": 0.284142099330551, "grad_norm": 284.82177734375, "learning_rate": 3.612600191981697e-05, "loss": 56.5622, "step": 70330 }, { "epoch": 0.28418250059591865, "grad_norm": 402.4296569824219, "learning_rate": 3.61243499643654e-05, "loss": 92.6953, "step": 70340 }, { "epoch": 0.2842229018612863, "grad_norm": 1076.490234375, "learning_rate": 3.612269769456132e-05, "loss": 78.2423, "step": 70350 }, { "epoch": 0.28426330312665393, "grad_norm": 1266.93798828125, "learning_rate": 3.612104511043694e-05, "loss": 55.861, "step": 70360 }, { "epoch": 0.2843037043920216, "grad_norm": 1214.197265625, "learning_rate": 3.61193922120245e-05, "loss": 93.7413, "step": 70370 }, { "epoch": 0.2843441056573892, "grad_norm": 561.1460571289062, "learning_rate": 3.61177389993562e-05, "loss": 50.5075, "step": 70380 }, { "epoch": 0.2843845069227568, "grad_norm": 531.0656127929688, "learning_rate": 3.611608547246429e-05, "loss": 89.8019, "step": 70390 }, { "epoch": 0.28442490818812444, "grad_norm": 856.7954711914062, "learning_rate": 3.611443163138099e-05, "loss": 69.8518, "step": 70400 }, { "epoch": 0.2844653094534921, "grad_norm": 359.9229736328125, "learning_rate": 3.611277747613855e-05, "loss": 49.7166, "step": 70410 }, { "epoch": 0.2845057107188597, "grad_norm": 1801.592041015625, "learning_rate": 3.611112300676921e-05, "loss": 45.1849, "step": 70420 }, { "epoch": 0.28454611198422736, "grad_norm": 443.1506042480469, "learning_rate": 3.6109468223305244e-05, "loss": 68.0052, "step": 70430 }, { "epoch": 0.284586513249595, "grad_norm": 267.1731872558594, "learning_rate": 3.61078131257789e-05, "loss": 62.3781, "step": 70440 }, { "epoch": 0.2846269145149626, "grad_norm": 767.2887573242188, "learning_rate": 3.6106157714222436e-05, "loss": 56.1048, "step": 70450 }, { "epoch": 0.2846673157803302, "grad_norm": 774.2536010742188, "learning_rate": 3.610450198866815e-05, "loss": 95.8227, "step": 70460 }, { "epoch": 0.28470771704569786, "grad_norm": 1884.04443359375, "learning_rate": 3.61028459491483e-05, "loss": 70.254, "step": 70470 }, { "epoch": 0.2847481183110655, "grad_norm": 693.6580200195312, "learning_rate": 3.6101189595695174e-05, "loss": 52.211, "step": 70480 }, { "epoch": 0.28478851957643314, "grad_norm": 318.2597351074219, "learning_rate": 3.609953292834107e-05, "loss": 88.7317, "step": 70490 }, { "epoch": 0.2848289208418008, "grad_norm": 437.4230651855469, "learning_rate": 3.609787594711828e-05, "loss": 67.7461, "step": 70500 }, { "epoch": 0.2848693221071684, "grad_norm": 549.7716674804688, "learning_rate": 3.609621865205912e-05, "loss": 68.1587, "step": 70510 }, { "epoch": 0.284909723372536, "grad_norm": 830.4805297851562, "learning_rate": 3.609456104319589e-05, "loss": 90.8575, "step": 70520 }, { "epoch": 0.28495012463790365, "grad_norm": 321.4366149902344, "learning_rate": 3.609290312056089e-05, "loss": 80.5304, "step": 70530 }, { "epoch": 0.2849905259032713, "grad_norm": 1150.2452392578125, "learning_rate": 3.609124488418647e-05, "loss": 94.2768, "step": 70540 }, { "epoch": 0.2850309271686389, "grad_norm": 1228.3502197265625, "learning_rate": 3.608958633410495e-05, "loss": 102.8754, "step": 70550 }, { "epoch": 0.28507132843400657, "grad_norm": 1073.699951171875, "learning_rate": 3.608792747034866e-05, "loss": 67.9786, "step": 70560 }, { "epoch": 0.2851117296993742, "grad_norm": 853.513916015625, "learning_rate": 3.608626829294994e-05, "loss": 33.2278, "step": 70570 }, { "epoch": 0.2851521309647418, "grad_norm": 821.870361328125, "learning_rate": 3.608460880194113e-05, "loss": 55.8737, "step": 70580 }, { "epoch": 0.28519253223010943, "grad_norm": 634.192138671875, "learning_rate": 3.60829489973546e-05, "loss": 58.3211, "step": 70590 }, { "epoch": 0.28523293349547707, "grad_norm": 1230.9725341796875, "learning_rate": 3.6081288879222696e-05, "loss": 62.6132, "step": 70600 }, { "epoch": 0.2852733347608447, "grad_norm": 786.7669067382812, "learning_rate": 3.607962844757779e-05, "loss": 85.2633, "step": 70610 }, { "epoch": 0.28531373602621235, "grad_norm": 592.4337158203125, "learning_rate": 3.607796770245224e-05, "loss": 70.8817, "step": 70620 }, { "epoch": 0.28535413729158, "grad_norm": 1594.3375244140625, "learning_rate": 3.607630664387844e-05, "loss": 73.1753, "step": 70630 }, { "epoch": 0.28539453855694763, "grad_norm": 700.613037109375, "learning_rate": 3.6074645271888765e-05, "loss": 65.9696, "step": 70640 }, { "epoch": 0.2854349398223152, "grad_norm": 640.7323608398438, "learning_rate": 3.60729835865156e-05, "loss": 95.231, "step": 70650 }, { "epoch": 0.28547534108768285, "grad_norm": 749.009033203125, "learning_rate": 3.607132158779135e-05, "loss": 54.9461, "step": 70660 }, { "epoch": 0.2855157423530505, "grad_norm": 447.9740905761719, "learning_rate": 3.6069659275748404e-05, "loss": 70.5396, "step": 70670 }, { "epoch": 0.28555614361841813, "grad_norm": 1128.191162109375, "learning_rate": 3.606799665041918e-05, "loss": 60.2394, "step": 70680 }, { "epoch": 0.2855965448837858, "grad_norm": 796.18408203125, "learning_rate": 3.60663337118361e-05, "loss": 76.6932, "step": 70690 }, { "epoch": 0.2856369461491534, "grad_norm": 643.4987182617188, "learning_rate": 3.606467046003156e-05, "loss": 55.9977, "step": 70700 }, { "epoch": 0.285677347414521, "grad_norm": 1437.8402099609375, "learning_rate": 3.6063006895038004e-05, "loss": 71.8034, "step": 70710 }, { "epoch": 0.28571774867988864, "grad_norm": 445.0003662109375, "learning_rate": 3.6061343016887856e-05, "loss": 67.1427, "step": 70720 }, { "epoch": 0.2857581499452563, "grad_norm": 305.48779296875, "learning_rate": 3.605967882561356e-05, "loss": 52.1864, "step": 70730 }, { "epoch": 0.2857985512106239, "grad_norm": 667.2313232421875, "learning_rate": 3.6058014321247556e-05, "loss": 64.2683, "step": 70740 }, { "epoch": 0.28583895247599156, "grad_norm": 418.2080993652344, "learning_rate": 3.6056349503822295e-05, "loss": 73.9068, "step": 70750 }, { "epoch": 0.2858793537413592, "grad_norm": 7662.9677734375, "learning_rate": 3.605468437337023e-05, "loss": 93.897, "step": 70760 }, { "epoch": 0.2859197550067268, "grad_norm": 824.5432739257812, "learning_rate": 3.605301892992383e-05, "loss": 82.0532, "step": 70770 }, { "epoch": 0.2859601562720944, "grad_norm": 396.4519958496094, "learning_rate": 3.605135317351556e-05, "loss": 143.2694, "step": 70780 }, { "epoch": 0.28600055753746206, "grad_norm": 595.1510009765625, "learning_rate": 3.604968710417791e-05, "loss": 77.5454, "step": 70790 }, { "epoch": 0.2860409588028297, "grad_norm": 560.7315673828125, "learning_rate": 3.604802072194334e-05, "loss": 54.1081, "step": 70800 }, { "epoch": 0.28608136006819734, "grad_norm": 514.366455078125, "learning_rate": 3.604635402684434e-05, "loss": 88.6278, "step": 70810 }, { "epoch": 0.286121761333565, "grad_norm": 1537.122802734375, "learning_rate": 3.60446870189134e-05, "loss": 77.5263, "step": 70820 }, { "epoch": 0.2861621625989326, "grad_norm": 556.6965942382812, "learning_rate": 3.604301969818304e-05, "loss": 63.4476, "step": 70830 }, { "epoch": 0.2862025638643002, "grad_norm": 1061.7496337890625, "learning_rate": 3.604135206468574e-05, "loss": 55.0528, "step": 70840 }, { "epoch": 0.28624296512966785, "grad_norm": 916.4915771484375, "learning_rate": 3.603968411845402e-05, "loss": 64.2837, "step": 70850 }, { "epoch": 0.2862833663950355, "grad_norm": 970.0208740234375, "learning_rate": 3.603801585952042e-05, "loss": 88.3489, "step": 70860 }, { "epoch": 0.28632376766040313, "grad_norm": 469.9666442871094, "learning_rate": 3.603634728791743e-05, "loss": 60.465, "step": 70870 }, { "epoch": 0.28636416892577077, "grad_norm": 417.34771728515625, "learning_rate": 3.6034678403677595e-05, "loss": 43.7073, "step": 70880 }, { "epoch": 0.2864045701911384, "grad_norm": 715.1119384765625, "learning_rate": 3.603300920683345e-05, "loss": 57.8118, "step": 70890 }, { "epoch": 0.286444971456506, "grad_norm": 1027.1759033203125, "learning_rate": 3.6031339697417535e-05, "loss": 77.0941, "step": 70900 }, { "epoch": 0.28648537272187363, "grad_norm": 6179.7158203125, "learning_rate": 3.60296698754624e-05, "loss": 66.7193, "step": 70910 }, { "epoch": 0.2865257739872413, "grad_norm": 1350.91259765625, "learning_rate": 3.6027999741000596e-05, "loss": 64.565, "step": 70920 }, { "epoch": 0.2865661752526089, "grad_norm": 1316.241455078125, "learning_rate": 3.602632929406469e-05, "loss": 70.2296, "step": 70930 }, { "epoch": 0.28660657651797655, "grad_norm": 605.462646484375, "learning_rate": 3.602465853468724e-05, "loss": 81.5859, "step": 70940 }, { "epoch": 0.2866469777833442, "grad_norm": 964.7286987304688, "learning_rate": 3.6022987462900824e-05, "loss": 66.2818, "step": 70950 }, { "epoch": 0.28668737904871183, "grad_norm": 455.8790588378906, "learning_rate": 3.602131607873801e-05, "loss": 97.039, "step": 70960 }, { "epoch": 0.2867277803140794, "grad_norm": 1274.9256591796875, "learning_rate": 3.601964438223141e-05, "loss": 85.475, "step": 70970 }, { "epoch": 0.28676818157944706, "grad_norm": 2069.627685546875, "learning_rate": 3.601797237341358e-05, "loss": 103.252, "step": 70980 }, { "epoch": 0.2868085828448147, "grad_norm": 718.9536743164062, "learning_rate": 3.6016300052317135e-05, "loss": 67.6074, "step": 70990 }, { "epoch": 0.28684898411018234, "grad_norm": 755.44873046875, "learning_rate": 3.601462741897467e-05, "loss": 55.8639, "step": 71000 }, { "epoch": 0.28688938537555, "grad_norm": 585.0748291015625, "learning_rate": 3.6012954473418806e-05, "loss": 74.2655, "step": 71010 }, { "epoch": 0.2869297866409176, "grad_norm": 611.1162109375, "learning_rate": 3.601128121568215e-05, "loss": 72.3695, "step": 71020 }, { "epoch": 0.2869701879062852, "grad_norm": 527.9596557617188, "learning_rate": 3.600960764579732e-05, "loss": 55.1402, "step": 71030 }, { "epoch": 0.28701058917165284, "grad_norm": 466.97265625, "learning_rate": 3.6007933763796946e-05, "loss": 48.7595, "step": 71040 }, { "epoch": 0.2870509904370205, "grad_norm": 872.8701782226562, "learning_rate": 3.6006259569713665e-05, "loss": 68.9266, "step": 71050 }, { "epoch": 0.2870913917023881, "grad_norm": 1329.21044921875, "learning_rate": 3.600458506358011e-05, "loss": 60.3758, "step": 71060 }, { "epoch": 0.28713179296775576, "grad_norm": 789.2830810546875, "learning_rate": 3.6002910245428936e-05, "loss": 46.2742, "step": 71070 }, { "epoch": 0.2871721942331234, "grad_norm": 1676.2977294921875, "learning_rate": 3.6001235115292784e-05, "loss": 60.3875, "step": 71080 }, { "epoch": 0.287212595498491, "grad_norm": 543.3878173828125, "learning_rate": 3.599955967320432e-05, "loss": 45.1755, "step": 71090 }, { "epoch": 0.2872529967638586, "grad_norm": 2713.2490234375, "learning_rate": 3.5997883919196193e-05, "loss": 80.3693, "step": 71100 }, { "epoch": 0.28729339802922627, "grad_norm": 212.3776092529297, "learning_rate": 3.5996207853301084e-05, "loss": 58.0897, "step": 71110 }, { "epoch": 0.2873337992945939, "grad_norm": 613.86767578125, "learning_rate": 3.599453147555167e-05, "loss": 40.5792, "step": 71120 }, { "epoch": 0.28737420055996155, "grad_norm": 752.4508056640625, "learning_rate": 3.5992854785980635e-05, "loss": 111.4546, "step": 71130 }, { "epoch": 0.2874146018253292, "grad_norm": 739.1114501953125, "learning_rate": 3.599117778462066e-05, "loss": 78.3358, "step": 71140 }, { "epoch": 0.2874550030906968, "grad_norm": 1881.143798828125, "learning_rate": 3.598950047150444e-05, "loss": 114.0669, "step": 71150 }, { "epoch": 0.2874954043560644, "grad_norm": 781.6092529296875, "learning_rate": 3.598782284666467e-05, "loss": 91.2981, "step": 71160 }, { "epoch": 0.28753580562143205, "grad_norm": 635.6123657226562, "learning_rate": 3.5986144910134074e-05, "loss": 44.6738, "step": 71170 }, { "epoch": 0.2875762068867997, "grad_norm": 714.689208984375, "learning_rate": 3.5984466661945346e-05, "loss": 94.8069, "step": 71180 }, { "epoch": 0.28761660815216733, "grad_norm": 1838.5892333984375, "learning_rate": 3.598278810213121e-05, "loss": 102.966, "step": 71190 }, { "epoch": 0.28765700941753497, "grad_norm": 723.8616333007812, "learning_rate": 3.59811092307244e-05, "loss": 69.6785, "step": 71200 }, { "epoch": 0.2876974106829026, "grad_norm": 1040.573974609375, "learning_rate": 3.5979430047757634e-05, "loss": 61.6997, "step": 71210 }, { "epoch": 0.2877378119482702, "grad_norm": 1204.720458984375, "learning_rate": 3.597775055326365e-05, "loss": 59.2142, "step": 71220 }, { "epoch": 0.28777821321363783, "grad_norm": 1090.214599609375, "learning_rate": 3.5976070747275194e-05, "loss": 52.0799, "step": 71230 }, { "epoch": 0.2878186144790055, "grad_norm": 318.8838806152344, "learning_rate": 3.597439062982501e-05, "loss": 75.3177, "step": 71240 }, { "epoch": 0.2878590157443731, "grad_norm": 2042.7188720703125, "learning_rate": 3.597271020094586e-05, "loss": 96.1377, "step": 71250 }, { "epoch": 0.28789941700974075, "grad_norm": 280.46185302734375, "learning_rate": 3.59710294606705e-05, "loss": 79.229, "step": 71260 }, { "epoch": 0.2879398182751084, "grad_norm": 601.186767578125, "learning_rate": 3.59693484090317e-05, "loss": 96.3455, "step": 71270 }, { "epoch": 0.28798021954047603, "grad_norm": 658.820556640625, "learning_rate": 3.596766704606223e-05, "loss": 102.5423, "step": 71280 }, { "epoch": 0.2880206208058436, "grad_norm": 1202.2777099609375, "learning_rate": 3.596598537179487e-05, "loss": 68.3238, "step": 71290 }, { "epoch": 0.28806102207121126, "grad_norm": 434.55828857421875, "learning_rate": 3.596430338626241e-05, "loss": 63.3015, "step": 71300 }, { "epoch": 0.2881014233365789, "grad_norm": 895.1974487304688, "learning_rate": 3.5962621089497634e-05, "loss": 60.5648, "step": 71310 }, { "epoch": 0.28814182460194654, "grad_norm": 458.9768371582031, "learning_rate": 3.596093848153334e-05, "loss": 59.3895, "step": 71320 }, { "epoch": 0.2881822258673142, "grad_norm": 903.0233764648438, "learning_rate": 3.595925556240233e-05, "loss": 72.1123, "step": 71330 }, { "epoch": 0.2882226271326818, "grad_norm": 1241.1566162109375, "learning_rate": 3.595757233213742e-05, "loss": 60.014, "step": 71340 }, { "epoch": 0.2882630283980494, "grad_norm": 665.7273559570312, "learning_rate": 3.5955888790771426e-05, "loss": 72.169, "step": 71350 }, { "epoch": 0.28830342966341704, "grad_norm": 780.5765991210938, "learning_rate": 3.5954204938337156e-05, "loss": 70.9737, "step": 71360 }, { "epoch": 0.2883438309287847, "grad_norm": 769.46728515625, "learning_rate": 3.5952520774867454e-05, "loss": 66.6491, "step": 71370 }, { "epoch": 0.2883842321941523, "grad_norm": 876.2539672851562, "learning_rate": 3.5950836300395146e-05, "loss": 54.9499, "step": 71380 }, { "epoch": 0.28842463345951996, "grad_norm": 894.6849365234375, "learning_rate": 3.5949151514953074e-05, "loss": 63.0425, "step": 71390 }, { "epoch": 0.2884650347248876, "grad_norm": 847.6256103515625, "learning_rate": 3.594746641857407e-05, "loss": 73.8627, "step": 71400 }, { "epoch": 0.2885054359902552, "grad_norm": 959.1056518554688, "learning_rate": 3.594578101129101e-05, "loss": 91.9577, "step": 71410 }, { "epoch": 0.28854583725562283, "grad_norm": 566.6630859375, "learning_rate": 3.594409529313674e-05, "loss": 65.078, "step": 71420 }, { "epoch": 0.28858623852099047, "grad_norm": 769.6134033203125, "learning_rate": 3.594240926414412e-05, "loss": 71.833, "step": 71430 }, { "epoch": 0.2886266397863581, "grad_norm": 825.9446411132812, "learning_rate": 3.594072292434602e-05, "loss": 58.1939, "step": 71440 }, { "epoch": 0.28866704105172575, "grad_norm": 1975.863525390625, "learning_rate": 3.593903627377533e-05, "loss": 84.32, "step": 71450 }, { "epoch": 0.2887074423170934, "grad_norm": 729.3330078125, "learning_rate": 3.593734931246491e-05, "loss": 46.2589, "step": 71460 }, { "epoch": 0.28874784358246103, "grad_norm": 824.732666015625, "learning_rate": 3.593566204044767e-05, "loss": 55.354, "step": 71470 }, { "epoch": 0.2887882448478286, "grad_norm": 739.683349609375, "learning_rate": 3.5933974457756494e-05, "loss": 67.6329, "step": 71480 }, { "epoch": 0.28882864611319625, "grad_norm": 685.531005859375, "learning_rate": 3.593228656442427e-05, "loss": 73.9774, "step": 71490 }, { "epoch": 0.2888690473785639, "grad_norm": 1414.84814453125, "learning_rate": 3.5930598360483926e-05, "loss": 58.4536, "step": 71500 }, { "epoch": 0.28890944864393153, "grad_norm": 992.2027587890625, "learning_rate": 3.592890984596837e-05, "loss": 60.3957, "step": 71510 }, { "epoch": 0.2889498499092992, "grad_norm": 1208.000244140625, "learning_rate": 3.592722102091051e-05, "loss": 64.489, "step": 71520 }, { "epoch": 0.2889902511746668, "grad_norm": 1417.686279296875, "learning_rate": 3.592553188534328e-05, "loss": 82.0553, "step": 71530 }, { "epoch": 0.2890306524400344, "grad_norm": 432.866943359375, "learning_rate": 3.59238424392996e-05, "loss": 50.9258, "step": 71540 }, { "epoch": 0.28907105370540204, "grad_norm": 982.0166625976562, "learning_rate": 3.592215268281242e-05, "loss": 55.183, "step": 71550 }, { "epoch": 0.2891114549707697, "grad_norm": 462.43292236328125, "learning_rate": 3.592046261591467e-05, "loss": 75.8433, "step": 71560 }, { "epoch": 0.2891518562361373, "grad_norm": 1216.3463134765625, "learning_rate": 3.5918772238639304e-05, "loss": 77.9336, "step": 71570 }, { "epoch": 0.28919225750150496, "grad_norm": 2263.478271484375, "learning_rate": 3.591708155101928e-05, "loss": 67.5811, "step": 71580 }, { "epoch": 0.2892326587668726, "grad_norm": 998.69873046875, "learning_rate": 3.591539055308756e-05, "loss": 72.0668, "step": 71590 }, { "epoch": 0.28927306003224024, "grad_norm": 782.7774047851562, "learning_rate": 3.591369924487711e-05, "loss": 84.08, "step": 71600 }, { "epoch": 0.2893134612976078, "grad_norm": 746.9514770507812, "learning_rate": 3.59120076264209e-05, "loss": 39.7181, "step": 71610 }, { "epoch": 0.28935386256297546, "grad_norm": 480.704345703125, "learning_rate": 3.59103156977519e-05, "loss": 75.6742, "step": 71620 }, { "epoch": 0.2893942638283431, "grad_norm": 823.4539794921875, "learning_rate": 3.590862345890311e-05, "loss": 79.9501, "step": 71630 }, { "epoch": 0.28943466509371074, "grad_norm": 3223.1875, "learning_rate": 3.590693090990752e-05, "loss": 100.8133, "step": 71640 }, { "epoch": 0.2894750663590784, "grad_norm": 270.4209289550781, "learning_rate": 3.590523805079812e-05, "loss": 52.6623, "step": 71650 }, { "epoch": 0.289515467624446, "grad_norm": 896.4325561523438, "learning_rate": 3.590354488160791e-05, "loss": 81.3142, "step": 71660 }, { "epoch": 0.2895558688898136, "grad_norm": 295.3815612792969, "learning_rate": 3.5901851402369905e-05, "loss": 71.8267, "step": 71670 }, { "epoch": 0.28959627015518125, "grad_norm": 982.9450073242188, "learning_rate": 3.590015761311713e-05, "loss": 64.7104, "step": 71680 }, { "epoch": 0.2896366714205489, "grad_norm": 1250.133056640625, "learning_rate": 3.5898463513882584e-05, "loss": 105.7186, "step": 71690 }, { "epoch": 0.2896770726859165, "grad_norm": 321.7370300292969, "learning_rate": 3.589676910469932e-05, "loss": 54.8599, "step": 71700 }, { "epoch": 0.28971747395128417, "grad_norm": 935.1970825195312, "learning_rate": 3.589507438560034e-05, "loss": 88.217, "step": 71710 }, { "epoch": 0.2897578752166518, "grad_norm": 1932.10693359375, "learning_rate": 3.589337935661871e-05, "loss": 97.3057, "step": 71720 }, { "epoch": 0.2897982764820194, "grad_norm": 1101.5948486328125, "learning_rate": 3.589168401778747e-05, "loss": 85.077, "step": 71730 }, { "epoch": 0.28983867774738703, "grad_norm": 1101.748779296875, "learning_rate": 3.588998836913967e-05, "loss": 87.2077, "step": 71740 }, { "epoch": 0.28987907901275467, "grad_norm": 1247.614013671875, "learning_rate": 3.5888292410708364e-05, "loss": 65.8963, "step": 71750 }, { "epoch": 0.2899194802781223, "grad_norm": 644.5755615234375, "learning_rate": 3.588659614252662e-05, "loss": 48.0149, "step": 71760 }, { "epoch": 0.28995988154348995, "grad_norm": 538.0592041015625, "learning_rate": 3.5884899564627504e-05, "loss": 56.984, "step": 71770 }, { "epoch": 0.2900002828088576, "grad_norm": 988.62646484375, "learning_rate": 3.588320267704409e-05, "loss": 77.014, "step": 71780 }, { "epoch": 0.29004068407422523, "grad_norm": 681.0162353515625, "learning_rate": 3.588150547980946e-05, "loss": 63.0402, "step": 71790 }, { "epoch": 0.2900810853395928, "grad_norm": 1037.8740234375, "learning_rate": 3.587980797295671e-05, "loss": 67.6628, "step": 71800 }, { "epoch": 0.29012148660496045, "grad_norm": 2027.07568359375, "learning_rate": 3.587811015651893e-05, "loss": 45.8344, "step": 71810 }, { "epoch": 0.2901618878703281, "grad_norm": 728.1589965820312, "learning_rate": 3.587641203052921e-05, "loss": 73.745, "step": 71820 }, { "epoch": 0.29020228913569573, "grad_norm": 612.0015869140625, "learning_rate": 3.587471359502066e-05, "loss": 57.5115, "step": 71830 }, { "epoch": 0.2902426904010634, "grad_norm": 1017.7273559570312, "learning_rate": 3.587301485002641e-05, "loss": 64.8747, "step": 71840 }, { "epoch": 0.290283091666431, "grad_norm": 1304.318115234375, "learning_rate": 3.587131579557956e-05, "loss": 83.5981, "step": 71850 }, { "epoch": 0.2903234929317986, "grad_norm": 724.4077758789062, "learning_rate": 3.5869616431713235e-05, "loss": 55.5681, "step": 71860 }, { "epoch": 0.29036389419716624, "grad_norm": 571.9274291992188, "learning_rate": 3.586791675846057e-05, "loss": 89.1574, "step": 71870 }, { "epoch": 0.2904042954625339, "grad_norm": 321.2984313964844, "learning_rate": 3.586621677585469e-05, "loss": 70.9063, "step": 71880 }, { "epoch": 0.2904446967279015, "grad_norm": 348.6775207519531, "learning_rate": 3.586451648392875e-05, "loss": 45.3349, "step": 71890 }, { "epoch": 0.29048509799326916, "grad_norm": 463.2661437988281, "learning_rate": 3.58628158827159e-05, "loss": 70.9898, "step": 71900 }, { "epoch": 0.2905254992586368, "grad_norm": 370.6292419433594, "learning_rate": 3.586111497224928e-05, "loss": 57.8408, "step": 71910 }, { "epoch": 0.29056590052400444, "grad_norm": 907.023193359375, "learning_rate": 3.585941375256206e-05, "loss": 81.2636, "step": 71920 }, { "epoch": 0.290606301789372, "grad_norm": 1051.9798583984375, "learning_rate": 3.58577122236874e-05, "loss": 66.2477, "step": 71930 }, { "epoch": 0.29064670305473966, "grad_norm": 763.0676879882812, "learning_rate": 3.585601038565848e-05, "loss": 62.0955, "step": 71940 }, { "epoch": 0.2906871043201073, "grad_norm": 473.68792724609375, "learning_rate": 3.585430823850847e-05, "loss": 83.7084, "step": 71950 }, { "epoch": 0.29072750558547494, "grad_norm": 742.2813110351562, "learning_rate": 3.5852605782270566e-05, "loss": 66.4683, "step": 71960 }, { "epoch": 0.2907679068508426, "grad_norm": 1462.05517578125, "learning_rate": 3.585090301697795e-05, "loss": 97.0444, "step": 71970 }, { "epoch": 0.2908083081162102, "grad_norm": 553.8416137695312, "learning_rate": 3.584919994266382e-05, "loss": 47.8547, "step": 71980 }, { "epoch": 0.2908487093815778, "grad_norm": 1666.500244140625, "learning_rate": 3.584749655936137e-05, "loss": 105.8823, "step": 71990 }, { "epoch": 0.29088911064694545, "grad_norm": 1440.423095703125, "learning_rate": 3.584579286710382e-05, "loss": 91.9585, "step": 72000 }, { "epoch": 0.2909295119123131, "grad_norm": 500.3939514160156, "learning_rate": 3.5844088865924376e-05, "loss": 55.625, "step": 72010 }, { "epoch": 0.29096991317768073, "grad_norm": 654.8592529296875, "learning_rate": 3.584238455585626e-05, "loss": 69.709, "step": 72020 }, { "epoch": 0.29101031444304837, "grad_norm": 395.8098449707031, "learning_rate": 3.5840679936932714e-05, "loss": 49.6149, "step": 72030 }, { "epoch": 0.291050715708416, "grad_norm": 667.9511108398438, "learning_rate": 3.5838975009186945e-05, "loss": 99.7107, "step": 72040 }, { "epoch": 0.2910911169737836, "grad_norm": 964.6243896484375, "learning_rate": 3.583726977265222e-05, "loss": 92.4027, "step": 72050 }, { "epoch": 0.29113151823915123, "grad_norm": 1799.1485595703125, "learning_rate": 3.583556422736175e-05, "loss": 123.6818, "step": 72060 }, { "epoch": 0.2911719195045189, "grad_norm": 562.0403442382812, "learning_rate": 3.583385837334881e-05, "loss": 37.064, "step": 72070 }, { "epoch": 0.2912123207698865, "grad_norm": 562.592529296875, "learning_rate": 3.5832152210646646e-05, "loss": 59.6503, "step": 72080 }, { "epoch": 0.29125272203525415, "grad_norm": 1167.1383056640625, "learning_rate": 3.583044573928853e-05, "loss": 77.4025, "step": 72090 }, { "epoch": 0.2912931233006218, "grad_norm": 2110.247802734375, "learning_rate": 3.5828738959307715e-05, "loss": 67.5149, "step": 72100 }, { "epoch": 0.29133352456598943, "grad_norm": 680.3276977539062, "learning_rate": 3.582703187073749e-05, "loss": 60.1068, "step": 72110 }, { "epoch": 0.291373925831357, "grad_norm": 495.9038391113281, "learning_rate": 3.582532447361114e-05, "loss": 64.1526, "step": 72120 }, { "epoch": 0.29141432709672466, "grad_norm": 968.1295166015625, "learning_rate": 3.5823616767961924e-05, "loss": 58.9892, "step": 72130 }, { "epoch": 0.2914547283620923, "grad_norm": 337.22491455078125, "learning_rate": 3.5821908753823163e-05, "loss": 54.9471, "step": 72140 }, { "epoch": 0.29149512962745994, "grad_norm": 1134.8824462890625, "learning_rate": 3.582020043122814e-05, "loss": 100.2467, "step": 72150 }, { "epoch": 0.2915355308928276, "grad_norm": 1638.614013671875, "learning_rate": 3.581849180021017e-05, "loss": 74.3465, "step": 72160 }, { "epoch": 0.2915759321581952, "grad_norm": 1058.03515625, "learning_rate": 3.581678286080256e-05, "loss": 90.0985, "step": 72170 }, { "epoch": 0.2916163334235628, "grad_norm": 329.9512634277344, "learning_rate": 3.581507361303862e-05, "loss": 48.0506, "step": 72180 }, { "epoch": 0.29165673468893044, "grad_norm": 653.0698852539062, "learning_rate": 3.5813364056951676e-05, "loss": 92.9112, "step": 72190 }, { "epoch": 0.2916971359542981, "grad_norm": 934.0549926757812, "learning_rate": 3.5811654192575064e-05, "loss": 50.3939, "step": 72200 }, { "epoch": 0.2917375372196657, "grad_norm": 562.1233520507812, "learning_rate": 3.580994401994211e-05, "loss": 82.1733, "step": 72210 }, { "epoch": 0.29177793848503336, "grad_norm": 485.84515380859375, "learning_rate": 3.5808233539086155e-05, "loss": 49.1954, "step": 72220 }, { "epoch": 0.291818339750401, "grad_norm": 1089.2716064453125, "learning_rate": 3.580652275004055e-05, "loss": 60.4214, "step": 72230 }, { "epoch": 0.29185874101576864, "grad_norm": 1303.6624755859375, "learning_rate": 3.580481165283865e-05, "loss": 101.3685, "step": 72240 }, { "epoch": 0.2918991422811362, "grad_norm": 951.2037963867188, "learning_rate": 3.580310024751381e-05, "loss": 82.3328, "step": 72250 }, { "epoch": 0.29193954354650387, "grad_norm": 659.6070556640625, "learning_rate": 3.5801388534099396e-05, "loss": 85.988, "step": 72260 }, { "epoch": 0.2919799448118715, "grad_norm": 524.1322631835938, "learning_rate": 3.5799676512628775e-05, "loss": 49.4976, "step": 72270 }, { "epoch": 0.29202034607723915, "grad_norm": 814.81591796875, "learning_rate": 3.579796418313532e-05, "loss": 51.3425, "step": 72280 }, { "epoch": 0.2920607473426068, "grad_norm": 313.1018371582031, "learning_rate": 3.5796251545652425e-05, "loss": 86.0607, "step": 72290 }, { "epoch": 0.2921011486079744, "grad_norm": 528.6152954101562, "learning_rate": 3.579453860021348e-05, "loss": 61.0077, "step": 72300 }, { "epoch": 0.292141549873342, "grad_norm": 1399.322998046875, "learning_rate": 3.579282534685186e-05, "loss": 99.063, "step": 72310 }, { "epoch": 0.29218195113870965, "grad_norm": 2749.59130859375, "learning_rate": 3.579111178560099e-05, "loss": 66.6921, "step": 72320 }, { "epoch": 0.2922223524040773, "grad_norm": 759.787841796875, "learning_rate": 3.578939791649426e-05, "loss": 102.9235, "step": 72330 }, { "epoch": 0.29226275366944493, "grad_norm": 730.5199584960938, "learning_rate": 3.5787683739565096e-05, "loss": 55.3452, "step": 72340 }, { "epoch": 0.29230315493481257, "grad_norm": 707.774658203125, "learning_rate": 3.57859692548469e-05, "loss": 81.0743, "step": 72350 }, { "epoch": 0.2923435562001802, "grad_norm": 1520.0377197265625, "learning_rate": 3.578425446237312e-05, "loss": 78.8926, "step": 72360 }, { "epoch": 0.2923839574655478, "grad_norm": 1314.706298828125, "learning_rate": 3.578253936217716e-05, "loss": 84.5639, "step": 72370 }, { "epoch": 0.29242435873091543, "grad_norm": 315.541748046875, "learning_rate": 3.578082395429247e-05, "loss": 62.6152, "step": 72380 }, { "epoch": 0.2924647599962831, "grad_norm": 801.6674194335938, "learning_rate": 3.57791082387525e-05, "loss": 47.1351, "step": 72390 }, { "epoch": 0.2925051612616507, "grad_norm": 1115.9136962890625, "learning_rate": 3.577739221559069e-05, "loss": 61.5287, "step": 72400 }, { "epoch": 0.29254556252701835, "grad_norm": 1236.024658203125, "learning_rate": 3.577567588484049e-05, "loss": 62.6104, "step": 72410 }, { "epoch": 0.292585963792386, "grad_norm": 508.2314147949219, "learning_rate": 3.577395924653537e-05, "loss": 53.6619, "step": 72420 }, { "epoch": 0.29262636505775363, "grad_norm": 645.4722290039062, "learning_rate": 3.577224230070879e-05, "loss": 78.5961, "step": 72430 }, { "epoch": 0.2926667663231212, "grad_norm": 1093.773681640625, "learning_rate": 3.577052504739423e-05, "loss": 81.6924, "step": 72440 }, { "epoch": 0.29270716758848886, "grad_norm": 799.0197143554688, "learning_rate": 3.5768807486625166e-05, "loss": 76.3117, "step": 72450 }, { "epoch": 0.2927475688538565, "grad_norm": 462.7156677246094, "learning_rate": 3.576708961843508e-05, "loss": 64.3802, "step": 72460 }, { "epoch": 0.29278797011922414, "grad_norm": 687.6199951171875, "learning_rate": 3.576537144285747e-05, "loss": 62.2381, "step": 72470 }, { "epoch": 0.2928283713845918, "grad_norm": 793.7186889648438, "learning_rate": 3.576365295992582e-05, "loss": 73.4337, "step": 72480 }, { "epoch": 0.2928687726499594, "grad_norm": 704.6602783203125, "learning_rate": 3.576193416967364e-05, "loss": 79.5488, "step": 72490 }, { "epoch": 0.292909173915327, "grad_norm": 447.6377258300781, "learning_rate": 3.576021507213444e-05, "loss": 55.2884, "step": 72500 }, { "epoch": 0.29294957518069464, "grad_norm": 1093.624267578125, "learning_rate": 3.575849566734174e-05, "loss": 55.568, "step": 72510 }, { "epoch": 0.2929899764460623, "grad_norm": 863.1414184570312, "learning_rate": 3.5756775955329045e-05, "loss": 52.4301, "step": 72520 }, { "epoch": 0.2930303777114299, "grad_norm": 726.0432739257812, "learning_rate": 3.575505593612989e-05, "loss": 42.6705, "step": 72530 }, { "epoch": 0.29307077897679756, "grad_norm": 426.0909423828125, "learning_rate": 3.575333560977782e-05, "loss": 57.5002, "step": 72540 }, { "epoch": 0.2931111802421652, "grad_norm": 985.9451293945312, "learning_rate": 3.5751614976306347e-05, "loss": 71.6082, "step": 72550 }, { "epoch": 0.29315158150753284, "grad_norm": 810.1474609375, "learning_rate": 3.574989403574904e-05, "loss": 103.1282, "step": 72560 }, { "epoch": 0.29319198277290043, "grad_norm": 467.8982849121094, "learning_rate": 3.574817278813943e-05, "loss": 78.4023, "step": 72570 }, { "epoch": 0.29323238403826807, "grad_norm": 615.1262817382812, "learning_rate": 3.574645123351109e-05, "loss": 33.2686, "step": 72580 }, { "epoch": 0.2932727853036357, "grad_norm": 2113.16845703125, "learning_rate": 3.5744729371897584e-05, "loss": 77.6977, "step": 72590 }, { "epoch": 0.29331318656900335, "grad_norm": 1216.817626953125, "learning_rate": 3.574300720333247e-05, "loss": 56.1763, "step": 72600 }, { "epoch": 0.293353587834371, "grad_norm": 1282.3033447265625, "learning_rate": 3.574128472784932e-05, "loss": 49.9721, "step": 72610 }, { "epoch": 0.29339398909973863, "grad_norm": 502.74530029296875, "learning_rate": 3.573956194548172e-05, "loss": 43.6453, "step": 72620 }, { "epoch": 0.2934343903651062, "grad_norm": 842.286865234375, "learning_rate": 3.573783885626326e-05, "loss": 75.6466, "step": 72630 }, { "epoch": 0.29347479163047385, "grad_norm": 757.489501953125, "learning_rate": 3.573611546022753e-05, "loss": 100.9482, "step": 72640 }, { "epoch": 0.2935151928958415, "grad_norm": 525.7161254882812, "learning_rate": 3.5734391757408123e-05, "loss": 100.7201, "step": 72650 }, { "epoch": 0.29355559416120913, "grad_norm": 556.4071655273438, "learning_rate": 3.5732667747838654e-05, "loss": 55.8495, "step": 72660 }, { "epoch": 0.2935959954265768, "grad_norm": 629.6777954101562, "learning_rate": 3.573094343155272e-05, "loss": 68.8656, "step": 72670 }, { "epoch": 0.2936363966919444, "grad_norm": 315.14923095703125, "learning_rate": 3.572921880858395e-05, "loss": 58.5177, "step": 72680 }, { "epoch": 0.293676797957312, "grad_norm": 844.389892578125, "learning_rate": 3.572749387896596e-05, "loss": 58.0804, "step": 72690 }, { "epoch": 0.29371719922267964, "grad_norm": 829.093017578125, "learning_rate": 3.572576864273238e-05, "loss": 57.9544, "step": 72700 }, { "epoch": 0.2937576004880473, "grad_norm": 807.382568359375, "learning_rate": 3.572404309991685e-05, "loss": 81.5726, "step": 72710 }, { "epoch": 0.2937980017534149, "grad_norm": 1963.383544921875, "learning_rate": 3.5722317250553e-05, "loss": 170.714, "step": 72720 }, { "epoch": 0.29383840301878256, "grad_norm": 692.9369506835938, "learning_rate": 3.572059109467447e-05, "loss": 80.3831, "step": 72730 }, { "epoch": 0.2938788042841502, "grad_norm": 729.177490234375, "learning_rate": 3.5718864632314936e-05, "loss": 66.1752, "step": 72740 }, { "epoch": 0.29391920554951784, "grad_norm": 2613.15380859375, "learning_rate": 3.571713786350804e-05, "loss": 134.1205, "step": 72750 }, { "epoch": 0.2939596068148854, "grad_norm": 909.3247680664062, "learning_rate": 3.571541078828745e-05, "loss": 71.8222, "step": 72760 }, { "epoch": 0.29400000808025306, "grad_norm": 649.9159545898438, "learning_rate": 3.5713683406686834e-05, "loss": 76.7148, "step": 72770 }, { "epoch": 0.2940404093456207, "grad_norm": 413.2486267089844, "learning_rate": 3.571195571873988e-05, "loss": 67.5009, "step": 72780 }, { "epoch": 0.29408081061098834, "grad_norm": 471.48583984375, "learning_rate": 3.571022772448024e-05, "loss": 72.5633, "step": 72790 }, { "epoch": 0.294121211876356, "grad_norm": 337.2598571777344, "learning_rate": 3.570849942394164e-05, "loss": 84.3592, "step": 72800 }, { "epoch": 0.2941616131417236, "grad_norm": 1427.0047607421875, "learning_rate": 3.570677081715775e-05, "loss": 65.8118, "step": 72810 }, { "epoch": 0.2942020144070912, "grad_norm": 1616.63134765625, "learning_rate": 3.5705041904162274e-05, "loss": 63.8992, "step": 72820 }, { "epoch": 0.29424241567245885, "grad_norm": 4020.565185546875, "learning_rate": 3.5703312684988924e-05, "loss": 103.3089, "step": 72830 }, { "epoch": 0.2942828169378265, "grad_norm": 737.0076293945312, "learning_rate": 3.570158315967141e-05, "loss": 75.0354, "step": 72840 }, { "epoch": 0.2943232182031941, "grad_norm": 407.1966857910156, "learning_rate": 3.569985332824345e-05, "loss": 46.7118, "step": 72850 }, { "epoch": 0.29436361946856177, "grad_norm": 1618.404052734375, "learning_rate": 3.569812319073876e-05, "loss": 109.4477, "step": 72860 }, { "epoch": 0.2944040207339294, "grad_norm": 726.337646484375, "learning_rate": 3.5696392747191084e-05, "loss": 52.4097, "step": 72870 }, { "epoch": 0.29444442199929705, "grad_norm": 1107.2373046875, "learning_rate": 3.569466199763414e-05, "loss": 68.6211, "step": 72880 }, { "epoch": 0.29448482326466463, "grad_norm": 1022.0324096679688, "learning_rate": 3.569293094210169e-05, "loss": 51.532, "step": 72890 }, { "epoch": 0.29452522453003227, "grad_norm": 289.0499267578125, "learning_rate": 3.569119958062747e-05, "loss": 48.7533, "step": 72900 }, { "epoch": 0.2945656257953999, "grad_norm": 1667.3704833984375, "learning_rate": 3.568946791324523e-05, "loss": 89.5318, "step": 72910 }, { "epoch": 0.29460602706076755, "grad_norm": 422.2791442871094, "learning_rate": 3.5687735939988745e-05, "loss": 93.2784, "step": 72920 }, { "epoch": 0.2946464283261352, "grad_norm": 894.854248046875, "learning_rate": 3.5686003660891763e-05, "loss": 69.9056, "step": 72930 }, { "epoch": 0.29468682959150283, "grad_norm": 435.1180114746094, "learning_rate": 3.568427107598807e-05, "loss": 43.495, "step": 72940 }, { "epoch": 0.2947272308568704, "grad_norm": 742.6530151367188, "learning_rate": 3.568253818531143e-05, "loss": 79.4282, "step": 72950 }, { "epoch": 0.29476763212223805, "grad_norm": 2197.233642578125, "learning_rate": 3.568080498889564e-05, "loss": 79.8292, "step": 72960 }, { "epoch": 0.2948080333876057, "grad_norm": 1193.3448486328125, "learning_rate": 3.567907148677448e-05, "loss": 95.2717, "step": 72970 }, { "epoch": 0.29484843465297333, "grad_norm": 1066.093994140625, "learning_rate": 3.567733767898176e-05, "loss": 74.4719, "step": 72980 }, { "epoch": 0.294888835918341, "grad_norm": 525.0219116210938, "learning_rate": 3.567560356555126e-05, "loss": 49.5146, "step": 72990 }, { "epoch": 0.2949292371837086, "grad_norm": 1088.7265625, "learning_rate": 3.56738691465168e-05, "loss": 55.829, "step": 73000 }, { "epoch": 0.2949696384490762, "grad_norm": 931.0994873046875, "learning_rate": 3.5672134421912186e-05, "loss": 82.1505, "step": 73010 }, { "epoch": 0.29501003971444384, "grad_norm": 481.8676452636719, "learning_rate": 3.567039939177125e-05, "loss": 72.3018, "step": 73020 }, { "epoch": 0.2950504409798115, "grad_norm": 1044.1207275390625, "learning_rate": 3.566866405612781e-05, "loss": 63.6777, "step": 73030 }, { "epoch": 0.2950908422451791, "grad_norm": 945.3455200195312, "learning_rate": 3.56669284150157e-05, "loss": 65.7787, "step": 73040 }, { "epoch": 0.29513124351054676, "grad_norm": 391.0890197753906, "learning_rate": 3.566519246846875e-05, "loss": 93.8222, "step": 73050 }, { "epoch": 0.2951716447759144, "grad_norm": 319.558349609375, "learning_rate": 3.566345621652081e-05, "loss": 42.0089, "step": 73060 }, { "epoch": 0.29521204604128204, "grad_norm": 0.0, "learning_rate": 3.566171965920573e-05, "loss": 72.8675, "step": 73070 }, { "epoch": 0.2952524473066496, "grad_norm": 1246.403076171875, "learning_rate": 3.565998279655736e-05, "loss": 85.1066, "step": 73080 }, { "epoch": 0.29529284857201726, "grad_norm": 484.09002685546875, "learning_rate": 3.565824562860956e-05, "loss": 68.7531, "step": 73090 }, { "epoch": 0.2953332498373849, "grad_norm": 709.6483764648438, "learning_rate": 3.56565081553962e-05, "loss": 70.2082, "step": 73100 }, { "epoch": 0.29537365110275254, "grad_norm": 500.7955322265625, "learning_rate": 3.565477037695116e-05, "loss": 61.7862, "step": 73110 }, { "epoch": 0.2954140523681202, "grad_norm": 970.36572265625, "learning_rate": 3.56530322933083e-05, "loss": 64.6293, "step": 73120 }, { "epoch": 0.2954544536334878, "grad_norm": 875.14306640625, "learning_rate": 3.565129390450152e-05, "loss": 79.7622, "step": 73130 }, { "epoch": 0.2954948548988554, "grad_norm": 2398.982421875, "learning_rate": 3.564955521056471e-05, "loss": 75.0798, "step": 73140 }, { "epoch": 0.29553525616422305, "grad_norm": 559.6068115234375, "learning_rate": 3.5647816211531765e-05, "loss": 52.8689, "step": 73150 }, { "epoch": 0.2955756574295907, "grad_norm": 573.7216186523438, "learning_rate": 3.5646076907436586e-05, "loss": 73.3646, "step": 73160 }, { "epoch": 0.29561605869495833, "grad_norm": 761.6354370117188, "learning_rate": 3.5644337298313086e-05, "loss": 83.2933, "step": 73170 }, { "epoch": 0.29565645996032597, "grad_norm": 475.8253173828125, "learning_rate": 3.5642597384195166e-05, "loss": 110.5668, "step": 73180 }, { "epoch": 0.2956968612256936, "grad_norm": 823.4723510742188, "learning_rate": 3.564085716511677e-05, "loss": 52.4007, "step": 73190 }, { "epoch": 0.2957372624910612, "grad_norm": 2419.842041015625, "learning_rate": 3.5639116641111804e-05, "loss": 80.5333, "step": 73200 }, { "epoch": 0.29577766375642883, "grad_norm": 746.0689697265625, "learning_rate": 3.563737581221421e-05, "loss": 77.6496, "step": 73210 }, { "epoch": 0.2958180650217965, "grad_norm": 850.6876831054688, "learning_rate": 3.563563467845792e-05, "loss": 56.607, "step": 73220 }, { "epoch": 0.2958584662871641, "grad_norm": 854.9337158203125, "learning_rate": 3.563389323987688e-05, "loss": 106.5573, "step": 73230 }, { "epoch": 0.29589886755253175, "grad_norm": 1438.9986572265625, "learning_rate": 3.563215149650505e-05, "loss": 101.9418, "step": 73240 }, { "epoch": 0.2959392688178994, "grad_norm": 720.9024047851562, "learning_rate": 3.563040944837638e-05, "loss": 83.7277, "step": 73250 }, { "epoch": 0.29597967008326703, "grad_norm": 1647.9295654296875, "learning_rate": 3.562866709552483e-05, "loss": 56.4929, "step": 73260 }, { "epoch": 0.2960200713486346, "grad_norm": 614.6683349609375, "learning_rate": 3.562692443798436e-05, "loss": 53.4081, "step": 73270 }, { "epoch": 0.29606047261400226, "grad_norm": 1394.190185546875, "learning_rate": 3.562518147578896e-05, "loss": 84.1321, "step": 73280 }, { "epoch": 0.2961008738793699, "grad_norm": 955.5640869140625, "learning_rate": 3.56234382089726e-05, "loss": 70.5042, "step": 73290 }, { "epoch": 0.29614127514473754, "grad_norm": 0.0, "learning_rate": 3.5621694637569263e-05, "loss": 47.5671, "step": 73300 }, { "epoch": 0.2961816764101052, "grad_norm": 470.16400146484375, "learning_rate": 3.561995076161296e-05, "loss": 62.6391, "step": 73310 }, { "epoch": 0.2962220776754728, "grad_norm": 819.5801391601562, "learning_rate": 3.5618206581137664e-05, "loss": 45.0165, "step": 73320 }, { "epoch": 0.2962624789408404, "grad_norm": 590.9551391601562, "learning_rate": 3.5616462096177396e-05, "loss": 55.2823, "step": 73330 }, { "epoch": 0.29630288020620804, "grad_norm": 958.3380737304688, "learning_rate": 3.561471730676616e-05, "loss": 100.6481, "step": 73340 }, { "epoch": 0.2963432814715757, "grad_norm": 460.4815368652344, "learning_rate": 3.561297221293797e-05, "loss": 37.1382, "step": 73350 }, { "epoch": 0.2963836827369433, "grad_norm": 706.3472290039062, "learning_rate": 3.561122681472684e-05, "loss": 65.2587, "step": 73360 }, { "epoch": 0.29642408400231096, "grad_norm": 876.3798217773438, "learning_rate": 3.560948111216682e-05, "loss": 63.1272, "step": 73370 }, { "epoch": 0.2964644852676786, "grad_norm": 758.551025390625, "learning_rate": 3.560773510529192e-05, "loss": 101.7248, "step": 73380 }, { "epoch": 0.29650488653304624, "grad_norm": 1736.4576416015625, "learning_rate": 3.560598879413619e-05, "loss": 83.9242, "step": 73390 }, { "epoch": 0.2965452877984138, "grad_norm": 453.7065734863281, "learning_rate": 3.560424217873368e-05, "loss": 54.8189, "step": 73400 }, { "epoch": 0.29658568906378147, "grad_norm": 1004.9888305664062, "learning_rate": 3.560249525911842e-05, "loss": 65.9622, "step": 73410 }, { "epoch": 0.2966260903291491, "grad_norm": 704.4619140625, "learning_rate": 3.56007480353245e-05, "loss": 53.3604, "step": 73420 }, { "epoch": 0.29666649159451675, "grad_norm": 544.6813354492188, "learning_rate": 3.559900050738596e-05, "loss": 57.0338, "step": 73430 }, { "epoch": 0.2967068928598844, "grad_norm": 809.4789428710938, "learning_rate": 3.559725267533686e-05, "loss": 63.4728, "step": 73440 }, { "epoch": 0.296747294125252, "grad_norm": 1127.5126953125, "learning_rate": 3.559550453921131e-05, "loss": 60.555, "step": 73450 }, { "epoch": 0.2967876953906196, "grad_norm": 1059.709228515625, "learning_rate": 3.559375609904336e-05, "loss": 77.2083, "step": 73460 }, { "epoch": 0.29682809665598725, "grad_norm": 526.6636352539062, "learning_rate": 3.559200735486711e-05, "loss": 50.3187, "step": 73470 }, { "epoch": 0.2968684979213549, "grad_norm": 937.5421752929688, "learning_rate": 3.559025830671664e-05, "loss": 70.7462, "step": 73480 }, { "epoch": 0.29690889918672253, "grad_norm": 1234.875, "learning_rate": 3.558850895462607e-05, "loss": 78.2883, "step": 73490 }, { "epoch": 0.29694930045209017, "grad_norm": 407.6966857910156, "learning_rate": 3.5586759298629486e-05, "loss": 40.0754, "step": 73500 }, { "epoch": 0.2969897017174578, "grad_norm": 1737.535888671875, "learning_rate": 3.5585009338761005e-05, "loss": 64.8734, "step": 73510 }, { "epoch": 0.2970301029828254, "grad_norm": 335.2982482910156, "learning_rate": 3.5583259075054746e-05, "loss": 58.7745, "step": 73520 }, { "epoch": 0.29707050424819303, "grad_norm": 642.6002197265625, "learning_rate": 3.5581508507544825e-05, "loss": 99.0578, "step": 73530 }, { "epoch": 0.2971109055135607, "grad_norm": 107.51019287109375, "learning_rate": 3.5579757636265377e-05, "loss": 55.8075, "step": 73540 }, { "epoch": 0.2971513067789283, "grad_norm": 650.3108520507812, "learning_rate": 3.557800646125053e-05, "loss": 83.543, "step": 73550 }, { "epoch": 0.29719170804429595, "grad_norm": 650.6173706054688, "learning_rate": 3.557625498253443e-05, "loss": 61.2145, "step": 73560 }, { "epoch": 0.2972321093096636, "grad_norm": 1853.13037109375, "learning_rate": 3.5574503200151213e-05, "loss": 101.2655, "step": 73570 }, { "epoch": 0.29727251057503123, "grad_norm": 846.0449829101562, "learning_rate": 3.557275111413505e-05, "loss": 104.6277, "step": 73580 }, { "epoch": 0.2973129118403988, "grad_norm": 472.6047058105469, "learning_rate": 3.557099872452008e-05, "loss": 83.1568, "step": 73590 }, { "epoch": 0.29735331310576646, "grad_norm": 655.8888549804688, "learning_rate": 3.5569246031340474e-05, "loss": 81.285, "step": 73600 }, { "epoch": 0.2973937143711341, "grad_norm": 833.2941284179688, "learning_rate": 3.5567493034630395e-05, "loss": 88.6944, "step": 73610 }, { "epoch": 0.29743411563650174, "grad_norm": 969.6983642578125, "learning_rate": 3.5565739734424034e-05, "loss": 67.491, "step": 73620 }, { "epoch": 0.2974745169018694, "grad_norm": 482.8932189941406, "learning_rate": 3.5563986130755557e-05, "loss": 52.3097, "step": 73630 }, { "epoch": 0.297514918167237, "grad_norm": 594.0313110351562, "learning_rate": 3.556223222365916e-05, "loss": 60.7423, "step": 73640 }, { "epoch": 0.2975553194326046, "grad_norm": 1269.3704833984375, "learning_rate": 3.556047801316903e-05, "loss": 86.5832, "step": 73650 }, { "epoch": 0.29759572069797224, "grad_norm": 527.30615234375, "learning_rate": 3.555872349931938e-05, "loss": 80.0871, "step": 73660 }, { "epoch": 0.2976361219633399, "grad_norm": 1384.492431640625, "learning_rate": 3.5556968682144395e-05, "loss": 56.7632, "step": 73670 }, { "epoch": 0.2976765232287075, "grad_norm": 686.9051513671875, "learning_rate": 3.5555213561678305e-05, "loss": 59.716, "step": 73680 }, { "epoch": 0.29771692449407516, "grad_norm": 965.8775634765625, "learning_rate": 3.555345813795531e-05, "loss": 86.5317, "step": 73690 }, { "epoch": 0.2977573257594428, "grad_norm": 502.6318054199219, "learning_rate": 3.5551702411009645e-05, "loss": 40.2029, "step": 73700 }, { "epoch": 0.29779772702481044, "grad_norm": 1723.965576171875, "learning_rate": 3.5549946380875536e-05, "loss": 66.3792, "step": 73710 }, { "epoch": 0.29783812829017803, "grad_norm": 859.0875854492188, "learning_rate": 3.554819004758721e-05, "loss": 62.8533, "step": 73720 }, { "epoch": 0.29787852955554567, "grad_norm": 994.7597045898438, "learning_rate": 3.554643341117892e-05, "loss": 69.4177, "step": 73730 }, { "epoch": 0.2979189308209133, "grad_norm": 1833.2977294921875, "learning_rate": 3.5544676471684906e-05, "loss": 108.2813, "step": 73740 }, { "epoch": 0.29795933208628095, "grad_norm": 513.6215209960938, "learning_rate": 3.554291922913942e-05, "loss": 64.1931, "step": 73750 }, { "epoch": 0.2979997333516486, "grad_norm": 458.6168212890625, "learning_rate": 3.554116168357673e-05, "loss": 49.7643, "step": 73760 }, { "epoch": 0.29804013461701623, "grad_norm": 634.9912109375, "learning_rate": 3.5539403835031075e-05, "loss": 47.9218, "step": 73770 }, { "epoch": 0.2980805358823838, "grad_norm": 1256.944091796875, "learning_rate": 3.553764568353676e-05, "loss": 66.0002, "step": 73780 }, { "epoch": 0.29812093714775145, "grad_norm": 776.3186645507812, "learning_rate": 3.553588722912803e-05, "loss": 76.021, "step": 73790 }, { "epoch": 0.2981613384131191, "grad_norm": 755.112548828125, "learning_rate": 3.553412847183919e-05, "loss": 75.2325, "step": 73800 }, { "epoch": 0.29820173967848673, "grad_norm": 896.2506103515625, "learning_rate": 3.5532369411704505e-05, "loss": 55.7502, "step": 73810 }, { "epoch": 0.2982421409438544, "grad_norm": 754.2654418945312, "learning_rate": 3.5530610048758295e-05, "loss": 71.5779, "step": 73820 }, { "epoch": 0.298282542209222, "grad_norm": 678.224365234375, "learning_rate": 3.552885038303484e-05, "loss": 45.6851, "step": 73830 }, { "epoch": 0.2983229434745896, "grad_norm": 925.0071411132812, "learning_rate": 3.552709041456845e-05, "loss": 54.6353, "step": 73840 }, { "epoch": 0.29836334473995724, "grad_norm": 1991.972412109375, "learning_rate": 3.552533014339344e-05, "loss": 86.6209, "step": 73850 }, { "epoch": 0.2984037460053249, "grad_norm": 2509.668212890625, "learning_rate": 3.552356956954413e-05, "loss": 122.7409, "step": 73860 }, { "epoch": 0.2984441472706925, "grad_norm": 898.805908203125, "learning_rate": 3.552180869305483e-05, "loss": 57.9041, "step": 73870 }, { "epoch": 0.29848454853606016, "grad_norm": 789.3969116210938, "learning_rate": 3.552004751395989e-05, "loss": 55.5541, "step": 73880 }, { "epoch": 0.2985249498014278, "grad_norm": 1287.8016357421875, "learning_rate": 3.551828603229363e-05, "loss": 67.9635, "step": 73890 }, { "epoch": 0.29856535106679544, "grad_norm": 2758.351806640625, "learning_rate": 3.551652424809039e-05, "loss": 65.6716, "step": 73900 }, { "epoch": 0.298605752332163, "grad_norm": 676.35595703125, "learning_rate": 3.551476216138453e-05, "loss": 53.9801, "step": 73910 }, { "epoch": 0.29864615359753066, "grad_norm": 1682.7109375, "learning_rate": 3.551299977221038e-05, "loss": 105.8357, "step": 73920 }, { "epoch": 0.2986865548628983, "grad_norm": 485.17279052734375, "learning_rate": 3.551123708060233e-05, "loss": 46.7262, "step": 73930 }, { "epoch": 0.29872695612826594, "grad_norm": 1494.2843017578125, "learning_rate": 3.550947408659471e-05, "loss": 113.5479, "step": 73940 }, { "epoch": 0.2987673573936336, "grad_norm": 495.2820129394531, "learning_rate": 3.550771079022192e-05, "loss": 50.9028, "step": 73950 }, { "epoch": 0.2988077586590012, "grad_norm": 1544.240234375, "learning_rate": 3.550594719151832e-05, "loss": 56.2776, "step": 73960 }, { "epoch": 0.2988481599243688, "grad_norm": 451.206787109375, "learning_rate": 3.550418329051829e-05, "loss": 71.9325, "step": 73970 }, { "epoch": 0.29888856118973645, "grad_norm": 442.68548583984375, "learning_rate": 3.550241908725624e-05, "loss": 63.0581, "step": 73980 }, { "epoch": 0.2989289624551041, "grad_norm": 519.7935180664062, "learning_rate": 3.550065458176653e-05, "loss": 51.5123, "step": 73990 }, { "epoch": 0.2989693637204717, "grad_norm": 497.5556945800781, "learning_rate": 3.549888977408359e-05, "loss": 61.3014, "step": 74000 }, { "epoch": 0.29900976498583937, "grad_norm": 700.3739013671875, "learning_rate": 3.5497124664241816e-05, "loss": 81.6645, "step": 74010 }, { "epoch": 0.299050166251207, "grad_norm": 497.64599609375, "learning_rate": 3.549535925227562e-05, "loss": 48.5412, "step": 74020 }, { "epoch": 0.29909056751657465, "grad_norm": 499.0395812988281, "learning_rate": 3.549359353821941e-05, "loss": 59.0803, "step": 74030 }, { "epoch": 0.29913096878194223, "grad_norm": 1075.3427734375, "learning_rate": 3.5491827522107624e-05, "loss": 67.4793, "step": 74040 }, { "epoch": 0.29917137004730987, "grad_norm": 627.8690185546875, "learning_rate": 3.5490061203974676e-05, "loss": 69.5395, "step": 74050 }, { "epoch": 0.2992117713126775, "grad_norm": 1320.747314453125, "learning_rate": 3.548829458385502e-05, "loss": 81.1121, "step": 74060 }, { "epoch": 0.29925217257804515, "grad_norm": 751.7114868164062, "learning_rate": 3.548652766178308e-05, "loss": 49.1731, "step": 74070 }, { "epoch": 0.2992925738434128, "grad_norm": 974.225830078125, "learning_rate": 3.5484760437793316e-05, "loss": 79.8373, "step": 74080 }, { "epoch": 0.29933297510878043, "grad_norm": 2224.293701171875, "learning_rate": 3.5482992911920174e-05, "loss": 65.8389, "step": 74090 }, { "epoch": 0.299373376374148, "grad_norm": 1174.2479248046875, "learning_rate": 3.548122508419811e-05, "loss": 108.1642, "step": 74100 }, { "epoch": 0.29941377763951565, "grad_norm": 264.214599609375, "learning_rate": 3.547945695466159e-05, "loss": 74.0446, "step": 74110 }, { "epoch": 0.2994541789048833, "grad_norm": 1260.7275390625, "learning_rate": 3.5477688523345095e-05, "loss": 120.0823, "step": 74120 }, { "epoch": 0.29949458017025093, "grad_norm": 886.28515625, "learning_rate": 3.547591979028309e-05, "loss": 79.4073, "step": 74130 }, { "epoch": 0.2995349814356186, "grad_norm": 1063.769775390625, "learning_rate": 3.5474150755510065e-05, "loss": 60.8101, "step": 74140 }, { "epoch": 0.2995753827009862, "grad_norm": 761.662109375, "learning_rate": 3.54723814190605e-05, "loss": 67.1386, "step": 74150 }, { "epoch": 0.2996157839663538, "grad_norm": 901.34521484375, "learning_rate": 3.54706117809689e-05, "loss": 73.7574, "step": 74160 }, { "epoch": 0.29965618523172144, "grad_norm": 1164.415771484375, "learning_rate": 3.546884184126975e-05, "loss": 84.1045, "step": 74170 }, { "epoch": 0.2996965864970891, "grad_norm": 808.2579956054688, "learning_rate": 3.546707159999756e-05, "loss": 77.337, "step": 74180 }, { "epoch": 0.2997369877624567, "grad_norm": 238.32205200195312, "learning_rate": 3.5465301057186864e-05, "loss": 38.9205, "step": 74190 }, { "epoch": 0.29977738902782436, "grad_norm": 651.8353881835938, "learning_rate": 3.5463530212872145e-05, "loss": 55.3188, "step": 74200 }, { "epoch": 0.299817790293192, "grad_norm": 323.20458984375, "learning_rate": 3.546175906708795e-05, "loss": 42.7316, "step": 74210 }, { "epoch": 0.29985819155855964, "grad_norm": 469.6799011230469, "learning_rate": 3.545998761986881e-05, "loss": 59.3492, "step": 74220 }, { "epoch": 0.2998985928239272, "grad_norm": 2659.387451171875, "learning_rate": 3.545821587124924e-05, "loss": 56.3482, "step": 74230 }, { "epoch": 0.29993899408929486, "grad_norm": 893.9676513671875, "learning_rate": 3.54564438212638e-05, "loss": 67.2324, "step": 74240 }, { "epoch": 0.2999793953546625, "grad_norm": 499.2391662597656, "learning_rate": 3.5454671469947024e-05, "loss": 50.6774, "step": 74250 }, { "epoch": 0.30001979662003014, "grad_norm": 738.5967407226562, "learning_rate": 3.5452898817333474e-05, "loss": 82.5717, "step": 74260 }, { "epoch": 0.3000601978853978, "grad_norm": 384.5932922363281, "learning_rate": 3.545112586345771e-05, "loss": 54.1004, "step": 74270 }, { "epoch": 0.3001005991507654, "grad_norm": 1085.3819580078125, "learning_rate": 3.544935260835429e-05, "loss": 77.678, "step": 74280 }, { "epoch": 0.300141000416133, "grad_norm": 436.23541259765625, "learning_rate": 3.5447579052057776e-05, "loss": 60.4012, "step": 74290 }, { "epoch": 0.30018140168150065, "grad_norm": 505.67169189453125, "learning_rate": 3.544580519460277e-05, "loss": 70.9453, "step": 74300 }, { "epoch": 0.3002218029468683, "grad_norm": 1916.5804443359375, "learning_rate": 3.5444031036023837e-05, "loss": 81.2167, "step": 74310 }, { "epoch": 0.30026220421223593, "grad_norm": 835.12158203125, "learning_rate": 3.5442256576355564e-05, "loss": 80.3109, "step": 74320 }, { "epoch": 0.30030260547760357, "grad_norm": 746.8428344726562, "learning_rate": 3.544048181563255e-05, "loss": 123.7258, "step": 74330 }, { "epoch": 0.3003430067429712, "grad_norm": 900.0025024414062, "learning_rate": 3.5438706753889396e-05, "loss": 80.3383, "step": 74340 }, { "epoch": 0.30038340800833885, "grad_norm": 634.4719848632812, "learning_rate": 3.54369313911607e-05, "loss": 80.1079, "step": 74350 }, { "epoch": 0.30042380927370643, "grad_norm": 1878.78466796875, "learning_rate": 3.543515572748108e-05, "loss": 66.6467, "step": 74360 }, { "epoch": 0.3004642105390741, "grad_norm": 749.2390747070312, "learning_rate": 3.5433379762885165e-05, "loss": 82.4085, "step": 74370 }, { "epoch": 0.3005046118044417, "grad_norm": 3661.9140625, "learning_rate": 3.543160349740755e-05, "loss": 69.3461, "step": 74380 }, { "epoch": 0.30054501306980935, "grad_norm": 276.2242736816406, "learning_rate": 3.542982693108289e-05, "loss": 74.4839, "step": 74390 }, { "epoch": 0.300585414335177, "grad_norm": 2024.406005859375, "learning_rate": 3.542805006394581e-05, "loss": 75.6991, "step": 74400 }, { "epoch": 0.30062581560054463, "grad_norm": 1021.9595336914062, "learning_rate": 3.5426272896030944e-05, "loss": 77.9724, "step": 74410 }, { "epoch": 0.3006662168659122, "grad_norm": 353.10577392578125, "learning_rate": 3.5424495427372946e-05, "loss": 74.5174, "step": 74420 }, { "epoch": 0.30070661813127986, "grad_norm": 814.4290161132812, "learning_rate": 3.5422717658006475e-05, "loss": 77.0783, "step": 74430 }, { "epoch": 0.3007470193966475, "grad_norm": 1428.374267578125, "learning_rate": 3.542093958796618e-05, "loss": 71.8454, "step": 74440 }, { "epoch": 0.30078742066201514, "grad_norm": 358.1316833496094, "learning_rate": 3.541916121728673e-05, "loss": 41.539, "step": 74450 }, { "epoch": 0.3008278219273828, "grad_norm": 610.126220703125, "learning_rate": 3.541738254600279e-05, "loss": 58.5676, "step": 74460 }, { "epoch": 0.3008682231927504, "grad_norm": 570.5245971679688, "learning_rate": 3.541560357414904e-05, "loss": 58.6159, "step": 74470 }, { "epoch": 0.300908624458118, "grad_norm": 795.262939453125, "learning_rate": 3.5413824301760165e-05, "loss": 82.1075, "step": 74480 }, { "epoch": 0.30094902572348564, "grad_norm": 949.2442016601562, "learning_rate": 3.541204472887085e-05, "loss": 64.7017, "step": 74490 }, { "epoch": 0.3009894269888533, "grad_norm": 506.1993713378906, "learning_rate": 3.541026485551579e-05, "loss": 52.474, "step": 74500 }, { "epoch": 0.3010298282542209, "grad_norm": 966.4857788085938, "learning_rate": 3.540848468172968e-05, "loss": 90.3593, "step": 74510 }, { "epoch": 0.30107022951958856, "grad_norm": 864.8375854492188, "learning_rate": 3.540670420754722e-05, "loss": 69.907, "step": 74520 }, { "epoch": 0.3011106307849562, "grad_norm": 426.7903137207031, "learning_rate": 3.540492343300314e-05, "loss": 96.3889, "step": 74530 }, { "epoch": 0.30115103205032384, "grad_norm": 573.3229370117188, "learning_rate": 3.540314235813215e-05, "loss": 66.8149, "step": 74540 }, { "epoch": 0.3011914333156914, "grad_norm": 583.5394897460938, "learning_rate": 3.540136098296896e-05, "loss": 66.5317, "step": 74550 }, { "epoch": 0.30123183458105907, "grad_norm": 1171.0955810546875, "learning_rate": 3.5399579307548314e-05, "loss": 74.4664, "step": 74560 }, { "epoch": 0.3012722358464267, "grad_norm": 771.04345703125, "learning_rate": 3.539779733190494e-05, "loss": 59.7181, "step": 74570 }, { "epoch": 0.30131263711179435, "grad_norm": 1087.257080078125, "learning_rate": 3.539601505607358e-05, "loss": 61.8407, "step": 74580 }, { "epoch": 0.301353038377162, "grad_norm": 571.8591918945312, "learning_rate": 3.5394232480088986e-05, "loss": 61.0519, "step": 74590 }, { "epoch": 0.3013934396425296, "grad_norm": 1148.0616455078125, "learning_rate": 3.5392449603985894e-05, "loss": 84.9272, "step": 74600 }, { "epoch": 0.3014338409078972, "grad_norm": 669.8700561523438, "learning_rate": 3.539066642779907e-05, "loss": 72.5787, "step": 74610 }, { "epoch": 0.30147424217326485, "grad_norm": 533.347900390625, "learning_rate": 3.538888295156329e-05, "loss": 61.7806, "step": 74620 }, { "epoch": 0.3015146434386325, "grad_norm": 1645.3468017578125, "learning_rate": 3.538709917531331e-05, "loss": 59.6045, "step": 74630 }, { "epoch": 0.30155504470400013, "grad_norm": 1048.553955078125, "learning_rate": 3.538531509908391e-05, "loss": 67.4957, "step": 74640 }, { "epoch": 0.30159544596936777, "grad_norm": 629.9532470703125, "learning_rate": 3.538353072290988e-05, "loss": 75.4748, "step": 74650 }, { "epoch": 0.3016358472347354, "grad_norm": 561.3018798828125, "learning_rate": 3.538174604682599e-05, "loss": 63.2835, "step": 74660 }, { "epoch": 0.30167624850010305, "grad_norm": 690.0833129882812, "learning_rate": 3.537996107086704e-05, "loss": 86.1682, "step": 74670 }, { "epoch": 0.30171664976547063, "grad_norm": 634.8307495117188, "learning_rate": 3.537817579506783e-05, "loss": 65.4851, "step": 74680 }, { "epoch": 0.3017570510308383, "grad_norm": 1294.4716796875, "learning_rate": 3.537639021946317e-05, "loss": 77.3135, "step": 74690 }, { "epoch": 0.3017974522962059, "grad_norm": 1165.074462890625, "learning_rate": 3.5374604344087866e-05, "loss": 58.4666, "step": 74700 }, { "epoch": 0.30183785356157355, "grad_norm": 833.4439086914062, "learning_rate": 3.5372818168976734e-05, "loss": 47.627, "step": 74710 }, { "epoch": 0.3018782548269412, "grad_norm": 1249.32763671875, "learning_rate": 3.53710316941646e-05, "loss": 62.6458, "step": 74720 }, { "epoch": 0.30191865609230883, "grad_norm": 466.3185119628906, "learning_rate": 3.5369244919686284e-05, "loss": 62.2887, "step": 74730 }, { "epoch": 0.3019590573576764, "grad_norm": 1040.11669921875, "learning_rate": 3.536745784557663e-05, "loss": 66.7786, "step": 74740 }, { "epoch": 0.30199945862304406, "grad_norm": 980.9957885742188, "learning_rate": 3.536567047187047e-05, "loss": 68.8723, "step": 74750 }, { "epoch": 0.3020398598884117, "grad_norm": 927.2369384765625, "learning_rate": 3.536388279860266e-05, "loss": 69.8665, "step": 74760 }, { "epoch": 0.30208026115377934, "grad_norm": 902.037353515625, "learning_rate": 3.536209482580804e-05, "loss": 92.5639, "step": 74770 }, { "epoch": 0.302120662419147, "grad_norm": 831.7162475585938, "learning_rate": 3.536030655352147e-05, "loss": 67.2394, "step": 74780 }, { "epoch": 0.3021610636845146, "grad_norm": 381.0028381347656, "learning_rate": 3.535851798177782e-05, "loss": 58.4229, "step": 74790 }, { "epoch": 0.3022014649498822, "grad_norm": 1212.741455078125, "learning_rate": 3.535672911061196e-05, "loss": 55.6143, "step": 74800 }, { "epoch": 0.30224186621524984, "grad_norm": 400.9530944824219, "learning_rate": 3.535493994005874e-05, "loss": 55.2473, "step": 74810 }, { "epoch": 0.3022822674806175, "grad_norm": 631.3129272460938, "learning_rate": 3.535315047015308e-05, "loss": 49.2248, "step": 74820 }, { "epoch": 0.3023226687459851, "grad_norm": 1094.7774658203125, "learning_rate": 3.535136070092984e-05, "loss": 80.757, "step": 74830 }, { "epoch": 0.30236307001135276, "grad_norm": 753.2235107421875, "learning_rate": 3.5349570632423925e-05, "loss": 80.3722, "step": 74840 }, { "epoch": 0.3024034712767204, "grad_norm": 490.4667053222656, "learning_rate": 3.534778026467022e-05, "loss": 67.9342, "step": 74850 }, { "epoch": 0.30244387254208804, "grad_norm": 1227.3990478515625, "learning_rate": 3.534598959770364e-05, "loss": 112.9435, "step": 74860 }, { "epoch": 0.30248427380745563, "grad_norm": 414.38543701171875, "learning_rate": 3.5344198631559096e-05, "loss": 60.2385, "step": 74870 }, { "epoch": 0.30252467507282327, "grad_norm": 1657.522216796875, "learning_rate": 3.5342407366271495e-05, "loss": 54.7299, "step": 74880 }, { "epoch": 0.3025650763381909, "grad_norm": 1603.0582275390625, "learning_rate": 3.534061580187577e-05, "loss": 79.5543, "step": 74890 }, { "epoch": 0.30260547760355855, "grad_norm": 674.0008544921875, "learning_rate": 3.5338823938406834e-05, "loss": 56.2018, "step": 74900 }, { "epoch": 0.3026458788689262, "grad_norm": 1061.078125, "learning_rate": 3.533703177589964e-05, "loss": 65.559, "step": 74910 }, { "epoch": 0.30268628013429383, "grad_norm": 466.67864990234375, "learning_rate": 3.53352393143891e-05, "loss": 54.7194, "step": 74920 }, { "epoch": 0.3027266813996614, "grad_norm": 419.76861572265625, "learning_rate": 3.5333446553910184e-05, "loss": 71.4791, "step": 74930 }, { "epoch": 0.30276708266502905, "grad_norm": 1148.4244384765625, "learning_rate": 3.533165349449783e-05, "loss": 107.1067, "step": 74940 }, { "epoch": 0.3028074839303967, "grad_norm": 1184.2965087890625, "learning_rate": 3.5329860136187e-05, "loss": 63.7784, "step": 74950 }, { "epoch": 0.30284788519576433, "grad_norm": 552.2979125976562, "learning_rate": 3.5328066479012655e-05, "loss": 69.7092, "step": 74960 }, { "epoch": 0.302888286461132, "grad_norm": 405.5337829589844, "learning_rate": 3.5326272523009754e-05, "loss": 60.7039, "step": 74970 }, { "epoch": 0.3029286877264996, "grad_norm": 963.63330078125, "learning_rate": 3.532447826821329e-05, "loss": 44.4223, "step": 74980 }, { "epoch": 0.30296908899186725, "grad_norm": 574.0205078125, "learning_rate": 3.532268371465823e-05, "loss": 66.2384, "step": 74990 }, { "epoch": 0.30300949025723484, "grad_norm": 607.9288940429688, "learning_rate": 3.532088886237956e-05, "loss": 53.3745, "step": 75000 }, { "epoch": 0.3030498915226025, "grad_norm": 1956.13623046875, "learning_rate": 3.531909371141228e-05, "loss": 109.1442, "step": 75010 }, { "epoch": 0.3030902927879701, "grad_norm": 1171.8389892578125, "learning_rate": 3.531729826179138e-05, "loss": 62.4617, "step": 75020 }, { "epoch": 0.30313069405333776, "grad_norm": 1014.7931518554688, "learning_rate": 3.531550251355186e-05, "loss": 58.9204, "step": 75030 }, { "epoch": 0.3031710953187054, "grad_norm": 1173.664306640625, "learning_rate": 3.531370646672874e-05, "loss": 59.5735, "step": 75040 }, { "epoch": 0.30321149658407304, "grad_norm": 913.2959594726562, "learning_rate": 3.5311910121357016e-05, "loss": 62.6237, "step": 75050 }, { "epoch": 0.3032518978494406, "grad_norm": 637.90185546875, "learning_rate": 3.531011347747173e-05, "loss": 68.8141, "step": 75060 }, { "epoch": 0.30329229911480826, "grad_norm": 1701.0555419921875, "learning_rate": 3.53083165351079e-05, "loss": 122.4263, "step": 75070 }, { "epoch": 0.3033327003801759, "grad_norm": 656.2587280273438, "learning_rate": 3.530651929430055e-05, "loss": 64.5342, "step": 75080 }, { "epoch": 0.30337310164554354, "grad_norm": 884.7515869140625, "learning_rate": 3.5304721755084734e-05, "loss": 45.98, "step": 75090 }, { "epoch": 0.3034135029109112, "grad_norm": 628.9338989257812, "learning_rate": 3.530292391749549e-05, "loss": 63.3537, "step": 75100 }, { "epoch": 0.3034539041762788, "grad_norm": 512.0625, "learning_rate": 3.530112578156786e-05, "loss": 79.4432, "step": 75110 }, { "epoch": 0.3034943054416464, "grad_norm": 695.7696533203125, "learning_rate": 3.529932734733691e-05, "loss": 82.5149, "step": 75120 }, { "epoch": 0.30353470670701405, "grad_norm": 704.0012817382812, "learning_rate": 3.529752861483769e-05, "loss": 77.3432, "step": 75130 }, { "epoch": 0.3035751079723817, "grad_norm": 930.8369750976562, "learning_rate": 3.529572958410528e-05, "loss": 43.9781, "step": 75140 }, { "epoch": 0.3036155092377493, "grad_norm": 1269.88720703125, "learning_rate": 3.529393025517475e-05, "loss": 72.7654, "step": 75150 }, { "epoch": 0.30365591050311697, "grad_norm": 739.17578125, "learning_rate": 3.529213062808116e-05, "loss": 43.866, "step": 75160 }, { "epoch": 0.3036963117684846, "grad_norm": 1014.5413818359375, "learning_rate": 3.5290330702859624e-05, "loss": 64.9485, "step": 75170 }, { "epoch": 0.30373671303385225, "grad_norm": 440.79931640625, "learning_rate": 3.528853047954521e-05, "loss": 65.0976, "step": 75180 }, { "epoch": 0.30377711429921983, "grad_norm": 861.1629028320312, "learning_rate": 3.5286729958173036e-05, "loss": 91.4645, "step": 75190 }, { "epoch": 0.30381751556458747, "grad_norm": 574.344482421875, "learning_rate": 3.528492913877818e-05, "loss": 65.2671, "step": 75200 }, { "epoch": 0.3038579168299551, "grad_norm": 993.7310791015625, "learning_rate": 3.528312802139577e-05, "loss": 73.2417, "step": 75210 }, { "epoch": 0.30389831809532275, "grad_norm": 735.0557250976562, "learning_rate": 3.5281326606060905e-05, "loss": 73.2255, "step": 75220 }, { "epoch": 0.3039387193606904, "grad_norm": 0.0, "learning_rate": 3.5279524892808714e-05, "loss": 49.4044, "step": 75230 }, { "epoch": 0.30397912062605803, "grad_norm": 721.876220703125, "learning_rate": 3.5277722881674314e-05, "loss": 71.4891, "step": 75240 }, { "epoch": 0.3040195218914256, "grad_norm": 816.6832885742188, "learning_rate": 3.527592057269285e-05, "loss": 44.2561, "step": 75250 }, { "epoch": 0.30405992315679325, "grad_norm": 619.421630859375, "learning_rate": 3.527411796589944e-05, "loss": 64.0264, "step": 75260 }, { "epoch": 0.3041003244221609, "grad_norm": 0.0, "learning_rate": 3.5272315061329236e-05, "loss": 52.1535, "step": 75270 }, { "epoch": 0.30414072568752853, "grad_norm": 571.986572265625, "learning_rate": 3.52705118590174e-05, "loss": 50.209, "step": 75280 }, { "epoch": 0.3041811269528962, "grad_norm": 402.1851501464844, "learning_rate": 3.5268708358999064e-05, "loss": 62.3056, "step": 75290 }, { "epoch": 0.3042215282182638, "grad_norm": 503.4267272949219, "learning_rate": 3.52669045613094e-05, "loss": 44.4806, "step": 75300 }, { "epoch": 0.30426192948363145, "grad_norm": 685.234130859375, "learning_rate": 3.5265100465983564e-05, "loss": 63.299, "step": 75310 }, { "epoch": 0.30430233074899904, "grad_norm": 2023.0379638671875, "learning_rate": 3.526329607305675e-05, "loss": 57.7, "step": 75320 }, { "epoch": 0.3043427320143667, "grad_norm": 1253.1976318359375, "learning_rate": 3.526149138256411e-05, "loss": 61.8799, "step": 75330 }, { "epoch": 0.3043831332797343, "grad_norm": 493.9773864746094, "learning_rate": 3.525968639454084e-05, "loss": 64.0784, "step": 75340 }, { "epoch": 0.30442353454510196, "grad_norm": 1406.955322265625, "learning_rate": 3.525788110902213e-05, "loss": 77.3711, "step": 75350 }, { "epoch": 0.3044639358104696, "grad_norm": 1954.9525146484375, "learning_rate": 3.525607552604317e-05, "loss": 72.2056, "step": 75360 }, { "epoch": 0.30450433707583724, "grad_norm": 2237.93115234375, "learning_rate": 3.525426964563916e-05, "loss": 78.1073, "step": 75370 }, { "epoch": 0.3045447383412048, "grad_norm": 966.1798095703125, "learning_rate": 3.525246346784532e-05, "loss": 79.0288, "step": 75380 }, { "epoch": 0.30458513960657246, "grad_norm": 691.7649536132812, "learning_rate": 3.525065699269684e-05, "loss": 77.6586, "step": 75390 }, { "epoch": 0.3046255408719401, "grad_norm": 638.4725952148438, "learning_rate": 3.524885022022896e-05, "loss": 67.9005, "step": 75400 }, { "epoch": 0.30466594213730774, "grad_norm": 994.4598388671875, "learning_rate": 3.5247043150476895e-05, "loss": 66.9641, "step": 75410 }, { "epoch": 0.3047063434026754, "grad_norm": 849.9085693359375, "learning_rate": 3.5245235783475866e-05, "loss": 51.2462, "step": 75420 }, { "epoch": 0.304746744668043, "grad_norm": 569.6253051757812, "learning_rate": 3.524342811926112e-05, "loss": 53.9249, "step": 75430 }, { "epoch": 0.3047871459334106, "grad_norm": 1156.7808837890625, "learning_rate": 3.524162015786789e-05, "loss": 73.3264, "step": 75440 }, { "epoch": 0.30482754719877825, "grad_norm": 1112.2142333984375, "learning_rate": 3.523981189933144e-05, "loss": 74.5475, "step": 75450 }, { "epoch": 0.3048679484641459, "grad_norm": 857.4349365234375, "learning_rate": 3.5238003343687005e-05, "loss": 89.3386, "step": 75460 }, { "epoch": 0.30490834972951353, "grad_norm": 591.4561767578125, "learning_rate": 3.523619449096985e-05, "loss": 34.5415, "step": 75470 }, { "epoch": 0.30494875099488117, "grad_norm": 791.2875366210938, "learning_rate": 3.523438534121524e-05, "loss": 56.9031, "step": 75480 }, { "epoch": 0.3049891522602488, "grad_norm": 719.8794555664062, "learning_rate": 3.523257589445845e-05, "loss": 53.7153, "step": 75490 }, { "epoch": 0.30502955352561645, "grad_norm": 1207.1207275390625, "learning_rate": 3.523076615073474e-05, "loss": 93.9197, "step": 75500 }, { "epoch": 0.30506995479098403, "grad_norm": 747.3038330078125, "learning_rate": 3.522895611007941e-05, "loss": 91.8553, "step": 75510 }, { "epoch": 0.3051103560563517, "grad_norm": 646.2911987304688, "learning_rate": 3.522714577252773e-05, "loss": 51.9658, "step": 75520 }, { "epoch": 0.3051507573217193, "grad_norm": 528.4061279296875, "learning_rate": 3.5225335138115016e-05, "loss": 61.4797, "step": 75530 }, { "epoch": 0.30519115858708695, "grad_norm": 714.3599243164062, "learning_rate": 3.522352420687655e-05, "loss": 60.6513, "step": 75540 }, { "epoch": 0.3052315598524546, "grad_norm": 1063.8582763671875, "learning_rate": 3.522171297884764e-05, "loss": 84.7672, "step": 75550 }, { "epoch": 0.30527196111782223, "grad_norm": 539.630126953125, "learning_rate": 3.52199014540636e-05, "loss": 83.8034, "step": 75560 }, { "epoch": 0.3053123623831898, "grad_norm": 853.978515625, "learning_rate": 3.5218089632559744e-05, "loss": 58.7987, "step": 75570 }, { "epoch": 0.30535276364855746, "grad_norm": 271.160888671875, "learning_rate": 3.52162775143714e-05, "loss": 43.0782, "step": 75580 }, { "epoch": 0.3053931649139251, "grad_norm": 465.7930908203125, "learning_rate": 3.521446509953389e-05, "loss": 61.485, "step": 75590 }, { "epoch": 0.30543356617929274, "grad_norm": 978.6604614257812, "learning_rate": 3.521265238808255e-05, "loss": 49.8255, "step": 75600 }, { "epoch": 0.3054739674446604, "grad_norm": 1103.0828857421875, "learning_rate": 3.521083938005272e-05, "loss": 78.9844, "step": 75610 }, { "epoch": 0.305514368710028, "grad_norm": 750.4556884765625, "learning_rate": 3.520902607547974e-05, "loss": 57.8552, "step": 75620 }, { "epoch": 0.30555476997539566, "grad_norm": 667.4697265625, "learning_rate": 3.520721247439897e-05, "loss": 37.519, "step": 75630 }, { "epoch": 0.30559517124076324, "grad_norm": 1199.414306640625, "learning_rate": 3.520539857684577e-05, "loss": 91.3736, "step": 75640 }, { "epoch": 0.3056355725061309, "grad_norm": 583.2481689453125, "learning_rate": 3.520358438285548e-05, "loss": 86.4529, "step": 75650 }, { "epoch": 0.3056759737714985, "grad_norm": 1052.4718017578125, "learning_rate": 3.5201769892463506e-05, "loss": 66.1742, "step": 75660 }, { "epoch": 0.30571637503686616, "grad_norm": 515.3801879882812, "learning_rate": 3.519995510570519e-05, "loss": 87.5196, "step": 75670 }, { "epoch": 0.3057567763022338, "grad_norm": 666.802490234375, "learning_rate": 3.519814002261593e-05, "loss": 55.1974, "step": 75680 }, { "epoch": 0.30579717756760144, "grad_norm": 973.0885009765625, "learning_rate": 3.5196324643231094e-05, "loss": 66.9718, "step": 75690 }, { "epoch": 0.305837578832969, "grad_norm": 1037.1934814453125, "learning_rate": 3.51945089675861e-05, "loss": 54.3903, "step": 75700 }, { "epoch": 0.30587798009833667, "grad_norm": 1361.24560546875, "learning_rate": 3.5192692995716324e-05, "loss": 57.3873, "step": 75710 }, { "epoch": 0.3059183813637043, "grad_norm": 409.15191650390625, "learning_rate": 3.519087672765717e-05, "loss": 47.4263, "step": 75720 }, { "epoch": 0.30595878262907195, "grad_norm": 472.4361877441406, "learning_rate": 3.518906016344406e-05, "loss": 66.8334, "step": 75730 }, { "epoch": 0.3059991838944396, "grad_norm": 229.70162963867188, "learning_rate": 3.5187243303112406e-05, "loss": 60.1793, "step": 75740 }, { "epoch": 0.3060395851598072, "grad_norm": 493.8519287109375, "learning_rate": 3.518542614669762e-05, "loss": 53.0308, "step": 75750 }, { "epoch": 0.3060799864251748, "grad_norm": 646.2508544921875, "learning_rate": 3.518360869423514e-05, "loss": 89.02, "step": 75760 }, { "epoch": 0.30612038769054245, "grad_norm": 0.0, "learning_rate": 3.518179094576038e-05, "loss": 63.6022, "step": 75770 }, { "epoch": 0.3061607889559101, "grad_norm": 1185.7301025390625, "learning_rate": 3.5179972901308794e-05, "loss": 60.0821, "step": 75780 }, { "epoch": 0.30620119022127773, "grad_norm": 1166.6944580078125, "learning_rate": 3.5178154560915825e-05, "loss": 91.2704, "step": 75790 }, { "epoch": 0.30624159148664537, "grad_norm": 1487.95166015625, "learning_rate": 3.5176335924616916e-05, "loss": 91.0684, "step": 75800 }, { "epoch": 0.306281992752013, "grad_norm": 524.3199462890625, "learning_rate": 3.517451699244752e-05, "loss": 108.7739, "step": 75810 }, { "epoch": 0.30632239401738065, "grad_norm": 0.0, "learning_rate": 3.517269776444311e-05, "loss": 70.3399, "step": 75820 }, { "epoch": 0.30636279528274823, "grad_norm": 1462.861572265625, "learning_rate": 3.5170878240639145e-05, "loss": 90.5969, "step": 75830 }, { "epoch": 0.3064031965481159, "grad_norm": 309.7550964355469, "learning_rate": 3.516905842107109e-05, "loss": 48.8648, "step": 75840 }, { "epoch": 0.3064435978134835, "grad_norm": 1339.1041259765625, "learning_rate": 3.5167238305774444e-05, "loss": 56.7237, "step": 75850 }, { "epoch": 0.30648399907885115, "grad_norm": 1145.357177734375, "learning_rate": 3.516541789478467e-05, "loss": 93.9895, "step": 75860 }, { "epoch": 0.3065244003442188, "grad_norm": 585.9578857421875, "learning_rate": 3.516359718813727e-05, "loss": 53.5185, "step": 75870 }, { "epoch": 0.30656480160958643, "grad_norm": 633.2238159179688, "learning_rate": 3.516177618586773e-05, "loss": 50.6192, "step": 75880 }, { "epoch": 0.306605202874954, "grad_norm": 664.184326171875, "learning_rate": 3.5159954888011564e-05, "loss": 85.1204, "step": 75890 }, { "epoch": 0.30664560414032166, "grad_norm": 1013.7833862304688, "learning_rate": 3.515813329460427e-05, "loss": 48.1728, "step": 75900 }, { "epoch": 0.3066860054056893, "grad_norm": 904.5157470703125, "learning_rate": 3.5156311405681366e-05, "loss": 40.3763, "step": 75910 }, { "epoch": 0.30672640667105694, "grad_norm": 571.832275390625, "learning_rate": 3.5154489221278366e-05, "loss": 76.1373, "step": 75920 }, { "epoch": 0.3067668079364246, "grad_norm": 1582.66015625, "learning_rate": 3.51526667414308e-05, "loss": 60.2339, "step": 75930 }, { "epoch": 0.3068072092017922, "grad_norm": 799.6014404296875, "learning_rate": 3.515084396617419e-05, "loss": 77.2938, "step": 75940 }, { "epoch": 0.3068476104671598, "grad_norm": 670.2860107421875, "learning_rate": 3.514902089554408e-05, "loss": 83.6992, "step": 75950 }, { "epoch": 0.30688801173252744, "grad_norm": 515.1392822265625, "learning_rate": 3.5147197529576e-05, "loss": 70.0786, "step": 75960 }, { "epoch": 0.3069284129978951, "grad_norm": 711.9067993164062, "learning_rate": 3.514537386830552e-05, "loss": 53.2691, "step": 75970 }, { "epoch": 0.3069688142632627, "grad_norm": 1451.2913818359375, "learning_rate": 3.5143549911768166e-05, "loss": 69.1946, "step": 75980 }, { "epoch": 0.30700921552863036, "grad_norm": 539.037841796875, "learning_rate": 3.514172565999952e-05, "loss": 76.1712, "step": 75990 }, { "epoch": 0.307049616793998, "grad_norm": 811.0957641601562, "learning_rate": 3.513990111303513e-05, "loss": 80.0229, "step": 76000 }, { "epoch": 0.30709001805936564, "grad_norm": 866.5407104492188, "learning_rate": 3.5138076270910575e-05, "loss": 66.0944, "step": 76010 }, { "epoch": 0.30713041932473323, "grad_norm": 797.3560180664062, "learning_rate": 3.513625113366144e-05, "loss": 47.7251, "step": 76020 }, { "epoch": 0.30717082059010087, "grad_norm": 495.70745849609375, "learning_rate": 3.513442570132328e-05, "loss": 55.8337, "step": 76030 }, { "epoch": 0.3072112218554685, "grad_norm": 1171.0638427734375, "learning_rate": 3.513259997393171e-05, "loss": 65.7174, "step": 76040 }, { "epoch": 0.30725162312083615, "grad_norm": 978.8792114257812, "learning_rate": 3.51307739515223e-05, "loss": 66.6673, "step": 76050 }, { "epoch": 0.3072920243862038, "grad_norm": 1636.1409912109375, "learning_rate": 3.512894763413068e-05, "loss": 77.5981, "step": 76060 }, { "epoch": 0.30733242565157143, "grad_norm": 803.0050048828125, "learning_rate": 3.5127121021792425e-05, "loss": 66.8965, "step": 76070 }, { "epoch": 0.307372826916939, "grad_norm": 745.1494750976562, "learning_rate": 3.512529411454316e-05, "loss": 69.0238, "step": 76080 }, { "epoch": 0.30741322818230665, "grad_norm": 547.9581298828125, "learning_rate": 3.51234669124185e-05, "loss": 44.1149, "step": 76090 }, { "epoch": 0.3074536294476743, "grad_norm": 1483.5592041015625, "learning_rate": 3.512163941545407e-05, "loss": 73.7918, "step": 76100 }, { "epoch": 0.30749403071304193, "grad_norm": 605.3894653320312, "learning_rate": 3.511981162368549e-05, "loss": 71.8262, "step": 76110 }, { "epoch": 0.3075344319784096, "grad_norm": 687.6273803710938, "learning_rate": 3.5117983537148395e-05, "loss": 71.0229, "step": 76120 }, { "epoch": 0.3075748332437772, "grad_norm": 1335.818115234375, "learning_rate": 3.511615515587843e-05, "loss": 71.5045, "step": 76130 }, { "epoch": 0.30761523450914485, "grad_norm": 822.9049682617188, "learning_rate": 3.5114326479911244e-05, "loss": 58.5649, "step": 76140 }, { "epoch": 0.30765563577451244, "grad_norm": 729.94384765625, "learning_rate": 3.5112497509282474e-05, "loss": 61.9444, "step": 76150 }, { "epoch": 0.3076960370398801, "grad_norm": 1042.3271484375, "learning_rate": 3.511066824402779e-05, "loss": 92.8846, "step": 76160 }, { "epoch": 0.3077364383052477, "grad_norm": 794.2461547851562, "learning_rate": 3.510883868418284e-05, "loss": 72.1416, "step": 76170 }, { "epoch": 0.30777683957061536, "grad_norm": 984.2166748046875, "learning_rate": 3.5107008829783314e-05, "loss": 73.4033, "step": 76180 }, { "epoch": 0.307817240835983, "grad_norm": 513.4242553710938, "learning_rate": 3.510517868086487e-05, "loss": 53.9028, "step": 76190 }, { "epoch": 0.30785764210135064, "grad_norm": 797.1355590820312, "learning_rate": 3.5103348237463184e-05, "loss": 68.143, "step": 76200 }, { "epoch": 0.3078980433667182, "grad_norm": 370.9480285644531, "learning_rate": 3.510151749961395e-05, "loss": 109.3378, "step": 76210 }, { "epoch": 0.30793844463208586, "grad_norm": 606.9356689453125, "learning_rate": 3.509968646735287e-05, "loss": 58.9858, "step": 76220 }, { "epoch": 0.3079788458974535, "grad_norm": 894.0355224609375, "learning_rate": 3.509785514071562e-05, "loss": 87.4262, "step": 76230 }, { "epoch": 0.30801924716282114, "grad_norm": 1086.481689453125, "learning_rate": 3.50960235197379e-05, "loss": 71.8501, "step": 76240 }, { "epoch": 0.3080596484281888, "grad_norm": 499.71099853515625, "learning_rate": 3.5094191604455446e-05, "loss": 53.5725, "step": 76250 }, { "epoch": 0.3081000496935564, "grad_norm": 692.6753540039062, "learning_rate": 3.509235939490394e-05, "loss": 83.9878, "step": 76260 }, { "epoch": 0.308140450958924, "grad_norm": 751.8206176757812, "learning_rate": 3.509052689111913e-05, "loss": 88.5014, "step": 76270 }, { "epoch": 0.30818085222429165, "grad_norm": 567.2142333984375, "learning_rate": 3.5088694093136726e-05, "loss": 53.7215, "step": 76280 }, { "epoch": 0.3082212534896593, "grad_norm": 944.86376953125, "learning_rate": 3.508686100099246e-05, "loss": 87.2339, "step": 76290 }, { "epoch": 0.3082616547550269, "grad_norm": 872.0805053710938, "learning_rate": 3.508502761472208e-05, "loss": 80.3885, "step": 76300 }, { "epoch": 0.30830205602039457, "grad_norm": 715.2340087890625, "learning_rate": 3.508319393436131e-05, "loss": 86.7982, "step": 76310 }, { "epoch": 0.3083424572857622, "grad_norm": 1295.7491455078125, "learning_rate": 3.5081359959945916e-05, "loss": 65.9816, "step": 76320 }, { "epoch": 0.30838285855112985, "grad_norm": 2658.11962890625, "learning_rate": 3.5079525691511644e-05, "loss": 46.9333, "step": 76330 }, { "epoch": 0.30842325981649743, "grad_norm": 754.1270141601562, "learning_rate": 3.507769112909425e-05, "loss": 72.6826, "step": 76340 }, { "epoch": 0.30846366108186507, "grad_norm": 646.848876953125, "learning_rate": 3.507585627272951e-05, "loss": 51.2794, "step": 76350 }, { "epoch": 0.3085040623472327, "grad_norm": 644.6250610351562, "learning_rate": 3.507402112245319e-05, "loss": 54.5868, "step": 76360 }, { "epoch": 0.30854446361260035, "grad_norm": 2552.768798828125, "learning_rate": 3.507218567830107e-05, "loss": 94.0588, "step": 76370 }, { "epoch": 0.308584864877968, "grad_norm": 402.1734924316406, "learning_rate": 3.507034994030892e-05, "loss": 59.7754, "step": 76380 }, { "epoch": 0.30862526614333563, "grad_norm": 1175.5374755859375, "learning_rate": 3.506851390851255e-05, "loss": 65.8783, "step": 76390 }, { "epoch": 0.3086656674087032, "grad_norm": 819.6211547851562, "learning_rate": 3.5066677582947744e-05, "loss": 88.8508, "step": 76400 }, { "epoch": 0.30870606867407085, "grad_norm": 697.6337890625, "learning_rate": 3.5064840963650295e-05, "loss": 49.8864, "step": 76410 }, { "epoch": 0.3087464699394385, "grad_norm": 438.434814453125, "learning_rate": 3.5063004050656016e-05, "loss": 84.5187, "step": 76420 }, { "epoch": 0.30878687120480613, "grad_norm": 894.0114135742188, "learning_rate": 3.506116684400072e-05, "loss": 63.094, "step": 76430 }, { "epoch": 0.3088272724701738, "grad_norm": 544.4618530273438, "learning_rate": 3.505932934372022e-05, "loss": 63.0377, "step": 76440 }, { "epoch": 0.3088676737355414, "grad_norm": 485.47479248046875, "learning_rate": 3.505749154985035e-05, "loss": 50.5823, "step": 76450 }, { "epoch": 0.30890807500090905, "grad_norm": 914.7135620117188, "learning_rate": 3.505565346242692e-05, "loss": 78.1928, "step": 76460 }, { "epoch": 0.30894847626627664, "grad_norm": 694.2228393554688, "learning_rate": 3.5053815081485776e-05, "loss": 66.9965, "step": 76470 }, { "epoch": 0.3089888775316443, "grad_norm": 900.4125366210938, "learning_rate": 3.505197640706276e-05, "loss": 59.6299, "step": 76480 }, { "epoch": 0.3090292787970119, "grad_norm": 899.3259887695312, "learning_rate": 3.505013743919372e-05, "loss": 49.0054, "step": 76490 }, { "epoch": 0.30906968006237956, "grad_norm": 506.0701599121094, "learning_rate": 3.504829817791449e-05, "loss": 67.3172, "step": 76500 }, { "epoch": 0.3091100813277472, "grad_norm": 488.9270324707031, "learning_rate": 3.5046458623260946e-05, "loss": 64.5393, "step": 76510 }, { "epoch": 0.30915048259311484, "grad_norm": 532.37939453125, "learning_rate": 3.5044618775268944e-05, "loss": 77.8112, "step": 76520 }, { "epoch": 0.3091908838584824, "grad_norm": 1935.016845703125, "learning_rate": 3.5042778633974355e-05, "loss": 81.3636, "step": 76530 }, { "epoch": 0.30923128512385006, "grad_norm": 1644.5169677734375, "learning_rate": 3.504093819941305e-05, "loss": 75.7931, "step": 76540 }, { "epoch": 0.3092716863892177, "grad_norm": 980.7655029296875, "learning_rate": 3.503909747162091e-05, "loss": 70.2966, "step": 76550 }, { "epoch": 0.30931208765458534, "grad_norm": 1009.39599609375, "learning_rate": 3.503725645063383e-05, "loss": 123.4905, "step": 76560 }, { "epoch": 0.309352488919953, "grad_norm": 980.9332275390625, "learning_rate": 3.5035415136487685e-05, "loss": 60.888, "step": 76570 }, { "epoch": 0.3093928901853206, "grad_norm": 764.3687133789062, "learning_rate": 3.503357352921839e-05, "loss": 68.6264, "step": 76580 }, { "epoch": 0.3094332914506882, "grad_norm": 1286.886962890625, "learning_rate": 3.503173162886183e-05, "loss": 54.0793, "step": 76590 }, { "epoch": 0.30947369271605585, "grad_norm": 1331.6363525390625, "learning_rate": 3.5029889435453924e-05, "loss": 85.7386, "step": 76600 }, { "epoch": 0.3095140939814235, "grad_norm": 393.58111572265625, "learning_rate": 3.5028046949030584e-05, "loss": 74.7227, "step": 76610 }, { "epoch": 0.30955449524679113, "grad_norm": 512.5623168945312, "learning_rate": 3.5026204169627744e-05, "loss": 97.1083, "step": 76620 }, { "epoch": 0.30959489651215877, "grad_norm": 1050.2479248046875, "learning_rate": 3.50243610972813e-05, "loss": 61.3017, "step": 76630 }, { "epoch": 0.3096352977775264, "grad_norm": 682.5951538085938, "learning_rate": 3.502251773202722e-05, "loss": 77.5261, "step": 76640 }, { "epoch": 0.30967569904289405, "grad_norm": 1385.592529296875, "learning_rate": 3.5020674073901406e-05, "loss": 96.6229, "step": 76650 }, { "epoch": 0.30971610030826163, "grad_norm": 1093.2525634765625, "learning_rate": 3.501883012293983e-05, "loss": 82.5706, "step": 76660 }, { "epoch": 0.3097565015736293, "grad_norm": 1153.8555908203125, "learning_rate": 3.501698587917842e-05, "loss": 49.4748, "step": 76670 }, { "epoch": 0.3097969028389969, "grad_norm": 424.3147888183594, "learning_rate": 3.501514134265315e-05, "loss": 65.4982, "step": 76680 }, { "epoch": 0.30983730410436455, "grad_norm": 1009.4806518554688, "learning_rate": 3.501329651339996e-05, "loss": 64.0963, "step": 76690 }, { "epoch": 0.3098777053697322, "grad_norm": 1129.088623046875, "learning_rate": 3.501145139145483e-05, "loss": 110.2901, "step": 76700 }, { "epoch": 0.30991810663509983, "grad_norm": 426.0953369140625, "learning_rate": 3.500960597685372e-05, "loss": 73.1976, "step": 76710 }, { "epoch": 0.3099585079004674, "grad_norm": 0.0, "learning_rate": 3.500776026963262e-05, "loss": 55.8843, "step": 76720 }, { "epoch": 0.30999890916583506, "grad_norm": 387.7458190917969, "learning_rate": 3.50059142698275e-05, "loss": 43.3823, "step": 76730 }, { "epoch": 0.3100393104312027, "grad_norm": 823.2538452148438, "learning_rate": 3.500406797747436e-05, "loss": 71.2642, "step": 76740 }, { "epoch": 0.31007971169657034, "grad_norm": 1283.1015625, "learning_rate": 3.5002221392609196e-05, "loss": 54.9085, "step": 76750 }, { "epoch": 0.310120112961938, "grad_norm": 1642.7064208984375, "learning_rate": 3.5000374515268e-05, "loss": 102.8734, "step": 76760 }, { "epoch": 0.3101605142273056, "grad_norm": 732.9937133789062, "learning_rate": 3.499852734548677e-05, "loss": 57.9155, "step": 76770 }, { "epoch": 0.31020091549267326, "grad_norm": 1375.7432861328125, "learning_rate": 3.4996679883301535e-05, "loss": 65.2121, "step": 76780 }, { "epoch": 0.31024131675804084, "grad_norm": 434.67779541015625, "learning_rate": 3.49948321287483e-05, "loss": 71.201, "step": 76790 }, { "epoch": 0.3102817180234085, "grad_norm": 1775.1773681640625, "learning_rate": 3.49929840818631e-05, "loss": 68.5134, "step": 76800 }, { "epoch": 0.3103221192887761, "grad_norm": 531.1075439453125, "learning_rate": 3.499113574268196e-05, "loss": 50.1119, "step": 76810 }, { "epoch": 0.31036252055414376, "grad_norm": 1148.52001953125, "learning_rate": 3.49892871112409e-05, "loss": 77.1326, "step": 76820 }, { "epoch": 0.3104029218195114, "grad_norm": 1055.23828125, "learning_rate": 3.498743818757598e-05, "loss": 62.7596, "step": 76830 }, { "epoch": 0.31044332308487904, "grad_norm": 885.1285400390625, "learning_rate": 3.498558897172324e-05, "loss": 75.788, "step": 76840 }, { "epoch": 0.3104837243502466, "grad_norm": 733.3975830078125, "learning_rate": 3.4983739463718706e-05, "loss": 51.2205, "step": 76850 }, { "epoch": 0.31052412561561427, "grad_norm": 620.9288940429688, "learning_rate": 3.498188966359848e-05, "loss": 93.8355, "step": 76860 }, { "epoch": 0.3105645268809819, "grad_norm": 1757.525146484375, "learning_rate": 3.498003957139859e-05, "loss": 63.8485, "step": 76870 }, { "epoch": 0.31060492814634955, "grad_norm": 162.37403869628906, "learning_rate": 3.4978189187155114e-05, "loss": 62.836, "step": 76880 }, { "epoch": 0.3106453294117172, "grad_norm": 573.8180541992188, "learning_rate": 3.4976338510904134e-05, "loss": 50.8696, "step": 76890 }, { "epoch": 0.3106857306770848, "grad_norm": 734.051025390625, "learning_rate": 3.4974487542681724e-05, "loss": 46.2981, "step": 76900 }, { "epoch": 0.3107261319424524, "grad_norm": 1082.28955078125, "learning_rate": 3.497263628252397e-05, "loss": 70.5763, "step": 76910 }, { "epoch": 0.31076653320782005, "grad_norm": 742.8665161132812, "learning_rate": 3.497078473046697e-05, "loss": 62.9141, "step": 76920 }, { "epoch": 0.3108069344731877, "grad_norm": 758.2858276367188, "learning_rate": 3.49689328865468e-05, "loss": 57.3749, "step": 76930 }, { "epoch": 0.31084733573855533, "grad_norm": 478.9095764160156, "learning_rate": 3.496708075079959e-05, "loss": 36.4947, "step": 76940 }, { "epoch": 0.31088773700392297, "grad_norm": 969.2333374023438, "learning_rate": 3.496522832326143e-05, "loss": 104.6406, "step": 76950 }, { "epoch": 0.3109281382692906, "grad_norm": 642.496826171875, "learning_rate": 3.496337560396844e-05, "loss": 54.6326, "step": 76960 }, { "epoch": 0.31096853953465825, "grad_norm": 979.9578857421875, "learning_rate": 3.496152259295673e-05, "loss": 70.7781, "step": 76970 }, { "epoch": 0.31100894080002583, "grad_norm": 1154.487060546875, "learning_rate": 3.495966929026244e-05, "loss": 53.4658, "step": 76980 }, { "epoch": 0.3110493420653935, "grad_norm": 1025.4051513671875, "learning_rate": 3.49578156959217e-05, "loss": 58.9349, "step": 76990 }, { "epoch": 0.3110897433307611, "grad_norm": 1516.22998046875, "learning_rate": 3.495596180997064e-05, "loss": 108.4065, "step": 77000 }, { "epoch": 0.31113014459612875, "grad_norm": 1075.0015869140625, "learning_rate": 3.495410763244541e-05, "loss": 81.8675, "step": 77010 }, { "epoch": 0.3111705458614964, "grad_norm": 1219.5714111328125, "learning_rate": 3.4952253163382144e-05, "loss": 64.139, "step": 77020 }, { "epoch": 0.31121094712686403, "grad_norm": 854.6863403320312, "learning_rate": 3.4950398402817006e-05, "loss": 93.2717, "step": 77030 }, { "epoch": 0.3112513483922316, "grad_norm": 487.3167724609375, "learning_rate": 3.4948543350786156e-05, "loss": 46.6558, "step": 77040 }, { "epoch": 0.31129174965759926, "grad_norm": 181.5683135986328, "learning_rate": 3.494668800732575e-05, "loss": 64.1349, "step": 77050 }, { "epoch": 0.3113321509229669, "grad_norm": 940.066650390625, "learning_rate": 3.4944832372471977e-05, "loss": 87.6811, "step": 77060 }, { "epoch": 0.31137255218833454, "grad_norm": 636.5015869140625, "learning_rate": 3.4942976446261e-05, "loss": 68.7378, "step": 77070 }, { "epoch": 0.3114129534537022, "grad_norm": 477.9764709472656, "learning_rate": 3.494112022872901e-05, "loss": 66.3613, "step": 77080 }, { "epoch": 0.3114533547190698, "grad_norm": 1191.1737060546875, "learning_rate": 3.493926371991218e-05, "loss": 70.3055, "step": 77090 }, { "epoch": 0.31149375598443746, "grad_norm": 873.9033203125, "learning_rate": 3.493740691984672e-05, "loss": 72.6238, "step": 77100 }, { "epoch": 0.31153415724980504, "grad_norm": 656.4010620117188, "learning_rate": 3.4935549828568807e-05, "loss": 63.6683, "step": 77110 }, { "epoch": 0.3115745585151727, "grad_norm": 533.2958374023438, "learning_rate": 3.493369244611467e-05, "loss": 75.7154, "step": 77120 }, { "epoch": 0.3116149597805403, "grad_norm": 477.51409912109375, "learning_rate": 3.493183477252051e-05, "loss": 47.95, "step": 77130 }, { "epoch": 0.31165536104590796, "grad_norm": 808.3378295898438, "learning_rate": 3.4929976807822546e-05, "loss": 54.0265, "step": 77140 }, { "epoch": 0.3116957623112756, "grad_norm": 540.817626953125, "learning_rate": 3.4928118552056994e-05, "loss": 39.5284, "step": 77150 }, { "epoch": 0.31173616357664324, "grad_norm": 0.0, "learning_rate": 3.492626000526008e-05, "loss": 78.5489, "step": 77160 }, { "epoch": 0.31177656484201083, "grad_norm": 1253.980712890625, "learning_rate": 3.492440116746805e-05, "loss": 62.1076, "step": 77170 }, { "epoch": 0.31181696610737847, "grad_norm": 427.8959045410156, "learning_rate": 3.492254203871714e-05, "loss": 84.0081, "step": 77180 }, { "epoch": 0.3118573673727461, "grad_norm": 1483.2845458984375, "learning_rate": 3.4920682619043584e-05, "loss": 66.6233, "step": 77190 }, { "epoch": 0.31189776863811375, "grad_norm": 578.9376831054688, "learning_rate": 3.4918822908483645e-05, "loss": 61.4817, "step": 77200 }, { "epoch": 0.3119381699034814, "grad_norm": 415.4118957519531, "learning_rate": 3.491696290707357e-05, "loss": 69.8285, "step": 77210 }, { "epoch": 0.31197857116884903, "grad_norm": 1217.6763916015625, "learning_rate": 3.491510261484962e-05, "loss": 77.4011, "step": 77220 }, { "epoch": 0.3120189724342166, "grad_norm": 1282.310302734375, "learning_rate": 3.4913242031848064e-05, "loss": 70.3646, "step": 77230 }, { "epoch": 0.31205937369958425, "grad_norm": 478.583251953125, "learning_rate": 3.4911381158105185e-05, "loss": 75.6116, "step": 77240 }, { "epoch": 0.3120997749649519, "grad_norm": 778.688232421875, "learning_rate": 3.4909519993657244e-05, "loss": 62.7415, "step": 77250 }, { "epoch": 0.31214017623031953, "grad_norm": 1136.1414794921875, "learning_rate": 3.490765853854054e-05, "loss": 106.0001, "step": 77260 }, { "epoch": 0.3121805774956872, "grad_norm": 640.33203125, "learning_rate": 3.490579679279136e-05, "loss": 51.8542, "step": 77270 }, { "epoch": 0.3122209787610548, "grad_norm": 750.8095092773438, "learning_rate": 3.4903934756445995e-05, "loss": 43.308, "step": 77280 }, { "epoch": 0.31226138002642245, "grad_norm": 1000.5927734375, "learning_rate": 3.490207242954075e-05, "loss": 66.9694, "step": 77290 }, { "epoch": 0.31230178129179004, "grad_norm": 788.5052490234375, "learning_rate": 3.4900209812111927e-05, "loss": 72.0724, "step": 77300 }, { "epoch": 0.3123421825571577, "grad_norm": 1133.585693359375, "learning_rate": 3.489834690419584e-05, "loss": 81.6365, "step": 77310 }, { "epoch": 0.3123825838225253, "grad_norm": 701.0135498046875, "learning_rate": 3.489648370582882e-05, "loss": 92.826, "step": 77320 }, { "epoch": 0.31242298508789296, "grad_norm": 754.3093872070312, "learning_rate": 3.489462021704717e-05, "loss": 75.8383, "step": 77330 }, { "epoch": 0.3124633863532606, "grad_norm": 523.960693359375, "learning_rate": 3.489275643788724e-05, "loss": 76.4888, "step": 77340 }, { "epoch": 0.31250378761862824, "grad_norm": 859.30029296875, "learning_rate": 3.489089236838535e-05, "loss": 96.7169, "step": 77350 }, { "epoch": 0.3125441888839958, "grad_norm": 3732.568603515625, "learning_rate": 3.488902800857785e-05, "loss": 71.5349, "step": 77360 }, { "epoch": 0.31258459014936346, "grad_norm": 763.2684936523438, "learning_rate": 3.488716335850108e-05, "loss": 56.7226, "step": 77370 }, { "epoch": 0.3126249914147311, "grad_norm": 479.9824523925781, "learning_rate": 3.4885298418191405e-05, "loss": 69.6836, "step": 77380 }, { "epoch": 0.31266539268009874, "grad_norm": 1107.8890380859375, "learning_rate": 3.488343318768516e-05, "loss": 73.3432, "step": 77390 }, { "epoch": 0.3127057939454664, "grad_norm": 517.3402099609375, "learning_rate": 3.488156766701873e-05, "loss": 90.4566, "step": 77400 }, { "epoch": 0.312746195210834, "grad_norm": 727.0076293945312, "learning_rate": 3.487970185622848e-05, "loss": 111.965, "step": 77410 }, { "epoch": 0.31278659647620166, "grad_norm": 507.585693359375, "learning_rate": 3.487783575535078e-05, "loss": 39.8812, "step": 77420 }, { "epoch": 0.31282699774156925, "grad_norm": 1095.2391357421875, "learning_rate": 3.487596936442201e-05, "loss": 65.871, "step": 77430 }, { "epoch": 0.3128673990069369, "grad_norm": 462.9928283691406, "learning_rate": 3.487410268347856e-05, "loss": 57.1373, "step": 77440 }, { "epoch": 0.3129078002723045, "grad_norm": 531.0853271484375, "learning_rate": 3.487223571255682e-05, "loss": 87.5273, "step": 77450 }, { "epoch": 0.31294820153767217, "grad_norm": 648.5484619140625, "learning_rate": 3.4870368451693184e-05, "loss": 71.222, "step": 77460 }, { "epoch": 0.3129886028030398, "grad_norm": 512.2035522460938, "learning_rate": 3.486850090092407e-05, "loss": 78.9093, "step": 77470 }, { "epoch": 0.31302900406840745, "grad_norm": 549.501220703125, "learning_rate": 3.486663306028587e-05, "loss": 120.1285, "step": 77480 }, { "epoch": 0.31306940533377503, "grad_norm": 826.307861328125, "learning_rate": 3.4864764929815e-05, "loss": 39.0094, "step": 77490 }, { "epoch": 0.31310980659914267, "grad_norm": 1610.810791015625, "learning_rate": 3.4862896509547886e-05, "loss": 107.4009, "step": 77500 }, { "epoch": 0.3131502078645103, "grad_norm": 1580.2742919921875, "learning_rate": 3.4861027799520956e-05, "loss": 126.6358, "step": 77510 }, { "epoch": 0.31319060912987795, "grad_norm": 1006.2523193359375, "learning_rate": 3.4859158799770635e-05, "loss": 55.5261, "step": 77520 }, { "epoch": 0.3132310103952456, "grad_norm": 463.68115234375, "learning_rate": 3.4857289510333365e-05, "loss": 43.9542, "step": 77530 }, { "epoch": 0.31327141166061323, "grad_norm": 1687.350341796875, "learning_rate": 3.485541993124559e-05, "loss": 55.4924, "step": 77540 }, { "epoch": 0.3133118129259808, "grad_norm": 844.6075439453125, "learning_rate": 3.485355006254375e-05, "loss": 69.7897, "step": 77550 }, { "epoch": 0.31335221419134845, "grad_norm": 1155.069580078125, "learning_rate": 3.4851679904264314e-05, "loss": 60.9398, "step": 77560 }, { "epoch": 0.3133926154567161, "grad_norm": 444.0328369140625, "learning_rate": 3.4849809456443725e-05, "loss": 43.1102, "step": 77570 }, { "epoch": 0.31343301672208373, "grad_norm": 1202.2822265625, "learning_rate": 3.484793871911845e-05, "loss": 90.6961, "step": 77580 }, { "epoch": 0.3134734179874514, "grad_norm": 1158.4188232421875, "learning_rate": 3.4846067692324976e-05, "loss": 63.8845, "step": 77590 }, { "epoch": 0.313513819252819, "grad_norm": 1004.656982421875, "learning_rate": 3.484419637609977e-05, "loss": 89.7987, "step": 77600 }, { "epoch": 0.31355422051818665, "grad_norm": 365.622802734375, "learning_rate": 3.48423247704793e-05, "loss": 58.2506, "step": 77610 }, { "epoch": 0.31359462178355424, "grad_norm": 534.121337890625, "learning_rate": 3.484045287550007e-05, "loss": 59.851, "step": 77620 }, { "epoch": 0.3136350230489219, "grad_norm": 564.9100341796875, "learning_rate": 3.4838580691198584e-05, "loss": 77.1259, "step": 77630 }, { "epoch": 0.3136754243142895, "grad_norm": 953.2900390625, "learning_rate": 3.4836708217611316e-05, "loss": 68.8026, "step": 77640 }, { "epoch": 0.31371582557965716, "grad_norm": 951.2789916992188, "learning_rate": 3.4834835454774784e-05, "loss": 59.8535, "step": 77650 }, { "epoch": 0.3137562268450248, "grad_norm": 1164.3463134765625, "learning_rate": 3.48329624027255e-05, "loss": 68.9722, "step": 77660 }, { "epoch": 0.31379662811039244, "grad_norm": 1070.16162109375, "learning_rate": 3.4831089061499975e-05, "loss": 45.6516, "step": 77670 }, { "epoch": 0.31383702937576, "grad_norm": 473.43255615234375, "learning_rate": 3.482921543113474e-05, "loss": 87.9103, "step": 77680 }, { "epoch": 0.31387743064112766, "grad_norm": 570.1047973632812, "learning_rate": 3.4827341511666315e-05, "loss": 59.4656, "step": 77690 }, { "epoch": 0.3139178319064953, "grad_norm": 865.6242065429688, "learning_rate": 3.482546730313122e-05, "loss": 46.6567, "step": 77700 }, { "epoch": 0.31395823317186294, "grad_norm": 771.6071166992188, "learning_rate": 3.482359280556602e-05, "loss": 82.0451, "step": 77710 }, { "epoch": 0.3139986344372306, "grad_norm": 896.2112426757812, "learning_rate": 3.482171801900725e-05, "loss": 90.4533, "step": 77720 }, { "epoch": 0.3140390357025982, "grad_norm": 1196.485107421875, "learning_rate": 3.481984294349145e-05, "loss": 64.6757, "step": 77730 }, { "epoch": 0.31407943696796586, "grad_norm": 807.33349609375, "learning_rate": 3.4817967579055176e-05, "loss": 75.7475, "step": 77740 }, { "epoch": 0.31411983823333345, "grad_norm": 1021.001220703125, "learning_rate": 3.4816091925735e-05, "loss": 70.141, "step": 77750 }, { "epoch": 0.3141602394987011, "grad_norm": 662.2337036132812, "learning_rate": 3.481421598356749e-05, "loss": 71.5092, "step": 77760 }, { "epoch": 0.31420064076406873, "grad_norm": 1262.9063720703125, "learning_rate": 3.4812339752589206e-05, "loss": 72.7259, "step": 77770 }, { "epoch": 0.31424104202943637, "grad_norm": 399.2818908691406, "learning_rate": 3.481046323283674e-05, "loss": 51.4161, "step": 77780 }, { "epoch": 0.314281443294804, "grad_norm": 1037.9417724609375, "learning_rate": 3.480858642434666e-05, "loss": 80.6908, "step": 77790 }, { "epoch": 0.31432184456017165, "grad_norm": 1524.025634765625, "learning_rate": 3.4806709327155564e-05, "loss": 68.7706, "step": 77800 }, { "epoch": 0.31436224582553923, "grad_norm": 1578.2554931640625, "learning_rate": 3.480483194130005e-05, "loss": 51.718, "step": 77810 }, { "epoch": 0.3144026470909069, "grad_norm": 713.9732666015625, "learning_rate": 3.480295426681671e-05, "loss": 63.0997, "step": 77820 }, { "epoch": 0.3144430483562745, "grad_norm": 542.3390502929688, "learning_rate": 3.480107630374217e-05, "loss": 51.8624, "step": 77830 }, { "epoch": 0.31448344962164215, "grad_norm": 561.2479858398438, "learning_rate": 3.479919805211301e-05, "loss": 42.934, "step": 77840 }, { "epoch": 0.3145238508870098, "grad_norm": 1051.489990234375, "learning_rate": 3.4797319511965875e-05, "loss": 80.7784, "step": 77850 }, { "epoch": 0.31456425215237743, "grad_norm": 1260.8740234375, "learning_rate": 3.479544068333737e-05, "loss": 87.1448, "step": 77860 }, { "epoch": 0.314604653417745, "grad_norm": 724.090087890625, "learning_rate": 3.479356156626414e-05, "loss": 53.8852, "step": 77870 }, { "epoch": 0.31464505468311266, "grad_norm": 1093.593505859375, "learning_rate": 3.479168216078281e-05, "loss": 61.8862, "step": 77880 }, { "epoch": 0.3146854559484803, "grad_norm": 875.7823486328125, "learning_rate": 3.478980246693001e-05, "loss": 84.8797, "step": 77890 }, { "epoch": 0.31472585721384794, "grad_norm": 1626.4224853515625, "learning_rate": 3.478792248474241e-05, "loss": 85.2176, "step": 77900 }, { "epoch": 0.3147662584792156, "grad_norm": 746.38134765625, "learning_rate": 3.478604221425665e-05, "loss": 56.6662, "step": 77910 }, { "epoch": 0.3148066597445832, "grad_norm": 805.35498046875, "learning_rate": 3.478416165550938e-05, "loss": 87.0488, "step": 77920 }, { "epoch": 0.31484706100995086, "grad_norm": 634.9224243164062, "learning_rate": 3.478228080853726e-05, "loss": 63.2272, "step": 77930 }, { "epoch": 0.31488746227531844, "grad_norm": 627.2695922851562, "learning_rate": 3.478039967337697e-05, "loss": 84.5405, "step": 77940 }, { "epoch": 0.3149278635406861, "grad_norm": 1291.2822265625, "learning_rate": 3.477851825006518e-05, "loss": 69.2081, "step": 77950 }, { "epoch": 0.3149682648060537, "grad_norm": 0.0, "learning_rate": 3.4776636538638565e-05, "loss": 58.2442, "step": 77960 }, { "epoch": 0.31500866607142136, "grad_norm": 693.0581665039062, "learning_rate": 3.477475453913381e-05, "loss": 51.7055, "step": 77970 }, { "epoch": 0.315049067336789, "grad_norm": 416.64129638671875, "learning_rate": 3.477287225158762e-05, "loss": 41.2215, "step": 77980 }, { "epoch": 0.31508946860215664, "grad_norm": 744.0357666015625, "learning_rate": 3.477098967603667e-05, "loss": 71.1339, "step": 77990 }, { "epoch": 0.3151298698675242, "grad_norm": 2952.7724609375, "learning_rate": 3.4769106812517685e-05, "loss": 79.9298, "step": 78000 }, { "epoch": 0.31517027113289187, "grad_norm": 365.7235412597656, "learning_rate": 3.476722366106734e-05, "loss": 48.3076, "step": 78010 }, { "epoch": 0.3152106723982595, "grad_norm": 806.73876953125, "learning_rate": 3.476534022172238e-05, "loss": 58.0707, "step": 78020 }, { "epoch": 0.31525107366362715, "grad_norm": 520.0816650390625, "learning_rate": 3.4763456494519505e-05, "loss": 84.0921, "step": 78030 }, { "epoch": 0.3152914749289948, "grad_norm": 986.6627197265625, "learning_rate": 3.476157247949545e-05, "loss": 55.3089, "step": 78040 }, { "epoch": 0.3153318761943624, "grad_norm": 754.4058837890625, "learning_rate": 3.475968817668694e-05, "loss": 69.4051, "step": 78050 }, { "epoch": 0.31537227745973007, "grad_norm": 559.5108642578125, "learning_rate": 3.47578035861307e-05, "loss": 40.0251, "step": 78060 }, { "epoch": 0.31541267872509765, "grad_norm": 329.8301086425781, "learning_rate": 3.475591870786349e-05, "loss": 56.468, "step": 78070 }, { "epoch": 0.3154530799904653, "grad_norm": 768.6515502929688, "learning_rate": 3.4754033541922054e-05, "loss": 57.4969, "step": 78080 }, { "epoch": 0.31549348125583293, "grad_norm": 616.1010131835938, "learning_rate": 3.475214808834313e-05, "loss": 57.8262, "step": 78090 }, { "epoch": 0.31553388252120057, "grad_norm": 655.2771606445312, "learning_rate": 3.475026234716348e-05, "loss": 67.711, "step": 78100 }, { "epoch": 0.3155742837865682, "grad_norm": 1178.9937744140625, "learning_rate": 3.474837631841988e-05, "loss": 81.5335, "step": 78110 }, { "epoch": 0.31561468505193585, "grad_norm": 536.2959594726562, "learning_rate": 3.474649000214909e-05, "loss": 51.0797, "step": 78120 }, { "epoch": 0.31565508631730343, "grad_norm": 752.1845092773438, "learning_rate": 3.474460339838788e-05, "loss": 56.8901, "step": 78130 }, { "epoch": 0.3156954875826711, "grad_norm": 634.9712524414062, "learning_rate": 3.474271650717303e-05, "loss": 77.3995, "step": 78140 }, { "epoch": 0.3157358888480387, "grad_norm": 882.2861938476562, "learning_rate": 3.474082932854135e-05, "loss": 56.3155, "step": 78150 }, { "epoch": 0.31577629011340635, "grad_norm": 916.5146484375, "learning_rate": 3.47389418625296e-05, "loss": 67.5177, "step": 78160 }, { "epoch": 0.315816691378774, "grad_norm": 571.06298828125, "learning_rate": 3.4737054109174596e-05, "loss": 68.8433, "step": 78170 }, { "epoch": 0.31585709264414163, "grad_norm": 1100.0029296875, "learning_rate": 3.473516606851313e-05, "loss": 66.3951, "step": 78180 }, { "epoch": 0.3158974939095092, "grad_norm": 924.810791015625, "learning_rate": 3.473327774058201e-05, "loss": 85.4612, "step": 78190 }, { "epoch": 0.31593789517487686, "grad_norm": 1606.62890625, "learning_rate": 3.473138912541807e-05, "loss": 107.2161, "step": 78200 }, { "epoch": 0.3159782964402445, "grad_norm": 969.39892578125, "learning_rate": 3.472950022305811e-05, "loss": 51.2587, "step": 78210 }, { "epoch": 0.31601869770561214, "grad_norm": 520.0035400390625, "learning_rate": 3.472761103353895e-05, "loss": 109.1551, "step": 78220 }, { "epoch": 0.3160590989709798, "grad_norm": 434.693603515625, "learning_rate": 3.472572155689744e-05, "loss": 66.7653, "step": 78230 }, { "epoch": 0.3160995002363474, "grad_norm": 845.1107788085938, "learning_rate": 3.4723831793170406e-05, "loss": 53.3309, "step": 78240 }, { "epoch": 0.31613990150171506, "grad_norm": 435.4742431640625, "learning_rate": 3.4721941742394694e-05, "loss": 47.1871, "step": 78250 }, { "epoch": 0.31618030276708264, "grad_norm": 1057.7083740234375, "learning_rate": 3.472005140460714e-05, "loss": 51.1896, "step": 78260 }, { "epoch": 0.3162207040324503, "grad_norm": 621.1998901367188, "learning_rate": 3.471816077984461e-05, "loss": 62.4693, "step": 78270 }, { "epoch": 0.3162611052978179, "grad_norm": 950.8551635742188, "learning_rate": 3.4716269868143956e-05, "loss": 79.8929, "step": 78280 }, { "epoch": 0.31630150656318556, "grad_norm": 913.15625, "learning_rate": 3.4714378669542046e-05, "loss": 55.8315, "step": 78290 }, { "epoch": 0.3163419078285532, "grad_norm": 1112.23046875, "learning_rate": 3.471248718407575e-05, "loss": 81.3037, "step": 78300 }, { "epoch": 0.31638230909392084, "grad_norm": 645.7435302734375, "learning_rate": 3.471059541178194e-05, "loss": 45.1448, "step": 78310 }, { "epoch": 0.31642271035928843, "grad_norm": 480.93511962890625, "learning_rate": 3.4708703352697496e-05, "loss": 59.0489, "step": 78320 }, { "epoch": 0.31646311162465607, "grad_norm": 1059.6514892578125, "learning_rate": 3.4706811006859315e-05, "loss": 68.5006, "step": 78330 }, { "epoch": 0.3165035128900237, "grad_norm": 1040.41748046875, "learning_rate": 3.470491837430428e-05, "loss": 95.524, "step": 78340 }, { "epoch": 0.31654391415539135, "grad_norm": 352.0108337402344, "learning_rate": 3.470302545506929e-05, "loss": 49.154, "step": 78350 }, { "epoch": 0.316584315420759, "grad_norm": 794.4562377929688, "learning_rate": 3.4701132249191245e-05, "loss": 69.4265, "step": 78360 }, { "epoch": 0.31662471668612663, "grad_norm": 1108.3975830078125, "learning_rate": 3.469923875670706e-05, "loss": 88.2527, "step": 78370 }, { "epoch": 0.31666511795149427, "grad_norm": 754.4819946289062, "learning_rate": 3.469734497765365e-05, "loss": 74.1756, "step": 78380 }, { "epoch": 0.31670551921686185, "grad_norm": 554.4060668945312, "learning_rate": 3.469545091206793e-05, "loss": 48.7816, "step": 78390 }, { "epoch": 0.3167459204822295, "grad_norm": 670.9364013671875, "learning_rate": 3.469355655998683e-05, "loss": 74.2816, "step": 78400 }, { "epoch": 0.31678632174759713, "grad_norm": 548.6668701171875, "learning_rate": 3.4691661921447284e-05, "loss": 72.3777, "step": 78410 }, { "epoch": 0.3168267230129648, "grad_norm": 1143.9090576171875, "learning_rate": 3.468976699648622e-05, "loss": 70.6955, "step": 78420 }, { "epoch": 0.3168671242783324, "grad_norm": 230.33255004882812, "learning_rate": 3.4687871785140586e-05, "loss": 51.944, "step": 78430 }, { "epoch": 0.31690752554370005, "grad_norm": 294.7291259765625, "learning_rate": 3.4685976287447326e-05, "loss": 59.3842, "step": 78440 }, { "epoch": 0.31694792680906764, "grad_norm": 595.3283081054688, "learning_rate": 3.4684080503443405e-05, "loss": 83.9593, "step": 78450 }, { "epoch": 0.3169883280744353, "grad_norm": 1100.2291259765625, "learning_rate": 3.468218443316577e-05, "loss": 82.4289, "step": 78460 }, { "epoch": 0.3170287293398029, "grad_norm": 1326.83740234375, "learning_rate": 3.46802880766514e-05, "loss": 70.2116, "step": 78470 }, { "epoch": 0.31706913060517056, "grad_norm": 594.4410400390625, "learning_rate": 3.467839143393724e-05, "loss": 69.3761, "step": 78480 }, { "epoch": 0.3171095318705382, "grad_norm": 511.62786865234375, "learning_rate": 3.46764945050603e-05, "loss": 75.8792, "step": 78490 }, { "epoch": 0.31714993313590584, "grad_norm": 2378.63134765625, "learning_rate": 3.467459729005753e-05, "loss": 72.8961, "step": 78500 }, { "epoch": 0.3171903344012734, "grad_norm": 1395.0682373046875, "learning_rate": 3.467269978896594e-05, "loss": 102.6495, "step": 78510 }, { "epoch": 0.31723073566664106, "grad_norm": 473.0601501464844, "learning_rate": 3.467080200182251e-05, "loss": 54.5584, "step": 78520 }, { "epoch": 0.3172711369320087, "grad_norm": 349.14813232421875, "learning_rate": 3.4668903928664234e-05, "loss": 68.1884, "step": 78530 }, { "epoch": 0.31731153819737634, "grad_norm": 932.8010864257812, "learning_rate": 3.4667005569528134e-05, "loss": 48.9466, "step": 78540 }, { "epoch": 0.317351939462744, "grad_norm": 1845.6824951171875, "learning_rate": 3.466510692445121e-05, "loss": 62.2658, "step": 78550 }, { "epoch": 0.3173923407281116, "grad_norm": 1455.640380859375, "learning_rate": 3.4663207993470466e-05, "loss": 56.2996, "step": 78560 }, { "epoch": 0.31743274199347926, "grad_norm": 447.1221618652344, "learning_rate": 3.466130877662294e-05, "loss": 48.1869, "step": 78570 }, { "epoch": 0.31747314325884685, "grad_norm": 901.009521484375, "learning_rate": 3.465940927394565e-05, "loss": 68.94, "step": 78580 }, { "epoch": 0.3175135445242145, "grad_norm": 1162.469482421875, "learning_rate": 3.465750948547563e-05, "loss": 56.6787, "step": 78590 }, { "epoch": 0.3175539457895821, "grad_norm": 1017.1430053710938, "learning_rate": 3.465560941124992e-05, "loss": 42.6346, "step": 78600 }, { "epoch": 0.31759434705494977, "grad_norm": 474.6357727050781, "learning_rate": 3.4653709051305546e-05, "loss": 45.265, "step": 78610 }, { "epoch": 0.3176347483203174, "grad_norm": 772.208251953125, "learning_rate": 3.465180840567958e-05, "loss": 60.2214, "step": 78620 }, { "epoch": 0.31767514958568505, "grad_norm": 716.8950805664062, "learning_rate": 3.4649907474409074e-05, "loss": 78.6945, "step": 78630 }, { "epoch": 0.31771555085105263, "grad_norm": 782.5782470703125, "learning_rate": 3.464800625753107e-05, "loss": 55.895, "step": 78640 }, { "epoch": 0.31775595211642027, "grad_norm": 1307.5980224609375, "learning_rate": 3.464610475508264e-05, "loss": 85.9435, "step": 78650 }, { "epoch": 0.3177963533817879, "grad_norm": 625.3878784179688, "learning_rate": 3.464420296710086e-05, "loss": 45.7682, "step": 78660 }, { "epoch": 0.31783675464715555, "grad_norm": 662.2884521484375, "learning_rate": 3.46423008936228e-05, "loss": 75.9776, "step": 78670 }, { "epoch": 0.3178771559125232, "grad_norm": 797.3787841796875, "learning_rate": 3.464039853468555e-05, "loss": 93.6652, "step": 78680 }, { "epoch": 0.31791755717789083, "grad_norm": 2007.70947265625, "learning_rate": 3.4638495890326194e-05, "loss": 119.6598, "step": 78690 }, { "epoch": 0.31795795844325847, "grad_norm": 929.6837768554688, "learning_rate": 3.4636592960581825e-05, "loss": 60.2757, "step": 78700 }, { "epoch": 0.31799835970862605, "grad_norm": 1003.391845703125, "learning_rate": 3.463468974548954e-05, "loss": 55.3076, "step": 78710 }, { "epoch": 0.3180387609739937, "grad_norm": 664.688720703125, "learning_rate": 3.463278624508644e-05, "loss": 73.8371, "step": 78720 }, { "epoch": 0.31807916223936133, "grad_norm": 759.0806884765625, "learning_rate": 3.463088245940965e-05, "loss": 113.9012, "step": 78730 }, { "epoch": 0.318119563504729, "grad_norm": 765.0343017578125, "learning_rate": 3.4628978388496266e-05, "loss": 54.457, "step": 78740 }, { "epoch": 0.3181599647700966, "grad_norm": 542.20654296875, "learning_rate": 3.462707403238341e-05, "loss": 62.4326, "step": 78750 }, { "epoch": 0.31820036603546425, "grad_norm": 0.0, "learning_rate": 3.4625169391108224e-05, "loss": 59.6151, "step": 78760 }, { "epoch": 0.31824076730083184, "grad_norm": 823.3811645507812, "learning_rate": 3.4623264464707834e-05, "loss": 64.7899, "step": 78770 }, { "epoch": 0.3182811685661995, "grad_norm": 1177.2208251953125, "learning_rate": 3.462135925321937e-05, "loss": 84.4173, "step": 78780 }, { "epoch": 0.3183215698315671, "grad_norm": 422.4059143066406, "learning_rate": 3.461945375667998e-05, "loss": 84.0472, "step": 78790 }, { "epoch": 0.31836197109693476, "grad_norm": 1313.7181396484375, "learning_rate": 3.461754797512681e-05, "loss": 72.8415, "step": 78800 }, { "epoch": 0.3184023723623024, "grad_norm": 672.3056640625, "learning_rate": 3.4615641908597016e-05, "loss": 51.1723, "step": 78810 }, { "epoch": 0.31844277362767004, "grad_norm": 493.34539794921875, "learning_rate": 3.461373555712776e-05, "loss": 61.0822, "step": 78820 }, { "epoch": 0.3184831748930376, "grad_norm": 1001.4664916992188, "learning_rate": 3.4611828920756204e-05, "loss": 47.7915, "step": 78830 }, { "epoch": 0.31852357615840526, "grad_norm": 1193.3172607421875, "learning_rate": 3.4609921999519525e-05, "loss": 91.9756, "step": 78840 }, { "epoch": 0.3185639774237729, "grad_norm": 861.0563354492188, "learning_rate": 3.46080147934549e-05, "loss": 56.0792, "step": 78850 }, { "epoch": 0.31860437868914054, "grad_norm": 358.73297119140625, "learning_rate": 3.46061073025995e-05, "loss": 51.1715, "step": 78860 }, { "epoch": 0.3186447799545082, "grad_norm": 549.15673828125, "learning_rate": 3.4604199526990514e-05, "loss": 81.9061, "step": 78870 }, { "epoch": 0.3186851812198758, "grad_norm": 428.2076721191406, "learning_rate": 3.460229146666514e-05, "loss": 72.709, "step": 78880 }, { "epoch": 0.31872558248524346, "grad_norm": 793.41748046875, "learning_rate": 3.460038312166058e-05, "loss": 61.257, "step": 78890 }, { "epoch": 0.31876598375061105, "grad_norm": 425.1347351074219, "learning_rate": 3.4598474492014036e-05, "loss": 67.8388, "step": 78900 }, { "epoch": 0.3188063850159787, "grad_norm": 1619.48876953125, "learning_rate": 3.459656557776271e-05, "loss": 67.4973, "step": 78910 }, { "epoch": 0.31884678628134633, "grad_norm": 809.5908203125, "learning_rate": 3.459465637894383e-05, "loss": 64.2227, "step": 78920 }, { "epoch": 0.31888718754671397, "grad_norm": 1020.9970092773438, "learning_rate": 3.4592746895594604e-05, "loss": 53.4182, "step": 78930 }, { "epoch": 0.3189275888120816, "grad_norm": 736.4168090820312, "learning_rate": 3.459083712775226e-05, "loss": 68.4549, "step": 78940 }, { "epoch": 0.31896799007744925, "grad_norm": 611.6744384765625, "learning_rate": 3.458892707545405e-05, "loss": 82.075, "step": 78950 }, { "epoch": 0.31900839134281683, "grad_norm": 786.9500732421875, "learning_rate": 3.4587016738737186e-05, "loss": 59.6807, "step": 78960 }, { "epoch": 0.3190487926081845, "grad_norm": 669.5952758789062, "learning_rate": 3.4585106117638916e-05, "loss": 42.0674, "step": 78970 }, { "epoch": 0.3190891938735521, "grad_norm": 1109.5372314453125, "learning_rate": 3.45831952121965e-05, "loss": 89.6373, "step": 78980 }, { "epoch": 0.31912959513891975, "grad_norm": 1470.2607421875, "learning_rate": 3.458128402244719e-05, "loss": 75.5235, "step": 78990 }, { "epoch": 0.3191699964042874, "grad_norm": 543.140625, "learning_rate": 3.457937254842823e-05, "loss": 60.188, "step": 79000 }, { "epoch": 0.31921039766965503, "grad_norm": 932.54296875, "learning_rate": 3.457746079017691e-05, "loss": 48.5002, "step": 79010 }, { "epoch": 0.3192507989350226, "grad_norm": 661.5245971679688, "learning_rate": 3.457554874773047e-05, "loss": 84.905, "step": 79020 }, { "epoch": 0.31929120020039026, "grad_norm": 591.1777954101562, "learning_rate": 3.457363642112622e-05, "loss": 67.2805, "step": 79030 }, { "epoch": 0.3193316014657579, "grad_norm": 910.734130859375, "learning_rate": 3.457172381040141e-05, "loss": 98.3831, "step": 79040 }, { "epoch": 0.31937200273112554, "grad_norm": 733.234375, "learning_rate": 3.4569810915593356e-05, "loss": 74.8495, "step": 79050 }, { "epoch": 0.3194124039964932, "grad_norm": 597.4061889648438, "learning_rate": 3.456789773673933e-05, "loss": 41.353, "step": 79060 }, { "epoch": 0.3194528052618608, "grad_norm": 466.7047424316406, "learning_rate": 3.4565984273876635e-05, "loss": 56.502, "step": 79070 }, { "epoch": 0.31949320652722846, "grad_norm": 1494.1431884765625, "learning_rate": 3.456407052704258e-05, "loss": 70.4316, "step": 79080 }, { "epoch": 0.31953360779259604, "grad_norm": 761.2359008789062, "learning_rate": 3.456215649627447e-05, "loss": 52.0162, "step": 79090 }, { "epoch": 0.3195740090579637, "grad_norm": 988.449951171875, "learning_rate": 3.456024218160963e-05, "loss": 57.3673, "step": 79100 }, { "epoch": 0.3196144103233313, "grad_norm": 629.7996215820312, "learning_rate": 3.455832758308536e-05, "loss": 91.0117, "step": 79110 }, { "epoch": 0.31965481158869896, "grad_norm": 314.0284118652344, "learning_rate": 3.455641270073901e-05, "loss": 30.0743, "step": 79120 }, { "epoch": 0.3196952128540666, "grad_norm": 524.0881958007812, "learning_rate": 3.4554497534607895e-05, "loss": 74.0941, "step": 79130 }, { "epoch": 0.31973561411943424, "grad_norm": 798.7915649414062, "learning_rate": 3.455258208472936e-05, "loss": 69.5489, "step": 79140 }, { "epoch": 0.3197760153848018, "grad_norm": 1003.20361328125, "learning_rate": 3.455066635114074e-05, "loss": 66.3634, "step": 79150 }, { "epoch": 0.31981641665016947, "grad_norm": 757.2113037109375, "learning_rate": 3.4548750333879395e-05, "loss": 73.0947, "step": 79160 }, { "epoch": 0.3198568179155371, "grad_norm": 2536.526611328125, "learning_rate": 3.454683403298266e-05, "loss": 107.1402, "step": 79170 }, { "epoch": 0.31989721918090475, "grad_norm": 1102.933837890625, "learning_rate": 3.4544917448487915e-05, "loss": 40.2471, "step": 79180 }, { "epoch": 0.3199376204462724, "grad_norm": 523.2879638671875, "learning_rate": 3.454300058043252e-05, "loss": 56.0137, "step": 79190 }, { "epoch": 0.31997802171164, "grad_norm": 1049.5115966796875, "learning_rate": 3.454108342885384e-05, "loss": 88.8176, "step": 79200 }, { "epoch": 0.32001842297700767, "grad_norm": 1003.3671264648438, "learning_rate": 3.453916599378925e-05, "loss": 62.6265, "step": 79210 }, { "epoch": 0.32005882424237525, "grad_norm": 1479.1932373046875, "learning_rate": 3.453724827527613e-05, "loss": 86.5033, "step": 79220 }, { "epoch": 0.3200992255077429, "grad_norm": 793.9293823242188, "learning_rate": 3.453533027335188e-05, "loss": 72.8704, "step": 79230 }, { "epoch": 0.32013962677311053, "grad_norm": 738.2943725585938, "learning_rate": 3.453341198805388e-05, "loss": 69.7305, "step": 79240 }, { "epoch": 0.32018002803847817, "grad_norm": 609.8193969726562, "learning_rate": 3.4531493419419525e-05, "loss": 63.8748, "step": 79250 }, { "epoch": 0.3202204293038458, "grad_norm": 747.0555419921875, "learning_rate": 3.452957456748622e-05, "loss": 72.1068, "step": 79260 }, { "epoch": 0.32026083056921345, "grad_norm": 1100.9898681640625, "learning_rate": 3.4527655432291384e-05, "loss": 87.2401, "step": 79270 }, { "epoch": 0.32030123183458103, "grad_norm": 874.8444213867188, "learning_rate": 3.4525736013872433e-05, "loss": 64.6822, "step": 79280 }, { "epoch": 0.3203416330999487, "grad_norm": 552.16015625, "learning_rate": 3.4523816312266773e-05, "loss": 57.4605, "step": 79290 }, { "epoch": 0.3203820343653163, "grad_norm": 5363.94482421875, "learning_rate": 3.4521896327511836e-05, "loss": 94.8203, "step": 79300 }, { "epoch": 0.32042243563068395, "grad_norm": 499.93145751953125, "learning_rate": 3.451997605964506e-05, "loss": 96.6472, "step": 79310 }, { "epoch": 0.3204628368960516, "grad_norm": 1525.2781982421875, "learning_rate": 3.451805550870387e-05, "loss": 46.9949, "step": 79320 }, { "epoch": 0.32050323816141923, "grad_norm": 871.80322265625, "learning_rate": 3.4516134674725723e-05, "loss": 63.2105, "step": 79330 }, { "epoch": 0.3205436394267868, "grad_norm": 562.4641723632812, "learning_rate": 3.4514213557748046e-05, "loss": 74.4778, "step": 79340 }, { "epoch": 0.32058404069215446, "grad_norm": 942.83447265625, "learning_rate": 3.4512292157808306e-05, "loss": 81.0777, "step": 79350 }, { "epoch": 0.3206244419575221, "grad_norm": 1135.0953369140625, "learning_rate": 3.4510370474943956e-05, "loss": 62.4802, "step": 79360 }, { "epoch": 0.32066484322288974, "grad_norm": 766.571044921875, "learning_rate": 3.450844850919247e-05, "loss": 65.9881, "step": 79370 }, { "epoch": 0.3207052444882574, "grad_norm": 927.8790283203125, "learning_rate": 3.450652626059131e-05, "loss": 67.4498, "step": 79380 }, { "epoch": 0.320745645753625, "grad_norm": 1346.9913330078125, "learning_rate": 3.4504603729177945e-05, "loss": 71.3516, "step": 79390 }, { "epoch": 0.32078604701899266, "grad_norm": 595.7301635742188, "learning_rate": 3.450268091498987e-05, "loss": 88.8037, "step": 79400 }, { "epoch": 0.32082644828436024, "grad_norm": 913.6329956054688, "learning_rate": 3.4500757818064565e-05, "loss": 65.262, "step": 79410 }, { "epoch": 0.3208668495497279, "grad_norm": 1271.197509765625, "learning_rate": 3.4498834438439516e-05, "loss": 103.464, "step": 79420 }, { "epoch": 0.3209072508150955, "grad_norm": 891.8692626953125, "learning_rate": 3.4496910776152226e-05, "loss": 75.6412, "step": 79430 }, { "epoch": 0.32094765208046316, "grad_norm": 835.5440673828125, "learning_rate": 3.44949868312402e-05, "loss": 80.8117, "step": 79440 }, { "epoch": 0.3209880533458308, "grad_norm": 821.7725219726562, "learning_rate": 3.4493062603740934e-05, "loss": 81.0348, "step": 79450 }, { "epoch": 0.32102845461119844, "grad_norm": 579.9627685546875, "learning_rate": 3.449113809369196e-05, "loss": 77.9407, "step": 79460 }, { "epoch": 0.32106885587656603, "grad_norm": 436.1594543457031, "learning_rate": 3.448921330113079e-05, "loss": 35.9948, "step": 79470 }, { "epoch": 0.32110925714193367, "grad_norm": 500.90777587890625, "learning_rate": 3.448728822609494e-05, "loss": 75.6861, "step": 79480 }, { "epoch": 0.3211496584073013, "grad_norm": 808.7592163085938, "learning_rate": 3.448536286862195e-05, "loss": 65.08, "step": 79490 }, { "epoch": 0.32119005967266895, "grad_norm": 428.0213317871094, "learning_rate": 3.4483437228749356e-05, "loss": 62.6515, "step": 79500 }, { "epoch": 0.3212304609380366, "grad_norm": 669.585693359375, "learning_rate": 3.448151130651469e-05, "loss": 66.1477, "step": 79510 }, { "epoch": 0.32127086220340423, "grad_norm": 461.6833190917969, "learning_rate": 3.4479585101955506e-05, "loss": 72.4844, "step": 79520 }, { "epoch": 0.32131126346877187, "grad_norm": 0.0, "learning_rate": 3.4477658615109365e-05, "loss": 71.0492, "step": 79530 }, { "epoch": 0.32135166473413945, "grad_norm": 636.3035888671875, "learning_rate": 3.447573184601381e-05, "loss": 55.4109, "step": 79540 }, { "epoch": 0.3213920659995071, "grad_norm": 1040.3365478515625, "learning_rate": 3.447380479470641e-05, "loss": 64.308, "step": 79550 }, { "epoch": 0.32143246726487473, "grad_norm": 1166.4844970703125, "learning_rate": 3.4471877461224735e-05, "loss": 49.8759, "step": 79560 }, { "epoch": 0.3214728685302424, "grad_norm": 618.626708984375, "learning_rate": 3.446994984560636e-05, "loss": 48.319, "step": 79570 }, { "epoch": 0.32151326979561, "grad_norm": 990.6760864257812, "learning_rate": 3.4468021947888855e-05, "loss": 68.7862, "step": 79580 }, { "epoch": 0.32155367106097765, "grad_norm": 417.9886474609375, "learning_rate": 3.4466093768109825e-05, "loss": 58.8706, "step": 79590 }, { "epoch": 0.32159407232634524, "grad_norm": 575.5824584960938, "learning_rate": 3.4464165306306845e-05, "loss": 73.3827, "step": 79600 }, { "epoch": 0.3216344735917129, "grad_norm": 943.4527587890625, "learning_rate": 3.446223656251751e-05, "loss": 56.8044, "step": 79610 }, { "epoch": 0.3216748748570805, "grad_norm": 383.214599609375, "learning_rate": 3.4460307536779434e-05, "loss": 55.9928, "step": 79620 }, { "epoch": 0.32171527612244816, "grad_norm": 960.987548828125, "learning_rate": 3.4458378229130214e-05, "loss": 54.2587, "step": 79630 }, { "epoch": 0.3217556773878158, "grad_norm": 720.4596557617188, "learning_rate": 3.4456448639607476e-05, "loss": 60.2574, "step": 79640 }, { "epoch": 0.32179607865318344, "grad_norm": 860.6263427734375, "learning_rate": 3.4454518768248816e-05, "loss": 70.9943, "step": 79650 }, { "epoch": 0.321836479918551, "grad_norm": 1033.0247802734375, "learning_rate": 3.445258861509188e-05, "loss": 82.2327, "step": 79660 }, { "epoch": 0.32187688118391866, "grad_norm": 739.957763671875, "learning_rate": 3.4450658180174286e-05, "loss": 43.3071, "step": 79670 }, { "epoch": 0.3219172824492863, "grad_norm": 422.6085510253906, "learning_rate": 3.4448727463533666e-05, "loss": 46.9807, "step": 79680 }, { "epoch": 0.32195768371465394, "grad_norm": 470.52569580078125, "learning_rate": 3.4446796465207665e-05, "loss": 84.7386, "step": 79690 }, { "epoch": 0.3219980849800216, "grad_norm": 333.7068786621094, "learning_rate": 3.444486518523394e-05, "loss": 59.9149, "step": 79700 }, { "epoch": 0.3220384862453892, "grad_norm": 1886.801513671875, "learning_rate": 3.4442933623650124e-05, "loss": 55.9101, "step": 79710 }, { "epoch": 0.32207888751075686, "grad_norm": 725.7935180664062, "learning_rate": 3.444100178049389e-05, "loss": 62.0386, "step": 79720 }, { "epoch": 0.32211928877612445, "grad_norm": 519.2236328125, "learning_rate": 3.4439069655802875e-05, "loss": 43.5637, "step": 79730 }, { "epoch": 0.3221596900414921, "grad_norm": 915.39208984375, "learning_rate": 3.443713724961478e-05, "loss": 77.7298, "step": 79740 }, { "epoch": 0.3222000913068597, "grad_norm": 1126.2271728515625, "learning_rate": 3.4435204561967244e-05, "loss": 45.7563, "step": 79750 }, { "epoch": 0.32224049257222737, "grad_norm": 1261.709228515625, "learning_rate": 3.443327159289798e-05, "loss": 79.6718, "step": 79760 }, { "epoch": 0.322280893837595, "grad_norm": 1179.6519775390625, "learning_rate": 3.443133834244465e-05, "loss": 71.1249, "step": 79770 }, { "epoch": 0.32232129510296265, "grad_norm": 279.9851989746094, "learning_rate": 3.4429404810644944e-05, "loss": 47.8068, "step": 79780 }, { "epoch": 0.32236169636833023, "grad_norm": 411.3631591796875, "learning_rate": 3.4427470997536567e-05, "loss": 60.9555, "step": 79790 }, { "epoch": 0.32240209763369787, "grad_norm": 423.3763732910156, "learning_rate": 3.442553690315722e-05, "loss": 72.2827, "step": 79800 }, { "epoch": 0.3224424988990655, "grad_norm": 814.5885620117188, "learning_rate": 3.4423602527544594e-05, "loss": 96.0732, "step": 79810 }, { "epoch": 0.32248290016443315, "grad_norm": 434.79327392578125, "learning_rate": 3.442166787073642e-05, "loss": 36.7295, "step": 79820 }, { "epoch": 0.3225233014298008, "grad_norm": 317.3850402832031, "learning_rate": 3.4419732932770394e-05, "loss": 49.3777, "step": 79830 }, { "epoch": 0.32256370269516843, "grad_norm": 637.21435546875, "learning_rate": 3.441779771368426e-05, "loss": 56.3814, "step": 79840 }, { "epoch": 0.32260410396053607, "grad_norm": 654.25927734375, "learning_rate": 3.4415862213515735e-05, "loss": 45.2075, "step": 79850 }, { "epoch": 0.32264450522590365, "grad_norm": 1865.554931640625, "learning_rate": 3.4413926432302554e-05, "loss": 70.6747, "step": 79860 }, { "epoch": 0.3226849064912713, "grad_norm": 845.0151977539062, "learning_rate": 3.441199037008246e-05, "loss": 53.3493, "step": 79870 }, { "epoch": 0.32272530775663893, "grad_norm": 0.0, "learning_rate": 3.441005402689319e-05, "loss": 50.3135, "step": 79880 }, { "epoch": 0.3227657090220066, "grad_norm": 726.01123046875, "learning_rate": 3.4408117402772494e-05, "loss": 107.5813, "step": 79890 }, { "epoch": 0.3228061102873742, "grad_norm": 622.0538940429688, "learning_rate": 3.440618049775814e-05, "loss": 45.5656, "step": 79900 }, { "epoch": 0.32284651155274185, "grad_norm": 610.9111328125, "learning_rate": 3.440424331188788e-05, "loss": 52.3628, "step": 79910 }, { "epoch": 0.32288691281810944, "grad_norm": 655.2313232421875, "learning_rate": 3.4402305845199475e-05, "loss": 93.0792, "step": 79920 }, { "epoch": 0.3229273140834771, "grad_norm": 739.9714965820312, "learning_rate": 3.4400368097730705e-05, "loss": 51.3259, "step": 79930 }, { "epoch": 0.3229677153488447, "grad_norm": 1402.1278076171875, "learning_rate": 3.439843006951935e-05, "loss": 84.88, "step": 79940 }, { "epoch": 0.32300811661421236, "grad_norm": 383.4258728027344, "learning_rate": 3.439649176060318e-05, "loss": 73.4971, "step": 79950 }, { "epoch": 0.32304851787958, "grad_norm": 553.3306274414062, "learning_rate": 3.439455317102e-05, "loss": 64.7679, "step": 79960 }, { "epoch": 0.32308891914494764, "grad_norm": 677.3023071289062, "learning_rate": 3.439261430080759e-05, "loss": 77.7907, "step": 79970 }, { "epoch": 0.3231293204103152, "grad_norm": 1300.3046875, "learning_rate": 3.439067515000375e-05, "loss": 60.2865, "step": 79980 }, { "epoch": 0.32316972167568286, "grad_norm": 734.296142578125, "learning_rate": 3.4388735718646294e-05, "loss": 94.4354, "step": 79990 }, { "epoch": 0.3232101229410505, "grad_norm": 543.9745483398438, "learning_rate": 3.438679600677303e-05, "loss": 43.9692, "step": 80000 }, { "epoch": 0.32325052420641814, "grad_norm": 590.18994140625, "learning_rate": 3.438485601442176e-05, "loss": 83.3607, "step": 80010 }, { "epoch": 0.3232909254717858, "grad_norm": 840.5568237304688, "learning_rate": 3.438291574163032e-05, "loss": 46.152, "step": 80020 }, { "epoch": 0.3233313267371534, "grad_norm": 746.9127197265625, "learning_rate": 3.4380975188436547e-05, "loss": 66.6191, "step": 80030 }, { "epoch": 0.32337172800252106, "grad_norm": 1017.9415893554688, "learning_rate": 3.437903435487825e-05, "loss": 65.5659, "step": 80040 }, { "epoch": 0.32341212926788865, "grad_norm": 1068.95654296875, "learning_rate": 3.437709324099327e-05, "loss": 67.7499, "step": 80050 }, { "epoch": 0.3234525305332563, "grad_norm": 460.35491943359375, "learning_rate": 3.4375151846819456e-05, "loss": 60.3891, "step": 80060 }, { "epoch": 0.32349293179862393, "grad_norm": 750.3103637695312, "learning_rate": 3.437321017239466e-05, "loss": 71.0767, "step": 80070 }, { "epoch": 0.32353333306399157, "grad_norm": 514.40234375, "learning_rate": 3.4371268217756734e-05, "loss": 86.9577, "step": 80080 }, { "epoch": 0.3235737343293592, "grad_norm": 784.3499145507812, "learning_rate": 3.4369325982943536e-05, "loss": 41.5145, "step": 80090 }, { "epoch": 0.32361413559472685, "grad_norm": 993.7069702148438, "learning_rate": 3.4367383467992926e-05, "loss": 67.2231, "step": 80100 }, { "epoch": 0.32365453686009443, "grad_norm": 269.11529541015625, "learning_rate": 3.436544067294278e-05, "loss": 60.3321, "step": 80110 }, { "epoch": 0.3236949381254621, "grad_norm": 1333.7032470703125, "learning_rate": 3.436349759783097e-05, "loss": 80.0917, "step": 80120 }, { "epoch": 0.3237353393908297, "grad_norm": 642.1954956054688, "learning_rate": 3.4361554242695384e-05, "loss": 54.0837, "step": 80130 }, { "epoch": 0.32377574065619735, "grad_norm": 1311.14453125, "learning_rate": 3.43596106075739e-05, "loss": 88.6644, "step": 80140 }, { "epoch": 0.323816141921565, "grad_norm": 981.5884399414062, "learning_rate": 3.4357666692504415e-05, "loss": 66.0389, "step": 80150 }, { "epoch": 0.32385654318693263, "grad_norm": 673.2181396484375, "learning_rate": 3.4355722497524826e-05, "loss": 45.9753, "step": 80160 }, { "epoch": 0.3238969444523003, "grad_norm": 325.1015930175781, "learning_rate": 3.4353778022673035e-05, "loss": 65.172, "step": 80170 }, { "epoch": 0.32393734571766786, "grad_norm": 2092.246826171875, "learning_rate": 3.4351833267986956e-05, "loss": 87.7155, "step": 80180 }, { "epoch": 0.3239777469830355, "grad_norm": 422.2587585449219, "learning_rate": 3.434988823350449e-05, "loss": 73.1039, "step": 80190 }, { "epoch": 0.32401814824840314, "grad_norm": 947.8048706054688, "learning_rate": 3.434794291926358e-05, "loss": 83.3786, "step": 80200 }, { "epoch": 0.3240585495137708, "grad_norm": 737.5042114257812, "learning_rate": 3.434599732530212e-05, "loss": 52.9744, "step": 80210 }, { "epoch": 0.3240989507791384, "grad_norm": 1040.675048828125, "learning_rate": 3.434405145165807e-05, "loss": 88.4331, "step": 80220 }, { "epoch": 0.32413935204450606, "grad_norm": 748.6989135742188, "learning_rate": 3.434210529836934e-05, "loss": 75.7019, "step": 80230 }, { "epoch": 0.32417975330987364, "grad_norm": 292.437255859375, "learning_rate": 3.434015886547389e-05, "loss": 50.4198, "step": 80240 }, { "epoch": 0.3242201545752413, "grad_norm": 574.4251708984375, "learning_rate": 3.433821215300966e-05, "loss": 57.2804, "step": 80250 }, { "epoch": 0.3242605558406089, "grad_norm": 997.3341064453125, "learning_rate": 3.4336265161014596e-05, "loss": 65.6375, "step": 80260 }, { "epoch": 0.32430095710597656, "grad_norm": 617.52392578125, "learning_rate": 3.433431788952667e-05, "loss": 57.1869, "step": 80270 }, { "epoch": 0.3243413583713442, "grad_norm": 1012.9528198242188, "learning_rate": 3.433237033858384e-05, "loss": 90.5776, "step": 80280 }, { "epoch": 0.32438175963671184, "grad_norm": 616.6028442382812, "learning_rate": 3.4330422508224064e-05, "loss": 50.6281, "step": 80290 }, { "epoch": 0.3244221609020794, "grad_norm": 867.1862182617188, "learning_rate": 3.432847439848532e-05, "loss": 57.7962, "step": 80300 }, { "epoch": 0.32446256216744707, "grad_norm": 295.4292907714844, "learning_rate": 3.43265260094056e-05, "loss": 71.0389, "step": 80310 }, { "epoch": 0.3245029634328147, "grad_norm": 436.0542297363281, "learning_rate": 3.432457734102287e-05, "loss": 59.4424, "step": 80320 }, { "epoch": 0.32454336469818235, "grad_norm": 1088.5181884765625, "learning_rate": 3.4322628393375144e-05, "loss": 64.7318, "step": 80330 }, { "epoch": 0.32458376596355, "grad_norm": 441.3780517578125, "learning_rate": 3.4320679166500386e-05, "loss": 57.3388, "step": 80340 }, { "epoch": 0.3246241672289176, "grad_norm": 413.42926025390625, "learning_rate": 3.4318729660436624e-05, "loss": 108.3097, "step": 80350 }, { "epoch": 0.32466456849428527, "grad_norm": 669.157958984375, "learning_rate": 3.431677987522186e-05, "loss": 65.8818, "step": 80360 }, { "epoch": 0.32470496975965285, "grad_norm": 1144.414794921875, "learning_rate": 3.4314829810894095e-05, "loss": 59.8718, "step": 80370 }, { "epoch": 0.3247453710250205, "grad_norm": 467.1025085449219, "learning_rate": 3.431287946749136e-05, "loss": 56.5579, "step": 80380 }, { "epoch": 0.32478577229038813, "grad_norm": 852.761474609375, "learning_rate": 3.4310928845051656e-05, "loss": 58.1927, "step": 80390 }, { "epoch": 0.32482617355575577, "grad_norm": 1139.1510009765625, "learning_rate": 3.430897794361304e-05, "loss": 68.5792, "step": 80400 }, { "epoch": 0.3248665748211234, "grad_norm": 645.5872802734375, "learning_rate": 3.430702676321353e-05, "loss": 40.7631, "step": 80410 }, { "epoch": 0.32490697608649105, "grad_norm": 466.1105041503906, "learning_rate": 3.4305075303891165e-05, "loss": 73.6079, "step": 80420 }, { "epoch": 0.32494737735185864, "grad_norm": 1006.1669311523438, "learning_rate": 3.430312356568399e-05, "loss": 89.6406, "step": 80430 }, { "epoch": 0.3249877786172263, "grad_norm": 610.352294921875, "learning_rate": 3.430117154863006e-05, "loss": 38.2407, "step": 80440 }, { "epoch": 0.3250281798825939, "grad_norm": 535.408203125, "learning_rate": 3.4299219252767436e-05, "loss": 52.4986, "step": 80450 }, { "epoch": 0.32506858114796156, "grad_norm": 634.6419067382812, "learning_rate": 3.429726667813416e-05, "loss": 57.0195, "step": 80460 }, { "epoch": 0.3251089824133292, "grad_norm": 657.6990966796875, "learning_rate": 3.429531382476832e-05, "loss": 51.0841, "step": 80470 }, { "epoch": 0.32514938367869683, "grad_norm": 693.49853515625, "learning_rate": 3.429336069270796e-05, "loss": 67.1961, "step": 80480 }, { "epoch": 0.3251897849440645, "grad_norm": 1512.3751220703125, "learning_rate": 3.429140728199119e-05, "loss": 76.1084, "step": 80490 }, { "epoch": 0.32523018620943206, "grad_norm": 1344.2879638671875, "learning_rate": 3.428945359265607e-05, "loss": 81.8786, "step": 80500 }, { "epoch": 0.3252705874747997, "grad_norm": 1220.5660400390625, "learning_rate": 3.42874996247407e-05, "loss": 76.1141, "step": 80510 }, { "epoch": 0.32531098874016734, "grad_norm": 761.2682495117188, "learning_rate": 3.4285545378283165e-05, "loss": 62.3988, "step": 80520 }, { "epoch": 0.325351390005535, "grad_norm": 692.607421875, "learning_rate": 3.428359085332157e-05, "loss": 77.3975, "step": 80530 }, { "epoch": 0.3253917912709026, "grad_norm": 845.8311157226562, "learning_rate": 3.4281636049894014e-05, "loss": 58.645, "step": 80540 }, { "epoch": 0.32543219253627026, "grad_norm": 783.7555541992188, "learning_rate": 3.427968096803862e-05, "loss": 72.8846, "step": 80550 }, { "epoch": 0.32547259380163784, "grad_norm": 889.4086303710938, "learning_rate": 3.4277725607793486e-05, "loss": 68.3054, "step": 80560 }, { "epoch": 0.3255129950670055, "grad_norm": 442.7275695800781, "learning_rate": 3.4275769969196745e-05, "loss": 63.8601, "step": 80570 }, { "epoch": 0.3255533963323731, "grad_norm": 1197.6492919921875, "learning_rate": 3.427381405228651e-05, "loss": 64.7808, "step": 80580 }, { "epoch": 0.32559379759774076, "grad_norm": 534.90771484375, "learning_rate": 3.427185785710093e-05, "loss": 47.0797, "step": 80590 }, { "epoch": 0.3256341988631084, "grad_norm": 1945.9188232421875, "learning_rate": 3.426990138367813e-05, "loss": 74.8174, "step": 80600 }, { "epoch": 0.32567460012847604, "grad_norm": 978.9138793945312, "learning_rate": 3.426794463205626e-05, "loss": 52.3134, "step": 80610 }, { "epoch": 0.32571500139384363, "grad_norm": 1130.74267578125, "learning_rate": 3.426598760227346e-05, "loss": 56.3229, "step": 80620 }, { "epoch": 0.32575540265921127, "grad_norm": 486.0097351074219, "learning_rate": 3.426403029436789e-05, "loss": 51.0282, "step": 80630 }, { "epoch": 0.3257958039245789, "grad_norm": 1250.3785400390625, "learning_rate": 3.42620727083777e-05, "loss": 66.0362, "step": 80640 }, { "epoch": 0.32583620518994655, "grad_norm": 1186.8968505859375, "learning_rate": 3.426011484434106e-05, "loss": 58.2137, "step": 80650 }, { "epoch": 0.3258766064553142, "grad_norm": 1575.198974609375, "learning_rate": 3.425815670229614e-05, "loss": 49.037, "step": 80660 }, { "epoch": 0.32591700772068183, "grad_norm": 387.7889709472656, "learning_rate": 3.425619828228112e-05, "loss": 51.2633, "step": 80670 }, { "epoch": 0.32595740898604947, "grad_norm": 977.7369995117188, "learning_rate": 3.425423958433418e-05, "loss": 58.8048, "step": 80680 }, { "epoch": 0.32599781025141705, "grad_norm": 721.7234497070312, "learning_rate": 3.425228060849349e-05, "loss": 60.1916, "step": 80690 }, { "epoch": 0.3260382115167847, "grad_norm": 738.7984619140625, "learning_rate": 3.425032135479725e-05, "loss": 58.4803, "step": 80700 }, { "epoch": 0.32607861278215233, "grad_norm": 620.6669921875, "learning_rate": 3.424836182328367e-05, "loss": 75.7753, "step": 80710 }, { "epoch": 0.32611901404752, "grad_norm": 1548.1915283203125, "learning_rate": 3.4246402013990935e-05, "loss": 91.6949, "step": 80720 }, { "epoch": 0.3261594153128876, "grad_norm": 72.1471176147461, "learning_rate": 3.4244441926957254e-05, "loss": 67.6949, "step": 80730 }, { "epoch": 0.32619981657825525, "grad_norm": 921.9493408203125, "learning_rate": 3.424248156222085e-05, "loss": 81.8886, "step": 80740 }, { "epoch": 0.32624021784362284, "grad_norm": 640.4423217773438, "learning_rate": 3.424052091981994e-05, "loss": 57.0548, "step": 80750 }, { "epoch": 0.3262806191089905, "grad_norm": 535.4073486328125, "learning_rate": 3.4238559999792726e-05, "loss": 57.7797, "step": 80760 }, { "epoch": 0.3263210203743581, "grad_norm": 619.300048828125, "learning_rate": 3.423659880217747e-05, "loss": 66.9942, "step": 80770 }, { "epoch": 0.32636142163972576, "grad_norm": 1028.4447021484375, "learning_rate": 3.4234637327012384e-05, "loss": 55.0906, "step": 80780 }, { "epoch": 0.3264018229050934, "grad_norm": 459.99017333984375, "learning_rate": 3.423267557433572e-05, "loss": 51.9673, "step": 80790 }, { "epoch": 0.32644222417046104, "grad_norm": 0.0, "learning_rate": 3.4230713544185715e-05, "loss": 74.0897, "step": 80800 }, { "epoch": 0.3264826254358287, "grad_norm": 600.255126953125, "learning_rate": 3.4228751236600615e-05, "loss": 42.6606, "step": 80810 }, { "epoch": 0.32652302670119626, "grad_norm": 1884.208740234375, "learning_rate": 3.4226788651618695e-05, "loss": 76.6577, "step": 80820 }, { "epoch": 0.3265634279665639, "grad_norm": 995.8324584960938, "learning_rate": 3.42248257892782e-05, "loss": 71.5253, "step": 80830 }, { "epoch": 0.32660382923193154, "grad_norm": 530.7216186523438, "learning_rate": 3.422286264961741e-05, "loss": 45.6298, "step": 80840 }, { "epoch": 0.3266442304972992, "grad_norm": 890.0387573242188, "learning_rate": 3.422089923267458e-05, "loss": 68.8483, "step": 80850 }, { "epoch": 0.3266846317626668, "grad_norm": 761.2548828125, "learning_rate": 3.421893553848801e-05, "loss": 62.3427, "step": 80860 }, { "epoch": 0.32672503302803446, "grad_norm": 437.8416442871094, "learning_rate": 3.421697156709596e-05, "loss": 70.8058, "step": 80870 }, { "epoch": 0.32676543429340205, "grad_norm": 167.2902374267578, "learning_rate": 3.421500731853674e-05, "loss": 43.4283, "step": 80880 }, { "epoch": 0.3268058355587697, "grad_norm": 622.623291015625, "learning_rate": 3.421304279284862e-05, "loss": 39.4079, "step": 80890 }, { "epoch": 0.3268462368241373, "grad_norm": 1559.4071044921875, "learning_rate": 3.421107799006992e-05, "loss": 71.1375, "step": 80900 }, { "epoch": 0.32688663808950497, "grad_norm": 817.6478271484375, "learning_rate": 3.420911291023894e-05, "loss": 87.4325, "step": 80910 }, { "epoch": 0.3269270393548726, "grad_norm": 1013.1358032226562, "learning_rate": 3.4207147553393996e-05, "loss": 57.3408, "step": 80920 }, { "epoch": 0.32696744062024025, "grad_norm": 797.0433349609375, "learning_rate": 3.420518191957339e-05, "loss": 95.546, "step": 80930 }, { "epoch": 0.32700784188560783, "grad_norm": 647.9711303710938, "learning_rate": 3.4203216008815446e-05, "loss": 66.2914, "step": 80940 }, { "epoch": 0.32704824315097547, "grad_norm": 585.9962768554688, "learning_rate": 3.42012498211585e-05, "loss": 52.315, "step": 80950 }, { "epoch": 0.3270886444163431, "grad_norm": 1960.798095703125, "learning_rate": 3.419928335664087e-05, "loss": 60.6582, "step": 80960 }, { "epoch": 0.32712904568171075, "grad_norm": 866.8156127929688, "learning_rate": 3.4197316615300904e-05, "loss": 69.2536, "step": 80970 }, { "epoch": 0.3271694469470784, "grad_norm": 1049.0009765625, "learning_rate": 3.419534959717694e-05, "loss": 86.5613, "step": 80980 }, { "epoch": 0.32720984821244603, "grad_norm": 1512.5225830078125, "learning_rate": 3.419338230230733e-05, "loss": 88.7681, "step": 80990 }, { "epoch": 0.32725024947781367, "grad_norm": 522.5618286132812, "learning_rate": 3.419141473073042e-05, "loss": 45.2407, "step": 81000 }, { "epoch": 0.32729065074318126, "grad_norm": 1057.761474609375, "learning_rate": 3.418944688248458e-05, "loss": 73.3249, "step": 81010 }, { "epoch": 0.3273310520085489, "grad_norm": 405.7344665527344, "learning_rate": 3.4187478757608166e-05, "loss": 58.5253, "step": 81020 }, { "epoch": 0.32737145327391654, "grad_norm": 227.71484375, "learning_rate": 3.418551035613954e-05, "loss": 68.0555, "step": 81030 }, { "epoch": 0.3274118545392842, "grad_norm": 752.8635864257812, "learning_rate": 3.41835416781171e-05, "loss": 77.9322, "step": 81040 }, { "epoch": 0.3274522558046518, "grad_norm": 580.7243041992188, "learning_rate": 3.4181572723579205e-05, "loss": 60.2561, "step": 81050 }, { "epoch": 0.32749265707001946, "grad_norm": 755.0991821289062, "learning_rate": 3.417960349256425e-05, "loss": 50.1816, "step": 81060 }, { "epoch": 0.32753305833538704, "grad_norm": 875.7639770507812, "learning_rate": 3.417763398511063e-05, "loss": 47.6605, "step": 81070 }, { "epoch": 0.3275734596007547, "grad_norm": 575.2279052734375, "learning_rate": 3.417566420125673e-05, "loss": 74.7226, "step": 81080 }, { "epoch": 0.3276138608661223, "grad_norm": 652.4024047851562, "learning_rate": 3.417369414104096e-05, "loss": 41.3797, "step": 81090 }, { "epoch": 0.32765426213148996, "grad_norm": 683.2135620117188, "learning_rate": 3.417172380450173e-05, "loss": 55.1849, "step": 81100 }, { "epoch": 0.3276946633968576, "grad_norm": 488.3472595214844, "learning_rate": 3.416975319167744e-05, "loss": 78.153, "step": 81110 }, { "epoch": 0.32773506466222524, "grad_norm": 1091.7830810546875, "learning_rate": 3.416778230260652e-05, "loss": 42.6646, "step": 81120 }, { "epoch": 0.3277754659275929, "grad_norm": 2867.489990234375, "learning_rate": 3.416581113732739e-05, "loss": 77.2699, "step": 81130 }, { "epoch": 0.32781586719296046, "grad_norm": 431.8386535644531, "learning_rate": 3.416383969587848e-05, "loss": 84.1893, "step": 81140 }, { "epoch": 0.3278562684583281, "grad_norm": 1082.2401123046875, "learning_rate": 3.4161867978298225e-05, "loss": 68.9859, "step": 81150 }, { "epoch": 0.32789666972369574, "grad_norm": 543.3303833007812, "learning_rate": 3.415989598462506e-05, "loss": 59.1727, "step": 81160 }, { "epoch": 0.3279370709890634, "grad_norm": 682.0264282226562, "learning_rate": 3.415792371489743e-05, "loss": 67.5518, "step": 81170 }, { "epoch": 0.327977472254431, "grad_norm": 884.221435546875, "learning_rate": 3.415595116915379e-05, "loss": 62.808, "step": 81180 }, { "epoch": 0.32801787351979866, "grad_norm": 1019.9883422851562, "learning_rate": 3.41539783474326e-05, "loss": 67.86, "step": 81190 }, { "epoch": 0.32805827478516625, "grad_norm": 601.5031127929688, "learning_rate": 3.415200524977231e-05, "loss": 81.0774, "step": 81200 }, { "epoch": 0.3280986760505339, "grad_norm": 543.6842651367188, "learning_rate": 3.415003187621139e-05, "loss": 59.2001, "step": 81210 }, { "epoch": 0.32813907731590153, "grad_norm": 1308.574951171875, "learning_rate": 3.414805822678831e-05, "loss": 39.9627, "step": 81220 }, { "epoch": 0.32817947858126917, "grad_norm": 543.0343017578125, "learning_rate": 3.4146084301541565e-05, "loss": 59.6952, "step": 81230 }, { "epoch": 0.3282198798466368, "grad_norm": 1221.7008056640625, "learning_rate": 3.414411010050962e-05, "loss": 72.1673, "step": 81240 }, { "epoch": 0.32826028111200445, "grad_norm": 2868.123046875, "learning_rate": 3.4142135623730954e-05, "loss": 115.6474, "step": 81250 }, { "epoch": 0.32830068237737203, "grad_norm": 887.4540405273438, "learning_rate": 3.4140160871244076e-05, "loss": 43.3435, "step": 81260 }, { "epoch": 0.3283410836427397, "grad_norm": 814.0238647460938, "learning_rate": 3.413818584308749e-05, "loss": 72.639, "step": 81270 }, { "epoch": 0.3283814849081073, "grad_norm": 592.52490234375, "learning_rate": 3.413621053929969e-05, "loss": 52.7047, "step": 81280 }, { "epoch": 0.32842188617347495, "grad_norm": 197.75149536132812, "learning_rate": 3.4134234959919185e-05, "loss": 49.6054, "step": 81290 }, { "epoch": 0.3284622874388426, "grad_norm": 1070.3717041015625, "learning_rate": 3.41322591049845e-05, "loss": 103.2167, "step": 81300 }, { "epoch": 0.32850268870421023, "grad_norm": 512.3439331054688, "learning_rate": 3.413028297453414e-05, "loss": 79.9242, "step": 81310 }, { "epoch": 0.3285430899695779, "grad_norm": 594.7864990234375, "learning_rate": 3.4128306568606644e-05, "loss": 53.6589, "step": 81320 }, { "epoch": 0.32858349123494546, "grad_norm": 645.3168334960938, "learning_rate": 3.4126329887240536e-05, "loss": 64.8506, "step": 81330 }, { "epoch": 0.3286238925003131, "grad_norm": 555.6690063476562, "learning_rate": 3.412435293047435e-05, "loss": 81.0256, "step": 81340 }, { "epoch": 0.32866429376568074, "grad_norm": 1569.737548828125, "learning_rate": 3.4122375698346636e-05, "loss": 56.2626, "step": 81350 }, { "epoch": 0.3287046950310484, "grad_norm": 573.2012329101562, "learning_rate": 3.412039819089593e-05, "loss": 81.6873, "step": 81360 }, { "epoch": 0.328745096296416, "grad_norm": 1500.382568359375, "learning_rate": 3.41184204081608e-05, "loss": 47.1895, "step": 81370 }, { "epoch": 0.32878549756178366, "grad_norm": 542.472900390625, "learning_rate": 3.41164423501798e-05, "loss": 57.6808, "step": 81380 }, { "epoch": 0.32882589882715124, "grad_norm": 2190.964111328125, "learning_rate": 3.4114464016991476e-05, "loss": 60.6285, "step": 81390 }, { "epoch": 0.3288663000925189, "grad_norm": 626.0020751953125, "learning_rate": 3.411248540863442e-05, "loss": 57.284, "step": 81400 }, { "epoch": 0.3289067013578865, "grad_norm": 1373.947265625, "learning_rate": 3.411050652514719e-05, "loss": 65.5403, "step": 81410 }, { "epoch": 0.32894710262325416, "grad_norm": 636.7581176757812, "learning_rate": 3.410852736656837e-05, "loss": 52.3231, "step": 81420 }, { "epoch": 0.3289875038886218, "grad_norm": 276.45831298828125, "learning_rate": 3.410654793293654e-05, "loss": 66.9052, "step": 81430 }, { "epoch": 0.32902790515398944, "grad_norm": 1264.7098388671875, "learning_rate": 3.410456822429031e-05, "loss": 63.3931, "step": 81440 }, { "epoch": 0.3290683064193571, "grad_norm": 1080.706787109375, "learning_rate": 3.410258824066825e-05, "loss": 65.6225, "step": 81450 }, { "epoch": 0.32910870768472467, "grad_norm": 1004.5601806640625, "learning_rate": 3.4100607982108975e-05, "loss": 70.5807, "step": 81460 }, { "epoch": 0.3291491089500923, "grad_norm": 981.4774169921875, "learning_rate": 3.409862744865109e-05, "loss": 79.1536, "step": 81470 }, { "epoch": 0.32918951021545995, "grad_norm": 1003.9555053710938, "learning_rate": 3.4096646640333205e-05, "loss": 48.9285, "step": 81480 }, { "epoch": 0.3292299114808276, "grad_norm": 524.7885131835938, "learning_rate": 3.4094665557193934e-05, "loss": 73.5914, "step": 81490 }, { "epoch": 0.3292703127461952, "grad_norm": 1038.293212890625, "learning_rate": 3.4092684199271896e-05, "loss": 69.5635, "step": 81500 }, { "epoch": 0.32931071401156287, "grad_norm": 1191.782958984375, "learning_rate": 3.409070256660573e-05, "loss": 72.7884, "step": 81510 }, { "epoch": 0.32935111527693045, "grad_norm": 738.4837646484375, "learning_rate": 3.408872065923406e-05, "loss": 78.2964, "step": 81520 }, { "epoch": 0.3293915165422981, "grad_norm": 321.0873718261719, "learning_rate": 3.408673847719553e-05, "loss": 67.1254, "step": 81530 }, { "epoch": 0.32943191780766573, "grad_norm": 707.3953247070312, "learning_rate": 3.408475602052878e-05, "loss": 63.5638, "step": 81540 }, { "epoch": 0.32947231907303337, "grad_norm": 721.4423828125, "learning_rate": 3.408277328927246e-05, "loss": 63.4857, "step": 81550 }, { "epoch": 0.329512720338401, "grad_norm": 1489.64697265625, "learning_rate": 3.408079028346523e-05, "loss": 61.2157, "step": 81560 }, { "epoch": 0.32955312160376865, "grad_norm": 1054.1998291015625, "learning_rate": 3.407880700314574e-05, "loss": 74.2502, "step": 81570 }, { "epoch": 0.32959352286913624, "grad_norm": 1091.3291015625, "learning_rate": 3.407682344835266e-05, "loss": 67.4396, "step": 81580 }, { "epoch": 0.3296339241345039, "grad_norm": 744.8306274414062, "learning_rate": 3.407483961912465e-05, "loss": 62.6582, "step": 81590 }, { "epoch": 0.3296743253998715, "grad_norm": 1556.3101806640625, "learning_rate": 3.407285551550041e-05, "loss": 66.0962, "step": 81600 }, { "epoch": 0.32971472666523916, "grad_norm": 218.52691650390625, "learning_rate": 3.40708711375186e-05, "loss": 79.0563, "step": 81610 }, { "epoch": 0.3297551279306068, "grad_norm": 561.5374755859375, "learning_rate": 3.4068886485217915e-05, "loss": 43.3707, "step": 81620 }, { "epoch": 0.32979552919597444, "grad_norm": 691.798095703125, "learning_rate": 3.406690155863704e-05, "loss": 61.0591, "step": 81630 }, { "epoch": 0.3298359304613421, "grad_norm": 489.8292236328125, "learning_rate": 3.406491635781468e-05, "loss": 70.3329, "step": 81640 }, { "epoch": 0.32987633172670966, "grad_norm": 719.1077880859375, "learning_rate": 3.406293088278953e-05, "loss": 55.4561, "step": 81650 }, { "epoch": 0.3299167329920773, "grad_norm": 306.428955078125, "learning_rate": 3.406094513360031e-05, "loss": 57.011, "step": 81660 }, { "epoch": 0.32995713425744494, "grad_norm": 1282.330810546875, "learning_rate": 3.4058959110285724e-05, "loss": 55.3932, "step": 81670 }, { "epoch": 0.3299975355228126, "grad_norm": 1125.1302490234375, "learning_rate": 3.4056972812884495e-05, "loss": 92.1705, "step": 81680 }, { "epoch": 0.3300379367881802, "grad_norm": 883.2679443359375, "learning_rate": 3.405498624143533e-05, "loss": 95.1925, "step": 81690 }, { "epoch": 0.33007833805354786, "grad_norm": 1473.376953125, "learning_rate": 3.405299939597699e-05, "loss": 72.0129, "step": 81700 }, { "epoch": 0.33011873931891544, "grad_norm": 679.3895874023438, "learning_rate": 3.405101227654818e-05, "loss": 99.1354, "step": 81710 }, { "epoch": 0.3301591405842831, "grad_norm": 1817.0472412109375, "learning_rate": 3.404902488318766e-05, "loss": 67.3122, "step": 81720 }, { "epoch": 0.3301995418496507, "grad_norm": 451.7236328125, "learning_rate": 3.404703721593416e-05, "loss": 75.9052, "step": 81730 }, { "epoch": 0.33023994311501836, "grad_norm": 685.8228759765625, "learning_rate": 3.404504927482644e-05, "loss": 83.0571, "step": 81740 }, { "epoch": 0.330280344380386, "grad_norm": 917.3994750976562, "learning_rate": 3.4043061059903254e-05, "loss": 67.7444, "step": 81750 }, { "epoch": 0.33032074564575364, "grad_norm": 808.7835083007812, "learning_rate": 3.404107257120336e-05, "loss": 69.5583, "step": 81760 }, { "epoch": 0.3303611469111213, "grad_norm": 1228.276123046875, "learning_rate": 3.4039083808765534e-05, "loss": 74.209, "step": 81770 }, { "epoch": 0.33040154817648887, "grad_norm": 519.683349609375, "learning_rate": 3.403709477262853e-05, "loss": 98.1551, "step": 81780 }, { "epoch": 0.3304419494418565, "grad_norm": 729.7907104492188, "learning_rate": 3.403510546283115e-05, "loss": 71.8872, "step": 81790 }, { "epoch": 0.33048235070722415, "grad_norm": 1474.07470703125, "learning_rate": 3.403311587941215e-05, "loss": 79.9147, "step": 81800 }, { "epoch": 0.3305227519725918, "grad_norm": 272.7594299316406, "learning_rate": 3.403112602241034e-05, "loss": 70.2182, "step": 81810 }, { "epoch": 0.33056315323795943, "grad_norm": 565.2523803710938, "learning_rate": 3.40291358918645e-05, "loss": 58.0919, "step": 81820 }, { "epoch": 0.33060355450332707, "grad_norm": 679.3093872070312, "learning_rate": 3.402714548781344e-05, "loss": 77.7463, "step": 81830 }, { "epoch": 0.33064395576869465, "grad_norm": 0.0, "learning_rate": 3.402515481029595e-05, "loss": 72.2586, "step": 81840 }, { "epoch": 0.3306843570340623, "grad_norm": 1301.266845703125, "learning_rate": 3.402316385935085e-05, "loss": 75.1626, "step": 81850 }, { "epoch": 0.33072475829942993, "grad_norm": 981.5969848632812, "learning_rate": 3.402117263501695e-05, "loss": 83.7884, "step": 81860 }, { "epoch": 0.3307651595647976, "grad_norm": 508.7431335449219, "learning_rate": 3.4019181137333066e-05, "loss": 72.0551, "step": 81870 }, { "epoch": 0.3308055608301652, "grad_norm": 850.6770629882812, "learning_rate": 3.4017189366338036e-05, "loss": 89.4061, "step": 81880 }, { "epoch": 0.33084596209553285, "grad_norm": 715.669677734375, "learning_rate": 3.401519732207068e-05, "loss": 29.283, "step": 81890 }, { "epoch": 0.33088636336090044, "grad_norm": 0.0, "learning_rate": 3.401320500456984e-05, "loss": 72.7661, "step": 81900 }, { "epoch": 0.3309267646262681, "grad_norm": 735.201904296875, "learning_rate": 3.401121241387435e-05, "loss": 74.2545, "step": 81910 }, { "epoch": 0.3309671658916357, "grad_norm": 879.0210571289062, "learning_rate": 3.400921955002306e-05, "loss": 73.9921, "step": 81920 }, { "epoch": 0.33100756715700336, "grad_norm": 704.4776611328125, "learning_rate": 3.4007226413054824e-05, "loss": 44.1779, "step": 81930 }, { "epoch": 0.331047968422371, "grad_norm": 1078.0277099609375, "learning_rate": 3.400523300300849e-05, "loss": 83.9579, "step": 81940 }, { "epoch": 0.33108836968773864, "grad_norm": 876.3001098632812, "learning_rate": 3.4003239319922935e-05, "loss": 55.4064, "step": 81950 }, { "epoch": 0.3311287709531063, "grad_norm": 466.29736328125, "learning_rate": 3.4001245363837025e-05, "loss": 65.0786, "step": 81960 }, { "epoch": 0.33116917221847386, "grad_norm": 797.1023559570312, "learning_rate": 3.3999251134789624e-05, "loss": 56.1711, "step": 81970 }, { "epoch": 0.3312095734838415, "grad_norm": 1126.777587890625, "learning_rate": 3.3997256632819616e-05, "loss": 86.8346, "step": 81980 }, { "epoch": 0.33124997474920914, "grad_norm": 519.679931640625, "learning_rate": 3.399526185796588e-05, "loss": 53.7663, "step": 81990 }, { "epoch": 0.3312903760145768, "grad_norm": 1049.772705078125, "learning_rate": 3.399326681026731e-05, "loss": 68.3721, "step": 82000 }, { "epoch": 0.3313307772799444, "grad_norm": 972.539794921875, "learning_rate": 3.399127148976281e-05, "loss": 84.3961, "step": 82010 }, { "epoch": 0.33137117854531206, "grad_norm": 533.8385620117188, "learning_rate": 3.398927589649125e-05, "loss": 28.2353, "step": 82020 }, { "epoch": 0.33141157981067965, "grad_norm": 432.9517822265625, "learning_rate": 3.3987280030491564e-05, "loss": 54.7922, "step": 82030 }, { "epoch": 0.3314519810760473, "grad_norm": 1321.9207763671875, "learning_rate": 3.398528389180265e-05, "loss": 65.6759, "step": 82040 }, { "epoch": 0.3314923823414149, "grad_norm": 1934.19677734375, "learning_rate": 3.398328748046343e-05, "loss": 72.9202, "step": 82050 }, { "epoch": 0.33153278360678257, "grad_norm": 973.4382934570312, "learning_rate": 3.3981290796512825e-05, "loss": 57.7526, "step": 82060 }, { "epoch": 0.3315731848721502, "grad_norm": 567.466064453125, "learning_rate": 3.397929383998975e-05, "loss": 66.2216, "step": 82070 }, { "epoch": 0.33161358613751785, "grad_norm": 848.7359008789062, "learning_rate": 3.3977296610933145e-05, "loss": 59.2886, "step": 82080 }, { "epoch": 0.33165398740288543, "grad_norm": 702.6904296875, "learning_rate": 3.397529910938195e-05, "loss": 58.9801, "step": 82090 }, { "epoch": 0.33169438866825307, "grad_norm": 295.2632751464844, "learning_rate": 3.3973301335375104e-05, "loss": 63.8799, "step": 82100 }, { "epoch": 0.3317347899336207, "grad_norm": 492.2831115722656, "learning_rate": 3.3971303288951554e-05, "loss": 98.2528, "step": 82110 }, { "epoch": 0.33177519119898835, "grad_norm": 977.1430053710938, "learning_rate": 3.3969304970150255e-05, "loss": 53.2605, "step": 82120 }, { "epoch": 0.331815592464356, "grad_norm": 761.702392578125, "learning_rate": 3.396730637901016e-05, "loss": 77.0182, "step": 82130 }, { "epoch": 0.33185599372972363, "grad_norm": 656.1929321289062, "learning_rate": 3.396530751557024e-05, "loss": 55.2026, "step": 82140 }, { "epoch": 0.33189639499509127, "grad_norm": 516.7476196289062, "learning_rate": 3.396330837986946e-05, "loss": 77.5115, "step": 82150 }, { "epoch": 0.33193679626045886, "grad_norm": 394.3022155761719, "learning_rate": 3.396130897194679e-05, "loss": 59.2189, "step": 82160 }, { "epoch": 0.3319771975258265, "grad_norm": 707.18994140625, "learning_rate": 3.395930929184122e-05, "loss": 47.8635, "step": 82170 }, { "epoch": 0.33201759879119414, "grad_norm": 1019.1130981445312, "learning_rate": 3.395730933959172e-05, "loss": 63.8909, "step": 82180 }, { "epoch": 0.3320580000565618, "grad_norm": 530.5923461914062, "learning_rate": 3.3955309115237296e-05, "loss": 67.0644, "step": 82190 }, { "epoch": 0.3320984013219294, "grad_norm": 600.5137329101562, "learning_rate": 3.395330861881693e-05, "loss": 65.8614, "step": 82200 }, { "epoch": 0.33213880258729706, "grad_norm": 871.7723388671875, "learning_rate": 3.3951307850369634e-05, "loss": 41.2921, "step": 82210 }, { "epoch": 0.33217920385266464, "grad_norm": 593.4152221679688, "learning_rate": 3.394930680993441e-05, "loss": 95.3884, "step": 82220 }, { "epoch": 0.3322196051180323, "grad_norm": 390.56927490234375, "learning_rate": 3.394730549755027e-05, "loss": 56.8083, "step": 82230 }, { "epoch": 0.3322600063833999, "grad_norm": 474.56964111328125, "learning_rate": 3.3945303913256216e-05, "loss": 70.1066, "step": 82240 }, { "epoch": 0.33230040764876756, "grad_norm": 882.9507446289062, "learning_rate": 3.39433020570913e-05, "loss": 78.8813, "step": 82250 }, { "epoch": 0.3323408089141352, "grad_norm": 771.1679077148438, "learning_rate": 3.394129992909452e-05, "loss": 63.4995, "step": 82260 }, { "epoch": 0.33238121017950284, "grad_norm": 2786.767578125, "learning_rate": 3.3939297529304926e-05, "loss": 59.4498, "step": 82270 }, { "epoch": 0.3324216114448705, "grad_norm": 758.8515014648438, "learning_rate": 3.393729485776154e-05, "loss": 94.3925, "step": 82280 }, { "epoch": 0.33246201271023806, "grad_norm": 937.47998046875, "learning_rate": 3.393529191450342e-05, "loss": 68.8819, "step": 82290 }, { "epoch": 0.3325024139756057, "grad_norm": 418.58563232421875, "learning_rate": 3.393328869956962e-05, "loss": 47.6533, "step": 82300 }, { "epoch": 0.33254281524097334, "grad_norm": 407.52972412109375, "learning_rate": 3.393128521299917e-05, "loss": 54.5192, "step": 82310 }, { "epoch": 0.332583216506341, "grad_norm": 1072.966552734375, "learning_rate": 3.392928145483115e-05, "loss": 71.5673, "step": 82320 }, { "epoch": 0.3326236177717086, "grad_norm": 636.4874877929688, "learning_rate": 3.392727742510462e-05, "loss": 80.9786, "step": 82330 }, { "epoch": 0.33266401903707626, "grad_norm": 801.964599609375, "learning_rate": 3.3925273123858644e-05, "loss": 66.5421, "step": 82340 }, { "epoch": 0.33270442030244385, "grad_norm": 614.4639282226562, "learning_rate": 3.3923268551132294e-05, "loss": 56.6249, "step": 82350 }, { "epoch": 0.3327448215678115, "grad_norm": 2463.41162109375, "learning_rate": 3.392126370696466e-05, "loss": 95.9889, "step": 82360 }, { "epoch": 0.33278522283317913, "grad_norm": 950.767822265625, "learning_rate": 3.391925859139482e-05, "loss": 64.5511, "step": 82370 }, { "epoch": 0.33282562409854677, "grad_norm": 968.8400268554688, "learning_rate": 3.391725320446187e-05, "loss": 72.9015, "step": 82380 }, { "epoch": 0.3328660253639144, "grad_norm": 481.743408203125, "learning_rate": 3.3915247546204905e-05, "loss": 75.9669, "step": 82390 }, { "epoch": 0.33290642662928205, "grad_norm": 925.4773559570312, "learning_rate": 3.391324161666302e-05, "loss": 78.0991, "step": 82400 }, { "epoch": 0.33294682789464963, "grad_norm": 1083.5574951171875, "learning_rate": 3.3911235415875326e-05, "loss": 94.1073, "step": 82410 }, { "epoch": 0.3329872291600173, "grad_norm": 0.0, "learning_rate": 3.390922894388094e-05, "loss": 48.1652, "step": 82420 }, { "epoch": 0.3330276304253849, "grad_norm": 1400.068359375, "learning_rate": 3.390722220071897e-05, "loss": 72.396, "step": 82430 }, { "epoch": 0.33306803169075255, "grad_norm": 856.3434448242188, "learning_rate": 3.390521518642855e-05, "loss": 79.0431, "step": 82440 }, { "epoch": 0.3331084329561202, "grad_norm": 553.9498901367188, "learning_rate": 3.39032079010488e-05, "loss": 61.321, "step": 82450 }, { "epoch": 0.33314883422148783, "grad_norm": 830.5960083007812, "learning_rate": 3.390120034461886e-05, "loss": 100.5, "step": 82460 }, { "epoch": 0.3331892354868555, "grad_norm": 873.7688598632812, "learning_rate": 3.389919251717785e-05, "loss": 105.3553, "step": 82470 }, { "epoch": 0.33322963675222306, "grad_norm": 773.9730834960938, "learning_rate": 3.3897184418764925e-05, "loss": 87.4409, "step": 82480 }, { "epoch": 0.3332700380175907, "grad_norm": 694.1939086914062, "learning_rate": 3.389517604941924e-05, "loss": 86.3784, "step": 82490 }, { "epoch": 0.33331043928295834, "grad_norm": 1546.2618408203125, "learning_rate": 3.3893167409179945e-05, "loss": 61.1646, "step": 82500 }, { "epoch": 0.333350840548326, "grad_norm": 1124.92724609375, "learning_rate": 3.389115849808621e-05, "loss": 70.7375, "step": 82510 }, { "epoch": 0.3333912418136936, "grad_norm": 627.1702270507812, "learning_rate": 3.3889149316177167e-05, "loss": 82.8139, "step": 82520 }, { "epoch": 0.33343164307906126, "grad_norm": 484.9052429199219, "learning_rate": 3.388713986349202e-05, "loss": 63.3153, "step": 82530 }, { "epoch": 0.33347204434442884, "grad_norm": 1087.6514892578125, "learning_rate": 3.388513014006993e-05, "loss": 43.2902, "step": 82540 }, { "epoch": 0.3335124456097965, "grad_norm": 875.129150390625, "learning_rate": 3.388312014595008e-05, "loss": 82.1861, "step": 82550 }, { "epoch": 0.3335528468751641, "grad_norm": 628.6128540039062, "learning_rate": 3.3881109881171656e-05, "loss": 58.0868, "step": 82560 }, { "epoch": 0.33359324814053176, "grad_norm": 570.00048828125, "learning_rate": 3.387909934577384e-05, "loss": 57.2228, "step": 82570 }, { "epoch": 0.3336336494058994, "grad_norm": 590.1167602539062, "learning_rate": 3.387708853979585e-05, "loss": 47.8389, "step": 82580 }, { "epoch": 0.33367405067126704, "grad_norm": 801.5050659179688, "learning_rate": 3.3875077463276865e-05, "loss": 82.9828, "step": 82590 }, { "epoch": 0.3337144519366347, "grad_norm": 802.7755126953125, "learning_rate": 3.38730661162561e-05, "loss": 69.2401, "step": 82600 }, { "epoch": 0.33375485320200227, "grad_norm": 711.4531860351562, "learning_rate": 3.387105449877278e-05, "loss": 44.5337, "step": 82610 }, { "epoch": 0.3337952544673699, "grad_norm": 371.0378112792969, "learning_rate": 3.38690426108661e-05, "loss": 38.7474, "step": 82620 }, { "epoch": 0.33383565573273755, "grad_norm": 429.09185791015625, "learning_rate": 3.3867030452575296e-05, "loss": 70.3189, "step": 82630 }, { "epoch": 0.3338760569981052, "grad_norm": 760.4613037109375, "learning_rate": 3.3865018023939595e-05, "loss": 71.0347, "step": 82640 }, { "epoch": 0.3339164582634728, "grad_norm": 411.7153625488281, "learning_rate": 3.386300532499823e-05, "loss": 76.0749, "step": 82650 }, { "epoch": 0.33395685952884047, "grad_norm": 305.81219482421875, "learning_rate": 3.386099235579044e-05, "loss": 50.1637, "step": 82660 }, { "epoch": 0.33399726079420805, "grad_norm": 1149.036376953125, "learning_rate": 3.385897911635547e-05, "loss": 72.353, "step": 82670 }, { "epoch": 0.3340376620595757, "grad_norm": 386.5274353027344, "learning_rate": 3.385696560673257e-05, "loss": 60.3233, "step": 82680 }, { "epoch": 0.33407806332494333, "grad_norm": 1266.0382080078125, "learning_rate": 3.385495182696098e-05, "loss": 57.865, "step": 82690 }, { "epoch": 0.33411846459031097, "grad_norm": 673.6781616210938, "learning_rate": 3.3852937777079976e-05, "loss": 39.5594, "step": 82700 }, { "epoch": 0.3341588658556786, "grad_norm": 2739.0546875, "learning_rate": 3.385092345712882e-05, "loss": 54.4652, "step": 82710 }, { "epoch": 0.33419926712104625, "grad_norm": 1225.4815673828125, "learning_rate": 3.3848908867146784e-05, "loss": 77.8722, "step": 82720 }, { "epoch": 0.33423966838641384, "grad_norm": 362.6470031738281, "learning_rate": 3.3846894007173135e-05, "loss": 77.0374, "step": 82730 }, { "epoch": 0.3342800696517815, "grad_norm": 1833.700439453125, "learning_rate": 3.384487887724716e-05, "loss": 79.7389, "step": 82740 }, { "epoch": 0.3343204709171491, "grad_norm": 835.5281372070312, "learning_rate": 3.384286347740814e-05, "loss": 57.7872, "step": 82750 }, { "epoch": 0.33436087218251676, "grad_norm": 1015.665771484375, "learning_rate": 3.3840847807695367e-05, "loss": 72.3746, "step": 82760 }, { "epoch": 0.3344012734478844, "grad_norm": 876.6022338867188, "learning_rate": 3.383883186814815e-05, "loss": 63.0179, "step": 82770 }, { "epoch": 0.33444167471325204, "grad_norm": 615.6849975585938, "learning_rate": 3.3836815658805776e-05, "loss": 56.2009, "step": 82780 }, { "epoch": 0.3344820759786197, "grad_norm": 1607.5733642578125, "learning_rate": 3.383479917970756e-05, "loss": 71.4537, "step": 82790 }, { "epoch": 0.33452247724398726, "grad_norm": 527.6516723632812, "learning_rate": 3.3832782430892806e-05, "loss": 65.4176, "step": 82800 }, { "epoch": 0.3345628785093549, "grad_norm": 647.039794921875, "learning_rate": 3.383076541240084e-05, "loss": 66.098, "step": 82810 }, { "epoch": 0.33460327977472254, "grad_norm": 511.5931396484375, "learning_rate": 3.3828748124270983e-05, "loss": 74.0146, "step": 82820 }, { "epoch": 0.3346436810400902, "grad_norm": 791.4042358398438, "learning_rate": 3.3826730566542555e-05, "loss": 60.8105, "step": 82830 }, { "epoch": 0.3346840823054578, "grad_norm": 609.68603515625, "learning_rate": 3.3824712739254904e-05, "loss": 64.9001, "step": 82840 }, { "epoch": 0.33472448357082546, "grad_norm": 795.0293579101562, "learning_rate": 3.382269464244736e-05, "loss": 60.0467, "step": 82850 }, { "epoch": 0.33476488483619304, "grad_norm": 555.2614135742188, "learning_rate": 3.382067627615926e-05, "loss": 79.6582, "step": 82860 }, { "epoch": 0.3348052861015607, "grad_norm": 534.7796630859375, "learning_rate": 3.381865764042997e-05, "loss": 73.6634, "step": 82870 }, { "epoch": 0.3348456873669283, "grad_norm": 2460.16162109375, "learning_rate": 3.381663873529883e-05, "loss": 62.1055, "step": 82880 }, { "epoch": 0.33488608863229596, "grad_norm": 524.8576049804688, "learning_rate": 3.3814619560805205e-05, "loss": 123.9045, "step": 82890 }, { "epoch": 0.3349264898976636, "grad_norm": 1630.5303955078125, "learning_rate": 3.381260011698846e-05, "loss": 51.2895, "step": 82900 }, { "epoch": 0.33496689116303124, "grad_norm": 561.4447631835938, "learning_rate": 3.3810580403887965e-05, "loss": 73.8346, "step": 82910 }, { "epoch": 0.3350072924283989, "grad_norm": 598.4013671875, "learning_rate": 3.3808560421543094e-05, "loss": 60.0628, "step": 82920 }, { "epoch": 0.33504769369376647, "grad_norm": 324.10302734375, "learning_rate": 3.380654016999323e-05, "loss": 58.894, "step": 82930 }, { "epoch": 0.3350880949591341, "grad_norm": 504.5718688964844, "learning_rate": 3.3804519649277754e-05, "loss": 62.397, "step": 82940 }, { "epoch": 0.33512849622450175, "grad_norm": 707.5982666015625, "learning_rate": 3.3802498859436066e-05, "loss": 69.2411, "step": 82950 }, { "epoch": 0.3351688974898694, "grad_norm": 1631.22119140625, "learning_rate": 3.3800477800507555e-05, "loss": 77.0283, "step": 82960 }, { "epoch": 0.33520929875523703, "grad_norm": 1262.4522705078125, "learning_rate": 3.379845647253162e-05, "loss": 56.2125, "step": 82970 }, { "epoch": 0.33524970002060467, "grad_norm": 680.508544921875, "learning_rate": 3.3796434875547675e-05, "loss": 51.1179, "step": 82980 }, { "epoch": 0.33529010128597225, "grad_norm": 1367.9285888671875, "learning_rate": 3.379441300959513e-05, "loss": 78.3301, "step": 82990 }, { "epoch": 0.3353305025513399, "grad_norm": 1376.3389892578125, "learning_rate": 3.37923908747134e-05, "loss": 65.1142, "step": 83000 }, { "epoch": 0.33537090381670753, "grad_norm": 701.1461791992188, "learning_rate": 3.379036847094191e-05, "loss": 72.6158, "step": 83010 }, { "epoch": 0.3354113050820752, "grad_norm": 436.153076171875, "learning_rate": 3.378834579832008e-05, "loss": 79.1066, "step": 83020 }, { "epoch": 0.3354517063474428, "grad_norm": 664.0464477539062, "learning_rate": 3.378632285688736e-05, "loss": 75.7545, "step": 83030 }, { "epoch": 0.33549210761281045, "grad_norm": 1134.675048828125, "learning_rate": 3.378429964668318e-05, "loss": 58.1786, "step": 83040 }, { "epoch": 0.33553250887817804, "grad_norm": 1004.5962524414062, "learning_rate": 3.378227616774697e-05, "loss": 51.7707, "step": 83050 }, { "epoch": 0.3355729101435457, "grad_norm": 977.3733520507812, "learning_rate": 3.37802524201182e-05, "loss": 66.7884, "step": 83060 }, { "epoch": 0.3356133114089133, "grad_norm": 600.7833862304688, "learning_rate": 3.377822840383632e-05, "loss": 94.3781, "step": 83070 }, { "epoch": 0.33565371267428096, "grad_norm": 634.0911254882812, "learning_rate": 3.377620411894077e-05, "loss": 56.2981, "step": 83080 }, { "epoch": 0.3356941139396486, "grad_norm": 494.5894470214844, "learning_rate": 3.377417956547103e-05, "loss": 92.2448, "step": 83090 }, { "epoch": 0.33573451520501624, "grad_norm": 1046.20703125, "learning_rate": 3.377215474346657e-05, "loss": 60.8228, "step": 83100 }, { "epoch": 0.3357749164703839, "grad_norm": 2085.396484375, "learning_rate": 3.377012965296687e-05, "loss": 59.7961, "step": 83110 }, { "epoch": 0.33581531773575146, "grad_norm": 755.2464599609375, "learning_rate": 3.3768104294011394e-05, "loss": 53.3835, "step": 83120 }, { "epoch": 0.3358557190011191, "grad_norm": 768.9041137695312, "learning_rate": 3.376607866663964e-05, "loss": 57.641, "step": 83130 }, { "epoch": 0.33589612026648674, "grad_norm": 1092.0604248046875, "learning_rate": 3.3764052770891095e-05, "loss": 73.4515, "step": 83140 }, { "epoch": 0.3359365215318544, "grad_norm": 1306.6480712890625, "learning_rate": 3.376202660680526e-05, "loss": 74.138, "step": 83150 }, { "epoch": 0.335976922797222, "grad_norm": 215.6219024658203, "learning_rate": 3.376000017442162e-05, "loss": 69.0981, "step": 83160 }, { "epoch": 0.33601732406258966, "grad_norm": 464.001220703125, "learning_rate": 3.37579734737797e-05, "loss": 41.6006, "step": 83170 }, { "epoch": 0.33605772532795725, "grad_norm": 2117.4033203125, "learning_rate": 3.3755946504919e-05, "loss": 115.2802, "step": 83180 }, { "epoch": 0.3360981265933249, "grad_norm": 1764.74462890625, "learning_rate": 3.375391926787905e-05, "loss": 60.6559, "step": 83190 }, { "epoch": 0.3361385278586925, "grad_norm": 1227.6932373046875, "learning_rate": 3.375189176269935e-05, "loss": 66.6953, "step": 83200 }, { "epoch": 0.33617892912406017, "grad_norm": 828.9819946289062, "learning_rate": 3.374986398941944e-05, "loss": 66.9964, "step": 83210 }, { "epoch": 0.3362193303894278, "grad_norm": 1059.575927734375, "learning_rate": 3.374783594807887e-05, "loss": 73.6725, "step": 83220 }, { "epoch": 0.33625973165479545, "grad_norm": 346.5843811035156, "learning_rate": 3.3745807638717144e-05, "loss": 63.1543, "step": 83230 }, { "epoch": 0.3363001329201631, "grad_norm": 5886.8798828125, "learning_rate": 3.374377906137383e-05, "loss": 53.1895, "step": 83240 }, { "epoch": 0.33634053418553067, "grad_norm": 1697.4791259765625, "learning_rate": 3.3741750216088465e-05, "loss": 74.849, "step": 83250 }, { "epoch": 0.3363809354508983, "grad_norm": 559.0037231445312, "learning_rate": 3.37397211029006e-05, "loss": 70.1515, "step": 83260 }, { "epoch": 0.33642133671626595, "grad_norm": 549.3245239257812, "learning_rate": 3.373769172184981e-05, "loss": 68.2745, "step": 83270 }, { "epoch": 0.3364617379816336, "grad_norm": 530.4989013671875, "learning_rate": 3.3735662072975635e-05, "loss": 59.415, "step": 83280 }, { "epoch": 0.33650213924700123, "grad_norm": 598.5599365234375, "learning_rate": 3.373363215631766e-05, "loss": 49.4548, "step": 83290 }, { "epoch": 0.33654254051236887, "grad_norm": 1466.6553955078125, "learning_rate": 3.373160197191546e-05, "loss": 67.0955, "step": 83300 }, { "epoch": 0.33658294177773646, "grad_norm": 442.7725830078125, "learning_rate": 3.3729571519808606e-05, "loss": 56.7274, "step": 83310 }, { "epoch": 0.3366233430431041, "grad_norm": 477.98785400390625, "learning_rate": 3.372754080003669e-05, "loss": 64.1001, "step": 83320 }, { "epoch": 0.33666374430847174, "grad_norm": 673.998046875, "learning_rate": 3.372550981263929e-05, "loss": 64.573, "step": 83330 }, { "epoch": 0.3367041455738394, "grad_norm": 3385.6376953125, "learning_rate": 3.3723478557656016e-05, "loss": 72.427, "step": 83340 }, { "epoch": 0.336744546839207, "grad_norm": 401.60125732421875, "learning_rate": 3.3721447035126464e-05, "loss": 70.8086, "step": 83350 }, { "epoch": 0.33678494810457466, "grad_norm": 442.1596374511719, "learning_rate": 3.371941524509024e-05, "loss": 62.7455, "step": 83360 }, { "epoch": 0.33682534936994224, "grad_norm": 1584.1695556640625, "learning_rate": 3.371738318758694e-05, "loss": 82.1702, "step": 83370 }, { "epoch": 0.3368657506353099, "grad_norm": 1419.6942138671875, "learning_rate": 3.37153508626562e-05, "loss": 80.0828, "step": 83380 }, { "epoch": 0.3369061519006775, "grad_norm": 1289.188720703125, "learning_rate": 3.3713318270337643e-05, "loss": 73.5477, "step": 83390 }, { "epoch": 0.33694655316604516, "grad_norm": 816.6846923828125, "learning_rate": 3.3711285410670876e-05, "loss": 49.8017, "step": 83400 }, { "epoch": 0.3369869544314128, "grad_norm": 641.5718383789062, "learning_rate": 3.370925228369554e-05, "loss": 85.3183, "step": 83410 }, { "epoch": 0.33702735569678044, "grad_norm": 514.9337768554688, "learning_rate": 3.370721888945127e-05, "loss": 51.9517, "step": 83420 }, { "epoch": 0.3370677569621481, "grad_norm": 484.6968078613281, "learning_rate": 3.370518522797772e-05, "loss": 39.9968, "step": 83430 }, { "epoch": 0.33710815822751566, "grad_norm": 710.4841918945312, "learning_rate": 3.370315129931453e-05, "loss": 53.6119, "step": 83440 }, { "epoch": 0.3371485594928833, "grad_norm": 228.62632751464844, "learning_rate": 3.370111710350134e-05, "loss": 48.0815, "step": 83450 }, { "epoch": 0.33718896075825094, "grad_norm": 1313.511474609375, "learning_rate": 3.369908264057783e-05, "loss": 78.3585, "step": 83460 }, { "epoch": 0.3372293620236186, "grad_norm": 882.86279296875, "learning_rate": 3.3697047910583635e-05, "loss": 67.3662, "step": 83470 }, { "epoch": 0.3372697632889862, "grad_norm": 1051.37451171875, "learning_rate": 3.369501291355845e-05, "loss": 54.169, "step": 83480 }, { "epoch": 0.33731016455435386, "grad_norm": 495.60162353515625, "learning_rate": 3.369297764954194e-05, "loss": 65.4812, "step": 83490 }, { "epoch": 0.33735056581972145, "grad_norm": 0.0, "learning_rate": 3.369094211857378e-05, "loss": 83.5426, "step": 83500 }, { "epoch": 0.3373909670850891, "grad_norm": 588.1001586914062, "learning_rate": 3.3688906320693645e-05, "loss": 41.9696, "step": 83510 }, { "epoch": 0.33743136835045673, "grad_norm": 772.1838989257812, "learning_rate": 3.3686870255941246e-05, "loss": 67.5083, "step": 83520 }, { "epoch": 0.33747176961582437, "grad_norm": 599.001708984375, "learning_rate": 3.368483392435626e-05, "loss": 81.6238, "step": 83530 }, { "epoch": 0.337512170881192, "grad_norm": 865.5444946289062, "learning_rate": 3.368279732597839e-05, "loss": 63.9727, "step": 83540 }, { "epoch": 0.33755257214655965, "grad_norm": 600.300048828125, "learning_rate": 3.368076046084734e-05, "loss": 79.3259, "step": 83550 }, { "epoch": 0.3375929734119273, "grad_norm": 3054.122802734375, "learning_rate": 3.3678723329002826e-05, "loss": 84.2376, "step": 83560 }, { "epoch": 0.3376333746772949, "grad_norm": 0.0, "learning_rate": 3.367668593048456e-05, "loss": 98.9598, "step": 83570 }, { "epoch": 0.3376737759426625, "grad_norm": 2259.979248046875, "learning_rate": 3.367464826533225e-05, "loss": 77.0516, "step": 83580 }, { "epoch": 0.33771417720803015, "grad_norm": 864.2886352539062, "learning_rate": 3.3672610333585645e-05, "loss": 53.8932, "step": 83590 }, { "epoch": 0.3377545784733978, "grad_norm": 884.65087890625, "learning_rate": 3.3670572135284456e-05, "loss": 64.9674, "step": 83600 }, { "epoch": 0.33779497973876543, "grad_norm": 361.47686767578125, "learning_rate": 3.366853367046843e-05, "loss": 52.3785, "step": 83610 }, { "epoch": 0.3378353810041331, "grad_norm": 375.3967590332031, "learning_rate": 3.3666494939177295e-05, "loss": 76.4976, "step": 83620 }, { "epoch": 0.33787578226950066, "grad_norm": 454.41827392578125, "learning_rate": 3.366445594145081e-05, "loss": 62.219, "step": 83630 }, { "epoch": 0.3379161835348683, "grad_norm": 1941.0068359375, "learning_rate": 3.366241667732872e-05, "loss": 96.1385, "step": 83640 }, { "epoch": 0.33795658480023594, "grad_norm": 597.4854125976562, "learning_rate": 3.366037714685078e-05, "loss": 44.2322, "step": 83650 }, { "epoch": 0.3379969860656036, "grad_norm": 790.761962890625, "learning_rate": 3.365833735005676e-05, "loss": 42.7057, "step": 83660 }, { "epoch": 0.3380373873309712, "grad_norm": 608.0296020507812, "learning_rate": 3.365629728698642e-05, "loss": 56.8395, "step": 83670 }, { "epoch": 0.33807778859633886, "grad_norm": 1275.1829833984375, "learning_rate": 3.365425695767953e-05, "loss": 92.5405, "step": 83680 }, { "epoch": 0.33811818986170644, "grad_norm": 1744.1668701171875, "learning_rate": 3.365221636217588e-05, "loss": 57.669, "step": 83690 }, { "epoch": 0.3381585911270741, "grad_norm": 2922.179443359375, "learning_rate": 3.3650175500515235e-05, "loss": 58.4586, "step": 83700 }, { "epoch": 0.3381989923924417, "grad_norm": 976.9667358398438, "learning_rate": 3.364813437273739e-05, "loss": 68.5843, "step": 83710 }, { "epoch": 0.33823939365780936, "grad_norm": 773.35888671875, "learning_rate": 3.3646092978882144e-05, "loss": 63.5441, "step": 83720 }, { "epoch": 0.338279794923177, "grad_norm": 474.0544128417969, "learning_rate": 3.364405131898929e-05, "loss": 39.5866, "step": 83730 }, { "epoch": 0.33832019618854464, "grad_norm": 2066.063720703125, "learning_rate": 3.364200939309863e-05, "loss": 71.9654, "step": 83740 }, { "epoch": 0.3383605974539123, "grad_norm": 531.757568359375, "learning_rate": 3.363996720124997e-05, "loss": 97.3059, "step": 83750 }, { "epoch": 0.33840099871927987, "grad_norm": 498.4417419433594, "learning_rate": 3.363792474348313e-05, "loss": 84.403, "step": 83760 }, { "epoch": 0.3384413999846475, "grad_norm": 551.0558471679688, "learning_rate": 3.363588201983792e-05, "loss": 106.5099, "step": 83770 }, { "epoch": 0.33848180125001515, "grad_norm": 555.5409545898438, "learning_rate": 3.363383903035419e-05, "loss": 51.1536, "step": 83780 }, { "epoch": 0.3385222025153828, "grad_norm": 477.4742431640625, "learning_rate": 3.363179577507173e-05, "loss": 49.2929, "step": 83790 }, { "epoch": 0.3385626037807504, "grad_norm": 506.8771057128906, "learning_rate": 3.36297522540304e-05, "loss": 62.7076, "step": 83800 }, { "epoch": 0.33860300504611807, "grad_norm": 1209.50634765625, "learning_rate": 3.362770846727003e-05, "loss": 100.1989, "step": 83810 }, { "epoch": 0.33864340631148565, "grad_norm": 314.1358337402344, "learning_rate": 3.362566441483046e-05, "loss": 68.2915, "step": 83820 }, { "epoch": 0.3386838075768533, "grad_norm": 575.9443969726562, "learning_rate": 3.362362009675156e-05, "loss": 65.2, "step": 83830 }, { "epoch": 0.33872420884222093, "grad_norm": 745.6015014648438, "learning_rate": 3.362157551307317e-05, "loss": 75.2707, "step": 83840 }, { "epoch": 0.33876461010758857, "grad_norm": 658.1475830078125, "learning_rate": 3.361953066383515e-05, "loss": 55.5822, "step": 83850 }, { "epoch": 0.3388050113729562, "grad_norm": 811.0732421875, "learning_rate": 3.3617485549077365e-05, "loss": 67.6831, "step": 83860 }, { "epoch": 0.33884541263832385, "grad_norm": 914.1915893554688, "learning_rate": 3.3615440168839693e-05, "loss": 68.3131, "step": 83870 }, { "epoch": 0.3388858139036915, "grad_norm": 990.7426147460938, "learning_rate": 3.3613394523162e-05, "loss": 71.9516, "step": 83880 }, { "epoch": 0.3389262151690591, "grad_norm": 2585.360107421875, "learning_rate": 3.361134861208419e-05, "loss": 55.0049, "step": 83890 }, { "epoch": 0.3389666164344267, "grad_norm": 621.7271728515625, "learning_rate": 3.360930243564611e-05, "loss": 60.3423, "step": 83900 }, { "epoch": 0.33900701769979436, "grad_norm": 423.0601806640625, "learning_rate": 3.360725599388768e-05, "loss": 101.5376, "step": 83910 }, { "epoch": 0.339047418965162, "grad_norm": 509.084228515625, "learning_rate": 3.360520928684879e-05, "loss": 69.3444, "step": 83920 }, { "epoch": 0.33908782023052964, "grad_norm": 332.6028747558594, "learning_rate": 3.3603162314569334e-05, "loss": 36.2318, "step": 83930 }, { "epoch": 0.3391282214958973, "grad_norm": 1342.31201171875, "learning_rate": 3.360111507708923e-05, "loss": 98.992, "step": 83940 }, { "epoch": 0.33916862276126486, "grad_norm": 774.0115356445312, "learning_rate": 3.3599067574448385e-05, "loss": 32.5285, "step": 83950 }, { "epoch": 0.3392090240266325, "grad_norm": 459.2820129394531, "learning_rate": 3.3597019806686724e-05, "loss": 72.3013, "step": 83960 }, { "epoch": 0.33924942529200014, "grad_norm": 1240.106201171875, "learning_rate": 3.359497177384415e-05, "loss": 107.1222, "step": 83970 }, { "epoch": 0.3392898265573678, "grad_norm": 791.4290771484375, "learning_rate": 3.3592923475960604e-05, "loss": 73.2466, "step": 83980 }, { "epoch": 0.3393302278227354, "grad_norm": 854.4566040039062, "learning_rate": 3.3590874913076024e-05, "loss": 60.4601, "step": 83990 }, { "epoch": 0.33937062908810306, "grad_norm": 380.6976623535156, "learning_rate": 3.3588826085230336e-05, "loss": 63.7296, "step": 84000 }, { "epoch": 0.33941103035347064, "grad_norm": 372.28118896484375, "learning_rate": 3.3586776992463486e-05, "loss": 45.1759, "step": 84010 }, { "epoch": 0.3394514316188383, "grad_norm": 518.045654296875, "learning_rate": 3.358472763481542e-05, "loss": 80.7986, "step": 84020 }, { "epoch": 0.3394918328842059, "grad_norm": 1669.1104736328125, "learning_rate": 3.35826780123261e-05, "loss": 83.3932, "step": 84030 }, { "epoch": 0.33953223414957356, "grad_norm": 799.4876708984375, "learning_rate": 3.358062812503548e-05, "loss": 63.6164, "step": 84040 }, { "epoch": 0.3395726354149412, "grad_norm": 1021.8041381835938, "learning_rate": 3.357857797298353e-05, "loss": 54.8351, "step": 84050 }, { "epoch": 0.33961303668030884, "grad_norm": 1971.704345703125, "learning_rate": 3.35765275562102e-05, "loss": 57.4328, "step": 84060 }, { "epoch": 0.3396534379456765, "grad_norm": 461.5122985839844, "learning_rate": 3.357447687475548e-05, "loss": 56.3414, "step": 84070 }, { "epoch": 0.33969383921104407, "grad_norm": 1216.770263671875, "learning_rate": 3.357242592865934e-05, "loss": 57.114, "step": 84080 }, { "epoch": 0.3397342404764117, "grad_norm": 578.294189453125, "learning_rate": 3.357037471796178e-05, "loss": 56.4494, "step": 84090 }, { "epoch": 0.33977464174177935, "grad_norm": 642.9100952148438, "learning_rate": 3.356832324270277e-05, "loss": 66.4275, "step": 84100 }, { "epoch": 0.339815043007147, "grad_norm": 756.030029296875, "learning_rate": 3.356627150292231e-05, "loss": 80.5683, "step": 84110 }, { "epoch": 0.33985544427251463, "grad_norm": 575.1790161132812, "learning_rate": 3.356421949866041e-05, "loss": 69.7805, "step": 84120 }, { "epoch": 0.33989584553788227, "grad_norm": 2446.708740234375, "learning_rate": 3.356216722995706e-05, "loss": 61.9794, "step": 84130 }, { "epoch": 0.33993624680324985, "grad_norm": 699.87255859375, "learning_rate": 3.356011469685229e-05, "loss": 52.9517, "step": 84140 }, { "epoch": 0.3399766480686175, "grad_norm": 759.2429809570312, "learning_rate": 3.355806189938609e-05, "loss": 56.9572, "step": 84150 }, { "epoch": 0.34001704933398513, "grad_norm": 680.115478515625, "learning_rate": 3.35560088375985e-05, "loss": 59.5063, "step": 84160 }, { "epoch": 0.3400574505993528, "grad_norm": 1015.9598388671875, "learning_rate": 3.3553955511529534e-05, "loss": 63.287, "step": 84170 }, { "epoch": 0.3400978518647204, "grad_norm": 336.59588623046875, "learning_rate": 3.355190192121923e-05, "loss": 59.212, "step": 84180 }, { "epoch": 0.34013825313008805, "grad_norm": 1549.30810546875, "learning_rate": 3.3549848066707626e-05, "loss": 74.1864, "step": 84190 }, { "epoch": 0.3401786543954557, "grad_norm": 570.6986083984375, "learning_rate": 3.354779394803475e-05, "loss": 77.9518, "step": 84200 }, { "epoch": 0.3402190556608233, "grad_norm": 649.0238647460938, "learning_rate": 3.354573956524066e-05, "loss": 64.5057, "step": 84210 }, { "epoch": 0.3402594569261909, "grad_norm": 1301.7386474609375, "learning_rate": 3.3543684918365405e-05, "loss": 55.2643, "step": 84220 }, { "epoch": 0.34029985819155856, "grad_norm": 759.0762329101562, "learning_rate": 3.354163000744903e-05, "loss": 63.5302, "step": 84230 }, { "epoch": 0.3403402594569262, "grad_norm": 1009.452392578125, "learning_rate": 3.3539574832531617e-05, "loss": 48.2601, "step": 84240 }, { "epoch": 0.34038066072229384, "grad_norm": 591.0625, "learning_rate": 3.3537519393653216e-05, "loss": 53.8696, "step": 84250 }, { "epoch": 0.3404210619876615, "grad_norm": 1154.417724609375, "learning_rate": 3.353546369085391e-05, "loss": 94.1478, "step": 84260 }, { "epoch": 0.34046146325302906, "grad_norm": 735.6290283203125, "learning_rate": 3.3533407724173765e-05, "loss": 52.151, "step": 84270 }, { "epoch": 0.3405018645183967, "grad_norm": 1594.7706298828125, "learning_rate": 3.353135149365288e-05, "loss": 63.1956, "step": 84280 }, { "epoch": 0.34054226578376434, "grad_norm": 692.4290161132812, "learning_rate": 3.352929499933132e-05, "loss": 74.3355, "step": 84290 }, { "epoch": 0.340582667049132, "grad_norm": 1480.5269775390625, "learning_rate": 3.352723824124919e-05, "loss": 84.1563, "step": 84300 }, { "epoch": 0.3406230683144996, "grad_norm": 420.89239501953125, "learning_rate": 3.352518121944659e-05, "loss": 86.6826, "step": 84310 }, { "epoch": 0.34066346957986726, "grad_norm": 1095.89990234375, "learning_rate": 3.3523123933963614e-05, "loss": 85.8979, "step": 84320 }, { "epoch": 0.34070387084523485, "grad_norm": 546.4743041992188, "learning_rate": 3.352106638484038e-05, "loss": 62.4009, "step": 84330 }, { "epoch": 0.3407442721106025, "grad_norm": 808.0443725585938, "learning_rate": 3.351900857211699e-05, "loss": 61.3366, "step": 84340 }, { "epoch": 0.3407846733759701, "grad_norm": 2018.72314453125, "learning_rate": 3.351695049583357e-05, "loss": 63.5511, "step": 84350 }, { "epoch": 0.34082507464133777, "grad_norm": 1057.6278076171875, "learning_rate": 3.351489215603024e-05, "loss": 79.8961, "step": 84360 }, { "epoch": 0.3408654759067054, "grad_norm": 1301.7979736328125, "learning_rate": 3.351283355274714e-05, "loss": 82.8797, "step": 84370 }, { "epoch": 0.34090587717207305, "grad_norm": 529.9147338867188, "learning_rate": 3.351077468602438e-05, "loss": 66.2892, "step": 84380 }, { "epoch": 0.3409462784374407, "grad_norm": 870.2681274414062, "learning_rate": 3.350871555590212e-05, "loss": 69.3231, "step": 84390 }, { "epoch": 0.34098667970280827, "grad_norm": 1227.81298828125, "learning_rate": 3.350665616242049e-05, "loss": 62.5176, "step": 84400 }, { "epoch": 0.3410270809681759, "grad_norm": 829.910888671875, "learning_rate": 3.350459650561964e-05, "loss": 62.6331, "step": 84410 }, { "epoch": 0.34106748223354355, "grad_norm": 636.0309448242188, "learning_rate": 3.3502536585539746e-05, "loss": 42.9555, "step": 84420 }, { "epoch": 0.3411078834989112, "grad_norm": 856.1690673828125, "learning_rate": 3.350047640222094e-05, "loss": 56.321, "step": 84430 }, { "epoch": 0.34114828476427883, "grad_norm": 283.14874267578125, "learning_rate": 3.349841595570339e-05, "loss": 56.6846, "step": 84440 }, { "epoch": 0.34118868602964647, "grad_norm": 670.0524291992188, "learning_rate": 3.3496355246027276e-05, "loss": 56.418, "step": 84450 }, { "epoch": 0.34122908729501406, "grad_norm": 834.9964599609375, "learning_rate": 3.349429427323277e-05, "loss": 56.699, "step": 84460 }, { "epoch": 0.3412694885603817, "grad_norm": 2523.474853515625, "learning_rate": 3.349223303736005e-05, "loss": 88.8671, "step": 84470 }, { "epoch": 0.34130988982574934, "grad_norm": 618.8658447265625, "learning_rate": 3.34901715384493e-05, "loss": 60.285, "step": 84480 }, { "epoch": 0.341350291091117, "grad_norm": 683.580078125, "learning_rate": 3.3488109776540704e-05, "loss": 67.6615, "step": 84490 }, { "epoch": 0.3413906923564846, "grad_norm": 1387.2510986328125, "learning_rate": 3.3486047751674465e-05, "loss": 62.8798, "step": 84500 }, { "epoch": 0.34143109362185226, "grad_norm": 523.8330078125, "learning_rate": 3.348398546389079e-05, "loss": 43.4702, "step": 84510 }, { "epoch": 0.3414714948872199, "grad_norm": 972.6190795898438, "learning_rate": 3.3481922913229875e-05, "loss": 64.8293, "step": 84520 }, { "epoch": 0.3415118961525875, "grad_norm": 1023.8384399414062, "learning_rate": 3.347986009973193e-05, "loss": 80.5796, "step": 84530 }, { "epoch": 0.3415522974179551, "grad_norm": 306.61041259765625, "learning_rate": 3.3477797023437176e-05, "loss": 71.0203, "step": 84540 }, { "epoch": 0.34159269868332276, "grad_norm": 272.7121887207031, "learning_rate": 3.3475733684385815e-05, "loss": 60.9458, "step": 84550 }, { "epoch": 0.3416330999486904, "grad_norm": 648.0752563476562, "learning_rate": 3.3473670082618105e-05, "loss": 64.1713, "step": 84560 }, { "epoch": 0.34167350121405804, "grad_norm": 522.2056274414062, "learning_rate": 3.347160621817425e-05, "loss": 53.4062, "step": 84570 }, { "epoch": 0.3417139024794257, "grad_norm": 460.1097717285156, "learning_rate": 3.34695420910945e-05, "loss": 64.7991, "step": 84580 }, { "epoch": 0.34175430374479326, "grad_norm": 254.0565185546875, "learning_rate": 3.3467477701419095e-05, "loss": 42.2828, "step": 84590 }, { "epoch": 0.3417947050101609, "grad_norm": 964.922119140625, "learning_rate": 3.3465413049188276e-05, "loss": 61.1818, "step": 84600 }, { "epoch": 0.34183510627552854, "grad_norm": 1123.0037841796875, "learning_rate": 3.34633481344423e-05, "loss": 87.6611, "step": 84610 }, { "epoch": 0.3418755075408962, "grad_norm": 740.2823486328125, "learning_rate": 3.346128295722142e-05, "loss": 49.8443, "step": 84620 }, { "epoch": 0.3419159088062638, "grad_norm": 495.0751647949219, "learning_rate": 3.3459217517565896e-05, "loss": 79.6292, "step": 84630 }, { "epoch": 0.34195631007163146, "grad_norm": 970.404052734375, "learning_rate": 3.3457151815516e-05, "loss": 91.5376, "step": 84640 }, { "epoch": 0.34199671133699905, "grad_norm": 611.1907958984375, "learning_rate": 3.3455085851112e-05, "loss": 65.6393, "step": 84650 }, { "epoch": 0.3420371126023667, "grad_norm": 605.852294921875, "learning_rate": 3.345301962439417e-05, "loss": 60.7457, "step": 84660 }, { "epoch": 0.34207751386773433, "grad_norm": 1289.8677978515625, "learning_rate": 3.34509531354028e-05, "loss": 69.1039, "step": 84670 }, { "epoch": 0.34211791513310197, "grad_norm": 914.4951171875, "learning_rate": 3.344888638417817e-05, "loss": 62.5575, "step": 84680 }, { "epoch": 0.3421583163984696, "grad_norm": 518.4591674804688, "learning_rate": 3.3446819370760577e-05, "loss": 55.3549, "step": 84690 }, { "epoch": 0.34219871766383725, "grad_norm": 586.8419799804688, "learning_rate": 3.3444752095190326e-05, "loss": 65.0346, "step": 84700 }, { "epoch": 0.3422391189292049, "grad_norm": 640.1392822265625, "learning_rate": 3.34426845575077e-05, "loss": 56.1609, "step": 84710 }, { "epoch": 0.3422795201945725, "grad_norm": 680.588134765625, "learning_rate": 3.344061675775303e-05, "loss": 62.718, "step": 84720 }, { "epoch": 0.3423199214599401, "grad_norm": 848.21044921875, "learning_rate": 3.34385486959666e-05, "loss": 49.1798, "step": 84730 }, { "epoch": 0.34236032272530775, "grad_norm": 1049.4501953125, "learning_rate": 3.343648037218876e-05, "loss": 86.3738, "step": 84740 }, { "epoch": 0.3424007239906754, "grad_norm": 915.064453125, "learning_rate": 3.343441178645981e-05, "loss": 89.3447, "step": 84750 }, { "epoch": 0.34244112525604303, "grad_norm": 293.6428527832031, "learning_rate": 3.3432342938820084e-05, "loss": 54.0937, "step": 84760 }, { "epoch": 0.3424815265214107, "grad_norm": 1054.6556396484375, "learning_rate": 3.3430273829309925e-05, "loss": 65.8907, "step": 84770 }, { "epoch": 0.34252192778677826, "grad_norm": 632.8988037109375, "learning_rate": 3.342820445796966e-05, "loss": 48.7645, "step": 84780 }, { "epoch": 0.3425623290521459, "grad_norm": 835.9781494140625, "learning_rate": 3.342613482483963e-05, "loss": 58.9518, "step": 84790 }, { "epoch": 0.34260273031751354, "grad_norm": 490.72906494140625, "learning_rate": 3.342406492996019e-05, "loss": 72.7595, "step": 84800 }, { "epoch": 0.3426431315828812, "grad_norm": 931.8734741210938, "learning_rate": 3.34219947733717e-05, "loss": 56.5696, "step": 84810 }, { "epoch": 0.3426835328482488, "grad_norm": 1742.132568359375, "learning_rate": 3.3419924355114505e-05, "loss": 61.6062, "step": 84820 }, { "epoch": 0.34272393411361646, "grad_norm": 774.6658935546875, "learning_rate": 3.341785367522898e-05, "loss": 65.5544, "step": 84830 }, { "epoch": 0.3427643353789841, "grad_norm": 1044.997802734375, "learning_rate": 3.341578273375548e-05, "loss": 60.1936, "step": 84840 }, { "epoch": 0.3428047366443517, "grad_norm": 371.9859619140625, "learning_rate": 3.3413711530734404e-05, "loss": 53.2397, "step": 84850 }, { "epoch": 0.3428451379097193, "grad_norm": 535.1820678710938, "learning_rate": 3.341164006620611e-05, "loss": 83.0636, "step": 84860 }, { "epoch": 0.34288553917508696, "grad_norm": 1121.55029296875, "learning_rate": 3.340956834021099e-05, "loss": 68.9959, "step": 84870 }, { "epoch": 0.3429259404404546, "grad_norm": 783.1961059570312, "learning_rate": 3.340749635278942e-05, "loss": 62.8407, "step": 84880 }, { "epoch": 0.34296634170582224, "grad_norm": 680.4360961914062, "learning_rate": 3.3405424103981815e-05, "loss": 51.7818, "step": 84890 }, { "epoch": 0.3430067429711899, "grad_norm": 655.9916381835938, "learning_rate": 3.340335159382857e-05, "loss": 73.2568, "step": 84900 }, { "epoch": 0.34304714423655747, "grad_norm": 728.5745239257812, "learning_rate": 3.340127882237008e-05, "loss": 49.0942, "step": 84910 }, { "epoch": 0.3430875455019251, "grad_norm": 1191.2718505859375, "learning_rate": 3.339920578964676e-05, "loss": 76.6716, "step": 84920 }, { "epoch": 0.34312794676729275, "grad_norm": 656.1549072265625, "learning_rate": 3.339713249569902e-05, "loss": 64.1306, "step": 84930 }, { "epoch": 0.3431683480326604, "grad_norm": 883.1411743164062, "learning_rate": 3.339505894056729e-05, "loss": 61.9963, "step": 84940 }, { "epoch": 0.343208749298028, "grad_norm": 4564.13525390625, "learning_rate": 3.339298512429199e-05, "loss": 129.574, "step": 84950 }, { "epoch": 0.34324915056339567, "grad_norm": 726.3573608398438, "learning_rate": 3.339091104691355e-05, "loss": 70.7135, "step": 84960 }, { "epoch": 0.34328955182876325, "grad_norm": 435.5312805175781, "learning_rate": 3.3388836708472404e-05, "loss": 48.515, "step": 84970 }, { "epoch": 0.3433299530941309, "grad_norm": 852.5233154296875, "learning_rate": 3.3386762109009e-05, "loss": 38.0021, "step": 84980 }, { "epoch": 0.34337035435949853, "grad_norm": 741.4345092773438, "learning_rate": 3.338468724856377e-05, "loss": 43.5169, "step": 84990 }, { "epoch": 0.34341075562486617, "grad_norm": 523.470947265625, "learning_rate": 3.3382612127177166e-05, "loss": 89.3195, "step": 85000 }, { "epoch": 0.3434511568902338, "grad_norm": 903.5706176757812, "learning_rate": 3.338053674488966e-05, "loss": 64.1985, "step": 85010 }, { "epoch": 0.34349155815560145, "grad_norm": 694.2612915039062, "learning_rate": 3.3378461101741693e-05, "loss": 66.603, "step": 85020 }, { "epoch": 0.3435319594209691, "grad_norm": 696.1839599609375, "learning_rate": 3.3376385197773737e-05, "loss": 72.5089, "step": 85030 }, { "epoch": 0.3435723606863367, "grad_norm": 804.7989501953125, "learning_rate": 3.337430903302627e-05, "loss": 76.6745, "step": 85040 }, { "epoch": 0.3436127619517043, "grad_norm": 446.0153503417969, "learning_rate": 3.337223260753977e-05, "loss": 77.8143, "step": 85050 }, { "epoch": 0.34365316321707196, "grad_norm": 757.8631591796875, "learning_rate": 3.33701559213547e-05, "loss": 103.2349, "step": 85060 }, { "epoch": 0.3436935644824396, "grad_norm": 1979.259033203125, "learning_rate": 3.336807897451156e-05, "loss": 68.4003, "step": 85070 }, { "epoch": 0.34373396574780724, "grad_norm": 1280.52587890625, "learning_rate": 3.336600176705083e-05, "loss": 66.167, "step": 85080 }, { "epoch": 0.3437743670131749, "grad_norm": 698.9182739257812, "learning_rate": 3.336392429901303e-05, "loss": 59.4369, "step": 85090 }, { "epoch": 0.34381476827854246, "grad_norm": 1795.8751220703125, "learning_rate": 3.336184657043864e-05, "loss": 58.2561, "step": 85100 }, { "epoch": 0.3438551695439101, "grad_norm": 1120.764892578125, "learning_rate": 3.335976858136816e-05, "loss": 69.6759, "step": 85110 }, { "epoch": 0.34389557080927774, "grad_norm": 525.7089233398438, "learning_rate": 3.335769033184213e-05, "loss": 45.6366, "step": 85120 }, { "epoch": 0.3439359720746454, "grad_norm": 754.0464477539062, "learning_rate": 3.3355611821901046e-05, "loss": 62.9835, "step": 85130 }, { "epoch": 0.343976373340013, "grad_norm": 540.761962890625, "learning_rate": 3.335353305158543e-05, "loss": 80.4276, "step": 85140 }, { "epoch": 0.34401677460538066, "grad_norm": 628.1510009765625, "learning_rate": 3.335145402093582e-05, "loss": 43.522, "step": 85150 }, { "epoch": 0.34405717587074824, "grad_norm": 1078.826416015625, "learning_rate": 3.3349374729992725e-05, "loss": 59.7352, "step": 85160 }, { "epoch": 0.3440975771361159, "grad_norm": 748.0184936523438, "learning_rate": 3.3347295178796707e-05, "loss": 78.6812, "step": 85170 }, { "epoch": 0.3441379784014835, "grad_norm": 670.2138671875, "learning_rate": 3.334521536738829e-05, "loss": 35.4985, "step": 85180 }, { "epoch": 0.34417837966685116, "grad_norm": 2402.2900390625, "learning_rate": 3.334313529580804e-05, "loss": 93.7723, "step": 85190 }, { "epoch": 0.3442187809322188, "grad_norm": 780.775634765625, "learning_rate": 3.334105496409649e-05, "loss": 57.9252, "step": 85200 }, { "epoch": 0.34425918219758644, "grad_norm": 836.4694213867188, "learning_rate": 3.333897437229421e-05, "loss": 100.2123, "step": 85210 }, { "epoch": 0.3442995834629541, "grad_norm": 1626.2457275390625, "learning_rate": 3.333689352044175e-05, "loss": 69.8935, "step": 85220 }, { "epoch": 0.34433998472832167, "grad_norm": 931.92236328125, "learning_rate": 3.3334812408579696e-05, "loss": 61.816, "step": 85230 }, { "epoch": 0.3443803859936893, "grad_norm": 1187.04541015625, "learning_rate": 3.3332731036748604e-05, "loss": 61.946, "step": 85240 }, { "epoch": 0.34442078725905695, "grad_norm": 1133.5804443359375, "learning_rate": 3.333064940498905e-05, "loss": 66.1367, "step": 85250 }, { "epoch": 0.3444611885244246, "grad_norm": 639.8355102539062, "learning_rate": 3.332856751334163e-05, "loss": 105.5586, "step": 85260 }, { "epoch": 0.34450158978979223, "grad_norm": 1008.3250732421875, "learning_rate": 3.3326485361846924e-05, "loss": 52.8669, "step": 85270 }, { "epoch": 0.34454199105515987, "grad_norm": 1149.5870361328125, "learning_rate": 3.3324402950545524e-05, "loss": 93.5141, "step": 85280 }, { "epoch": 0.34458239232052745, "grad_norm": 541.025146484375, "learning_rate": 3.3322320279478025e-05, "loss": 51.9097, "step": 85290 }, { "epoch": 0.3446227935858951, "grad_norm": 925.6935424804688, "learning_rate": 3.332023734868504e-05, "loss": 58.3896, "step": 85300 }, { "epoch": 0.34466319485126273, "grad_norm": 693.147216796875, "learning_rate": 3.3318154158207164e-05, "loss": 111.9228, "step": 85310 }, { "epoch": 0.3447035961166304, "grad_norm": 0.0, "learning_rate": 3.3316070708085014e-05, "loss": 49.1727, "step": 85320 }, { "epoch": 0.344743997381998, "grad_norm": 2374.637451171875, "learning_rate": 3.3313986998359213e-05, "loss": 85.6452, "step": 85330 }, { "epoch": 0.34478439864736565, "grad_norm": 593.0366821289062, "learning_rate": 3.3311903029070384e-05, "loss": 56.1827, "step": 85340 }, { "epoch": 0.3448247999127333, "grad_norm": 583.0576782226562, "learning_rate": 3.330981880025915e-05, "loss": 35.6842, "step": 85350 }, { "epoch": 0.3448652011781009, "grad_norm": 1863.508056640625, "learning_rate": 3.3307734311966144e-05, "loss": 93.4643, "step": 85360 }, { "epoch": 0.3449056024434685, "grad_norm": 744.82763671875, "learning_rate": 3.330564956423201e-05, "loss": 69.3355, "step": 85370 }, { "epoch": 0.34494600370883616, "grad_norm": 1112.11279296875, "learning_rate": 3.330356455709739e-05, "loss": 63.4886, "step": 85380 }, { "epoch": 0.3449864049742038, "grad_norm": 581.6898803710938, "learning_rate": 3.3301479290602925e-05, "loss": 70.5977, "step": 85390 }, { "epoch": 0.34502680623957144, "grad_norm": 311.9263916015625, "learning_rate": 3.329939376478927e-05, "loss": 61.0018, "step": 85400 }, { "epoch": 0.3450672075049391, "grad_norm": 482.0248107910156, "learning_rate": 3.329730797969709e-05, "loss": 62.5195, "step": 85410 }, { "epoch": 0.34510760877030666, "grad_norm": 814.2288208007812, "learning_rate": 3.329522193536705e-05, "loss": 70.7657, "step": 85420 }, { "epoch": 0.3451480100356743, "grad_norm": 923.7318725585938, "learning_rate": 3.3293135631839806e-05, "loss": 46.5404, "step": 85430 }, { "epoch": 0.34518841130104194, "grad_norm": 913.3489990234375, "learning_rate": 3.329104906915604e-05, "loss": 60.2891, "step": 85440 }, { "epoch": 0.3452288125664096, "grad_norm": 2197.94384765625, "learning_rate": 3.328896224735644e-05, "loss": 108.9677, "step": 85450 }, { "epoch": 0.3452692138317772, "grad_norm": 636.381103515625, "learning_rate": 3.328687516648167e-05, "loss": 73.8755, "step": 85460 }, { "epoch": 0.34530961509714486, "grad_norm": 948.1078491210938, "learning_rate": 3.328478782657243e-05, "loss": 58.9634, "step": 85470 }, { "epoch": 0.34535001636251245, "grad_norm": 397.9957580566406, "learning_rate": 3.328270022766941e-05, "loss": 77.2741, "step": 85480 }, { "epoch": 0.3453904176278801, "grad_norm": 1208.4564208984375, "learning_rate": 3.3280612369813305e-05, "loss": 67.3984, "step": 85490 }, { "epoch": 0.3454308188932477, "grad_norm": 953.3515014648438, "learning_rate": 3.3278524253044834e-05, "loss": 63.2876, "step": 85500 }, { "epoch": 0.34547122015861537, "grad_norm": 663.9226684570312, "learning_rate": 3.327643587740469e-05, "loss": 62.1283, "step": 85510 }, { "epoch": 0.345511621423983, "grad_norm": 599.0784301757812, "learning_rate": 3.3274347242933606e-05, "loss": 65.7558, "step": 85520 }, { "epoch": 0.34555202268935065, "grad_norm": 1658.9404296875, "learning_rate": 3.327225834967227e-05, "loss": 50.9672, "step": 85530 }, { "epoch": 0.3455924239547183, "grad_norm": 1039.7579345703125, "learning_rate": 3.327016919766144e-05, "loss": 68.3731, "step": 85540 }, { "epoch": 0.34563282522008587, "grad_norm": 802.1036376953125, "learning_rate": 3.3268079786941825e-05, "loss": 85.3128, "step": 85550 }, { "epoch": 0.3456732264854535, "grad_norm": 2582.881103515625, "learning_rate": 3.326599011755416e-05, "loss": 84.6592, "step": 85560 }, { "epoch": 0.34571362775082115, "grad_norm": 1083.4248046875, "learning_rate": 3.326390018953919e-05, "loss": 59.0829, "step": 85570 }, { "epoch": 0.3457540290161888, "grad_norm": 580.1563720703125, "learning_rate": 3.326181000293766e-05, "loss": 61.7299, "step": 85580 }, { "epoch": 0.34579443028155643, "grad_norm": 847.4132080078125, "learning_rate": 3.325971955779031e-05, "loss": 71.2915, "step": 85590 }, { "epoch": 0.34583483154692407, "grad_norm": 621.8236083984375, "learning_rate": 3.325762885413791e-05, "loss": 70.941, "step": 85600 }, { "epoch": 0.34587523281229166, "grad_norm": 626.3765869140625, "learning_rate": 3.32555378920212e-05, "loss": 57.7146, "step": 85610 }, { "epoch": 0.3459156340776593, "grad_norm": 1419.3797607421875, "learning_rate": 3.325344667148095e-05, "loss": 71.6652, "step": 85620 }, { "epoch": 0.34595603534302694, "grad_norm": 1376.2755126953125, "learning_rate": 3.325135519255795e-05, "loss": 103.9367, "step": 85630 }, { "epoch": 0.3459964366083946, "grad_norm": 526.369140625, "learning_rate": 3.3249263455292954e-05, "loss": 42.4211, "step": 85640 }, { "epoch": 0.3460368378737622, "grad_norm": 554.4691772460938, "learning_rate": 3.324717145972674e-05, "loss": 82.0677, "step": 85650 }, { "epoch": 0.34607723913912986, "grad_norm": 609.2090454101562, "learning_rate": 3.32450792059001e-05, "loss": 52.1391, "step": 85660 }, { "epoch": 0.3461176404044975, "grad_norm": 561.9303588867188, "learning_rate": 3.3242986693853824e-05, "loss": 50.3393, "step": 85670 }, { "epoch": 0.3461580416698651, "grad_norm": 1017.120849609375, "learning_rate": 3.32408939236287e-05, "loss": 66.1674, "step": 85680 }, { "epoch": 0.3461984429352327, "grad_norm": 737.3256225585938, "learning_rate": 3.323880089526554e-05, "loss": 56.035, "step": 85690 }, { "epoch": 0.34623884420060036, "grad_norm": 769.2302856445312, "learning_rate": 3.323670760880513e-05, "loss": 79.3797, "step": 85700 }, { "epoch": 0.346279245465968, "grad_norm": 628.8507080078125, "learning_rate": 3.3234614064288297e-05, "loss": 53.0274, "step": 85710 }, { "epoch": 0.34631964673133564, "grad_norm": 970.371337890625, "learning_rate": 3.323252026175585e-05, "loss": 79.7288, "step": 85720 }, { "epoch": 0.3463600479967033, "grad_norm": 591.3244018554688, "learning_rate": 3.323042620124861e-05, "loss": 55.2425, "step": 85730 }, { "epoch": 0.34640044926207086, "grad_norm": 2088.962158203125, "learning_rate": 3.32283318828074e-05, "loss": 100.4404, "step": 85740 }, { "epoch": 0.3464408505274385, "grad_norm": 1535.4835205078125, "learning_rate": 3.322623730647304e-05, "loss": 76.5266, "step": 85750 }, { "epoch": 0.34648125179280614, "grad_norm": 614.2904052734375, "learning_rate": 3.322414247228638e-05, "loss": 58.9267, "step": 85760 }, { "epoch": 0.3465216530581738, "grad_norm": 1063.307373046875, "learning_rate": 3.322204738028826e-05, "loss": 49.7642, "step": 85770 }, { "epoch": 0.3465620543235414, "grad_norm": 449.1618957519531, "learning_rate": 3.321995203051951e-05, "loss": 51.4166, "step": 85780 }, { "epoch": 0.34660245558890906, "grad_norm": 883.9197387695312, "learning_rate": 3.321785642302099e-05, "loss": 101.6042, "step": 85790 }, { "epoch": 0.34664285685427665, "grad_norm": 1039.1463623046875, "learning_rate": 3.3215760557833556e-05, "loss": 56.6844, "step": 85800 }, { "epoch": 0.3466832581196443, "grad_norm": 425.7856750488281, "learning_rate": 3.3213664434998065e-05, "loss": 74.9194, "step": 85810 }, { "epoch": 0.34672365938501193, "grad_norm": 754.9216918945312, "learning_rate": 3.3211568054555384e-05, "loss": 67.509, "step": 85820 }, { "epoch": 0.34676406065037957, "grad_norm": 495.1691589355469, "learning_rate": 3.320947141654639e-05, "loss": 54.1455, "step": 85830 }, { "epoch": 0.3468044619157472, "grad_norm": 1658.7933349609375, "learning_rate": 3.320737452101194e-05, "loss": 71.4703, "step": 85840 }, { "epoch": 0.34684486318111485, "grad_norm": 1947.14501953125, "learning_rate": 3.3205277367992924e-05, "loss": 81.6708, "step": 85850 }, { "epoch": 0.3468852644464825, "grad_norm": 485.7032775878906, "learning_rate": 3.3203179957530235e-05, "loss": 72.3338, "step": 85860 }, { "epoch": 0.3469256657118501, "grad_norm": 1791.69140625, "learning_rate": 3.320108228966475e-05, "loss": 71.3995, "step": 85870 }, { "epoch": 0.3469660669772177, "grad_norm": 489.6419982910156, "learning_rate": 3.319898436443737e-05, "loss": 46.9738, "step": 85880 }, { "epoch": 0.34700646824258535, "grad_norm": 485.36065673828125, "learning_rate": 3.319688618188899e-05, "loss": 41.8324, "step": 85890 }, { "epoch": 0.347046869507953, "grad_norm": 1790.6739501953125, "learning_rate": 3.319478774206053e-05, "loss": 98.1587, "step": 85900 }, { "epoch": 0.34708727077332063, "grad_norm": 592.9716186523438, "learning_rate": 3.319268904499288e-05, "loss": 76.073, "step": 85910 }, { "epoch": 0.3471276720386883, "grad_norm": 1569.42431640625, "learning_rate": 3.3190590090726966e-05, "loss": 76.2268, "step": 85920 }, { "epoch": 0.34716807330405586, "grad_norm": 348.3356628417969, "learning_rate": 3.318849087930371e-05, "loss": 41.0145, "step": 85930 }, { "epoch": 0.3472084745694235, "grad_norm": 3468.679931640625, "learning_rate": 3.3186391410764033e-05, "loss": 68.0374, "step": 85940 }, { "epoch": 0.34724887583479114, "grad_norm": 432.0105285644531, "learning_rate": 3.3184291685148866e-05, "loss": 59.3231, "step": 85950 }, { "epoch": 0.3472892771001588, "grad_norm": 609.4529418945312, "learning_rate": 3.3182191702499146e-05, "loss": 95.9846, "step": 85960 }, { "epoch": 0.3473296783655264, "grad_norm": 1003.2774047851562, "learning_rate": 3.318009146285582e-05, "loss": 55.1918, "step": 85970 }, { "epoch": 0.34737007963089406, "grad_norm": 914.4887084960938, "learning_rate": 3.317799096625981e-05, "loss": 55.2846, "step": 85980 }, { "epoch": 0.3474104808962617, "grad_norm": 1035.424560546875, "learning_rate": 3.317589021275209e-05, "loss": 74.6195, "step": 85990 }, { "epoch": 0.3474508821616293, "grad_norm": 324.5082092285156, "learning_rate": 3.317378920237361e-05, "loss": 56.005, "step": 86000 }, { "epoch": 0.3474912834269969, "grad_norm": 1099.5167236328125, "learning_rate": 3.317168793516533e-05, "loss": 53.5784, "step": 86010 }, { "epoch": 0.34753168469236456, "grad_norm": 431.3697509765625, "learning_rate": 3.31695864111682e-05, "loss": 46.6206, "step": 86020 }, { "epoch": 0.3475720859577322, "grad_norm": 688.5933837890625, "learning_rate": 3.316748463042321e-05, "loss": 70.581, "step": 86030 }, { "epoch": 0.34761248722309984, "grad_norm": 625.9749145507812, "learning_rate": 3.316538259297133e-05, "loss": 78.3036, "step": 86040 }, { "epoch": 0.3476528884884675, "grad_norm": 1391.593994140625, "learning_rate": 3.316328029885353e-05, "loss": 75.0877, "step": 86050 }, { "epoch": 0.34769328975383507, "grad_norm": 423.77862548828125, "learning_rate": 3.3161177748110816e-05, "loss": 46.4944, "step": 86060 }, { "epoch": 0.3477336910192027, "grad_norm": 488.5755310058594, "learning_rate": 3.315907494078416e-05, "loss": 44.6107, "step": 86070 }, { "epoch": 0.34777409228457035, "grad_norm": 1501.919921875, "learning_rate": 3.315697187691456e-05, "loss": 123.4812, "step": 86080 }, { "epoch": 0.347814493549938, "grad_norm": 732.31884765625, "learning_rate": 3.315486855654302e-05, "loss": 48.9343, "step": 86090 }, { "epoch": 0.3478548948153056, "grad_norm": 342.4243469238281, "learning_rate": 3.315276497971055e-05, "loss": 52.2912, "step": 86100 }, { "epoch": 0.34789529608067327, "grad_norm": 1203.0311279296875, "learning_rate": 3.315066114645815e-05, "loss": 51.8036, "step": 86110 }, { "epoch": 0.34793569734604085, "grad_norm": 356.6678466796875, "learning_rate": 3.314855705682685e-05, "loss": 70.2863, "step": 86120 }, { "epoch": 0.3479760986114085, "grad_norm": 655.4275512695312, "learning_rate": 3.314645271085765e-05, "loss": 64.5816, "step": 86130 }, { "epoch": 0.34801649987677613, "grad_norm": 1261.5318603515625, "learning_rate": 3.3144348108591594e-05, "loss": 67.8096, "step": 86140 }, { "epoch": 0.34805690114214377, "grad_norm": 460.6929626464844, "learning_rate": 3.314224325006969e-05, "loss": 59.9346, "step": 86150 }, { "epoch": 0.3480973024075114, "grad_norm": 1493.7012939453125, "learning_rate": 3.3140138135333004e-05, "loss": 70.3878, "step": 86160 }, { "epoch": 0.34813770367287905, "grad_norm": 451.81988525390625, "learning_rate": 3.313803276442255e-05, "loss": 63.3886, "step": 86170 }, { "epoch": 0.3481781049382467, "grad_norm": 768.2365112304688, "learning_rate": 3.313592713737939e-05, "loss": 75.5211, "step": 86180 }, { "epoch": 0.3482185062036143, "grad_norm": 949.534912109375, "learning_rate": 3.3133821254244564e-05, "loss": 79.6422, "step": 86190 }, { "epoch": 0.3482589074689819, "grad_norm": 871.8472900390625, "learning_rate": 3.3131715115059134e-05, "loss": 59.8607, "step": 86200 }, { "epoch": 0.34829930873434956, "grad_norm": 1112.0130615234375, "learning_rate": 3.3129608719864154e-05, "loss": 53.979, "step": 86210 }, { "epoch": 0.3483397099997172, "grad_norm": 399.10418701171875, "learning_rate": 3.312750206870069e-05, "loss": 61.8568, "step": 86220 }, { "epoch": 0.34838011126508484, "grad_norm": 7023.32861328125, "learning_rate": 3.312539516160982e-05, "loss": 73.2986, "step": 86230 }, { "epoch": 0.3484205125304525, "grad_norm": 2396.18701171875, "learning_rate": 3.312328799863261e-05, "loss": 114.8817, "step": 86240 }, { "epoch": 0.34846091379582006, "grad_norm": 876.801513671875, "learning_rate": 3.312118057981015e-05, "loss": 77.4313, "step": 86250 }, { "epoch": 0.3485013150611877, "grad_norm": 485.1725769042969, "learning_rate": 3.311907290518352e-05, "loss": 53.1866, "step": 86260 }, { "epoch": 0.34854171632655534, "grad_norm": 844.3080444335938, "learning_rate": 3.31169649747938e-05, "loss": 58.6329, "step": 86270 }, { "epoch": 0.348582117591923, "grad_norm": 2072.34130859375, "learning_rate": 3.3114856788682105e-05, "loss": 79.7277, "step": 86280 }, { "epoch": 0.3486225188572906, "grad_norm": 791.1768798828125, "learning_rate": 3.311274834688951e-05, "loss": 81.8647, "step": 86290 }, { "epoch": 0.34866292012265826, "grad_norm": 617.4157104492188, "learning_rate": 3.3110639649457153e-05, "loss": 59.8066, "step": 86300 }, { "epoch": 0.3487033213880259, "grad_norm": 670.8255615234375, "learning_rate": 3.310853069642611e-05, "loss": 64.7944, "step": 86310 }, { "epoch": 0.3487437226533935, "grad_norm": 825.6277465820312, "learning_rate": 3.310642148783752e-05, "loss": 72.0322, "step": 86320 }, { "epoch": 0.3487841239187611, "grad_norm": 316.1906433105469, "learning_rate": 3.31043120237325e-05, "loss": 51.1401, "step": 86330 }, { "epoch": 0.34882452518412876, "grad_norm": 1181.583740234375, "learning_rate": 3.310220230415217e-05, "loss": 60.8622, "step": 86340 }, { "epoch": 0.3488649264494964, "grad_norm": 366.9600524902344, "learning_rate": 3.3100092329137654e-05, "loss": 51.5616, "step": 86350 }, { "epoch": 0.34890532771486404, "grad_norm": 1895.227783203125, "learning_rate": 3.30979820987301e-05, "loss": 66.7041, "step": 86360 }, { "epoch": 0.3489457289802317, "grad_norm": 1102.731201171875, "learning_rate": 3.3095871612970636e-05, "loss": 50.2599, "step": 86370 }, { "epoch": 0.34898613024559927, "grad_norm": 836.0551147460938, "learning_rate": 3.3093760871900414e-05, "loss": 59.316, "step": 86380 }, { "epoch": 0.3490265315109669, "grad_norm": 774.7025756835938, "learning_rate": 3.3091649875560584e-05, "loss": 76.6823, "step": 86390 }, { "epoch": 0.34906693277633455, "grad_norm": 800.423095703125, "learning_rate": 3.30895386239923e-05, "loss": 68.0474, "step": 86400 }, { "epoch": 0.3491073340417022, "grad_norm": 1548.18115234375, "learning_rate": 3.308742711723672e-05, "loss": 93.2495, "step": 86410 }, { "epoch": 0.34914773530706983, "grad_norm": 1229.29345703125, "learning_rate": 3.308531535533501e-05, "loss": 55.2976, "step": 86420 }, { "epoch": 0.34918813657243747, "grad_norm": 430.7838134765625, "learning_rate": 3.308320333832835e-05, "loss": 68.637, "step": 86430 }, { "epoch": 0.34922853783780505, "grad_norm": 630.6207885742188, "learning_rate": 3.308109106625789e-05, "loss": 78.5927, "step": 86440 }, { "epoch": 0.3492689391031727, "grad_norm": 395.88885498046875, "learning_rate": 3.307897853916483e-05, "loss": 72.9264, "step": 86450 }, { "epoch": 0.34930934036854033, "grad_norm": 787.671630859375, "learning_rate": 3.307686575709036e-05, "loss": 62.1931, "step": 86460 }, { "epoch": 0.349349741633908, "grad_norm": 816.1442260742188, "learning_rate": 3.3074752720075644e-05, "loss": 58.2558, "step": 86470 }, { "epoch": 0.3493901428992756, "grad_norm": 780.057861328125, "learning_rate": 3.30726394281619e-05, "loss": 49.8044, "step": 86480 }, { "epoch": 0.34943054416464325, "grad_norm": 694.2754516601562, "learning_rate": 3.307052588139032e-05, "loss": 82.6303, "step": 86490 }, { "epoch": 0.3494709454300109, "grad_norm": 641.3462524414062, "learning_rate": 3.3068412079802114e-05, "loss": 42.0418, "step": 86500 }, { "epoch": 0.3495113466953785, "grad_norm": 376.70440673828125, "learning_rate": 3.306629802343848e-05, "loss": 51.2786, "step": 86510 }, { "epoch": 0.3495517479607461, "grad_norm": 395.3877868652344, "learning_rate": 3.306418371234064e-05, "loss": 154.0648, "step": 86520 }, { "epoch": 0.34959214922611376, "grad_norm": 916.4523315429688, "learning_rate": 3.306206914654981e-05, "loss": 85.2943, "step": 86530 }, { "epoch": 0.3496325504914814, "grad_norm": 573.927978515625, "learning_rate": 3.305995432610722e-05, "loss": 51.809, "step": 86540 }, { "epoch": 0.34967295175684904, "grad_norm": 1681.1505126953125, "learning_rate": 3.30578392510541e-05, "loss": 61.1669, "step": 86550 }, { "epoch": 0.3497133530222167, "grad_norm": 1356.93017578125, "learning_rate": 3.305572392143168e-05, "loss": 130.7669, "step": 86560 }, { "epoch": 0.34975375428758426, "grad_norm": 379.07769775390625, "learning_rate": 3.3053608337281194e-05, "loss": 68.2804, "step": 86570 }, { "epoch": 0.3497941555529519, "grad_norm": 1200.1802978515625, "learning_rate": 3.30514924986439e-05, "loss": 84.0009, "step": 86580 }, { "epoch": 0.34983455681831954, "grad_norm": 1230.31591796875, "learning_rate": 3.3049376405561046e-05, "loss": 85.864, "step": 86590 }, { "epoch": 0.3498749580836872, "grad_norm": 1067.477294921875, "learning_rate": 3.304726005807386e-05, "loss": 77.4034, "step": 86600 }, { "epoch": 0.3499153593490548, "grad_norm": 649.9188232421875, "learning_rate": 3.304514345622364e-05, "loss": 48.0631, "step": 86610 }, { "epoch": 0.34995576061442246, "grad_norm": 1626.9434814453125, "learning_rate": 3.3043026600051624e-05, "loss": 60.3829, "step": 86620 }, { "epoch": 0.3499961618797901, "grad_norm": 713.0986328125, "learning_rate": 3.304090948959909e-05, "loss": 40.1923, "step": 86630 }, { "epoch": 0.3500365631451577, "grad_norm": 630.3682250976562, "learning_rate": 3.3038792124907325e-05, "loss": 69.6386, "step": 86640 }, { "epoch": 0.3500769644105253, "grad_norm": 3280.472900390625, "learning_rate": 3.303667450601758e-05, "loss": 75.8308, "step": 86650 }, { "epoch": 0.35011736567589297, "grad_norm": 839.1499633789062, "learning_rate": 3.303455663297116e-05, "loss": 64.2162, "step": 86660 }, { "epoch": 0.3501577669412606, "grad_norm": 729.3599853515625, "learning_rate": 3.303243850580934e-05, "loss": 71.651, "step": 86670 }, { "epoch": 0.35019816820662825, "grad_norm": 463.62384033203125, "learning_rate": 3.303032012457343e-05, "loss": 45.6416, "step": 86680 }, { "epoch": 0.3502385694719959, "grad_norm": 745.6293334960938, "learning_rate": 3.3028201489304716e-05, "loss": 69.6891, "step": 86690 }, { "epoch": 0.35027897073736347, "grad_norm": 1560.3077392578125, "learning_rate": 3.3026082600044506e-05, "loss": 66.435, "step": 86700 }, { "epoch": 0.3503193720027311, "grad_norm": 1437.30517578125, "learning_rate": 3.3023963456834115e-05, "loss": 93.1862, "step": 86710 }, { "epoch": 0.35035977326809875, "grad_norm": 476.25872802734375, "learning_rate": 3.302184405971485e-05, "loss": 82.2367, "step": 86720 }, { "epoch": 0.3504001745334664, "grad_norm": 1436.0726318359375, "learning_rate": 3.301972440872803e-05, "loss": 121.6115, "step": 86730 }, { "epoch": 0.35044057579883403, "grad_norm": 1528.564208984375, "learning_rate": 3.3017604503914976e-05, "loss": 55.2463, "step": 86740 }, { "epoch": 0.35048097706420167, "grad_norm": 399.49444580078125, "learning_rate": 3.301548434531702e-05, "loss": 49.284, "step": 86750 }, { "epoch": 0.35052137832956926, "grad_norm": 613.2311401367188, "learning_rate": 3.30133639329755e-05, "loss": 90.4632, "step": 86760 }, { "epoch": 0.3505617795949369, "grad_norm": 747.5999145507812, "learning_rate": 3.3011243266931745e-05, "loss": 53.8135, "step": 86770 }, { "epoch": 0.35060218086030454, "grad_norm": 1396.3726806640625, "learning_rate": 3.300912234722711e-05, "loss": 73.096, "step": 86780 }, { "epoch": 0.3506425821256722, "grad_norm": 562.4843139648438, "learning_rate": 3.300700117390294e-05, "loss": 91.8843, "step": 86790 }, { "epoch": 0.3506829833910398, "grad_norm": 684.2686157226562, "learning_rate": 3.300487974700058e-05, "loss": 38.9725, "step": 86800 }, { "epoch": 0.35072338465640746, "grad_norm": 904.96923828125, "learning_rate": 3.3002758066561394e-05, "loss": 60.236, "step": 86810 }, { "epoch": 0.3507637859217751, "grad_norm": 1734.596923828125, "learning_rate": 3.300063613262675e-05, "loss": 89.0508, "step": 86820 }, { "epoch": 0.3508041871871427, "grad_norm": 272.29534912109375, "learning_rate": 3.2998513945238e-05, "loss": 60.678, "step": 86830 }, { "epoch": 0.3508445884525103, "grad_norm": 693.614990234375, "learning_rate": 3.299639150443654e-05, "loss": 53.3913, "step": 86840 }, { "epoch": 0.35088498971787796, "grad_norm": 2130.129638671875, "learning_rate": 3.299426881026374e-05, "loss": 65.3584, "step": 86850 }, { "epoch": 0.3509253909832456, "grad_norm": 347.66632080078125, "learning_rate": 3.299214586276096e-05, "loss": 70.7766, "step": 86860 }, { "epoch": 0.35096579224861324, "grad_norm": 1611.8804931640625, "learning_rate": 3.2990022661969626e-05, "loss": 86.1092, "step": 86870 }, { "epoch": 0.3510061935139809, "grad_norm": 715.7470703125, "learning_rate": 3.2987899207931105e-05, "loss": 72.2611, "step": 86880 }, { "epoch": 0.35104659477934846, "grad_norm": 668.6033325195312, "learning_rate": 3.29857755006868e-05, "loss": 86.1486, "step": 86890 }, { "epoch": 0.3510869960447161, "grad_norm": 384.3507385253906, "learning_rate": 3.298365154027812e-05, "loss": 46.282, "step": 86900 }, { "epoch": 0.35112739731008374, "grad_norm": 488.5640869140625, "learning_rate": 3.298152732674647e-05, "loss": 42.0539, "step": 86910 }, { "epoch": 0.3511677985754514, "grad_norm": 1133.824462890625, "learning_rate": 3.2979402860133264e-05, "loss": 66.0079, "step": 86920 }, { "epoch": 0.351208199840819, "grad_norm": 788.2798461914062, "learning_rate": 3.297727814047991e-05, "loss": 51.0111, "step": 86930 }, { "epoch": 0.35124860110618666, "grad_norm": 488.20965576171875, "learning_rate": 3.297515316782784e-05, "loss": 50.3032, "step": 86940 }, { "epoch": 0.3512890023715543, "grad_norm": 794.2489624023438, "learning_rate": 3.297302794221849e-05, "loss": 65.2345, "step": 86950 }, { "epoch": 0.3513294036369219, "grad_norm": 939.7693481445312, "learning_rate": 3.2970902463693264e-05, "loss": 82.1904, "step": 86960 }, { "epoch": 0.35136980490228953, "grad_norm": 334.984375, "learning_rate": 3.296877673229362e-05, "loss": 75.9347, "step": 86970 }, { "epoch": 0.35141020616765717, "grad_norm": 1295.0634765625, "learning_rate": 3.2966650748061004e-05, "loss": 78.6464, "step": 86980 }, { "epoch": 0.3514506074330248, "grad_norm": 273.43988037109375, "learning_rate": 3.2964524511036856e-05, "loss": 44.0533, "step": 86990 }, { "epoch": 0.35149100869839245, "grad_norm": 690.1515502929688, "learning_rate": 3.2962398021262623e-05, "loss": 56.9621, "step": 87000 }, { "epoch": 0.3515314099637601, "grad_norm": 715.7372436523438, "learning_rate": 3.2960271278779764e-05, "loss": 69.2657, "step": 87010 }, { "epoch": 0.3515718112291277, "grad_norm": 443.57598876953125, "learning_rate": 3.295814428362975e-05, "loss": 60.3319, "step": 87020 }, { "epoch": 0.3516122124944953, "grad_norm": 566.7713623046875, "learning_rate": 3.295601703585404e-05, "loss": 39.7271, "step": 87030 }, { "epoch": 0.35165261375986295, "grad_norm": 1608.1795654296875, "learning_rate": 3.2953889535494114e-05, "loss": 56.2998, "step": 87040 }, { "epoch": 0.3516930150252306, "grad_norm": 1522.18505859375, "learning_rate": 3.295176178259143e-05, "loss": 71.0216, "step": 87050 }, { "epoch": 0.35173341629059823, "grad_norm": 899.77294921875, "learning_rate": 3.294963377718749e-05, "loss": 66.3725, "step": 87060 }, { "epoch": 0.3517738175559659, "grad_norm": 425.5625, "learning_rate": 3.294750551932377e-05, "loss": 83.7303, "step": 87070 }, { "epoch": 0.35181421882133346, "grad_norm": 1519.5233154296875, "learning_rate": 3.294537700904177e-05, "loss": 51.6581, "step": 87080 }, { "epoch": 0.3518546200867011, "grad_norm": 389.0169677734375, "learning_rate": 3.294324824638297e-05, "loss": 88.9213, "step": 87090 }, { "epoch": 0.35189502135206874, "grad_norm": 791.9541625976562, "learning_rate": 3.294111923138889e-05, "loss": 64.3564, "step": 87100 }, { "epoch": 0.3519354226174364, "grad_norm": 1049.439208984375, "learning_rate": 3.2938989964101026e-05, "loss": 91.8242, "step": 87110 }, { "epoch": 0.351975823882804, "grad_norm": 1153.3204345703125, "learning_rate": 3.293686044456089e-05, "loss": 77.8267, "step": 87120 }, { "epoch": 0.35201622514817166, "grad_norm": 607.4779052734375, "learning_rate": 3.293473067281e-05, "loss": 48.7235, "step": 87130 }, { "epoch": 0.3520566264135393, "grad_norm": 516.1886596679688, "learning_rate": 3.293260064888988e-05, "loss": 73.5357, "step": 87140 }, { "epoch": 0.3520970276789069, "grad_norm": 231.58335876464844, "learning_rate": 3.293047037284205e-05, "loss": 40.6186, "step": 87150 }, { "epoch": 0.3521374289442745, "grad_norm": 897.0509033203125, "learning_rate": 3.292833984470804e-05, "loss": 81.0548, "step": 87160 }, { "epoch": 0.35217783020964216, "grad_norm": 512.804443359375, "learning_rate": 3.2926209064529384e-05, "loss": 106.7156, "step": 87170 }, { "epoch": 0.3522182314750098, "grad_norm": 498.0970458984375, "learning_rate": 3.292407803234763e-05, "loss": 67.021, "step": 87180 }, { "epoch": 0.35225863274037744, "grad_norm": 656.015625, "learning_rate": 3.292194674820433e-05, "loss": 74.4482, "step": 87190 }, { "epoch": 0.3522990340057451, "grad_norm": 757.9091186523438, "learning_rate": 3.2919815212141025e-05, "loss": 58.3926, "step": 87200 }, { "epoch": 0.35233943527111267, "grad_norm": 521.5278930664062, "learning_rate": 3.2917683424199255e-05, "loss": 76.3875, "step": 87210 }, { "epoch": 0.3523798365364803, "grad_norm": 1062.794677734375, "learning_rate": 3.291555138442062e-05, "loss": 62.0553, "step": 87220 }, { "epoch": 0.35242023780184795, "grad_norm": 464.8687438964844, "learning_rate": 3.291341909284664e-05, "loss": 57.1597, "step": 87230 }, { "epoch": 0.3524606390672156, "grad_norm": 1125.7791748046875, "learning_rate": 3.291128654951892e-05, "loss": 64.3481, "step": 87240 }, { "epoch": 0.3525010403325832, "grad_norm": 1071.926513671875, "learning_rate": 3.290915375447902e-05, "loss": 78.0845, "step": 87250 }, { "epoch": 0.35254144159795087, "grad_norm": 1123.571533203125, "learning_rate": 3.290702070776851e-05, "loss": 57.5495, "step": 87260 }, { "epoch": 0.3525818428633185, "grad_norm": 991.531005859375, "learning_rate": 3.2904887409429e-05, "loss": 51.8717, "step": 87270 }, { "epoch": 0.3526222441286861, "grad_norm": 1023.5750122070312, "learning_rate": 3.2902753859502056e-05, "loss": 40.4581, "step": 87280 }, { "epoch": 0.35266264539405373, "grad_norm": 751.7762451171875, "learning_rate": 3.290062005802929e-05, "loss": 61.994, "step": 87290 }, { "epoch": 0.35270304665942137, "grad_norm": 250.3064422607422, "learning_rate": 3.289848600505229e-05, "loss": 54.284, "step": 87300 }, { "epoch": 0.352743447924789, "grad_norm": 519.025146484375, "learning_rate": 3.289635170061267e-05, "loss": 82.9329, "step": 87310 }, { "epoch": 0.35278384919015665, "grad_norm": 849.8499755859375, "learning_rate": 3.289421714475203e-05, "loss": 72.7599, "step": 87320 }, { "epoch": 0.3528242504555243, "grad_norm": 1280.5404052734375, "learning_rate": 3.289208233751199e-05, "loss": 60.5242, "step": 87330 }, { "epoch": 0.3528646517208919, "grad_norm": 854.2972412109375, "learning_rate": 3.288994727893416e-05, "loss": 73.9743, "step": 87340 }, { "epoch": 0.3529050529862595, "grad_norm": 1820.94482421875, "learning_rate": 3.2887811969060184e-05, "loss": 43.6074, "step": 87350 }, { "epoch": 0.35294545425162716, "grad_norm": 745.350341796875, "learning_rate": 3.288567640793167e-05, "loss": 118.5294, "step": 87360 }, { "epoch": 0.3529858555169948, "grad_norm": 1059.162109375, "learning_rate": 3.288354059559026e-05, "loss": 62.4829, "step": 87370 }, { "epoch": 0.35302625678236244, "grad_norm": 267.9883728027344, "learning_rate": 3.28814045320776e-05, "loss": 47.2061, "step": 87380 }, { "epoch": 0.3530666580477301, "grad_norm": 870.7761840820312, "learning_rate": 3.287926821743532e-05, "loss": 68.1026, "step": 87390 }, { "epoch": 0.35310705931309766, "grad_norm": 1524.5283203125, "learning_rate": 3.287713165170508e-05, "loss": 70.0073, "step": 87400 }, { "epoch": 0.3531474605784653, "grad_norm": 1669.3951416015625, "learning_rate": 3.2874994834928524e-05, "loss": 42.2512, "step": 87410 }, { "epoch": 0.35318786184383294, "grad_norm": 862.4916381835938, "learning_rate": 3.2872857767147316e-05, "loss": 54.6232, "step": 87420 }, { "epoch": 0.3532282631092006, "grad_norm": 201.9671630859375, "learning_rate": 3.2870720448403127e-05, "loss": 68.1916, "step": 87430 }, { "epoch": 0.3532686643745682, "grad_norm": 894.3649291992188, "learning_rate": 3.286858287873761e-05, "loss": 84.1714, "step": 87440 }, { "epoch": 0.35330906563993586, "grad_norm": 512.4931640625, "learning_rate": 3.286644505819244e-05, "loss": 33.4036, "step": 87450 }, { "epoch": 0.3533494669053035, "grad_norm": 853.0204467773438, "learning_rate": 3.286430698680931e-05, "loss": 50.9634, "step": 87460 }, { "epoch": 0.3533898681706711, "grad_norm": 2792.16455078125, "learning_rate": 3.286216866462988e-05, "loss": 67.5983, "step": 87470 }, { "epoch": 0.3534302694360387, "grad_norm": 479.12408447265625, "learning_rate": 3.286003009169586e-05, "loss": 43.4239, "step": 87480 }, { "epoch": 0.35347067070140636, "grad_norm": 424.5600891113281, "learning_rate": 3.285789126804893e-05, "loss": 89.1872, "step": 87490 }, { "epoch": 0.353511071966774, "grad_norm": 921.9595947265625, "learning_rate": 3.285575219373079e-05, "loss": 38.4102, "step": 87500 }, { "epoch": 0.35355147323214164, "grad_norm": 765.3430786132812, "learning_rate": 3.285361286878314e-05, "loss": 76.0972, "step": 87510 }, { "epoch": 0.3535918744975093, "grad_norm": 561.0750122070312, "learning_rate": 3.2851473293247694e-05, "loss": 52.4396, "step": 87520 }, { "epoch": 0.35363227576287687, "grad_norm": 505.8731994628906, "learning_rate": 3.2849333467166156e-05, "loss": 81.7981, "step": 87530 }, { "epoch": 0.3536726770282445, "grad_norm": 926.2390747070312, "learning_rate": 3.284719339058025e-05, "loss": 50.5313, "step": 87540 }, { "epoch": 0.35371307829361215, "grad_norm": 870.8189697265625, "learning_rate": 3.284505306353169e-05, "loss": 66.9499, "step": 87550 }, { "epoch": 0.3537534795589798, "grad_norm": 860.2401733398438, "learning_rate": 3.284291248606221e-05, "loss": 81.4307, "step": 87560 }, { "epoch": 0.35379388082434743, "grad_norm": 335.7196044921875, "learning_rate": 3.284077165821354e-05, "loss": 69.8784, "step": 87570 }, { "epoch": 0.35383428208971507, "grad_norm": 1089.861083984375, "learning_rate": 3.2838630580027416e-05, "loss": 54.2658, "step": 87580 }, { "epoch": 0.3538746833550827, "grad_norm": 546.7323608398438, "learning_rate": 3.2836489251545576e-05, "loss": 46.5775, "step": 87590 }, { "epoch": 0.3539150846204503, "grad_norm": 283.970458984375, "learning_rate": 3.2834347672809776e-05, "loss": 70.2304, "step": 87600 }, { "epoch": 0.35395548588581793, "grad_norm": 841.40576171875, "learning_rate": 3.283220584386175e-05, "loss": 63.9377, "step": 87610 }, { "epoch": 0.3539958871511856, "grad_norm": 597.7101440429688, "learning_rate": 3.283006376474327e-05, "loss": 58.1834, "step": 87620 }, { "epoch": 0.3540362884165532, "grad_norm": 344.36767578125, "learning_rate": 3.2827921435496097e-05, "loss": 51.8543, "step": 87630 }, { "epoch": 0.35407668968192085, "grad_norm": 778.3963623046875, "learning_rate": 3.2825778856161984e-05, "loss": 83.5701, "step": 87640 }, { "epoch": 0.3541170909472885, "grad_norm": 414.31817626953125, "learning_rate": 3.2823636026782715e-05, "loss": 43.3595, "step": 87650 }, { "epoch": 0.3541574922126561, "grad_norm": 983.6788330078125, "learning_rate": 3.282149294740005e-05, "loss": 58.7744, "step": 87660 }, { "epoch": 0.3541978934780237, "grad_norm": 613.5213012695312, "learning_rate": 3.2819349618055784e-05, "loss": 62.683, "step": 87670 }, { "epoch": 0.35423829474339136, "grad_norm": 985.743408203125, "learning_rate": 3.28172060387917e-05, "loss": 63.0019, "step": 87680 }, { "epoch": 0.354278696008759, "grad_norm": 1007.1724243164062, "learning_rate": 3.2815062209649585e-05, "loss": 76.2185, "step": 87690 }, { "epoch": 0.35431909727412664, "grad_norm": 2079.40087890625, "learning_rate": 3.281291813067123e-05, "loss": 104.9009, "step": 87700 }, { "epoch": 0.3543594985394943, "grad_norm": 1013.1952514648438, "learning_rate": 3.2810773801898445e-05, "loss": 70.3131, "step": 87710 }, { "epoch": 0.35439989980486186, "grad_norm": 685.0365600585938, "learning_rate": 3.2808629223373026e-05, "loss": 68.3621, "step": 87720 }, { "epoch": 0.3544403010702295, "grad_norm": 1584.983642578125, "learning_rate": 3.280648439513679e-05, "loss": 82.7203, "step": 87730 }, { "epoch": 0.35448070233559714, "grad_norm": 607.6953735351562, "learning_rate": 3.2804339317231545e-05, "loss": 53.9031, "step": 87740 }, { "epoch": 0.3545211036009648, "grad_norm": 3307.65234375, "learning_rate": 3.2802193989699116e-05, "loss": 83.5846, "step": 87750 }, { "epoch": 0.3545615048663324, "grad_norm": 792.563720703125, "learning_rate": 3.2800048412581315e-05, "loss": 49.468, "step": 87760 }, { "epoch": 0.35460190613170006, "grad_norm": 404.6004333496094, "learning_rate": 3.279790258591999e-05, "loss": 60.5075, "step": 87770 }, { "epoch": 0.3546423073970677, "grad_norm": 1089.806640625, "learning_rate": 3.279575650975696e-05, "loss": 61.289, "step": 87780 }, { "epoch": 0.3546827086624353, "grad_norm": 544.2926635742188, "learning_rate": 3.279361018413407e-05, "loss": 55.3457, "step": 87790 }, { "epoch": 0.3547231099278029, "grad_norm": 518.1968383789062, "learning_rate": 3.2791463609093164e-05, "loss": 77.1913, "step": 87800 }, { "epoch": 0.35476351119317057, "grad_norm": 690.2503662109375, "learning_rate": 3.278931678467609e-05, "loss": 87.9437, "step": 87810 }, { "epoch": 0.3548039124585382, "grad_norm": 540.9448852539062, "learning_rate": 3.27871697109247e-05, "loss": 60.3074, "step": 87820 }, { "epoch": 0.35484431372390585, "grad_norm": 465.333984375, "learning_rate": 3.2785022387880854e-05, "loss": 64.1311, "step": 87830 }, { "epoch": 0.3548847149892735, "grad_norm": 807.2095947265625, "learning_rate": 3.278287481558641e-05, "loss": 70.1779, "step": 87840 }, { "epoch": 0.35492511625464107, "grad_norm": 627.5873413085938, "learning_rate": 3.278072699408324e-05, "loss": 72.7112, "step": 87850 }, { "epoch": 0.3549655175200087, "grad_norm": 3926.626953125, "learning_rate": 3.2778578923413226e-05, "loss": 61.1929, "step": 87860 }, { "epoch": 0.35500591878537635, "grad_norm": 671.0523071289062, "learning_rate": 3.2776430603618225e-05, "loss": 71.2781, "step": 87870 }, { "epoch": 0.355046320050744, "grad_norm": 821.95947265625, "learning_rate": 3.277428203474014e-05, "loss": 65.0558, "step": 87880 }, { "epoch": 0.35508672131611163, "grad_norm": 275.886474609375, "learning_rate": 3.277213321682085e-05, "loss": 41.9689, "step": 87890 }, { "epoch": 0.35512712258147927, "grad_norm": 592.1325073242188, "learning_rate": 3.276998414990225e-05, "loss": 78.3772, "step": 87900 }, { "epoch": 0.3551675238468469, "grad_norm": 463.1005859375, "learning_rate": 3.276783483402623e-05, "loss": 84.9977, "step": 87910 }, { "epoch": 0.3552079251122145, "grad_norm": 750.5401611328125, "learning_rate": 3.27656852692347e-05, "loss": 33.9231, "step": 87920 }, { "epoch": 0.35524832637758214, "grad_norm": 456.62890625, "learning_rate": 3.276353545556956e-05, "loss": 65.4011, "step": 87930 }, { "epoch": 0.3552887276429498, "grad_norm": 1070.725830078125, "learning_rate": 3.276138539307273e-05, "loss": 54.5551, "step": 87940 }, { "epoch": 0.3553291289083174, "grad_norm": 547.5213623046875, "learning_rate": 3.275923508178611e-05, "loss": 92.1273, "step": 87950 }, { "epoch": 0.35536953017368506, "grad_norm": 3793.371337890625, "learning_rate": 3.275708452175164e-05, "loss": 86.2895, "step": 87960 }, { "epoch": 0.3554099314390527, "grad_norm": 532.2041625976562, "learning_rate": 3.2754933713011245e-05, "loss": 62.463, "step": 87970 }, { "epoch": 0.3554503327044203, "grad_norm": 380.2835693359375, "learning_rate": 3.275278265560684e-05, "loss": 57.4501, "step": 87980 }, { "epoch": 0.3554907339697879, "grad_norm": 411.328369140625, "learning_rate": 3.275063134958038e-05, "loss": 66.6874, "step": 87990 }, { "epoch": 0.35553113523515556, "grad_norm": 804.7635498046875, "learning_rate": 3.27484797949738e-05, "loss": 56.3235, "step": 88000 }, { "epoch": 0.3555715365005232, "grad_norm": 354.7542419433594, "learning_rate": 3.274632799182904e-05, "loss": 49.5995, "step": 88010 }, { "epoch": 0.35561193776589084, "grad_norm": 1237.0531005859375, "learning_rate": 3.274417594018805e-05, "loss": 59.8406, "step": 88020 }, { "epoch": 0.3556523390312585, "grad_norm": 0.0, "learning_rate": 3.2742023640092785e-05, "loss": 54.7096, "step": 88030 }, { "epoch": 0.35569274029662606, "grad_norm": 1007.9598388671875, "learning_rate": 3.2739871091585216e-05, "loss": 78.0177, "step": 88040 }, { "epoch": 0.3557331415619937, "grad_norm": 811.9549560546875, "learning_rate": 3.27377182947073e-05, "loss": 52.268, "step": 88050 }, { "epoch": 0.35577354282736134, "grad_norm": 741.9793701171875, "learning_rate": 3.2735565249501005e-05, "loss": 78.049, "step": 88060 }, { "epoch": 0.355813944092729, "grad_norm": 1019.4302368164062, "learning_rate": 3.2733411956008314e-05, "loss": 59.1742, "step": 88070 }, { "epoch": 0.3558543453580966, "grad_norm": 1513.0257568359375, "learning_rate": 3.27312584142712e-05, "loss": 82.382, "step": 88080 }, { "epoch": 0.35589474662346426, "grad_norm": 1924.9989013671875, "learning_rate": 3.2729104624331643e-05, "loss": 66.7991, "step": 88090 }, { "epoch": 0.3559351478888319, "grad_norm": 439.1302185058594, "learning_rate": 3.272695058623165e-05, "loss": 73.8741, "step": 88100 }, { "epoch": 0.3559755491541995, "grad_norm": 534.2994995117188, "learning_rate": 3.272479630001319e-05, "loss": 69.6003, "step": 88110 }, { "epoch": 0.35601595041956713, "grad_norm": 1121.692626953125, "learning_rate": 3.272264176571828e-05, "loss": 67.2641, "step": 88120 }, { "epoch": 0.35605635168493477, "grad_norm": 732.5870361328125, "learning_rate": 3.272048698338892e-05, "loss": 94.3675, "step": 88130 }, { "epoch": 0.3560967529503024, "grad_norm": 765.2777099609375, "learning_rate": 3.271833195306711e-05, "loss": 54.8232, "step": 88140 }, { "epoch": 0.35613715421567005, "grad_norm": 1179.7957763671875, "learning_rate": 3.271617667479489e-05, "loss": 105.0848, "step": 88150 }, { "epoch": 0.3561775554810377, "grad_norm": 1157.9771728515625, "learning_rate": 3.271402114861424e-05, "loss": 105.5937, "step": 88160 }, { "epoch": 0.3562179567464053, "grad_norm": 701.4404907226562, "learning_rate": 3.271186537456721e-05, "loss": 50.7279, "step": 88170 }, { "epoch": 0.3562583580117729, "grad_norm": 722.4993896484375, "learning_rate": 3.270970935269582e-05, "loss": 51.4809, "step": 88180 }, { "epoch": 0.35629875927714055, "grad_norm": 1130.43408203125, "learning_rate": 3.27075530830421e-05, "loss": 60.8365, "step": 88190 }, { "epoch": 0.3563391605425082, "grad_norm": 1566.13427734375, "learning_rate": 3.270539656564809e-05, "loss": 63.4244, "step": 88200 }, { "epoch": 0.35637956180787583, "grad_norm": 599.3772583007812, "learning_rate": 3.270323980055583e-05, "loss": 58.7716, "step": 88210 }, { "epoch": 0.3564199630732435, "grad_norm": 2744.5830078125, "learning_rate": 3.270108278780738e-05, "loss": 94.2214, "step": 88220 }, { "epoch": 0.35646036433861106, "grad_norm": 621.2742309570312, "learning_rate": 3.2698925527444775e-05, "loss": 65.0944, "step": 88230 }, { "epoch": 0.3565007656039787, "grad_norm": 720.038818359375, "learning_rate": 3.269676801951008e-05, "loss": 108.5621, "step": 88240 }, { "epoch": 0.35654116686934634, "grad_norm": 2238.180419921875, "learning_rate": 3.2694610264045355e-05, "loss": 59.7955, "step": 88250 }, { "epoch": 0.356581568134714, "grad_norm": 478.5419921875, "learning_rate": 3.269245226109267e-05, "loss": 41.493, "step": 88260 }, { "epoch": 0.3566219694000816, "grad_norm": 315.3074951171875, "learning_rate": 3.269029401069409e-05, "loss": 46.2351, "step": 88270 }, { "epoch": 0.35666237066544926, "grad_norm": 363.5831604003906, "learning_rate": 3.2688135512891696e-05, "loss": 52.8951, "step": 88280 }, { "epoch": 0.3567027719308169, "grad_norm": 240.89109802246094, "learning_rate": 3.268597676772757e-05, "loss": 58.7392, "step": 88290 }, { "epoch": 0.3567431731961845, "grad_norm": 657.1890869140625, "learning_rate": 3.268381777524379e-05, "loss": 61.5428, "step": 88300 }, { "epoch": 0.3567835744615521, "grad_norm": 1022.2767944335938, "learning_rate": 3.268165853548247e-05, "loss": 114.9119, "step": 88310 }, { "epoch": 0.35682397572691976, "grad_norm": 1009.5117797851562, "learning_rate": 3.2679499048485665e-05, "loss": 72.1214, "step": 88320 }, { "epoch": 0.3568643769922874, "grad_norm": 748.3343505859375, "learning_rate": 3.267733931429551e-05, "loss": 48.6923, "step": 88330 }, { "epoch": 0.35690477825765504, "grad_norm": 1002.4199829101562, "learning_rate": 3.2675179332954094e-05, "loss": 87.0557, "step": 88340 }, { "epoch": 0.3569451795230227, "grad_norm": 543.9052734375, "learning_rate": 3.267301910450353e-05, "loss": 68.6447, "step": 88350 }, { "epoch": 0.35698558078839027, "grad_norm": 585.1415405273438, "learning_rate": 3.267085862898594e-05, "loss": 60.3965, "step": 88360 }, { "epoch": 0.3570259820537579, "grad_norm": 664.0950317382812, "learning_rate": 3.266869790644344e-05, "loss": 57.0627, "step": 88370 }, { "epoch": 0.35706638331912555, "grad_norm": 455.95172119140625, "learning_rate": 3.266653693691814e-05, "loss": 59.9233, "step": 88380 }, { "epoch": 0.3571067845844932, "grad_norm": 621.0262451171875, "learning_rate": 3.266437572045219e-05, "loss": 63.693, "step": 88390 }, { "epoch": 0.3571471858498608, "grad_norm": 1385.278076171875, "learning_rate": 3.266221425708771e-05, "loss": 56.2835, "step": 88400 }, { "epoch": 0.35718758711522847, "grad_norm": 479.0565490722656, "learning_rate": 3.266005254686686e-05, "loss": 62.6534, "step": 88410 }, { "epoch": 0.3572279883805961, "grad_norm": 901.646484375, "learning_rate": 3.265789058983175e-05, "loss": 55.1813, "step": 88420 }, { "epoch": 0.3572683896459637, "grad_norm": 837.3994750976562, "learning_rate": 3.265572838602455e-05, "loss": 65.0508, "step": 88430 }, { "epoch": 0.35730879091133133, "grad_norm": 1183.2374267578125, "learning_rate": 3.265356593548741e-05, "loss": 60.1707, "step": 88440 }, { "epoch": 0.35734919217669897, "grad_norm": 556.451416015625, "learning_rate": 3.265140323826249e-05, "loss": 56.4609, "step": 88450 }, { "epoch": 0.3573895934420666, "grad_norm": 938.9976196289062, "learning_rate": 3.264924029439195e-05, "loss": 85.0968, "step": 88460 }, { "epoch": 0.35742999470743425, "grad_norm": 1263.639404296875, "learning_rate": 3.264707710391796e-05, "loss": 55.0165, "step": 88470 }, { "epoch": 0.3574703959728019, "grad_norm": 419.4474182128906, "learning_rate": 3.264491366688269e-05, "loss": 47.9041, "step": 88480 }, { "epoch": 0.3575107972381695, "grad_norm": 1374.3382568359375, "learning_rate": 3.264274998332831e-05, "loss": 62.4838, "step": 88490 }, { "epoch": 0.3575511985035371, "grad_norm": 1557.05078125, "learning_rate": 3.264058605329702e-05, "loss": 59.3793, "step": 88500 }, { "epoch": 0.35759159976890476, "grad_norm": 843.0123291015625, "learning_rate": 3.2638421876831e-05, "loss": 75.1318, "step": 88510 }, { "epoch": 0.3576320010342724, "grad_norm": 518.8107299804688, "learning_rate": 3.2636257453972424e-05, "loss": 74.0245, "step": 88520 }, { "epoch": 0.35767240229964004, "grad_norm": 714.6971435546875, "learning_rate": 3.2634092784763515e-05, "loss": 74.727, "step": 88530 }, { "epoch": 0.3577128035650077, "grad_norm": 1000.6592407226562, "learning_rate": 3.2631927869246456e-05, "loss": 75.3208, "step": 88540 }, { "epoch": 0.35775320483037526, "grad_norm": 361.2498779296875, "learning_rate": 3.2629762707463466e-05, "loss": 52.9834, "step": 88550 }, { "epoch": 0.3577936060957429, "grad_norm": 305.6169128417969, "learning_rate": 3.2627597299456746e-05, "loss": 69.2028, "step": 88560 }, { "epoch": 0.35783400736111054, "grad_norm": 413.1781311035156, "learning_rate": 3.262543164526852e-05, "loss": 62.5969, "step": 88570 }, { "epoch": 0.3578744086264782, "grad_norm": 572.3367309570312, "learning_rate": 3.2623265744941e-05, "loss": 76.3019, "step": 88580 }, { "epoch": 0.3579148098918458, "grad_norm": 896.4129638671875, "learning_rate": 3.262109959851642e-05, "loss": 58.3669, "step": 88590 }, { "epoch": 0.35795521115721346, "grad_norm": 1135.0291748046875, "learning_rate": 3.2618933206036994e-05, "loss": 64.4839, "step": 88600 }, { "epoch": 0.3579956124225811, "grad_norm": 4038.096435546875, "learning_rate": 3.2616766567544976e-05, "loss": 94.1078, "step": 88610 }, { "epoch": 0.3580360136879487, "grad_norm": 469.45233154296875, "learning_rate": 3.26145996830826e-05, "loss": 68.6764, "step": 88620 }, { "epoch": 0.3580764149533163, "grad_norm": 483.4333801269531, "learning_rate": 3.261243255269211e-05, "loss": 37.7474, "step": 88630 }, { "epoch": 0.35811681621868396, "grad_norm": 1233.3927001953125, "learning_rate": 3.2610265176415746e-05, "loss": 68.6271, "step": 88640 }, { "epoch": 0.3581572174840516, "grad_norm": 624.0841064453125, "learning_rate": 3.260809755429578e-05, "loss": 65.5295, "step": 88650 }, { "epoch": 0.35819761874941924, "grad_norm": 818.8887939453125, "learning_rate": 3.260592968637445e-05, "loss": 86.7369, "step": 88660 }, { "epoch": 0.3582380200147869, "grad_norm": 2144.048828125, "learning_rate": 3.260376157269404e-05, "loss": 77.7664, "step": 88670 }, { "epoch": 0.35827842128015447, "grad_norm": 284.1769104003906, "learning_rate": 3.2601593213296805e-05, "loss": 68.8628, "step": 88680 }, { "epoch": 0.3583188225455221, "grad_norm": 327.7435302734375, "learning_rate": 3.259942460822503e-05, "loss": 61.975, "step": 88690 }, { "epoch": 0.35835922381088975, "grad_norm": 985.4323120117188, "learning_rate": 3.2597255757520976e-05, "loss": 63.7146, "step": 88700 }, { "epoch": 0.3583996250762574, "grad_norm": 466.5003662109375, "learning_rate": 3.2595086661226943e-05, "loss": 54.2732, "step": 88710 }, { "epoch": 0.35844002634162503, "grad_norm": 750.10498046875, "learning_rate": 3.25929173193852e-05, "loss": 67.922, "step": 88720 }, { "epoch": 0.35848042760699267, "grad_norm": 617.0565795898438, "learning_rate": 3.259074773203806e-05, "loss": 68.2847, "step": 88730 }, { "epoch": 0.3585208288723603, "grad_norm": 519.65673828125, "learning_rate": 3.2588577899227814e-05, "loss": 90.388, "step": 88740 }, { "epoch": 0.3585612301377279, "grad_norm": 626.7796020507812, "learning_rate": 3.258640782099675e-05, "loss": 58.2107, "step": 88750 }, { "epoch": 0.35860163140309553, "grad_norm": 940.4680786132812, "learning_rate": 3.258423749738719e-05, "loss": 67.1222, "step": 88760 }, { "epoch": 0.3586420326684632, "grad_norm": 1334.208984375, "learning_rate": 3.258206692844145e-05, "loss": 53.6078, "step": 88770 }, { "epoch": 0.3586824339338308, "grad_norm": 1046.6566162109375, "learning_rate": 3.2579896114201826e-05, "loss": 76.479, "step": 88780 }, { "epoch": 0.35872283519919845, "grad_norm": 311.5898742675781, "learning_rate": 3.257772505471065e-05, "loss": 83.0157, "step": 88790 }, { "epoch": 0.3587632364645661, "grad_norm": 1352.649169921875, "learning_rate": 3.257555375001026e-05, "loss": 65.9161, "step": 88800 }, { "epoch": 0.3588036377299337, "grad_norm": 1243.841796875, "learning_rate": 3.257338220014297e-05, "loss": 72.3628, "step": 88810 }, { "epoch": 0.3588440389953013, "grad_norm": 3334.228759765625, "learning_rate": 3.257121040515112e-05, "loss": 106.4708, "step": 88820 }, { "epoch": 0.35888444026066896, "grad_norm": 927.7748413085938, "learning_rate": 3.2569038365077045e-05, "loss": 50.691, "step": 88830 }, { "epoch": 0.3589248415260366, "grad_norm": 657.2202758789062, "learning_rate": 3.2566866079963104e-05, "loss": 83.4183, "step": 88840 }, { "epoch": 0.35896524279140424, "grad_norm": 1189.534423828125, "learning_rate": 3.256469354985163e-05, "loss": 85.9154, "step": 88850 }, { "epoch": 0.3590056440567719, "grad_norm": 950.003662109375, "learning_rate": 3.2562520774785e-05, "loss": 76.2324, "step": 88860 }, { "epoch": 0.35904604532213946, "grad_norm": 284.270263671875, "learning_rate": 3.256034775480555e-05, "loss": 61.8749, "step": 88870 }, { "epoch": 0.3590864465875071, "grad_norm": 972.6361694335938, "learning_rate": 3.2558174489955656e-05, "loss": 74.3974, "step": 88880 }, { "epoch": 0.35912684785287474, "grad_norm": 525.1915893554688, "learning_rate": 3.2556000980277686e-05, "loss": 51.2244, "step": 88890 }, { "epoch": 0.3591672491182424, "grad_norm": 702.22412109375, "learning_rate": 3.255382722581401e-05, "loss": 52.7797, "step": 88900 }, { "epoch": 0.35920765038361, "grad_norm": 4037.07958984375, "learning_rate": 3.2551653226607016e-05, "loss": 89.1209, "step": 88910 }, { "epoch": 0.35924805164897766, "grad_norm": 710.5283203125, "learning_rate": 3.2549478982699074e-05, "loss": 53.0275, "step": 88920 }, { "epoch": 0.3592884529143453, "grad_norm": 548.4154663085938, "learning_rate": 3.254730449413258e-05, "loss": 66.1228, "step": 88930 }, { "epoch": 0.3593288541797129, "grad_norm": 722.6907348632812, "learning_rate": 3.2545129760949924e-05, "loss": 60.9345, "step": 88940 }, { "epoch": 0.3593692554450805, "grad_norm": 747.479248046875, "learning_rate": 3.254295478319351e-05, "loss": 55.8477, "step": 88950 }, { "epoch": 0.35940965671044817, "grad_norm": 924.00927734375, "learning_rate": 3.254077956090573e-05, "loss": 49.4122, "step": 88960 }, { "epoch": 0.3594500579758158, "grad_norm": 279.5837097167969, "learning_rate": 3.2538604094128994e-05, "loss": 82.6696, "step": 88970 }, { "epoch": 0.35949045924118345, "grad_norm": 784.2312622070312, "learning_rate": 3.253642838290572e-05, "loss": 88.6956, "step": 88980 }, { "epoch": 0.3595308605065511, "grad_norm": 1277.6805419921875, "learning_rate": 3.2534252427278316e-05, "loss": 48.7821, "step": 88990 }, { "epoch": 0.35957126177191867, "grad_norm": 835.9051513671875, "learning_rate": 3.253207622728921e-05, "loss": 64.9844, "step": 89000 }, { "epoch": 0.3596116630372863, "grad_norm": 326.860107421875, "learning_rate": 3.252989978298083e-05, "loss": 66.8233, "step": 89010 }, { "epoch": 0.35965206430265395, "grad_norm": 1021.7041015625, "learning_rate": 3.25277230943956e-05, "loss": 67.8086, "step": 89020 }, { "epoch": 0.3596924655680216, "grad_norm": 816.9478149414062, "learning_rate": 3.2525546161575964e-05, "loss": 71.5213, "step": 89030 }, { "epoch": 0.35973286683338923, "grad_norm": 746.73291015625, "learning_rate": 3.2523368984564345e-05, "loss": 58.0566, "step": 89040 }, { "epoch": 0.35977326809875687, "grad_norm": 604.9473876953125, "learning_rate": 3.252119156340321e-05, "loss": 63.2392, "step": 89050 }, { "epoch": 0.3598136693641245, "grad_norm": 431.6227111816406, "learning_rate": 3.2519013898134994e-05, "loss": 83.0441, "step": 89060 }, { "epoch": 0.3598540706294921, "grad_norm": 446.3559875488281, "learning_rate": 3.2516835988802155e-05, "loss": 57.1256, "step": 89070 }, { "epoch": 0.35989447189485974, "grad_norm": 688.2459106445312, "learning_rate": 3.251465783544716e-05, "loss": 53.325, "step": 89080 }, { "epoch": 0.3599348731602274, "grad_norm": 638.8283081054688, "learning_rate": 3.2512479438112464e-05, "loss": 42.3715, "step": 89090 }, { "epoch": 0.359975274425595, "grad_norm": 1033.72998046875, "learning_rate": 3.2510300796840546e-05, "loss": 73.0672, "step": 89100 }, { "epoch": 0.36001567569096266, "grad_norm": 1512.4454345703125, "learning_rate": 3.2508121911673866e-05, "loss": 63.4775, "step": 89110 }, { "epoch": 0.3600560769563303, "grad_norm": 584.1983642578125, "learning_rate": 3.250594278265491e-05, "loss": 57.5051, "step": 89120 }, { "epoch": 0.3600964782216979, "grad_norm": 0.0, "learning_rate": 3.250376340982616e-05, "loss": 62.77, "step": 89130 }, { "epoch": 0.3601368794870655, "grad_norm": 312.9271545410156, "learning_rate": 3.250158379323011e-05, "loss": 51.5549, "step": 89140 }, { "epoch": 0.36017728075243316, "grad_norm": 898.7651977539062, "learning_rate": 3.249940393290925e-05, "loss": 68.3641, "step": 89150 }, { "epoch": 0.3602176820178008, "grad_norm": 1063.464599609375, "learning_rate": 3.249722382890607e-05, "loss": 45.5237, "step": 89160 }, { "epoch": 0.36025808328316844, "grad_norm": 1576.947265625, "learning_rate": 3.249504348126308e-05, "loss": 54.5762, "step": 89170 }, { "epoch": 0.3602984845485361, "grad_norm": 738.2628784179688, "learning_rate": 3.249286289002278e-05, "loss": 52.6362, "step": 89180 }, { "epoch": 0.36033888581390366, "grad_norm": 977.0211181640625, "learning_rate": 3.2490682055227695e-05, "loss": 79.1429, "step": 89190 }, { "epoch": 0.3603792870792713, "grad_norm": 606.0816040039062, "learning_rate": 3.248850097692032e-05, "loss": 55.5943, "step": 89200 }, { "epoch": 0.36041968834463894, "grad_norm": 518.8965454101562, "learning_rate": 3.2486319655143196e-05, "loss": 78.7807, "step": 89210 }, { "epoch": 0.3604600896100066, "grad_norm": 314.8681335449219, "learning_rate": 3.248413808993884e-05, "loss": 48.6494, "step": 89220 }, { "epoch": 0.3605004908753742, "grad_norm": 808.1603393554688, "learning_rate": 3.248195628134979e-05, "loss": 55.6867, "step": 89230 }, { "epoch": 0.36054089214074186, "grad_norm": 549.8880004882812, "learning_rate": 3.2479774229418565e-05, "loss": 49.6709, "step": 89240 }, { "epoch": 0.3605812934061095, "grad_norm": 796.691650390625, "learning_rate": 3.247759193418773e-05, "loss": 58.077, "step": 89250 }, { "epoch": 0.3606216946714771, "grad_norm": 456.6281433105469, "learning_rate": 3.2475409395699805e-05, "loss": 61.6451, "step": 89260 }, { "epoch": 0.36066209593684473, "grad_norm": 348.8264465332031, "learning_rate": 3.2473226613997355e-05, "loss": 100.175, "step": 89270 }, { "epoch": 0.36070249720221237, "grad_norm": 778.9163208007812, "learning_rate": 3.247104358912293e-05, "loss": 92.9797, "step": 89280 }, { "epoch": 0.36074289846758, "grad_norm": 647.578857421875, "learning_rate": 3.2468860321119095e-05, "loss": 89.2201, "step": 89290 }, { "epoch": 0.36078329973294765, "grad_norm": 8475.5908203125, "learning_rate": 3.246667681002841e-05, "loss": 101.6309, "step": 89300 }, { "epoch": 0.3608237009983153, "grad_norm": 1095.387451171875, "learning_rate": 3.2464493055893436e-05, "loss": 79.6601, "step": 89310 }, { "epoch": 0.3608641022636829, "grad_norm": 345.1563720703125, "learning_rate": 3.246230905875675e-05, "loss": 59.279, "step": 89320 }, { "epoch": 0.3609045035290505, "grad_norm": 740.0067749023438, "learning_rate": 3.246012481866093e-05, "loss": 58.2936, "step": 89330 }, { "epoch": 0.36094490479441815, "grad_norm": 943.1046142578125, "learning_rate": 3.245794033564857e-05, "loss": 57.9797, "step": 89340 }, { "epoch": 0.3609853060597858, "grad_norm": 0.0, "learning_rate": 3.245575560976225e-05, "loss": 73.7864, "step": 89350 }, { "epoch": 0.36102570732515343, "grad_norm": 547.4214477539062, "learning_rate": 3.2453570641044565e-05, "loss": 47.4376, "step": 89360 }, { "epoch": 0.3610661085905211, "grad_norm": 746.9415283203125, "learning_rate": 3.24513854295381e-05, "loss": 37.7824, "step": 89370 }, { "epoch": 0.3611065098558887, "grad_norm": 581.8740234375, "learning_rate": 3.244919997528546e-05, "loss": 78.0141, "step": 89380 }, { "epoch": 0.3611469111212563, "grad_norm": 525.6729125976562, "learning_rate": 3.2447014278329275e-05, "loss": 65.2984, "step": 89390 }, { "epoch": 0.36118731238662394, "grad_norm": 1312.8070068359375, "learning_rate": 3.244482833871213e-05, "loss": 51.3537, "step": 89400 }, { "epoch": 0.3612277136519916, "grad_norm": 330.7095031738281, "learning_rate": 3.2442642156476653e-05, "loss": 35.5988, "step": 89410 }, { "epoch": 0.3612681149173592, "grad_norm": 744.0631103515625, "learning_rate": 3.244045573166545e-05, "loss": 57.9905, "step": 89420 }, { "epoch": 0.36130851618272686, "grad_norm": 5193.68212890625, "learning_rate": 3.243826906432117e-05, "loss": 76.9366, "step": 89430 }, { "epoch": 0.3613489174480945, "grad_norm": 1730.064697265625, "learning_rate": 3.2436082154486426e-05, "loss": 84.2516, "step": 89440 }, { "epoch": 0.3613893187134621, "grad_norm": 442.8031921386719, "learning_rate": 3.243389500220386e-05, "loss": 53.5648, "step": 89450 }, { "epoch": 0.3614297199788297, "grad_norm": 1522.3472900390625, "learning_rate": 3.243170760751611e-05, "loss": 88.243, "step": 89460 }, { "epoch": 0.36147012124419736, "grad_norm": 490.6719055175781, "learning_rate": 3.242951997046581e-05, "loss": 71.2257, "step": 89470 }, { "epoch": 0.361510522509565, "grad_norm": 0.0, "learning_rate": 3.242733209109563e-05, "loss": 46.0571, "step": 89480 }, { "epoch": 0.36155092377493264, "grad_norm": 703.7340698242188, "learning_rate": 3.242514396944821e-05, "loss": 40.7495, "step": 89490 }, { "epoch": 0.3615913250403003, "grad_norm": 754.905029296875, "learning_rate": 3.242295560556621e-05, "loss": 80.2023, "step": 89500 }, { "epoch": 0.36163172630566787, "grad_norm": 1635.7506103515625, "learning_rate": 3.24207669994923e-05, "loss": 70.7424, "step": 89510 }, { "epoch": 0.3616721275710355, "grad_norm": 0.0, "learning_rate": 3.2418578151269135e-05, "loss": 70.3404, "step": 89520 }, { "epoch": 0.36171252883640315, "grad_norm": 793.4153442382812, "learning_rate": 3.24163890609394e-05, "loss": 74.2491, "step": 89530 }, { "epoch": 0.3617529301017708, "grad_norm": 949.236083984375, "learning_rate": 3.2414199728545767e-05, "loss": 48.3229, "step": 89540 }, { "epoch": 0.3617933313671384, "grad_norm": 552.69091796875, "learning_rate": 3.2412010154130914e-05, "loss": 50.1861, "step": 89550 }, { "epoch": 0.36183373263250607, "grad_norm": 2303.78564453125, "learning_rate": 3.240982033773754e-05, "loss": 91.6548, "step": 89560 }, { "epoch": 0.3618741338978737, "grad_norm": 279.16015625, "learning_rate": 3.2407630279408326e-05, "loss": 32.6592, "step": 89570 }, { "epoch": 0.3619145351632413, "grad_norm": 892.2996826171875, "learning_rate": 3.240543997918598e-05, "loss": 59.1878, "step": 89580 }, { "epoch": 0.36195493642860893, "grad_norm": 757.2686767578125, "learning_rate": 3.240324943711318e-05, "loss": 56.4908, "step": 89590 }, { "epoch": 0.36199533769397657, "grad_norm": 304.4239196777344, "learning_rate": 3.240105865323266e-05, "loss": 34.8889, "step": 89600 }, { "epoch": 0.3620357389593442, "grad_norm": 532.8265380859375, "learning_rate": 3.23988676275871e-05, "loss": 107.3717, "step": 89610 }, { "epoch": 0.36207614022471185, "grad_norm": 1257.88720703125, "learning_rate": 3.239667636021925e-05, "loss": 75.3513, "step": 89620 }, { "epoch": 0.3621165414900795, "grad_norm": 907.2530517578125, "learning_rate": 3.239448485117181e-05, "loss": 60.0257, "step": 89630 }, { "epoch": 0.3621569427554471, "grad_norm": 495.6878662109375, "learning_rate": 3.23922931004875e-05, "loss": 95.7158, "step": 89640 }, { "epoch": 0.3621973440208147, "grad_norm": 723.4539184570312, "learning_rate": 3.239010110820906e-05, "loss": 50.1088, "step": 89650 }, { "epoch": 0.36223774528618236, "grad_norm": 611.675537109375, "learning_rate": 3.2387908874379213e-05, "loss": 60.4733, "step": 89660 }, { "epoch": 0.36227814655155, "grad_norm": 960.843994140625, "learning_rate": 3.2385716399040706e-05, "loss": 63.4884, "step": 89670 }, { "epoch": 0.36231854781691764, "grad_norm": 2095.677734375, "learning_rate": 3.238352368223629e-05, "loss": 81.218, "step": 89680 }, { "epoch": 0.3623589490822853, "grad_norm": 748.8034057617188, "learning_rate": 3.23813307240087e-05, "loss": 87.9378, "step": 89690 }, { "epoch": 0.3623993503476529, "grad_norm": 858.1019287109375, "learning_rate": 3.23791375244007e-05, "loss": 51.7633, "step": 89700 }, { "epoch": 0.3624397516130205, "grad_norm": 553.71826171875, "learning_rate": 3.237694408345503e-05, "loss": 80.8901, "step": 89710 }, { "epoch": 0.36248015287838814, "grad_norm": 741.5399780273438, "learning_rate": 3.2374750401214466e-05, "loss": 54.558, "step": 89720 }, { "epoch": 0.3625205541437558, "grad_norm": 545.9320678710938, "learning_rate": 3.2372556477721766e-05, "loss": 50.4829, "step": 89730 }, { "epoch": 0.3625609554091234, "grad_norm": 1244.79052734375, "learning_rate": 3.2370362313019725e-05, "loss": 66.7102, "step": 89740 }, { "epoch": 0.36260135667449106, "grad_norm": 1156.6531982421875, "learning_rate": 3.2368167907151086e-05, "loss": 58.8571, "step": 89750 }, { "epoch": 0.3626417579398587, "grad_norm": 1276.3740234375, "learning_rate": 3.236597326015865e-05, "loss": 72.6798, "step": 89760 }, { "epoch": 0.3626821592052263, "grad_norm": 768.4413452148438, "learning_rate": 3.23637783720852e-05, "loss": 59.6834, "step": 89770 }, { "epoch": 0.3627225604705939, "grad_norm": 723.568359375, "learning_rate": 3.236158324297353e-05, "loss": 60.3245, "step": 89780 }, { "epoch": 0.36276296173596156, "grad_norm": 753.067626953125, "learning_rate": 3.235938787286642e-05, "loss": 52.7935, "step": 89790 }, { "epoch": 0.3628033630013292, "grad_norm": 1123.5992431640625, "learning_rate": 3.235719226180669e-05, "loss": 43.8822, "step": 89800 }, { "epoch": 0.36284376426669684, "grad_norm": 841.1117553710938, "learning_rate": 3.2354996409837136e-05, "loss": 83.7583, "step": 89810 }, { "epoch": 0.3628841655320645, "grad_norm": 758.0658569335938, "learning_rate": 3.2352800317000555e-05, "loss": 61.8449, "step": 89820 }, { "epoch": 0.36292456679743207, "grad_norm": 745.8280639648438, "learning_rate": 3.235060398333978e-05, "loss": 51.7937, "step": 89830 }, { "epoch": 0.3629649680627997, "grad_norm": 0.0, "learning_rate": 3.234840740889762e-05, "loss": 60.1308, "step": 89840 }, { "epoch": 0.36300536932816735, "grad_norm": 422.6933288574219, "learning_rate": 3.23462105937169e-05, "loss": 53.8212, "step": 89850 }, { "epoch": 0.363045770593535, "grad_norm": 954.4671630859375, "learning_rate": 3.234401353784045e-05, "loss": 69.0127, "step": 89860 }, { "epoch": 0.36308617185890263, "grad_norm": 661.1810913085938, "learning_rate": 3.23418162413111e-05, "loss": 50.7474, "step": 89870 }, { "epoch": 0.36312657312427027, "grad_norm": 997.407958984375, "learning_rate": 3.233961870417169e-05, "loss": 88.6371, "step": 89880 }, { "epoch": 0.3631669743896379, "grad_norm": 502.9741516113281, "learning_rate": 3.233742092646506e-05, "loss": 42.5384, "step": 89890 }, { "epoch": 0.3632073756550055, "grad_norm": 779.2553100585938, "learning_rate": 3.2335222908234054e-05, "loss": 98.033, "step": 89900 }, { "epoch": 0.36324777692037313, "grad_norm": 1040.43359375, "learning_rate": 3.233302464952153e-05, "loss": 73.3634, "step": 89910 }, { "epoch": 0.3632881781857408, "grad_norm": 663.7528076171875, "learning_rate": 3.233082615037034e-05, "loss": 69.4142, "step": 89920 }, { "epoch": 0.3633285794511084, "grad_norm": 447.36468505859375, "learning_rate": 3.232862741082335e-05, "loss": 47.4241, "step": 89930 }, { "epoch": 0.36336898071647605, "grad_norm": 1314.697509765625, "learning_rate": 3.232642843092341e-05, "loss": 47.8006, "step": 89940 }, { "epoch": 0.3634093819818437, "grad_norm": 783.3645629882812, "learning_rate": 3.232422921071341e-05, "loss": 78.2489, "step": 89950 }, { "epoch": 0.3634497832472113, "grad_norm": 571.39111328125, "learning_rate": 3.2322029750236224e-05, "loss": 60.6246, "step": 89960 }, { "epoch": 0.3634901845125789, "grad_norm": 573.41259765625, "learning_rate": 3.2319830049534714e-05, "loss": 64.5672, "step": 89970 }, { "epoch": 0.36353058577794656, "grad_norm": 651.1929931640625, "learning_rate": 3.2317630108651775e-05, "loss": 82.1077, "step": 89980 }, { "epoch": 0.3635709870433142, "grad_norm": 841.3027954101562, "learning_rate": 3.231542992763029e-05, "loss": 48.8317, "step": 89990 }, { "epoch": 0.36361138830868184, "grad_norm": 1385.8458251953125, "learning_rate": 3.2313229506513167e-05, "loss": 82.3096, "step": 90000 }, { "epoch": 0.3636517895740495, "grad_norm": 633.955078125, "learning_rate": 3.231102884534329e-05, "loss": 83.0627, "step": 90010 }, { "epoch": 0.3636921908394171, "grad_norm": 873.541259765625, "learning_rate": 3.2308827944163576e-05, "loss": 82.5397, "step": 90020 }, { "epoch": 0.3637325921047847, "grad_norm": 738.3284912109375, "learning_rate": 3.230662680301692e-05, "loss": 110.8826, "step": 90030 }, { "epoch": 0.36377299337015234, "grad_norm": 799.2305297851562, "learning_rate": 3.2304425421946234e-05, "loss": 63.8086, "step": 90040 }, { "epoch": 0.36381339463552, "grad_norm": 1250.10400390625, "learning_rate": 3.230222380099445e-05, "loss": 78.0658, "step": 90050 }, { "epoch": 0.3638537959008876, "grad_norm": 1647.9473876953125, "learning_rate": 3.230002194020447e-05, "loss": 83.1571, "step": 90060 }, { "epoch": 0.36389419716625526, "grad_norm": 606.3142700195312, "learning_rate": 3.229781983961923e-05, "loss": 58.3433, "step": 90070 }, { "epoch": 0.3639345984316229, "grad_norm": 836.625244140625, "learning_rate": 3.229561749928166e-05, "loss": 60.6996, "step": 90080 }, { "epoch": 0.3639749996969905, "grad_norm": 459.7673034667969, "learning_rate": 3.22934149192347e-05, "loss": 55.3213, "step": 90090 }, { "epoch": 0.3640154009623581, "grad_norm": 681.5406494140625, "learning_rate": 3.229121209952129e-05, "loss": 57.4772, "step": 90100 }, { "epoch": 0.36405580222772577, "grad_norm": 593.47998046875, "learning_rate": 3.2289009040184375e-05, "loss": 69.6571, "step": 90110 }, { "epoch": 0.3640962034930934, "grad_norm": 646.6085205078125, "learning_rate": 3.2286805741266895e-05, "loss": 104.5698, "step": 90120 }, { "epoch": 0.36413660475846105, "grad_norm": 870.090576171875, "learning_rate": 3.228460220281181e-05, "loss": 81.7248, "step": 90130 }, { "epoch": 0.3641770060238287, "grad_norm": 647.8412475585938, "learning_rate": 3.2282398424862086e-05, "loss": 61.7405, "step": 90140 }, { "epoch": 0.36421740728919627, "grad_norm": 1034.1387939453125, "learning_rate": 3.228019440746068e-05, "loss": 60.8793, "step": 90150 }, { "epoch": 0.3642578085545639, "grad_norm": 603.2088012695312, "learning_rate": 3.2277990150650554e-05, "loss": 57.9373, "step": 90160 }, { "epoch": 0.36429820981993155, "grad_norm": 651.9238891601562, "learning_rate": 3.22757856544747e-05, "loss": 61.3861, "step": 90170 }, { "epoch": 0.3643386110852992, "grad_norm": 821.35009765625, "learning_rate": 3.227358091897608e-05, "loss": 91.5335, "step": 90180 }, { "epoch": 0.36437901235066683, "grad_norm": 570.3350830078125, "learning_rate": 3.227137594419768e-05, "loss": 53.6288, "step": 90190 }, { "epoch": 0.36441941361603447, "grad_norm": 1010.9404296875, "learning_rate": 3.2269170730182486e-05, "loss": 61.6317, "step": 90200 }, { "epoch": 0.3644598148814021, "grad_norm": 1174.55126953125, "learning_rate": 3.22669652769735e-05, "loss": 41.7424, "step": 90210 }, { "epoch": 0.3645002161467697, "grad_norm": 1207.1864013671875, "learning_rate": 3.22647595846137e-05, "loss": 73.6048, "step": 90220 }, { "epoch": 0.36454061741213734, "grad_norm": 532.3197631835938, "learning_rate": 3.2262553653146106e-05, "loss": 40.4853, "step": 90230 }, { "epoch": 0.364581018677505, "grad_norm": 310.5439453125, "learning_rate": 3.2260347482613714e-05, "loss": 57.3422, "step": 90240 }, { "epoch": 0.3646214199428726, "grad_norm": 915.779052734375, "learning_rate": 3.2258141073059533e-05, "loss": 89.975, "step": 90250 }, { "epoch": 0.36466182120824026, "grad_norm": 483.0838928222656, "learning_rate": 3.225593442452658e-05, "loss": 68.5153, "step": 90260 }, { "epoch": 0.3647022224736079, "grad_norm": 965.7793579101562, "learning_rate": 3.225372753705788e-05, "loss": 66.1839, "step": 90270 }, { "epoch": 0.3647426237389755, "grad_norm": 809.3526000976562, "learning_rate": 3.225152041069645e-05, "loss": 91.8366, "step": 90280 }, { "epoch": 0.3647830250043431, "grad_norm": 659.0712890625, "learning_rate": 3.224931304548532e-05, "loss": 66.1223, "step": 90290 }, { "epoch": 0.36482342626971076, "grad_norm": 1612.8360595703125, "learning_rate": 3.224710544146753e-05, "loss": 74.4651, "step": 90300 }, { "epoch": 0.3648638275350784, "grad_norm": 1003.6226196289062, "learning_rate": 3.224489759868612e-05, "loss": 49.3529, "step": 90310 }, { "epoch": 0.36490422880044604, "grad_norm": 2766.993896484375, "learning_rate": 3.224268951718411e-05, "loss": 77.647, "step": 90320 }, { "epoch": 0.3649446300658137, "grad_norm": 2475.29443359375, "learning_rate": 3.224048119700458e-05, "loss": 107.4289, "step": 90330 }, { "epoch": 0.3649850313311813, "grad_norm": 1772.4407958984375, "learning_rate": 3.223827263819056e-05, "loss": 81.8948, "step": 90340 }, { "epoch": 0.3650254325965489, "grad_norm": 1427.0467529296875, "learning_rate": 3.223606384078512e-05, "loss": 79.2289, "step": 90350 }, { "epoch": 0.36506583386191654, "grad_norm": 765.2987060546875, "learning_rate": 3.223385480483131e-05, "loss": 87.3908, "step": 90360 }, { "epoch": 0.3651062351272842, "grad_norm": 445.6130676269531, "learning_rate": 3.223164553037221e-05, "loss": 45.1544, "step": 90370 }, { "epoch": 0.3651466363926518, "grad_norm": 2689.060302734375, "learning_rate": 3.2229436017450876e-05, "loss": 61.4555, "step": 90380 }, { "epoch": 0.36518703765801946, "grad_norm": 5151.96533203125, "learning_rate": 3.222722626611039e-05, "loss": 94.6698, "step": 90390 }, { "epoch": 0.3652274389233871, "grad_norm": 1440.62353515625, "learning_rate": 3.222501627639384e-05, "loss": 77.1921, "step": 90400 }, { "epoch": 0.3652678401887547, "grad_norm": 547.9785766601562, "learning_rate": 3.222280604834429e-05, "loss": 52.1105, "step": 90410 }, { "epoch": 0.36530824145412233, "grad_norm": 355.76617431640625, "learning_rate": 3.222059558200486e-05, "loss": 94.4201, "step": 90420 }, { "epoch": 0.36534864271948997, "grad_norm": 989.900146484375, "learning_rate": 3.221838487741862e-05, "loss": 41.2473, "step": 90430 }, { "epoch": 0.3653890439848576, "grad_norm": 1165.8541259765625, "learning_rate": 3.2216173934628674e-05, "loss": 55.0149, "step": 90440 }, { "epoch": 0.36542944525022525, "grad_norm": 914.6465454101562, "learning_rate": 3.221396275367813e-05, "loss": 74.3536, "step": 90450 }, { "epoch": 0.3654698465155929, "grad_norm": 425.96435546875, "learning_rate": 3.2211751334610094e-05, "loss": 69.3202, "step": 90460 }, { "epoch": 0.3655102477809605, "grad_norm": 267.4327392578125, "learning_rate": 3.220953967746768e-05, "loss": 74.8355, "step": 90470 }, { "epoch": 0.3655506490463281, "grad_norm": 327.8559265136719, "learning_rate": 3.2207327782294e-05, "loss": 85.0147, "step": 90480 }, { "epoch": 0.36559105031169575, "grad_norm": 381.49578857421875, "learning_rate": 3.2205115649132185e-05, "loss": 65.0611, "step": 90490 }, { "epoch": 0.3656314515770634, "grad_norm": 960.8960571289062, "learning_rate": 3.220290327802536e-05, "loss": 63.5097, "step": 90500 }, { "epoch": 0.36567185284243103, "grad_norm": 415.2254333496094, "learning_rate": 3.2200690669016645e-05, "loss": 65.3691, "step": 90510 }, { "epoch": 0.3657122541077987, "grad_norm": 361.4107666015625, "learning_rate": 3.219847782214918e-05, "loss": 42.4056, "step": 90520 }, { "epoch": 0.3657526553731663, "grad_norm": 503.8821105957031, "learning_rate": 3.219626473746613e-05, "loss": 71.6832, "step": 90530 }, { "epoch": 0.3657930566385339, "grad_norm": 802.1369018554688, "learning_rate": 3.21940514150106e-05, "loss": 50.1877, "step": 90540 }, { "epoch": 0.36583345790390154, "grad_norm": 2277.003173828125, "learning_rate": 3.2191837854825766e-05, "loss": 75.0608, "step": 90550 }, { "epoch": 0.3658738591692692, "grad_norm": 1255.205322265625, "learning_rate": 3.218962405695478e-05, "loss": 61.8157, "step": 90560 }, { "epoch": 0.3659142604346368, "grad_norm": 426.26446533203125, "learning_rate": 3.2187410021440786e-05, "loss": 64.9316, "step": 90570 }, { "epoch": 0.36595466170000446, "grad_norm": 1173.32763671875, "learning_rate": 3.218519574832697e-05, "loss": 69.6508, "step": 90580 }, { "epoch": 0.3659950629653721, "grad_norm": 1251.942138671875, "learning_rate": 3.218298123765649e-05, "loss": 63.1345, "step": 90590 }, { "epoch": 0.3660354642307397, "grad_norm": 0.0, "learning_rate": 3.218076648947251e-05, "loss": 35.3188, "step": 90600 }, { "epoch": 0.3660758654961073, "grad_norm": 531.1837768554688, "learning_rate": 3.217855150381822e-05, "loss": 65.0267, "step": 90610 }, { "epoch": 0.36611626676147496, "grad_norm": 924.5548706054688, "learning_rate": 3.217633628073681e-05, "loss": 60.6692, "step": 90620 }, { "epoch": 0.3661566680268426, "grad_norm": 990.2822875976562, "learning_rate": 3.217412082027144e-05, "loss": 60.5045, "step": 90630 }, { "epoch": 0.36619706929221024, "grad_norm": 969.4849243164062, "learning_rate": 3.217190512246532e-05, "loss": 89.893, "step": 90640 }, { "epoch": 0.3662374705575779, "grad_norm": 575.37255859375, "learning_rate": 3.216968918736164e-05, "loss": 61.3946, "step": 90650 }, { "epoch": 0.3662778718229455, "grad_norm": 370.7169189453125, "learning_rate": 3.2167473015003616e-05, "loss": 40.7734, "step": 90660 }, { "epoch": 0.3663182730883131, "grad_norm": 859.6712646484375, "learning_rate": 3.216525660543444e-05, "loss": 60.6101, "step": 90670 }, { "epoch": 0.36635867435368075, "grad_norm": 460.1180419921875, "learning_rate": 3.216303995869731e-05, "loss": 74.8931, "step": 90680 }, { "epoch": 0.3663990756190484, "grad_norm": 522.7005004882812, "learning_rate": 3.2160823074835464e-05, "loss": 67.9626, "step": 90690 }, { "epoch": 0.366439476884416, "grad_norm": 645.8761596679688, "learning_rate": 3.215860595389211e-05, "loss": 65.7363, "step": 90700 }, { "epoch": 0.36647987814978367, "grad_norm": 436.8887634277344, "learning_rate": 3.215638859591048e-05, "loss": 99.0599, "step": 90710 }, { "epoch": 0.3665202794151513, "grad_norm": 783.563720703125, "learning_rate": 3.215417100093378e-05, "loss": 73.7595, "step": 90720 }, { "epoch": 0.3665606806805189, "grad_norm": 1526.1766357421875, "learning_rate": 3.215195316900527e-05, "loss": 80.5374, "step": 90730 }, { "epoch": 0.36660108194588653, "grad_norm": 1934.50048828125, "learning_rate": 3.2149735100168176e-05, "loss": 62.5631, "step": 90740 }, { "epoch": 0.36664148321125417, "grad_norm": 376.3502197265625, "learning_rate": 3.214751679446574e-05, "loss": 86.4771, "step": 90750 }, { "epoch": 0.3666818844766218, "grad_norm": 0.0, "learning_rate": 3.214529825194121e-05, "loss": 81.0698, "step": 90760 }, { "epoch": 0.36672228574198945, "grad_norm": 641.5331420898438, "learning_rate": 3.214307947263783e-05, "loss": 64.7252, "step": 90770 }, { "epoch": 0.3667626870073571, "grad_norm": 759.74755859375, "learning_rate": 3.2140860456598877e-05, "loss": 49.2482, "step": 90780 }, { "epoch": 0.3668030882727247, "grad_norm": 2156.34423828125, "learning_rate": 3.213864120386759e-05, "loss": 52.424, "step": 90790 }, { "epoch": 0.3668434895380923, "grad_norm": 510.70782470703125, "learning_rate": 3.213642171448725e-05, "loss": 36.5959, "step": 90800 }, { "epoch": 0.36688389080345996, "grad_norm": 630.8547973632812, "learning_rate": 3.213420198850111e-05, "loss": 49.5609, "step": 90810 }, { "epoch": 0.3669242920688276, "grad_norm": 1101.6739501953125, "learning_rate": 3.213198202595247e-05, "loss": 53.2187, "step": 90820 }, { "epoch": 0.36696469333419524, "grad_norm": 1123.56494140625, "learning_rate": 3.212976182688458e-05, "loss": 55.8884, "step": 90830 }, { "epoch": 0.3670050945995629, "grad_norm": 814.5668334960938, "learning_rate": 3.212754139134075e-05, "loss": 74.1858, "step": 90840 }, { "epoch": 0.3670454958649305, "grad_norm": 850.3318481445312, "learning_rate": 3.212532071936425e-05, "loss": 68.7365, "step": 90850 }, { "epoch": 0.3670858971302981, "grad_norm": 656.654541015625, "learning_rate": 3.2123099810998385e-05, "loss": 48.3398, "step": 90860 }, { "epoch": 0.36712629839566574, "grad_norm": 448.7389831542969, "learning_rate": 3.212087866628644e-05, "loss": 69.4263, "step": 90870 }, { "epoch": 0.3671666996610334, "grad_norm": 930.092529296875, "learning_rate": 3.211865728527173e-05, "loss": 59.9901, "step": 90880 }, { "epoch": 0.367207100926401, "grad_norm": 423.372314453125, "learning_rate": 3.211643566799756e-05, "loss": 47.8891, "step": 90890 }, { "epoch": 0.36724750219176866, "grad_norm": 634.6875610351562, "learning_rate": 3.2114213814507235e-05, "loss": 25.0227, "step": 90900 }, { "epoch": 0.3672879034571363, "grad_norm": 858.253662109375, "learning_rate": 3.211199172484407e-05, "loss": 72.241, "step": 90910 }, { "epoch": 0.3673283047225039, "grad_norm": 840.0797119140625, "learning_rate": 3.21097693990514e-05, "loss": 46.978, "step": 90920 }, { "epoch": 0.3673687059878715, "grad_norm": 1786.7987060546875, "learning_rate": 3.210754683717253e-05, "loss": 72.6414, "step": 90930 }, { "epoch": 0.36740910725323916, "grad_norm": 0.0, "learning_rate": 3.2105324039250814e-05, "loss": 52.4552, "step": 90940 }, { "epoch": 0.3674495085186068, "grad_norm": 987.6989135742188, "learning_rate": 3.210310100532956e-05, "loss": 77.9075, "step": 90950 }, { "epoch": 0.36748990978397444, "grad_norm": 611.7899169921875, "learning_rate": 3.210087773545214e-05, "loss": 45.6186, "step": 90960 }, { "epoch": 0.3675303110493421, "grad_norm": 459.25738525390625, "learning_rate": 3.209865422966186e-05, "loss": 57.6455, "step": 90970 }, { "epoch": 0.3675707123147097, "grad_norm": 723.4088745117188, "learning_rate": 3.20964304880021e-05, "loss": 54.5551, "step": 90980 }, { "epoch": 0.3676111135800773, "grad_norm": 784.4147338867188, "learning_rate": 3.209420651051619e-05, "loss": 71.0863, "step": 90990 }, { "epoch": 0.36765151484544495, "grad_norm": 479.0218200683594, "learning_rate": 3.2091982297247505e-05, "loss": 55.232, "step": 91000 }, { "epoch": 0.3676919161108126, "grad_norm": 887.7474365234375, "learning_rate": 3.2089757848239395e-05, "loss": 76.9433, "step": 91010 }, { "epoch": 0.36773231737618023, "grad_norm": 562.9622802734375, "learning_rate": 3.208753316353523e-05, "loss": 68.3854, "step": 91020 }, { "epoch": 0.36777271864154787, "grad_norm": 478.1982421875, "learning_rate": 3.2085308243178386e-05, "loss": 61.2692, "step": 91030 }, { "epoch": 0.3678131199069155, "grad_norm": 432.77459716796875, "learning_rate": 3.208308308721224e-05, "loss": 95.8793, "step": 91040 }, { "epoch": 0.3678535211722831, "grad_norm": 684.714111328125, "learning_rate": 3.2080857695680156e-05, "loss": 97.4408, "step": 91050 }, { "epoch": 0.36789392243765073, "grad_norm": 766.7606201171875, "learning_rate": 3.2078632068625534e-05, "loss": 86.9324, "step": 91060 }, { "epoch": 0.3679343237030184, "grad_norm": 475.80517578125, "learning_rate": 3.207640620609177e-05, "loss": 51.0135, "step": 91070 }, { "epoch": 0.367974724968386, "grad_norm": 651.9415283203125, "learning_rate": 3.2074180108122255e-05, "loss": 56.6108, "step": 91080 }, { "epoch": 0.36801512623375365, "grad_norm": 604.5901489257812, "learning_rate": 3.207195377476037e-05, "loss": 64.7571, "step": 91090 }, { "epoch": 0.3680555274991213, "grad_norm": 940.1326293945312, "learning_rate": 3.2069727206049536e-05, "loss": 41.8473, "step": 91100 }, { "epoch": 0.3680959287644889, "grad_norm": 799.6327514648438, "learning_rate": 3.2067500402033154e-05, "loss": 65.0775, "step": 91110 }, { "epoch": 0.3681363300298565, "grad_norm": 275.72406005859375, "learning_rate": 3.206527336275464e-05, "loss": 106.8737, "step": 91120 }, { "epoch": 0.36817673129522416, "grad_norm": 1342.920166015625, "learning_rate": 3.206304608825741e-05, "loss": 68.7097, "step": 91130 }, { "epoch": 0.3682171325605918, "grad_norm": 627.02783203125, "learning_rate": 3.206081857858489e-05, "loss": 65.4008, "step": 91140 }, { "epoch": 0.36825753382595944, "grad_norm": 1053.357666015625, "learning_rate": 3.20585908337805e-05, "loss": 44.5238, "step": 91150 }, { "epoch": 0.3682979350913271, "grad_norm": 383.9593200683594, "learning_rate": 3.205636285388767e-05, "loss": 53.7099, "step": 91160 }, { "epoch": 0.3683383363566947, "grad_norm": 287.8760986328125, "learning_rate": 3.205413463894984e-05, "loss": 78.2327, "step": 91170 }, { "epoch": 0.3683787376220623, "grad_norm": 694.3605346679688, "learning_rate": 3.2051906189010456e-05, "loss": 77.24, "step": 91180 }, { "epoch": 0.36841913888742994, "grad_norm": 893.4785766601562, "learning_rate": 3.204967750411295e-05, "loss": 40.0852, "step": 91190 }, { "epoch": 0.3684595401527976, "grad_norm": 1242.676513671875, "learning_rate": 3.2047448584300775e-05, "loss": 56.3566, "step": 91200 }, { "epoch": 0.3684999414181652, "grad_norm": 453.7666320800781, "learning_rate": 3.204521942961739e-05, "loss": 51.7536, "step": 91210 }, { "epoch": 0.36854034268353286, "grad_norm": 752.3657836914062, "learning_rate": 3.2042990040106255e-05, "loss": 67.9474, "step": 91220 }, { "epoch": 0.3685807439489005, "grad_norm": 0.0, "learning_rate": 3.204076041581082e-05, "loss": 59.873, "step": 91230 }, { "epoch": 0.3686211452142681, "grad_norm": 923.444580078125, "learning_rate": 3.2038530556774574e-05, "loss": 54.3655, "step": 91240 }, { "epoch": 0.3686615464796357, "grad_norm": 761.899169921875, "learning_rate": 3.203630046304097e-05, "loss": 65.4671, "step": 91250 }, { "epoch": 0.36870194774500337, "grad_norm": 309.4624938964844, "learning_rate": 3.203407013465349e-05, "loss": 66.5682, "step": 91260 }, { "epoch": 0.368742349010371, "grad_norm": 670.3491821289062, "learning_rate": 3.2031839571655625e-05, "loss": 87.206, "step": 91270 }, { "epoch": 0.36878275027573865, "grad_norm": 915.1228637695312, "learning_rate": 3.2029608774090845e-05, "loss": 71.0715, "step": 91280 }, { "epoch": 0.3688231515411063, "grad_norm": 728.1142578125, "learning_rate": 3.202737774200265e-05, "loss": 76.3794, "step": 91290 }, { "epoch": 0.36886355280647387, "grad_norm": 1065.0478515625, "learning_rate": 3.202514647543454e-05, "loss": 54.8924, "step": 91300 }, { "epoch": 0.3689039540718415, "grad_norm": 516.5827026367188, "learning_rate": 3.2022914974429995e-05, "loss": 67.8348, "step": 91310 }, { "epoch": 0.36894435533720915, "grad_norm": 794.9502563476562, "learning_rate": 3.2020683239032545e-05, "loss": 55.1017, "step": 91320 }, { "epoch": 0.3689847566025768, "grad_norm": 1035.4910888671875, "learning_rate": 3.2018451269285677e-05, "loss": 75.9056, "step": 91330 }, { "epoch": 0.36902515786794443, "grad_norm": 586.384765625, "learning_rate": 3.201621906523293e-05, "loss": 46.815, "step": 91340 }, { "epoch": 0.36906555913331207, "grad_norm": 2527.49609375, "learning_rate": 3.201398662691779e-05, "loss": 98.0505, "step": 91350 }, { "epoch": 0.3691059603986797, "grad_norm": 678.8090209960938, "learning_rate": 3.2011753954383805e-05, "loss": 64.6653, "step": 91360 }, { "epoch": 0.3691463616640473, "grad_norm": 695.07470703125, "learning_rate": 3.200952104767448e-05, "loss": 39.9693, "step": 91370 }, { "epoch": 0.36918676292941494, "grad_norm": 322.0420227050781, "learning_rate": 3.200728790683338e-05, "loss": 67.0857, "step": 91380 }, { "epoch": 0.3692271641947826, "grad_norm": 729.5950317382812, "learning_rate": 3.2005054531904006e-05, "loss": 87.5696, "step": 91390 }, { "epoch": 0.3692675654601502, "grad_norm": 509.17108154296875, "learning_rate": 3.200282092292991e-05, "loss": 64.9555, "step": 91400 }, { "epoch": 0.36930796672551786, "grad_norm": 1967.5986328125, "learning_rate": 3.200058707995465e-05, "loss": 58.8442, "step": 91410 }, { "epoch": 0.3693483679908855, "grad_norm": 914.3970947265625, "learning_rate": 3.1998353003021766e-05, "loss": 38.4076, "step": 91420 }, { "epoch": 0.3693887692562531, "grad_norm": 810.2129516601562, "learning_rate": 3.199611869217481e-05, "loss": 66.5225, "step": 91430 }, { "epoch": 0.3694291705216207, "grad_norm": 1145.6221923828125, "learning_rate": 3.1993884147457345e-05, "loss": 62.7686, "step": 91440 }, { "epoch": 0.36946957178698836, "grad_norm": 496.845703125, "learning_rate": 3.199164936891293e-05, "loss": 87.6221, "step": 91450 }, { "epoch": 0.369509973052356, "grad_norm": 420.1404113769531, "learning_rate": 3.198941435658514e-05, "loss": 56.4746, "step": 91460 }, { "epoch": 0.36955037431772364, "grad_norm": 629.4871826171875, "learning_rate": 3.1987179110517546e-05, "loss": 56.3853, "step": 91470 }, { "epoch": 0.3695907755830913, "grad_norm": 625.9359741210938, "learning_rate": 3.198494363075372e-05, "loss": 75.2549, "step": 91480 }, { "epoch": 0.3696311768484589, "grad_norm": 868.8189086914062, "learning_rate": 3.198270791733725e-05, "loss": 68.2451, "step": 91490 }, { "epoch": 0.3696715781138265, "grad_norm": 607.7655029296875, "learning_rate": 3.198047197031172e-05, "loss": 69.5034, "step": 91500 }, { "epoch": 0.36971197937919414, "grad_norm": 1387.5396728515625, "learning_rate": 3.197823578972072e-05, "loss": 85.6114, "step": 91510 }, { "epoch": 0.3697523806445618, "grad_norm": 464.6197814941406, "learning_rate": 3.1975999375607854e-05, "loss": 70.0409, "step": 91520 }, { "epoch": 0.3697927819099294, "grad_norm": 885.7677001953125, "learning_rate": 3.1973762728016705e-05, "loss": 77.555, "step": 91530 }, { "epoch": 0.36983318317529706, "grad_norm": 1390.1436767578125, "learning_rate": 3.1971525846990886e-05, "loss": 101.7281, "step": 91540 }, { "epoch": 0.3698735844406647, "grad_norm": 598.4069213867188, "learning_rate": 3.1969288732574015e-05, "loss": 69.8319, "step": 91550 }, { "epoch": 0.3699139857060323, "grad_norm": 491.0005187988281, "learning_rate": 3.196705138480969e-05, "loss": 46.9911, "step": 91560 }, { "epoch": 0.36995438697139993, "grad_norm": 2121.340576171875, "learning_rate": 3.196481380374154e-05, "loss": 70.583, "step": 91570 }, { "epoch": 0.36999478823676757, "grad_norm": 686.2039184570312, "learning_rate": 3.1962575989413185e-05, "loss": 71.3919, "step": 91580 }, { "epoch": 0.3700351895021352, "grad_norm": 1019.8297119140625, "learning_rate": 3.196033794186826e-05, "loss": 41.0625, "step": 91590 }, { "epoch": 0.37007559076750285, "grad_norm": 882.1055297851562, "learning_rate": 3.195809966115038e-05, "loss": 56.5875, "step": 91600 }, { "epoch": 0.3701159920328705, "grad_norm": 1562.2581787109375, "learning_rate": 3.1955861147303194e-05, "loss": 48.7799, "step": 91610 }, { "epoch": 0.3701563932982381, "grad_norm": 542.5730590820312, "learning_rate": 3.195362240037034e-05, "loss": 51.144, "step": 91620 }, { "epoch": 0.3701967945636057, "grad_norm": 602.9548950195312, "learning_rate": 3.1951383420395456e-05, "loss": 46.5236, "step": 91630 }, { "epoch": 0.37023719582897335, "grad_norm": 653.1578369140625, "learning_rate": 3.194914420742221e-05, "loss": 70.5503, "step": 91640 }, { "epoch": 0.370277597094341, "grad_norm": 539.1424560546875, "learning_rate": 3.194690476149425e-05, "loss": 42.9432, "step": 91650 }, { "epoch": 0.37031799835970863, "grad_norm": 1839.1611328125, "learning_rate": 3.194466508265522e-05, "loss": 62.7661, "step": 91660 }, { "epoch": 0.3703583996250763, "grad_norm": 387.30133056640625, "learning_rate": 3.1942425170948795e-05, "loss": 50.549, "step": 91670 }, { "epoch": 0.3703988008904439, "grad_norm": 657.3198852539062, "learning_rate": 3.194018502641864e-05, "loss": 48.9035, "step": 91680 }, { "epoch": 0.3704392021558115, "grad_norm": 1220.2711181640625, "learning_rate": 3.193794464910844e-05, "loss": 50.7038, "step": 91690 }, { "epoch": 0.37047960342117914, "grad_norm": 956.4788818359375, "learning_rate": 3.193570403906186e-05, "loss": 71.9509, "step": 91700 }, { "epoch": 0.3705200046865468, "grad_norm": 331.8046875, "learning_rate": 3.193346319632258e-05, "loss": 64.4131, "step": 91710 }, { "epoch": 0.3705604059519144, "grad_norm": 1147.4146728515625, "learning_rate": 3.193122212093429e-05, "loss": 68.8306, "step": 91720 }, { "epoch": 0.37060080721728206, "grad_norm": 540.6961059570312, "learning_rate": 3.192898081294069e-05, "loss": 78.8963, "step": 91730 }, { "epoch": 0.3706412084826497, "grad_norm": 554.406005859375, "learning_rate": 3.192673927238547e-05, "loss": 54.2971, "step": 91740 }, { "epoch": 0.3706816097480173, "grad_norm": 1999.7381591796875, "learning_rate": 3.1924497499312317e-05, "loss": 96.5161, "step": 91750 }, { "epoch": 0.3707220110133849, "grad_norm": 467.8526306152344, "learning_rate": 3.1922255493764956e-05, "loss": 57.5308, "step": 91760 }, { "epoch": 0.37076241227875256, "grad_norm": 1122.2821044921875, "learning_rate": 3.192001325578708e-05, "loss": 54.3601, "step": 91770 }, { "epoch": 0.3708028135441202, "grad_norm": 699.6976318359375, "learning_rate": 3.1917770785422406e-05, "loss": 51.2669, "step": 91780 }, { "epoch": 0.37084321480948784, "grad_norm": 1274.6541748046875, "learning_rate": 3.1915528082714664e-05, "loss": 59.7874, "step": 91790 }, { "epoch": 0.3708836160748555, "grad_norm": 600.6614379882812, "learning_rate": 3.191328514770757e-05, "loss": 72.3069, "step": 91800 }, { "epoch": 0.3709240173402231, "grad_norm": 1041.9267578125, "learning_rate": 3.1911041980444836e-05, "loss": 99.4359, "step": 91810 }, { "epoch": 0.3709644186055907, "grad_norm": 436.3846435546875, "learning_rate": 3.190879858097021e-05, "loss": 45.6987, "step": 91820 }, { "epoch": 0.37100481987095835, "grad_norm": 628.568115234375, "learning_rate": 3.190655494932742e-05, "loss": 92.0801, "step": 91830 }, { "epoch": 0.371045221136326, "grad_norm": 1433.095703125, "learning_rate": 3.190431108556022e-05, "loss": 49.9664, "step": 91840 }, { "epoch": 0.3710856224016936, "grad_norm": 1095.739501953125, "learning_rate": 3.190206698971235e-05, "loss": 51.1511, "step": 91850 }, { "epoch": 0.37112602366706127, "grad_norm": 1261.6505126953125, "learning_rate": 3.189982266182755e-05, "loss": 46.6542, "step": 91860 }, { "epoch": 0.3711664249324289, "grad_norm": 443.4308166503906, "learning_rate": 3.189757810194958e-05, "loss": 59.0583, "step": 91870 }, { "epoch": 0.3712068261977965, "grad_norm": 1636.2860107421875, "learning_rate": 3.18953333101222e-05, "loss": 62.8327, "step": 91880 }, { "epoch": 0.37124722746316413, "grad_norm": 648.6049194335938, "learning_rate": 3.189308828638917e-05, "loss": 60.4501, "step": 91890 }, { "epoch": 0.37128762872853177, "grad_norm": 839.2045288085938, "learning_rate": 3.189084303079427e-05, "loss": 39.8968, "step": 91900 }, { "epoch": 0.3713280299938994, "grad_norm": 927.9185180664062, "learning_rate": 3.1888597543381255e-05, "loss": 70.0525, "step": 91910 }, { "epoch": 0.37136843125926705, "grad_norm": 1390.071533203125, "learning_rate": 3.1886351824193916e-05, "loss": 46.6219, "step": 91920 }, { "epoch": 0.3714088325246347, "grad_norm": 2126.73486328125, "learning_rate": 3.188410587327602e-05, "loss": 60.301, "step": 91930 }, { "epoch": 0.3714492337900023, "grad_norm": 605.2738037109375, "learning_rate": 3.188185969067137e-05, "loss": 62.8989, "step": 91940 }, { "epoch": 0.3714896350553699, "grad_norm": 977.8110961914062, "learning_rate": 3.187961327642374e-05, "loss": 62.2956, "step": 91950 }, { "epoch": 0.37153003632073756, "grad_norm": 464.37969970703125, "learning_rate": 3.1877366630576945e-05, "loss": 52.0955, "step": 91960 }, { "epoch": 0.3715704375861052, "grad_norm": 710.4423217773438, "learning_rate": 3.1875119753174766e-05, "loss": 90.9263, "step": 91970 }, { "epoch": 0.37161083885147284, "grad_norm": 1177.0816650390625, "learning_rate": 3.187287264426101e-05, "loss": 89.1915, "step": 91980 }, { "epoch": 0.3716512401168405, "grad_norm": 356.5927429199219, "learning_rate": 3.18706253038795e-05, "loss": 41.8411, "step": 91990 }, { "epoch": 0.3716916413822081, "grad_norm": 539.5660400390625, "learning_rate": 3.1868377732074034e-05, "loss": 57.2294, "step": 92000 }, { "epoch": 0.3717320426475757, "grad_norm": 644.1307983398438, "learning_rate": 3.1866129928888425e-05, "loss": 68.3304, "step": 92010 }, { "epoch": 0.37177244391294334, "grad_norm": 610.5741577148438, "learning_rate": 3.186388189436652e-05, "loss": 49.4517, "step": 92020 }, { "epoch": 0.371812845178311, "grad_norm": 502.1660461425781, "learning_rate": 3.186163362855212e-05, "loss": 56.3577, "step": 92030 }, { "epoch": 0.3718532464436786, "grad_norm": 506.15301513671875, "learning_rate": 3.185938513148906e-05, "loss": 56.0064, "step": 92040 }, { "epoch": 0.37189364770904626, "grad_norm": 498.75146484375, "learning_rate": 3.185713640322119e-05, "loss": 56.7962, "step": 92050 }, { "epoch": 0.3719340489744139, "grad_norm": 972.7112426757812, "learning_rate": 3.185488744379234e-05, "loss": 70.0248, "step": 92060 }, { "epoch": 0.3719744502397815, "grad_norm": 934.3443603515625, "learning_rate": 3.185263825324635e-05, "loss": 71.4783, "step": 92070 }, { "epoch": 0.3720148515051491, "grad_norm": 1344.581298828125, "learning_rate": 3.185038883162708e-05, "loss": 85.8356, "step": 92080 }, { "epoch": 0.37205525277051676, "grad_norm": 883.6851196289062, "learning_rate": 3.184813917897838e-05, "loss": 58.2341, "step": 92090 }, { "epoch": 0.3720956540358844, "grad_norm": 564.726318359375, "learning_rate": 3.18458892953441e-05, "loss": 54.6441, "step": 92100 }, { "epoch": 0.37213605530125204, "grad_norm": 431.2422180175781, "learning_rate": 3.1843639180768115e-05, "loss": 76.4143, "step": 92110 }, { "epoch": 0.3721764565666197, "grad_norm": 497.0868225097656, "learning_rate": 3.184138883529429e-05, "loss": 60.8222, "step": 92120 }, { "epoch": 0.3722168578319873, "grad_norm": 863.14208984375, "learning_rate": 3.183913825896649e-05, "loss": 93.3023, "step": 92130 }, { "epoch": 0.3722572590973549, "grad_norm": 752.2738647460938, "learning_rate": 3.1836887451828595e-05, "loss": 77.4681, "step": 92140 }, { "epoch": 0.37229766036272255, "grad_norm": 302.57965087890625, "learning_rate": 3.183463641392448e-05, "loss": 74.6553, "step": 92150 }, { "epoch": 0.3723380616280902, "grad_norm": 598.8102416992188, "learning_rate": 3.183238514529804e-05, "loss": 56.2187, "step": 92160 }, { "epoch": 0.37237846289345783, "grad_norm": 1294.8486328125, "learning_rate": 3.183013364599316e-05, "loss": 51.3917, "step": 92170 }, { "epoch": 0.37241886415882547, "grad_norm": 826.580810546875, "learning_rate": 3.1827881916053734e-05, "loss": 53.6744, "step": 92180 }, { "epoch": 0.3724592654241931, "grad_norm": 687.9397583007812, "learning_rate": 3.182562995552366e-05, "loss": 68.3652, "step": 92190 }, { "epoch": 0.3724996666895607, "grad_norm": 269.9486999511719, "learning_rate": 3.182337776444684e-05, "loss": 57.6726, "step": 92200 }, { "epoch": 0.37254006795492833, "grad_norm": 844.561767578125, "learning_rate": 3.182112534286719e-05, "loss": 64.4658, "step": 92210 }, { "epoch": 0.372580469220296, "grad_norm": 569.0249633789062, "learning_rate": 3.181887269082861e-05, "loss": 73.5815, "step": 92220 }, { "epoch": 0.3726208704856636, "grad_norm": 1159.74853515625, "learning_rate": 3.1816619808375016e-05, "loss": 66.0471, "step": 92230 }, { "epoch": 0.37266127175103125, "grad_norm": 561.364013671875, "learning_rate": 3.1814366695550346e-05, "loss": 58.5301, "step": 92240 }, { "epoch": 0.3727016730163989, "grad_norm": 1479.8369140625, "learning_rate": 3.181211335239851e-05, "loss": 63.9735, "step": 92250 }, { "epoch": 0.3727420742817665, "grad_norm": 1265.6650390625, "learning_rate": 3.1809859778963445e-05, "loss": 47.1746, "step": 92260 }, { "epoch": 0.3727824755471341, "grad_norm": 552.6262817382812, "learning_rate": 3.180760597528908e-05, "loss": 58.6089, "step": 92270 }, { "epoch": 0.37282287681250176, "grad_norm": 943.9286499023438, "learning_rate": 3.180535194141936e-05, "loss": 83.6766, "step": 92280 }, { "epoch": 0.3728632780778694, "grad_norm": 863.5250244140625, "learning_rate": 3.180309767739823e-05, "loss": 73.9203, "step": 92290 }, { "epoch": 0.37290367934323704, "grad_norm": 0.0, "learning_rate": 3.1800843183269625e-05, "loss": 49.264, "step": 92300 }, { "epoch": 0.3729440806086047, "grad_norm": 599.714111328125, "learning_rate": 3.179858845907752e-05, "loss": 68.3465, "step": 92310 }, { "epoch": 0.3729844818739723, "grad_norm": 883.240966796875, "learning_rate": 3.179633350486584e-05, "loss": 76.5089, "step": 92320 }, { "epoch": 0.3730248831393399, "grad_norm": 558.8479614257812, "learning_rate": 3.179407832067858e-05, "loss": 83.0287, "step": 92330 }, { "epoch": 0.37306528440470754, "grad_norm": 1013.3880004882812, "learning_rate": 3.179182290655969e-05, "loss": 81.3642, "step": 92340 }, { "epoch": 0.3731056856700752, "grad_norm": 433.5400695800781, "learning_rate": 3.178956726255314e-05, "loss": 49.3599, "step": 92350 }, { "epoch": 0.3731460869354428, "grad_norm": 978.0261840820312, "learning_rate": 3.1787311388702894e-05, "loss": 57.0135, "step": 92360 }, { "epoch": 0.37318648820081046, "grad_norm": 1010.7636108398438, "learning_rate": 3.178505528505296e-05, "loss": 66.5352, "step": 92370 }, { "epoch": 0.3732268894661781, "grad_norm": 241.92910766601562, "learning_rate": 3.17827989516473e-05, "loss": 60.54, "step": 92380 }, { "epoch": 0.3732672907315457, "grad_norm": 665.576416015625, "learning_rate": 3.1780542388529906e-05, "loss": 77.0888, "step": 92390 }, { "epoch": 0.3733076919969133, "grad_norm": 715.9092407226562, "learning_rate": 3.177828559574477e-05, "loss": 63.322, "step": 92400 }, { "epoch": 0.37334809326228097, "grad_norm": 544.0072021484375, "learning_rate": 3.1776028573335906e-05, "loss": 60.1967, "step": 92410 }, { "epoch": 0.3733884945276486, "grad_norm": 856.43701171875, "learning_rate": 3.177377132134729e-05, "loss": 73.3287, "step": 92420 }, { "epoch": 0.37342889579301625, "grad_norm": 749.0526733398438, "learning_rate": 3.177151383982295e-05, "loss": 59.487, "step": 92430 }, { "epoch": 0.3734692970583839, "grad_norm": 560.5958862304688, "learning_rate": 3.176925612880688e-05, "loss": 55.7059, "step": 92440 }, { "epoch": 0.3735096983237515, "grad_norm": 604.20556640625, "learning_rate": 3.176699818834311e-05, "loss": 55.1843, "step": 92450 }, { "epoch": 0.3735500995891191, "grad_norm": 398.9888000488281, "learning_rate": 3.1764740018475646e-05, "loss": 75.0972, "step": 92460 }, { "epoch": 0.37359050085448675, "grad_norm": 466.8522033691406, "learning_rate": 3.1762481619248516e-05, "loss": 46.4847, "step": 92470 }, { "epoch": 0.3736309021198544, "grad_norm": 548.5853271484375, "learning_rate": 3.1760222990705756e-05, "loss": 80.4183, "step": 92480 }, { "epoch": 0.37367130338522203, "grad_norm": 709.9428100585938, "learning_rate": 3.175796413289139e-05, "loss": 56.5794, "step": 92490 }, { "epoch": 0.37371170465058967, "grad_norm": 694.28076171875, "learning_rate": 3.1755705045849465e-05, "loss": 65.296, "step": 92500 }, { "epoch": 0.3737521059159573, "grad_norm": 1514.2996826171875, "learning_rate": 3.175344572962402e-05, "loss": 99.0674, "step": 92510 }, { "epoch": 0.3737925071813249, "grad_norm": 1029.3662109375, "learning_rate": 3.175118618425909e-05, "loss": 75.531, "step": 92520 }, { "epoch": 0.37383290844669254, "grad_norm": 1189.4278564453125, "learning_rate": 3.174892640979875e-05, "loss": 81.1586, "step": 92530 }, { "epoch": 0.3738733097120602, "grad_norm": 513.4951782226562, "learning_rate": 3.174666640628702e-05, "loss": 86.4052, "step": 92540 }, { "epoch": 0.3739137109774278, "grad_norm": 467.4132080078125, "learning_rate": 3.174440617376799e-05, "loss": 52.4841, "step": 92550 }, { "epoch": 0.37395411224279546, "grad_norm": 628.9441528320312, "learning_rate": 3.1742145712285725e-05, "loss": 70.6902, "step": 92560 }, { "epoch": 0.3739945135081631, "grad_norm": 816.73828125, "learning_rate": 3.173988502188428e-05, "loss": 97.6481, "step": 92570 }, { "epoch": 0.3740349147735307, "grad_norm": 1033.385986328125, "learning_rate": 3.173762410260772e-05, "loss": 78.7853, "step": 92580 }, { "epoch": 0.3740753160388983, "grad_norm": 466.20404052734375, "learning_rate": 3.173536295450014e-05, "loss": 47.6014, "step": 92590 }, { "epoch": 0.37411571730426596, "grad_norm": 337.6639099121094, "learning_rate": 3.173310157760563e-05, "loss": 53.6203, "step": 92600 }, { "epoch": 0.3741561185696336, "grad_norm": 803.3235473632812, "learning_rate": 3.173083997196825e-05, "loss": 66.1968, "step": 92610 }, { "epoch": 0.37419651983500124, "grad_norm": 960.7337036132812, "learning_rate": 3.172857813763211e-05, "loss": 30.3207, "step": 92620 }, { "epoch": 0.3742369211003689, "grad_norm": 782.6198120117188, "learning_rate": 3.172631607464131e-05, "loss": 81.5086, "step": 92630 }, { "epoch": 0.3742773223657365, "grad_norm": 542.2578735351562, "learning_rate": 3.1724053783039935e-05, "loss": 42.5299, "step": 92640 }, { "epoch": 0.3743177236311041, "grad_norm": 134.53843688964844, "learning_rate": 3.172179126287209e-05, "loss": 61.441, "step": 92650 }, { "epoch": 0.37435812489647174, "grad_norm": 511.2257080078125, "learning_rate": 3.1719528514181894e-05, "loss": 70.0888, "step": 92660 }, { "epoch": 0.3743985261618394, "grad_norm": 484.2981262207031, "learning_rate": 3.1717265537013454e-05, "loss": 76.5403, "step": 92670 }, { "epoch": 0.374438927427207, "grad_norm": 782.7688598632812, "learning_rate": 3.1715002331410886e-05, "loss": 52.4916, "step": 92680 }, { "epoch": 0.37447932869257466, "grad_norm": 410.0548400878906, "learning_rate": 3.1712738897418325e-05, "loss": 40.2399, "step": 92690 }, { "epoch": 0.3745197299579423, "grad_norm": 345.84930419921875, "learning_rate": 3.1710475235079885e-05, "loss": 43.5013, "step": 92700 }, { "epoch": 0.3745601312233099, "grad_norm": 1913.61669921875, "learning_rate": 3.17082113444397e-05, "loss": 82.2697, "step": 92710 }, { "epoch": 0.37460053248867753, "grad_norm": 1220.21923828125, "learning_rate": 3.1705947225541915e-05, "loss": 64.3923, "step": 92720 }, { "epoch": 0.37464093375404517, "grad_norm": 468.8391418457031, "learning_rate": 3.170368287843065e-05, "loss": 65.5118, "step": 92730 }, { "epoch": 0.3746813350194128, "grad_norm": 716.503173828125, "learning_rate": 3.1701418303150067e-05, "loss": 75.1031, "step": 92740 }, { "epoch": 0.37472173628478045, "grad_norm": 731.4243774414062, "learning_rate": 3.169915349974432e-05, "loss": 71.5022, "step": 92750 }, { "epoch": 0.3747621375501481, "grad_norm": 752.8480834960938, "learning_rate": 3.169688846825754e-05, "loss": 42.9594, "step": 92760 }, { "epoch": 0.37480253881551573, "grad_norm": 967.5347290039062, "learning_rate": 3.16946232087339e-05, "loss": 56.4926, "step": 92770 }, { "epoch": 0.3748429400808833, "grad_norm": 1119.449951171875, "learning_rate": 3.169235772121756e-05, "loss": 76.5434, "step": 92780 }, { "epoch": 0.37488334134625095, "grad_norm": 410.9073486328125, "learning_rate": 3.169009200575268e-05, "loss": 57.9905, "step": 92790 }, { "epoch": 0.3749237426116186, "grad_norm": 550.1514892578125, "learning_rate": 3.1687826062383444e-05, "loss": 61.867, "step": 92800 }, { "epoch": 0.37496414387698623, "grad_norm": 246.54542541503906, "learning_rate": 3.168555989115402e-05, "loss": 60.9686, "step": 92810 }, { "epoch": 0.3750045451423539, "grad_norm": 586.08837890625, "learning_rate": 3.1683293492108595e-05, "loss": 50.8065, "step": 92820 }, { "epoch": 0.3750449464077215, "grad_norm": 884.8882446289062, "learning_rate": 3.168102686529135e-05, "loss": 48.4871, "step": 92830 }, { "epoch": 0.3750853476730891, "grad_norm": 761.7444458007812, "learning_rate": 3.167876001074646e-05, "loss": 67.6586, "step": 92840 }, { "epoch": 0.37512574893845674, "grad_norm": 372.81304931640625, "learning_rate": 3.167649292851814e-05, "loss": 57.7765, "step": 92850 }, { "epoch": 0.3751661502038244, "grad_norm": 193.35227966308594, "learning_rate": 3.167422561865058e-05, "loss": 50.352, "step": 92860 }, { "epoch": 0.375206551469192, "grad_norm": 967.7341918945312, "learning_rate": 3.167195808118798e-05, "loss": 69.5428, "step": 92870 }, { "epoch": 0.37524695273455966, "grad_norm": 574.5850830078125, "learning_rate": 3.166969031617455e-05, "loss": 53.5927, "step": 92880 }, { "epoch": 0.3752873539999273, "grad_norm": 1133.9346923828125, "learning_rate": 3.166742232365449e-05, "loss": 69.1476, "step": 92890 }, { "epoch": 0.3753277552652949, "grad_norm": 547.107421875, "learning_rate": 3.166515410367203e-05, "loss": 67.2989, "step": 92900 }, { "epoch": 0.3753681565306625, "grad_norm": 921.1433715820312, "learning_rate": 3.166288565627139e-05, "loss": 75.7809, "step": 92910 }, { "epoch": 0.37540855779603016, "grad_norm": 380.323486328125, "learning_rate": 3.166061698149679e-05, "loss": 56.84, "step": 92920 }, { "epoch": 0.3754489590613978, "grad_norm": 598.1117553710938, "learning_rate": 3.165834807939245e-05, "loss": 61.9656, "step": 92930 }, { "epoch": 0.37548936032676544, "grad_norm": 235.85311889648438, "learning_rate": 3.165607895000262e-05, "loss": 52.8294, "step": 92940 }, { "epoch": 0.3755297615921331, "grad_norm": 992.2584228515625, "learning_rate": 3.1653809593371526e-05, "loss": 65.1801, "step": 92950 }, { "epoch": 0.3755701628575007, "grad_norm": 912.9456176757812, "learning_rate": 3.165154000954341e-05, "loss": 73.9485, "step": 92960 }, { "epoch": 0.3756105641228683, "grad_norm": 483.7486572265625, "learning_rate": 3.164927019856253e-05, "loss": 55.6523, "step": 92970 }, { "epoch": 0.37565096538823595, "grad_norm": 597.1483764648438, "learning_rate": 3.1647000160473126e-05, "loss": 77.8391, "step": 92980 }, { "epoch": 0.3756913666536036, "grad_norm": 1421.8509521484375, "learning_rate": 3.164472989531946e-05, "loss": 44.8926, "step": 92990 }, { "epoch": 0.3757317679189712, "grad_norm": 944.5368041992188, "learning_rate": 3.1642459403145794e-05, "loss": 81.4513, "step": 93000 }, { "epoch": 0.37577216918433887, "grad_norm": 1083.89794921875, "learning_rate": 3.164018868399638e-05, "loss": 65.1026, "step": 93010 }, { "epoch": 0.3758125704497065, "grad_norm": 796.9815673828125, "learning_rate": 3.16379177379155e-05, "loss": 80.6928, "step": 93020 }, { "epoch": 0.3758529717150741, "grad_norm": 516.5150756835938, "learning_rate": 3.163564656494742e-05, "loss": 74.6458, "step": 93030 }, { "epoch": 0.37589337298044173, "grad_norm": 695.0779418945312, "learning_rate": 3.163337516513642e-05, "loss": 73.3617, "step": 93040 }, { "epoch": 0.37593377424580937, "grad_norm": 661.1953125, "learning_rate": 3.1631103538526774e-05, "loss": 44.4488, "step": 93050 }, { "epoch": 0.375974175511177, "grad_norm": 1036.6097412109375, "learning_rate": 3.162883168516279e-05, "loss": 81.1605, "step": 93060 }, { "epoch": 0.37601457677654465, "grad_norm": 1192.6729736328125, "learning_rate": 3.1626559605088734e-05, "loss": 107.4897, "step": 93070 }, { "epoch": 0.3760549780419123, "grad_norm": 594.5126953125, "learning_rate": 3.1624287298348923e-05, "loss": 66.0238, "step": 93080 }, { "epoch": 0.37609537930727993, "grad_norm": 461.3974609375, "learning_rate": 3.1622014764987637e-05, "loss": 62.9703, "step": 93090 }, { "epoch": 0.3761357805726475, "grad_norm": 220.75631713867188, "learning_rate": 3.1619742005049204e-05, "loss": 45.0191, "step": 93100 }, { "epoch": 0.37617618183801516, "grad_norm": 892.0763549804688, "learning_rate": 3.16174690185779e-05, "loss": 78.6, "step": 93110 }, { "epoch": 0.3762165831033828, "grad_norm": 1821.3922119140625, "learning_rate": 3.161519580561807e-05, "loss": 59.5127, "step": 93120 }, { "epoch": 0.37625698436875044, "grad_norm": 762.4219360351562, "learning_rate": 3.1612922366214014e-05, "loss": 117.8003, "step": 93130 }, { "epoch": 0.3762973856341181, "grad_norm": 660.0880126953125, "learning_rate": 3.1610648700410057e-05, "loss": 74.9537, "step": 93140 }, { "epoch": 0.3763377868994857, "grad_norm": 708.5904541015625, "learning_rate": 3.160837480825052e-05, "loss": 69.8171, "step": 93150 }, { "epoch": 0.3763781881648533, "grad_norm": 272.2256774902344, "learning_rate": 3.160610068977975e-05, "loss": 32.8539, "step": 93160 }, { "epoch": 0.37641858943022094, "grad_norm": 416.69927978515625, "learning_rate": 3.1603826345042064e-05, "loss": 55.2995, "step": 93170 }, { "epoch": 0.3764589906955886, "grad_norm": 756.4736328125, "learning_rate": 3.160155177408181e-05, "loss": 50.5459, "step": 93180 }, { "epoch": 0.3764993919609562, "grad_norm": 1576.5340576171875, "learning_rate": 3.159927697694334e-05, "loss": 63.1412, "step": 93190 }, { "epoch": 0.37653979322632386, "grad_norm": 622.9821166992188, "learning_rate": 3.1597001953670985e-05, "loss": 37.7336, "step": 93200 }, { "epoch": 0.3765801944916915, "grad_norm": 1226.30224609375, "learning_rate": 3.159472670430911e-05, "loss": 71.5446, "step": 93210 }, { "epoch": 0.3766205957570591, "grad_norm": 768.5546264648438, "learning_rate": 3.159245122890207e-05, "loss": 61.1742, "step": 93220 }, { "epoch": 0.3766609970224267, "grad_norm": 1447.1888427734375, "learning_rate": 3.159017552749422e-05, "loss": 74.656, "step": 93230 }, { "epoch": 0.37670139828779436, "grad_norm": 432.21624755859375, "learning_rate": 3.158789960012993e-05, "loss": 87.346, "step": 93240 }, { "epoch": 0.376741799553162, "grad_norm": 1397.281005859375, "learning_rate": 3.1585623446853574e-05, "loss": 86.6955, "step": 93250 }, { "epoch": 0.37678220081852964, "grad_norm": 203.1778564453125, "learning_rate": 3.158334706770953e-05, "loss": 61.4963, "step": 93260 }, { "epoch": 0.3768226020838973, "grad_norm": 396.96685791015625, "learning_rate": 3.158107046274217e-05, "loss": 24.4406, "step": 93270 }, { "epoch": 0.3768630033492649, "grad_norm": 512.5032958984375, "learning_rate": 3.157879363199588e-05, "loss": 65.7688, "step": 93280 }, { "epoch": 0.3769034046146325, "grad_norm": 797.4293823242188, "learning_rate": 3.1576516575515034e-05, "loss": 48.7656, "step": 93290 }, { "epoch": 0.37694380588000015, "grad_norm": 1280.97705078125, "learning_rate": 3.157423929334405e-05, "loss": 59.6893, "step": 93300 }, { "epoch": 0.3769842071453678, "grad_norm": 719.6341552734375, "learning_rate": 3.157196178552731e-05, "loss": 54.0297, "step": 93310 }, { "epoch": 0.37702460841073543, "grad_norm": 513.1243896484375, "learning_rate": 3.156968405210921e-05, "loss": 51.1877, "step": 93320 }, { "epoch": 0.37706500967610307, "grad_norm": 1609.5087890625, "learning_rate": 3.156740609313417e-05, "loss": 56.3624, "step": 93330 }, { "epoch": 0.3771054109414707, "grad_norm": 900.366943359375, "learning_rate": 3.156512790864659e-05, "loss": 82.3196, "step": 93340 }, { "epoch": 0.3771458122068383, "grad_norm": 1422.4246826171875, "learning_rate": 3.1562849498690894e-05, "loss": 57.0498, "step": 93350 }, { "epoch": 0.37718621347220593, "grad_norm": 633.8297119140625, "learning_rate": 3.1560570863311486e-05, "loss": 51.3569, "step": 93360 }, { "epoch": 0.3772266147375736, "grad_norm": 1020.2067260742188, "learning_rate": 3.15582920025528e-05, "loss": 68.3464, "step": 93370 }, { "epoch": 0.3772670160029412, "grad_norm": 452.5904235839844, "learning_rate": 3.1556012916459264e-05, "loss": 69.8498, "step": 93380 }, { "epoch": 0.37730741726830885, "grad_norm": 504.51922607421875, "learning_rate": 3.15537336050753e-05, "loss": 44.5778, "step": 93390 }, { "epoch": 0.3773478185336765, "grad_norm": 1517.9036865234375, "learning_rate": 3.155145406844535e-05, "loss": 79.1883, "step": 93400 }, { "epoch": 0.37738821979904413, "grad_norm": 1100.0196533203125, "learning_rate": 3.154917430661387e-05, "loss": 57.3691, "step": 93410 }, { "epoch": 0.3774286210644117, "grad_norm": 890.3381958007812, "learning_rate": 3.154689431962528e-05, "loss": 70.4406, "step": 93420 }, { "epoch": 0.37746902232977936, "grad_norm": 1641.3446044921875, "learning_rate": 3.1544614107524044e-05, "loss": 67.4326, "step": 93430 }, { "epoch": 0.377509423595147, "grad_norm": 1637.2950439453125, "learning_rate": 3.154233367035461e-05, "loss": 46.8634, "step": 93440 }, { "epoch": 0.37754982486051464, "grad_norm": 740.50048828125, "learning_rate": 3.154005300816144e-05, "loss": 55.2992, "step": 93450 }, { "epoch": 0.3775902261258823, "grad_norm": 541.7504272460938, "learning_rate": 3.153777212098899e-05, "loss": 48.3187, "step": 93460 }, { "epoch": 0.3776306273912499, "grad_norm": 757.1552734375, "learning_rate": 3.1535491008881735e-05, "loss": 61.7585, "step": 93470 }, { "epoch": 0.3776710286566175, "grad_norm": 748.1531372070312, "learning_rate": 3.153320967188415e-05, "loss": 53.4171, "step": 93480 }, { "epoch": 0.37771142992198514, "grad_norm": 1016.3422241210938, "learning_rate": 3.1530928110040696e-05, "loss": 44.028, "step": 93490 }, { "epoch": 0.3777518311873528, "grad_norm": 1564.3062744140625, "learning_rate": 3.1528646323395865e-05, "loss": 54.3597, "step": 93500 }, { "epoch": 0.3777922324527204, "grad_norm": 563.5382690429688, "learning_rate": 3.152636431199414e-05, "loss": 72.5935, "step": 93510 }, { "epoch": 0.37783263371808806, "grad_norm": 491.2622375488281, "learning_rate": 3.152408207588001e-05, "loss": 48.6152, "step": 93520 }, { "epoch": 0.3778730349834557, "grad_norm": 1588.01513671875, "learning_rate": 3.152179961509797e-05, "loss": 101.9268, "step": 93530 }, { "epoch": 0.3779134362488233, "grad_norm": 601.212158203125, "learning_rate": 3.1519516929692506e-05, "loss": 47.7366, "step": 93540 }, { "epoch": 0.3779538375141909, "grad_norm": 628.07177734375, "learning_rate": 3.1517234019708136e-05, "loss": 59.8987, "step": 93550 }, { "epoch": 0.37799423877955857, "grad_norm": 680.4913940429688, "learning_rate": 3.151495088518936e-05, "loss": 49.9818, "step": 93560 }, { "epoch": 0.3780346400449262, "grad_norm": 820.2808227539062, "learning_rate": 3.1512667526180686e-05, "loss": 53.5809, "step": 93570 }, { "epoch": 0.37807504131029385, "grad_norm": 923.7725219726562, "learning_rate": 3.1510383942726626e-05, "loss": 60.1639, "step": 93580 }, { "epoch": 0.3781154425756615, "grad_norm": 618.584716796875, "learning_rate": 3.1508100134871705e-05, "loss": 76.269, "step": 93590 }, { "epoch": 0.3781558438410291, "grad_norm": 201.3993377685547, "learning_rate": 3.150581610266046e-05, "loss": 36.1293, "step": 93600 }, { "epoch": 0.3781962451063967, "grad_norm": 2918.939453125, "learning_rate": 3.150353184613739e-05, "loss": 86.6181, "step": 93610 }, { "epoch": 0.37823664637176435, "grad_norm": 748.3023071289062, "learning_rate": 3.150124736534705e-05, "loss": 42.7009, "step": 93620 }, { "epoch": 0.378277047637132, "grad_norm": 377.7673034667969, "learning_rate": 3.149896266033398e-05, "loss": 44.2963, "step": 93630 }, { "epoch": 0.37831744890249963, "grad_norm": 1187.8922119140625, "learning_rate": 3.149667773114271e-05, "loss": 78.8231, "step": 93640 }, { "epoch": 0.37835785016786727, "grad_norm": 882.2860107421875, "learning_rate": 3.1494392577817775e-05, "loss": 70.4679, "step": 93650 }, { "epoch": 0.3783982514332349, "grad_norm": 799.963134765625, "learning_rate": 3.149210720040375e-05, "loss": 49.0757, "step": 93660 }, { "epoch": 0.3784386526986025, "grad_norm": 1207.2667236328125, "learning_rate": 3.148982159894518e-05, "loss": 65.3828, "step": 93670 }, { "epoch": 0.37847905396397014, "grad_norm": 511.77978515625, "learning_rate": 3.1487535773486624e-05, "loss": 49.8612, "step": 93680 }, { "epoch": 0.3785194552293378, "grad_norm": 339.2344970703125, "learning_rate": 3.148524972407263e-05, "loss": 36.7039, "step": 93690 }, { "epoch": 0.3785598564947054, "grad_norm": 1724.865966796875, "learning_rate": 3.14829634507478e-05, "loss": 81.2211, "step": 93700 }, { "epoch": 0.37860025776007306, "grad_norm": 1382.4368896484375, "learning_rate": 3.148067695355667e-05, "loss": 83.5194, "step": 93710 }, { "epoch": 0.3786406590254407, "grad_norm": 618.5198364257812, "learning_rate": 3.1478390232543835e-05, "loss": 61.2203, "step": 93720 }, { "epoch": 0.37868106029080834, "grad_norm": 514.2406005859375, "learning_rate": 3.147610328775388e-05, "loss": 73.8755, "step": 93730 }, { "epoch": 0.3787214615561759, "grad_norm": 478.9051818847656, "learning_rate": 3.1473816119231374e-05, "loss": 63.9839, "step": 93740 }, { "epoch": 0.37876186282154356, "grad_norm": 649.34521484375, "learning_rate": 3.147152872702092e-05, "loss": 53.3547, "step": 93750 }, { "epoch": 0.3788022640869112, "grad_norm": 583.468994140625, "learning_rate": 3.146924111116711e-05, "loss": 60.0845, "step": 93760 }, { "epoch": 0.37884266535227884, "grad_norm": 1076.1488037109375, "learning_rate": 3.146695327171454e-05, "loss": 52.8453, "step": 93770 }, { "epoch": 0.3788830666176465, "grad_norm": 640.3786010742188, "learning_rate": 3.146466520870781e-05, "loss": 88.6203, "step": 93780 }, { "epoch": 0.3789234678830141, "grad_norm": 546.0957641601562, "learning_rate": 3.146237692219153e-05, "loss": 73.0292, "step": 93790 }, { "epoch": 0.3789638691483817, "grad_norm": 662.7147216796875, "learning_rate": 3.146008841221031e-05, "loss": 81.7544, "step": 93800 }, { "epoch": 0.37900427041374934, "grad_norm": 746.7833251953125, "learning_rate": 3.145779967880877e-05, "loss": 64.7103, "step": 93810 }, { "epoch": 0.379044671679117, "grad_norm": 1038.830810546875, "learning_rate": 3.1455510722031516e-05, "loss": 82.4053, "step": 93820 }, { "epoch": 0.3790850729444846, "grad_norm": 643.62353515625, "learning_rate": 3.145322154192319e-05, "loss": 70.2069, "step": 93830 }, { "epoch": 0.37912547420985226, "grad_norm": 1289.2843017578125, "learning_rate": 3.145093213852842e-05, "loss": 65.6404, "step": 93840 }, { "epoch": 0.3791658754752199, "grad_norm": 790.8561401367188, "learning_rate": 3.144864251189182e-05, "loss": 121.4687, "step": 93850 }, { "epoch": 0.3792062767405875, "grad_norm": 392.8358154296875, "learning_rate": 3.144635266205804e-05, "loss": 51.6023, "step": 93860 }, { "epoch": 0.37924667800595513, "grad_norm": 264.0917663574219, "learning_rate": 3.1444062589071735e-05, "loss": 56.2863, "step": 93870 }, { "epoch": 0.37928707927132277, "grad_norm": 614.1214599609375, "learning_rate": 3.1441772292977534e-05, "loss": 64.174, "step": 93880 }, { "epoch": 0.3793274805366904, "grad_norm": 508.02008056640625, "learning_rate": 3.1439481773820086e-05, "loss": 71.7199, "step": 93890 }, { "epoch": 0.37936788180205805, "grad_norm": 404.8476257324219, "learning_rate": 3.143719103164405e-05, "loss": 42.7474, "step": 93900 }, { "epoch": 0.3794082830674257, "grad_norm": 391.021728515625, "learning_rate": 3.143490006649409e-05, "loss": 74.7712, "step": 93910 }, { "epoch": 0.37944868433279333, "grad_norm": 822.2650756835938, "learning_rate": 3.143260887841487e-05, "loss": 57.2424, "step": 93920 }, { "epoch": 0.3794890855981609, "grad_norm": 727.6133422851562, "learning_rate": 3.1430317467451045e-05, "loss": 53.5675, "step": 93930 }, { "epoch": 0.37952948686352855, "grad_norm": 485.53179931640625, "learning_rate": 3.1428025833647306e-05, "loss": 63.1455, "step": 93940 }, { "epoch": 0.3795698881288962, "grad_norm": 574.5487060546875, "learning_rate": 3.1425733977048313e-05, "loss": 67.5347, "step": 93950 }, { "epoch": 0.37961028939426383, "grad_norm": 1004.76171875, "learning_rate": 3.142344189769876e-05, "loss": 75.701, "step": 93960 }, { "epoch": 0.3796506906596315, "grad_norm": 857.5787963867188, "learning_rate": 3.142114959564332e-05, "loss": 56.6185, "step": 93970 }, { "epoch": 0.3796910919249991, "grad_norm": 666.8101806640625, "learning_rate": 3.141885707092669e-05, "loss": 41.2738, "step": 93980 }, { "epoch": 0.3797314931903667, "grad_norm": 548.367919921875, "learning_rate": 3.141656432359356e-05, "loss": 66.6917, "step": 93990 }, { "epoch": 0.37977189445573434, "grad_norm": 550.247802734375, "learning_rate": 3.141427135368864e-05, "loss": 83.6517, "step": 94000 }, { "epoch": 0.379812295721102, "grad_norm": 547.6636352539062, "learning_rate": 3.141197816125662e-05, "loss": 41.3346, "step": 94010 }, { "epoch": 0.3798526969864696, "grad_norm": 880.8660278320312, "learning_rate": 3.14096847463422e-05, "loss": 50.6206, "step": 94020 }, { "epoch": 0.37989309825183726, "grad_norm": 488.7218017578125, "learning_rate": 3.140739110899011e-05, "loss": 54.8867, "step": 94030 }, { "epoch": 0.3799334995172049, "grad_norm": 977.4651489257812, "learning_rate": 3.140509724924506e-05, "loss": 66.8367, "step": 94040 }, { "epoch": 0.37997390078257254, "grad_norm": 763.7493286132812, "learning_rate": 3.1402803167151766e-05, "loss": 90.5319, "step": 94050 }, { "epoch": 0.3800143020479401, "grad_norm": 674.5718383789062, "learning_rate": 3.140050886275496e-05, "loss": 56.2098, "step": 94060 }, { "epoch": 0.38005470331330776, "grad_norm": 797.9072875976562, "learning_rate": 3.1398214336099345e-05, "loss": 73.0468, "step": 94070 }, { "epoch": 0.3800951045786754, "grad_norm": 523.2122192382812, "learning_rate": 3.139591958722969e-05, "loss": 72.2402, "step": 94080 }, { "epoch": 0.38013550584404304, "grad_norm": 98.30523681640625, "learning_rate": 3.139362461619071e-05, "loss": 31.6787, "step": 94090 }, { "epoch": 0.3801759071094107, "grad_norm": 1349.02978515625, "learning_rate": 3.139132942302715e-05, "loss": 57.2611, "step": 94100 }, { "epoch": 0.3802163083747783, "grad_norm": 1889.5108642578125, "learning_rate": 3.138903400778376e-05, "loss": 76.0786, "step": 94110 }, { "epoch": 0.3802567096401459, "grad_norm": 455.8885498046875, "learning_rate": 3.1386738370505296e-05, "loss": 45.5989, "step": 94120 }, { "epoch": 0.38029711090551355, "grad_norm": 864.2047729492188, "learning_rate": 3.138444251123649e-05, "loss": 102.4932, "step": 94130 }, { "epoch": 0.3803375121708812, "grad_norm": 1400.811767578125, "learning_rate": 3.138214643002213e-05, "loss": 72.045, "step": 94140 }, { "epoch": 0.3803779134362488, "grad_norm": 869.3282470703125, "learning_rate": 3.1379850126906966e-05, "loss": 46.5034, "step": 94150 }, { "epoch": 0.38041831470161647, "grad_norm": 1013.1737670898438, "learning_rate": 3.137755360193575e-05, "loss": 54.6387, "step": 94160 }, { "epoch": 0.3804587159669841, "grad_norm": 841.7120361328125, "learning_rate": 3.137525685515329e-05, "loss": 46.6986, "step": 94170 }, { "epoch": 0.3804991172323517, "grad_norm": 1022.562744140625, "learning_rate": 3.137295988660432e-05, "loss": 70.4712, "step": 94180 }, { "epoch": 0.38053951849771933, "grad_norm": 719.1082153320312, "learning_rate": 3.1370662696333653e-05, "loss": 87.8841, "step": 94190 }, { "epoch": 0.38057991976308697, "grad_norm": 499.92938232421875, "learning_rate": 3.136836528438606e-05, "loss": 48.2856, "step": 94200 }, { "epoch": 0.3806203210284546, "grad_norm": 791.6148681640625, "learning_rate": 3.136606765080634e-05, "loss": 74.0039, "step": 94210 }, { "epoch": 0.38066072229382225, "grad_norm": 452.509521484375, "learning_rate": 3.136376979563927e-05, "loss": 66.7968, "step": 94220 }, { "epoch": 0.3807011235591899, "grad_norm": 4187.9775390625, "learning_rate": 3.136147171892966e-05, "loss": 66.3752, "step": 94230 }, { "epoch": 0.38074152482455753, "grad_norm": 461.0445251464844, "learning_rate": 3.135917342072231e-05, "loss": 65.7219, "step": 94240 }, { "epoch": 0.3807819260899251, "grad_norm": 767.9695434570312, "learning_rate": 3.135687490106203e-05, "loss": 105.8939, "step": 94250 }, { "epoch": 0.38082232735529276, "grad_norm": 651.4767456054688, "learning_rate": 3.135457615999362e-05, "loss": 42.9474, "step": 94260 }, { "epoch": 0.3808627286206604, "grad_norm": 379.09381103515625, "learning_rate": 3.13522771975619e-05, "loss": 54.3321, "step": 94270 }, { "epoch": 0.38090312988602804, "grad_norm": 984.194580078125, "learning_rate": 3.13499780138117e-05, "loss": 53.293, "step": 94280 }, { "epoch": 0.3809435311513957, "grad_norm": 619.849609375, "learning_rate": 3.134767860878784e-05, "loss": 65.771, "step": 94290 }, { "epoch": 0.3809839324167633, "grad_norm": 468.43536376953125, "learning_rate": 3.134537898253514e-05, "loss": 78.9491, "step": 94300 }, { "epoch": 0.3810243336821309, "grad_norm": 941.4468994140625, "learning_rate": 3.1343079135098425e-05, "loss": 51.5337, "step": 94310 }, { "epoch": 0.38106473494749854, "grad_norm": 1798.6435546875, "learning_rate": 3.134077906652255e-05, "loss": 56.2025, "step": 94320 }, { "epoch": 0.3811051362128662, "grad_norm": 1016.700927734375, "learning_rate": 3.133847877685235e-05, "loss": 48.8304, "step": 94330 }, { "epoch": 0.3811455374782338, "grad_norm": 680.2740478515625, "learning_rate": 3.133617826613266e-05, "loss": 73.8818, "step": 94340 }, { "epoch": 0.38118593874360146, "grad_norm": 628.7955932617188, "learning_rate": 3.1333877534408345e-05, "loss": 102.1594, "step": 94350 }, { "epoch": 0.3812263400089691, "grad_norm": 829.4243774414062, "learning_rate": 3.133157658172425e-05, "loss": 78.945, "step": 94360 }, { "epoch": 0.3812667412743367, "grad_norm": 1169.0318603515625, "learning_rate": 3.1329275408125245e-05, "loss": 73.4227, "step": 94370 }, { "epoch": 0.3813071425397043, "grad_norm": 984.260498046875, "learning_rate": 3.132697401365617e-05, "loss": 59.6905, "step": 94380 }, { "epoch": 0.38134754380507196, "grad_norm": 373.3670654296875, "learning_rate": 3.132467239836191e-05, "loss": 52.9543, "step": 94390 }, { "epoch": 0.3813879450704396, "grad_norm": 1990.4619140625, "learning_rate": 3.132237056228733e-05, "loss": 56.968, "step": 94400 }, { "epoch": 0.38142834633580724, "grad_norm": 1599.738525390625, "learning_rate": 3.132006850547732e-05, "loss": 55.0064, "step": 94410 }, { "epoch": 0.3814687476011749, "grad_norm": 566.6906127929688, "learning_rate": 3.131776622797673e-05, "loss": 73.1165, "step": 94420 }, { "epoch": 0.3815091488665425, "grad_norm": 1275.9754638671875, "learning_rate": 3.131546372983047e-05, "loss": 73.1124, "step": 94430 }, { "epoch": 0.3815495501319101, "grad_norm": 368.97845458984375, "learning_rate": 3.131316101108341e-05, "loss": 66.7757, "step": 94440 }, { "epoch": 0.38158995139727775, "grad_norm": 675.3721923828125, "learning_rate": 3.131085807178046e-05, "loss": 52.4319, "step": 94450 }, { "epoch": 0.3816303526626454, "grad_norm": 3169.10498046875, "learning_rate": 3.13085549119665e-05, "loss": 49.8318, "step": 94460 }, { "epoch": 0.38167075392801303, "grad_norm": 1114.07763671875, "learning_rate": 3.130625153168645e-05, "loss": 76.9605, "step": 94470 }, { "epoch": 0.38171115519338067, "grad_norm": 793.2512817382812, "learning_rate": 3.13039479309852e-05, "loss": 61.0932, "step": 94480 }, { "epoch": 0.3817515564587483, "grad_norm": 1011.723388671875, "learning_rate": 3.130164410990767e-05, "loss": 59.3565, "step": 94490 }, { "epoch": 0.3817919577241159, "grad_norm": 943.5010375976562, "learning_rate": 3.129934006849876e-05, "loss": 69.2316, "step": 94500 }, { "epoch": 0.38183235898948353, "grad_norm": 555.880126953125, "learning_rate": 3.1297035806803407e-05, "loss": 76.105, "step": 94510 }, { "epoch": 0.3818727602548512, "grad_norm": 1616.5235595703125, "learning_rate": 3.129473132486652e-05, "loss": 84.9985, "step": 94520 }, { "epoch": 0.3819131615202188, "grad_norm": 1294.511962890625, "learning_rate": 3.1292426622733036e-05, "loss": 43.3877, "step": 94530 }, { "epoch": 0.38195356278558645, "grad_norm": 636.7005004882812, "learning_rate": 3.1290121700447876e-05, "loss": 30.9998, "step": 94540 }, { "epoch": 0.3819939640509541, "grad_norm": 1390.4415283203125, "learning_rate": 3.128781655805599e-05, "loss": 59.5266, "step": 94550 }, { "epoch": 0.38203436531632173, "grad_norm": 924.8335571289062, "learning_rate": 3.12855111956023e-05, "loss": 35.2753, "step": 94560 }, { "epoch": 0.3820747665816893, "grad_norm": 3251.146484375, "learning_rate": 3.128320561313176e-05, "loss": 87.2722, "step": 94570 }, { "epoch": 0.38211516784705696, "grad_norm": 307.5578308105469, "learning_rate": 3.1280899810689316e-05, "loss": 79.3565, "step": 94580 }, { "epoch": 0.3821555691124246, "grad_norm": 838.0941772460938, "learning_rate": 3.127859378831992e-05, "loss": 77.5458, "step": 94590 }, { "epoch": 0.38219597037779224, "grad_norm": 760.473388671875, "learning_rate": 3.1276287546068536e-05, "loss": 53.5369, "step": 94600 }, { "epoch": 0.3822363716431599, "grad_norm": 761.6842041015625, "learning_rate": 3.1273981083980126e-05, "loss": 77.6941, "step": 94610 }, { "epoch": 0.3822767729085275, "grad_norm": 323.1531677246094, "learning_rate": 3.127167440209964e-05, "loss": 62.5255, "step": 94620 }, { "epoch": 0.3823171741738951, "grad_norm": 1118.4254150390625, "learning_rate": 3.1269367500472065e-05, "loss": 63.3706, "step": 94630 }, { "epoch": 0.38235757543926274, "grad_norm": 1080.7705078125, "learning_rate": 3.126706037914237e-05, "loss": 37.4864, "step": 94640 }, { "epoch": 0.3823979767046304, "grad_norm": 670.8095092773438, "learning_rate": 3.126475303815554e-05, "loss": 53.1566, "step": 94650 }, { "epoch": 0.382438377969998, "grad_norm": 1319.5672607421875, "learning_rate": 3.1262445477556536e-05, "loss": 58.251, "step": 94660 }, { "epoch": 0.38247877923536566, "grad_norm": 454.59344482421875, "learning_rate": 3.126013769739036e-05, "loss": 48.4365, "step": 94670 }, { "epoch": 0.3825191805007333, "grad_norm": 1183.15869140625, "learning_rate": 3.1257829697702004e-05, "loss": 86.8193, "step": 94680 }, { "epoch": 0.3825595817661009, "grad_norm": 570.905517578125, "learning_rate": 3.125552147853646e-05, "loss": 62.5943, "step": 94690 }, { "epoch": 0.3825999830314685, "grad_norm": 379.4037780761719, "learning_rate": 3.125321303993874e-05, "loss": 55.0205, "step": 94700 }, { "epoch": 0.38264038429683617, "grad_norm": 1375.26953125, "learning_rate": 3.125090438195383e-05, "loss": 160.7165, "step": 94710 }, { "epoch": 0.3826807855622038, "grad_norm": 587.9031372070312, "learning_rate": 3.124859550462675e-05, "loss": 43.147, "step": 94720 }, { "epoch": 0.38272118682757145, "grad_norm": 3379.8837890625, "learning_rate": 3.124628640800251e-05, "loss": 110.5884, "step": 94730 }, { "epoch": 0.3827615880929391, "grad_norm": 891.32373046875, "learning_rate": 3.124397709212613e-05, "loss": 52.807, "step": 94740 }, { "epoch": 0.3828019893583067, "grad_norm": 1796.2423095703125, "learning_rate": 3.124166755704261e-05, "loss": 59.8244, "step": 94750 }, { "epoch": 0.3828423906236743, "grad_norm": 629.0004272460938, "learning_rate": 3.123935780279701e-05, "loss": 59.133, "step": 94760 }, { "epoch": 0.38288279188904195, "grad_norm": 718.2772216796875, "learning_rate": 3.123704782943434e-05, "loss": 53.9646, "step": 94770 }, { "epoch": 0.3829231931544096, "grad_norm": 980.1384887695312, "learning_rate": 3.123473763699964e-05, "loss": 65.5862, "step": 94780 }, { "epoch": 0.38296359441977723, "grad_norm": 986.5202026367188, "learning_rate": 3.1232427225537935e-05, "loss": 56.9099, "step": 94790 }, { "epoch": 0.38300399568514487, "grad_norm": 653.0430908203125, "learning_rate": 3.123011659509429e-05, "loss": 78.1085, "step": 94800 }, { "epoch": 0.3830443969505125, "grad_norm": 670.354736328125, "learning_rate": 3.122780574571373e-05, "loss": 60.5578, "step": 94810 }, { "epoch": 0.3830847982158801, "grad_norm": 432.60552978515625, "learning_rate": 3.122549467744133e-05, "loss": 88.2522, "step": 94820 }, { "epoch": 0.38312519948124774, "grad_norm": 587.0864868164062, "learning_rate": 3.122318339032212e-05, "loss": 63.6049, "step": 94830 }, { "epoch": 0.3831656007466154, "grad_norm": 1073.345947265625, "learning_rate": 3.122087188440118e-05, "loss": 62.4557, "step": 94840 }, { "epoch": 0.383206002011983, "grad_norm": 681.5462036132812, "learning_rate": 3.121856015972356e-05, "loss": 49.4943, "step": 94850 }, { "epoch": 0.38324640327735066, "grad_norm": 467.7748718261719, "learning_rate": 3.121624821633434e-05, "loss": 46.3031, "step": 94860 }, { "epoch": 0.3832868045427183, "grad_norm": 809.7584838867188, "learning_rate": 3.121393605427858e-05, "loss": 58.0714, "step": 94870 }, { "epoch": 0.38332720580808594, "grad_norm": 171.03561401367188, "learning_rate": 3.1211623673601366e-05, "loss": 76.0863, "step": 94880 }, { "epoch": 0.3833676070734535, "grad_norm": 392.6013488769531, "learning_rate": 3.120931107434778e-05, "loss": 45.0487, "step": 94890 }, { "epoch": 0.38340800833882116, "grad_norm": 462.5611572265625, "learning_rate": 3.1206998256562894e-05, "loss": 68.4658, "step": 94900 }, { "epoch": 0.3834484096041888, "grad_norm": 521.8003540039062, "learning_rate": 3.120468522029182e-05, "loss": 92.6952, "step": 94910 }, { "epoch": 0.38348881086955644, "grad_norm": 831.3837280273438, "learning_rate": 3.1202371965579625e-05, "loss": 71.1433, "step": 94920 }, { "epoch": 0.3835292121349241, "grad_norm": 0.0, "learning_rate": 3.120005849247143e-05, "loss": 69.1638, "step": 94930 }, { "epoch": 0.3835696134002917, "grad_norm": 501.0518493652344, "learning_rate": 3.119774480101233e-05, "loss": 52.1852, "step": 94940 }, { "epoch": 0.3836100146656593, "grad_norm": 565.007080078125, "learning_rate": 3.119543089124743e-05, "loss": 82.6033, "step": 94950 }, { "epoch": 0.38365041593102694, "grad_norm": 489.13934326171875, "learning_rate": 3.119311676322183e-05, "loss": 41.9466, "step": 94960 }, { "epoch": 0.3836908171963946, "grad_norm": 1597.20947265625, "learning_rate": 3.1190802416980664e-05, "loss": 57.1254, "step": 94970 }, { "epoch": 0.3837312184617622, "grad_norm": 492.63336181640625, "learning_rate": 3.118848785256905e-05, "loss": 55.7734, "step": 94980 }, { "epoch": 0.38377161972712986, "grad_norm": 315.7362365722656, "learning_rate": 3.118617307003209e-05, "loss": 60.9176, "step": 94990 }, { "epoch": 0.3838120209924975, "grad_norm": 777.3889770507812, "learning_rate": 3.1183858069414936e-05, "loss": 60.9691, "step": 95000 }, { "epoch": 0.3838524222578651, "grad_norm": 945.9274291992188, "learning_rate": 3.118154285076271e-05, "loss": 44.9719, "step": 95010 }, { "epoch": 0.38389282352323273, "grad_norm": 969.2196655273438, "learning_rate": 3.1179227414120556e-05, "loss": 103.0269, "step": 95020 }, { "epoch": 0.38393322478860037, "grad_norm": 1758.84033203125, "learning_rate": 3.11769117595336e-05, "loss": 94.0152, "step": 95030 }, { "epoch": 0.383973626053968, "grad_norm": 750.189697265625, "learning_rate": 3.1174595887047e-05, "loss": 59.8756, "step": 95040 }, { "epoch": 0.38401402731933565, "grad_norm": 743.2469482421875, "learning_rate": 3.11722797967059e-05, "loss": 72.4545, "step": 95050 }, { "epoch": 0.3840544285847033, "grad_norm": 746.4964599609375, "learning_rate": 3.1169963488555445e-05, "loss": 85.1586, "step": 95060 }, { "epoch": 0.38409482985007093, "grad_norm": 662.7686767578125, "learning_rate": 3.116764696264081e-05, "loss": 61.581, "step": 95070 }, { "epoch": 0.3841352311154385, "grad_norm": 280.2574157714844, "learning_rate": 3.116533021900714e-05, "loss": 55.689, "step": 95080 }, { "epoch": 0.38417563238080615, "grad_norm": 0.0, "learning_rate": 3.116301325769961e-05, "loss": 46.6048, "step": 95090 }, { "epoch": 0.3842160336461738, "grad_norm": 691.7823486328125, "learning_rate": 3.11606960787634e-05, "loss": 55.2978, "step": 95100 }, { "epoch": 0.38425643491154143, "grad_norm": 492.3890686035156, "learning_rate": 3.1158378682243666e-05, "loss": 73.2431, "step": 95110 }, { "epoch": 0.3842968361769091, "grad_norm": 622.8359375, "learning_rate": 3.1156061068185595e-05, "loss": 54.8487, "step": 95120 }, { "epoch": 0.3843372374422767, "grad_norm": 857.6041259765625, "learning_rate": 3.115374323663437e-05, "loss": 79.2417, "step": 95130 }, { "epoch": 0.3843776387076443, "grad_norm": 616.6744384765625, "learning_rate": 3.115142518763518e-05, "loss": 79.7958, "step": 95140 }, { "epoch": 0.38441803997301194, "grad_norm": 1210.3330078125, "learning_rate": 3.1149106921233216e-05, "loss": 125.1222, "step": 95150 }, { "epoch": 0.3844584412383796, "grad_norm": 768.7312622070312, "learning_rate": 3.114678843747367e-05, "loss": 59.6823, "step": 95160 }, { "epoch": 0.3844988425037472, "grad_norm": 537.5147705078125, "learning_rate": 3.114446973640175e-05, "loss": 41.4512, "step": 95170 }, { "epoch": 0.38453924376911486, "grad_norm": 1011.1067504882812, "learning_rate": 3.114215081806265e-05, "loss": 48.0649, "step": 95180 }, { "epoch": 0.3845796450344825, "grad_norm": 247.35574340820312, "learning_rate": 3.113983168250158e-05, "loss": 101.9663, "step": 95190 }, { "epoch": 0.38462004629985014, "grad_norm": 588.1903686523438, "learning_rate": 3.113751232976376e-05, "loss": 62.9706, "step": 95200 }, { "epoch": 0.3846604475652177, "grad_norm": 567.7507934570312, "learning_rate": 3.1135192759894403e-05, "loss": 70.8379, "step": 95210 }, { "epoch": 0.38470084883058536, "grad_norm": 835.3895263671875, "learning_rate": 3.1132872972938735e-05, "loss": 29.8401, "step": 95220 }, { "epoch": 0.384741250095953, "grad_norm": 347.2232971191406, "learning_rate": 3.113055296894197e-05, "loss": 38.3244, "step": 95230 }, { "epoch": 0.38478165136132064, "grad_norm": 1113.609619140625, "learning_rate": 3.112823274794935e-05, "loss": 58.9926, "step": 95240 }, { "epoch": 0.3848220526266883, "grad_norm": 791.2014770507812, "learning_rate": 3.11259123100061e-05, "loss": 63.5421, "step": 95250 }, { "epoch": 0.3848624538920559, "grad_norm": 805.3345336914062, "learning_rate": 3.112359165515746e-05, "loss": 39.3376, "step": 95260 }, { "epoch": 0.3849028551574235, "grad_norm": 724.2696533203125, "learning_rate": 3.112127078344868e-05, "loss": 110.7945, "step": 95270 }, { "epoch": 0.38494325642279115, "grad_norm": 491.9793701171875, "learning_rate": 3.111894969492499e-05, "loss": 42.779, "step": 95280 }, { "epoch": 0.3849836576881588, "grad_norm": 685.3145141601562, "learning_rate": 3.1116628389631655e-05, "loss": 85.3884, "step": 95290 }, { "epoch": 0.3850240589535264, "grad_norm": 534.1943969726562, "learning_rate": 3.1114306867613935e-05, "loss": 52.4637, "step": 95300 }, { "epoch": 0.38506446021889407, "grad_norm": 832.2371215820312, "learning_rate": 3.111198512891707e-05, "loss": 51.0535, "step": 95310 }, { "epoch": 0.3851048614842617, "grad_norm": 2605.079345703125, "learning_rate": 3.110966317358634e-05, "loss": 75.6956, "step": 95320 }, { "epoch": 0.3851452627496293, "grad_norm": 1315.3677978515625, "learning_rate": 3.1107341001667007e-05, "loss": 67.2918, "step": 95330 }, { "epoch": 0.38518566401499693, "grad_norm": 776.8063354492188, "learning_rate": 3.110501861320434e-05, "loss": 52.1419, "step": 95340 }, { "epoch": 0.38522606528036457, "grad_norm": 677.2888793945312, "learning_rate": 3.110269600824362e-05, "loss": 67.7612, "step": 95350 }, { "epoch": 0.3852664665457322, "grad_norm": 1661.3873291015625, "learning_rate": 3.110037318683012e-05, "loss": 43.2306, "step": 95360 }, { "epoch": 0.38530686781109985, "grad_norm": 529.8228149414062, "learning_rate": 3.109805014900914e-05, "loss": 72.6942, "step": 95370 }, { "epoch": 0.3853472690764675, "grad_norm": 951.1519165039062, "learning_rate": 3.109572689482595e-05, "loss": 48.7285, "step": 95380 }, { "epoch": 0.38538767034183513, "grad_norm": 720.6952514648438, "learning_rate": 3.1093403424325855e-05, "loss": 60.9023, "step": 95390 }, { "epoch": 0.3854280716072027, "grad_norm": 672.1053466796875, "learning_rate": 3.109107973755415e-05, "loss": 48.6235, "step": 95400 }, { "epoch": 0.38546847287257036, "grad_norm": 1297.8304443359375, "learning_rate": 3.1088755834556135e-05, "loss": 60.5914, "step": 95410 }, { "epoch": 0.385508874137938, "grad_norm": 1021.5657348632812, "learning_rate": 3.108643171537711e-05, "loss": 63.9048, "step": 95420 }, { "epoch": 0.38554927540330564, "grad_norm": 987.0369262695312, "learning_rate": 3.10841073800624e-05, "loss": 55.4489, "step": 95430 }, { "epoch": 0.3855896766686733, "grad_norm": 723.7199096679688, "learning_rate": 3.10817828286573e-05, "loss": 69.069, "step": 95440 }, { "epoch": 0.3856300779340409, "grad_norm": 919.791015625, "learning_rate": 3.107945806120715e-05, "loss": 55.2413, "step": 95450 }, { "epoch": 0.3856704791994085, "grad_norm": 731.2409057617188, "learning_rate": 3.1077133077757257e-05, "loss": 77.2168, "step": 95460 }, { "epoch": 0.38571088046477614, "grad_norm": 551.4536743164062, "learning_rate": 3.107480787835295e-05, "loss": 69.0535, "step": 95470 }, { "epoch": 0.3857512817301438, "grad_norm": 505.9342041015625, "learning_rate": 3.107248246303956e-05, "loss": 51.6537, "step": 95480 }, { "epoch": 0.3857916829955114, "grad_norm": 969.6237182617188, "learning_rate": 3.1070156831862424e-05, "loss": 51.4906, "step": 95490 }, { "epoch": 0.38583208426087906, "grad_norm": 1107.892822265625, "learning_rate": 3.106783098486688e-05, "loss": 45.553, "step": 95500 }, { "epoch": 0.3858724855262467, "grad_norm": 786.083251953125, "learning_rate": 3.106550492209828e-05, "loss": 53.2214, "step": 95510 }, { "epoch": 0.38591288679161434, "grad_norm": 904.0626831054688, "learning_rate": 3.1063178643601957e-05, "loss": 66.1209, "step": 95520 }, { "epoch": 0.3859532880569819, "grad_norm": 1052.266845703125, "learning_rate": 3.106085214942328e-05, "loss": 54.094, "step": 95530 }, { "epoch": 0.38599368932234956, "grad_norm": 478.57110595703125, "learning_rate": 3.105852543960759e-05, "loss": 66.3925, "step": 95540 }, { "epoch": 0.3860340905877172, "grad_norm": 403.1227722167969, "learning_rate": 3.105619851420025e-05, "loss": 81.0497, "step": 95550 }, { "epoch": 0.38607449185308484, "grad_norm": 477.69342041015625, "learning_rate": 3.105387137324663e-05, "loss": 85.4535, "step": 95560 }, { "epoch": 0.3861148931184525, "grad_norm": 773.2382202148438, "learning_rate": 3.1051544016792096e-05, "loss": 60.2238, "step": 95570 }, { "epoch": 0.3861552943838201, "grad_norm": 445.6334533691406, "learning_rate": 3.1049216444882024e-05, "loss": 53.3764, "step": 95580 }, { "epoch": 0.3861956956491877, "grad_norm": 531.4219360351562, "learning_rate": 3.104688865756179e-05, "loss": 73.8038, "step": 95590 }, { "epoch": 0.38623609691455535, "grad_norm": 833.8771362304688, "learning_rate": 3.1044560654876775e-05, "loss": 80.2652, "step": 95600 }, { "epoch": 0.386276498179923, "grad_norm": 525.5472412109375, "learning_rate": 3.104223243687236e-05, "loss": 46.4954, "step": 95610 }, { "epoch": 0.38631689944529063, "grad_norm": 1135.0772705078125, "learning_rate": 3.1039904003593946e-05, "loss": 68.1692, "step": 95620 }, { "epoch": 0.38635730071065827, "grad_norm": 527.236572265625, "learning_rate": 3.103757535508691e-05, "loss": 47.8166, "step": 95630 }, { "epoch": 0.3863977019760259, "grad_norm": 1182.5828857421875, "learning_rate": 3.103524649139667e-05, "loss": 66.4084, "step": 95640 }, { "epoch": 0.3864381032413935, "grad_norm": 556.0507202148438, "learning_rate": 3.103291741256861e-05, "loss": 61.376, "step": 95650 }, { "epoch": 0.38647850450676113, "grad_norm": 0.0, "learning_rate": 3.103058811864815e-05, "loss": 57.1049, "step": 95660 }, { "epoch": 0.3865189057721288, "grad_norm": 1166.5506591796875, "learning_rate": 3.102825860968069e-05, "loss": 71.8964, "step": 95670 }, { "epoch": 0.3865593070374964, "grad_norm": 438.2193603515625, "learning_rate": 3.102592888571166e-05, "loss": 311.3151, "step": 95680 }, { "epoch": 0.38659970830286405, "grad_norm": 1997.8970947265625, "learning_rate": 3.102359894678645e-05, "loss": 74.6881, "step": 95690 }, { "epoch": 0.3866401095682317, "grad_norm": 487.5499267578125, "learning_rate": 3.1021268792950524e-05, "loss": 74.8267, "step": 95700 }, { "epoch": 0.38668051083359933, "grad_norm": 360.4342346191406, "learning_rate": 3.101893842424928e-05, "loss": 69.5386, "step": 95710 }, { "epoch": 0.3867209120989669, "grad_norm": 716.3482666015625, "learning_rate": 3.101660784072816e-05, "loss": 78.6839, "step": 95720 }, { "epoch": 0.38676131336433456, "grad_norm": 293.6958312988281, "learning_rate": 3.10142770424326e-05, "loss": 63.8406, "step": 95730 }, { "epoch": 0.3868017146297022, "grad_norm": 427.47882080078125, "learning_rate": 3.1011946029408035e-05, "loss": 71.2774, "step": 95740 }, { "epoch": 0.38684211589506984, "grad_norm": 781.7965087890625, "learning_rate": 3.1009614801699914e-05, "loss": 59.1214, "step": 95750 }, { "epoch": 0.3868825171604375, "grad_norm": 654.1803588867188, "learning_rate": 3.100728335935369e-05, "loss": 84.9144, "step": 95760 }, { "epoch": 0.3869229184258051, "grad_norm": 693.10595703125, "learning_rate": 3.10049517024148e-05, "loss": 56.505, "step": 95770 }, { "epoch": 0.3869633196911727, "grad_norm": 450.68280029296875, "learning_rate": 3.1002619830928715e-05, "loss": 40.1912, "step": 95780 }, { "epoch": 0.38700372095654034, "grad_norm": 664.6299438476562, "learning_rate": 3.100028774494089e-05, "loss": 60.4155, "step": 95790 }, { "epoch": 0.387044122221908, "grad_norm": 1185.4459228515625, "learning_rate": 3.09979554444968e-05, "loss": 45.9476, "step": 95800 }, { "epoch": 0.3870845234872756, "grad_norm": 973.5237426757812, "learning_rate": 3.09956229296419e-05, "loss": 75.4969, "step": 95810 }, { "epoch": 0.38712492475264326, "grad_norm": 921.3255004882812, "learning_rate": 3.0993290200421676e-05, "loss": 58.8776, "step": 95820 }, { "epoch": 0.3871653260180109, "grad_norm": 965.7318115234375, "learning_rate": 3.099095725688159e-05, "loss": 55.5938, "step": 95830 }, { "epoch": 0.38720572728337854, "grad_norm": 1331.021240234375, "learning_rate": 3.098862409906714e-05, "loss": 79.6023, "step": 95840 }, { "epoch": 0.3872461285487461, "grad_norm": 560.6642456054688, "learning_rate": 3.09862907270238e-05, "loss": 57.35, "step": 95850 }, { "epoch": 0.38728652981411377, "grad_norm": 604.1090087890625, "learning_rate": 3.098395714079708e-05, "loss": 70.6695, "step": 95860 }, { "epoch": 0.3873269310794814, "grad_norm": 310.1190185546875, "learning_rate": 3.0981623340432446e-05, "loss": 61.8486, "step": 95870 }, { "epoch": 0.38736733234484905, "grad_norm": 540.4014282226562, "learning_rate": 3.097928932597542e-05, "loss": 50.6746, "step": 95880 }, { "epoch": 0.3874077336102167, "grad_norm": 940.4336547851562, "learning_rate": 3.097695509747149e-05, "loss": 77.3138, "step": 95890 }, { "epoch": 0.3874481348755843, "grad_norm": 624.2680053710938, "learning_rate": 3.0974620654966175e-05, "loss": 63.9111, "step": 95900 }, { "epoch": 0.3874885361409519, "grad_norm": 513.7975463867188, "learning_rate": 3.097228599850498e-05, "loss": 71.1971, "step": 95910 }, { "epoch": 0.38752893740631955, "grad_norm": 398.7431335449219, "learning_rate": 3.096995112813341e-05, "loss": 63.1596, "step": 95920 }, { "epoch": 0.3875693386716872, "grad_norm": 432.8017272949219, "learning_rate": 3.0967616043897e-05, "loss": 46.5326, "step": 95930 }, { "epoch": 0.38760973993705483, "grad_norm": 445.6565856933594, "learning_rate": 3.096528074584128e-05, "loss": 58.431, "step": 95940 }, { "epoch": 0.38765014120242247, "grad_norm": 456.55230712890625, "learning_rate": 3.0962945234011755e-05, "loss": 59.9908, "step": 95950 }, { "epoch": 0.3876905424677901, "grad_norm": 472.28472900390625, "learning_rate": 3.096060950845397e-05, "loss": 48.5587, "step": 95960 }, { "epoch": 0.3877309437331577, "grad_norm": 1048.744873046875, "learning_rate": 3.0958273569213456e-05, "loss": 67.3828, "step": 95970 }, { "epoch": 0.38777134499852534, "grad_norm": 1082.98876953125, "learning_rate": 3.095593741633577e-05, "loss": 82.9235, "step": 95980 }, { "epoch": 0.387811746263893, "grad_norm": 530.9251708984375, "learning_rate": 3.095360104986643e-05, "loss": 64.6616, "step": 95990 }, { "epoch": 0.3878521475292606, "grad_norm": 1165.728759765625, "learning_rate": 3.095126446985101e-05, "loss": 79.6478, "step": 96000 }, { "epoch": 0.38789254879462826, "grad_norm": 1305.37158203125, "learning_rate": 3.0948927676335044e-05, "loss": 83.1801, "step": 96010 }, { "epoch": 0.3879329500599959, "grad_norm": 529.7338256835938, "learning_rate": 3.09465906693641e-05, "loss": 76.9614, "step": 96020 }, { "epoch": 0.38797335132536354, "grad_norm": 837.6686401367188, "learning_rate": 3.0944253448983725e-05, "loss": 91.7809, "step": 96030 }, { "epoch": 0.3880137525907311, "grad_norm": 457.90679931640625, "learning_rate": 3.0941916015239505e-05, "loss": 40.1575, "step": 96040 }, { "epoch": 0.38805415385609876, "grad_norm": 667.5697021484375, "learning_rate": 3.0939578368176997e-05, "loss": 51.0978, "step": 96050 }, { "epoch": 0.3880945551214664, "grad_norm": 684.8994750976562, "learning_rate": 3.093724050784178e-05, "loss": 61.2667, "step": 96060 }, { "epoch": 0.38813495638683404, "grad_norm": 405.88916015625, "learning_rate": 3.093490243427942e-05, "loss": 49.8711, "step": 96070 }, { "epoch": 0.3881753576522017, "grad_norm": 485.23602294921875, "learning_rate": 3.093256414753551e-05, "loss": 40.8211, "step": 96080 }, { "epoch": 0.3882157589175693, "grad_norm": 1215.4119873046875, "learning_rate": 3.093022564765564e-05, "loss": 81.3737, "step": 96090 }, { "epoch": 0.3882561601829369, "grad_norm": 568.2235717773438, "learning_rate": 3.092788693468539e-05, "loss": 55.6435, "step": 96100 }, { "epoch": 0.38829656144830454, "grad_norm": 774.2294311523438, "learning_rate": 3.092554800867035e-05, "loss": 42.8646, "step": 96110 }, { "epoch": 0.3883369627136722, "grad_norm": 765.2290649414062, "learning_rate": 3.092320886965614e-05, "loss": 67.741, "step": 96120 }, { "epoch": 0.3883773639790398, "grad_norm": 740.162841796875, "learning_rate": 3.0920869517688336e-05, "loss": 100.8398, "step": 96130 }, { "epoch": 0.38841776524440746, "grad_norm": 1602.5484619140625, "learning_rate": 3.091852995281257e-05, "loss": 82.2206, "step": 96140 }, { "epoch": 0.3884581665097751, "grad_norm": 579.366943359375, "learning_rate": 3.091619017507443e-05, "loss": 56.4242, "step": 96150 }, { "epoch": 0.38849856777514274, "grad_norm": 873.070556640625, "learning_rate": 3.091385018451955e-05, "loss": 84.0062, "step": 96160 }, { "epoch": 0.38853896904051033, "grad_norm": 472.2259521484375, "learning_rate": 3.091150998119354e-05, "loss": 66.6976, "step": 96170 }, { "epoch": 0.38857937030587797, "grad_norm": 2321.2109375, "learning_rate": 3.090916956514203e-05, "loss": 95.4598, "step": 96180 }, { "epoch": 0.3886197715712456, "grad_norm": 929.7415161132812, "learning_rate": 3.090682893641064e-05, "loss": 70.1647, "step": 96190 }, { "epoch": 0.38866017283661325, "grad_norm": 642.71630859375, "learning_rate": 3.090448809504501e-05, "loss": 56.4267, "step": 96200 }, { "epoch": 0.3887005741019809, "grad_norm": 531.6314697265625, "learning_rate": 3.090214704109076e-05, "loss": 41.6155, "step": 96210 }, { "epoch": 0.38874097536734853, "grad_norm": 1281.093505859375, "learning_rate": 3.089980577459354e-05, "loss": 76.9379, "step": 96220 }, { "epoch": 0.3887813766327161, "grad_norm": 184.43612670898438, "learning_rate": 3.0897464295599e-05, "loss": 61.2117, "step": 96230 }, { "epoch": 0.38882177789808375, "grad_norm": 752.0773315429688, "learning_rate": 3.0895122604152784e-05, "loss": 69.0763, "step": 96240 }, { "epoch": 0.3888621791634514, "grad_norm": 1775.0201416015625, "learning_rate": 3.0892780700300544e-05, "loss": 57.2486, "step": 96250 }, { "epoch": 0.38890258042881903, "grad_norm": 431.5108642578125, "learning_rate": 3.0890438584087936e-05, "loss": 42.2329, "step": 96260 }, { "epoch": 0.3889429816941867, "grad_norm": 1414.23046875, "learning_rate": 3.088809625556062e-05, "loss": 43.4289, "step": 96270 }, { "epoch": 0.3889833829595543, "grad_norm": 717.4227905273438, "learning_rate": 3.088575371476426e-05, "loss": 52.6565, "step": 96280 }, { "epoch": 0.3890237842249219, "grad_norm": 547.7136840820312, "learning_rate": 3.0883410961744536e-05, "loss": 43.894, "step": 96290 }, { "epoch": 0.38906418549028954, "grad_norm": 648.9277954101562, "learning_rate": 3.08810679965471e-05, "loss": 75.1155, "step": 96300 }, { "epoch": 0.3891045867556572, "grad_norm": 1267.9052734375, "learning_rate": 3.087872481921765e-05, "loss": 105.556, "step": 96310 }, { "epoch": 0.3891449880210248, "grad_norm": 1205.15234375, "learning_rate": 3.087638142980185e-05, "loss": 64.842, "step": 96320 }, { "epoch": 0.38918538928639246, "grad_norm": 394.9377746582031, "learning_rate": 3.0874037828345406e-05, "loss": 61.5966, "step": 96330 }, { "epoch": 0.3892257905517601, "grad_norm": 1072.0274658203125, "learning_rate": 3.0871694014893985e-05, "loss": 63.1424, "step": 96340 }, { "epoch": 0.38926619181712774, "grad_norm": 830.2392578125, "learning_rate": 3.08693499894933e-05, "loss": 47.6457, "step": 96350 }, { "epoch": 0.3893065930824953, "grad_norm": 832.5903930664062, "learning_rate": 3.0867005752189037e-05, "loss": 68.6419, "step": 96360 }, { "epoch": 0.38934699434786296, "grad_norm": 576.5301513671875, "learning_rate": 3.086466130302691e-05, "loss": 85.2657, "step": 96370 }, { "epoch": 0.3893873956132306, "grad_norm": 896.1205444335938, "learning_rate": 3.086231664205261e-05, "loss": 65.5966, "step": 96380 }, { "epoch": 0.38942779687859824, "grad_norm": 477.4507751464844, "learning_rate": 3.085997176931185e-05, "loss": 52.3738, "step": 96390 }, { "epoch": 0.3894681981439659, "grad_norm": 638.5934448242188, "learning_rate": 3.0857626684850355e-05, "loss": 88.827, "step": 96400 }, { "epoch": 0.3895085994093335, "grad_norm": 1253.0484619140625, "learning_rate": 3.085528138871384e-05, "loss": 44.4382, "step": 96410 }, { "epoch": 0.3895490006747011, "grad_norm": 620.13623046875, "learning_rate": 3.085293588094802e-05, "loss": 55.8784, "step": 96420 }, { "epoch": 0.38958940194006875, "grad_norm": 458.2687072753906, "learning_rate": 3.085059016159863e-05, "loss": 44.469, "step": 96430 }, { "epoch": 0.3896298032054364, "grad_norm": 1276.606689453125, "learning_rate": 3.08482442307114e-05, "loss": 50.4212, "step": 96440 }, { "epoch": 0.389670204470804, "grad_norm": 1068.017578125, "learning_rate": 3.084589808833206e-05, "loss": 58.046, "step": 96450 }, { "epoch": 0.38971060573617167, "grad_norm": 1498.3258056640625, "learning_rate": 3.084355173450636e-05, "loss": 45.1507, "step": 96460 }, { "epoch": 0.3897510070015393, "grad_norm": 1129.8160400390625, "learning_rate": 3.084120516928003e-05, "loss": 49.5181, "step": 96470 }, { "epoch": 0.38979140826690695, "grad_norm": 323.3406982421875, "learning_rate": 3.0838858392698825e-05, "loss": 33.0907, "step": 96480 }, { "epoch": 0.38983180953227453, "grad_norm": 1923.0677490234375, "learning_rate": 3.08365114048085e-05, "loss": 69.1391, "step": 96490 }, { "epoch": 0.38987221079764217, "grad_norm": 1291.444580078125, "learning_rate": 3.08341642056548e-05, "loss": 63.5927, "step": 96500 }, { "epoch": 0.3899126120630098, "grad_norm": 527.068359375, "learning_rate": 3.083181679528349e-05, "loss": 48.1131, "step": 96510 }, { "epoch": 0.38995301332837745, "grad_norm": 575.4950561523438, "learning_rate": 3.0829469173740344e-05, "loss": 60.9173, "step": 96520 }, { "epoch": 0.3899934145937451, "grad_norm": 1124.62744140625, "learning_rate": 3.082712134107111e-05, "loss": 63.2891, "step": 96530 }, { "epoch": 0.39003381585911273, "grad_norm": 2542.8984375, "learning_rate": 3.0824773297321585e-05, "loss": 85.4169, "step": 96540 }, { "epoch": 0.3900742171244803, "grad_norm": 523.2858276367188, "learning_rate": 3.082242504253752e-05, "loss": 39.4402, "step": 96550 }, { "epoch": 0.39011461838984796, "grad_norm": 902.9794311523438, "learning_rate": 3.082007657676471e-05, "loss": 52.0898, "step": 96560 }, { "epoch": 0.3901550196552156, "grad_norm": 1041.2705078125, "learning_rate": 3.081772790004894e-05, "loss": 53.8274, "step": 96570 }, { "epoch": 0.39019542092058324, "grad_norm": 767.1126098632812, "learning_rate": 3.081537901243599e-05, "loss": 59.9269, "step": 96580 }, { "epoch": 0.3902358221859509, "grad_norm": 1110.6146240234375, "learning_rate": 3.081302991397166e-05, "loss": 56.435, "step": 96590 }, { "epoch": 0.3902762234513185, "grad_norm": 398.89532470703125, "learning_rate": 3.081068060470174e-05, "loss": 66.5738, "step": 96600 }, { "epoch": 0.3903166247166861, "grad_norm": 1773.4658203125, "learning_rate": 3.080833108467204e-05, "loss": 90.1683, "step": 96610 }, { "epoch": 0.39035702598205374, "grad_norm": 671.03271484375, "learning_rate": 3.080598135392837e-05, "loss": 75.9407, "step": 96620 }, { "epoch": 0.3903974272474214, "grad_norm": 1317.63623046875, "learning_rate": 3.080363141251652e-05, "loss": 70.7445, "step": 96630 }, { "epoch": 0.390437828512789, "grad_norm": 946.417724609375, "learning_rate": 3.080128126048232e-05, "loss": 89.1544, "step": 96640 }, { "epoch": 0.39047822977815666, "grad_norm": 1541.2022705078125, "learning_rate": 3.0798930897871576e-05, "loss": 72.9328, "step": 96650 }, { "epoch": 0.3905186310435243, "grad_norm": 589.8878173828125, "learning_rate": 3.079658032473011e-05, "loss": 45.5723, "step": 96660 }, { "epoch": 0.39055903230889194, "grad_norm": 699.7723999023438, "learning_rate": 3.079422954110376e-05, "loss": 65.5379, "step": 96670 }, { "epoch": 0.3905994335742595, "grad_norm": 956.462646484375, "learning_rate": 3.0791878547038345e-05, "loss": 106.3264, "step": 96680 }, { "epoch": 0.39063983483962716, "grad_norm": 381.62884521484375, "learning_rate": 3.0789527342579705e-05, "loss": 60.4416, "step": 96690 }, { "epoch": 0.3906802361049948, "grad_norm": 853.3964233398438, "learning_rate": 3.078717592777367e-05, "loss": 56.7422, "step": 96700 }, { "epoch": 0.39072063737036244, "grad_norm": 980.0914916992188, "learning_rate": 3.078482430266609e-05, "loss": 70.416, "step": 96710 }, { "epoch": 0.3907610386357301, "grad_norm": 391.09124755859375, "learning_rate": 3.07824724673028e-05, "loss": 67.9484, "step": 96720 }, { "epoch": 0.3908014399010977, "grad_norm": 548.7298583984375, "learning_rate": 3.078012042172967e-05, "loss": 79.7029, "step": 96730 }, { "epoch": 0.3908418411664653, "grad_norm": 487.2161865234375, "learning_rate": 3.077776816599253e-05, "loss": 44.6873, "step": 96740 }, { "epoch": 0.39088224243183295, "grad_norm": 810.355224609375, "learning_rate": 3.0775415700137266e-05, "loss": 81.676, "step": 96750 }, { "epoch": 0.3909226436972006, "grad_norm": 484.19561767578125, "learning_rate": 3.077306302420971e-05, "loss": 47.9547, "step": 96760 }, { "epoch": 0.39096304496256823, "grad_norm": 1085.0274658203125, "learning_rate": 3.0770710138255754e-05, "loss": 59.3709, "step": 96770 }, { "epoch": 0.39100344622793587, "grad_norm": 805.6069946289062, "learning_rate": 3.076835704232126e-05, "loss": 57.6012, "step": 96780 }, { "epoch": 0.3910438474933035, "grad_norm": 517.8592529296875, "learning_rate": 3.076600373645209e-05, "loss": 58.5938, "step": 96790 }, { "epoch": 0.39108424875867115, "grad_norm": 1210.808349609375, "learning_rate": 3.0763650220694145e-05, "loss": 82.4248, "step": 96800 }, { "epoch": 0.39112465002403873, "grad_norm": 493.9080505371094, "learning_rate": 3.0761296495093294e-05, "loss": 82.6033, "step": 96810 }, { "epoch": 0.3911650512894064, "grad_norm": 649.4231567382812, "learning_rate": 3.075894255969543e-05, "loss": 45.7087, "step": 96820 }, { "epoch": 0.391205452554774, "grad_norm": 1165.13671875, "learning_rate": 3.075658841454644e-05, "loss": 46.0291, "step": 96830 }, { "epoch": 0.39124585382014165, "grad_norm": 1765.5262451171875, "learning_rate": 3.075423405969222e-05, "loss": 49.481, "step": 96840 }, { "epoch": 0.3912862550855093, "grad_norm": 1000.0485229492188, "learning_rate": 3.075187949517867e-05, "loss": 69.4488, "step": 96850 }, { "epoch": 0.39132665635087693, "grad_norm": 1381.7174072265625, "learning_rate": 3.07495247210517e-05, "loss": 90.2943, "step": 96860 }, { "epoch": 0.3913670576162445, "grad_norm": 1334.7869873046875, "learning_rate": 3.07471697373572e-05, "loss": 59.9629, "step": 96870 }, { "epoch": 0.39140745888161216, "grad_norm": 530.7562255859375, "learning_rate": 3.0744814544141096e-05, "loss": 80.5092, "step": 96880 }, { "epoch": 0.3914478601469798, "grad_norm": 398.388671875, "learning_rate": 3.07424591414493e-05, "loss": 88.7593, "step": 96890 }, { "epoch": 0.39148826141234744, "grad_norm": 830.0609741210938, "learning_rate": 3.0740103529327744e-05, "loss": 79.1814, "step": 96900 }, { "epoch": 0.3915286626777151, "grad_norm": 0.0, "learning_rate": 3.0737747707822334e-05, "loss": 45.4471, "step": 96910 }, { "epoch": 0.3915690639430827, "grad_norm": 781.1683349609375, "learning_rate": 3.0735391676978993e-05, "loss": 49.7049, "step": 96920 }, { "epoch": 0.3916094652084503, "grad_norm": 841.4095458984375, "learning_rate": 3.0733035436843684e-05, "loss": 74.8256, "step": 96930 }, { "epoch": 0.39164986647381794, "grad_norm": 909.210693359375, "learning_rate": 3.073067898746231e-05, "loss": 70.243, "step": 96940 }, { "epoch": 0.3916902677391856, "grad_norm": 695.3448486328125, "learning_rate": 3.0728322328880825e-05, "loss": 42.8311, "step": 96950 }, { "epoch": 0.3917306690045532, "grad_norm": 480.36798095703125, "learning_rate": 3.0725965461145175e-05, "loss": 64.1772, "step": 96960 }, { "epoch": 0.39177107026992086, "grad_norm": 376.0433349609375, "learning_rate": 3.072360838430131e-05, "loss": 57.2788, "step": 96970 }, { "epoch": 0.3918114715352885, "grad_norm": 926.780029296875, "learning_rate": 3.0721251098395175e-05, "loss": 62.5882, "step": 96980 }, { "epoch": 0.39185187280065614, "grad_norm": 625.6805419921875, "learning_rate": 3.0718893603472725e-05, "loss": 53.4292, "step": 96990 }, { "epoch": 0.3918922740660237, "grad_norm": 733.6526489257812, "learning_rate": 3.0716535899579936e-05, "loss": 48.0489, "step": 97000 }, { "epoch": 0.39193267533139137, "grad_norm": 882.0390625, "learning_rate": 3.071417798676276e-05, "loss": 60.4882, "step": 97010 }, { "epoch": 0.391973076596759, "grad_norm": 218.1538543701172, "learning_rate": 3.0711819865067165e-05, "loss": 66.0629, "step": 97020 }, { "epoch": 0.39201347786212665, "grad_norm": 232.99609375, "learning_rate": 3.070946153453913e-05, "loss": 50.7751, "step": 97030 }, { "epoch": 0.3920538791274943, "grad_norm": 958.5475463867188, "learning_rate": 3.070710299522462e-05, "loss": 48.1551, "step": 97040 }, { "epoch": 0.3920942803928619, "grad_norm": 574.308837890625, "learning_rate": 3.070474424716963e-05, "loss": 63.7208, "step": 97050 }, { "epoch": 0.3921346816582295, "grad_norm": 977.0123901367188, "learning_rate": 3.0702385290420145e-05, "loss": 67.0578, "step": 97060 }, { "epoch": 0.39217508292359715, "grad_norm": 2389.80859375, "learning_rate": 3.070002612502215e-05, "loss": 61.1409, "step": 97070 }, { "epoch": 0.3922154841889648, "grad_norm": 498.7288818359375, "learning_rate": 3.069766675102162e-05, "loss": 48.6175, "step": 97080 }, { "epoch": 0.39225588545433243, "grad_norm": 1029.843994140625, "learning_rate": 3.0695307168464584e-05, "loss": 69.121, "step": 97090 }, { "epoch": 0.39229628671970007, "grad_norm": 618.8443603515625, "learning_rate": 3.069294737739703e-05, "loss": 76.3978, "step": 97100 }, { "epoch": 0.3923366879850677, "grad_norm": 963.9727172851562, "learning_rate": 3.069058737786496e-05, "loss": 49.647, "step": 97110 }, { "epoch": 0.39237708925043535, "grad_norm": 422.73382568359375, "learning_rate": 3.068822716991438e-05, "loss": 51.2614, "step": 97120 }, { "epoch": 0.39241749051580294, "grad_norm": 1414.1259765625, "learning_rate": 3.068586675359131e-05, "loss": 74.2478, "step": 97130 }, { "epoch": 0.3924578917811706, "grad_norm": 1767.93017578125, "learning_rate": 3.0683506128941774e-05, "loss": 59.4232, "step": 97140 }, { "epoch": 0.3924982930465382, "grad_norm": 662.4166870117188, "learning_rate": 3.0681145296011774e-05, "loss": 78.8385, "step": 97150 }, { "epoch": 0.39253869431190586, "grad_norm": 504.7801208496094, "learning_rate": 3.067878425484735e-05, "loss": 52.6057, "step": 97160 }, { "epoch": 0.3925790955772735, "grad_norm": 617.1817016601562, "learning_rate": 3.067642300549453e-05, "loss": 48.8813, "step": 97170 }, { "epoch": 0.39261949684264114, "grad_norm": 898.6566772460938, "learning_rate": 3.067406154799935e-05, "loss": 56.1757, "step": 97180 }, { "epoch": 0.3926598981080087, "grad_norm": 545.7286987304688, "learning_rate": 3.0671699882407845e-05, "loss": 77.3499, "step": 97190 }, { "epoch": 0.39270029937337636, "grad_norm": 533.9075927734375, "learning_rate": 3.066933800876606e-05, "loss": 60.7204, "step": 97200 }, { "epoch": 0.392740700638744, "grad_norm": 599.5408325195312, "learning_rate": 3.066697592712003e-05, "loss": 52.177, "step": 97210 }, { "epoch": 0.39278110190411164, "grad_norm": 304.1415710449219, "learning_rate": 3.066461363751582e-05, "loss": 43.3299, "step": 97220 }, { "epoch": 0.3928215031694793, "grad_norm": 899.7566528320312, "learning_rate": 3.066225113999946e-05, "loss": 63.157, "step": 97230 }, { "epoch": 0.3928619044348469, "grad_norm": 364.7635803222656, "learning_rate": 3.0659888434617045e-05, "loss": 58.8745, "step": 97240 }, { "epoch": 0.3929023057002145, "grad_norm": 661.5354614257812, "learning_rate": 3.06575255214146e-05, "loss": 43.469, "step": 97250 }, { "epoch": 0.39294270696558214, "grad_norm": 199.37904357910156, "learning_rate": 3.0655162400438214e-05, "loss": 70.2049, "step": 97260 }, { "epoch": 0.3929831082309498, "grad_norm": 2784.505615234375, "learning_rate": 3.0652799071733956e-05, "loss": 69.4415, "step": 97270 }, { "epoch": 0.3930235094963174, "grad_norm": 2461.99951171875, "learning_rate": 3.06504355353479e-05, "loss": 87.7411, "step": 97280 }, { "epoch": 0.39306391076168506, "grad_norm": 1862.85400390625, "learning_rate": 3.0648071791326114e-05, "loss": 56.1075, "step": 97290 }, { "epoch": 0.3931043120270527, "grad_norm": 2298.992431640625, "learning_rate": 3.064570783971468e-05, "loss": 87.6112, "step": 97300 }, { "epoch": 0.39314471329242034, "grad_norm": 1390.7154541015625, "learning_rate": 3.0643343680559704e-05, "loss": 77.7914, "step": 97310 }, { "epoch": 0.39318511455778793, "grad_norm": 696.4583740234375, "learning_rate": 3.064097931390725e-05, "loss": 55.1994, "step": 97320 }, { "epoch": 0.39322551582315557, "grad_norm": 442.60455322265625, "learning_rate": 3.0638614739803435e-05, "loss": 54.2116, "step": 97330 }, { "epoch": 0.3932659170885232, "grad_norm": 1173.42431640625, "learning_rate": 3.063624995829434e-05, "loss": 57.2077, "step": 97340 }, { "epoch": 0.39330631835389085, "grad_norm": 678.0792236328125, "learning_rate": 3.0633884969426085e-05, "loss": 39.9277, "step": 97350 }, { "epoch": 0.3933467196192585, "grad_norm": 648.6259765625, "learning_rate": 3.063151977324476e-05, "loss": 83.9109, "step": 97360 }, { "epoch": 0.39338712088462613, "grad_norm": 984.8261108398438, "learning_rate": 3.0629154369796494e-05, "loss": 61.3448, "step": 97370 }, { "epoch": 0.3934275221499937, "grad_norm": 1044.814208984375, "learning_rate": 3.0626788759127384e-05, "loss": 60.0398, "step": 97380 }, { "epoch": 0.39346792341536135, "grad_norm": 487.1873779296875, "learning_rate": 3.062442294128356e-05, "loss": 61.3103, "step": 97390 }, { "epoch": 0.393508324680729, "grad_norm": 865.4752807617188, "learning_rate": 3.0622056916311134e-05, "loss": 90.9223, "step": 97400 }, { "epoch": 0.39354872594609663, "grad_norm": 667.01123046875, "learning_rate": 3.061969068425625e-05, "loss": 65.7805, "step": 97410 }, { "epoch": 0.3935891272114643, "grad_norm": 314.469482421875, "learning_rate": 3.0617324245165014e-05, "loss": 67.8273, "step": 97420 }, { "epoch": 0.3936295284768319, "grad_norm": 1064.36181640625, "learning_rate": 3.061495759908359e-05, "loss": 74.8667, "step": 97430 }, { "epoch": 0.3936699297421995, "grad_norm": 2008.50537109375, "learning_rate": 3.0612590746058094e-05, "loss": 96.3463, "step": 97440 }, { "epoch": 0.39371033100756714, "grad_norm": 670.7850952148438, "learning_rate": 3.0610223686134685e-05, "loss": 74.3781, "step": 97450 }, { "epoch": 0.3937507322729348, "grad_norm": 371.1806640625, "learning_rate": 3.06078564193595e-05, "loss": 58.2147, "step": 97460 }, { "epoch": 0.3937911335383024, "grad_norm": 210.1017608642578, "learning_rate": 3.060548894577869e-05, "loss": 50.8013, "step": 97470 }, { "epoch": 0.39383153480367006, "grad_norm": 1518.7855224609375, "learning_rate": 3.060312126543842e-05, "loss": 62.9751, "step": 97480 }, { "epoch": 0.3938719360690377, "grad_norm": 422.8009033203125, "learning_rate": 3.060075337838483e-05, "loss": 41.03, "step": 97490 }, { "epoch": 0.39391233733440534, "grad_norm": 1671.2236328125, "learning_rate": 3.05983852846641e-05, "loss": 60.0833, "step": 97500 }, { "epoch": 0.3939527385997729, "grad_norm": 1641.0908203125, "learning_rate": 3.05960169843224e-05, "loss": 67.803, "step": 97510 }, { "epoch": 0.39399313986514056, "grad_norm": 810.8970947265625, "learning_rate": 3.0593648477405885e-05, "loss": 76.5214, "step": 97520 }, { "epoch": 0.3940335411305082, "grad_norm": 1572.5703125, "learning_rate": 3.0591279763960735e-05, "loss": 68.239, "step": 97530 }, { "epoch": 0.39407394239587584, "grad_norm": 717.9578857421875, "learning_rate": 3.058891084403315e-05, "loss": 83.6105, "step": 97540 }, { "epoch": 0.3941143436612435, "grad_norm": 220.7050018310547, "learning_rate": 3.0586541717669286e-05, "loss": 51.4329, "step": 97550 }, { "epoch": 0.3941547449266111, "grad_norm": 1055.88671875, "learning_rate": 3.058417238491534e-05, "loss": 53.919, "step": 97560 }, { "epoch": 0.3941951461919787, "grad_norm": 330.67095947265625, "learning_rate": 3.058180284581751e-05, "loss": 52.7184, "step": 97570 }, { "epoch": 0.39423554745734635, "grad_norm": 620.1004638671875, "learning_rate": 3.0579433100421974e-05, "loss": 52.7724, "step": 97580 }, { "epoch": 0.394275948722714, "grad_norm": 1500.8912353515625, "learning_rate": 3.0577063148774955e-05, "loss": 52.9456, "step": 97590 }, { "epoch": 0.3943163499880816, "grad_norm": 718.37060546875, "learning_rate": 3.057469299092264e-05, "loss": 89.2843, "step": 97600 }, { "epoch": 0.39435675125344927, "grad_norm": 690.8677368164062, "learning_rate": 3.0572322626911235e-05, "loss": 58.6851, "step": 97610 }, { "epoch": 0.3943971525188169, "grad_norm": 586.640380859375, "learning_rate": 3.056995205678697e-05, "loss": 49.8112, "step": 97620 }, { "epoch": 0.39443755378418455, "grad_norm": 488.9980163574219, "learning_rate": 3.056758128059603e-05, "loss": 68.7142, "step": 97630 }, { "epoch": 0.39447795504955213, "grad_norm": 1710.490966796875, "learning_rate": 3.056521029838467e-05, "loss": 63.958, "step": 97640 }, { "epoch": 0.39451835631491977, "grad_norm": 1692.4881591796875, "learning_rate": 3.056283911019909e-05, "loss": 98.5401, "step": 97650 }, { "epoch": 0.3945587575802874, "grad_norm": 545.5447387695312, "learning_rate": 3.056046771608552e-05, "loss": 49.8526, "step": 97660 }, { "epoch": 0.39459915884565505, "grad_norm": 1038.90478515625, "learning_rate": 3.0558096116090206e-05, "loss": 74.6395, "step": 97670 }, { "epoch": 0.3946395601110227, "grad_norm": 1187.12109375, "learning_rate": 3.055572431025936e-05, "loss": 64.7024, "step": 97680 }, { "epoch": 0.39467996137639033, "grad_norm": 892.6353759765625, "learning_rate": 3.055335229863924e-05, "loss": 50.8247, "step": 97690 }, { "epoch": 0.3947203626417579, "grad_norm": 588.580322265625, "learning_rate": 3.0550980081276075e-05, "loss": 44.0533, "step": 97700 }, { "epoch": 0.39476076390712556, "grad_norm": 586.703857421875, "learning_rate": 3.054860765821613e-05, "loss": 65.8129, "step": 97710 }, { "epoch": 0.3948011651724932, "grad_norm": 501.59954833984375, "learning_rate": 3.054623502950565e-05, "loss": 54.969, "step": 97720 }, { "epoch": 0.39484156643786084, "grad_norm": 2821.66357421875, "learning_rate": 3.054386219519088e-05, "loss": 90.8097, "step": 97730 }, { "epoch": 0.3948819677032285, "grad_norm": 957.9890747070312, "learning_rate": 3.0541489155318096e-05, "loss": 73.9644, "step": 97740 }, { "epoch": 0.3949223689685961, "grad_norm": 2029.6026611328125, "learning_rate": 3.0539115909933554e-05, "loss": 70.6401, "step": 97750 }, { "epoch": 0.3949627702339637, "grad_norm": 469.2944030761719, "learning_rate": 3.053674245908352e-05, "loss": 71.6168, "step": 97760 }, { "epoch": 0.39500317149933134, "grad_norm": 521.1866455078125, "learning_rate": 3.0534368802814266e-05, "loss": 60.3303, "step": 97770 }, { "epoch": 0.395043572764699, "grad_norm": 829.1405639648438, "learning_rate": 3.053199494117206e-05, "loss": 59.322, "step": 97780 }, { "epoch": 0.3950839740300666, "grad_norm": 768.0505981445312, "learning_rate": 3.0529620874203204e-05, "loss": 80.8334, "step": 97790 }, { "epoch": 0.39512437529543426, "grad_norm": 376.0151672363281, "learning_rate": 3.052724660195396e-05, "loss": 40.3237, "step": 97800 }, { "epoch": 0.3951647765608019, "grad_norm": 0.0, "learning_rate": 3.052487212447063e-05, "loss": 76.3937, "step": 97810 }, { "epoch": 0.39520517782616954, "grad_norm": 284.3887023925781, "learning_rate": 3.05224974417995e-05, "loss": 48.1156, "step": 97820 }, { "epoch": 0.3952455790915371, "grad_norm": 613.3410034179688, "learning_rate": 3.0520122553986855e-05, "loss": 58.0234, "step": 97830 }, { "epoch": 0.39528598035690476, "grad_norm": 1629.0135498046875, "learning_rate": 3.051774746107901e-05, "loss": 82.4246, "step": 97840 }, { "epoch": 0.3953263816222724, "grad_norm": 217.6327667236328, "learning_rate": 3.051537216312227e-05, "loss": 55.1513, "step": 97850 }, { "epoch": 0.39536678288764004, "grad_norm": 827.3215942382812, "learning_rate": 3.0512996660162923e-05, "loss": 49.9513, "step": 97860 }, { "epoch": 0.3954071841530077, "grad_norm": 958.5142822265625, "learning_rate": 3.05106209522473e-05, "loss": 64.4401, "step": 97870 }, { "epoch": 0.3954475854183753, "grad_norm": 453.5467834472656, "learning_rate": 3.0508245039421713e-05, "loss": 39.7421, "step": 97880 }, { "epoch": 0.3954879866837429, "grad_norm": 733.8382568359375, "learning_rate": 3.050586892173248e-05, "loss": 61.8497, "step": 97890 }, { "epoch": 0.39552838794911055, "grad_norm": 1093.428955078125, "learning_rate": 3.0503492599225918e-05, "loss": 101.909, "step": 97900 }, { "epoch": 0.3955687892144782, "grad_norm": 477.01220703125, "learning_rate": 3.0501116071948363e-05, "loss": 63.0303, "step": 97910 }, { "epoch": 0.39560919047984583, "grad_norm": 235.736572265625, "learning_rate": 3.0498739339946143e-05, "loss": 43.2636, "step": 97920 }, { "epoch": 0.39564959174521347, "grad_norm": 570.0729370117188, "learning_rate": 3.0496362403265594e-05, "loss": 61.491, "step": 97930 }, { "epoch": 0.3956899930105811, "grad_norm": 1310.9716796875, "learning_rate": 3.0493985261953056e-05, "loss": 111.6765, "step": 97940 }, { "epoch": 0.39573039427594875, "grad_norm": 589.3435668945312, "learning_rate": 3.0491607916054867e-05, "loss": 42.1561, "step": 97950 }, { "epoch": 0.39577079554131633, "grad_norm": 1587.877685546875, "learning_rate": 3.048923036561739e-05, "loss": 58.7071, "step": 97960 }, { "epoch": 0.395811196806684, "grad_norm": 567.8264770507812, "learning_rate": 3.0486852610686967e-05, "loss": 64.5263, "step": 97970 }, { "epoch": 0.3958515980720516, "grad_norm": 651.44287109375, "learning_rate": 3.0484474651309947e-05, "loss": 54.3802, "step": 97980 }, { "epoch": 0.39589199933741925, "grad_norm": 1693.81494140625, "learning_rate": 3.04820964875327e-05, "loss": 48.9196, "step": 97990 }, { "epoch": 0.3959324006027869, "grad_norm": 304.3753967285156, "learning_rate": 3.0479718119401588e-05, "loss": 68.3677, "step": 98000 }, { "epoch": 0.39597280186815453, "grad_norm": 679.2041015625, "learning_rate": 3.0477339546962974e-05, "loss": 40.7732, "step": 98010 }, { "epoch": 0.3960132031335221, "grad_norm": 432.08453369140625, "learning_rate": 3.0474960770263223e-05, "loss": 65.5436, "step": 98020 }, { "epoch": 0.39605360439888976, "grad_norm": 646.598876953125, "learning_rate": 3.0472581789348728e-05, "loss": 85.996, "step": 98030 }, { "epoch": 0.3960940056642574, "grad_norm": 449.9706726074219, "learning_rate": 3.0470202604265858e-05, "loss": 49.8928, "step": 98040 }, { "epoch": 0.39613440692962504, "grad_norm": 1422.8955078125, "learning_rate": 3.0467823215060997e-05, "loss": 52.3219, "step": 98050 }, { "epoch": 0.3961748081949927, "grad_norm": 512.1029052734375, "learning_rate": 3.046544362178053e-05, "loss": 55.1474, "step": 98060 }, { "epoch": 0.3962152094603603, "grad_norm": 421.8543395996094, "learning_rate": 3.0463063824470854e-05, "loss": 64.9397, "step": 98070 }, { "epoch": 0.3962556107257279, "grad_norm": 1446.7802734375, "learning_rate": 3.0460683823178366e-05, "loss": 85.9275, "step": 98080 }, { "epoch": 0.39629601199109554, "grad_norm": 751.712158203125, "learning_rate": 3.045830361794946e-05, "loss": 53.0268, "step": 98090 }, { "epoch": 0.3963364132564632, "grad_norm": 984.9591064453125, "learning_rate": 3.0455923208830532e-05, "loss": 66.3646, "step": 98100 }, { "epoch": 0.3963768145218308, "grad_norm": 952.146240234375, "learning_rate": 3.0453542595868008e-05, "loss": 45.042, "step": 98110 }, { "epoch": 0.39641721578719846, "grad_norm": 1196.6591796875, "learning_rate": 3.045116177910829e-05, "loss": 65.4604, "step": 98120 }, { "epoch": 0.3964576170525661, "grad_norm": 808.1182861328125, "learning_rate": 3.044878075859778e-05, "loss": 72.1062, "step": 98130 }, { "epoch": 0.39649801831793374, "grad_norm": 558.8911743164062, "learning_rate": 3.0446399534382923e-05, "loss": 42.1433, "step": 98140 }, { "epoch": 0.3965384195833013, "grad_norm": 771.634521484375, "learning_rate": 3.0444018106510122e-05, "loss": 54.9019, "step": 98150 }, { "epoch": 0.39657882084866897, "grad_norm": 369.722412109375, "learning_rate": 3.0441636475025816e-05, "loss": 56.4451, "step": 98160 }, { "epoch": 0.3966192221140366, "grad_norm": 693.5859375, "learning_rate": 3.0439254639976427e-05, "loss": 71.9817, "step": 98170 }, { "epoch": 0.39665962337940425, "grad_norm": 650.7984619140625, "learning_rate": 3.0436872601408392e-05, "loss": 63.2401, "step": 98180 }, { "epoch": 0.3967000246447719, "grad_norm": 570.8035888671875, "learning_rate": 3.0434490359368155e-05, "loss": 51.4647, "step": 98190 }, { "epoch": 0.3967404259101395, "grad_norm": 1090.2740478515625, "learning_rate": 3.0432107913902162e-05, "loss": 66.7061, "step": 98200 }, { "epoch": 0.3967808271755071, "grad_norm": 1143.412353515625, "learning_rate": 3.0429725265056843e-05, "loss": 48.8705, "step": 98210 }, { "epoch": 0.39682122844087475, "grad_norm": 892.337158203125, "learning_rate": 3.0427342412878666e-05, "loss": 39.7136, "step": 98220 }, { "epoch": 0.3968616297062424, "grad_norm": 858.3418579101562, "learning_rate": 3.042495935741409e-05, "loss": 52.7396, "step": 98230 }, { "epoch": 0.39690203097161003, "grad_norm": 1290.9305419921875, "learning_rate": 3.0422576098709554e-05, "loss": 61.2084, "step": 98240 }, { "epoch": 0.39694243223697767, "grad_norm": 867.2406005859375, "learning_rate": 3.0420192636811535e-05, "loss": 60.7522, "step": 98250 }, { "epoch": 0.3969828335023453, "grad_norm": 968.0877075195312, "learning_rate": 3.0417808971766488e-05, "loss": 66.9356, "step": 98260 }, { "epoch": 0.39702323476771295, "grad_norm": 539.1856079101562, "learning_rate": 3.0415425103620893e-05, "loss": 52.8227, "step": 98270 }, { "epoch": 0.39706363603308054, "grad_norm": 1524.3682861328125, "learning_rate": 3.041304103242123e-05, "loss": 65.6602, "step": 98280 }, { "epoch": 0.3971040372984482, "grad_norm": 585.87158203125, "learning_rate": 3.041065675821397e-05, "loss": 60.9574, "step": 98290 }, { "epoch": 0.3971444385638158, "grad_norm": 426.2937927246094, "learning_rate": 3.0408272281045598e-05, "loss": 52.4128, "step": 98300 }, { "epoch": 0.39718483982918346, "grad_norm": 905.8636474609375, "learning_rate": 3.0405887600962592e-05, "loss": 62.0832, "step": 98310 }, { "epoch": 0.3972252410945511, "grad_norm": 684.369873046875, "learning_rate": 3.0403502718011456e-05, "loss": 54.1064, "step": 98320 }, { "epoch": 0.39726564235991874, "grad_norm": 713.6025390625, "learning_rate": 3.040111763223868e-05, "loss": 46.3603, "step": 98330 }, { "epoch": 0.3973060436252863, "grad_norm": 483.4386901855469, "learning_rate": 3.0398732343690755e-05, "loss": 55.9319, "step": 98340 }, { "epoch": 0.39734644489065396, "grad_norm": 411.5086669921875, "learning_rate": 3.039634685241419e-05, "loss": 48.6646, "step": 98350 }, { "epoch": 0.3973868461560216, "grad_norm": 785.1961669921875, "learning_rate": 3.0393961158455494e-05, "loss": 74.4425, "step": 98360 }, { "epoch": 0.39742724742138924, "grad_norm": 511.2066650390625, "learning_rate": 3.0391575261861175e-05, "loss": 58.8918, "step": 98370 }, { "epoch": 0.3974676486867569, "grad_norm": 531.2205810546875, "learning_rate": 3.0389189162677746e-05, "loss": 50.1892, "step": 98380 }, { "epoch": 0.3975080499521245, "grad_norm": 1097.158203125, "learning_rate": 3.0386802860951722e-05, "loss": 71.8221, "step": 98390 }, { "epoch": 0.3975484512174921, "grad_norm": 1047.31982421875, "learning_rate": 3.038441635672963e-05, "loss": 68.5992, "step": 98400 }, { "epoch": 0.39758885248285974, "grad_norm": 752.1648559570312, "learning_rate": 3.0382029650058e-05, "loss": 61.804, "step": 98410 }, { "epoch": 0.3976292537482274, "grad_norm": 765.8179931640625, "learning_rate": 3.037964274098335e-05, "loss": 53.9927, "step": 98420 }, { "epoch": 0.397669655013595, "grad_norm": 1045.579345703125, "learning_rate": 3.0377255629552222e-05, "loss": 61.845, "step": 98430 }, { "epoch": 0.39771005627896266, "grad_norm": 386.06671142578125, "learning_rate": 3.037486831581115e-05, "loss": 58.5219, "step": 98440 }, { "epoch": 0.3977504575443303, "grad_norm": 904.6885986328125, "learning_rate": 3.0372480799806686e-05, "loss": 73.4442, "step": 98450 }, { "epoch": 0.39779085880969794, "grad_norm": 1277.4661865234375, "learning_rate": 3.0370093081585366e-05, "loss": 110.8596, "step": 98460 }, { "epoch": 0.39783126007506553, "grad_norm": 544.1910400390625, "learning_rate": 3.036770516119374e-05, "loss": 116.7728, "step": 98470 }, { "epoch": 0.39787166134043317, "grad_norm": 1025.6640625, "learning_rate": 3.0365317038678362e-05, "loss": 53.2361, "step": 98480 }, { "epoch": 0.3979120626058008, "grad_norm": 1097.7906494140625, "learning_rate": 3.0362928714085804e-05, "loss": 46.7724, "step": 98490 }, { "epoch": 0.39795246387116845, "grad_norm": 776.1931762695312, "learning_rate": 3.036054018746261e-05, "loss": 55.6301, "step": 98500 }, { "epoch": 0.3979928651365361, "grad_norm": 755.2182006835938, "learning_rate": 3.0358151458855345e-05, "loss": 61.8012, "step": 98510 }, { "epoch": 0.39803326640190373, "grad_norm": 938.1300048828125, "learning_rate": 3.0355762528310588e-05, "loss": 44.2787, "step": 98520 }, { "epoch": 0.3980736676672713, "grad_norm": 2723.06005859375, "learning_rate": 3.035337339587491e-05, "loss": 58.1217, "step": 98530 }, { "epoch": 0.39811406893263895, "grad_norm": 1637.096923828125, "learning_rate": 3.035098406159489e-05, "loss": 88.4366, "step": 98540 }, { "epoch": 0.3981544701980066, "grad_norm": 576.7115478515625, "learning_rate": 3.0348594525517102e-05, "loss": 53.7875, "step": 98550 }, { "epoch": 0.39819487146337423, "grad_norm": 355.59912109375, "learning_rate": 3.0346204787688137e-05, "loss": 64.696, "step": 98560 }, { "epoch": 0.3982352727287419, "grad_norm": 449.38360595703125, "learning_rate": 3.0343814848154586e-05, "loss": 32.5586, "step": 98570 }, { "epoch": 0.3982756739941095, "grad_norm": 826.9142456054688, "learning_rate": 3.0341424706963035e-05, "loss": 69.185, "step": 98580 }, { "epoch": 0.39831607525947715, "grad_norm": 781.5284423828125, "learning_rate": 3.033903436416009e-05, "loss": 61.5975, "step": 98590 }, { "epoch": 0.39835647652484474, "grad_norm": 741.2518920898438, "learning_rate": 3.0336643819792342e-05, "loss": 68.1245, "step": 98600 }, { "epoch": 0.3983968777902124, "grad_norm": 894.6944580078125, "learning_rate": 3.0334253073906406e-05, "loss": 79.9229, "step": 98610 }, { "epoch": 0.39843727905558, "grad_norm": 473.85565185546875, "learning_rate": 3.0331862126548883e-05, "loss": 61.3682, "step": 98620 }, { "epoch": 0.39847768032094766, "grad_norm": 603.6492919921875, "learning_rate": 3.0329470977766387e-05, "loss": 66.0297, "step": 98630 }, { "epoch": 0.3985180815863153, "grad_norm": 889.0967407226562, "learning_rate": 3.0327079627605534e-05, "loss": 51.923, "step": 98640 }, { "epoch": 0.39855848285168294, "grad_norm": 610.2051391601562, "learning_rate": 3.0324688076112953e-05, "loss": 45.7892, "step": 98650 }, { "epoch": 0.3985988841170505, "grad_norm": 423.3575744628906, "learning_rate": 3.0322296323335263e-05, "loss": 77.9606, "step": 98660 }, { "epoch": 0.39863928538241816, "grad_norm": 492.7673645019531, "learning_rate": 3.031990436931909e-05, "loss": 45.4414, "step": 98670 }, { "epoch": 0.3986796866477858, "grad_norm": 400.84423828125, "learning_rate": 3.0317512214111066e-05, "loss": 55.4323, "step": 98680 }, { "epoch": 0.39872008791315344, "grad_norm": 1153.99560546875, "learning_rate": 3.031511985775783e-05, "loss": 68.0575, "step": 98690 }, { "epoch": 0.3987604891785211, "grad_norm": 528.0145263671875, "learning_rate": 3.0312727300306024e-05, "loss": 53.2187, "step": 98700 }, { "epoch": 0.3988008904438887, "grad_norm": 570.9860229492188, "learning_rate": 3.0310334541802287e-05, "loss": 86.0171, "step": 98710 }, { "epoch": 0.3988412917092563, "grad_norm": 479.5833740234375, "learning_rate": 3.030794158229327e-05, "loss": 80.2573, "step": 98720 }, { "epoch": 0.39888169297462395, "grad_norm": 2084.32666015625, "learning_rate": 3.030554842182563e-05, "loss": 64.0403, "step": 98730 }, { "epoch": 0.3989220942399916, "grad_norm": 534.7925415039062, "learning_rate": 3.0303155060446014e-05, "loss": 50.2246, "step": 98740 }, { "epoch": 0.3989624955053592, "grad_norm": 519.5526123046875, "learning_rate": 3.030076149820109e-05, "loss": 58.6772, "step": 98750 }, { "epoch": 0.39900289677072687, "grad_norm": 689.9923706054688, "learning_rate": 3.029836773513751e-05, "loss": 85.5789, "step": 98760 }, { "epoch": 0.3990432980360945, "grad_norm": 457.3630676269531, "learning_rate": 3.0295973771301956e-05, "loss": 43.8749, "step": 98770 }, { "epoch": 0.39908369930146215, "grad_norm": 532.2931518554688, "learning_rate": 3.0293579606741084e-05, "loss": 43.1888, "step": 98780 }, { "epoch": 0.39912410056682973, "grad_norm": 720.2774658203125, "learning_rate": 3.0291185241501587e-05, "loss": 82.4107, "step": 98790 }, { "epoch": 0.39916450183219737, "grad_norm": 822.1246948242188, "learning_rate": 3.028879067563013e-05, "loss": 43.3162, "step": 98800 }, { "epoch": 0.399204903097565, "grad_norm": 633.0422973632812, "learning_rate": 3.0286395909173403e-05, "loss": 56.5374, "step": 98810 }, { "epoch": 0.39924530436293265, "grad_norm": 1895.790283203125, "learning_rate": 3.0284000942178095e-05, "loss": 63.1824, "step": 98820 }, { "epoch": 0.3992857056283003, "grad_norm": 363.9815979003906, "learning_rate": 3.028160577469089e-05, "loss": 76.7197, "step": 98830 }, { "epoch": 0.39932610689366793, "grad_norm": 1479.4224853515625, "learning_rate": 3.0279210406758493e-05, "loss": 98.4598, "step": 98840 }, { "epoch": 0.3993665081590355, "grad_norm": 1820.2047119140625, "learning_rate": 3.0276814838427593e-05, "loss": 73.8971, "step": 98850 }, { "epoch": 0.39940690942440316, "grad_norm": 579.8154296875, "learning_rate": 3.02744190697449e-05, "loss": 60.1406, "step": 98860 }, { "epoch": 0.3994473106897708, "grad_norm": 701.7965087890625, "learning_rate": 3.027202310075711e-05, "loss": 67.9611, "step": 98870 }, { "epoch": 0.39948771195513844, "grad_norm": 425.21319580078125, "learning_rate": 3.026962693151094e-05, "loss": 50.4619, "step": 98880 }, { "epoch": 0.3995281132205061, "grad_norm": 491.5628356933594, "learning_rate": 3.0267230562053113e-05, "loss": 41.3457, "step": 98890 }, { "epoch": 0.3995685144858737, "grad_norm": 484.04644775390625, "learning_rate": 3.0264833992430343e-05, "loss": 84.0886, "step": 98900 }, { "epoch": 0.39960891575124136, "grad_norm": 869.12158203125, "learning_rate": 3.0262437222689344e-05, "loss": 63.6839, "step": 98910 }, { "epoch": 0.39964931701660894, "grad_norm": 781.686767578125, "learning_rate": 3.0260040252876856e-05, "loss": 70.227, "step": 98920 }, { "epoch": 0.3996897182819766, "grad_norm": 722.117919921875, "learning_rate": 3.0257643083039587e-05, "loss": 73.3941, "step": 98930 }, { "epoch": 0.3997301195473442, "grad_norm": 1093.8751220703125, "learning_rate": 3.0255245713224294e-05, "loss": 70.9664, "step": 98940 }, { "epoch": 0.39977052081271186, "grad_norm": 495.45074462890625, "learning_rate": 3.0252848143477706e-05, "loss": 46.6267, "step": 98950 }, { "epoch": 0.3998109220780795, "grad_norm": 625.2247314453125, "learning_rate": 3.025045037384656e-05, "loss": 67.4223, "step": 98960 }, { "epoch": 0.39985132334344714, "grad_norm": 591.3223876953125, "learning_rate": 3.0248052404377613e-05, "loss": 47.861, "step": 98970 }, { "epoch": 0.3998917246088147, "grad_norm": 2261.043212890625, "learning_rate": 3.0245654235117605e-05, "loss": 109.4352, "step": 98980 }, { "epoch": 0.39993212587418236, "grad_norm": 539.4677124023438, "learning_rate": 3.0243255866113292e-05, "loss": 58.3937, "step": 98990 }, { "epoch": 0.39997252713955, "grad_norm": 3327.10107421875, "learning_rate": 3.024085729741143e-05, "loss": 74.1484, "step": 99000 }, { "epoch": 0.40001292840491764, "grad_norm": 791.87890625, "learning_rate": 3.0238458529058792e-05, "loss": 73.247, "step": 99010 }, { "epoch": 0.4000533296702853, "grad_norm": 1245.586669921875, "learning_rate": 3.023605956110213e-05, "loss": 74.173, "step": 99020 }, { "epoch": 0.4000937309356529, "grad_norm": 538.5578002929688, "learning_rate": 3.0233660393588217e-05, "loss": 69.8499, "step": 99030 }, { "epoch": 0.4001341322010205, "grad_norm": 1586.10302734375, "learning_rate": 3.0231261026563817e-05, "loss": 58.608, "step": 99040 }, { "epoch": 0.40017453346638815, "grad_norm": 1514.328369140625, "learning_rate": 3.022886146007572e-05, "loss": 71.7001, "step": 99050 }, { "epoch": 0.4002149347317558, "grad_norm": 884.135986328125, "learning_rate": 3.0226461694170706e-05, "loss": 53.863, "step": 99060 }, { "epoch": 0.40025533599712343, "grad_norm": 366.4808654785156, "learning_rate": 3.022406172889556e-05, "loss": 58.081, "step": 99070 }, { "epoch": 0.40029573726249107, "grad_norm": 778.1707763671875, "learning_rate": 3.0221661564297062e-05, "loss": 53.0686, "step": 99080 }, { "epoch": 0.4003361385278587, "grad_norm": 570.4125366210938, "learning_rate": 3.021926120042201e-05, "loss": 67.3994, "step": 99090 }, { "epoch": 0.40037653979322635, "grad_norm": 640.3318481445312, "learning_rate": 3.02168606373172e-05, "loss": 56.3023, "step": 99100 }, { "epoch": 0.40041694105859393, "grad_norm": 757.1697998046875, "learning_rate": 3.0214459875029437e-05, "loss": 62.845, "step": 99110 }, { "epoch": 0.4004573423239616, "grad_norm": 313.5112609863281, "learning_rate": 3.0212058913605512e-05, "loss": 65.6793, "step": 99120 }, { "epoch": 0.4004977435893292, "grad_norm": 529.2091064453125, "learning_rate": 3.0209657753092242e-05, "loss": 58.1838, "step": 99130 }, { "epoch": 0.40053814485469685, "grad_norm": 1389.11083984375, "learning_rate": 3.0207256393536438e-05, "loss": 68.3362, "step": 99140 }, { "epoch": 0.4005785461200645, "grad_norm": 977.894287109375, "learning_rate": 3.020485483498492e-05, "loss": 56.8673, "step": 99150 }, { "epoch": 0.40061894738543213, "grad_norm": 1881.879150390625, "learning_rate": 3.0202453077484496e-05, "loss": 60.6721, "step": 99160 }, { "epoch": 0.4006593486507997, "grad_norm": 1947.5126953125, "learning_rate": 3.0200051121081996e-05, "loss": 67.9177, "step": 99170 }, { "epoch": 0.40069974991616736, "grad_norm": 1017.6248168945312, "learning_rate": 3.0197648965824258e-05, "loss": 57.1318, "step": 99180 }, { "epoch": 0.400740151181535, "grad_norm": 485.7748107910156, "learning_rate": 3.01952466117581e-05, "loss": 54.8188, "step": 99190 }, { "epoch": 0.40078055244690264, "grad_norm": 944.8651733398438, "learning_rate": 3.0192844058930356e-05, "loss": 60.584, "step": 99200 }, { "epoch": 0.4008209537122703, "grad_norm": 964.3688354492188, "learning_rate": 3.019044130738787e-05, "loss": 94.7983, "step": 99210 }, { "epoch": 0.4008613549776379, "grad_norm": 1407.83056640625, "learning_rate": 3.0188038357177497e-05, "loss": 48.4079, "step": 99220 }, { "epoch": 0.40090175624300556, "grad_norm": 583.0308837890625, "learning_rate": 3.0185635208346053e-05, "loss": 41.2452, "step": 99230 }, { "epoch": 0.40094215750837314, "grad_norm": 591.8720092773438, "learning_rate": 3.0183231860940412e-05, "loss": 55.6352, "step": 99240 }, { "epoch": 0.4009825587737408, "grad_norm": 1591.0367431640625, "learning_rate": 3.018082831500743e-05, "loss": 54.9869, "step": 99250 }, { "epoch": 0.4010229600391084, "grad_norm": 947.4282836914062, "learning_rate": 3.0178424570593954e-05, "loss": 60.0285, "step": 99260 }, { "epoch": 0.40106336130447606, "grad_norm": 726.7391967773438, "learning_rate": 3.0176020627746853e-05, "loss": 99.4484, "step": 99270 }, { "epoch": 0.4011037625698437, "grad_norm": 487.5047302246094, "learning_rate": 3.0173616486512983e-05, "loss": 83.2084, "step": 99280 }, { "epoch": 0.40114416383521134, "grad_norm": 2766.857177734375, "learning_rate": 3.017121214693923e-05, "loss": 72.9982, "step": 99290 }, { "epoch": 0.4011845651005789, "grad_norm": 781.8902587890625, "learning_rate": 3.016880760907246e-05, "loss": 47.2423, "step": 99300 }, { "epoch": 0.40122496636594657, "grad_norm": 644.5406494140625, "learning_rate": 3.0166402872959547e-05, "loss": 34.2714, "step": 99310 }, { "epoch": 0.4012653676313142, "grad_norm": 536.601806640625, "learning_rate": 3.0163997938647377e-05, "loss": 66.7018, "step": 99320 }, { "epoch": 0.40130576889668185, "grad_norm": 428.3992614746094, "learning_rate": 3.0161592806182826e-05, "loss": 62.3801, "step": 99330 }, { "epoch": 0.4013461701620495, "grad_norm": 1168.3619384765625, "learning_rate": 3.01591874756128e-05, "loss": 56.47, "step": 99340 }, { "epoch": 0.4013865714274171, "grad_norm": 704.8736572265625, "learning_rate": 3.0156781946984187e-05, "loss": 51.4939, "step": 99350 }, { "epoch": 0.4014269726927847, "grad_norm": 1718.3294677734375, "learning_rate": 3.0154376220343883e-05, "loss": 46.5367, "step": 99360 }, { "epoch": 0.40146737395815235, "grad_norm": 679.1923828125, "learning_rate": 3.0151970295738775e-05, "loss": 51.8275, "step": 99370 }, { "epoch": 0.40150777522352, "grad_norm": 598.9464721679688, "learning_rate": 3.0149564173215786e-05, "loss": 47.5122, "step": 99380 }, { "epoch": 0.40154817648888763, "grad_norm": 979.5762329101562, "learning_rate": 3.014715785282182e-05, "loss": 66.2203, "step": 99390 }, { "epoch": 0.40158857775425527, "grad_norm": 330.1836242675781, "learning_rate": 3.0144751334603787e-05, "loss": 57.3458, "step": 99400 }, { "epoch": 0.4016289790196229, "grad_norm": 316.8360290527344, "learning_rate": 3.01423446186086e-05, "loss": 98.0352, "step": 99410 }, { "epoch": 0.40166938028499055, "grad_norm": 561.7307739257812, "learning_rate": 3.013993770488318e-05, "loss": 58.3772, "step": 99420 }, { "epoch": 0.40170978155035814, "grad_norm": 954.3633422851562, "learning_rate": 3.0137530593474467e-05, "loss": 55.0279, "step": 99430 }, { "epoch": 0.4017501828157258, "grad_norm": 1188.335693359375, "learning_rate": 3.0135123284429366e-05, "loss": 70.7138, "step": 99440 }, { "epoch": 0.4017905840810934, "grad_norm": 947.3251953125, "learning_rate": 3.013271577779482e-05, "loss": 57.347, "step": 99450 }, { "epoch": 0.40183098534646106, "grad_norm": 872.726318359375, "learning_rate": 3.0130308073617765e-05, "loss": 59.0678, "step": 99460 }, { "epoch": 0.4018713866118287, "grad_norm": 1036.0936279296875, "learning_rate": 3.012790017194514e-05, "loss": 70.4954, "step": 99470 }, { "epoch": 0.40191178787719634, "grad_norm": 752.7295532226562, "learning_rate": 3.0125492072823884e-05, "loss": 35.581, "step": 99480 }, { "epoch": 0.4019521891425639, "grad_norm": 340.7840881347656, "learning_rate": 3.0123083776300946e-05, "loss": 63.2269, "step": 99490 }, { "epoch": 0.40199259040793156, "grad_norm": 818.9947509765625, "learning_rate": 3.0120675282423274e-05, "loss": 64.8064, "step": 99500 }, { "epoch": 0.4020329916732992, "grad_norm": 644.2744750976562, "learning_rate": 3.011826659123784e-05, "loss": 40.1466, "step": 99510 }, { "epoch": 0.40207339293866684, "grad_norm": 890.6390380859375, "learning_rate": 3.011585770279158e-05, "loss": 65.8333, "step": 99520 }, { "epoch": 0.4021137942040345, "grad_norm": 690.338623046875, "learning_rate": 3.011344861713147e-05, "loss": 69.3904, "step": 99530 }, { "epoch": 0.4021541954694021, "grad_norm": 902.1892700195312, "learning_rate": 3.0111039334304474e-05, "loss": 63.0215, "step": 99540 }, { "epoch": 0.40219459673476976, "grad_norm": 741.2757568359375, "learning_rate": 3.0108629854357557e-05, "loss": 68.3149, "step": 99550 }, { "epoch": 0.40223499800013734, "grad_norm": 670.7085571289062, "learning_rate": 3.0106220177337696e-05, "loss": 39.7456, "step": 99560 }, { "epoch": 0.402275399265505, "grad_norm": 1225.3446044921875, "learning_rate": 3.010381030329187e-05, "loss": 71.8467, "step": 99570 }, { "epoch": 0.4023158005308726, "grad_norm": 931.3955078125, "learning_rate": 3.010140023226706e-05, "loss": 48.1199, "step": 99580 }, { "epoch": 0.40235620179624026, "grad_norm": 948.512451171875, "learning_rate": 3.0098989964310254e-05, "loss": 54.7482, "step": 99590 }, { "epoch": 0.4023966030616079, "grad_norm": 883.859619140625, "learning_rate": 3.009657949946844e-05, "loss": 64.0414, "step": 99600 }, { "epoch": 0.40243700432697554, "grad_norm": 647.8932495117188, "learning_rate": 3.009416883778861e-05, "loss": 41.6642, "step": 99610 }, { "epoch": 0.40247740559234313, "grad_norm": 890.1572875976562, "learning_rate": 3.009175797931776e-05, "loss": 57.7219, "step": 99620 }, { "epoch": 0.40251780685771077, "grad_norm": 1720.4071044921875, "learning_rate": 3.0089346924102892e-05, "loss": 76.2947, "step": 99630 }, { "epoch": 0.4025582081230784, "grad_norm": 1014.928466796875, "learning_rate": 3.0086935672191012e-05, "loss": 60.2255, "step": 99640 }, { "epoch": 0.40259860938844605, "grad_norm": 1435.76611328125, "learning_rate": 3.008452422362913e-05, "loss": 53.2729, "step": 99650 }, { "epoch": 0.4026390106538137, "grad_norm": 1477.9378662109375, "learning_rate": 3.0082112578464252e-05, "loss": 62.3835, "step": 99660 }, { "epoch": 0.40267941191918133, "grad_norm": 1133.9884033203125, "learning_rate": 3.0079700736743406e-05, "loss": 64.5264, "step": 99670 }, { "epoch": 0.4027198131845489, "grad_norm": 529.5723876953125, "learning_rate": 3.0077288698513595e-05, "loss": 61.7479, "step": 99680 }, { "epoch": 0.40276021444991655, "grad_norm": 437.4736328125, "learning_rate": 3.0074876463821855e-05, "loss": 44.3355, "step": 99690 }, { "epoch": 0.4028006157152842, "grad_norm": 440.5912780761719, "learning_rate": 3.007246403271522e-05, "loss": 60.6817, "step": 99700 }, { "epoch": 0.40284101698065183, "grad_norm": 632.7706298828125, "learning_rate": 3.0070051405240712e-05, "loss": 50.7286, "step": 99710 }, { "epoch": 0.4028814182460195, "grad_norm": 480.6388244628906, "learning_rate": 3.006763858144536e-05, "loss": 41.7258, "step": 99720 }, { "epoch": 0.4029218195113871, "grad_norm": 668.8065795898438, "learning_rate": 3.006522556137621e-05, "loss": 89.2431, "step": 99730 }, { "epoch": 0.40296222077675475, "grad_norm": 831.2247924804688, "learning_rate": 3.006281234508031e-05, "loss": 64.0738, "step": 99740 }, { "epoch": 0.40300262204212234, "grad_norm": 710.2486572265625, "learning_rate": 3.0060398932604707e-05, "loss": 52.5265, "step": 99750 }, { "epoch": 0.40304302330749, "grad_norm": 569.1033325195312, "learning_rate": 3.0057985323996443e-05, "loss": 77.308, "step": 99760 }, { "epoch": 0.4030834245728576, "grad_norm": 1512.23828125, "learning_rate": 3.0055571519302572e-05, "loss": 46.1707, "step": 99770 }, { "epoch": 0.40312382583822526, "grad_norm": 615.7180786132812, "learning_rate": 3.0053157518570163e-05, "loss": 46.0806, "step": 99780 }, { "epoch": 0.4031642271035929, "grad_norm": 826.94580078125, "learning_rate": 3.0050743321846273e-05, "loss": 78.669, "step": 99790 }, { "epoch": 0.40320462836896054, "grad_norm": 1966.9554443359375, "learning_rate": 3.0048328929177963e-05, "loss": 89.0282, "step": 99800 }, { "epoch": 0.4032450296343281, "grad_norm": 836.5111694335938, "learning_rate": 3.0045914340612312e-05, "loss": 68.0784, "step": 99810 }, { "epoch": 0.40328543089969576, "grad_norm": 1213.034912109375, "learning_rate": 3.0043499556196384e-05, "loss": 51.5873, "step": 99820 }, { "epoch": 0.4033258321650634, "grad_norm": 1511.6014404296875, "learning_rate": 3.0041084575977267e-05, "loss": 54.6265, "step": 99830 }, { "epoch": 0.40336623343043104, "grad_norm": 698.375, "learning_rate": 3.0038669400002035e-05, "loss": 52.8981, "step": 99840 }, { "epoch": 0.4034066346957987, "grad_norm": 552.4383544921875, "learning_rate": 3.003625402831777e-05, "loss": 52.009, "step": 99850 }, { "epoch": 0.4034470359611663, "grad_norm": 708.4588623046875, "learning_rate": 3.003383846097157e-05, "loss": 56.2367, "step": 99860 }, { "epoch": 0.40348743722653396, "grad_norm": 606.9962158203125, "learning_rate": 3.0031422698010523e-05, "loss": 66.7383, "step": 99870 }, { "epoch": 0.40352783849190155, "grad_norm": 607.9722900390625, "learning_rate": 3.002900673948173e-05, "loss": 46.2622, "step": 99880 }, { "epoch": 0.4035682397572692, "grad_norm": 1990.047607421875, "learning_rate": 3.0026590585432277e-05, "loss": 67.8025, "step": 99890 }, { "epoch": 0.4036086410226368, "grad_norm": 401.2357482910156, "learning_rate": 3.0024174235909286e-05, "loss": 68.7798, "step": 99900 }, { "epoch": 0.40364904228800447, "grad_norm": 1142.0714111328125, "learning_rate": 3.0021757690959856e-05, "loss": 59.7364, "step": 99910 }, { "epoch": 0.4036894435533721, "grad_norm": 627.4801025390625, "learning_rate": 3.0019340950631103e-05, "loss": 95.0898, "step": 99920 }, { "epoch": 0.40372984481873975, "grad_norm": 6133.18408203125, "learning_rate": 3.0016924014970138e-05, "loss": 97.9595, "step": 99930 }, { "epoch": 0.40377024608410733, "grad_norm": 0.0, "learning_rate": 3.001450688402408e-05, "loss": 60.2756, "step": 99940 }, { "epoch": 0.40381064734947497, "grad_norm": 460.2475891113281, "learning_rate": 3.0012089557840056e-05, "loss": 56.6995, "step": 99950 }, { "epoch": 0.4038510486148426, "grad_norm": 1136.620361328125, "learning_rate": 3.000967203646519e-05, "loss": 58.4774, "step": 99960 }, { "epoch": 0.40389144988021025, "grad_norm": 561.9410400390625, "learning_rate": 3.000725431994662e-05, "loss": 56.8739, "step": 99970 }, { "epoch": 0.4039318511455779, "grad_norm": 1254.2598876953125, "learning_rate": 3.000483640833147e-05, "loss": 47.4478, "step": 99980 }, { "epoch": 0.40397225241094553, "grad_norm": 540.2157592773438, "learning_rate": 3.0002418301666886e-05, "loss": 56.2391, "step": 99990 }, { "epoch": 0.4040126536763131, "grad_norm": 1104.1842041015625, "learning_rate": 3.0000000000000004e-05, "loss": 74.0873, "step": 100000 } ], "logging_steps": 10, "max_steps": 250000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }